diff --git a/mintlify-docs/LICENSE b/mintlify-docs/LICENSE new file mode 100644 index 0000000000..5411374274 --- /dev/null +++ b/mintlify-docs/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2023 Mintlify + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/mintlify-docs/README.md b/mintlify-docs/README.md new file mode 100644 index 0000000000..055c983adb --- /dev/null +++ b/mintlify-docs/README.md @@ -0,0 +1,43 @@ +# Mintlify Starter Kit + +Use the starter kit to get your docs deployed and ready to customize. + +Click the green **Use this template** button at the top of this repo to copy the Mintlify starter kit. 
The starter kit contains examples with + +- Guide pages +- Navigation +- Customizations +- API reference pages +- Use of popular components + +**[Follow the full quickstart guide](https://starter.mintlify.com/quickstart)** + +## Development + +Install the [Mintlify CLI](https://www.npmjs.com/package/mint) to preview your documentation changes locally. To install, use the following command: + +``` +npm i -g mint +``` + +Run the following command at the root of your documentation, where your `docs.json` is located: + +``` +mint dev +``` + +View your local preview at `http://localhost:3000`. + +## Publishing changes + +Install our GitHub app from your [dashboard](https://dashboard.mintlify.com/settings/organization/github-app) to propagate changes from your repo to your deployment. Changes are deployed to production automatically after pushing to the default branch. + +## Need help? + +### Troubleshooting + +- If your dev environment isn't running: Run `mint update` to ensure you have the most recent version of the CLI. +- If a page loads as a 404: Make sure you are running in a folder with a valid `docs.json`. 
+ +### Resources +- [Mintlify documentation](https://mintlify.com/docs) diff --git a/mintlify-docs/assets/a_tensor_formalism_for_computer_science.pdf b/mintlify-docs/assets/a_tensor_formalism_for_computer_science.pdf new file mode 100644 index 0000000000..b103256731 Binary files /dev/null and b/mintlify-docs/assets/a_tensor_formalism_for_computer_science.pdf differ diff --git a/mintlify-docs/assets/attribute-memory-Vespa.xls b/mintlify-docs/assets/attribute-memory-Vespa.xls new file mode 100644 index 0000000000..41958c3de0 Binary files /dev/null and b/mintlify-docs/assets/attribute-memory-Vespa.xls differ diff --git a/mintlify-docs/assets/commits-release.png b/mintlify-docs/assets/commits-release.png new file mode 100644 index 0000000000..13039c61e2 Binary files /dev/null and b/mintlify-docs/assets/commits-release.png differ diff --git a/mintlify-docs/assets/cover-image.png b/mintlify-docs/assets/cover-image.png new file mode 100644 index 0000000000..dff3529287 Binary files /dev/null and b/mintlify-docs/assets/cover-image.png differ diff --git a/mintlify-docs/assets/fonts/Roobert-Medium.woff b/mintlify-docs/assets/fonts/Roobert-Medium.woff new file mode 100644 index 0000000000..d8a95adf5c Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-Medium.woff differ diff --git a/mintlify-docs/assets/fonts/Roobert-Medium.woff2 b/mintlify-docs/assets/fonts/Roobert-Medium.woff2 new file mode 100644 index 0000000000..b5d52020ab Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-Medium.woff2 differ diff --git a/mintlify-docs/assets/fonts/Roobert-MediumItalic.woff b/mintlify-docs/assets/fonts/Roobert-MediumItalic.woff new file mode 100644 index 0000000000..a0f62a56e6 Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-MediumItalic.woff differ diff --git a/mintlify-docs/assets/fonts/Roobert-MediumItalic.woff2 b/mintlify-docs/assets/fonts/Roobert-MediumItalic.woff2 new file mode 100644 index 0000000000..39c52008a3 Binary files /dev/null and 
b/mintlify-docs/assets/fonts/Roobert-MediumItalic.woff2 differ diff --git a/mintlify-docs/assets/fonts/Roobert-Regular.woff b/mintlify-docs/assets/fonts/Roobert-Regular.woff new file mode 100644 index 0000000000..5985b8b1c4 Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-Regular.woff differ diff --git a/mintlify-docs/assets/fonts/Roobert-Regular.woff2 b/mintlify-docs/assets/fonts/Roobert-Regular.woff2 new file mode 100644 index 0000000000..78791dbf44 Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-Regular.woff2 differ diff --git a/mintlify-docs/assets/fonts/Roobert-RegularItalic.woff b/mintlify-docs/assets/fonts/Roobert-RegularItalic.woff new file mode 100644 index 0000000000..3d7bdf6eca Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-RegularItalic.woff differ diff --git a/mintlify-docs/assets/fonts/Roobert-RegularItalic.woff2 b/mintlify-docs/assets/fonts/Roobert-RegularItalic.woff2 new file mode 100644 index 0000000000..65b66ee797 Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-RegularItalic.woff2 differ diff --git a/mintlify-docs/assets/fonts/Roobert-SemiBold.woff b/mintlify-docs/assets/fonts/Roobert-SemiBold.woff new file mode 100644 index 0000000000..f252deb4ea Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-SemiBold.woff differ diff --git a/mintlify-docs/assets/fonts/Roobert-SemiBold.woff2 b/mintlify-docs/assets/fonts/Roobert-SemiBold.woff2 new file mode 100644 index 0000000000..7720398e20 Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-SemiBold.woff2 differ diff --git a/mintlify-docs/assets/fonts/Roobert-SemiBoldItalic.woff b/mintlify-docs/assets/fonts/Roobert-SemiBoldItalic.woff new file mode 100644 index 0000000000..b39e55aa6e Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-SemiBoldItalic.woff differ diff --git a/mintlify-docs/assets/fonts/Roobert-SemiBoldItalic.woff2 b/mintlify-docs/assets/fonts/Roobert-SemiBoldItalic.woff2 new file mode 100644 
index 0000000000..51d21e3e68 Binary files /dev/null and b/mintlify-docs/assets/fonts/Roobert-SemiBoldItalic.woff2 differ diff --git a/mintlify-docs/assets/graph-image.png b/mintlify-docs/assets/graph-image.png new file mode 100644 index 0000000000..0d09f733c6 Binary files /dev/null and b/mintlify-docs/assets/graph-image.png differ diff --git a/mintlify-docs/assets/icons/arrow-down.svg b/mintlify-docs/assets/icons/arrow-down.svg new file mode 100644 index 0000000000..594625557d --- /dev/null +++ b/mintlify-docs/assets/icons/arrow-down.svg @@ -0,0 +1,3 @@ + + + diff --git a/mintlify-docs/assets/icons/arrow-up.svg b/mintlify-docs/assets/icons/arrow-up.svg new file mode 100644 index 0000000000..b7a49ac5ba --- /dev/null +++ b/mintlify-docs/assets/icons/arrow-up.svg @@ -0,0 +1,3 @@ + + + diff --git a/mintlify-docs/assets/img/1x6.svg b/mintlify-docs/assets/img/1x6.svg new file mode 100644 index 0000000000..39c2e129be --- /dev/null +++ b/mintlify-docs/assets/img/1x6.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/2x3.svg b/mintlify-docs/assets/img/2x3.svg new file mode 100644 index 0000000000..9e760aa8b2 --- /dev/null +++ b/mintlify-docs/assets/img/2x3.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/3Dplot.png b/mintlify-docs/assets/img/3Dplot.png new file mode 100644 index 0000000000..509a4e05a9 Binary files /dev/null and b/mintlify-docs/assets/img/3Dplot.png differ diff --git a/mintlify-docs/assets/img/3x2.svg b/mintlify-docs/assets/img/3x2.svg new file mode 100644 index 0000000000..d2bc89766f --- /dev/null +++ b/mintlify-docs/assets/img/3x2.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/6x1.svg b/mintlify-docs/assets/img/6x1.svg new file mode 100644 index 0000000000..a52b7754aa --- /dev/null +++ b/mintlify-docs/assets/img/6x1.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/CI-integration.png 
b/mintlify-docs/assets/img/CI-integration.png new file mode 100644 index 0000000000..4c7af9d8f6 Binary files /dev/null and b/mintlify-docs/assets/img/CI-integration.png differ diff --git a/mintlify-docs/assets/img/QPS-scaling.svg b/mintlify-docs/assets/img/QPS-scaling.svg new file mode 100644 index 0000000000..a7835640ae --- /dev/null +++ b/mintlify-docs/assets/img/QPS-scaling.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/ScalingLatencyFactor0.005.svg b/mintlify-docs/assets/img/ScalingLatencyFactor0.005.svg new file mode 100644 index 0000000000..2fb9a5c77b --- /dev/null +++ b/mintlify-docs/assets/img/ScalingLatencyFactor0.005.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/ScalingLatencyFactor0.5.svg b/mintlify-docs/assets/img/ScalingLatencyFactor0.5.svg new file mode 100644 index 0000000000..50fda3fed4 --- /dev/null +++ b/mintlify-docs/assets/img/ScalingLatencyFactor0.5.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/Threads-per-search.svg b/mintlify-docs/assets/img/Threads-per-search.svg new file mode 100644 index 0000000000..e0d2b80af0 --- /dev/null +++ b/mintlify-docs/assets/img/Threads-per-search.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/add-node-move-buckets.svg b/mintlify-docs/assets/img/add-node-move-buckets.svg new file mode 100644 index 0000000000..1956b8f101 --- /dev/null +++ b/mintlify-docs/assets/img/add-node-move-buckets.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/app-download-dev.png b/mintlify-docs/assets/img/app-download-dev.png new file mode 100644 index 0000000000..291c7e2c77 Binary files /dev/null and b/mintlify-docs/assets/img/app-download-dev.png differ diff --git a/mintlify-docs/assets/img/app-download-prod.png b/mintlify-docs/assets/img/app-download-prod.png new file mode 100644 index 0000000000..3c8ad0fdd0 Binary files /dev/null and 
b/mintlify-docs/assets/img/app-download-prod.png differ diff --git a/mintlify-docs/assets/img/application-key.png b/mintlify-docs/assets/img/application-key.png new file mode 100644 index 0000000000..ab04a20651 Binary files /dev/null and b/mintlify-docs/assets/img/application-key.png differ diff --git a/mintlify-docs/assets/img/archive-aws-access-logs.png b/mintlify-docs/assets/img/archive-aws-access-logs.png new file mode 100644 index 0000000000..cf7288ec64 Binary files /dev/null and b/mintlify-docs/assets/img/archive-aws-access-logs.png differ diff --git a/mintlify-docs/assets/img/archive-aws-configure-access.png b/mintlify-docs/assets/img/archive-aws-configure-access.png new file mode 100644 index 0000000000..d3945436b6 Binary files /dev/null and b/mintlify-docs/assets/img/archive-aws-configure-access.png differ diff --git a/mintlify-docs/assets/img/archive-aws-enclave.png b/mintlify-docs/assets/img/archive-aws-enclave.png new file mode 100644 index 0000000000..bdc6be12f2 Binary files /dev/null and b/mintlify-docs/assets/img/archive-aws-enclave.png differ diff --git a/mintlify-docs/assets/img/archive-aws-expanded-dropdown.png b/mintlify-docs/assets/img/archive-aws-expanded-dropdown.png new file mode 100644 index 0000000000..9fc3369320 Binary files /dev/null and b/mintlify-docs/assets/img/archive-aws-expanded-dropdown.png differ diff --git a/mintlify-docs/assets/img/archive-azure-access-logs.png b/mintlify-docs/assets/img/archive-azure-access-logs.png new file mode 100644 index 0000000000..9cb171d905 Binary files /dev/null and b/mintlify-docs/assets/img/archive-azure-access-logs.png differ diff --git a/mintlify-docs/assets/img/archive-azure-configure-access.png b/mintlify-docs/assets/img/archive-azure-configure-access.png new file mode 100644 index 0000000000..74afe31d15 Binary files /dev/null and b/mintlify-docs/assets/img/archive-azure-configure-access.png differ diff --git a/mintlify-docs/assets/img/archive-azure-expanded-dropdown.png 
b/mintlify-docs/assets/img/archive-azure-expanded-dropdown.png new file mode 100644 index 0000000000..fb73624fe3 Binary files /dev/null and b/mintlify-docs/assets/img/archive-azure-expanded-dropdown.png differ diff --git a/mintlify-docs/assets/img/archive-gcp-access-logs.png b/mintlify-docs/assets/img/archive-gcp-access-logs.png new file mode 100644 index 0000000000..83650969ce Binary files /dev/null and b/mintlify-docs/assets/img/archive-gcp-access-logs.png differ diff --git a/mintlify-docs/assets/img/archive-gcp-configure-access.png b/mintlify-docs/assets/img/archive-gcp-configure-access.png new file mode 100644 index 0000000000..19d27ca1ed Binary files /dev/null and b/mintlify-docs/assets/img/archive-gcp-configure-access.png differ diff --git a/mintlify-docs/assets/img/archive-gcp-expanded-dropdown.png b/mintlify-docs/assets/img/archive-gcp-expanded-dropdown.png new file mode 100644 index 0000000000..6ceb9d8a3f Binary files /dev/null and b/mintlify-docs/assets/img/archive-gcp-expanded-dropdown.png differ diff --git a/mintlify-docs/assets/img/attributes-indexes.svg b/mintlify-docs/assets/img/attributes-indexes.svg new file mode 100644 index 0000000000..4f0285e839 --- /dev/null +++ b/mintlify-docs/assets/img/attributes-indexes.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/attributes-update.svg b/mintlify-docs/assets/img/attributes-update.svg new file mode 100644 index 0000000000..b9889dd707 --- /dev/null +++ b/mintlify-docs/assets/img/attributes-update.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/attributes.svg b/mintlify-docs/assets/img/attributes.svg new file mode 100644 index 0000000000..d91da73aa5 --- /dev/null +++ b/mintlify-docs/assets/img/attributes.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/automated-deployment-pin.png b/mintlify-docs/assets/img/automated-deployment-pin.png new file mode 100644 index 0000000000..410ef27ac9 Binary 
files /dev/null and b/mintlify-docs/assets/img/automated-deployment-pin.png differ diff --git a/mintlify-docs/assets/img/automated-deployment-production-test.png b/mintlify-docs/assets/img/automated-deployment-production-test.png new file mode 100644 index 0000000000..23297a0b78 Binary files /dev/null and b/mintlify-docs/assets/img/automated-deployment-production-test.png differ diff --git a/mintlify-docs/assets/img/automated-deployment-restart.png b/mintlify-docs/assets/img/automated-deployment-restart.png new file mode 100644 index 0000000000..efd0b998e8 Binary files /dev/null and b/mintlify-docs/assets/img/automated-deployment-restart.png differ diff --git a/mintlify-docs/assets/img/automated-deployment-supersede.png b/mintlify-docs/assets/img/automated-deployment-supersede.png new file mode 100644 index 0000000000..f96a0925ac Binary files /dev/null and b/mintlify-docs/assets/img/automated-deployment-supersede.png differ diff --git a/mintlify-docs/assets/img/automated-deployments-complex.png b/mintlify-docs/assets/img/automated-deployments-complex.png new file mode 100644 index 0000000000..c918afb784 Binary files /dev/null and b/mintlify-docs/assets/img/automated-deployments-complex.png differ diff --git a/mintlify-docs/assets/img/automated-deployments-overview.png b/mintlify-docs/assets/img/automated-deployments-overview.png new file mode 100644 index 0000000000..6e84b83dc2 Binary files /dev/null and b/mintlify-docs/assets/img/automated-deployments-overview.png differ diff --git a/mintlify-docs/assets/img/block-window.png b/mintlify-docs/assets/img/block-window.png new file mode 100644 index 0000000000..86407ba457 Binary files /dev/null and b/mintlify-docs/assets/img/block-window.png differ diff --git a/mintlify-docs/assets/img/bucket-node-sequence.svg b/mintlify-docs/assets/img/bucket-node-sequence.svg new file mode 100644 index 0000000000..e4e9220241 --- /dev/null +++ b/mintlify-docs/assets/img/bucket-node-sequence.svg @@ -0,0 +1 @@ + \ No newline at end of 
file diff --git a/mintlify-docs/assets/img/canary-instance-one-app.png b/mintlify-docs/assets/img/canary-instance-one-app.png new file mode 100644 index 0000000000..80846ae08f Binary files /dev/null and b/mintlify-docs/assets/img/canary-instance-one-app.png differ diff --git a/mintlify-docs/assets/img/canaryapp.png b/mintlify-docs/assets/img/canaryapp.png new file mode 100644 index 0000000000..9460ac3e98 Binary files /dev/null and b/mintlify-docs/assets/img/canaryapp.png differ diff --git a/mintlify-docs/assets/img/cloud-benchmarks.svg b/mintlify-docs/assets/img/cloud-benchmarks.svg new file mode 100644 index 0000000000..4b09b59229 --- /dev/null +++ b/mintlify-docs/assets/img/cloud-benchmarks.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/config-assembly.svg b/mintlify-docs/assets/img/config-assembly.svg new file mode 100644 index 0000000000..2d31025174 --- /dev/null +++ b/mintlify-docs/assets/img/config-assembly.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/config-delivery.svg b/mintlify-docs/assets/img/config-delivery.svg new file mode 100644 index 0000000000..6ff70b08f8 --- /dev/null +++ b/mintlify-docs/assets/img/config-delivery.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/config-sentinel.svg b/mintlify-docs/assets/img/config-sentinel.svg new file mode 100644 index 0000000000..4578c8337f --- /dev/null +++ b/mintlify-docs/assets/img/config-sentinel.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/console-notifications.png b/mintlify-docs/assets/img/console-notifications.png new file mode 100644 index 0000000000..5e9733f818 Binary files /dev/null and b/mintlify-docs/assets/img/console-notifications.png differ diff --git a/mintlify-docs/assets/img/console/autoscale.png b/mintlify-docs/assets/img/console/autoscale.png new file mode 100644 index 0000000000..59746c1301 Binary files /dev/null and 
b/mintlify-docs/assets/img/console/autoscale.png differ diff --git a/mintlify-docs/assets/img/console/delete-production-deployment.png b/mintlify-docs/assets/img/console/delete-production-deployment.png new file mode 100644 index 0000000000..d828829e2f Binary files /dev/null and b/mintlify-docs/assets/img/console/delete-production-deployment.png differ diff --git a/mintlify-docs/assets/img/console/security.png b/mintlify-docs/assets/img/console/security.png new file mode 100644 index 0000000000..5730d77d46 Binary files /dev/null and b/mintlify-docs/assets/img/console/security.png differ diff --git a/mintlify-docs/assets/img/console/tuning.png b/mintlify-docs/assets/img/console/tuning.png new file mode 100644 index 0000000000..54cb529cdd Binary files /dev/null and b/mintlify-docs/assets/img/console/tuning.png differ diff --git a/mintlify-docs/assets/img/console/upgrade.png b/mintlify-docs/assets/img/console/upgrade.png new file mode 100644 index 0000000000..93c2bec0f7 Binary files /dev/null and b/mintlify-docs/assets/img/console/upgrade.png differ diff --git a/mintlify-docs/assets/img/console/zone-overview.png b/mintlify-docs/assets/img/console/zone-overview.png new file mode 100644 index 0000000000..fd9d9dd5e3 Binary files /dev/null and b/mintlify-docs/assets/img/console/zone-overview.png differ diff --git a/mintlify-docs/assets/img/container-components.svg b/mintlify-docs/assets/img/container-components.svg new file mode 100644 index 0000000000..fc7724d763 --- /dev/null +++ b/mintlify-docs/assets/img/container-components.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/dandelion-song.png b/mintlify-docs/assets/img/dandelion-song.png new file mode 100644 index 0000000000..1b77f46a1c Binary files /dev/null and b/mintlify-docs/assets/img/dandelion-song.png differ diff --git a/mintlify-docs/assets/img/dashboard.png b/mintlify-docs/assets/img/dashboard.png new file mode 100644 index 0000000000..80713a1535 Binary files /dev/null and 
b/mintlify-docs/assets/img/dashboard.png differ diff --git a/mintlify-docs/assets/img/deployment-with-system-test.png b/mintlify-docs/assets/img/deployment-with-system-test.png new file mode 100644 index 0000000000..c43f8cbed9 Binary files /dev/null and b/mintlify-docs/assets/img/deployment-with-system-test.png differ diff --git a/mintlify-docs/assets/img/diversity-1.png b/mintlify-docs/assets/img/diversity-1.png new file mode 100644 index 0000000000..497349440d Binary files /dev/null and b/mintlify-docs/assets/img/diversity-1.png differ diff --git a/mintlify-docs/assets/img/document-processing-class-diagram.svg b/mintlify-docs/assets/img/document-processing-class-diagram.svg new file mode 100644 index 0000000000..dcf3a63921 --- /dev/null +++ b/mintlify-docs/assets/img/document-processing-class-diagram.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/ecommerce-facets.png b/mintlify-docs/assets/img/ecommerce-facets.png new file mode 100644 index 0000000000..c1db5c2c06 Binary files /dev/null and b/mintlify-docs/assets/img/ecommerce-facets.png differ diff --git a/mintlify-docs/assets/img/elastic-fail.svg b/mintlify-docs/assets/img/elastic-fail.svg new file mode 100644 index 0000000000..bc66a7f0c8 --- /dev/null +++ b/mintlify-docs/assets/img/elastic-fail.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/elastic-feed-container.svg b/mintlify-docs/assets/img/elastic-feed-container.svg new file mode 100644 index 0000000000..6f4e60b3bc --- /dev/null +++ b/mintlify-docs/assets/img/elastic-feed-container.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/elastic-feed-vespafeeder.svg b/mintlify-docs/assets/img/elastic-feed-vespafeeder.svg new file mode 100644 index 0000000000..7e31c97cdb --- /dev/null +++ b/mintlify-docs/assets/img/elastic-feed-vespafeeder.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/elastic-feed.svg 
b/mintlify-docs/assets/img/elastic-feed.svg new file mode 100644 index 0000000000..ff159a93af --- /dev/null +++ b/mintlify-docs/assets/img/elastic-feed.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/elastic-grow.svg b/mintlify-docs/assets/img/elastic-grow.svg new file mode 100644 index 0000000000..77b1df9646 --- /dev/null +++ b/mintlify-docs/assets/img/elastic-grow.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/elastic-visit-get.svg b/mintlify-docs/assets/img/elastic-visit-get.svg new file mode 100644 index 0000000000..ef7b5c24db --- /dev/null +++ b/mintlify-docs/assets/img/elastic-visit-get.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/enclave-architecture.png b/mintlify-docs/assets/img/enclave-architecture.png new file mode 100644 index 0000000000..3452c556ce Binary files /dev/null and b/mintlify-docs/assets/img/enclave-architecture.png differ diff --git a/mintlify-docs/assets/img/federation-simple.svg b/mintlify-docs/assets/img/federation-simple.svg new file mode 100644 index 0000000000..5ee055f66c --- /dev/null +++ b/mintlify-docs/assets/img/federation-simple.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/federation.svg b/mintlify-docs/assets/img/federation.svg new file mode 100644 index 0000000000..0125c63a49 --- /dev/null +++ b/mintlify-docs/assets/img/federation.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/flat-content-distribution.svg b/mintlify-docs/assets/img/flat-content-distribution.svg new file mode 100644 index 0000000000..1cfa710fcc --- /dev/null +++ b/mintlify-docs/assets/img/flat-content-distribution.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/free-trial.png b/mintlify-docs/assets/img/free-trial.png new file mode 100644 index 0000000000..509085a108 Binary files /dev/null and 
b/mintlify-docs/assets/img/free-trial.png differ diff --git a/mintlify-docs/assets/img/geo/path1.png b/mintlify-docs/assets/img/geo/path1.png new file mode 100644 index 0000000000..14adfc650c Binary files /dev/null and b/mintlify-docs/assets/img/geo/path1.png differ diff --git a/mintlify-docs/assets/img/geo/path2.png b/mintlify-docs/assets/img/geo/path2.png new file mode 100644 index 0000000000..0b11df1e36 Binary files /dev/null and b/mintlify-docs/assets/img/geo/path2.png differ diff --git a/mintlify-docs/assets/img/geo/path3.png b/mintlify-docs/assets/img/geo/path3.png new file mode 100644 index 0000000000..70fbdf7ffc Binary files /dev/null and b/mintlify-docs/assets/img/geo/path3.png differ diff --git a/mintlify-docs/assets/img/geo/path4.png b/mintlify-docs/assets/img/geo/path4.png new file mode 100644 index 0000000000..b1fdfdd8d6 Binary files /dev/null and b/mintlify-docs/assets/img/geo/path4.png differ diff --git a/mintlify-docs/assets/img/geo/path5.png b/mintlify-docs/assets/img/geo/path5.png new file mode 100644 index 0000000000..8cbca7c2b4 Binary files /dev/null and b/mintlify-docs/assets/img/geo/path5.png differ diff --git a/mintlify-docs/assets/img/grafana-metrics.png b/mintlify-docs/assets/img/grafana-metrics.png new file mode 100644 index 0000000000..fe13ee07ac Binary files /dev/null and b/mintlify-docs/assets/img/grafana-metrics.png differ diff --git a/mintlify-docs/assets/img/grouped-content-distribution.svg b/mintlify-docs/assets/img/grouped-content-distribution.svg new file mode 100644 index 0000000000..23fbdc8e59 --- /dev/null +++ b/mintlify-docs/assets/img/grouped-content-distribution.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/grouped-topology.svg b/mintlify-docs/assets/img/grouped-topology.svg new file mode 100644 index 0000000000..209d5d10b8 --- /dev/null +++ b/mintlify-docs/assets/img/grouped-topology.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/health-api.svg 
b/mintlify-docs/assets/img/health-api.svg new file mode 100644 index 0000000000..cd9a8d498b --- /dev/null +++ b/mintlify-docs/assets/img/health-api.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/ide.gif b/mintlify-docs/assets/img/ide.gif new file mode 100644 index 0000000000..ca3520635e Binary files /dev/null and b/mintlify-docs/assets/img/ide.gif differ diff --git a/mintlify-docs/assets/img/index-bootstrap.svg b/mintlify-docs/assets/img/index-bootstrap.svg new file mode 100644 index 0000000000..0f0c2198fa --- /dev/null +++ b/mintlify-docs/assets/img/index-bootstrap.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/inheritance-overview.svg b/mintlify-docs/assets/img/inheritance-overview.svg new file mode 100644 index 0000000000..a3bebf544b --- /dev/null +++ b/mintlify-docs/assets/img/inheritance-overview.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/instances-zones.svg b/mintlify-docs/assets/img/instances-zones.svg new file mode 100644 index 0000000000..279e637590 --- /dev/null +++ b/mintlify-docs/assets/img/instances-zones.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/jvm-dump.png b/mintlify-docs/assets/img/jvm-dump.png new file mode 100644 index 0000000000..0bc54ee579 Binary files /dev/null and b/mintlify-docs/assets/img/jvm-dump.png differ diff --git a/mintlify-docs/assets/img/latency-documents.svg b/mintlify-docs/assets/img/latency-documents.svg new file mode 100644 index 0000000000..d05201c06a --- /dev/null +++ b/mintlify-docs/assets/img/latency-documents.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/latency-rank-profile.png b/mintlify-docs/assets/img/latency-rank-profile.png new file mode 100644 index 0000000000..7c96af4077 Binary files /dev/null and b/mintlify-docs/assets/img/latency-rank-profile.png differ diff --git a/mintlify-docs/assets/img/latency-total.png 
b/mintlify-docs/assets/img/latency-total.png new file mode 100644 index 0000000000..d55b0dfb7a Binary files /dev/null and b/mintlify-docs/assets/img/latency-total.png differ diff --git a/mintlify-docs/assets/img/llm-rag-searcher.svg b/mintlify-docs/assets/img/llm-rag-searcher.svg new file mode 100644 index 0000000000..2d978b9a3c --- /dev/null +++ b/mintlify-docs/assets/img/llm-rag-searcher.svg @@ -0,0 +1,21 @@ + + + eyJ2ZXJzaW9uIjoiMSIsImVuY29kaW5nIjoiYnN0cmluZyIsImNvbXByZXNzZWQiOnRydWUsImVuY29kZWQiOiJ4nO1cXGlTXHUwMDFiR1x1MDAxM/7uX0GRr9Fk7iPfQMKYXHUwMDE4MCDuN6nUXCKtxKLTq1x1MDAxNVfK//3tWVx1MDAxZHvrwIJcYsdyXHUwMDE1hj1GvdP9dD/d07P/fNjY2Fxmnvru5u9cdTAwMWKb7mPNaXt133nY/NVcdTAwMWW/d/2B1+vCKVx1MDAxYf496Fxy/Vp45W1cdTAwMTD0XHUwMDA3v//2W3RcdTAwMDeq9Tqju9y223G7wVx1MDAwMK77XHUwMDFm/L2x8U/4XHUwMDEzznh1e+9OufHQXHUwMDBl2Fx1MDAwMW57lafbc3+79vn2S3hreNFEXHUwMDE4361cdTAwMDVOt9l2o1OPcJxgRVx1MDAxMCVcXHOmxPTMXHUwMDEznGFGIElcdTAwMTjVhEYnXHUwMDFlvHpwa1x1MDAxZoBcdTAwMWKEqWL2Lmw/ZnrJres1b1x1MDAwMzu0VoiyxLgjXHUwMDAxft/A0yODwO+13HKv3fOtlL9cdTAwMTCjSI1GMt44tVbT71xyu/XpNYHvdFx1MDAwN33Hh0mJrmt47XY1eFxuR4eJhUncTH3HxUT01PGiu+BLm7ddd2AnnkyP9vpOzVx1MDAwYp7CmYuewkrY36uHOvorksl3Ou6eVVJ32G7HXHUwMDA37tbHXHUwMDAzT3RcdTAwMTkpio2PfIvEdF07XHUwMDA2JVRJKTDX0zORQTHG0kdcdTAwMGZ73dC4XGLRwjBqRPTg3qBcdTAwMDJWXHUwMDE1hMM2nPbAjabbyraTtri41cUsr3RqTr/q6n7l5oz5u5eVvath72o6gVx063N8v/ewOT3z7ddZ45b3evVLXFxyxPl+7fmyfF/+eF1ur2Dcw6ct51Bis+3f75zW/r5cdTAwMTLDysHZYuOOf4t0O+zXndFcdTAwMDRcdTAwMTJFKMNKM21iQGl73VZa8e1erVx1MDAxNc35h5jAKUjr08+1zs7pXHUwMDE1+3S8U7tg973K1+1SXHUwMDE20oH7XHUwMDE4pNBMKEaKakGEXHQ/Klx1MDAwMWmOKaJcXGFqMoAmXHUwMDAy5yDYILpcZnZd++8/g93EiTFIXHUwMDAxbJiBT5U4XHUwMDA3pDRmXHUwMDFmXHUwMDE5kFxuSbBcdTAwMTGcv1x1MDAwMKRcdEG+3zAjO7P2XHUwMDA1k7u/f/DbydbuRtV1/Nqt68c01+tcdTAwMDZV7zl8XHUwMDAymTj60el47TCMJIbbantNO1x1MDAxNZs1kD0+XHUwMDEyzEfgQfCbXlx1MDAxMPT60dlcdTAwMWGM6Hhd189qpOd7Ta/rtE/nXG5cdTAwMGJP7n6amjaiM1x1MDAwMLjPe48wT499eVx1MDAxZPj77mD3snq0/4KYXHUwMDFhadNOhVx1MDAxNFx1MDAwMlx1MDAxOVx1MDAwM1x1MDAwZYjNXG6oY+BGt07hqFxmXCJMKoir
PLwmmvGVxNVfbmiD3txcdTAwMTTg8tap3Vx1MDAwZX33XSBziahKiFx1MDAxMlorKmRcdTAwMWGa1l+SzNEpYDlIXG7hWL9cdTAwMDSwM6MqzGfpRlaPsX64K3nnjd3PZ2W2guj3vLXfrFx1MDAwNLx+zImh7GP5srz/pbyy6CfgJ49D7eXR77x77bh3oHn1pXVu1GG9h+9ai0Q/cHNcYpwvZWSEXHUwMDBmk0BcdTAwMWbnXHUwMDEyXHRcbjxXZNFcdTAwMDendIym/lxmgC9cdTAwMGWAnDNcbtYg80gqJcUklXGOwf+RXHUwMDE3kdRV0rJM9DtcdTAwMWW6/tNah7yUhEvEudmMOOVcdTAwMGaSWNNcdTAwMWFcdTAwMTmpmNA0XHUwMDA0W6TvXHUwMDEwa0JcIkUgWWFUMslcco1malx1MDAwMjkpXHUwMDEw4Vx1MDAxNFx1MDAxM1x1MDAxNaaPOKb4KfowRETKJMNcdTAwMTTMlYF5sFx1MDAxNUe8XHUwMDFmXHSM2ZhHi2JcdTAwMWXgXHUwMDEwXHUwMDBiLuJcdTAwMGU7XHUwMDE281x1MDAxOE1cdTAwMWadgFQrZiD9VC/B6Fx1MDAwMlx1MDAxY1XGLXhRlPZ7XjqmRr9tROZcdTAwMTH+Mf39r19zry60Sfsp5ZhjNGAmRLadQVDudTpeXHUwMDAwT3pkpcx4zMDxg22vW/e6zfQ5t1uPzsRUOy5cdTAwMDbtLVD5XHS9Um04XGKBxLgyWFx1MDAwMmVcdTAwMTTaaFx1MDAwNd42dlXT6YeaQFx1MDAwNKxC6nHsJFwiaz5W4C3rXHUwMDBibl0nY4wgcvxcXJxEpJ3PxK9YT7WZY4NaY1JkhJSCsdDFjHDk5Lb9yqDuNvbvby9p9eFQdVx1MDAwZnePXHUwMDBm1oflZj1ZmENwpYDMSDA0oVx1MDAxOaT0Sf9KOVx1MDAxMoJcdTAwMWLIXHUwMDE5JTGYSpFcdTAwMTJsOeLfaNS1o6NrpkVcdTAwMDFCkJBg6JpcdTAwMGJhjSOHIXGGjFx1MDAwNiG0gvRcdTAwMDbzmF7G3lx1MDAwNrRcdTAwMDVcdTAwMTiPU+m4j4vMcHjeOjih10Q8XHUwMDAzU9slftU7UFx1MDAxZuPWTFx1MDAxZjVxgpvLRrVyaahzfnR5c3dcdTAwMTe/oFa93j0/+WS8L41P1YvjSvu0WmdcdTAwMTNcdTAwMWG7cme6NFx1MDAwYnmRf1vYSSRcdTAwMDdbzPksiuXEuZV6XFzwTFJpXHRcdTAwMTYuqJRcblx1MDAxOGD8doI10pRcdTAwMDHXXGKLI0KS+eOB96ZEMCWZUlKyyGbDXHUwMDAxqU3MXHUwMDE5XHUwMDAxolx03ERwquZcciiQXHUwMDAy2zdcdTAwMDaCXHUwMDAxXHUwMDA1ooR5cjhIKeBLKCT8QHtcdTAwMDWm84aDXGZFXHUwMDFhoLqGXHUwMDE4XHRcdTAwMTmkMMnnZVx1MDAwNGEjtVx1MDAwMMhTgbGWc8UziFx1MDAxM3hcdTAwMTDguJJcdCVSw1EksFx1MDAxMsDTwOtcdTAwMGI9bzBDkKTGXGKljCaCJFx1MDAxZpVhRFx1MDAwNVx1MDAxY6RcdTAwMTBcdTAwMWbDKZw3mlWe4lKB75JcdTAwMTi0kVJcdTAwMDRHoHZcdTAwMGVKMoTAXGbPVYT1ReBcdTAwMGXhJsi74Y/kaGBHVFx1MDAwMcg4kE2FXHJbZDRcdTAwMWLoiDZcZsNcdTAwMGaRfFhcdTAwMDLjXHRqrHxUXHUwMDFiOV+vdjzKmMaMKVx1MDAwM/lnUj6DXGI1ikKOzIXV/Vx1MDAwMuJhJGE4w4R1XHUwMDFlkLGnmFxiZ5AuKUVswddcdTAwMThl5iuDIa6whJu4XHUwMDE0XG7UmFx1MDAxOFx1MDAxMKxcXFx1MDAxOVxuolx1MDAw
MVx1MDAwNkH6+eNcdTAwMDH7Z1rC81x1MDAxONBcdTAwMDaImbC7XHUwMDEyoFZxXHUwMDAzQFx1MDAwNNVzPl+3glx1MDAwM2wtqaJcdTAwMTRcdTAwMWWM68TklayqwEdgiGRCgkGRuV5cdTAwMDBgye3EQezi3CRBUTJcYlxcMehcdTAwMWMgXHUwMDAx8VXM1Vx1MDAwNYe54eCAJFx1MDAwMU1ApFx1MDAxNYnROJJcdTAwMDLsXHUwMDA0xlx1MDAwMdJcdTAwMDKaYGLeeJZHQlgnRlBcdTAwMThcdTAwMTbCatr6wlx1MDAwYux4zIROgIHvm+v3XG687of4/0U0zG23vf4gl4lcdTAwMTlwb4U5O1x1MDAxMGBcZmqOcZO5VGyvNTzaMmKbnDXumrvth37/rPlcdTAwMGWomERcdTAwMTY5XHUwMDEwnoD8gykmuVx1MDAxONinXHUwMDBlXHUwMDE3VbCx3oSlXHUwMDA0W1x1MDAxNVx1MDAxNcPwLWD9XHUwMDE0XFwzt3JkuVx1MDAxOKVcdTAwMTBcZiBcblx1MDAxMVx1MDAwMlk5aEZFiplUO4XlaeDdf5KxJcjYTPCkaiPxJKYwh7GgXHUwMDExhEQhcS5wZi+1/ss1i1x1MDAwMtgwJsDVMsgkIV2zZcEkbMDpp7pcdTAwMGVWVcGZYlx1MDAwNthcdTAwMTZ8XHKQNyOBXHUwMDFmQWTLYlx1MDAwNqchXCJcYjBcdTAwMTbgVPlcYnlfhluURXx3QVx1MDAwMUJcdTAwMTSDOFxmMdZImFtcdTAwMDZcXCVG7yZcdTAwMTVcdTAwMDXgulx1MDAxYUiAUqH6pck+/ZtlMGmYrjSJKbaz1O0zo/FcYujBxZPr7vtHjmjdt/Zl0/273tpZaOlcdTAwMWaCMdJcZlx1MDAxYs6SfTyQ5Fx1MDAwMDm261x1MDAxZePKTrZcdTAwMTYrJKJvsvrxiyPqutF4K8dcdTAwMTXJ+lx1MDAwNitcdTAwMWZcdTAwMDJ0XHUwMDBmiaXJlE9DXHUwMDAwZ/pcdTAwMDEmsVx1MDAwMPgspGhcXEdcdTAwMTescOHjRc5isvAxWkFf65WPtIhcdTAwMGItfcyN3FJcdTAwMTVyXlxybFx1MDAwZlg7X5zyzm5mWs/IzVx1MDAwNUNcdTAwMTC9TbolXHUwMDEwXHUwMDEyWCChgkDyXHSpM46yvtWHbk5cdTAwMTHB4ErVqNdBR19VXHUwMDFjuinhmFMsYutcdTAwMWE/Xuxefpngc6c9PDnYVqfmuMNJ7Y/2M9vheVGdXCJcdTAwMDPGTSSkuZCFWzW7pVjhYVx1MDAxY9YhKFx1MDAxOFwiIdVcclx1MDAxYj/hosy8vKfYXSq2tNT9XHUwMDBiXHUwMDA07+qWa1x1MDAwNi1f4bvTvcOdXHUwMDAwu4/HvYU6XHUwMDE3XG6Dt2BcdTAwMThcdMg4bSEonO6fsXt8dLVte1pKXCKl1Hlte4Rk1kmnwTtcXEldr1x1MDAxNdFJ+D5xXHUwMDA3w3aw1uE7LeJcdTAwMTKdXHUwMDBis53a3Fx1MDAwZT2KJVwiRrB4b3tIlrVcdTAwMTlHuFxm0IxdXG6gJLdPKNbu8l5cdTAwMWJcdTAwMTNeXHJvyzTjYcy5hCCU15ggMi1F03JcbldYyJehsKBcdTAwMTUv11XHzG8ruNmulXvXX1x1MDAwZrelfKhcXO1cdTAwMWP3r7r/dn/7d3RcdTAwMGVOgttcblx1MDAxZNJcZvDmz95cInGS2lx1MDAwNTzBXGaWKeBcdTAwMDLpQzJcdTAwMWa4UiCZXHUwMDE3XCKZRnxcdTAwMTnYrmVz35uGScq5UUBF8lLcYnhcdTAwMTJOtFGJXHUwMDE221VFSWNcdTAwMTlcdTAwMTNdwigzUbLsuzDkxp/dvt/r9N8mXFx2vHo9XHUwMDFlkZJcdTAwMTFz
XmhLXHUwMDA30fFcdTAwMTOk5V9NKsx18ZZcdTAwMDWsNeFqmSp2w/dcdTAwMWUq5vrv1uevXHUwMDFmr2//IKePd9WLNc+FwcJcdTAwMTDHWtmVR6ljpj/pcySUXHUwMDE4u1x1MDAwZcosTX+9Yja1K8xYc0q1bXDWObXsXHUwMDFj1zhcdTAwMDEupoLyRIj8L2XG77ayXFyodPspRfqOxvlcdTAwMTD/v1xi7snAXHUwMDFh20WIVfroNNOxy4mSykW2O4zAXjnvbH08NDXhNzvNo6vqwWP5gq452GGKkVx1MDAxNkxcdTAwMTA5qpUnu+5cdTAwMDD+yNiGXHUwMDA2Olx1MDAwM+YvZFx00yyDoblcdTAwMWJcdTAwMTGmbFx1MDAxOfRFhKQrKHclTrxccnqXXHUwMDBlsJuj3V1cdTAwMWLltpeYujfbV5b55iXy1dncfFanPbFbw2yHpdRcZoyPqmT7gZBcdTAwMDTZjljbXHUwMDEwZD80Y1I5XHUwMDFiO5VBTGKj+bjmxWZcdTAwMDDjfaSvb7vL01DMXHUwMDE509ntYVZ6Wtwxo0FLKlGg/vfrRavt54xfnbGybJhaNiqvsNqNXHUwMDExsDpNbbE7bOrRRMeuXHUwMDFh1bpcdTAwMTkyhlx1MDAxMTz+QFx1MDAxMM42xS9Ugp+9nzQhXHUwMDE0Z1xcXHS7uEKIpFx1MDAwMP2MTIQjXHUwMDA1vkDx8ZtcdTAwMTeoyci08kb9nDJavD5aaPCQ83Eq481cdTAwMTTzWEN/5+aPfXKwy0/M48ndXHUwMDFlcVx1MDAxYpJU1iWZLkxcdTAwMTEkooZcdTAwMWJKRoaeanSZlFx1MDAxNFl6V+KqU4RcdTAwMDUrlFx1MDAxMydcdTAwMDZcdFx1MDAwMVx1MDAxMFx1MDAxZarfqMa3ospbuS0ubzq+X74vuc1B475aXHUwMDFh4OFcdTAwMWLsgZ057sWwe893K89cdTAwMTdn/mPzobUtatzZX2zcjCd85X61XCLuP1x1MDAwM8VcdTAwMDRMXHUwMDFh3E280jJ31TtXS+tccmPbw80s+Vx1MDAxZjfKJGE8LTBOYDyjz/M7c4CFXG6W0yTAWFq4kn6195JcdTAwMDRUXHUwMDAz33U6XHUwMDFif3b91PrRSstwc1wiUaZcdTAwMTVlJFRapCVShNnOaWaKQDhBRmNb4UmWxcEyXHUwMDEwwJeS7Fx1MDAxYl/y0lx1MDAwMm2XwimMXHUwMDA2eVx1MDAwNlx1MDAxNbFM42dasEhaYOvcwClx3ipycX1cdTAwMWN8XG62W5XWaVx1MDAxMfn1koJSxsjeXCIrWJiAh3trXGJoQ0PCbbeHkNh+jzRcdTAwMDNcdTAwMWZcdTAwMTFwPN1ZtWRWMNvDJFNcdTAwMTUupOCQq1x1MDAxMIFcdFNEZttthX21k93bK1x1MDAwNERrirNCrSwtWIT0zPRXWDOUdlXCvrqKpXzYlNoqXHUwMDE0vvgmJyrOius/nVTOXHUwMDBiOihcdTAwMTNcdTAwMDVtqtmX4ERcdTAwMTSQY1x0XHUwMDA2v1Am9y6dVCljYqnbXs85LeNcdTAwMDdcZpFYKS2VtE1cdTAwMGYkZ1x1MDAxZr/dXHUwMDA0ZKJcXDSbiC3km1o3J2Lr3DVP22dbJe/u40FNuUdcdTAwMDVcdTAwMGVcdTAwMTNToVx1MDAwNVhcdTAwMDdXIJ20S1x1MDAxNVx1MDAxOalcZmJcZjPDx2V18+rOabb8M19cIoSBfVPwROmOu9BJ2Vx1MDAxY2HUj5fxUW/WifejuyiKXHUwMDA1zLNRuS9cdTAwMTFihbuq4HrNIPC9qJN+arBrn91n0qFcdTAwMWa70e/u/uzQP1RcdTAwMDfdTvf08vaxY55FNVhcdTAwMDTIwJTANUHaLkiqx09cdTAwMTkkLdFcYrfXwieajXlcdTAw
MTn4K3TV1tw6rztcdTAwMDU4fk8v3ctDsq1SQ5SieUBcdTAwMTYmfXBKNsD5wo3kRS9cdTAwMDN77bbaMlh90vGuXHUwMDFm3DIy5uPtw9gxbTr9fjWAiZrygc17z33Yzt2ebD/WwYVotdbqhlx1MDAwNeBvXHUwMDFmvv1cdTAwMWa1dtFeIn0= + + + + + LLM/RAG SearcherQuerySearchResultCreate promptLLM ClientStream resultResultContent \ No newline at end of file diff --git a/mintlify-docs/assets/img/load.png b/mintlify-docs/assets/img/load.png new file mode 100644 index 0000000000..88299ca525 Binary files /dev/null and b/mintlify-docs/assets/img/load.png differ diff --git a/mintlify-docs/assets/img/lose-node-move-buckets.svg b/mintlify-docs/assets/img/lose-node-move-buckets.svg new file mode 100644 index 0000000000..6f643e6231 --- /dev/null +++ b/mintlify-docs/assets/img/lose-node-move-buckets.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/manage-users.png b/mintlify-docs/assets/img/manage-users.png new file mode 100644 index 0000000000..c1ed9d0c60 Binary files /dev/null and b/mintlify-docs/assets/img/manage-users.png differ diff --git a/mintlify-docs/assets/img/memory-visualizer-1.png b/mintlify-docs/assets/img/memory-visualizer-1.png new file mode 100644 index 0000000000..0ddd21f65d Binary files /dev/null and b/mintlify-docs/assets/img/memory-visualizer-1.png differ diff --git a/mintlify-docs/assets/img/metrics-api.svg b/mintlify-docs/assets/img/metrics-api.svg new file mode 100644 index 0000000000..cbf09fbc36 --- /dev/null +++ b/mintlify-docs/assets/img/metrics-api.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/monitoring-annotations-example.png b/mintlify-docs/assets/img/monitoring-annotations-example.png new file mode 100644 index 0000000000..5fbb473965 Binary files /dev/null and b/mintlify-docs/assets/img/monitoring-annotations-example.png differ diff --git a/mintlify-docs/assets/img/monitoring-container-thread-pools.png b/mintlify-docs/assets/img/monitoring-container-thread-pools.png new file mode 100644 index 0000000000..6066cc495c Binary 
files /dev/null and b/mintlify-docs/assets/img/monitoring-container-thread-pools.png differ diff --git a/mintlify-docs/assets/img/monitoring-dashboard-tabs.png b/mintlify-docs/assets/img/monitoring-dashboard-tabs.png new file mode 100644 index 0000000000..2bcd50674c Binary files /dev/null and b/mintlify-docs/assets/img/monitoring-dashboard-tabs.png differ diff --git a/mintlify-docs/assets/img/monitoring-getting-started.svg b/mintlify-docs/assets/img/monitoring-getting-started.svg new file mode 100644 index 0000000000..ec65fb28b9 --- /dev/null +++ b/mintlify-docs/assets/img/monitoring-getting-started.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/monitoring-health-indicators.png b/mintlify-docs/assets/img/monitoring-health-indicators.png new file mode 100644 index 0000000000..5186c9f67e Binary files /dev/null and b/mintlify-docs/assets/img/monitoring-health-indicators.png differ diff --git a/mintlify-docs/assets/img/monitoring-jvm-memory.png b/mintlify-docs/assets/img/monitoring-jvm-memory.png new file mode 100644 index 0000000000..1390207d0e Binary files /dev/null and b/mintlify-docs/assets/img/monitoring-jvm-memory.png differ diff --git a/mintlify-docs/assets/img/monitoring-rank-profile-rows.png b/mintlify-docs/assets/img/monitoring-rank-profile-rows.png new file mode 100644 index 0000000000..93bd1e0cdf Binary files /dev/null and b/mintlify-docs/assets/img/monitoring-rank-profile-rows.png differ diff --git a/mintlify-docs/assets/img/nodes.svg b/mintlify-docs/assets/img/nodes.svg new file mode 100644 index 0000000000..c0febd35da --- /dev/null +++ b/mintlify-docs/assets/img/nodes.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/overall-architecture.png b/mintlify-docs/assets/img/overall-architecture.png new file mode 100644 index 0000000000..0bae54dc67 Binary files /dev/null and b/mintlify-docs/assets/img/overall-architecture.png differ diff --git a/mintlify-docs/assets/img/parent-child.svg 
b/mintlify-docs/assets/img/parent-child.svg new file mode 100644 index 0000000000..4d379dde2d --- /dev/null +++ b/mintlify-docs/assets/img/parent-child.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/phased-ranking-rag.png b/mintlify-docs/assets/img/phased-ranking-rag.png new file mode 100644 index 0000000000..cf72a5ccd2 Binary files /dev/null and b/mintlify-docs/assets/img/phased-ranking-rag.png differ diff --git a/mintlify-docs/assets/img/phased-ranking.png b/mintlify-docs/assets/img/phased-ranking.png new file mode 100644 index 0000000000..72bee90c1e Binary files /dev/null and b/mintlify-docs/assets/img/phased-ranking.png differ diff --git a/mintlify-docs/assets/img/pin-version.png b/mintlify-docs/assets/img/pin-version.png new file mode 100644 index 0000000000..5267da3226 Binary files /dev/null and b/mintlify-docs/assets/img/pin-version.png differ diff --git a/mintlify-docs/assets/img/pipeline-1.png b/mintlify-docs/assets/img/pipeline-1.png new file mode 100644 index 0000000000..286c143075 Binary files /dev/null and b/mintlify-docs/assets/img/pipeline-1.png differ diff --git a/mintlify-docs/assets/img/pipeline-2.png b/mintlify-docs/assets/img/pipeline-2.png new file mode 100644 index 0000000000..15d8458b7c Binary files /dev/null and b/mintlify-docs/assets/img/pipeline-2.png differ diff --git a/mintlify-docs/assets/img/pipeline-3.png b/mintlify-docs/assets/img/pipeline-3.png new file mode 100644 index 0000000000..a202c29720 Binary files /dev/null and b/mintlify-docs/assets/img/pipeline-3.png differ diff --git a/mintlify-docs/assets/img/prodapp.png b/mintlify-docs/assets/img/prodapp.png new file mode 100644 index 0000000000..19a28fcf85 Binary files /dev/null and b/mintlify-docs/assets/img/prodapp.png differ diff --git a/mintlify-docs/assets/img/proton-databases.svg b/mintlify-docs/assets/img/proton-databases.svg new file mode 100644 index 0000000000..a7398a6862 --- /dev/null +++ b/mintlify-docs/assets/img/proton-databases.svg 
@@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/proton-feed.svg b/mintlify-docs/assets/img/proton-feed.svg new file mode 100644 index 0000000000..4c48980b7e --- /dev/null +++ b/mintlify-docs/assets/img/proton-feed.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/proton-query.svg b/mintlify-docs/assets/img/proton-query.svg new file mode 100644 index 0000000000..ec3819789b --- /dev/null +++ b/mintlify-docs/assets/img/proton-query.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/query-dispatch.svg b/mintlify-docs/assets/img/query-dispatch.svg new file mode 100644 index 0000000000..645603f83a --- /dev/null +++ b/mintlify-docs/assets/img/query-dispatch.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/query-groups.svg b/mintlify-docs/assets/img/query-groups.svg new file mode 100644 index 0000000000..1406c36fa3 --- /dev/null +++ b/mintlify-docs/assets/img/query-groups.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/query-to-response.svg b/mintlify-docs/assets/img/query-to-response.svg new file mode 100644 index 0000000000..390675f334 --- /dev/null +++ b/mintlify-docs/assets/img/query-to-response.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/querytree.svg b/mintlify-docs/assets/img/querytree.svg new file mode 100644 index 0000000000..7737d09bde --- /dev/null +++ b/mintlify-docs/assets/img/querytree.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/reindex-progress.png b/mintlify-docs/assets/img/reindex-progress.png new file mode 100644 index 0000000000..190689e3fe Binary files /dev/null and b/mintlify-docs/assets/img/reindex-progress.png differ diff --git a/mintlify-docs/assets/img/relevance/blog-freshness.png b/mintlify-docs/assets/img/relevance/blog-freshness.png new file mode 100644 index 0000000000..03f8f567a1 
Binary files /dev/null and b/mintlify-docs/assets/img/relevance/blog-freshness.png differ diff --git a/mintlify-docs/assets/img/relevance/closeness-logscale.png b/mintlify-docs/assets/img/relevance/closeness-logscale.png new file mode 100644 index 0000000000..77048438a3 Binary files /dev/null and b/mintlify-docs/assets/img/relevance/closeness-logscale.png differ diff --git a/mintlify-docs/assets/img/relevance/freshness-logscale.png b/mintlify-docs/assets/img/relevance/freshness-logscale.png new file mode 100644 index 0000000000..7cd8e46c19 Binary files /dev/null and b/mintlify-docs/assets/img/relevance/freshness-logscale.png differ diff --git a/mintlify-docs/assets/img/relevance/match-phase-max-hits.png b/mintlify-docs/assets/img/relevance/match-phase-max-hits.png new file mode 100644 index 0000000000..018a10db6f Binary files /dev/null and b/mintlify-docs/assets/img/relevance/match-phase-max-hits.png differ diff --git a/mintlify-docs/assets/img/relevance/plot-firstocc-tune.png b/mintlify-docs/assets/img/relevance/plot-firstocc-tune.png new file mode 100644 index 0000000000..63feaf5a11 Binary files /dev/null and b/mintlify-docs/assets/img/relevance/plot-firstocc-tune.png differ diff --git a/mintlify-docs/assets/img/relevance/plot-firstocc-weight.png b/mintlify-docs/assets/img/relevance/plot-firstocc-weight.png new file mode 100644 index 0000000000..fd9ddf3cc7 Binary files /dev/null and b/mintlify-docs/assets/img/relevance/plot-firstocc-weight.png differ diff --git a/mintlify-docs/assets/img/relevance/plot-numocc-tune.png b/mintlify-docs/assets/img/relevance/plot-numocc-tune.png new file mode 100644 index 0000000000..4f862ed70e Binary files /dev/null and b/mintlify-docs/assets/img/relevance/plot-numocc-tune.png differ diff --git a/mintlify-docs/assets/img/relevance/plot-numocc-weight.png b/mintlify-docs/assets/img/relevance/plot-numocc-weight.png new file mode 100644 index 0000000000..417355b56d Binary files /dev/null and 
b/mintlify-docs/assets/img/relevance/plot-numocc-weight.png differ diff --git a/mintlify-docs/assets/img/relevance/plot-proximity-tune.png b/mintlify-docs/assets/img/relevance/plot-proximity-tune.png new file mode 100644 index 0000000000..22c1567b5d Binary files /dev/null and b/mintlify-docs/assets/img/relevance/plot-proximity-tune.png differ diff --git a/mintlify-docs/assets/img/relevance/plot-proximity-weight.png b/mintlify-docs/assets/img/relevance/plot-proximity-weight.png new file mode 100644 index 0000000000..ef3e966c32 Binary files /dev/null and b/mintlify-docs/assets/img/relevance/plot-proximity-weight.png differ diff --git a/mintlify-docs/assets/img/relevance/ranktype-about.png b/mintlify-docs/assets/img/relevance/ranktype-about.png new file mode 100644 index 0000000000..59dc03ea52 Binary files /dev/null and b/mintlify-docs/assets/img/relevance/ranktype-about.png differ diff --git a/mintlify-docs/assets/img/relevance/ranktype-tags.png b/mintlify-docs/assets/img/relevance/ranktype-tags.png new file mode 100644 index 0000000000..a5ce1e2e6b Binary files /dev/null and b/mintlify-docs/assets/img/relevance/ranktype-tags.png differ diff --git a/mintlify-docs/assets/img/relevance/segment-example.png b/mintlify-docs/assets/img/relevance/segment-example.png new file mode 100644 index 0000000000..2cff8d687b Binary files /dev/null and b/mintlify-docs/assets/img/relevance/segment-example.png differ diff --git a/mintlify-docs/assets/img/resource-suggestions-1.png b/mintlify-docs/assets/img/resource-suggestions-1.png new file mode 100644 index 0000000000..d17ba7abe1 Binary files /dev/null and b/mintlify-docs/assets/img/resource-suggestions-1.png differ diff --git a/mintlify-docs/assets/img/retrieval-ranking.svg b/mintlify-docs/assets/img/retrieval-ranking.svg new file mode 100644 index 0000000000..4dea18de15 --- /dev/null +++ b/mintlify-docs/assets/img/retrieval-ranking.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/routing.svg 
b/mintlify-docs/assets/img/routing.svg new file mode 100644 index 0000000000..6c595cfd6c --- /dev/null +++ b/mintlify-docs/assets/img/routing.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/rpms.svg b/mintlify-docs/assets/img/rpms.svg new file mode 100644 index 0000000000..6cc042c9e0 --- /dev/null +++ b/mintlify-docs/assets/img/rpms.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/schemas-and-content-clusters-multiple-proton.svg b/mintlify-docs/assets/img/schemas-and-content-clusters-multiple-proton.svg new file mode 100644 index 0000000000..123c991d77 --- /dev/null +++ b/mintlify-docs/assets/img/schemas-and-content-clusters-multiple-proton.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/schemas-and-content-clusters.svg b/mintlify-docs/assets/img/schemas-and-content-clusters.svg new file mode 100644 index 0000000000..633356233b --- /dev/null +++ b/mintlify-docs/assets/img/schemas-and-content-clusters.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/secret-store-secret.png b/mintlify-docs/assets/img/secret-store-secret.png new file mode 100644 index 0000000000..79f17de442 Binary files /dev/null and b/mintlify-docs/assets/img/secret-store-secret.png differ diff --git a/mintlify-docs/assets/img/secret-store.png b/mintlify-docs/assets/img/secret-store.png new file mode 100644 index 0000000000..5fc3c2ed3f Binary files /dev/null and b/mintlify-docs/assets/img/secret-store.png differ diff --git a/mintlify-docs/assets/img/service-isolation.png b/mintlify-docs/assets/img/service-isolation.png new file mode 100644 index 0000000000..9d91956f23 Binary files /dev/null and b/mintlify-docs/assets/img/service-isolation.png differ diff --git a/mintlify-docs/assets/img/shopping-1.png b/mintlify-docs/assets/img/shopping-1.png new file mode 100644 index 0000000000..d28e02c885 Binary files /dev/null and 
b/mintlify-docs/assets/img/shopping-1.png differ diff --git a/mintlify-docs/assets/img/skip-tests.png b/mintlify-docs/assets/img/skip-tests.png new file mode 100644 index 0000000000..3fae9b622a Binary files /dev/null and b/mintlify-docs/assets/img/skip-tests.png differ diff --git a/mintlify-docs/assets/img/support-dev.png b/mintlify-docs/assets/img/support-dev.png new file mode 100644 index 0000000000..39d0564941 Binary files /dev/null and b/mintlify-docs/assets/img/support-dev.png differ diff --git a/mintlify-docs/assets/img/support-prod.png b/mintlify-docs/assets/img/support-prod.png new file mode 100644 index 0000000000..85d79ede6d Binary files /dev/null and b/mintlify-docs/assets/img/support-prod.png differ diff --git a/mintlify-docs/assets/img/tenants-apps-instances.svg b/mintlify-docs/assets/img/tenants-apps-instances.svg new file mode 100644 index 0000000000..3803ba59c5 --- /dev/null +++ b/mintlify-docs/assets/img/tenants-apps-instances.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/tensor-guide.png b/mintlify-docs/assets/img/tensor-guide.png new file mode 100644 index 0000000000..6f69361507 Binary files /dev/null and b/mintlify-docs/assets/img/tensor-guide.png differ diff --git a/mintlify-docs/assets/img/tensor-mapped.png b/mintlify-docs/assets/img/tensor-mapped.png new file mode 100644 index 0000000000..9d31d12aa6 Binary files /dev/null and b/mintlify-docs/assets/img/tensor-mapped.png differ diff --git a/mintlify-docs/assets/img/tutorials/bm25_dotP_scatter.png b/mintlify-docs/assets/img/tutorials/bm25_dotP_scatter.png new file mode 100644 index 0000000000..da9d5db585 Binary files /dev/null and b/mintlify-docs/assets/img/tutorials/bm25_dotP_scatter.png differ diff --git a/mintlify-docs/assets/img/tutorials/bm25_hist.png b/mintlify-docs/assets/img/tutorials/bm25_hist.png new file mode 100644 index 0000000000..0a370bfde6 Binary files /dev/null and b/mintlify-docs/assets/img/tutorials/bm25_hist.png differ diff --git 
a/mintlify-docs/assets/img/tutorials/dotP_hist.png b/mintlify-docs/assets/img/tutorials/dotP_hist.png new file mode 100644 index 0000000000..8160eacb12 Binary files /dev/null and b/mintlify-docs/assets/img/tutorials/dotP_hist.png differ diff --git a/mintlify-docs/assets/img/tutorials/embeddings.png b/mintlify-docs/assets/img/tutorials/embeddings.png new file mode 100644 index 0000000000..f882541f35 Binary files /dev/null and b/mintlify-docs/assets/img/tutorials/embeddings.png differ diff --git a/mintlify-docs/assets/img/tutorials/mf.png b/mintlify-docs/assets/img/tutorials/mf.png new file mode 100644 index 0000000000..10fe24c646 Binary files /dev/null and b/mintlify-docs/assets/img/tutorials/mf.png differ diff --git a/mintlify-docs/assets/img/tutorials/mrr_boxplot.png b/mintlify-docs/assets/img/tutorials/mrr_boxplot.png new file mode 100644 index 0000000000..3041741114 Binary files /dev/null and b/mintlify-docs/assets/img/tutorials/mrr_boxplot.png differ diff --git a/mintlify-docs/assets/img/tutorials/rag-blueprint-overview.svg b/mintlify-docs/assets/img/tutorials/rag-blueprint-overview.svg new file mode 100644 index 0000000000..f6a9d0c7b8 --- /dev/null +++ b/mintlify-docs/assets/img/tutorials/rag-blueprint-overview.svg @@ -0,0 +1,103 @@ + +/* Copyright 2019 The Recursive Project Authors (github.com/arrowtype/recursive) + +This Font Software is licensed under the SIL Open Font License, Version 1.1. 
+This license is copied below, and is also available with a FAQ at: +http://scripts.sil.org/OFL + + +----------------------------------------------------------- +SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 +----------------------------------------------------------- + +PREAMBLE +The goals of the Open Font License (OFL) are to stimulate worldwide +development of collaborative font projects, to support the font creation +efforts of academic and linguistic communities, and to provide a free and +open framework in which fonts may be shared and improved in partnership +with others. + +The OFL allows the licensed fonts to be used, studied, modified and +redistributed freely as long as they are not sold by themselves. The +fonts, including any derivative works, can be bundled, embedded, +redistributed and/or sold with any software provided that any reserved +names are not used by derivative works. The fonts and derivatives, +however, cannot be released under any other type of license. The +requirement for fonts to remain under this license does not apply +to any document created using the fonts or their derivatives. + +DEFINITIONS +"Font Software" refers to the set of files released by the Copyright +Holder(s) under this license and clearly marked as such. This may +include source files, build scripts and documentation. + +"Reserved Font Name" refers to any names specified as such after the +copyright statement(s). + +"Original Version" refers to the collection of Font Software components as +distributed by the Copyright Holder(s). + +"Modified Version" refers to any derivative made by adding to, deleting, +or substituting -- in part or in whole -- any of the components of the +Original Version, by changing formats or by porting the Font Software to a +new environment. + +"Author" refers to any designer, engineer, programmer, technical +writer or other person who contributed to the Font Software. 
+ +PERMISSION & CONDITIONS +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Font Software, to use, study, copy, merge, embed, modify, +redistribute, and sell modified and unmodified copies of the Font +Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, +in Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, +redistributed and/or sold with any software, provided that each copy +contains the above copyright notice and this license. These can be +included either as stand-alone text files, human-readable headers or +in the appropriate machine-readable metadata fields within text or +binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font +Name(s) unless explicit written permission is granted by the corresponding +Copyright Holder. This restriction only applies to the primary font name as +presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font +Software shall not be used to promote, endorse or advertise any +Modified Version, except to acknowledge the contribution(s) of the +Copyright Holder(s) and the Author(s) or with their explicit written +permission. + +5) The Font Software, modified or unmodified, in part or in whole, +must be distributed entirely under this license, and must not be +distributed under any other license. The requirement for fonts to +remain under this license does not apply to any document created +using the Font Software. + +TERMINATION +This license becomes null and void if any of the above conditions are +not met. 
+ +DISCLAIMER +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE +COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL +DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM +OTHER DEALINGS IN THE FONT SOFTWARE. + */ +@font-face { + font-family: 'Recursive Variable'; + font-style: normal; + font-display: swap; + font-weight: 300 1000; + src: url("data:font/woff2;base64,d09GMgABAAAAAPcIABUAAAACJSQAAPaKAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAAGoRdG4GeGhyUaj9IVkFSgw4/TVZBUoEQBmA/U1RBVIFiJzwAhTYvgSgRCAqCuVCCjUsLhR4AMISZWgE2AiQDijQEIAWNdgePWQwHWwsLciPUuFunhKfbEACk2ixr6r9nDdsY6H3rPE+lIgbUNXjTYym3Y+cs/G8s+f//T04mI+62cmurr8/zDwBolAYqhV6iCC9ea1taRa2L9zFaa22tnLWyn3utEZwwYYsTW6ObuTAjhiBIdOWC66Ku+pJCUbGLUPeuHKAogidBrZwJ7htKBdOCaUFSI2+ryeH3awTm6nhkeaSsOY6Erj/XQXFDex2aga7YsWsGagQVF1SInrBleec8pxXl8/3pNyqNKomD0iXlX0ae7cEFn6e/zDwuETSwGPaXJHHfh7iC5aNPEMsocnxRpk/MUCaIIn94F8WY8v2LV4Y4ckRqnGA31VWSWs/+NBiMC29mWELaanLCVVp7Mr4QUd3XR2RV91gLvpkvEEfdIJ0WNcTr5t+9kQkxQAgBGREwAQyIbBUMsmTJWCJimMuBLBlbNzhWdJcqurFILSojgCLYFAQsWmX9fosLcYUQSd4Av9z+7Sp2He9d1bteXuQutl2t+pbFGCWgRAoWFpFmMaK/wnSIjR9MbBCrYPg6/f8zRjKIXfmB+uBm89NJUnwIa2H1zF66t2A44Pnvx2jnvr9qWRSYTvNEtJBVQlNPkcrGHTKhkwkR3sA71U+O7Ti2BciWJdkycBxum9LGB4w6hI/P+yKGvUG7dkAlSBr4/v+QvDPz39tG0oS2W+vYGvB6yvE9oYCFJKh/cK7MM+kR0TGlcG0KSSkFHJCcHtsJNylevh1giejHfvfSN0kfTZosFDGJVLFMJRNK10xi2IE67V/bLtsu+MRvp37g8UM38XBJO0XKNPAkmWCEDJTMTMQKC3/AP19n9YP6hcgNZdJMk28WPu1593ZINQQPPfBt2v9DtE1V1/Sqy+8zmW2PmK1oGlFCgujAKMOIOOIhZP5tav/3DqBk2ZYdWODsI+L2IxW9NYoDH+j0WGLJnM1SwiZJFs7wJZjKWVUu0CAWkIvQrrJMYrIILJNsLqKu2NDS60f83X0THH6BuA9z+p80GgEb4sRJHuEC8/HauL9NF+i4V0A+A0H7ABuwY0unilLbqtr3gQ0xn
GEhiMba7L2oImapkaKmxqROc1apQGAKEBbIigsAYx9o2X+CtcAQkKO//vSd/k93zox3HTdNCiooeJHtE6SvZ9hExetPJ06fvxh7fHwcxydJHB8ff0meJE+Sx5NkPB6vFSiK/0cABLi2iT4mFm05Ay0RSuGCZf7X/UenvyVt6lBm9Ol7qfGbt4AxQ3P7L+mV8H8oCSrUDdeb+c0FOPn/v828r/X3oDpCmBVFJmsiSFiaEDlb2qUuw+/Mr2IhKySDR9wBnU4JB4h2e+5Xutf29m9AENALSiFWlyu6tOXdfXtGq7UMLOuHQEZK16QqqcqkqPI//zqtX4kWgZuAwZQ0MC7vCWy/JyD5vZrq6fZWb++Z2SY9CfSkJ2whjCMkcEBgh2ZGSICFAEMfQ3BKdTyt2UnmnJQpZOrTU/FTgUgIXCDJGnCmeJItntZwqh1vycyWVv5a8WzN/mX/Zn/+9nP2t/Tfnz3/+/218P/P0mYz+XO3z+5SuopCOHReonDRqxTTORn+tu5opakIh8VYUDmxuKAUQnFQgv+bWkVVdveue7M35b7djG4A2xABO3gESr9+qVT6Kjm13bbV0eMJ7p7sScElT5DVvSFFFCIKKQNyKT2+d4QcgImwY8fgAXr2tflloWaDW31OclnqGwZawL8Qg/DX9HtD2JCN85H+iX29s5nXsKsr4/DICJPCqj8v+xzJOqAo1LbIeEVFKHqBRRKoJaF5HAhZHwtgk90MB6Fp6QwpehPLAih/ku5u8xRVyqcK4j+xpv/n3HG6mTZjNmMNTQgSmCoiArKLBHx/NZlqkpDA6YcrGMsk069t/v/H1P9jw41p99sxLlysISDS/6E8if/Oxm/gyquKiIr68LiR//2eZP/vr+0liEgIQURkkNlSunX7J9v7+Dmec5IkSVbWGGslp+5987quNvNjjDBOLVPelGw6ZM4KXrQv9rXd7DDC2mEkd62WuaplWMCe+i9inEGUnAMV3ZYh+orZc0PzJwRFE2ANzAmXBIEZdPUQNdQ/YBFMaOPCgAwmQJgzwpYDTtxYrRiuXLGON3z5IUgoIkVDRIREaci2VXIFJA+QAkCKACl12lWqodTahT32ygGBUtej0iBdqEMmNNUES7bNlUuBfM1hpTXsdcRKV0GoGzzlpVzjLb+6o4DAxFQAIhlSYKKBQEIN1HpWJi6Y0smnA6SwG0LThQFOAbFsaBwvgMhucqSMowHH++m4e+n/bsBMUdK0zuRkrQRBFLWq9lEbNwRTkgmShOsERp0tq+S9FrRe+/TTTlLJ5KmR1EwrLcIdakNNAkjFqZ7ih19ptsD7oQYOkEAxAGtn4ySHJ9hBUqyu5Qs8X/mdP93k4S6cb6FoG1l9P97FfVK3U3f7PJgJJPvojcfUcHkRDT7Q1PALiBd5DHmJnUwWPxtjhyNbJEeJlMrqnCFUxUgmZE9z7ZXjxL1iMrm4OLoaAgKgCmQYQCl94/u4aIVDD7H2XqYC3O5DCwM36EEPBgZO2NpaAbofZKgqs7EGBYffCQkQQSRZtjylah103DmXNeroT2AAyNCgowpr9nN/conLXOMWd2mhlbZ9EiQe04d05zlEXbZf1DbqivBZ7KQR+/ZBKqAQpGsPu5l4iVSJVFhZKblZJrJPExmUyM/FchsOiD/Fk+JXcUSCw1Xxv+iUKK/2SQg8MMgk4q2ReOS9hC510Q5JbpX4RiZv+dIeVantNVsrytIexXrRr+3n4rU9lIk78ar4ruw7catwRloU8gAA4A3wMjh1l6iogs7ML6zGLDdsA3+Vf3dOl8FH5X+C66HCvM7Ai+V6UK5A9cjl5zvjVlS8K6frY6TYQU0l+Z8+oL47UaGVXzoCmrJ7DWir6q1HZsBzYE85o85Aj/cNeUlNHmRCPlD+fyMO1j9QXFbVK6qgKXjb78kvKldaDjTPKMvU7zqB/BTUCH2pWKI4BA7ABv7MqQbA7ZiH8ZaK6hxoOjTPgc9HwQ1WlPIOW+8q/1zHGKZtC
GmC16PKDslq7apqWcwJel69ec1I9R4XQMwM0GwByCr7Hs8HrjrlegQsqQFw4w62ba0flCWDJajJNjPb9enLCpR7zDJ5RfUnkwha7Xhy1WjwMHDb5iF//VsmhETHtC8w2ZEUGa6e7Ksgzq0JPlO3acQLCv+RxJ2MgCKmNbd751lv8obUY9S1Feq5b2PUPVfs5durPiN5jxDzydGhlb/tHIWuLa0ngrWrCIiXsqMs9Dr0MUz5uhW1Uy3GWHVnKHOOGbXiu8Np99t0t9UXcffV693AvMZ9sxRacBtqy3OelvRd3aZHadI85tRvmg3PFVZODnkZOBkL+plZEbwpWmW1uBLzHKd9MQbex49GRI6MaNTL1UuyF9tuyYoL6dCuU6/k+UQfWQ/vgvwf9sNaEjnfzlAeTEJv2wiIYKXxIzCB5pyIXzHXx8wlalgRz7hbSpV3Ke4+ozBQSlyli2P+vTyghho9SPBgnPMzhizu8k7R14Y/cwsJv0ka/vIMsheNYQyDOZdr/xodfvbn8C309NfvMLYOYjpct7Ps2W9BM7cHvwcer4fs5sByu2wc4o+U5NXQUeDD/XncpNcVtSIqpRzX8atrC0TsKGmt0xqq7PNgd+ygmdt83hNu/5upfvEzcF2LvVeHCAuqFu7J7Xi5NLvnBMoyrmnMyqv5dmsan9WlEe14EsTCvqIWx6SOWKW21a2uxkDwEh7Q90NRGAiuCWMtllU+4rfgmT37zf4kz2r8koc0uuYbWzArPtnb/BGhOf9pXeMo5ipHQh1fNQv1jsFcX4QJQYvd83GHM0YsdwT3S6D2NJvcXJoZFov6NUw9HXrJ2wUjfTMvOCsox2fbNqKBtor3o/e+N/FgPWPEJvB/n2jb2LTeAdIYN1S/hgklvgFqBqI8qfs4nc+wSbmnPETvY6FjGc3DslN5cEVSX43w6qpxWMofUc1lkwVt51cgIr5rdBbxck57XGMF/JIM3ph01KmwzUl7tXnTuaNnXBNyio11CuO9vYu4Is8jybyL8IbK6GEf97O22FeqFVaUDCercAQMAbJuNo7+n4Qwy9IiMQVplV4Jycalp7j6GlDNp4BLiU854Lu6vvowfba3j13YDde+B5C7DCtIJxozxCqzj9rnfuvWmzCeii+KA155uS22G1R5r33HbsUcxYieo1VC8eMoj2wz1SEDLnJga+2yB964XzwnbD+2BwAITpQUW+Mmb0iN1+0+e9ges6ftr73iKtVBEsoMHCZuDYUDJaUxRJbgFLLUlg0a0Ph1yD06ffv9BuiNt22pPWpP2R9tGR+o5vj9juy4QAF0XPkRSbNVUbiau0o3eUEBjOPRzYVvHPgmwD+BO5v8SY7tJM7IWasCVJGqeFWmKk9V7ux1jt6bvuuccxqdu87DpBio7H/i/AtA9KBRQ73dBs3CxsHFi0lIyYDBa+k8DvUxxBhrbLHHEVeE09/5nWNLuevkguiXdKbyu7hs4m64TXfRpZPC/xR26zV/3xBg9FNtuVMcw/SjvhU5KjW2ZGr/w+Rl8DbdjwS10YVkKl4W/ev03O/xoCe95I0niVe02GFflEgx9qD4auTfUXvdLp1nejFgnHmt9c6DMatimNf2Bt6fzdgLfdHre4v59vjeL2yipW4b6qlMUlYWr3b1RV3xGnZr958qn+3FJtprvvvs/hS/74umAAVY8jQnERN8kGAhomyiE0mUJFmKVCSk5Nhi68jzFChSGq58zTTXyuXa66BjM5FkVrpqrdygvJb9WyW8MqEikdvGnL5F/YElmEzviZa7dh3Xd8M3dtP39RYeyepavGVasTUwf+6lP7fn/yJebFLkVge39O1ux5fbvg53rNP92gUvS3K7HqAedk03tr9ZqU7uSn8jumqnjqSsjDXd3KJuqbs97Jmnyju22p0b01476ntT/6hsAIj3Jp28izKHGRTnX5tcV+m8vmpn+LGcZyBtN6NN6S56W3BaPqkQBHEaxTiol8WNuE37hQqyZqcUe8ZUt
kgkB4k1vlcTj4ZFNQyHerWXvUDxsXUCSuveqSJBVmSemnYWUxObIrVjVngTlJgnLcFeYCGs+u4X29PV4LuVGL5zkv+Fbok97Czgc43NEdmt/o1bubVpbdIFyL5Q0YacvYBaxFRbFUO4FrIzxQG9EsKOX2Glq7+QCDRYFsWakERyChIC6iTs1MU2zCJDkfGHY00Hix0VmKsMyhqXj9Z77IVdskFOIo17+r7wCARmeaDMQUKvxZwwitlL8p+AWiqD08euGrGilzGos+dmKKiG/NCWCHuHupQ8iOcqG5UwmkfJqbCpsGozINkEwxJkGCjvAWxpe0eqdEBXa6/EQDDEbESzAGWmWfPVGN7YBYOAUVO/rIaeXgPpPdTU1V0jTSzUpVufflJDRpD9ih5b67bggiMTexdhERyBBxVKlRwsq9LKjsoz5PhaBQOTQBDq+ep0hZVXzYmXFCq0IDHLtcIyQEo/jzFNMM+cnsX0GcwS5oAcb8F6Ht5Vxb77onj7D+fwEb5EYsSKEy9BknwFR8njKJIrb2skwoYbPvBdMCYqi0GsOPESJC1FyqcCjpJP4xYn3rR34JF+L2MlLrXXgU6SDULDNIZxEybJzJGbp6A6iMGwjzZiBBalBMCioMSrDtFJcsol6pTGxLgJk2TmyM1TUB3EM/ksCk4iP2mv8IHv/Ji/9QIEJvIVg1hx4iVIkq/gKPk07jzmC4RH9B168e+lea/mqjkAR2VVOKpkYq4dHXVC0lNfgzSMMeMmmoTMHLl5CipE2JBnREtPhA1gmBQdyEDAKoonppLL210X7iDe+DxN7sn3e3SxP9yVeKRbqceeqSUdgR0waMRBo1464rVJx/w3ok/75uaMHxTOUiHAXfnRgF+joxEHwscLz8RcJ56NUC863k5b18tgm+pMowH0dSbwCmnaI9ZWu6MOdJLU5a4bffpJG4w3VDZMIxhr3JgwSWaO3DwFFeJmOPLHStQbo6nWao/Qpl2HTpK6InWjTz/pBrEhbViMNFY2Lk1gkswcuXkKKsSzwT+WG94waNK6R6ztyhUdOknqcteNPv2kDcYbKhumEYw1bkyYJDNHbp6CCvFf3CxHaYY6GJrWwu7X6tgjqo3aq3Wgk6SueN3o00/aYGJDZcPaCMYaVxMmk2GO3DwFFeL5FMbg8kakmlMuJ8wTgMubcAIAzFMpJv3NoKmWavdZK1ztirbaW3Wgk6SueN3o00/aIA2VDdMIxho3JkySmSM3T0GFeFIQMguNMLCZMLUUD58ZC6sJla3OQ9NacY9EW+2iQydJXZG60Ve/km6QGioblkbYqG6s2rgxYZLMHLl5CqqD+DJ8j7dtgxb+HCSQUVDR0MdBuvQsps8ggXuWkG05J7ayslYV1SpZnVdNphZH97NqldqpA50kdUXqpqeqT39SGtSGSsNGjDUuJkySmSM3T0GFeLIoOHHpkJBRUNHQp84Y2HGELj2L6WfAuJIJmWIpHj4zFgmCWZJtPPsyB+GYk7RSrE7IXLG1yo07D5688o7kA9/8+uWP9QIEFgyRmGIRJ16CpFIitpTXVb4CJZX5qkDl6txrMt2VfkNzLVnurxVqT6mDOpWkLnTr8aSnnfTpTyoNpjdUNpxipJf0Gm8aqzZuTJjsm4PMHLn5FFAdxONUcFI5FdOgM5gsdm5wnuQjEIqKc1uO9Jf9wZEWEjIKKhr61EKbOmO0qBoTGjRpYdGOHRlXMikzFUvx8JllHmTRMkkQz1LZsufAsZWjWmU1Ya6t1go37jx48po38eHLj7/1AgQKbtMARWNzIi0GseLESyjRLankUoq00kWGTFnljSpfgZLKWlXMXKWuztLUXe03NNfi635urWXtKXUYnSR1UTf14Imn9bXqF1JDDUsj0mik1+pNY47GMWGyb5CZIzefAqqDeJwKSi3ToDOYLHZuJA++QCgSry8KAyNjE9Nj9g4uRs0Qbqjfw7AIkwZNWli0safDOHTpWUyfAUNGuJYwziQxUyzFw
2fGnIVlE8AsPbJSy1lbwWa21uxU9l3n4JWjzIlahdU5+3KhNSVhrmJtbnDnwZNX63LnrfMxfPMbO3+sFyBwQVhIvNCBCxPhImwQ2UZ3UV3bFC9abU4kxSBWnHgJJVJSKbkUKXVppaWXLjJkyirbV84wtrS12ja1vdxWOygvWH6/CqRCqSilnVKxT8mttPpjGlC2xsBYwsVLYmuS901O+STKl1HLNENnMFnsnMTcYB75KhCK6rhLetcVqUxez1d/7AoxeLZn8ignpmabWy1SLFtV2xbrdlrt+o79d3ApOCPfDEmQUVDR0KdOGNhxoEuvxdBnMK5kEskUS/HwmbGYAHOI5wgnK1sNYa7u1go37jx48so7kg98+fG3XoDABZtChy8M4SJsEGmTaJsTpRSDWHHiJZRISaXkUkYtdc7SqqUbGTJlle2T05aUtmrb1PbyEst3VCAViZ2VVJVWpipUrs5Zk3aXfkNzrb7ayzpEJ0ldwbqphz1WPSnrjfdU6/PVD2mDwzdUNkwj9LLV62pvaEyNmzDpG5k5cvMUVAfxiZXzCeoDJ1foDCaL/WxTT7mKLxCKFpslUtdIZXJ9hYGRsYmp2ebcgpadWlXbVuvbrrx4BeglSZ31/wapUWnBQDbF9Ep1pl/zKkW/GrRCUaRM+aXi7cf26NVvLztoqDocdtIpYrcnlC79S4e9MdKJfVmlF22v+lzuj9+uedPl3jg4dNSw2cgw0EGxyTAxkWFDhmPj4BCiYAAuMXgk4JO2cjI/icOEGhYLgGUZQHpADLaCEQyHHZTsNo+Dk4ALKm6bzCrFsLiAqwx85TYZV14YrLNJePPBVGFT8BMIIwhaqsGSUtqbgAVxJ2io3wvZZDPam4OJ5IC2bhjqsQV6ZUHIBp0cGBmxNYzajk4uuCbBNvU3jHyFSBTZCaF4AFICjNK9CBUqaR4tUR02lbua4dzTZcfM5mT2fRsDp3WjHjbhTsB5ams6rx8VKagMu2C/9gbOhMlw/IwrfuSMk1CMtMaVB/KV3z1GM83ydnMtQWm1c5hZ/hMIWmtHgfZjB7Q46FRuX+UakArJIeqaY5S1vtG6yUOVp36AG6+PdDLcJrJ4NYVJhHfaMHvEaBKRKLw5n7iou0OMekdjvofBYmiWDYNgEoakG4SeTLDJL6QHDcPqoXGOKqjQsAvtQdCnyU9rg2g0Luo/J0BJ5K5Okt3QPfqUOg4eCgY9izHof0j7uQBKIdG2CaQBvUPKGEJ5/M3CQke/U2lhS706U4cz4d86jpNA9utOp0ej8iQqWOh3OkOMzvAQRqLv2qiMhWzZQ56M4nwEI6J+ZJu09u7zvOirDw/qaQLsF7MQkOEU0rMYowKoz0QDLWen9I7qzAIR8/WjHZ0FUIjatUUxDnWyANQLA6SJd9u4Ntav35w6EkZvM0gMnEbR12Y1hgbxTinqJPK+6ju6ZnkmJ7Z+zdT6T+M9sjY0B2mPet56krp29TZ7webV1ppp4vZqbkbqx3WtaakftWZNKhqojjuSyrZqf4VZqu9fFhcHlT3NqRESZe3z3F5jAv78sUbNq5SxX/T1q9avgmbUsgzg60b8/FC3yjnJ1xmG28S6Y6bMxF/dWBkOsTFSsaqPXKfh0y3Av6NsF941AagsSBsYwaUKS8eocUqqLaZVW+tkpC2IfZfvkIU2QvxBQ8ydyQuHc3vsroC190bpuMXT90cuoErfu4AKAIzrhQlLM62yARKYkF5byg4A1cHDVXg1UgFGay4vjETDycYjr/YcnAhFlBkB3+VsPM9kMYajpb6BwfTWiBbSa3Hi54afVhyxfGKZCITmNBu3+QpwG5Buf/wGkP8hb+4TqxrfpbLs6yfDIwUSmCZeODz8nLSuSRM1lyAoemLvLL4uOydnkBBajHiCowDawSepXlmIKFW0Pe6NARrLZy+xFGeS28gpppj36CweDhPfGq/dbQSpW6VRU85DdJy4fID1K5OWVM6tsk0wsO6hX9YJ3czJZ8Q2Z
ax8eJ3eeuURxK4GCSs1B5M0f4e4/ok8/6jzA93EzrEzKK19n1lE6wlbhv9+Se3LVKa970M/O2Hq8hw5rGKdMDBM9171IqhK7/LA0u/pzZfoKU7qHpoxr9GRx2IIJG4V26Rt6cIha/XldXbVq5yC6qoKb6VPljU6ErKbNV3FLj0ZFYM6E9ChUX2OOwqAdL3whd2OMFqFqanee7s4qTy10z98lUqF7VwKkK5NPWWdkqbDchVGjWPlQE5ava6N3iY03s96wuMRm7iyphjpu9pRDGKDlUqaRCjkJliQm8ZqMdx2e85bmbJb6oxoJaYQH1YcC11GC4biFObRDq5F9BQ2c0HMFzQBSMvR7B+2ocn6OQCOP9UfYy7ZIyrx5HcYL2KGKHOjiF8IEnvx22qQ+Zh6tOOZ/cJJUs9oArCaT/EBhuO8j2ewkD/sdS+0j8Vm8IG9Cu4gJulUhjI0sH4iaMPGjBjXqwWfhh9KtUGlRmG1QyimJ8DkJKaUlFCDToWtikQ9GysOAosUJAcZHK1ybgpmFBWIBrSS8ugWwoIXlZHG0waUV0cOIydLp0tQTBGDBrXsGkVAuFzUmgWQGPVpQjWkUicfrw4mYWQlejAWr+CT2b2+mkw90PLVJ8jTeODf3kDb607VwL/Xs4kKmn+9JQPAzP00AYLW+8+sAjrEHj/27OEO1QDPH//1AaNP8wwMxlh6y4u/x2e8NZM2ZLufKjhkQLaG8jwoK0+FHGgNeVEARZCIkil7Z56Oq1S1yMH0f+d0WbfUokyA+4OutVnZDosPMre3w8MXOx6eaejEzCxkgUqdGj7zBP+ftrsRMEgA5+BOmOiY2Lj4hMQvrFaQ56oeB1r3rvlH/FbLZ/EI9nmALZhiBe6GK7xeLnSH8ij0sCCCS9xnwN4nPVKOIQPDc1HYmFcyQ4V5OGrS4lMNXg1r5f2pBGn6FP30ggvN9a9jf7XHIV2B0QFixkQxbynfTerGZC2skUmsO9tEC0jc8l1F3aDvBK+8stV4AbVlya4XJahZTJCfBEivb7jF076Vo7WRuPa9JeB6y0e+8td6UKAiGN9HagAqV8DUc1H1FXX5mOlwB5s5iiSAXhi0XYHcP65B/iCPSaaaeX5toIIsf6V76oq2cM0wsXc4vEmEWT+Mwyw494R3pS9odO+KFvxBX2MHn4EEMyL+HGraILigQqwVPdehgY2CAvCopKLWZmMI4uGrEdTRTxx11XOx+hpoqFGPNcOQIkArqg1aI9/B+9C5QOy0My665BcNfnU5/nd25otc0eiqa6674aZbbruzZtzq25NV3OH26THHE+Oh+FqIJXqy50ncoPpQKny/Gp4KU6WKJWAdX206RLDQhusSjTXR1KXy3nF/UwuXKdDyHfg3XaGNttq9Iz85/bXlap11cc279Efu8ni3ftyiMRiLVnAfMwR2kKAhmJTmIsNdJ6NXIAXojjidprcja/DKglqsw+YdjZz8DqIq8JeOk9MgP037IKTuYvUuuJLfOaAOss9f7xSoL/jeIdjBDYuwC2Y7CyAlSf5QJdr7IViDpCpDeMhJWZTgC/iEpFwypDrv/BV3gBf+6zTX7wr0Rl7sTPN2l198tFWCpizWDTnrPKSdhf/4g+gMXvLwTGhIq4e915cscq4dwteaYeqUvvq6hnVLrRKrwciHDEUI6B/0wUA+EGV0AwYbmfboZmxd3bp65dL7+L8Nnq07z7crb+bgkuaWQHgNTkdZUx5lFPJWWksBKEGR1yeDAA8ZbWahrZaNyreysxyZR6FvV61UdTCAYmBPCh2i0AKsSHNnpgGzS0JqupEG7roLBJVgahm4xSGhl6XXEWApgFCe4L1VWriiWGujlyhk3GIOAVMtdawQlImB4UtAZtgBuzbWYYABHwwFvEWohTOQ/kNokghkQmg2oEIBQosJgTqI0GpEoAmENitsrqAGoMuAn0T2ygAWB7B+s+wpTTRbu1+5ZZeWt+KEOiMk17286NJs1hxcaqkFMDz1d
fpf2aUsR4KXznECOtXvtVVtwc73Q7JhNiU5qzilk2z7zkl/zE8B2pWhBRzavXoMiuKnAbR9lICb8L7SYg6AFCANDJw+758yDpg4A1DvAdXG6pBHoh6gXWx6jpR7rqsDF2vcMi/RKQPKxy3bxjA8doQ/nAbO7ZtNXbDQKVdd5KY1h7f0icb87FV/X7+QPoEkUZucBNSHgIrLkIytF3abNXAKByBmzFOO2tMlw4df8GiDAEAws5WO+eRqSoC1+fhnwTpQajeLn/iI5AdV79vJmTJx5SW4thdgsV2C5k5xoSMBbGSf2l2xjD633umw/aoH4bOvca1UBx3UZwxUWyYJgPycFsUCr2LMRifyEJo2WIRA6R71I56G8yM2OczlR9R/y2tdLGdGGAAe9yRaI4sActQ7tkaYINc6KddGNEDBpuXgE0AkXZ5KB4lhICCe3VTKwzHzeCWUDA5fzZD8NRJQih51XPUwoGhPX6fvesL1jkze4DvhxnBPuCnVx9ycaWjoceGA+jodXqVldjeDH7C6hGx7Q+iwphv9tt3d/6c4URth8inqhoVOPZLg5zwqLql1jxCuoanJu+dybssQcIWhfujHfuXXgSiGUlGTZD6twW3htnH7VV6VVV211UwdqWtprvSGZqz9pHZE19SpsxTogMusyK6sMMQPovriAE0oJbVPZVXx00vVofLf7T+Xm+IDmHdsvtvluoudqjzddedr8zpfTmWpUOUPEYCvfp7WnjaeJp6mnbpO7IwL8h+P/9EYKlEJCGBtkzPN8+5uGRovrq/uGb+yL39p88BHD730yD0tnprQ764+Q57501v/959BoxLM/f0rht33VZMff1udzj+80OobhAGfzPhs1t9embegwhPTgUBJjgDfyPtG33Mb/+Bb+kmsUq+zzjnv+fbtXwoadFYqRAMdAxMPn4BQETmIgpKK2pdWMzGzgDmVmLSWG3cePHnJ48PfegEChQkXYYN8m8WIFSdegv8pkC5DpizbFNpOaKl2Hbp0kug2Od3ixEGPg9GHdQKAd2mv0GFXP6bAQ95Di3oM8UfYJ8KtFJV7uU30TooP/4+/l9LnxPbRB2lb+Mv8o1Q0J02LPkmV4WdHn6W94T8pLxoNykqhC6hNOxznZp63IJb/BIDV/zWqtgeki8cS/huOn3tMlkeFiGZKUFs+liTRGymq8H5GHJ38kUUC8eNf3EGYSyOmb2dAVuN9iZQHGN696CxLC4qxNdZJhOOf1ZFVEQFkS9Agge8BtLlQue3/zFIeg9Q3Hf4SaYIswl1ycRbaGwnRQdz64aVMkMCePQaNW2QGxw1w148fOB27Dls9V30kbl7z+GJ3A2pxZBH3aOUOgPyfztcjaDKJbEuC+Q7allishQQ5SCQJfKYJrEp92UWcYSIBZ4SAuYSwipl9hkJRRDAiZaHdLAwOcIaP2BGqUrvNiW96qafP0HiSSrZE82PXViyJpCVXcTl1G+/YYnBFQlFHyD7ve7nHAUmrXkAQcXjMjG0bpynSsTEcRTsoSeI+TrBUYV640jWUxY6QXtrBOzt7skyHeEeWfX0KcJ3dwym6+ClnS6Z3sEIxFNV1y3rhBZplGIzIdUXBkWcY1nmRU534WThLFjk85NDsT51lZZknkRfMFU+s+y7L4wzHMSjKIiiC46yEor4vGJERLSUl5zn8CsWQDE62W7RxiqJBSuN0n2Ipj4Xp53E8DlE0wvUYjQidYXwTubcO6dO0SfgkNb/gCzues/KilRfLqXSgWKmUuZEVLIuwEATTsyWyn/s5wa5wCmiUCgdr4NtqEGcqetcsscpXY8WrebHpHaKtY8Fh6INcxPwu1g9+szQl9rKy7rMucYKXEdtGn5R23+kv0RvSRsqS7tFgXhG+QAWNC3PIS2/wuoTCckFEKGEEQhkkDHV6LZNXjVIAIzOkUgB6/NmqgV83C7PiPAb9FQp1Z9trd0ymAlPWa0QCp+FRds7VHxmFV4AwgjnfCx3uJUBEmAKAm
TSmipMsv77AUEaBl64wy15vkZFNTwVbxp4EfQEqSOrhYbSpwIC0cGB6HPiaAFSFcdwmOB4YoglGowf80lVTmFgRUfAx3Dh0+SX8eEgPMOnOFJIxqJ6jZZB4uEje+UO0wJv7hAAXkL2YceD5ilHwOM6ZRXWY/TAIDdxDodkliV6QwkHwe2OvVHNZoTT8LrbajvNYzsRl1cyOGMzw8RopqyO4IYpcCbliRd14Az1jLZCrqxI8hWOOMRwG+jrThmAsLnLFcKNKE+ECFYjQ+DZ6r2SmetCzsYp46y+FKYeiYfgtsFT9hDwymKRyONTxPIeOc7VO7GxeMfFPDSZKsL3mNC59iaVfGx97NaMVAZi5QIpedfWSEMAFEJ+Ujc5Gz+NKdXRex3MStjxQC0TUrTlS+8f4jBXx2ftpLDeF8uOpxV4MVepzULZ3yGpFXTpkxAZXQC+UW4bJRGzVVp5RMIPVkec0G+sFayvRfOk8+FRmxa6vJvOLqKTICw1pjE0BZDIjJ81d3ILe3YEp5uutPrEyLJEBBrQdP2vwdFZnrXSU3sk3hIFSvp5bgv5Y+6WVcZDVaLVIrlKmZkIrZ9GWIuAcwkO+uHWB2EpfgdibKKUawyrI1s9E5Varwz4nXThAxBN6KL+feWlV6zx8ArINVhXW7FU7etnRFoFoMwDU1dPHW65zJQ0/0575rJGS3Jnq4iRQqgjULvHB/BHyWyiOEa80/UqchdmizWx1Hq+IuWHscifNviEgXAlf2Bmgow8QG3mvAq8vrsAq2PAzXU4RjYXXEYvxenGo/BJy3gDGooxTff0UYWGYi5N+x0HgyXUMZjIdH135HpDtFFjOuEL2z36mz3n60HSjCrWFphqqay/4OwK6zr8NSK/2N4jKLLhRF/et40xBZijds8xxxOvFhCK3neRp6yS3g9e1OVv12IUiH7hWXDbYN2QgLM5tguAMskyI68UEvPAzrZQolxVehwVEJqKYDeqCl4kePNWnCjDqWq/w55uFpPSQRqLRl+jmxhYZfRzGm3hoEfMKhPbmuNdOkh7yNoaD273TJa14vhk9U48ECS/Ks7WTk5s63JqzmpPOzJ1ujAutAOa8gI1menbVVobm4NqDCKmrVEregF6aBBt1MbzAShmhDa/jPfGGH4FiKgSz/TOUcFyieBpVbywXSLlO4sYKkaQN7XyfJXFEXjbHhxJ211uUrwyhSIDYDLDKJk1qN08PfZE13pgaIanfysXbLCNjk6OJD5hvRLYMk0fbPL2VI5656xd4ifhkDPnN54DKpxysaSkKI/XW+BmI+/Kre8P7LIv0bwAITxLK+DIFne6fBuzfwI5a2mFuJ1A9npbDauv41eFnYhf8BCzJhEDmAkHFSSCaOWARNzFIdnJZbSX6Ecn6fVN1CK1QxaULz/ILRLV7XvQSeTW778mo/OAom+GjVqwdUeo4M1NLYm9sxGZ/xHkzQWpS3UOl6L8DiUMq7kU0AAlQ9YTTwsdbQdgxAmIR4z2kyHKgAiGtum+F7wXn60KVteCGgBep0aPhZSKCFjDzAibJ9SRbGov1Z08YUvoEVmNCqu5qr2soaUOyMTlOXnVF07GyO8GyERvLFFZxYG6aeL9ZDfvsRKCFoT1Zte+fzt4IBcYaZrqey0dvPMBXs06i7N7uhGdytlBYYdbydJdWrLysT8Y3syA8e+JYu3mlLNnmyErriUgGQCyln2s3h7skZ5uuFgjIKalzTQSa/Y8zrS4p3QOkSnL0sDkNOA40uQlODZGIWn5o4eBJrS/2LoWiA5YoV5P+Vi0Ci1uq3uS6WIHr8DVQ9DpQE1dPVZN/eRKkL+RGYoSVKSr4VEbnY7Ayf8weinM2VKYk25obxnWE5Bve7wNRUdjyOJwGe1oZkeuUsz2WSuQkU4POYzTySZlrLGpN/+Ex+rYAopuW/KDAMmX4wQin6M0OGt1gjw3CWTE4prlNznHO8GzTe0/3JK9y9wQdaxmstndMzTNnelPX7n2x/GHTO
2ZjawtaBvG8RTNOFWH7gIQvy3lCv55vXKI6wVqE5doAm8hF9WqzodKgBKHdh3P0tBX7sTI+hLld0tt8Qt2yu+tyAC1M5b3FWEt04XLCmFqe+yjKJdszfPSGXm0qsS4JywklB3EMH9LAgbuVKKpAsWKTn1eb00HOsgUXypt9jd0EgQV5oVPUbaWtY+xukqctZ7hSRfXYhHEUwP73LiZgfTE3O5rKt7fbx9cytnm+2r3okM1Yyaxdv1MIBF2Net0E1Nz3qqcCPXyBbepyu2ztldVK0ILvCPdQK6V1kAw/QQeA762W79EX24wPWKbfy41QDhF7lTHqymOeTmkW/Djjq+PhIjnLA/iUqDdu6yjlmnrWd1ZbxxDjKOiSePgG9RlBo1RsqFymUPRUSaUL55DIj6pdQDLfBsTz0z5H67BFGQ5K5XbW+S/p8JL57GIK0RmvKlPw92GmbcBh3uja2fj6i8YuU9yV+JjtVdJ5GluXoYXIDWIDf+08IDmM45DTzbrdm3mWTYfZQreXyDpEsnrSVU4dUVilAiHwRVfpkRuy0rxNfpXq++vTH7jrTGiVCaKBts0Dh/ygpuyM35fK1erbD0D8ylBYmLOLJqTGyuGVK0a9BTpXUfomvoXuE5PnJJfuWl2pXMYsRXW4HwQJYwaOUzXCrxOupraOmWWtye1vpYxMyZg2uvqYKMtj8Alsjdwu6sGJo//IdrYrxBEThsqTXdO+ANUw5xHhzzdxYqsB+h0GNCyrO9tzTe++L4aahx74QkkKyw9DRc+P7F9vXucCU/xpX5MxBc3jDCD1aa241PNkdlVGNjmLukCJbQvlwPs488wC6nK6BUqewRZSh067ikXZeLyCfoKCwkrXQbAp7iT0Y4Fpa/QO0UWvhoKPX5m3knC4lbuIwRwJUAU3s31nSl5Y4giXWUmojx4w02LkyPILfJSR7ojux/ZuWdCBmnAxuEIbtfAxwMEhW+sICQ0tEmO5cLBwFOp+dPiVDSTD3LpExGRevdMOxLQnN8B0ohOYjaG8aI4BoESgS7Ms9Z0Fj8Jtx9syxDfOhXuoLoTLS8Ym5oOnEBrX0zAsMk0tRjwqDMJG4Hy5pAJwwuYhKig7Cb8f4LO5edL7irPTbsyqthO7OB3kHDq1WyJNYLU3KzkOKUH1XJVs6nOKdm3fDyloDNgQ4wKJbw/mde2wYctu4NEc4nqdkBDoyoGXrLwpQFqMFLmRitdvQ15XCZW1U7wFsnUzGFG+NLfJJqWDyj0Habp0sfIt/3Jd8YUnze3i489FtwF+BKp5ACOv6bUttJW2CybcFooMSHtu5KjtwATuXYRcpz+x88CRcEBFceA//4OF8F/wQpMSf3uDoW7CLg5Bi1mqaNbu717bDdB1Vn50OwzYTyF4TPtxmj8UpCT8+68AwsTj1LBPCBJ3t674iISKSnFT7fHxnHRVaJBHlHxosDDElCUpmR4erSwyKTm5ydCBvwv7uo9cjuCemI4UgsPpcvJVuk5huMFWzSFUfw8PoQNSBDC7aW62TNeLU37z9fZtpcoi3f5bstIS2h1077GDp7YyPd0SguAed6fEV662eLbtkH0PQuNHCS+r3sA7cafRLuQKE3NfpZDAYz1EmH7yrAa+gzjxxKh2IYg3NqsbAdt1/HdG31HRKOlvigeoDn/28dBHFBKmo8FwVliTOr5tCVejDHF+UkWl44fqjBz0yvXGRkznFbKszHmO6sDzQLOQgCZaQeax4r9YWgLoz72b35JAtMqZ6RUfE98U6E4o/EdhfvCx7tM+gtSWHkXON+MG8iCgEMzYN/5xYxR94WTPNhAWLnq9j4SljGLjUchyRqC6+OY2uGhXmDah2WTWmB8HQtonT9bNRwgBoln8p6Kr0VXhG5eFIENgImDDiHzhQGNpohwNinFNa/qleB2abcJm96KybVAz8cD/tsoT3tVb3WSfxkv2AW+o4TsVgdlLilf5hOHB6zMNCY0ymekwaxcthrSBgzKxCbQYfFmEe
+5rswZXqQ4CliVtiKysnf0lz9n1VgQgitHpj9SHcp0iNxDEaW3bGGInTH95YDREk1zmm5rwWhjpei1MzXwtjFbvNiexED80WOisv2RVlul1Gaq9eNBBR/lolksJ2zCZ7oynItt9f/n/Jizqi6Pk+TXHGHDepK4Ydc521c9GtFc9WU4YXo/ucQecbHRXb43qScEd5l2PC3KCBDRZWQn5ye4uXBs1zYE3+Ny+fjNa2ntymItwzxrIz5kWm9hAN7fH8iQlvb1CGBfWdBFM4air1j7bg/5Tp7qhDmDCPtTo0NmzWQd0AM5Qa8ZbkRsjQ5vRpZUieZssnHNr+pwkQpPQ5v2nThmmDrrS8TaCgdhoef39F+Sqa8YUbuThrQL5427jZEQqU2neV/+jb1kjlV0mA4au0JiCt27W2/SWvXi2Lfx9iW9diCMHYNE6yZ2FaUtQTy8VNrMJwtmImE1ARKzkEMWkfdDAHQ/b1sN2Iowm+gZ4H92ufle6cT/q25G88ct8lX1L77WqSIKlbEGuj4aVpNao0kn2Db2FTi7zvgXE0iByi6scvv6AA1PqyynPWxcNV1gqU745FkvhOSmM9q+i5mXpjN+9EtlhY+yBG+Q7eSAYGTyze4fL4nfyWQkPP2e2NmhEYQPEw2cWJKloIns62kDCczhXaGVIYu9R4vRMNT7cydq24qoS+PS4xfyrorpWIXtWGfw/Uht1Suqrm7zMOElmsMtEglCUsRrUxbpkGgA3qMGuXioBY4G3eBmQ7u3a6WC7nV/62xDJzACJEpHfeVxanVf0Nyhe5+4th6Ddl1IRm+ZUEKXYRyVo6spWt83nM3HANAzMXmFvF5dT/s9egtC4AQHXI1YLyp/1kOVEiLLGTzLZHo16mrPEFMaN/IE6bqLwmbeKHoKDOcA2aFEZwdSWwom56qK8kUbIMXrMVAQ38hchdezA8RUBiqTkjZqri2BKR84Tusi1Qm12t9bbIyJxCB1DQInIWaFTQO/UDLDBcXTRlXCnwev9gGZarEE9KVOyONcTFEkrRgvma7mXumJgPbpyNGCX37Uusl5DNrWOTM5lib0KIAkwrfEqMo8rKaiX4MD1GxNHge8gxliFx8smmSKSnLZv2MHl/hCcqa+t2jHKeDzsSqS/tOvO+uX3GdqADsGz7/IFZ+1s4E6n7zGxIfuMGcxbOHliM/aVNJEysz/YdK0ZKP9pUACEdf36OOw2uUWwsEd1zlEusrEk8wSghHLI0COjWlzOe6utak2QIxGujyaPiAEGwU7OXhyBAxYq4DqavbixWQqqXWlo96gX9WfzB2/qfjCXTbYWepWcxB7/Q6vw41XFS7TkewIvsVSNWAoqjsX77KpnkYFUgTmwKEj/BDUAIJfRp8wc6DTgkBzBKqhbDUq9cCJG5IBDe1chwXD5fXvOgXhErzXH38C7IWovjHbsALKI/869xyRD17COO8KVKWw7MV6jbXKzRnlrIGI0Er7IiSTH3wfdMsQQakaOG4q4dpJuc//jZiMLvUXy9XYn5uJow6LrbMibFcXZxDGRcPcpXupppMF/+J8J4J+iBVRDaX6pMBnYZPdHcuSuSZhCuVGHnHHgRnpIZbCgWyNmHkQv7I0lscscOzipPvs0jUOfXOqPN62lmDT2n4FqImTFDJ+lJZgHhByWYC77ycIro15mG0nr7QM3uDv/YoP+Njhk5WwvQlY7tOcvOwF295diqPoGEFxnKU87iAUo0MQ/bWqn0t2anvtEJ3T/7wU6niPl7d9fES5fVoLfo3zBCs/vXP7XL1sPS7iPqjC6mtXqlHwhAm6eb+BsQS5K3gzhidOhoVRwv/YeSMkIVvxgTP74PQaFWD/yCFURgZ2UrL6kTpb2qu/t93ukypX/N5EhmVShBrOXFua1KikmGuRqR8hQVoJHPAOtJjOJsn/QIT12TqhoXQ3PlLLPEwyulMczdM35PZ+JdsGAy037/l+Thxc+IabaLQIWr/OS9FgIgeXr0yl/EaLMM
EzUX0YkQZ0/JJunuN6q/cs52YsxSl1XIUx8l80OkOJ6EPAHEtuNuaIOL0pkasAUJX7wDt5xRqlesZLdeFoaC8IMCohAPRjSus2AEEJZ1I8LphSMwdoCF7H7dZPP6VDGxrMkfyHARmrqyM028+Lzm5CkZrXV7cPrbBjtNu7uf/nO7Y3voWQJ6Xa2a5sD4Z7pxSKD5mG+kH82f35XIkjo0w19DBnNEYmZQ1ZlKJqcY6uRrLZjKXs59FgdGZMAlwfP3hzPDkqgUPattcgBqsccDGTNf6cRgTLv2ek2KrGNRFChgIbcx9Y0ZhcghvJlLtGNi7w4tr9km0J6CZg4l1b/cNK/uwGWYHfuD/VfN8voMfB5f+hSe8NpFbXGWtKic68gfRDyToOCwQ5WaXhxeDbbLiHk/GRR7zxjvCQ0Q+TiosehChL+C8DNhHAOdr2mirqALIM3CQwizqORZVXtso5JdmEpYFuoqBwTkai07rQ+yuLHfzeTjySJbCLkVMqftKvN203bNXfc+sfy70LtjHmj9R54GxAgDAZgHAnR2X27G8JdjXAKdvT39TskVqvYFZKKdHw5Oyut3KKgFr5cVOHilRpTWXXPXGhjyBpj4Q9Q8LRvPVHHKpN5ZZ1aKAwlkrJ5JZ937jrgDYa6egM6ROM9IMAoaQTL/dKW0sul0pZyfyNYYonSmmlUJC6rjoIEn0BCNN9q5bWXVwscZbUWY2vkoo2f+XFHiKMrziq2kdBKCYwDWv7pMBUo+9Wnaoy62wUlPJ0SklFRW6MyBw9eCSvuE7pE5ctXLPcwtVZ5skNjqg2fVx7eJPT8/LtA4St1i4pvnMLDV5T5E8bSVkHADTb6AMbcBRQMIaVDeAHpM7R19nKbKIiiWrE5XBxhkTMQhbVrd+32cFYw1JzidVvr3dKZM8GjCwQGIeBISk88fjXEVcfUsmpf2idkW6Z3vRItwSf3HisvqusjTlV9QCZNth7yGjLulCMN6RCO0gbx1dNv0liImMXMLZM7+ed2bthaJVwsqi8/AYuS82vjQmdxpXglWhMMqR1FJZ0lqAAsCmhzHJOnAwx6waai80GwNxTOQ0DdHLfNuSefuhEhqvXmZ8/G8yygsrM95bs3i1BwW42HHM/SXE949p7tR3x5MEBn2LVMLSu8adPBda56qpwWvXvbofroaPAUpjSdJTJLt/bFKDqLRgsuyCoqhxozGuCD+7PAUrJHB/cNjxdvM/WpE7GEuioOalOqOqVF5RiokcmxAWuZqNXtTcns4o17H0Aa0pAuQY++8DfQvAyuyN8zmb/zCBMrQLaGX6MkRKMYhzjxvi8PGoPLRDeE4imkjIbAs5qGBqvfHS2t8qZ9WUKN8A3luvNNZ5xRV/TF1UUi+yq7ugO6kGY5EofP7S035cE8JarP86rduy4GlWOvXVaJtezDkk2acq+eSq2iM7NLiSyjEdhAoKsM1k3l3PZ7jvrwLDxcRoyD1XZmlb7HMGgcMY2uIXtED1gXwYvMY7q8FpeRYFXLAxnvnFkxY6HJ5bTCAI3lrk1YYZ4qASSB6oIsogFwl8Gd8IR1kWXc0FOE9/F4jeI+RY153LHCsXy6pPgKDbFhQS4FVCZU6mpgpfzUxXU4RJ5gCCRWiIiw2srYp0m7h30rQ2v8q15vd31DK9gK6O7JWzTVavI/JLwrV1Wu1Wa0hIyEQLd0UFY/2vBM00v6LNv0Py4wAPQCXe1mCtoABBfCwy7vVNOlxuDYmN3nG7EHxxvfbiqfGnEt+8IhS+7IeIS2ojqLrSVwWfXfcWYmxNeHax3qlnLgvsWk5vlrwlnBgBipA/tD+PD03vCmkMyDu/NgnVVAJ/RvruRadbX6PbCxwaj9bLtfHhR9T7EGGVYfYvWasuKuirdnaTogw/yAgzFp8NGdx/0fi8VOrbJ5LvZP2aLvP7zE/Axx3hZYi0fKT3z4ynniI9KayNaIKcoV8/w6jZbvl0S4Jl2t/pDL2GCEPZLCVpkU5dA61CWxS
dd68ennicaWHS4aL3KNjrh9ACUXvF3CdX4lOtBqmP2mlCaTyal7128QeNQe+ud3SBiSSvh6q2DeIxyXyqUOW3WXIWLJUP6hWdkoBTxU+u5WUbCqJeRnSnuH/msTzi9EIuVitczzVooLFM+9h86iCQ7hLXdzcdxtq7HhqhcVFYu8q1e5mT0zxjSZ0UGjIwxTyNHtbIMfEtQb/LyLnICufPMD63kWnn69DdZ59qkwf7ii3FQDZRrU1es0+7z2ShGQmUNQaYhUlftQ0VPFz4hqU0E6O2Bab95gXr9wzw4Q2UTsIXGjalw1oZpQ0nc3N8PCwKwU/SPrgAYUI9AwYwPEfIjEA/RzOCqFQU6BtHKi1aI7RXeJVxf67I5xRtjwjOVV49P0sA3ZaUX+Ca5VblVslq/+oMs8T/Yd5gglJ/yh+HGFFiN5DOpZ/qhgjD8mGJ/ebz7CsTlsHOzJ+la5TXfQ/BK38b6y3xhR5d3QevkdkslLRImECFuyCifFq4TLBNO8wae1HwDH0Ud0yZnhgUDfzRecYmOxyXoyn0y26GTQvV2ejh//Gc9MvljUQOKRHsBkcVC7MWgMkZ/B9td5P25+hG+Jhwisr+fQXXiwXR/KCgN7aCczCk7dZAZX9eNZQKsECKYeIYsPJWAwFgP5sGkvkUukq22I192tzhZ76+5xtfJve6utrqgtRmQT8DSzC05ZM7YMXN1Jpq7wApNw2pa15eAMIBT8O8r4UHaqcYWt4pX8h2f467XOfNWKQTsXe/milOQkCgZRbWF3ZdPq0tqcrcJPaHy01iGoXjFkd+tswQ6hjNUuiFtNY2aXkBhqNeDDki2Gj3JAiapn0i9UPg0mKGIRpTPkeT65QpVJPpV8In5IFXcWYW2+I6zzZncY9SzSAyLX7t9/WRn6HEfDqXYkKWHwZ7L0szIL8Gar1yTuJfv9kQMtC+S/L0aexv0k8d0U6LvskAqMhR+IPADGGE7myvNz251SWLSSEu51s1ZuWJwjhk8cUFSlnknsu8p0Mg4qhxonCVil33Z6k/6r99sCELU3uBQMwHlYmyT/pgi7XR69pR0+5LJ2W10VkqSBaL770S7EsEnKpNQse1/fNeepOXfIXkcU2rjRi2eZW2MVYuaBivukxRPjE7tqjdTtcEuFYGirrjYKXj0yBbclReKaw3WiYkWIlP0/jYE6YkuvG2cB0TfJXB4pVFaQeFzy8ir4clUAbCE8CSZdT1vs8x+a4EijV6GxRnSqdMmvuUxHRriQuOS9c02tq42Od/HTW7fUr0N2t+xFbY0KKl+JK/t9tZiLuqp/9Srqvo5wT4Z/UZYP1P+A73okHq9Hwsjls0ySQQwtXDwtodGAA82BLwXKnJIqk1Kta845xTnD4ySCR8A+ELgZJevhCP0jlreJaRy2aQJsr9tWJza4/UydJShtNgjl/lXblso9fiVuB2Zhk9+vF268MvccG7gbjWVEbMyrdu46H8S3ctFSSMLNI3zNvv0NIYDU6SqvatYmmopsumvyCvtKVcxKo0tBD//IqUZBE/49pcrHRyTDxq1mfozlF2ig5znNgIajetSatyIJd3G69TtrUVyjqC7pVCcCWS9bB+LH36DtN0oDu9PyjTpHNf8uYaWtL1nc0WkCUBzFPm5S0KGNNQbsumvyqGOlqgGkySWgh//mc42CUs28GsRba7ZadejAWQ+ZlUqDjChylxmCuDC/8oCsOLZmTUlbzouEYJS+3amM5nYMmXaxw13WlWCyrqazTvCo4sp0BdcG9bK3kUgRnIHd2yE/QV1UL3hfUu1s9kG11xJsQEWuDDa5hD81O/+U37+WPiFh4YXHesdCJmZo04GlTMx6VioSCx+vz6rpLGS4v/vnay7X8NDQMLC0gfFs/TeMhZZ6Pu8Ae78tKDdqWKKxs9f4T9sCoBaiSdpP/26sGfusnEdjBb9WHRatmp+/QWMofy3eJzzzwOUnYlTgKRnfAkcb1GVlDSo4auHb4FiDpiyw1o+n4
2EFravJ6cPp8mI+59oNm2vacLrMyef/EwZmpG2y+0IgZCOpN/Gi3vA1jD0aQza9vgjPsfq8SAjaOXSxubXFLLDzjZp5bMmSMkfawL1xqwzscudyLneGbap8G7D3WG0nBncz63b/Nyp6+8tNvY19WJJH2fw677nGnrX/+c5FfR30IexltRnyn1+dbd73Evj0dTxPyGsQ4LFuybZtxUpexY4dKB2jsKZqtNUwCitLlm98rFeomd20IRckQsBAO401X27pLdCUqY4FhOZxo+jp9WIkJ1jxp1wguO2xQcJAMq3lWWTc3pzmCcVDYhG+TyJ4dMODjbyBj+wiyc+IgqymOT8gc+iR1D2YF9dv8IRspLv3AFpp0QK2jbvBZ8YMeiLCe3SJh6tgX36StyAx0a49Pd09NmbtVfjkk2AZx5IUrxHFDDMed1ubo7iB73W33k6/yR03qmLGMcXmrG1a5k6YlXHdmGpZ1joFDJJxqdd7AWs3htZbeHCHeD3H+g1orzYaa0pN8uYXCmKCjKHZVmzC2v+OD6uZR/8BoJA9nw0Qtbhn13E3mZVakuM8KqpMXnijoFJ1UmlLYe6OlX7HFT88dQlwPav3cklKxNi7N9iJBcEF6Mu3a8pfPs8ib8CU6yVjzQO9Gnj3pAsTa1YBqjKTdBl5qQ8jTWz9yV2gaJUzybZrHhi81wIisbClXv+IDYR9PGKSTf7qcDdvZvI9yGj1N4Ee6NXCwIUCOlLFCTrbfUwJWMi75Z3UbnkdPVP6gA/xdDwtYPAuhsS3+phvskNWuLOzyypwCkgPYmx2yWyZFu/eJssJESYBHSEnE2STqEzS5s09W+eRpX8q5UFlMNTV66VlEoJf15PQl01clQinaJO58uST4ViyiZLIiCwNkY6XPFi6R1RLhqAuRRvUomgSE6ME3MVgbLGsyB+Ay3kehoKGWhNSylSdyk5VXtVlJSWAYbIDOWlFvSAZlc/KFp8lUYPiGdGwcOAbLNmiMKlgvSldAq2HNpDxLp2BJ1PcHDPKb+C381ofKxylVzATzAQrxghtJnI5ZDKt7ePWigckk5KxE9jC2FMLIk+pHWb5SnDVOdKHkiWypdJxoIpMTpDZObZcYihkNflb1pj8GJKpKTuyJmFLONP3Sk2+5lVbbgkFNaaEJXE/qz8oUdN2vad2Oc6C3gKNCBjH6pYv3DTsugpzbAJgkck5KmFV8V7XI/Cm6PtJG4SPs6lIsharIPkKhvzoc3myFuNK63bbvWwvCi/EXbgOZbl5Al6k65VUxATvCAhXHmg8viSpazcNmYaq0USlUDB5WjglL7ijRdlhxB06dTR3nsjUMH/fbZX7CMHXwN5cX7Z3FZsDksqdwrfipykEBm1OfU2dNQB6yInPIYz/yocHxj6aZPIaftzfMc+QYFuUnoDM5Lx+Q6OmOkrUL7BM/Kb5+S42+5XgOPWrF4RY9/kXQ+KmilmqcmVSMSafp6/jaeubW5sbtM5qKqRPcGUyami4ewS4ddESFr9/j4Fn+CPn8ztiBnWunKVj7USMdbF4TElolweCAbk8Rt5C9G7ekKr23mb+JUGr9ZyS/82eMpIMCJ93bZznsiZk/PtoHO9+XVwG20KSMr1IZl39ghn4vel3rOnjygNqYdPh+W1MsWnARmjyKYZVj2mv/DSsfkoSKbzLZ3fOAeoClsJJ1F+9BjITRccXpTiLiZXRVy1mvJzw7SgOyy4q6z7rcBdb/DpZWN8teLr7lsUvK4kbobihC8bMcf32WH1nKEgiRAj08IDPl+uAK7kRTNzU5cr2z9ditzWVDJVZm2qswLfBFQW0FQHFJeE1urYqu2diQKCNDc690u9IocsZrHDIrVul2wXtuPJD2wSZukP8AwmDgzzoh8FoJw4jhLzb4CbX/BG7Ndrs1EmJJO5VM3P3F+n9OLK+PbuqEtY2lk/Y+Sl1hcisHNpIZ4Qusprldr9KmirP6al+NIZmoS895pmsbEfV08BVgqmpubnJxCRGO
WwibI3Ce3hHKpVIqZNLS4eGwrSI4lHLtgae2utniFaq+M+1kBAdDjTTtF5GeAqoKKOjBUxmwbbXTrxgYy3GXZWylOt7rDHd/AJy9PXZaRANa5qr9P7wTUyOTnpS8FDeVSnMzeYL6EjpETTb4IxA93tNDydhT9x5pz9NTiX4QCFH7ROyzhb8kfX5nHEFVGQl++Pu4BgMbDAoN2n5BehzVXkZXx0hb3fqw6C6KGXWNXlnys1tWWeuQrotdIXylth4QhLXjRnZ/s7bxwWGP2QVDuBbMirHRI6ODgyOlMkxsQDEqXkwUEBHRuOj/HrdC5qsf54ouC9Oj6xcujJCpGQkEgpP1TEjPkAAlFY2uX5ANeNQxUMao9EH3cVCLpUnrXqo5S3DoEx288ZOgTgnn0HE2WuCyneMWIuTbjQQI1nlExrq6vRUqYSKc3GkBz00MVHEUbKRzEDL/izK6o1J64jDmsoZbHf4lfAop5r9FWtQCXqU/dJOn4QhsgXU9CEL9QlvfaacBjDIfHLLo5Z4j0iPpO9pLV9q12Gl5QIYv5/GKOTNwcHK1j301RiCix/dvBPA8TK8h+fXXFm0SGEPkpgkdUX+dk3dkdXLVs+nMrcV2U4mAva9+uaNgocA3+P86zwidQoJra/5+T2xoBr3E+t+WgrQhhzyjA545ItjI6HFytVA7Crr+b/okFyZzIB9v9BAWiCSLInOl/ZM4O4fN5BW8+zv/X1n+hAYYBE8jfPwUaJQJEQTK0AIoqImIVSqYC/Mo4lZLElzxlEAyH1cxNe9ZLO9pOOLHlfzxCI+l3x499QailjE45I2zy9aCxgcIdSyhlau3orM4giS5VLDukIL8bvW+S56DG3Qi1j86teUmvddfC6ZcCWJvu41fWk/j3PuBJ5F8+SwjIfZUgpI/iLSKAL2Qif4FkFhspAUsYDVQqg9aJutPpZzLO7yhuwplSsq6ix588tvtCXNus7kMUHVq6NNcSNsT8DkZ/yYjly4V5bbt7ya5bF63kIbUDsnm7On4c3ev99YJ95cFJ7aApidDSqUTD8PExQHV68rbmK50jgqDauK+cnpnorCJiuH94L88GtHK3FkFwqq7hevodwqPLYGtVY2NVVaELsgKUCRJltJGGF0Ugt8BDAeN1q24KxiRR+EmErVzogj4zWkvBcoYdt1/53WgFzFHE70JeEWklQiQ9cRmQ6YWjpyCdukz6o+xCcmf7uK9NGKihrAORJaCM4wqwv80KkGZAvJdrMxrucowGCV1U84Vn40is9YwYCCY7Gne+Eqc5yygWqD0AYT7A7iuRVest6eEpPa6Mo1MaiH5jm5FsM/mO1nhVUqOaVubUziKE1L3iOhOU2TAOwDj3ergLE5NJZSDvrV4raS9rIuT6+a3PnJeBxZXWC5wjsTRhmDn1VtH6oB4iVtCM4UtZb3osH2VtieVU5FjFQh5qCBM86BhGiGUqFUMlgW2AIsnUPpPBUV5Sp+yjD+z8HoimViZoSKb8KhKOnUoh22g7gtsTe2znHxsTpWqb2Sk+B7dQadJHIuK3Yaqij/Wgs8JBFSCKKfqZX6a1tq4kyzzBuRFRtrKJuVqK0xjGW4OC6FqwIqForTorSViQbGBSd6kygPNvm4D9yQtPnvtRNXrwcDe1Tp5qpaieZHSjDAgp0N8nM746hQs7Is0Ka2JYrKip31HbC/uqvMuLL7XherQRu+WMkFgnO4T09XB9F0g0ktQADnjF5LdL3dprj4KgpbUm4K40Udxw/YVukGLooqZNWq+tS/vo/o2808BA7rioWYFzj234EaxAWwMAw19nwhqQRrFM2AccEHtGudMStNb4qlbJEKY624mpfPuYgpkl0KsajSmPxhFS49cvmejPgZr+LjNiHP5U72qsOBPnVxwknVKSurHb+5NVmxOq21BCy45o9TYlddiyeMEy9/I52VeLmVMOjRM8z2ZKcRUC6U1UqdQf5AbGdYNuCpaFaWaYJk1a0COhK4EBL0YUmOx
RTlr78qavXsjPE65jIvaFJNNWmj4r+bHkGmhXFIj8axtl/pHkJjS5cb/Y6WditQsptoNxT8NXVcyKuo1jiWpLAxkKmPM8ktr032vRH66TUMWYXsfur0Q0frHpIplpn1SVzMzRkXEB1HKlxlTiVS8e6SuJrmsSFPvUzjUt84cOeoSHevmQRSCCCx9GUfU2v66DMeJrUks3A7+0OsMrLmgyn51CamEfk/Jv/mz3fgN3JPq9mY6nePGO1+1gW5Ak24EKDPOiE3o2s5X3hZDoMJQpK+AJUpku2yKD+XkkBwvgz9lwLX/IgEeO6e+Z++TmWPfyJErYD22b9+a3xsXPpfDlvXdMhTA3/+fORJnftrlqklf24ZWyRadz+CqdOgk5s6Z7FQcVqldwAHwB6VBI+7ETXtKBQXHLTRcEQUPFbE3GG98eWLeP7dRF21dz7A4GcnLY+AnZeJ56q98Nf3T3ypPEKu3KVlv4+cNC5Se8WqSNmwAwx6ZUb9T7KgY7hMGfGIZ00QfJ9w8fp1wRLZBzPCB5j3zD7SVqEDQVNbxUaJTPb9gY889rmnTa9fz10NLB2nrgrh44yr/xIdj+0ThKzASBLUn0o1xyMyGX6j81+FM7aZmv/Woar26UXqV8yYIoat15XoWXh4mo5iOkOcTjIvKGUgXaVe8gUEkFeJxMUfdCiRwCUH/X1cIHvlqTNUbZmR9uDjMVLDxxAy8dB0Ni4WBgstsHzxsx6mUrfxtAJfy7mqg31KWSZITUAdYTOA3Msgw7luDK9UiKghVjzyjEYD4BPuYZavlGYnfPVEJQHVWOorUSNcr+3qgkuZyaee9aOdcSEZPGBSQiNvD/OC70l4EbMM/rmS3PwXZ2Ez/eF2nCqu0nPlSP/vAzXvGOr/ieWt+ig/RFEwsJU/XdDrXrsIISMNW9/ZIM6KpvKgXKzWSiTSh2Nf1qjdEmT61GZALS2CFwR1HrqHni5PgbLD73eASBBSgB55jc10vP7hSkfA9NJDRX6x+rQJlXj0wavzbOgekgF0vdMlVw38T4tu4HvvB/w6XiKgTPqEuwqTm32gPlDQSJWp330XyS3OEvg29zuPcYRAR+4WpyroR3AvRhVCp/9Mr1M9Wo+t1zyyYr9Mwlomo1eAc4ktyp1ZbHYmZh2Q/oB/nmVumHCnmFjI73iWacUktyjEM7GUAXW7rEVh2nZhYSCFqWFiTaYhsYHzSWLrt96UJt9KhJiqP7yqt7loA0w1fNffI1uGHmcRo8Qtt7O1GS1OWeD9oPITHc3Bn0JMCRShZrGHFcKSkPU0i4IKwfReer6Drv1EeVDBT+8je3snkU2MjGOWyeDS6J2Ys+44cP5kDCuu4nwBVAta+uo03YLqSXBW+XHOHQO+XaAxCWQtm1YHPg++MImXGsx4GCO3AXpV8Zporjcc7nTqidgC5FibtzQp0dmllOrfLaT0h0u0X3CTyaEKfvsVL9R01uRRyPxG2CALeBSmUCwdgI0yv1dhfkPwwZsPBGzTHxaY/8Z8vsH0O5x6L0Cl4d03KlaC6h1CjSGHZjDrr0v5dReDMcbqzhx43c+5FU+ZHUbFLVngegTBtDz9BERdJylUHNkKjbmfDgAFlBSJFrHA85MY0+EyLorBEhi7TmJjH+N15H07iS3UmpK1GNSPogZxw7/MDLD2pIdZOHIUm+my/j9kF/BXgcRLicNzTCMWsn+QNMtL+y/TO+++I9B08hKJI8fwAue6Nv2H9L1pPLfAHV2pukqaWngfmLeORIgQ5R7T1p+OShnZbhVGBoRtQ6yZx/Pb8Otb1i8HohuflPb5Q0DyPxIxQvD+JPNq/iuBSGRb0YUDR3zlMEwiPD4UxLV3hFUGd/7+5XL3j8cPbJv1s5f8PQiyLZKch0AqD61tH/h7KaPNco32xYv3Rnc3UYT4OMmRf4rOou39UyX3vvQD4iz9awmkNktqa4Sk5s5FbUv/HoBaLejPnr33Ms/rUzFYdKocuIKiJAUUnIOQ61EEf
Ap0gShU+DGOjr+ekWTEmMnDJCYDD3kW8zQvyo/wKt5jpYBXUMQIg0UMKP1GWFcKXdchKNTzowmyklrQbEJug2ysFTqElHTsr8cDfOQ6yBxLKpT6hKjr9I4CR8izi6S+Sh4QIFce61kSz2vWC26CNqLYAc43w06cANt5RK3d9kp27WFILNWvwRxzAVt4+Ho/I+Tp7xfXHj0t+pGNNdlgdVHa2QjGSFQHJz6Q98qSNOC/XT65O/Etb+22VfHjGe5KcFMO5RJ+JLxkpu7o81ZzXA2zDdcId929ZUKWWYu2IgPVSLJbeH3+Qeuy/C0aHl7GSKHIPtB0ncwg4wk5wonp8dzjZDoZjmS5t4a+6gCZ7FeEiv8Y9/MaXoZq03d11wWuSuJZgHvdZWOv6sAbyWuo/vxG/Cgt3Hd9tY+6d8KDra61ucVaRuiFgBAmVV2blXHDySi/IGpnSdAhWpBtb5JL56So9qNCj23GQK0Cw/+/Gq4d4oTWHgiPb2s8YKBVQ6H5K4esPZ5mSlLURZKjfNpcSDrJy8RhtK52T41r/UpcOJzeZw0BSBkw7YFq1kzILRE1i4/Lr0nr9LxlT3zDf9C2DhatnN2H8/NZETWw8NLHvrbyRluMCr69wOpRRBampoghRonx5eJrvTlr1J12JJKv/fKqLj4z+WeUbdV4iFuzNAYywF1gF2h6w3bg2XEmtb9G/VlxlUIdc7oDJdkfJaFijVdarww4y7xBdVKa2yl/MSM/+zIy58fUTks0/l8q/11ZWwoagJILsvtF6Xm5DQnZUjFCSLvp4mjE0j7bKZKKX79qNcVbPGaeRyNln93Eo+HXDm3j0kEzON8mvBTTchVGpejNFLfgj/tDm8rcdMlEveMIIsaPFWIl+UHOHZ+f2CWhipes5HUnB9R/Gwprf3gizbHL/1r5HuimI3WMpH9sAQHpxuIoKK9wZ91in0/o8Jmo8kK7mLJCdUXZEnVbnSvKqnY7VGuqMrm2rSzNWfnpj/5JEl7jX/PQuT/GdFOv8wyXbxC7uHc31Xt2X7oUDGx5nWGxWxjFNUe/HAwg6xiGySF+C7dZmvcRxRISXbF7tQFd0BDuBDuA4DEsJai4F4LY0xdvDfzZcxFAJ6ehPLp9m++319E9OuaCbOeOLBgvLRawFfeHeuqh9bU1tXrWVlMLpAoFjnMrmFTauSA0R8JRfq3e/QFFqxl7Tt2JotH2/51tgOjoZ0h5Mvnkk/7x/JVpQIYoq6SCI1DAzxKaVBZridtiDyNNWmBkC7qn83m5/Kyy+lZb89WdAbGTwvR7U7gAK1Pf9rvHSsZKB0kcEq31pROapLZycjfRoR7kd9jbba3Wv3wKjOP73ynl5tiWeEgB+1koFwKwyNCRXlsZCMR2CEJN0FmIl80DY67WB1Nl4BWh7RXrcETkXtBY0AZkUAUuSGeS5d51wBBYgFFIQWtBMxDFbFNgNl/b+7T4TWvYVfktGm+b7LB8aKPeU5Qrb5l/x+adeWFLUU7v2RjK34ElmJxBV7BqOJoHSlck9FlAQm5PYtHMk+sKrxjueqeIdAFaSn4cohV4KJki6Q9a5L50k8etAIuN823CK17pXQaBCSK9mOLAxV2yK1QkluUkfUxKUuSsFB5QSiU9j/bSf370/2ocZndFlxZ6r+1SCp1Fi4J+O0k1mXUEadmShBhsO2p9VBeqEf6s7XRbVQSLay8rM+Fkf7kNy10qDudYKGZMo/KoUVQeqBrR3t7ebWWiiDap/tLPhT0prSpTnjByQnd1+Vg6sOjFQRLG1MzA7txvHpLClRMeEqcdjcmyrj5L8v8Ha4uhuakWYmlZRIc6TzOiWC9qEVODzqk3Hqzesua6Go0PDUWngKkWXcLUYxqA4OioUU2UpLthhcctVdIqowG3g9g8OljL5m+YHq8ue4cdSZaRHNhC0rKwlxCN6IxlEw6Mlq16/JJeolNhZv5+zpSZGRyz0HTqRJWl1FEtzlGRmNgOTo16hMB7e3uKmdlMo
z1O6zM2C3CVfMecBm2HXI60wn2u5JuEJ6K3tED8kL+Rl2OqAqwMyBCfRlkl1bJinUHljYIwPImXlzCof6C3sk/Nv5Hq2180qtnPbRz9ZWB/wQQ4Tx4yPYKNTjLHURfVNTW99uD8lZukTpFdYwm3fX9WHislRx1c0o5FTcAfALuN6Wxtyhiqu3EUB4sRDGxbUxZucQ/LAk1qgzxUPGZtM+cWpymg9dAzrq6ivLsLqGXEhn1lU8kjIWm/N9IGecqzgvdlyLimiumFKpY93GQ19KQO2ySpJwte04vctaK7SRitKK3ruq9dtvy9+0XVKUk/vziMVFfZeUYLBQ2kYbZwnDHkdNXG3NmR4AhZA21VxxQhTWI31nH1A0FaGVHHt+oRWToC10WXKSO2dvcA8PBzgjpuxiPGr+CxDk4LFxcWa4T6jW0//LBU18tNe135OAcr6GHDmFpYYgTYhbcNOjLt23hmIWzO/FwUEAfJ/89ZOQydyQZ05+sXe8uXNR2q8czOemrrZguI5Y2Haj2LPdcYo12wvT1y3CnJfHw+VOC4jLQjfNwmqHrirRqxuQtyMuQMVKBl4r0W9i8YpV4hVZhEojGl+FHBPYdPydngtRiJEBBEYDt5ysS46039o75wAND9UdIg/FaScqy0SGNfNPCLI8O6subYSqu08lqD8GlzlKl+ULUDun/VYR+G2RxlKO6HdijXdd4ZY63rIeFLaFOziaVhYV0cCdGG+nrDMr5kd7tAzFiYFrK5Zh6iG6ESg8wpupS4FNjB5cEWHlZy3Ni6mOvQnfS7lJ14AqIhmc1pAYOLP/JYLm4CoMz94lFcl33Un5XFq5p823t7bUY+ShK/OADy4ZMcp3FKPeUeKUvLImVIdE9Oz5d0z+EBzey33EgsT+QRNaP1GloNNJAW4hdMjplDyvCK3U85Z7/nRsofRhAJhMylPGbgfKPJoDMQEINrBnRnO4b8B41FZBkM9189Zn7KVet3XDkGHCUrfAtU3cq+3KLyz4D45NRnUjr6T+HV3AWGuoYCLm780wBLr9k5TR/f3MfpB/4QST65Ff9Lp21obm1p6DyqyTMlXYIkaIWjbt3R09tjpoMgfSq5u5t7e5tBnENBTwhxtbrpZ2llbmGQGdVXBJlgoTn5WwcPjY8P7Vx4IrVhNzwWphcrePm7IDOHh8fGhzpfNAhOvWda4elnknabAXNkNZVH0qPPD2xl2aIYgu0B6BeeUSMrSdcLsLYOPB+lr8ZglaKLtiBQsPdZOfAdy+/+b6qdr/M+6yERT58i5wILKaqUmlpI+/5SoSfpCDj2v/N7/1wF8HQVnx0i4gMijgZuZ8S8F9ZuZ9meGiEYsRan13YAhC+aq/Odu1o+eN5wpxepdTOxf13CXVL7Ql2xaCJDTWGD0cgEodVfPTO8EUs27ZbrPtCK+WLtBzrd119Kvn5C+0/AkW4/2MJmaf95dZ1+4AofaJ5V0o6HiW4R6tEt1a8BKOMhElkLOmP/O+P/Bkzjikr3U0WD49TfitVfwRAVY1uAH6Yzf9sjGIKyELYqS1tyhohPEHEk+MjjLHZajfSoK56l45OF8o/l/ByDZeUiVb5/vUApW6FzJm20+4ZEymEI1BURK8iomS0GbdFLJ/4PNqDLwwKewx3vVZETVXHMTtUqE9W2J58aix82XE4JXbWtpVHJllfO/wu1gT4DA7ZXdpqfCNJ1yh5OM7/58VBS8DEeOdq2uH69p7CPjxRU9Z+KCM6cXi+z9/XaGQolsFO0/fZHA40GYz5XTB7/WEZbSmdG3Em9h7C8y8pEr7r/nhNodJP+yZnZmcm0Pg33skPfy6a3z4CSyWdZJIPQMnGNY/X3v8MLrMFUe4hcgJ1jquAUP6Uk1bPnNLyKNheqx9ycU8C9GtFfN6ynVI6EUlvtr1OzPAGquKCq7p3jJJxPO8+OkI3WHO83MOWmm6Ss9dgr2Wj2x2pRf3cV7ZATY4Njnmr7cLcrrvRgj62Z+sc8eMwh0fvZ0
0DjYTAEjBaMhhgQBAxeR9ISf7woOnbEF/Emx3rv029vyJloXGHpaLNQA9msnYSB1NgoKGRenIWMre1mh0EnvexzNFCZ5ZzlBPGOV3bUc9Ado14p22b35yURFIYIU6Gi9uoPxnRygmzjoH7el3Bon4Wv3n6MdalvXU6N80B6YYqUS/JlUjbjqcf/fFgoHhwcPhnhPT0f4gKDZJR1vxEU7HMvsRDaa5zp2b23t6NOSeKmSujeztU9U6XYJqt1DxZFIa9HjWIQL5fy9blUVIQRF3MEWT1xByhXo13SV8pvrPJLx/3xZvRE4ko8V4mbLikU+O7Ebm5UmZxGx5a12fU4HasDpG0amdPrgUyyTzkGPeHqjeXHzTYt/UYhDwKv1bS+gpc5Iju7UWz5PNj+OUjKYAyO/oF+QE/OeqX+iV/b3Fk4KhV/xfPaoQ6boyNQpC+cxD1MIgYEMmGVf0dPmIp86o6Iv1NWoi+fsjLRw8NDI0UkR06j41NTZSrZnlBmcSgWLedqTsDYkXeYbUFue8Ns13ujMju2Oh2QiRQ9oz04KOWHvaOlucXM0TO3XyqUb8udq2X8+9XzSkXhsTQGuqkykajU8mAgSmYK2SS6WYaZDIyW95V2XjxVwkMyYliYVlGhXIew8gzlmoeIO0q7vIO+gc8IaO38CsgDVqtvxVJJRNN46iWr6Ed2pSjHjMWOv76s/iXQH/nZgQ6pak++6BeeitGS3bhnPlxW95IkWA9Iv4ZPtsafPXXzRrlSF9kahmyPm8Lt2yN6NA93XIaGNHwSpJgMxtuJx0Hf5MgkMEhuCFFv4L2SeEnsXX/NCi/2jNruzfRletn8XtLPDBrtXHm7bJ3ikS581ZhSry7Uvw62jXD5Ij6+7ce8NHJFVTPQNDpNw8r+agqIyBWwRZyQr86ufLuB3bB2uotWaDukNWEwKNMlZI899chQV9fQoCf5IhEzvmws/MSHF00Xe3A6gVja9ktBTnkqqbULbxTemxoqRYmonEqMaTHuc/P33X/cj6FUMgi+sR0SE4b0WnPHtKhEhJkiOLGMzMwsninhjezYIeciKUjcF+qpKdtKV0uvuLIls0sWilWAgBDBVXBzj0HHqGLX5BTez/9uWv689Pxj/9yLIirpSFzVSbpLIpVS0RqUogne3+LWaFNvf14vsigJGfYBBAZBACCUABoAkfg6Yc4rAgUV8wVqFFCtptq4Rt7nFyw7tYv5UM3PUI1t3OMSKYCBGkU1tstGtY0/B2dUUEEFFXRjkuUyDdQooBrbHW7L1yMPz0NUxaZ8Q6u1Rmt1D/ss0jgYHQCLdVbFVqu1hj22uHO3nlrVTBNarTXs8WVqr71xjkog1qxVWq01Wsse69UqrdYa9thoZ3Wc6ntqVPXHtIcEs/bVoH1rvzpOxcYFYP5pxJzmDBe5VP+lBnYa+utV8GuCXZ7lc/6ot0PKNXgs6LemsRZkfmiyVKihll3sYZ8d3K65O+W4+PVxq1Xmdf9sTFA6Fnq+mZVk6upAyLnL/dSwotVao7W6h32fmG9kiVY315REen16m1pVQo5Waw173HFUW2arblbFonxDq7WGPe7mrvgSuiVS41Pv7dlugv7HPChxp+D4YxVvodLL22puzVS/tqtxy79phwJzfnZ7uJadkZ/TPb6GkvOxZ+7YL92ucYiCfuX49e5QQwZgAfylaxeUtNuSLyCI3Y3kb2MRfgh6XiVWTIfw660Q3oSm3Fap+QLClbWhptxWx+atAbGTPtn3KCQnHbI9PL7qd/juM7V8YA5Fvcqw7eDQNWwT37EB8DuM3/xquFF0X0bm7+i2uLDtFv4GrSwv1lXZRH9amRUgmdJO4OgadT0nIt/JyrklRcd5uTo2at3nYylXR6/tDPAaz7AjEiF89i21mKP1CMf+5EaF4NJfDJU3a4cKQq25sRBmzeKVEC/YZIQJsj62WCAIC3q38v7JWAi26938XDwjwd4vlZyOg4rMSDt26c+RD63crWarXm+sp
PFad/XK/3KhGnSff1cPa5vOJIDeNnUfbA3RMkWeZPTRyI47yb0VG3J7jhZfzPotfv09RpO7HVpzeCj199Y1CzhCy3DE1X0NcWCGbHO2yS5Aj//t6HiPTNZ4crLZlsC29ibxjrg+7/u441hQeSRypRMQ8HW+VOPJr2vDerHG8Xdda/UwoPFteetSH8nydCfdGc3HAbBjSAftQl+BMWAm52sB7EfYdTg0/w//3JkEGaH3A+814iKSl3SLXE/+P/lmIcqKF7YUXkPZRDVTX6XO0zbS9fRrjP8xm1kC1hH2ICfOaeVMcNZyHuY8yznD7eBe5f7M28Tn8Q18Lz/L7+P/K/he2C9cLtwuPCicE14UfilaJgbElyUdkiHJB5IfpAVZ9iVK3k7St2RDspvyDaAEfB48B14Gb4B/QW0KgWKfMqJsVI6qkPVX1f9pXtY+rPPo9ukP698yPG8M+V+lOhOqvdd0wPSZ2Wfebb5g/ttitWy33IA1cC18HP7Wut2WtmN6zzjGnUHnD65H3XXu/4o2FGuL3yxNl33iWezFenf5Mr7v/Q8EygMfBr4P3ArSgoqgKxgLNgfHg3cFdwSPBb8IoUOq0IrQ5bAzfE8kF+VG76t4seKzmD7WG9sXx8Un4i8l1In+xIlkRfLjymcrv6rSVJVUVVV9UR2ovjtlTr2d+ietSD+W/iYjyupzshpbza+12FpH7Vjtx3W6uqq6/9WL6++qP99ga3ik4YPGNU3OJmdztMXSSm4Ttn3TfqiD2rERkQtZOtd33Ztfkn+f8z2u+6XuL3su9xncDfU900/vH+zf1n9hwDqwjbfBX4OuwenBS0NLhk3D10ceHh0aY439b3xmQjTx3sS3E/9N0iYVk+7JxOSnUzunXdOJ6fbpfldctahq0e8zhBnxDDwTmmlYrJ9VzL43++2S55b2L12+dPvSg0vnljUuL1oBr7i24q871O+4sDKyiuewqh8BoJkz1X9m/tsUeX8BgNUBTi+gr+sCAKzaxKEXRtZbctY8g/H176YN8cfpWcRwpWYuZePzr2f5vn4jSPQ6KJ+v/ntLGzKifb4Q9C0BM32QtNVbxYpbzk24fPrzFL6UgWL7x0Ii8ycts6i5bzXCwlvphs3X5ysy6zRn+F1yb0/TAqiYS1j/+vAP1A9fHC++Aco7v+XYf+y8bfxNvWN6WHRWdR+CgKq3Co+f1ETnvgXN8q+Xipr+sDXRbP4bq1liP956gSMqify/WzY3ShZkOAuv3u+maPxXXOCM6BXvmNDXwXqjf6yEKCUSBXOb2NbzRzy3jruaxgC5dLEH/1CLpFNpMCnvtOxLb0yI5MJMMiGqYuGPa/Znrqpm9p8426Y0iOC7ByU7w0tBQL/fEsNnHhx4/WmGMdqjWN8qhfiHW8p42O37KHL+CqCqMZ95oCiSRFdTJLg6IsGt/Nen6O5WP8YWnBUlzXj0OjsmJsBfgiigAv2w3fO3J0ho/OpDJlSXWb/4pHRz0g5aSsEohwYI/f788ET5XgK3+v8Hrgk+j9GixRM8zarXe/aCOMSra766/me3FHIKr8LOxkJ7Utm6beb+joKGs1hlkkLx+h8dnG2nOrPRdglEWC4qMm5Vb3blWDUMFmdM725hmwlkizV/sQ70KPb5hVBEarU81SIVf37RABJfpAbsmBzRBeiRUF1d86sGRZ9oy0xOIQq8clb8rq5SpSOZdFFyAJu9s3FSKWLufzlHu4dM+8pXZvDgjZuH3GY+GNgYDj5Mdo4IeH6JBSF3HwcfPCZN1MHXo9feDJgxjos3x522DCoAx8CCqJkRMjM9/zS1e0oCCotWFpYLbk3rjuTUQ9zepxYOdNAB6TMMQVS4cbTIOZTfjHjmXxaQDEexnHuyLWAZPFK1Rql365kdv+sJlZEYTpi4U3QfbbaCeWmpZ2ZKNYVE+gV/qzhWN/iSPQ6ZiTshzciDgAyIkfyEAhI6EPXyBoRV78/8yuhACwKSGKaaZzdQW0Ujb39JZhMSx
Q5lTkjJzrpphlTkP78BJVsimbtz/ufhgGkHndxI2Q6QfJDRrzyfZlwZ3k8oFiHEjuoLx3pUCbpEMmTMZzgvACyN8RCUFytmh/rwq2jA5ZaAn7B3e7FQ3yOatuvUVRTFVyA/L144n+p3AwiOEj++w2J4fggblRTdfDix60vBtqL6+j/UDZpTFYi7vTbtSrWhnKSW9Elg4xoNqyDRXEciyZI375F8aSQAWwtIubyn81KtEzsXYSZyzp3bvZvdY9IJjFr+9cNeBZKRpbdJR/tmoBeNh42MaeGU75q0zWr21qsHCunceX+pgDQe3Bz1TBgtxP9xcHheZqpjaVryoHFiKSGAwoWlMQP4W6vsH3vfQIKSJ7RQWCTAdNfreF2AgoCW0u4/7F2gAiYiMAFW7sakE23ki7/qS5Zu/zT3p6KwF9I0URenc29GF5xnR26YxEMJxx9cGngLEU1ZxtORMH5wYWVjKeS9hv+Px8Pun/YWkiL5zj9C4fDc//EQNjeDMqIXkqNPT9ClLJ8e/RAxxhMsnZsaIJBAoY0hMK6zHx8W6HT32a8ggGdZZKw0rAAxgCmglQ0YwOnooVNuehgmwGiwdQvAeY54+ZOZ97PRaS8IKW3OC8jbNyijzhFAzdG0psHg6kLyzjIgRaaGoGrubTfl49wTVBHlMvChYvU/Ycz73w9z/+jxsLfEZnPEbhl1kgCqnoscR6yNkrmPYna984rLu57VSSrduSFH916UGua1rzmd0RFeh4/0wn6f4SI9F1IpoeLme0kcyuwPqcxkX7hS0mvo9hsxNKkHlhU8gZ4BD2a6lcZ225bYhvMGR/Ipd8Pf5UcQEIEoMAgJZE5k09VViIautThi0lvLAQeOb1csEvjWzcK+zQlmZkQiMo4gP5mxUQseBMLL9HcHKvIrWtzJqwlbuZwbC9rFMzydab7OcYVF5PTdVUa909SGrrkuedI+JRFQvNezxF58mvoZp+az7r74XSpiAeWeZ+mKJMIOuhlYCIwLEbV16JaeX2w++bOu3k1Z8tY1SxuBUIzjOBTfsmzT7LojkrBGeyjQ/SgHE1cIi2uFG2fmL5uVYDLHrpSJUUTaX2kvZ8zocf71lYpjlawMhjL9PLHAmTAvcbbxD8F/uh+iUIanbGRYgVQCpZwyChRqAEMoR1jLc/d+4+ZG/FeQ6A8HSGXbPhvtvhzLIJxxLL5fSFjwZbT6ZhNF0UBXFTCYvcLu901O438D0mr0A8Vsi1AjJMmTXOdrh8oUa9FWHHv0Qp/IWCDo85F7CZeWYNFT9m31A2ZbGAt/nIBbGE4UG3LfCtjwlbroC1czh+/zvaUq1/Ph1AKDhoHpfcale6Te1+iLHxgma9R8Y/m7CYJBwfaDNIgt0U7jCHmtwf3+35+ZhOKdorO95iGVzfYslIOE2+GFFsb7LpTdQUCTBXOHU9bNETQKgb0EyV6rIh7h9dINA+xjr0e7zCyl60rEFtkWbOE9OChj+Hmcd953kgA+rdq83TMjkZJ1Haasv2DbftLN+NLcayHnKxKodvIa+fjDxLW41mAWMlpWwyAI7djMiS2SlCCIaGKaAgaMZ6XMcwUsnwbH8Y/yn047ryUPoUu9ZLu+o2eCFTTZUSAURqK6J7+SALWaORZbmZTBIyK3Xd1aSrIUmDoXjXFvxUDXNJitaVNV2fhIMeC2jNeSGYaOxtO0zh/pp+0ZWdq/P6sevXkX4vddLQMwwEt3VM04dOyW9IOUKjLg+rX1dHr6smpmwY1llWm0R83/cX8REhshEkk+SOyLt5Q9DXhLK8Z4yE1KJvTYzuXaOgxl9se6ufHx0Ri3pt81tGihWrJ82uPXcbHOfnrK+tVPflHvja8ivraemTHQw+p+KQOkWK6fhdr+77VpyuNA4/67jlOoQJKPUGXsbkW8u7wU1zbwnwNzgn4Rl8upa+oR9/s8InLiM0yBuVq7jdCIgDhkrXH0nr4zY45Q+guj4piWmOPcWgIwdQqyEXrcDK4j/qG12
gv9czaLdWwemLE6IgSJH1m7GnZo+TytcQoYP7zzZk9favrZeVn+0ca90zBi7m8lJQBTJ+HQBN+rCsnKAR+SC3OfRwAXY+tFjYwdkM3RTCYmXOW+i2PxDRhYbTljVtSC4yKyF3kAwnL+RDqO7+mm7QUmsW/WmAk5cl7fglP8jQUS0YB0bdZ77kxKhpd0q1zYM+vykIvY5G1ZlmTBmmrZbPt4er7LyYpKYhbXS3g5dCqBYOriAwmW2x6QSdtuWWYH9q5XmxHgzXWhYZ84xi79QrPU6aP04iSDHjVKa/NHdItq4GfLiFSEaePRNx5JFShSOr4OhjNVeUb3RpY8V/8XkGbh859JAqIUdPw4okqOugXn5x9cBJ6/OT+CYqdfbl8pBoXEY3+YNDlOL4gpxbXilrHjJWQaJKoojP0Bm9kWKR7lvc4npp5dgM2iuqoD0VjnuD3kQ9pdvkKD0u3ES3ArJu8l+EMiLXl++uN1aniNr1tTW5tDkgC2mSrgV1/h0uA6J1iHWAYTtjbnTdcuq+xytNF88xWEc0xf4ovHwmrh2Gb+rGyebHXYINLwIrfuDjQZaXTOy/v98UrRkCVvvEmofBPMpXWsvLMksxM1WkYGe+kfqGG0OzP97vcpyZzFgWQx/Xicmq7cukoSeIHgw5FvpME2JxMju+L8xwNuxLJReyHEPcDyOVVKd+IHx/CJWf3XX3xu02Lgx+AmOWTre38+/xRahgL1fwiz9+YZ00/6k/n6JPAAfjvmTxFx56x9XMwXnv8FW3ISufsMXK9V0G78YjIN7Mbo4vrm5vYmyzr2ExdLkwNAXEzSlXdX3u7kXK7sAh8THdmtO8Lw2UM8NyOyu43oQS+Uqz//U/k/Mndmp3F+anb91BBuaofHHZnGwTCYXWXmYWEkehiukxabI+R+ha0My5tlZc3J1eso3YMiK0XJ2L75rovF1siSNlCPOg5m/u6Cd12X2fA9jJhCWsqm5v2hZTqr7UUuCAXgLaHjwp+pBmuTT7PYZej4W6eEzb0qJtSHp5Xbl+zr71SfrJN9FTwQQF+CsqMgDH/hbd6ubSH/hKKPN1xzxeyXp3Irw7QelsCztS6PlJCFmffnBh78i72dcZFoIUO2yfujB2hIOzHzb8emCqbVB3NVYm5EDy2sqVI2+14qsJbqdFytxw4/LJYSDAEt5xCVveMCUeesRFdzOzjOUnCAD+bgf5OZ98PnjH8xNPx82Ou4yY5le0Fr/pEFvzo3ztYtk7zObcSR7/h/EzBzEhah1fmfnRghPTyz8SYJ2HHvh44VcmAq6V40UTPXrEB0t7SNOWf2577CnBJPprNtB/fBX52RxzNVoOAQ9wJ5P6R7vmIwGOWl3cu84/iJWENdc6eJizP/eY1BaCJQZdz1rl9g0MiELEJK/0DAwfjm9I3dDddugkiZpzeQsyYm45Lka69l738KVBojwsIDcGFC+HKXXMDC6udqQcg11x/dGUMQjOEkvpLWEasOjAmkZ2fDg0AVBHmWoWck52vWsaPtE82gQiTe5reZIRN+dYa3ZXtkyHtLhALTgsNfeX6huzYJFCwXEVW9NWh9BnculA+F7Kp850Y1Fy2ZaZtnIJIP4kKWSrWjAtiiFV1M8cauZHcZzS4WOOdVh9xIW1j/5qItfQlq3fNc+/6Gdoj44CdHkXFVz34WDZCo6mdjY9tcW/nEutd5ZX/mmmowp5l6itlc5Tt4U7D3m9l1J7BDhwrnvnIufrR0+7a2BAKbEsFGkWYzLFWCo5p/9wjD4eStRTyYH+KQnYikiCyZscnyMIzi1jZm2OV0snW2GVfkbN/SLden1xpkgHnJAUpgv62Aseh6Pk/m3/VHbRnt7alqea6jCxRZs1JBz1DbNNQNDg+5usFEDsTq3kauCIJkea427CqINkB9yzAKAyxs/GNeMIwDU3cv2XthD+dCQc3vdMJM3FNXQasIjmGVKMY/cvYALlpQ7GaxCHpEpaGlwbzUc97C4xkPBwKPHdIRT
2JltFAygkGrvMpqIyOdbJNEpeIQtQ1Wa1lNqnB5/BUkkHtMKhtLK3470SpYpFSuN2jaCkcexgfoA4PX89onpKFk00fEG6BZ4SeKHcrIk9VyHh+eXQh4qFLZMte1MD6Gd9ki+9EXJex/zvjj7U/fhJ+/wSyy++WDiSLRRrr38GyUMW2+4VhN1CNNCQ23ieTuqlaO9hCzunozSS5+TTKD9qL+zVEcjWGXUw/3UBTpuT3WAOa7qt+qlBvQJkYJLo1iyR5eK0Bp43O+II/V2RoMiHDpBhzV//+iA6uDd54pFv/eUBlE4scPPtqlZ9A735qfc/QcqRI8xZQIa41RyrtW2lZ47SZf7fps7wIH6+YWSQS/WefiautZFZ/eBea+h04py5OPMAHkHZHA5pm4nfZ7RoFjKsRXd1NIJCr9/NSFMGSK7XemR/Of7Ys8r1UJ2fHj/sBOtjN2SInW+YNffM/LiuKWobtl4ZWr0JoXE+GJ5q2YwFzWAozoGGdw8FtShbkW1BFsy0klnuqQ2rz29twIgc6odXUBJSlKDMaHTjo9+egr/pVld4zKNlCC153LX5yxfUqGzbu/kASZuliskGayywIda1VjJpQFVIZ241RSBBon0ZZ/EHA4AMf2R7gxCa0URdVUKp/mQb9POHr/rIm+5Pnbh5MlkU2FUNjH5838BqndWSIJkqLUMOv1B5MucY8j4Mf/sY4w1pS2caNiiXMy4wlZL+bzdVazXdvygPke4h33NEitsFiG4xIghW8zI0xYG1/bSykg1tIaCxRpKyxcVcFM/9SLChSsB3tniuZkc26YI7OsYW69kRxkbs4DuQMdVdQxhTa8D9PRryaBoleFGR8wcJ6hQ2EUfJoByTGY/9HxeDTKDPRZYErddanwtmtEgH5bmZ4JQkYQnNPbVsO2tbGfacDgFKbvDJjt080Ts7tWjcP1R9FIMJgQiigvSzbg6KE//RUaMEutjg+SzWwjGf6vlVDx0xd+SXVmg7XrtYuscM8prcRf5UC6EEPAwNkcNf7vl54nHMe0cm+pwUJK9Q87s5dsbauz/xR7iAgJJDwAz/gh/EMlSpAFdYIfF624Kyx4/UFs2+yOr/Uul0HQ36tYJEU5/UHezT6+Um8n6B5amhxggvGUogVVfmHLpOFwerXDvW6xp1RKndUy6l27ax+gxjoNAdz5TiagtTFqrAv1XLlJ0EeK4Si3kbGEsGBOq32C+nqPlisT4cOJylglyjV6JYkbDqJvUmXBufaFBIgXUxQJk2LzMPbZ2qb/g08/ZaSoqp3Oz9nhJ4aa3VHuc8Ps2u/f11+965kyWcuIzLNf583OuWsElJH5lsWeIoK8smLIBu+PGzTCmHAxnic2xxJICT6pbUbdr+8zksbkmmJIzcwfH9W/XEMhCWovVsBcWNrAZVQ/ZSsm7Ac+TN6YVnYZXtbYfKhOl9fQEw1JUfWJxAk0Kxtu4G9NGiisdfc3KjB3mgC8y4NWWwMK7YtVvBWxT+AZRqIxtDmWLXQLg0Nye6qmm8VvevGNvqVKYf/73RSEfaPVHyx9PQmWWap+F9OXEvLI0l6A72b8oyRyBPVrNwVJ0pPZOdtf/PRNvz+CSiUccJz82eRBNPQosVcLhNbKNZM47jH1UO/LlJP6FMs+SqcRnGK1OmMPkm69AyylhOdBVxEQCpmxYCudKu/yLA4RRNL4e3qMbyBrRVsxQfwPE7tcvj/dVXTdgIxn3iXkvcT8zX6OdkG3FE24F2XkyVSfWmWa19OYc1iYS2oc9zbTiR501T5o9EeNxt0z3nbj5rwRcT793lH7em+RmCm/zpkd4JThO0yEK72+CgyLUj1uzelwRPIXjlsJ8fJjCLnDITfTZ13tXkw7UePmJjgJYaVa539GeQ8hvzX0GeRHxo9DFjmcJF+A/i+0FQBeQyhxyTo6MLP9UrDtFw9GZblqs93w7OoyuhfAfLscsOpjpyyq5W180DFAEFUWenXO0AdGDEAS5GwrWAeOjAoIv
6AfCMbYvrmzE1rxxCl+yJz1j0J4Kb3yy+/PwpCInyaf/7cmxWjPg37knrxgnCfAfC/Okrque3eQTiNsTR7+o4Mmccz6zZ1dX5jIdy0dQeAJlv4/eM4nYgjsvUGxHzMMcKogyZNti9tpJjiBFh/rS6jge5hD/zkorQw+90f/X1q24zfa/dE68v+k4zkaA/ODNVZS4A5y1fpxngAxvRTs2HXd5Kuzsytebqv7/b5ExLmAPXz60+/J4s9bBGEr+FFgZpD6PmLWnJ/ZqN2s120IT4k8WLdTY2irl5vQ+wG0Sta0qCl4dRWFF+rUizynDxj6i5Omhj3bn+H5yVXfTiENJfDfEQqLcp3vX2c4XumH9329OpKFjm8Cx+9wHGvE0Z1p3B0tsR0bPqxiNMsLbPAW1ZKE7VT175CY+v/bSaythR0jWMQYnX3PS9SKjDjtdVK1H/lf1DP7OCjssk9SsPynVet+p62vlzEEB56llHB2+8eyWN5aRC4tdJ2CVODUjVwuW6fI324FXtgZGTncu1dcFfMFNuCLw+qHC7OpLew9H4qbk/8n+8hsGMrZsdmV00zlT73zu+TxmPCDvI+l3ftIhz/KIgyKlfdVPl35KPAjfOrHn4DMjjharqCAFbCuVnhweS8xK9OvHQi939xk2xLdQriQcV5a7ZKt+XZJmT1SvpegTcPZfHdKpjL9w4RSMibMsxBGJJIp1nZVVPfec6qFG7hUWCh7NzLLJXdw+ggDgH7h3fD/xYl7RpclL4ldvmNbXKMRed8qfXKs2sxuCcSHOvEAZQvZYGKZ+UEiCnxU2IYkT505CUWhf3ZOxCxRyJVDJl5QuWoxeFqkc9NaJ7dm/MYeld5hDkUqFctEeEFuK75snikyeMte3vJ73G5UjmXgSOEsYO5d5KESpPDtUhtMmB8LLYk6CsDyUb6uUT67ne8/jruSl3l0cyRQCsaU3umfF50ws3h08+OfL/GVq4dHuXUY9TheFWJrRz4UZ/PdvnvnDK6mGLbekDqbFplq+iDlqAZTwNxZxtvX8Zj13COWCdMYyTxfSuZzxFL0o/TCsVSrYG0dF6Q2imVy6H8L0KgRSWjZRfyFr6k1I+ohqS4N94icqZLV6UtdlehUo3ivNpupFnTBkJaE/02vNqRtxSPnO7cEIPPKp0LydSJ6WMP2mPYLhiIZ/q/Pnga3CAvPijTWP04SrCiuEHf2U3J/sD86TrPSbs8lce6voWEvSNdYSjzH4YJ/8QMXSihyvc0flfwqIfm+Yy5Dy0mkiHLP/+G954ZWBJnYFNC6lN0nktovbu5kOgwNtXJKYdBt7mxp42WNiPcIArBN1/CiogwR97fd6lDpf96zNOwrvx/mdue+uRf2qnSNpcap+d0ScfHfXVjaEa8wlfQGVukM+x4is5Enxv5wTiwymrnbp/oJDvIxyP62przShfa9Yf+KA3nw5KdlQ4wa6h89rFo4YNH0VHnj5eK1ctnK8JnIiCrErDxNtHwhZFzuvwxaTGuZzC3TdlxPC173Q7NH+qUl+mH/ufep4p6qWVWIoHnri0WKksohKB6saAYTLpeOlmimmG4hfrc7O5K1OthgHwCbrtR9VecBOadojvLhQ2vks8RMxHCcwGARmjQXe1e1zApYHTJYmGEBc8b6yNjN1DhL0HeGeglrlh66ksDQxnJPAin7ZihT2tjCwYwhjVpp6kFSyWcvu4f8lYfEaiCdgFnNsOCPmc9Xsequompax5iI5pLTHrbOMJFeXbids3RgF86KF4FlslcAbBrJQysJBQbRuVF+I5K00Xjc/DfHC4IkSyBYxLu+7gxlbcNC6LGOUCVsTtGYVTl7NXndwY5SFpX+CRTAAw9cBE4f1x7XPh73B3SwyLFbQnomMeqLmSdaZ2Kz6LiGNwnJU7u4Bm2u3ghOuHi+QLDy7TLHVsAq8nGzqht082JcxvYrKp1piN0VIxKXvCjScEA8PYdhKS7joEQL+0yi5a3wGoGjPAw/rnsDYUZcl
GMDODjLbuOLFdvZF1o21YKx+dHlpqOWvmAnIGcUb6uT1LkwHHZiS+Zh9oMRg3a6WZb0VoKPcELMjURVYcv1S5+BtV6jmablpVv0KuSdi+pIZ6CzU9N2Ktnd4c1w6Iy4ZNmYDVPgNnNrEe5ZLQWdiBwUDw0HCo8tC3DNrQTOGJu3dZuztxueTJRRNdOw4eNyubAOtmJN0p0FrQZoSxHFY7zid3rGq8dNlYcdm715aqOc41hAS8hQGn/ujYNxy8mHaJSULOBsFrpQWxCgH3AIXOaqdhjEUdL6p9ygbx4nyb1xda9/RQMSu3SWCAldO/ZtT7WtNs83moJMiW/tqPEkfQQsQCDBGnDUHRB+1oAK6XKxGc3zk4ta4MUyUcUn8HnTdKGKs7zqBH4OR2FfTBWrK6w4e+UvkJg7L5YWTATJcZU3V7GlYAXVl6KVmNSPjMdj1amJNLI+xdM4+j2YPI+KbkT/GIfhloL7rUjEGwxq/1RGn49nLgN6zIn1gwj+SXk94/XFcTJjtQk/B5RPvfYlasL9t0A2/PDSrJRxWIZXVfbfP4Vp5ldX5PEX3WTBlcn8F+Z8CfMh4nK2oBbQUv7eUBfY78HQ5IhrzTtRdaAID5e9RWB5pYunOUQkSYbpi5auFe/9AdEFmO54F45b8mipxlRdZLuYsBDRAWvrosK2WhY5dIBqr50L3RFep9PIJJjP3n8mSexs8hZBZisOVKOTPodDifgpUub6gmhqJHGIRF1Velv2WAzBodHpxfH2/eh7PZd+0GWLHI10NsQ1oEgoV5kgZNdxyP9ce0+6h5H3VaVyOR3v/wwavgoSif594/DTlwjgHyUu6aysyU9OTY3mGxs9WnWfegE/7LXfeNwqP2mnYz3ifUriAIAEDjjtUYf90F+SLczA/L51TVv6GzRlsueKl4xvPcyOpoQIZjjyDkpygjokQ25kIrqUzaY29aPNKLtwI8FpGiDcMg2SttxAdFC11GZ9+yCra4pD6EZeX8YgXbUt+6kXEXZt+b/fef3TYYQbjVqT46ELWxK5ChXemuxqLcRm+O31ln/puUpWB6aq4/3+iCDbWIvNA8ncuT9EaDmu2rr/f/Plgi1RfKB05dQm9Lj1tmp5YScXD8dFpshzk0YDFJ12dhp1kHo6T/uFGY7E/8W5nutO9dE+vhJrrdncPk/ZrHu0qKpjQSgiFhSPV/HKXsrtIwjyFGPY+V00PB4T1WXKbVUDLn7udjkLXetrp+AAuq6OxGwffgRxa9vV5Mixxn4dp70h5r4ukCgPJ9Z0B/8OZ6qi4MRTVEGuowkFEQO8LCmvPbBwFuA6zCZ7sq+JbT2DAdv1BnHdth/LDkBVptpDSceZpsL93nSDxpE4goyix028sorMaq+FVgCb9IPOu4/IY/qiEY1B5SooHtcFxCkv8wpOVQweSp/9JejYYJaoxSO31kPAtjDHODkluGxH8cHFDQ89rnTSAAzYp7tc4NTBjp/ppdutO3Cpr5zklou7jMz8TV8m+97BhhUlU2MTSpR0XeL82psHmF/dsPqX/PR6s7Bq1Kb5X7hVZGufozwzvBhOhvNa6x8erGvjaeQ7bhAhHr8BBhvMtaDgvTKqD1IKW/o78jfXCBzLKrCjW5AmyaU9zJSkzuFVqwRzE7BRzjPbly+3Zia/dOpPnFE3smWWphD/4cufu/6i2oL5pxy8q/p/j8sXObYpx2FNt+dMaDSfS/SjBZ8XH9npliReQr6Qze96lvzkdv+qAie30ydFej6fAl/2KDAhryMewlpwSAOEf93ubflD8oc9kiCUhYGnR5yEfIup1jxp4+EP7sBjsaNztw6rrnFAix80QTG9KOuX8ccQgC39LQkUYnNWy6e0Qrm8XCM264TqVpCESYzP86ntwMDkDO/qhUjiwLu2ePd2B/JzPSWzjmQlOCY11aAWQCyLGvRx/uNXv/yUOkyxZt7fyxlZwbAVcZjvHXiJ75c76wZXQ2G/CxNwmB35ZkdEhE4XRN69B
N/MdMFq1GrE3Y68rVMUzXA0Rd3cQbmKqMcUmArLBl31VoXt+7uFZM1jF2+mFUtnIc4+Bo7MEOUs26zTC7B47uNAG6x2FzQ7jDgM9/aKgrRrxDl9cy9JY89FH/hUT420lwevuB4yWhK1YchhL3HSSbzaKYtntyik8rxRLR0fsNj/Nay9Mk1uGOAVFrbRP+Gxc2OFJOWSHrZuffgVvS7nMjvAx3DgeoVZ/uNSSt3SVjTxJVNvp1QOu7Hv4osX4SkO+ib8OfDyaHejuANPD/rdJAvF3XcrkbOpqGgt9/iE76fTXaHLYh9XxKaqWsg7YOQmfYPCkG0t0o9vw6WSUXpkMR8Zsqg00y+KJDHfJMtS8T65vKpAIxcYDXKM7HTjc1zLAr+ik1/DsG13bI4PBI2EJqbTJcXjizMQeLIXRJmPJ8H5eXT7SZnEsMrMisE1KPTMtaMikFUve0jTnKiQOYvtn58TpaPit0b52rL2wwnzJk1QrGMiQbg1DkyTveHl541CKkusOEyo/HDSEivRN880SCi24DsV+NaYx5BLbEjCiX606idwsAgW9StMsCXLDQwG6+pofFiXHRS61pzKs8qYx5TDJHxn4jXiEO/iqIoEJmQNP5hrtBIaOHRLlraOM0p9mWcokR0Iutacj+yYxTx2MN9df1xWpaTe7KiKxjljl5yveW3vZVGAbYBndz1VNUVvGAk31ynfJ+3waqNFBIjSUlBGp7OTrXFQ1SWbFZT2VMfZEHgnE4XCgW5RkBv3SGQMgLHZ7/ZthmNNHy5PKd1WwtTDkIJL+4D9HRpOPgp0LgKfWaiMgoxh+IUrXKIRs9TkrUVPUTp7H3jcWDcUbKM/4AZdYx76208Y+n0m7mdJLnl1eI8Bt7gvFZP3tLO3KQJcBNIOnQHw8T/5eizkOC7AdgcHPUqnOP8lldvlGdzVZ78866i74k/M6n5rxWx4wRQqMW42G89W8Hyfr+4oiWWLj1ejylhcChmKyWkOSwlE7XxZRi727UaZ4zHiheoOI+2gHxCY2rKEe7PnL7bM1ZSXTv7Sv7UDlV/sKZ/tyUF3qKrYJ0s6OITrcepC8vEJw17TyYQcdRspJGZLCZpJCiOJLKslw5tedzAiUNip9346YbsFtJdUVZlntzjc3+eEzV7kVemiq1aBj4utctkK1GB58wS+LQmV80eFXCMo6CvwatQcIzlgUOlKMy2UW3y/AHLtQ7Qh55IvbIjTdonXQAoC/OD0/ubsSrQMPDP19BFXWXjjy3jIE0xSnFwjjgEppZUMEvZC8fxJZHzCH1ps2DjegYBBDhuJwWCcdTbKbU4yjRDpQqfw5e8NDeEhY0FWFOwfPxVX5399tZBfngrRN2wIs2GBQ6cFtZaRVZGCEiWuCESlnaUL6ohPHuX6vGv9XUFmWnqqTGjlvItFbY3V/Nw8qh984BJvVmiJY8nfcpA7HAaEDl2yMjfuOIamWnEIMGlsdiVJ7FoTgszEdGty1opmGF6Ubq+eCioaa2yH3uuiy8WrH7gEfZM0bn63igfz8e6x2q+F54eNbCqRYRuyHXdIiptQxpJxxQJte2jGyUpkKd40kV9mECBjJG0q18ZI4x48sLSB2LgaBNna4Ene9sZkus1CiZIkMnj1/3YVgWVky0vRMWNonSx1Zaph8I+3C1Uz/XF2Pt/76Gg/zkD/2qsghXVt6dbreTuIniS1YlGe6AMT23TTu7BjImUodv9Cvs+QcIBqTcEkScF/yZm7D8USyp70XbNSrziadUaErpmwaIqq6YZR+fuCSBiEYJP2nQFl6r1BaopNjkhoco1zwk5mPj3sfHTm/ZPl4E6R+na9NnMkJCF1XS8nSrSn2aEdtBuRMDsq35AWLdmy1Ayc1p/m1ideUE6eZ94DyBKDu6kisSScajgU42GiWtmR5sQ4PYOp7wi/Jnt2T2HkcKBhKzyX4ko//ckvYg5UekNlJAPgWSQHL6bxnw8mz3JkUHR6eQGWZHZudLD42FErJ/Zkc
suBJUGuzHT4EgOwoJ7NlMUyWAl3XUw4a4bnTo7RgigW94sYx8uKwGKVTso8A/sYyQo2kneGpScDmaE4p++n2IYO154eC2X2oLVDSKW09hkRJkEHjxJLjEi7aocGkmT4cgAssY1Uai2MsIKsKTJWufeje2fsX387KEh1z36gZDjNJac7XvZx4PVDg/NZQqE/MXBRq0o4AxS51eFyJSp3U3SYHa3q2VAqIU2hPSeR050G2F6v6vIRjIDRU2zxQN1yXX6bs0G/YUrrWL5FsSHGSotHy+WreC2X1qw2rfotaqLUhbsjVvUPGYbFi1km5XUsBUd4gkHDjIR622JKnfNuH56e+rByutB+nH9zZQr/l2G7rpfuOMPr2gffDC7Pza7E2TxKn94bRZtSKPTC/i0G5u6mqpEuF+Os8UhjKJwIus+gYUqiKH6oyrYIvAjvaYRrNcMeHE9Dn7TtHbK4w5gP9oLpZhhatJPMg9sHKAyWs2U2TBCuNVMnK3Lr7BWG1MBynVT8qx9NI0vV2EU3ObCF3D+jgccYcLDST02mU19g6VVk9DZ2WHktMeaus2+vhVgWTBa9Ffx7tWhy3jeo9kvZgnvEnxhPp4XSGMPL5/1uYFka3cDF4FJlNbAMHV2xTMMwnXzH8OWlXbhaPsqZLTGaoQ4wPjUaTPy76WYqnU5R2SZ39Y1u/rMu8HJ5MTFP1M2BWC0E4MTH9M5Kw8/AgFBXbxYJu0GWHhiWqyvAW3zM8b+9d+mfwjy3V+0jeF02bHabcjS5ELkimYcxxSqXzuOAv1UDJIXv+OwOC2HxZDmfGlYPOYaBTOhdURyrtZzGike5lhwNP6V46rnNwZXJwR/SzSHnUN/XKs1uzD2dw7lM3IHkGK2YjtQ6VKLxOvfsoIebf5aH1T4AK+iejLSXWm+dk8/X9aemPZuCM98/Ln9uPJ6O7C39q/qWiLxy7Vs+r9sdK5i9HxSyC1/9XcwemN9rY6wMnXJ9/tKwHNdp3SGA5LqvVY6PhJzb3gu8aFih3j5UYgOzPlfCIZ/53b7sbs97DfXp7Iczmaeb7fy3ZGJTi+ewFN4gzkE9vHE+uNxsta1q1qCbV10oaUYQKJh0s9oawrfM5Gc0kkZM57COPk38RpcgGyWUtzo9P1xczhir2K2nL7u1AnLNDQHfN5kM29WQTR9s7ewDb/LLHuN4p68ngHAjnSgXfIN5tmHmz1/gwMqUZD/doOb7ikyfM00NfdBlD+9srA5LtOn51e1tIAmKKBRom2RbPw1+6bmkfREcomDL1U3evn5wlzwegzm/Yosl7fs5CPkScHQaxQMUSEltp5ojPUE6LxJA2WC/rVIN/5pBifY57aSHXWQVZmxn3SAQ9j+ihjnbnnSsp63fNKPl/FWMASSjMqV3d89QQMk5G6HvO/TJDSgOhsORaOxXiAWuMWbwme69afaF016ado111ZRHquvbSHEdZXm22sDSG2dxBYM8BL4IiiDYKgW2kXYR8Kr9Aahy2VP/6PeH2373mHT6NvkP2sp++g/9crfkS+1jXViYwqVn7tsSZ/inf/GBeWgU7HPFL65p0qiHtXhW33o6+duP+UluEYsGvWKfJd7YaUC9FUFNj9DWxBvNBG4ivO5wRzBRClmo2nqlQiOX9F91gti+x1XJhCcgFCZF7tTBGEFeekfjmWcQXMJkO+qP5xu3aATwEcYOkviPXVc/JhuM7LVx1Um7ieu4OoNuWnjtAmmd5JdoveG/qwtQyYAPLJk4ivKSxFJNf7sx0Lc5YDFXahqBF3R6FpYYUtiAXC+hodK43273UwWI0ZKy0xvUKGbhiiOkUcgVw9ANbCA3lmx2l0qicOuIDvuYbq0++0+VTO4cDpPI1SgbzxfP5uzfv4oA3vWu50L/WcKLwibJcGbLHtBB2dTDfl7zoYc5tyzbWlq3WrcYkHpcg9LLvlguYY+i4V41tC3dkZXJlA9SowKMKk8S3QnBtOqk2XHsO/WXhrP4sgK0LTl26HFiP
+yjXLfzbBHfQW5kSKrscqoTp3V/WCK7HHF6GdY7ULjL0Al9leWJdloiiDmO4pLAYVXe3wsGG9PROh8FIisS57SuG5M+KweCHuXTpahlzOATVLApGr6qMOTCYGN5HbAxUUYULEvgURE4CBSuw5pZxX0sA7nDspbdy2BLSitqDVe8zaSzKLqD1nV/K6oHLSRVKG00XPSm9fmYpHkJjMhzXNtWDq7VNkPFm72AxJaUBw64NSHFNHcTKpWiaAIt4UeXuuw62gG5ag8mIKdl96CSZIwPAxGrHlDEZevNaMJVdxkQaFjmJMPxughonsRdX+DRZ624m4J6EwkwSEW9Xi1AB5s1QWh1vHi85nVHVFFD75ZXBfUNgaxCRcXCGwukATY9Xtb1JNFV3l2IdsLWNslfP0+sxmHtPLuJJbHZh5O5+dUwvbJ5MRkM+Hfag/4B0mJYW6/c1g4zwSN+QZb4wqomyywcXu6L5hfYOf4oIghBhbHJ8V31hoW3C9SCDNYQNivz4ec6UHsCfgOmRV3P/nLq64dRk6OCjnj/r6R0T0YCaDGvPjmfzeZ6OylliEKJtP2UlUgBNn2ongje0FRFNLpCmB7cxnHyptXqi9f3gphyW8plMdlub+TMJVZNHLmk6+3y4om2dgnPk3FcoTZNZPW5u4FjAR8xSveZ4krPdNzv7iwk6x9WLNklyfwHXP60K/AjT0JmqePXFgMChoX9b6Tj+PUf/eLhaCLuZeMCVESfzd4DhYqHwLsNmlMMr4k8SxD2BY2eFBXNudVyVfLvVx20xq4PVHuvNbnfj/0vJe/JjiTbDvHoFaVq+sxomrsO5G6J9Rt9gYZknU+AUjhhX5jSWAJ1rsuyoA33SaPIlCRZ2W9zq2JZzoyjpGU63dHEuseRMIgwgihJylBxgKrM7dX0WJ/TxeuK/VGZlJr3ZE9YB5pAVDAiZiI0DQlf0DU7GNCWW/lQ+N4Fw3YGrI/l0FsVoSOy+RGFjProvWaeJStOixw/FvHT+Tug10OOd2W1hMA/rxt63XJqUWXz7lFOUyVVK4d3V6ACdXs1bX5MoIM0/4X+H3thUOSidY1l1WZxNyzUm9zUjRc5Zg+sqCALqxxNLCfmErHHC5tT7jERDf2QvVoaS3VpeLz/Y9WZ/U+13Oifl5mfnrRktKZQ7GQ80NnWOq/p1EJN75FY3dml7tWUvdJRqs6CIlZvt5yq+wXMVp3dM5hoXEYpQbru9e2RBWlQEtPjKAHLv793u/IQOmQac9b2qefo8s3+21ou+jfXCj899XbNOO0PWbYYrpTVz+l9X/InS8yZ208dpumw5dWz6KchuBhoce2n21NRadjhznCsWWu4/j0pGi9iTFn71uZDNScuZdM5s76PB5cOGLcXajofyIYhwdZZvGWEGMkKux1CMczXvW4URx3qqI2rIGB/9h3vHGR3say0x7xfMYzxLd8ZAfuoyjocAEJQkkpkgFQmCxil9c8ncEVMm/94/+WZ7K29Y+/5cudLE5K8U372/sQ0D21+rywoAyLSY7OFfKCyFEyKkxTyRZqcAbyFcymwldClRObCg9hGtMPmIjkfjcFKth2N52r+hdhKr9BYtV5YyqgtSO32hIRyXMJdKGEy4oVTbkZ8exi8dUYyvJier7tZ2mEA0tIivqYPJpyTEJPOes830ows0BCskheu9Hpp2rUXaPS60iQTxkyUxZhFq7n9SpoptW7qkp+GgLhHj9Ip7I0Lw7VIa7GvsXsBcj4a1x+X65VDiattX9jjl8pz5FE+7aJchWiDzMgEiCW1J0uvrohv7+p5FUtzNZkfWxPm33PhtvNKvWQlZCZNBeIiDLSxQVtiDe3v/4ShM/vtCUMyUNw5+3Wmc58RNTu2cIQqs5ZF/o+2p2wy4fQSM1UU4XORUdJpvIF7QiMRCKYZitOiqjfSVVA8OSTFibmKEM4CobDB0SRBbwbBl7e+prHNK086ocOljjq29eDXBQqq+SyI7tc1ZqxgikemN4g8X
A3BKZrwZX3dqPCnDBvrWi73VlGxRkF3UciqRMpmJuzK7dAPl1ggTH3M0CeXKruS4Ta2JOjd+1A2JXpr9eQSCFIhyB90CUlvMmYdDNYnEVx+x1b1IJbTqDgxD5l9srAUxooVjuR1Mwwg8EKL5uArj/xnWSTBBw9gbDfdvwxNqlKjasvsmxjrLbLlXt65QZOBse50i8DV3YFpJ4HKfZmGs6ncVv4rUrsffSnoqc5msplFO3zHoThvkL5k5G8/ZepccnV6hwNyrAipVYyvxn2LPif8drLCBB6uXRWWJZmZRIZA6xw1Vpy2K42tlQZaklLIavF2z5bKLAnoeilK3kpmXrenqVS3o43q9EY7f2Eq7+zc2ajTqBOlYeFx/REWMGxtwC/TBDmPx5NMqXhixRMIdmRSQYOpdDToWkG65SBXVwLBAR1OSxGHWYnGB2ZILTEjcoHl1PTPfiU3WF31PMusWpyYhQoRkMuMy3bpo6/QUOTqz0sAuj2rAkCZRd7LbqC477niaikrG067URmiXtyvHbWDmdRZ4CFfMEBTP6wcrh828XmcjdIuN8lSLh2VpOn2JQxktOBZ0BySYNRnBCAwhO72GrZozhAlKub79tEt9WRq6KeCLcYIRuSpJGSbTdR3xgLC+pmodaZAl/D7YKMpuMFyaQ6D6HR+6IuudPFgC9iqr/JoJ61TBPMYzsbzUH1K7GPpOtcGg/bHtQHPFW0J7z1HpdH75JYjgiq1+vsUK2QrnOMXon2EL1l2WvWc2d/in3hhm8I0UiSIThk5PIYhbCm15/PZxaSrgapgZCUfDnwpCmsa3haoeGgPu0vcUVYFvdtIhq0KkRHwIoSbOXUbrWn8CHTvRjiB/tFRkd27ej5ZcjQ9DpJvq004tzVduC2boiTINhMBpXe7T18a43y+FtiarLqRMUy5C5wjUGg22lmaZnHioWfbIf1oar9a0OeYyaHRsjX5LpjCN2CPPTDQClo1ZU0V3cCjP2b1TcPFfL5YYBL2lnJK1XQN1Yphm8bSQw1cDFU4Alu/CFbKpcI+xzAc25gbvqKeosq6ZfzM9hwKoMCm1svKvfFwMhl1XauFkQWB14vAhal0hmkbijZ2XVOBC1dXXzgWFUUiK5ODosf9R0rlSefL6oaly6qS7TEMgSFueedaXEDgWQZrOamqH4SdQpAQHOj9Yvv/evm87oKReqkBL3wk3zpACjAj7qvCh+6BF/60bnU6cRhYMk2cVNfDpvqLGdUG9YL4V67DM28ZmtBEgBdkLaojBEUR8xcrVFSyAwGDYJDZUpWmKVk5VlSNgIJBtLnn+RbzU1WksNt38NAYbLM4DFWQV2Gwkm7wLVXC6abZwXTd5LgvnyDJSjdZ/AWaJCH/Pc8RJZGTTzdQXJOi9JXueMpZFhOsEADIBmwNjcpYynKZGi1USHs0cUwjT0yGMafqGjvVxHgEZaz2KOEu1AN669SYDCsFj7zZ37SFQlEESHMHBM+FJvTmcZz7tYvHLriuNHthA0FOZ3ukHkBjYBzJFC6OcF8V/3VWFIvENoahJJEeGsYlsLnfz3msGfrL0Tfzdjr39+6aOOg8dsyLjjV6xnFLg9I7Be0VOE0z/ZSs6kZiCxQhYod2QLXQRHgwoJEO6OnBQGpzFQ0/yTMMy/HgGPOY+unagj01XWIETZbYROeGAfsLqgklXDW8iTX/W7/fkXA1CNC/j3+uTb+mXujfXQi6SaZ4QyAFtHOGEd6wtB2WmZ5NY3XFcxnWYGz7Ev1R2G0biHAhcQMDwV1bF4D9GU6nINEEXoMpuJOgVTSG9DmhuT7uHLiXQoDBPYvVipfAkr6u98Bj6VM+941UQaCtc0dPkvzKWbZtKCLHMSyMT3kSqYuXUVBN5V1bRQmdto7FjskdXkCqY0kch5HxcRMF85kbzzovu8tSpOSzLi8sFH29/LHLpdPUhrqHCHJuQ0K/4+3071f3DgyD/fDSLkme5iL0t6WEkSASCczpSCAljuQbW3ilS
LJuCkieV+SCFFUn6neF9iwnY5lGUuM89qvE3MKKFxhZNmwZOQKOlTIuowXln3BcVTrREj0oE9FavAZxjglEJTdIqLA1DowlOV7QC5VKFfGAd3qihRSLY1uiYC/83Ux3R0wkTXut14cQGd0KQy9dsASGk/aCtHmmz0dRCKW8D2K2sRLF4Jzx/WgwLp5NMqxgSro+20zrMuq0Sr1TJ9vcuY/N4BUiV0qdWc8QDTnAuspGdV0wnjjCS4pMvNM3ytuHS6/iYu88tENwWSvb9M7S8yNdEhWt4ABsNy4WqBoppZAHGFLyJ//Zn/6/86p5YToZvNze+wA7vJ+RyZQ8fu6B/T6I2NoU7N79Yn3xIbeORjKFZ8QfA+9VzhthojXE2a74Jsocg4uP3+KvpsMgB8bfnD88blQq+2r9Y8OcQIpottEPRM+jAJC54kFg5A8Btk+tZt+LxovkfOW4t4kt8Lnzp5lDefnE/waXotFnYUDNbRUT3j72xGn16+7pV2YKt04fMYCH3OERDbokau9HdheswdUGm2y0RB7pvc0fm0s34/JJw1zQ89SI/hNLoXJYyFdwqQsAYYCfXeWapjw4xJQrc0MOHTooRq/WEh7ELP4UIlylF1c6neGYANYa660kj6I3DwpfXoOgkN8dQqQWw9HQNPX7bD6nc+CP42OBzXgHDbE7fz7O4E7z5sLgFRdP9JVGazEaC6+5onTpHFYSc/hKPmPm5HWkKYfHhEzyJL56CD6cUiiyX8DiBcTjKlOKfUaVVSf2ysy912eH+ZpCA4wj1Ckz+Kgt8CzvPV6P++21g4xON78JUu0zpIRt6DdQdaAMDi4RkEZreBI21WENElokYh8Zjqe9HSuKOoPxJQYgydywoKSO+GtYSNddSzJ2yLNxVj0PmlMvOe3YaTW12CcW+oWj2YbAyV0mtm5yHC6Vifgbi/vy2f6hR3E0DyUIPpzJ+JFCpg0GUxSpqs0kDO4b2JfNz8lqs9k+B0l+0Jyend6TQICIYIHlNybMZCpUU3dFhGbTHAp63WeNY9KNsUMUGg8htnkPKPGcFbhmQwzPeOs2AkAqe7rsmdvXhSVtlsnnZr39+URXx61fK4hcCl0sx6LYhZWe4zGzqa3TQaF2LiPjwRhFWko7g9DsKpamGaYORqIZZDVbqjk/KXGtfYRJeyd1xqbbu5bQuYLsZz3/x7LT3mWiPZiIVx2LPUfrMdlZowSjJBKRYMtGQYpAERVF7cvRbDI5DLuYeLUEXdskEiSxauMb3cDY+EOLLb4ZCoSulotUV4gqhu0Q3wMzJ4EEqyBI8zx13eUtuUe+I9EUcZJmOzqz5U8nHlgMzSSItXe9Wc8fbdqQNFaippkWBRuYOqeWbkwZzxD8lNhquLV7vz0SydeaLKvaafao1+3eEyA1X1myfmQhhyzqh5I5tfeu9LVnbh5XD9PjPAY8EUbOJS+jWxOVYHgRHg+ZA6/sNeufHc3oqus4YTZaMqKqWnw+xv6Qd1XBsXiM3ryxKWQy19loRZHVcko7h/f1UZn1BgvMPS2fU/R0W+oiqGj7SBPwe+qGKz65pcEjChTZzTl0Z5bs8efUsyUS0404q5/fVpDKZG6WP3Gxt39+9gOfkySd/vj08ub29nFic/QLCP+viSGge76xMzSIP7y+6bt172Yvl+V6+KfenJ0blQOUm8Dci/KlcH7YmFzbTmt6gdJLa2PwD/eE3vO/ji4f7qfGikccFI3FcLJAsE1tDfOXEg2FprMkHP4O/xpc2Bsy5dIZKcKik40nhd7ggfy051qG6TWaRnvkMPCIJfM45Mgt3ZZD0KlnToDyYDmc7ljVg2Vxx/+/yZhbLhnnWcJ5WN9J4QQD0g+8+LiufgmvpujDFVWWoUsqfkssiYTEjmaKQJRUXe0Kxdlk8lwXRJF9pjajeKx47IKAo5x/bDn3VVG5S+gGhP1stkPjM5drvP90AnDD9PwqeUAAvMxyNLYykjCYgUiyUWWJiROljlb6kBbQx
ki5cvZH0n1mkWliELDUC/+9t/zKpUE6XdgyundHjP9f99GokTMGs/qtrSu8LNNMw+wIOAx8qUnoSChfiZJmGbP0XlKEAzozbkdvBU7gzeaZtZtWLmWKuDnqrCryawMTR9u9SLzkVBZmODzGsdWhgN6ap7wZFbxLBocHTTHRoVF/SxkRWy6uFvq78M6Ij1ol+zHaBZYiyI0FfGAYVApanDN+NvfWOLbgNRsMLJ8GCguVkA3tvqhKopaGezUWsSBhV9gJpdIaK9XTShAltqF4NZ1fiaatmZt28tsmvcK89nGtg2nx7ml807usV2vq4G6yI0BlUSZa4bPmPlGCO7PY04wxy1D0Y+yNpZWViQF0fVokLkwH/oXdMwJAIQwth0mrXH27uJkKZPboKU20tQdfBM0Vsj4iVeicNTp9M05S4M2GdhtdJr1QuIxajZx9CAS/I2buE2nD7xYr03SjXiy8R5tieGqY27wfX+XYg6oVBoRJe0IsRadkrypiqZJqcPpfROTMoCi6P2yYQ3fn5/uHw/wn2+5QLARCCcZO6Nvxg1NN7WnvajyrV9iu5TXK9kO7DSyT0LN7NCP4Z/5Rez/VRRMAA8s+MBaFIjCujGK5w8Om4N7/PCbV49xXSbbyFS0ZTsZe+dld0IioNLPrHAQZYz9+cAYGBpb/dU3nXK9iuU+JAR36RxU3N9ek/V7ye38dWE0xHK18u2gSUVp2pIWNIk2lzgKBjauiotM5Vd/DKVYrXuKmV3HUfPdnihkWQRWQLf02fiWxinKL7hygDgW9/alNwbyx8Ieb0egQigxkfUpxK7ZNW4LfIT8UFeJIoVIpWL0eGdFZO2cFTePg3UsZTaagkt1TjbZzwjtnTSY48v3Xnq4YBlNZS6Nt24lae5hkAC9ahrh4sqrObr32EOeK0vcWnVcfcCNi5k6mNhKhfOZUwQh8PS3o+idXPneKUmbcy3/7EXS00X6VkrgjSIyAi/IhF8yh52F32PiYdxsC90HX9CgDE2vx7d4dTJkFfNVwHJVViWaklBZEsHOKwU7PKsGVfnrhGgNWn5jG5q2KcCJjfAMmOCYcd3tGiJyrHBw7XBVOsC1eCy1tS4G3CbdzorJLo9Uqane1lT/lUb+6ZAubAgImSxVc1BauMS5/YLHHFwTQrEa68FPEtht2dkGVL/r9um46ES5DbPuDDIdBYup9L5w1O+OqSH7X0SssbHuhk/iixv7QL1VPU52sYGm3ZU0y4jvrMoZt44EZtJfQ7h0Pf22Aw0TPwcutaHSUhksUFzeqNRRrrmWxx5WCi3ofTyPh3Now3TBY7rwwnUxOq6hiYc3zhe01wD79yU9oUuGshlX7NwdzbD6/cHJmtHyckyi5G6jB8Iw7ynXAPLh7l0OnvPc8Zgx1RXH6jou75AMAKOk44Dg2YktYHituVO67Xrx8RnhAjE0FcUAxIDDaqKHNZ0yS3xOwFFYzy4Ahg7mV8PVK7/9PxoTG8YboXXpDqbLqLnLf1RSxb2SppAI2FvhG7/OI7AZ7r7j3kUaBpW5a8KaySkkiWKXe4NWz6+ZfmtXxRGJqSThmpNFmQPV9ABqyfc2J5GJg9Tl/1GiMkJzijRAKIkIrASC5Vuc6aZ48yjpjMArdyg18ZvlORybAoFvxZLmQP4LXprDOPE2TONoynjmmpDIVxZ4zB/08SzugamFuCtgMZbqlW/4obUGEvoIcaZnLEM4wQe+wwyHMJIF8OQJ4q7KS3kj7PB4/mpYkufYMVi7hO8vXcKhkibBYaxhUK8MjVexIT5I49C3GK96OdVu3WO71D995tnST3amPf8+2IDwqctQa9ehaSiKApVp1XUehq41Jlk2hw6m8FwQ+3xOQRbzbw1ALzgVbY6IP7A0gmtAGlZ39ynVNN5Znqsma2mWCPK+bIm7pFZfA830uGe5I6BGvjtMYCaWKY0igB1gscSzQIqpVsnJvRD0TX2v9dqdrxdDMNOKvUqCGcVhrIOlYtM4Z/fb+UBZ0u
ypKKJQStKDYKmvjjKkB0iJ4SyPZNI71mDfdSzkIok98MpBUTabPPrBlHsrQtJUoCnx38wWGYlWjO/jBnxdZstdxOAIDUyRVj84/Cb7WDE0Ph6PhcGCBLNbtKCl2jXpRLm0f1VIWZalZKVcqVb5HGtxW+6m3lKDpRdEdnpu1yVJx5ehydI9PDGH5UTfLimIwngx6m+iNJALYx4Jpj4qpJNM/oGQJiKz60QzVU2qV5r7f9tFsY1KLSJgjREFKl+EYZyNkrXlHMDy/bLVMjEjBDJN5bVN8Ks/zyWzRg1Jos19UizUn3iv13ZRz6Oo7ubfStj8iSaHCRI8GK5qhqFICIhfU3kFwigLKv+XLmyRjgnUriageaaquV8UunbLgijGCajjsXLGtWcqbGImjCAxDMZZOeAEvILWpXmQrosIiIx7OlF5JGgd0SYvNxwsxzWCXiXhYNGhlZNWx64mlEqwgYVBMwo6FHHe54UuKUL/3U1dKi1/ds5YmUEjPsc0EN+1nvcSN8hGeg3b2Nkm+EHd1Lo0P8NceFt8GkZxfZC/XBB/xpBcn5IbllaWarXCwKh2WInF4stvk2O95jmVIjgKMjIZN95m7k/3iiKUpEuPzb8+3casg6mRkL6VaA/iMlrLnMUNxr05CygzbTw5inceq1U+9QZdymSy8JtUkvGBZhqbw+pbEmMq0vRZVmLd/sDcx586s6upW8b7qVUpn8OrD5ofpxBPwMy9AxJen9uKF8viJln5cOmv2YvGmbngxpCJZcjk7elw3PKdpichd7eH3ayb31+JKw9LK8qctA8hB1qi2B4nWKS0HWRpnmToGvJlwyYIgR95+2jguRR0cVjiORh6c6jYHQlxTnzsXqbaCfRKH/5st85/lZvN+XBAkh8lRBfE/OJrCyw6a6m9r7Z9Fmv6J/GDeG6o0DjwDTMHCRQoU0XbWpZ/itkE9iet1I6pav1lOhlAyJLBYamgtHLtT1x97XcOBz0sGCNEYMIwMOoQn6bAo3eQvS3LnWS/pD4F1WyFbudNN+UbJ5yAILuNG2A0qcj5fsssUXu2sA+NhvBKShWfbjKTKT3ysoriHuLau4VQhxjnc1kkkgmTaQpVzkhSaZk/h1yCsQmcA7BXdmJEV5ac8Vh3axVlZlQ3nXkjz1b0nCW2VZxzTgp1Nl7VFORvMd7Bxu5I8+AWrYPUG9S4kEeiZtJq28VCw7sDvYFipWT1DYjLsWRiz+5hg1HVt26DwvOc4tmN7YRSNiq4jCuhKwZ+V6hWWDnKoTFb9mTegUNQqsqS2nZxJnt1Npk/kmiJaRJhyHFRjgulrbSgpBy1oVaCDK5ZFgAbOPDuGG6ZumJah3g/yLcTrMoyQEgemWKuwT4WgOklag2gvMwHZWUgP+n3kQ8VgPF9goshXckcWxRLCrZQ+PSo2vlypSKca+IxKVqahl3a878sb7iVvIYPNjGd5ZoHHseHi0Y0t0jo+oI3VgjQtsrRFLFqr+IGxIrEdW46Y73oFLmtm2/4qmWvy+a7iXpDl+Ni/5Pv9XipSYGxbQDFiBdaA1rqUM1Cd9vEJErx6mQ5r0vHpqdEuKztg4MAOVRCYSK+9ixM4jCxx0FnXlUnQEsaFKozzkX7IiGsOCcGwvRRtWzp+pmLuaU5oWSqbprvFaeCpogIKrwJbn+mcRMd+haKsym8sUV6taI1RwpwPtwejA53FpiBDitNGPgGaV7FISWKZGkatOqqekgVA82tAxdVkmztWjPoyn29Q2v+G4Xqdc5wzJOsvGgjjcKnozcSJZDqWbll0ZSOcG+fYlKzX8HqpFHpavFZjJUWW0ts985erWHl1Jj517b2GN8XouGc26xjUuUdD8DU8VPKrEXBO7u96nE88PItkeAzdaHTawlaX+KOoklkVzQqSXlEoJw9y517K5THN9Hd1dI8sh6mWgGNXGipjqUFv9rPThdXlOZOx95bCERjOqE4LgUpVw1wCD7rrK60Lc0Vyy9DBmKlDqmvamgY8s
xLZ4cYfnBq1aS4yFImiEHddBhiTpw0jyc0SNoCYCvaA2jKtQDkIW/8UJwgJAPDFrWBoJhpdKRZyMtPm0NcAghNFm0vRsuGGim7liO2FOS8bsUFpeHVpnFkP4yiI+spkAO0ZHQ+bOGEahtX6VGTYEz7NrAUgrFWrzCROZpa2oNCaLE/6l03YtOQBug3VPVPgNYIfqv7RBNLzfsSzBiuSwKt60kChmoxqK5opkqhyWwmP8azS9iTsmroWVXw1Ndjx+1LBJh820cNOGGXCgklz8geqBdSWsNsSPNEFp3leGHumgNYMs+/Yc0iOhB368DGuRlCK6we+Lrkvw9KkF/qy3VpLfEfhlXZ2KWheN6Mr+n3m4bPPjhiyDEadCr8SVIPPcXd5WBGX7bfhUz5k4J0JuH41YlOQJVufESTPs416tEpj6OW2xipPbNqCPW8C11FGUgRaXfLSKPSaSbp+aiRDRFDBetcnvBVBR9HsCW9WHlN4AYwx524bftJDek4zVqx9IxAVh7ryJNNHugJFKq23h/Opw0eHtUha4xPTd48N1qHv2deHoQWUh8pzum1xMApQ0tybtpaHUv4TiorvyvuNt8P9rzfu8iV189h65uhl17hccUXua7aW45h+H8e9m433wf3n5ssXlfWja9zhi4RaQMsAjjSsSLyBlCjvZpkxFCSdSyRceWSU6Cv0husZl/ytk8/U/Cue50fuqX3nYAP7oLLOixnZrHTajAI8LRsRe2XYRVXcSHqDBAcQJ75I4CS5JbKn4lbdNA/vLdn2onmBOqpYZ/PuG4vTU/dLfdxiRgPOCzz9dfCGfK9PxTHBJDnJE4188WidZ74WCbR5nyDJJf8nNE0SjVBTzMFGfHN0LAkyj5ZVL7hcZx27mzAYZkZ5jTTlO/MZUeIKjJCj8ucgswHoFXrbfY7LfmW51Ynywfak8PLVsycEoEK2gtlAHma2dslb8iddKQ596ieTlvjyc6C12N8w/FJMe1IMJW6J+nmG/tf6TJRh8owLU3VHS+J/U+l09le3dgN+KdR7lna+2LpC7fAlsHKem3PTQ3T/fKEQMNdd4vz9cKT5aSvNd1/8JgIslOclCtd93w9pikm9XKI+xNATzprox7HeSFDZJoTw2b0goN5dP0irs7/MztyKg9Nqh9c+by5UdrUkkaRSDTMb0ll8C+pbTqb26PvGDVSRFHsCr8MHvoXVZHreVBVJosvj9kZjTCKqC4+vktB8aQjz18dQNKl6SWHrwJAXhnHjpIvxnmSyrHHl6hjGw2GRlDLKYKz4+fhA1v3RObw0bdDI9KHgFTs9aZDv3oqZ1bYRXcPdNtNXNWN9EDwEdEZvEoIV9XrMJbaQasChvxcMe2m3265jqxLAQuIbEo3w82vp0yqnqZ/C54GeKw1X/L2elmTH7DRJti/7q3q72017g4A+zgclH4+Tfknr2dl4m5Vsf4Iz3iNxoWmzsBS4N/Uw6OflbfKvP390XJBR01x51H8maJMz/uWJK5v54aDkK3Epm682JMc9xjqOr0TbweeHWGGTZDM8r5VBbibAkujiiOYrQT5fbxl+Es+jA4VOIpEekONhHIeHYHGfNznDyBWsolypgFg6k7fXKgWV9zJ0OpqJtascEbfXdFynsVjxmfcadMoAJnyUfUIy9To7Pll1mToumTwOhqrnocv02VfbT3kDwVAIpMwbLXCbiU9pXsqfXfRPK0OPrhf+X3J8pVFqv9d7muaY/eaHQf8qdUSBoVAw4EtpzTx+qFTCasDJtxxcOlR4LO0MUe9y7umDpzBxfr62k8Y1R0fkrx7fPt6aPUUI3NEnN9VVzpLP/fsN3djqWwKJFMs6y9Zl7Gy2A7SYd+CzMS8FQIjLbvxmEilQvbSEUUoFS/5WWzUM4rZqt8iI1wdDuW1/ynXhS9vsWmlqYc8WsXbFIgjrTiYKAEIUGS05O1mVD/ioX7k8Dc//5dnrUTJfomunCtkDKVMlF/M1bdo2/f46Rzeg6kDBJ
HSF+QPNU9C6On2ZpirdBH8GXenCa5R3Rsll8Lf9VOEqOlQ4sRApSdA2txV3QxKDw5ae49iheHNIh6RmL2/TXwTGeGF/y6d3PFPFmnQaxj913QmHVPDS/JZBY/XibtmtVEZnC94Mu2mWpl253myLXtKbHMwJbYQrS2giQS4Om6h2LFjhp3G7tSgtbFNsT0cfak7YBcm1eTsRCb43tmkgcSoza1U/hI5GoXDWcCORVAOXEotOwOqDE7PPhegf5EMASo8IttDupu4Hj5X8bX5Xko3EGURUV00WZPc57LioN9kb77Eso5WoUXgbfaHnhFgP90xrmHEJ9Q6VLNfYxiRtmgOPZhvmcnRZ+VsNSSHuPl2Hyep/q/PomGGMsW5spPN9lmOYSgphe0q6645RgJh+rrCGYUwIRhsRye4qykC9+W0y2NS9K3jJY5X2M4nMS6DCRTe0aF/YP5TM316C0ii5cgklQtPB7qDpGv//zHDV/zsMiVSwDw646T6Jfto9WwXfuyZwpVXuocpsEpP867vB39oe7aGRFLUVqW36rJacEauqAESEHgRbSlkaAg0gl12jzHxIUhQJQzWw6jw1Hq1FJf4mfvyWbP3aiS1JFGmKwNCj73nZsSijP7Fs1FW/7jp0hs7sJqH6Sa1er+WyR0bu6IJlButIf2JapilLoiCMSzsEcu8KbjJP/PRggHfqTTduPeI4FkXSqWTjcjcJzeX99pc0Itls4Nhw7jNCCzK8OQUfPPXklTh1AV3tieeO/fFxfr7AKXqPLeA9rV3nag2SJADg3MKIdKBrawIYsq8+s//EPI2gGwfEovA6ilsHTkDxeHQrVgyRx3UCzzXdkTLP4TIRSze3ze1wLEU03AvhuUbc9QKNJggqHA4Ck/o6hqxv67rL1Bs/XNvg8BRUii6mx5LAC0rdVGUH2le3LQ4pyFDh47UXAl9gOAz3tPy/S+6pNCopmt7KqYNg/NgxvKxp6q9OAwvbkqzq9Up4q2BYnlcMx/c9YHYwNG4k6riQ15+1OnQ/3X89PBWrL8cFlchrjoWlpTVvCAQj9xK2wzumHbY6et73wadSDUZxsqapwOygzW5kFPY77LpSHQYFRonE+fl1s5TEGwECrYvz6eP93d3V6ajffkLJrfaUbDYw1FoU+kcswxDw9IDYkvrGQKU5lKoSnWAr5M9HkIFmojzrJIlXLO8aN7E9JIDEsb0CTktuL9PPfemWBB1oAk0at6bpbRRByuNZh9iS+qTrdcS7qDYfSf58BCmLg9AG2rhj35vrqIrIsS01u5J2G99yJJ4XM/XPMyzj6lWpLW8ZkztInnrB0xxMx60ynGDwKucdBmHiRZdo3Xz3H3Ia1wBQxHGkwN1Z53fmHoctCTr3DEmILsYvXJydng7TVuyIPrR5cBdJx4J+rG5ficMgsPUWtDnelCvjkFIjFLGKXa/LDig0muf4vLGmBCAQlSmdcI+MZXkhvH0zXeb9YrJAa7OTIUBZJuXaQYiDCvLyHezcq1+NQ4kETHCCKJNGoNBkIh6PNp6YsNCjTidxlew7T/m1bItD3vaE+sTwt8a9wrgWb4ClbDZfKFauPE4ieI3JwqQluPbv7rs/+llxhLLXa1XzmUcPQJMpN7q9AWTOTkzcm2koHIrCLPDCPKTc1WybwRJw7ZOZtdc/XFlkMgUcYMbYu2A403Gt77wAVTYBuPMXLdQhjL0KD+2NR9/hQNgdZsrLVciUltaKfOcTCRu9vMb4Qq5RyV126OP/+HP1KJEkrN1jRqARmX2vihhYtURH8cgvoY6ahyLXyzwD1LNXIGB01z/nhCiCIBWFS0WqHkjjMOdzktGN/Brh+8/s8vj9peha1z/9p9fnniaxLC+Iykp14mNoKKz1oVqJzxGkOMSP2J/e0G732qO2Wq4VmVGN/m8PI9M7aux/J8ZWWsdRIlMGweB7njMcIzDQrZjTCp9mftk1Xh5vGIcbK9N2IhQIJ0TAJk+aMRC438gAb7VNFlJmC
8jPFabDwnwYmHxpP15nRxzY26cnNzpNG2VcVxWJI6A9GG6FUwgCzzEUp2mKJPDYhuCMSj+jh3oJlGAlBakkWVF1J+7lbeGIoXzyciNsS5dlLdkMTwI51dh3ZR+1bVMYanunBCo2xleKLuu64XLP6iqKOsx4O8ePRaiZ9munNQ1LbUNpkPZEDkWSZi6rlvH/qc3u4IKsyDKPVpWXaL0Z7xmGaWiqIut0G644MLRpYKLo57FGtocQwLW+ZNuWrskCywqqadKndRn40XE6am+zkmZ4nZy7UQXrUEMMRtXQ9WVj8+QZWZJV3XIcW+E4e07ECRxy19WwWZm6REDOfYujAiXsQSLHSjI3UR1kCoHVKiZ6nfUuzE/OBlOalitDBE8ZoWOops4wDMuyDIFgFN+ESll6UZIliaM30bqzLqAVaUXXTV0g2q7s243ptlo9xJ/I6nnZGdeSYtYXgEkMUxM21aQpVciPCl7q3sWlzirtzbO/0yF4cJpbJz7ZWov8NSDSUu+AneQop2egUxGuo3CtWsoR8ZUg8Evi1FHwZuiUGjOzPl+fE2CpagYP5WzXvWvTL/fJZoalCbTQyZelLGuKyLIcp4LVqUKyH7S47RL9JEBc96QitaTGyVfeLmPZn0NRcvNdf/aut0BX/vcV+vY5lRzkVkKx24+m67KPFA0EaYSX/DgOsU+aGNaMr4RJEhO3qUaDaitxmhpPTeiPurW8fjOX4Mi96N6K6psZ+SE80t91Dq5nRcGChv/3DH1t9tdeWyOzKFzRZ2Mf+22Bs+u5bC1EvGObKm8qMo+D7OWiXxRZYEREdf39PUjHwr5PSJn6driMmcAbyhkgztN/gOrFbbRw9yXbcXQKygOjh4k8TDjfQn5ixOshVALfR6vfWEffbufRLsbR3rkvD4K3yVtPqprVKZxbIAiT4sZLb9W3Xusk2T3hk8Tp7FrI73q78tcP9PrTcy76F29ibK6xP0n3LfWVx5FU56daUqsiroZ8X+fNVCInyuLNkmJMdFZPebIokgSSJsHLfq+f+C8oiiEbJJnNyGxGIhWqjnsU9rIIZ9NuW5Kf4Z9j/6z7z17HaDNlSSWcYFxGMjLIdI1OJwnQ6S1Nt76Ad9ww9hYlWVGJ+tjVAZ4cIOb+xX4KwIzoCtKwbWN6xU78XluGH4xJh7HI/IRQmxd3nERtB3NpMlU6lkQe/kvsn7VVjlB3H6+WqzWxi/t4gmJZPolclVeZeikyKPiFMTwdhrGeb/ab+2EkNOLwgPY3mZadHki+isEeSClLT+o8RHph7wpdryV+d9JNkHEQLFBObWwG3RIR6pEVG0LJoSVoMfwf2S/CJS/QjOT3h7FGNuGA3iYzEjXNpO9wOdKPr7w0mLaqQlRTl9Hc05hvXPvaoR3EzO7WUhIvVElFShwnFt/DEt88lKXBtKBjt25JQLRp3WdRRqspsxhzVoUzb6PwX7S4osMrhjnf/QnGslhKYRvaoOygF348cCgocqMFKz3DwWblcm655NF9Yyv0vBOx0Q8uhotNdKKGl2RiLdxEEAgCC0e7AcPumbLgit1wW9JpY2vtEUOSSNS3hlo9c7sn7hrOjIpraY6Wx7Is7XI3RmPPI4Uw9HVumwhNv2HruqpsfPoum0hOrxBmw5KLbHV3xJ51ARzVJQzsz5Z3l6HX09ntCtthDvai1skVtvB1izposMBRq4G8HeA4m6eNiJETKFuru7L57S/JudufvoGgJwLz5+AwbEwUxx3VPrio+VVMnyfjUx1kVXj7C6zGb7gpF1ztavyJdBHQ8p7h7vFj69/+srv98euPyenZRX4OwoHsWm+UHdIwSxbIeVQMju5gZ/JWda66rUuzmRZaUZIWjsC06hfGxBIpeFAPDUMwRMgJNKQ90YKaeHTUPmnWcTNj0DSlFsCBYq3MxECLCvCmmCvulTHAxiKhOJNnqa2pWASrul5LIv1rDWFEi93kQHBCOp2ohREy4akGBq06Lz9K+rzuYE28MEprw
9kd/SVyZETdXfMM1uyPZJloxtbyHQFhIvJfmSgSN2/GLx8/JEwN5B2cXJpslD1BzsLSov9a9o3EmSjLfwhlktSt3Ds2MlazYQ5BPfccDiR4xAVBMIlxH7+Jp0GA5aYm2gxG/5/EGYsmlEupIpw5qz42w4Ngamx0Zv04l8rAogo8m0KM7KWPEtG0F04WsHmryfPOEFvop1bN/v1EK2nXW6oKWV8ihQMWa5raWAWYpT0tHJLSshXlb4663R/ZKQcb9Xt9mbGBEe662++tWj/MdpTvfxlzs1W+wCcXY4qccMEp+MGXsJJ5PBhn+FZAt0iJW7AhXekaqtUagJ7UBUTvdpMcR/reEYskZM5k4yWD3r2l/InsBejpdjxiRy863EHMfVjBaLXh3BoitK4lsOyDoGFr4B0cJtyPetFEp4qlBZrj5KgcKTkEJTjd81tdPVKhkTZarV49NJQBIvwvVlnd7WJ1Z9WgcgONH75VJnuEHl00PQHpLA4sGlfpTM57SM/+8KAVW+AHRzYa5AENgTUBAuBCx+I+U9whSH1d0XSZhUEK5knJDYPAB1aXRT06NKMUvftF93cUS0fTIHjN3vft5zHx/3vtwwZT0Z4IgfY0jntoVB4CoIdpPMcBbKFFjEiu75DYmluSIltnA7M1r+YOCjhj+T17xODWeWfvrLlcC6PToNgPV4AYLdKY1NWECghnKgz+1VUS46ennxujdNQXuvCQoFxaRVc6AsrszpLg8kRnQPODnzaDo5iBgUCZf/sQRtP5RTyYH6GhYCvyBSnY0oz9VaeANLcFhVdd0x95L+jY9GQ1osjqdntXLHripY9sCDy6liJwliUzI/4q92zbjxHDQ5oBpuMm1kMUyQ4RWWp0rYS+9ZEiVkVnhPp0xzai0eSQJWkROa4FGik3c55hOL+34b300cD1PpdQS3qDlL7vWCoHNaAiyFsdz2Nm9cejIjBchYP9hVDJmiKMw4X8iPeSDFRLhteNomlBzckP1Y0VLmlLry/a3MLD0ghxOlr3f/yefxkZ1A50DgVNf+RvMamzXVqNili3PREzBZU4fm7deAMB50jEeHsrA3/saiQETSnKt9S1tJjHpY0ZimzB/3nVoJFdBT2R6UP98aOAi4t0vySszl9ZU5efmg0AdwP7gVpgN3CQ2Vmfinls1U7Gnze+GJ+aUVz8fs5DHzAPtIGwYagpqOWQjf+Rr4+MDvZNfYfIaZiri3YOTy+eyDeHPS4rDMNmvUIqlUmDAWAL0Aw0Az1AuwkdvlIfwSwb9AN3LGyX96i4D4kjkaaoAiUyubrb3768BI7dT7SVsFVw1p/I2TgZv3GK0AGXL1dnf7785tnJFULfYIQCYNEuZS+3CQShh45LVeW4JVUOKlfjGYxNtNuDG8QCKkyBAbptXiQBkvANwd8San66hc43uCHOWm9eJ0i5+IMDGrp1WY6zMC4e6uYlwAirLOrfHLj9+Hp4b0c/V3GnMsFxJzJFxd4uXWw6oLlGj7o3cR9deHuH7/JyE27vIwjT4XNp2r5lQr/HR/ubMxe3SVJ6eP+cYbwY46kvUKoGZNk442+RuKcw1VW/A60qoy5lJvGbYRRCyeQXB3mIGuuEkhOCSab2PXAppLgNg9lfXvN9z+8Us12O5/5APsQgjICiu2hF0RVBRZbYHqk0haC733QiVI0wDheVqoq21AzKtKQWkASXYe4WCwzJSNl7rbsp+iZflePtYdGKXDhwMljgKVrb7wt+mqSVZGmEcYgCBnVS8JUwwiekOgsVs9lktmf3HtNRjuipBwNGlT0X09UL52tYTbns3lB/dDc1ccz7CPcy91bkJVCvCG2c8QypwNkzu9y7KOrcYa+/cMS4pdPb3Xzdm8ibuNyJHfnGc93kclwzhMOZ0LlNc6/xtorb3Xx5j146uikcPi3Qa15qnG8cC2wXehv2sUMvyjWEcx7nPc90CoXDqR6lDyNNk8Zj45lpzhLIYP36vrY99Aw7BV/731jgBIYWz
rKioUCV/Hj/IBrMNLTL4aJces7Y24fhbsV1K//+ZTAClpMBvsgq5yg5pR+oeGnRNHwQzm9+hABSYxpArNyl17kXRdSHBBHc6JqMlLmeQeWrfwSy/qIGWXQiy+vc2yHnz+xrd54YCxf/LWYK8RHxqokx1bp8uE0eIOK5uVrXyY8u4TMj+Vf0d1GHi5s7Qt3xWtsK3C13S10uriobrgRii8MsELfu0l/A+fE2KX/kpaOKXPPp8X/Kqut6ZzoXZBqvjRcB/lR7Yqc2338XRUmKkb5952B6ukMSa9BM4xkSRfkek4y4izBuvqoyBcr71LiNw0Fnz7/SddEbEGRextDXwa4SZm1D3ce8gzXW5Uvq5v89gPZeXqLvf/ouSe7cAeYwEsRYAcSDdO8JYYIw8lI+mS150+SR2pTydGaBgowBsXdGtnGrDtkcfgLvSKJQYsxoBPz1U2psDC+IIl/AK4sqH+bMfE57qpSp5nEk/fOCYru9o2B0hwJN94p6Bmw+Ol5bboHzs3R+wc90GS0dzsmPV2PnAMDD1sQemF484Qv7vScEgHrxnfHCU+og7vH4guGNHyiDJRLNh5KVxS3v/P9QMZ+RCELK58vq4gY1PKIjARXHQWyfRHknXuodiaJUy4lEEz6+sME2UnVTlOTOVBaCyZrAUWThLBsRLdS5u1bqbn075ZkbyQI1a7RyyIJ1pr7WhA5Sarh0hzZOJ4rZEmusPHf38PQ00csp2Dl+vI7upULA9JyD+acc7jUnk1E/PXo3GYZcwE9axFOUxgPiEdEZRvcES5nIrVbK1lBqVRIYl39O6IRq27U3Xxvn63yjX1pajoUbkSDWC5+/dOPklWGJ/OK8M7RSGetL0YpSpvLvIChymBqs5nWPPGfbusLSnCRS1eMjUVK0rLmhdjpRO2Tj2c4CJ2mOw3fkB64iy8D5O5gWnhW2jtPnmi1lTYGDA0r3qNVkF/bAtel3IoEp0SrK2xi+LgOHqUPsnnoE1RkniIZXH0M+xi2Qj6zUxvqJlW/zTF09vi1Ld0WuQVvdv/57IvM0PJGGOr116zNLqy5/KJ67mwSv/2/PixfWje5gcn55riwceGztaBNZBF7d7uvFAVdUNxzK3ak5OGSPpA+q69OXwPENEMz+zMUpb7hyDIYYx9/M7UA2dk7FdMsoRoXBE4iJs++mVrEj09Db/p8fZvRgebZu+CfxlSCwNIkdROB4k/TQ4AKePcxqimzsRCuMpqmhDBIWWRX2sIdbUTBzFHYGYv/bBqPK3YvFxeaqCSlaTk9FOJoMI1m3lzv7zNpIz2VJDEWgHTWEznF/IAi1zZwBUzzjDp7ZvyvQfJppfesbTQwvm5U6Ltj96heeTYdpK0Rn7fbdZ5tmcGof9HtWlxyL9K2zhJnNN5FFlJhdZgnuyvVg5losZ6o7Sie+MTiOzsjDKUOeOctxMgdt8bPcZSl8M7P78kMggpdvWGZoUP0DQs8PK2qUoAhDYZjejk8IrmiiKQ6qX1wUF5NkIXBw+JOnz/OnJ0ozMQnGo2eqSDuBeyO+mJRKAfYLp3lZB2wLU7TxQ++ksksZHjRVo00nDE0z2g0jx/AHrqt1zvMcX7mVsVrL3hGZdasHKappqCxOCFjtkyfXaFmUFVXnKSnpqk3KiTlqWvPMdZVzLAoIbgx6G2+muSY8WfY2serIgN32nxEYKlCuE7fXvoLDCxiNI2hDmNYnjIxsjTg0hgPzu4sQLe8olG6p5yxAn5glsM/wQZkXV9K0lzYtiYRfweMT3Lo18q7H43Z7fFFUMIFTCrGhOdU+uWs0wgGt1PHnLfLY2JjhUKUMohRLKB3Xl24a+pb0ZD0/DExxR3j1nXhejPtFHsgU1bYioCua5QWyRqpOe/Ed/5YrAvtukt8e7leO4VBhlTbC8OHyInTRyAua2gHfDxUOi4yZU4MxyzE00bA/BInZrpNy1SXbHtowBk6QyIYuNXFpfVhtBEIKXGtcjNa6C4mCJ7ONdwRhgCShX
spmRth3Mzjq0CAK+pfNVCrmF8seRPN0f8FqOJHEsEN6qfItz9/LeOA4V/Xo5D2KZliOqqStkvT8NexFaV0solebkeRcxhGJ3m/Dvgzo0IBaoMe66wOtcziZf/p5y5bB7Sc1Tdd/a+1qAgZtA//HrGlVMV++HcetnXtxCrx8ayV/2HThy0Sp8uFztmMjAczrm0y8oLd4BxjQxMWjAANbv/u60+kY5c0ZfTrfkeNX+lcYIPDwVnI+HltzW5ZIk6xxfv5ehUOEHHx5sM/xgoitMIGtZTW6TRlFen+6zjEUpW673S2V51f+HxtFTca3KbwP/7cZyK6DjMu8YCe90ZIPVboO+hYSTdiWdmi4fNQmxT2+WNVFhGCxYz6yxQwtgSepWkqjUdNgRbAKVp+mxHx97aHBlbt2Xb/7YODyxyXp4e6e7u6+ulQjbAQ5QMFWGrGqqPqErbIEDnigW9dveriSiI7oko35/qJE0rSrSngGRviZpXjmYLK2MyHf2593MJgxsiU6BV+qhloknds6VeULOfn/QT/wjG+XLrRm+PMGIoBUBnMalU6+ihY61YT58GzZa5IpMAK4l47bA7cnUVt3PeG1pYD7prpINUz8UC8Gdk/PaLeTwBnAknkLZ5G39AMu+emfRQBdgvETHJcyo56xttXkh9gizbZMQyNe+9LU1AiZxje5FgGGorGpTBpaOG1WTyM5/8HqjkCP1Ymmh+OrabMpjdEaXbIIcmKusz8eOhuCxFIAwstwqGzK5Gz4SdxXAJmcirZHFL5q+t3VwFbboMQ0onhtAIqtTgmoPG5KeJrtB4Ln6tLdfJ11Ykpofa/1zsciM0ugoXZu6vn9Tz7Bm2WTTjIWLJ+YQGyKqgcUkqDyPJUo1lJsjku6n2Ik7E6hCRQspHcyGHMrbX6hF1cIpnG6f4eyUpr4clwiYJPtSa3GnKqaUtLpW07qseiPj3WdPyWxa70edowuvNi/MjM9YHjLOneMGMHYF0+1H+PAaTqMxmuNKBG9rJMBDx7UJDzN9gPBc4eSu3oWrRm0PN66b67kaTqWSn4s1FP9H9/ZqMtm4cWo1cmJVOq1g1LrJYL3xAYZtD/Ieh+brDbZhkamhHFZB6OYDSc2w8ed9nUz52JZ/Cnn+ZxZLuEXakmP+gACWdhw0HnuY2scXhm6pjMRfAOrq7sMMHqAY+WRQjIMr4d5adTE/x1JZFeQtVB955lyGbtkub/iYTY2btC1gSCP6EmMtmx0i8ubDwYJYMtoXnJ43hHO3oev9BGa4bYE/J77Ipeom4dHtUMxBUJbuC64L38DCXKZZuSlpDI+a/P6/eznL0vuXYjp83uSrru9tZYZbdI/lqTNcTTVqo+xE6klirkzyfhxv7g8hBl9xkI7exTnK5MnGSibFR4OELpbRNen3e6cuu2PGoRSdLkpXVGryINSo2FgZ0wGoOyquokWOYxMXpo4cCW6kFIlSTCwyHKVJ3dDnpA7o8OzBP3O2H4Z72kgTKNEkAX07PW3k8loKmn0397i2HzFOQnMnqqKOXuIENGgQhL2QBSS+KFqIvVG7vuLH+CDZb00+ybizYyc2e6wVJf1ljwFp8/+EhwIbe8yx1OO/A2Hj2UuJdLS1uY7wz/obNXYgal+RVc/w0LegqZHL0l1uDo3NyZMDa3nP7z08fVLnXOqaVi64HOIaT8LfxfmbS1AMElivZCT7CFeTDKpun5efJ85wrrp6/YLe6zAx+oxOdi1eg2sNrQ2zNWPFkTFH2yPPeNYVYVmHZh5XjlVpBmc1UPYukiWtzEIovUaLoXVlIZR1NrtqLTN9uz6j3z56dVlP6zTVf0f34ATSXHDnqeth4misLDSXyGdKXyA1UpGgIGZs4oxK5SJEzRnhepaxD5GGuy0jlFu5pPFW8RnmNVKih70r9wYpsLt1SuuD4WwiW1OELvSAFEESWSorWaC2EEoHVJr6dzQdAf44CijtrpvRbKlyCs3WaPLVfd0BYl+n8wpxhHigNEqG
AN88Mdp4ZW+8ahIu3Tb7as0RVJtliN2Lt4d8tAAFe4QzqTdxTrJfS/Mu579OTgw2gh0s12m4uk93/6nGozmvjpaqn4TDEhH1HJDPrRm184F2XiiP5R5miahYr702jrfHOqlytAysmzmnumGjsBv+A95pWrVZJsXDneYJoYh+V1eEnm+PMy9RmZ5eAc2VZbvylSOaxpFOwAOKbIYuFxDE20MJ4ozl+7KeHRgA+dtF39MI4gWzA5wYdudLvFIxA8Zh0AoKUmJzli+9CRFG7414FQd8Bpx9IAKnAtxD3qPb5TEzaGhbLpKqnXrPJ9t0VJuOKdbTvwgdA2Dq0tAEVjwIRClxn7O74f8YabwbYCmA8fWqtbgwFaKTh/B/3oGZrQ1SsbRlEqsY0/Am+4B40won1BdaAbVJwSnao+HdLNHk6Uty9d0WWAI13Sn8aZ3AqHdgSUqOLfiSVaM28Wg0zFA4OOKK+kWgF9Fvhk6cKC0iZaMcKzrM2XrAAdfTRQjDrTTrQxgSHyapZg4coPFQK5ojIs5q0eugJD4OP+2C5lMy+sRJiRKWU1FoTgVtVfRtx0iJGOdIU9IK0vk9C6n11NMNpGImEl5zleF4E1/kYhRtQ9IstDsDuXmN/Q2JU3bISbU5SK/LO9xvXaMLP5oJE5zyQxAqAFo8UMlBW9MYIa+DjqIBGAXkXAUZSc27L6Hf5SbXsIfgFSWrUrisEHHexRf6VCfuQuF8zQhZhhrhhaLEH3H4DOJkhZEbURQHZjrJBYH7kFvgAGv/TC5j0MgeaIZfl9X8CK1AoOajJamrqWiCa+1dnA5uRrBEsdaEGJZkXJHk/nLNgZSxDZdogB34V4BDFymjtXpO5U5Ba6sLsDd3KTTqxmSaNM3k2a79rHTytCiCzpT2/XU0NntaAKH4thAgF4JYWw1BWXQlk6YCAqUkVR9v5rCtK4x1vqxDFZz9Ogmsb+8NYxtMm7vFSRFkfgGPyRMNBHIxqHd/r1apVSjFFUd06xKEkVBHM5FIzY7U1/5/SxobE7wRisO9P4gfcxGP2AgI9oN+wzPekRDYKmb3hsESjUFMJXT7XXuaGh4Pf4AtkpQwkxVmGnQ77U2pGuilOOjRYs71WIFIxDUIoQxaJBkCn6WZN6X13Cxt0XhP/CfMtMajS4SlIREsUqmwF2uUZY/AkWaXqCbGWsLOexOFMWI3zGH8zAlz0WbhE/ZZi/Kmx+TdCyopckuQ0/kqmU3dZbIOBG5uZmOAqxkbV3JKR6mhLWNKZcGvGxOAl40Cqk2b+p6nAiWerMhL8DzDk3x9JMsmgMcMP+/gB1wcoAw1mwvzAZ6hw9D3Siy3RwfthdmA9Xx74pw5Fco78cw3pNsxDVbUm0X9jiaco2z5j6RBpAYW2yRP4paDhSXkiVzpk/R1TWdibws2EAkLOB+x0rEdTAdwmgTzrLWbYvlkDcjwd+f0b8betpbIFWmlYMvObwzuqisUrt3EBsh2419vFXM3nmYgJoAwZ3xJeyMZ+BYtLHr19KDPS5HK7g7cTjhmL0HEe9FtP2LHoQRN1xq26LwmYdF0niMqhkWTJUYpnH/Jb5yfju9VqZM1w2yGlEStXaVsCf41a6ffbd6KUhRFy5v418jqf/66cvRKvm1d2KB/onrzf7Sc4e69uaIgw8vw8j1TKKpPwkq8c+8M/73GwAwJpghlOujlRiuWAaR3c7e+hET8S+n3Dao5EnrjGMfB+YDfog7h/kqnZtVzsCPi9zX6fZ9IZQLxtXi14MJxcj+LHhQNGED7XuT+diBvglxKZwzejbLXHeImguSk+h/f0aPHB319j2pd7d3NTb9o4PSt1iqu31C5XJH+sa3qtgrh6A9JhGJ+NpxJvlmLjbA9/ApiHcVKR1fOruZSasKgWdZrgPVHVOys5b8LrrY8S4A1jt1PE/LKVUduX1PqkuZuxq97PngvfPHq4uLs/8cJGJxby23UqINz8ZS8t6OKnpF7ED0/BuxDu/Jez/Sk163R
K2MPzbXqCiKpnGwmMtmM9bRQLUaDxCoESbWEUoKwcX64U0ROHZAHNfBarmU5nEUUZsfla2eO1YGUjgByaUghBLsti8Y37TO+ts4iqG1ye6/yEdd8pFBNzFIqdlKCKkEJNDmgxQR6N2FpZXve56j0vDseFhYj+ADVZGQSkaEmOASJHiZg/rXP4Dh4zd/szGfTyfjwaDr6Tr+f7VavQ63SVxwJE8MT942mVFXFBoqAwOYNy64lEjsKAeJOecubljoTRZPp8wQ1qFpvHnGzZJZ1o1CR+Xok9I7JEESW+fjCo7EiEsH9t+GktmzDJFEAMarFoKzPXcrE3uzkrO63Oxl1Fa47iAQbEbATNEbtEBJxRmNNw0pbM/0NRh50RDqUrulyHUc8u5KV1XlzAqSDiVfZW8013TDNHxdbT8q9QYtsu49ScNhmplknsfdnoo8f9zVNuvFaIbQtH0hZJiXA33ewt5kqK5wlvMZrMbhS6d+A6GXvMZzzX2zYvk2evvp+6v0+J1jpiqqwf0o57H20onlsNF4VC6Pr4xG1mMUpeC+mSNSgUHgP+I8LEKtxU4GF+OUDPP/JFAEQTGMQrQ97bu12ngv7tDkvR7aOLZJ7OCiJPiWarrhgJ/6tAhwLlC6bYAz/9mIfeIIwaPdszHPluMnXf+MJsn7PVR4ruV69rTpxJ5q+fFwe9govzqof8bFKgEEaG4BvdUHV+VLqk+lW6B+3Jaybg8yA+K2zyUqU82onmU+MfnH7m9IVrBsxyspc+NaAs+xNAmYnlQu6YFS94D1teaNresJYjASPSN8yBIWTHVmoGK2ad7AwFIwgRjRZq7+u6JqFW5SJgaaqlwBAI072rPPG5WNb6eFTB09k0Jdpk61zmPULiA7amMqExWtbq02mrkMgwiGM7JpWsGZaywWq/XGi/mefd7ofYFE8yaNYlsiQ5Vbfnad6+ds27LUkm0aqt7qZjTnDFAlVDrTgK0LaaE0rXiJWxo6r5HAVmLPnbGD3b1XyheKAJMyg1QWhwMKPk+9H3wQIJnUtidC9FiWBK78qK6utrZj74Ryk9iA1LG0lWQQFOfE5sfT2diD6iOBofHiXmdnR8fS5EpJp4H3kEJ7tPGskpZgtxMztbC9qzjFSIMx9FqQuqksWJqW7XeJMGEo88RwZ4OmiAI3WuObNCRpzaIQYk6iFQzfmqjuhikdJqaM12S9lNkmAmNUDPtA+zUdL3pJop5eU9cak3TVep2f9gqC5NTUOknhX02M6vq3UAAAo4pCjcnzoo1RCWwyG+7B8w+4dxIjGa1okZtcVhNzz2HQAi8S6yznTRDDbnHpkTQF+Wxye+BeV8l8iVoP3uZcZqYXCCWtqR66gU70/WeaRWf3yEQyKR9GQ7kzUQ3XEZAh2R7Z4omVYlO2nPiKEZ7R8uPQhwJf0OrtPPGxCLqHzIwBXrI/TTTv1VfErIaJ2/P2K3dCA3xmdjZKoxcSTkXkh3bVC+fOUJsDzz42UJcCNW9GZL9AQKIC7mycjZufCO5htf0oFHocGqNo+/fNpBTarmvyuAJmUtUp7Pic4RrJa1p4Ob90gzX5/58HtZht46mCi02cN7Qh2hhDFF6Ad90IP8PAjJzLddpkeGncOPsYmeowu8WOywdZo1QnxLJXzhxd3AymUkYsox5vmGJGypf9pVCStPna2d1aou4jXVjnwth1pEXZmrz9AGgN3RDaCjFIN3C1LORe9q9l/szkn3FjIF2b39DOBl9bw8J+XySFK+U6yxf3Ga911V3gICRSpKpARu2KKr/hBUGdbUyzitL+z09oTN1JCoPPv1SHhHOGfAH2NhESM1Kh9ke6lsjGQazp88kNCWww66TND6YS98HdztiDyLIFiXJ03K1/iB5wbrpdVASV00faFb/rRcAKuPv1Fs40UiFul4TXpemmMZBiC5rUZpeLzlZ4sl7xnH4u2Pke97r+7hy2pV5zJTjs6hmn+gknEK5cLEtIFJBV+do3KNel6abZm2dCd
VdgZZcTz9ZHUwhAqM/+pTjKWb2kuq6pRC8ls0sH55xYL248Wjjzq81M5Kzvg4b4WQwV0jNxLU0iwZ6Rsvo9Xl2o5/PnAxkKQjwsqBl/v4I8VKNsciCqdZQw9IkhAtSlFUxsN1X5HFOuOfTnF9yr4w4kkNB5DhxtxT2zC6veUJzWC3x1GV1clAZXOup1ApHAkcRUtC1bP4Gyb/kWpY2ympEk9O850B/9VZKKXAQv0Lx8P1a/Ons+GT8xgJBRfgy694komI57noyDMWsOYULo1StlZ2GxWnBt9a5GnJhq5RBcpU52YG10cSuyVONAX6gdCKkzbM0/DRZXGqzkjbaAE7F2arGnm1VjqOcdieiSJ+t5MzppPrvclUY3EYnKjFu5c16nb4xLKgWsBaA2NLosi9poZ/KZt0li7sDdf35tAjNyyZt3XyneTbp4mdmqTcHUovEnMm6CJv+3a8FKxjV1bXfkPL94wcvqCzqvj47I/sXeeuFQvJhdn/WSBEOk146RDq2g1QN1Q6m+JoFPQgh6N95pgO027DlAkCgi9O1pSM9EAy4cWNZmi6yeE0tltw+rFMXLBmbTWa+lkXfhVlszy5HQCYMvZNHkizAA55mo7OS6yhu/pXrxIhpcYqP9Favt2kkseYrMk8SOfTeCrtpEW7kwx9ZusRBntm3bst3EqjSyo24Gyt6iC42grYdOhPiA/j9l9HmCAtQba60b4uPSphDyx3CSM1tEfHVoNfeN9lNH6SHu3BSIpkdB9yNfg3I7aCzWf64kYSnAEIdlpAy2XjK/uLCKKGDNtcqOc8PCRRZeORtpZrlMtVxq22yMSeRwZNjIBJBYC+wz11dXt8/e+gmq7uiHNeVx2D5ctUA++1Pk/C5H/QvRJPQXkANv0DXr69hSFdsOQzGEofknMbuVuvRVjMae4bI9GKzhyU4Z10NY+E4r3vDj2EYxJOW21t3+Hq8UMRo5dZ/Ba7isrbZObYu7fBsELeH6+NK+i9XJjx5oWGhznsyXDmqTPVPgGAKXTbh9Cu3F3O6hw870g+Da+FhObAF/TmHSE3CJ5aiW3qpeSiYsAIUKTi0ioVI5xd9aiDdKrKI/LnMMq46anbb3pxg51wllJ+dNu0aoE/ETNsOBvkJm306Ffiuf1wWa7Ir4lauri+GoI+OVsdiyeYwDyVYrC4dDYQtq5x11OM19lKmKvSA5ZVMOF8R8hUlMtVJmP/1dsUJ6SqwglyNd84n+WlCtquoP6BG3x7vtNnQufw5YqD4xBaiSUYv1Avb9eWH76yP/edlXy3fTZ4aPr/DdWL2+I8H8fz+FYtnuQu2EC3htMgmsTYwc2VSFp8JZQVXI2AL2yO3PjonC3hRZgoctNOKLnRV0n0iWrewr3y4CBoAiEdG8/Y4vfaSBqqJLHuhiW25Z802kjlgeA1+N50Ore9AOnVpbmubYrgo4N7fP4pLzc4Nv4Fw/QvXvztnA20mDE3ThXaWQ0RwJcdLpzLahcRSBIVvXZv8E5g60xS+lFPGBr1uVUIO34wi/5mUnuadQhhMS4oMy9mBFS4peCG5GQd4hWnxjKIsT5817f3F/Nh7PloS/SFl6fet8w8eWdw5s+kLBnYPGYFPd0KNL2bIwegGHHlyTynK2HBW9yAuiThCWwl7L7+LJu8e84Z+PxrP1ScEvRargBasSgXC5Uvi1LvHcUYU7XEYnbuhPfN5D28LlIga0t/QUDmruFPTGI3xXnT4Sudq5an4vFeuCSi4T0Imch0ub6tJetziyQmOfNfyyvYqv1ciKDhUvRiw0tdxzPBqRtwMSIGBmLejzYwad8F8XHEk/i54UcDCbF0fN/cy2c19i88WeljQevzVgjR3dXv3Tx4bdpAdFZ9Zs9uXAJGW+7ci19tEcT4/wUrYI9PJiP25gLLE0TcADwwNDqUUdr78nqIalfXlM1ag4lJhrvt1m8tin3KwFQ0cl7ZY0Wi1bZjMaoaIpN1J6/soBeoWyuaOqmyoVk9HQzmVjc
33+8qB/xQC+zbVtsnOyqvxt7HxalIrCIi3nH+tmVz70HCRKFTo0ZGudoniJZ21JqhVdMWzoEguHkgMYIqNJ2+cWXT7U4wAbo6hgFW1mTn1+1e1R2hcUgchgPlLDdHIgtW2ybACwRU6ilagCjX05/8bOd7/iQx9IG+fVd8CkP46TOMXSlC1xI7/8jmUzeDI3BZ7Bfy8KpOpIwGgt+94g0K6JXF0JR1zNXJ5jQzTL8KKsylw1a05faDadT4PSoH0fkqEedCNXE8tYaEB6+nuWwE25fVg/oN1dKHoRaNjbdRSWz1Re27Nt1BHEuOp5oecaMlk03TThc1lbL5o7kW/3TbCa9y8Gfqdj1RigZZbp27GoENYuRMNIwpmDETlJRLhv6OMpdODPaDPxKYOxk6mXUXV0H/uvV21jvK7feGaXYikSKZOl5yvcg2raSlILpLQocQyc4tCGV+8Z4DmKqJxPsvl6h1Dw0RSO3oyBZjkSSp5uYvXndXdoFImdSpnQS5L4nOTyVzvBF3mx6xEtqYV+DeDcQlIfYpStJWPA12aZz9FYwFgr8WSOPmHrGRJ+6isex/GCJDFT7RAHvIuBd8a4mBOdq1sQOT7Q+mM+thxH4hyH/EaPhlor4399+Mroznd5QdYY+D3p0UjZRV7ZTJVmNpPSL/u+qwRLeC44aqHX///Xo4ywWi0r331J1VQJ/xKah3RFbBSsnMeC7ebC7MqcpYfcicdF9gFVbGbyGV2S5P9RJUccMMRp42Udx4wX4fb4Wl/H7hvtHYZC6aGbiQJDMnjDroHV2OVrw3fke3DErrvD/vnNzf0zzMz625gZxF2/TLo13cb3Co2uwvADM/gCKB8qILzmnOuNh4PhMJBZxG03gtvuRz4KOEZr9dJhztwwxm7XrEzXHJlil9fxE8/99pu2D063vWzWwRqCE49rIFrH8+33MFy7f7YzGPUiNyh1DCx9OQq78yPdNbYh/qAWBsae3jvKmveatfD/vN7qmZERcfWN857ZkyujrgiCdxRMvriaOcv/m9vEj5G5xSrOELfgf/pyMuj3ev3YVGWZblLTMzUbBXurnKmU0pJSE9qdwtFV3Rwdf9wxHflU1779DfPPDtvXRJrE6vnDEzb4h5K9bJgOXHcYlSUEO+zkw/k6E3DsqDuXJzxggrmW5o/rxVAKWYauE7iL/E9vvcoHWfO4eHjpz/K4M5AaHhTDq2kA5PBglzvPbEh+/FsyfpAUf8nVVOvuFwBvsBJD8/L8xg/wXN9rA8EV+hmf87gqJDClq0WU5kRZMGLqSldJmmJcVugbYNHjgA8yEBMzGXz6M+z9aSi/b1CMeL2CoFoaNwD0yxslOSw0qI/Icb4XBH5ktSlQh0YHNrZooOXsRz8Pvqtx/C61T9C+CKnmjJwqm2zmSdeplXX+E1+upPkdAbf7app7h8MHEixzukc3ZPG8KNopxb7TbS0nWxSzmswPKk0ocNgYU62DiX761ft7fFBI9O6LpaJJm5rDo0JURhG5g+30O2zA6Rcva1ons+E1keZ87AlvScPBQCo/9AQ4bJs8Vs7ljqwcf4LEncHL504bL/uETJvLttpwTePmB/F448ctLjgNooGPMKx9+HxXEeGs7v9ZW7KoTB3c+fW8DOdWWnajP6vmBwucrUuh6k21csw9P4NvN0paf4xZMPV9dny+xVcsjiOw2QOPRAKGE1H/8n1z9bMYKcoMfpRQXXNtQ6vcV73Q5RudWvPXLG7vcm0d8I7CyPQr3UWEt7s9LhTs/LAmv3OQpuGQx45S+ctu7lEGMHlmwBG88E31Qr/bwkJ5B9miN4rr4ZmvE4pA2fWB8p4iGcoxiiRwBKJl2KFMMeZNrlCYLUta7c4NHWPrXPSz7kK3/apEXC3L8V7FkafiIYtzKo8t34/j6L6RVWWxvSjDQnvp62saSTNIKpSp8MF8YJBTqRXn4/bKDY9u/t27TOKZGiDTlPNl9LxIzn95Af8Q6V97WxzRKgNKAcoXc
xDpDFmJ0PPZ0lZdO2Av0S8OQfcn5WwUEJkU+hRRBQGxrXvTfmwqz1PnYK6GlRn0OD/dmLETQQCPLXhZpLH5bo5KS5ZirKGntUkeDLrie8ynyVgot2cMfJLaP/a+kqsXzu7gEvd+dMi6v9mcjYLDVFKdPElI5ru3svfcsmgUGxDiBtOMXa76YwB11Ngwn+k6KIBAOcyIxYNsLbz0ggWUEsT1fbVuYVbU6/qG+POEMTBdads42lhj2xaznTg6cnWxwTikYrDWymgyh1AMbkxj6fmm7bIFgUPlvGb0zutOBPfc2wAukagKOeQnmnlfZHf+4CR5MGrqgMdXJYphWJFBq1b9ILuhCHjh9dLT0Pu7D/ZTZsH02Bpz28Ch5r5PnW7mF7VEvkyr4+q6pm1Ic8QwuAEmkBZPVDgPrQaADaSmn1Iwnn2u8XVNpd7pG7l3rotQhNkOXeGjzsEgkvyQIBMvTgHvEif8BhAazlCf/DAGTPXRIsedHJEKzD1MCzlCznBX3UwR2f1iw5t3Uu6QFdLZo2DYE4LzvzTD0ReGKpJgxtxq8VEZZGXsTxn9HHIx2+eIff9jNhVzJ5MRVC3DKN4g/AnaaOD4b+D5kiu0bzJQuoxieR7kNIO177z4KXMWmbH7pA3d88NoHe+jt2mkvANHQvpvn1yhID+SzFn0SdXFySuxB+GvfLESBr2I2aPTcSKIWV6Zn55Kpb0rcmQcHty0toicxMCbDMKV3XT9ealTJl5aWb5DG1KNQgLo282CcAhivJ4qwQCfdxlOrbaC2FN0BdeGVMk+cDWvkfnb/jb2pGM5foo7X1P/hYBrxRRgf37RUwootu2sV+oAIhVDDUScvSEB0isHvL9NQAAOvPUzvrNaz1Gc/6cg6DQAvvjzLTqS1l2A6pMlYeroHQA1yJQGs38HMeSpDZ4JZ0aoGrcVN/6dxgQLX6DMseTDiwsxIx+JYdZP7eWueDABv/Ln56PynSBtoSudK5Io2coNroMO4lL9oK+0tth65VwtFE7HdLRg2WHGZI3TP2lwIWu9HBaik/tD3mLDL6PZ+Y4fn7mvr5L6tZCVvyHC5gR8e9j8XYXWrjuPEKCEy173tXFQkrg+j3C+Y0MyS3WcjNoPLhYVzMHWxJvxzrJHyCwkyopEkYziB4/mnM6OCBgRy+xkASUe9o7Zs0AWwkZZLrvO6vVknZ3RE6thUdrreLSj1X7K1nb8U4/LYWvPWTO4lKoymKOyRgtLtkqvZcAxGucdFmCPuoRNEeYszW/GcpVVnQ+PJOJhv+QjYXV0I+JgpLMryItIhhrbTjHSrCfw4mTtilp/V+WEGyzsrihVeUdvWLWfs+YlgPl/oYCl0fEJvVwtWIteF1G9nQiOZD/S/C96m6UgsNQGsBwWyBKZDRMxc6YXrB53RNo6bFfYTlUqdoE9ZL1Mh9kzJtvNQLQJsCoDluxnH/M6MYPZRClLgAFQ0tl00no/WC5rDLfvhfCx45Qv+7IbIHt7WVT22QMRu60MFWjYQ+YmtOLOW5rfORqOYZbgYR5h3MWRrs5SWbFeGYnxc9mpx5Bz/+0KAG5KDxIH4Kv7sUrJao7n1cUumQoANwL4bSAOHoTAYABKhDEYXrGDo5eiIeGaowuqA80MYwA132ZRv2JCUEWDwqAIwbjYQhGUkyjYlWgT1lUkHtO6JgGzkia9MhnYDEPb4I4g27EYW1JUsZ3YjW6VGA+5KgQTetlbL/uwmNjmMJR8xlHUcoNj0PKdd/zhPf17otdWJ2FOPHYKSn5BjObEmNPQ8qwzsCYU/1kYFQO4K8zB8bMX2iZNilS5uGBmFm5SmZIdKK+00GDRpjKdbFspRURInRBUaEdZL0SctubXdSGzQzGOal1CdUxDPKyce8ej4ev0s4yFD+Qq/R8IJ5CgX4d4Ihp4ByDIBFI6X/2EXfuKCspQd1zVUx+6mBiT3knKkf6BKJe2HapmSMkTWNY9sBUpS1XkU1qlWFO3pMyOgu7VKxmmckjhN
HArcx/v0N7XVPiSPOLvT9xXinnH9q6WEpPi7B5SKhcDe8djOlM7NxFKV66TySEjPRPVR/tFclnrTkHEvFU75WWzW3kFv3OVj93Z2IX8wqenfvCJwKm466ii61W8Y5X2YZg/avtloQtgwHdTqPTXEYFgwNRTnnrilngJllJaJZ/ahXlerz7tRNSCrKLNUpaM6oYUeW/7jgGDbktRUqRwOvf96hGpVur3iy7x/eRs9c4hn4JDErjDQ7o0GbJkyubJS451vEVteYgpH41FlP+Hc1GefBlEUq1FigLfU5FCO5UoFiqIqG2kMqXKVSgvUnxU0832uOC9i079LRhvz02hQhqkg7PORU3QF6nh69RSMazRC+rIgYEp4ahQWJTp8XqA33wnk+T1jhq5HsOH6Xm/reHVWMNDJBYJCjRwWvyu2T3derRpx/DqaKBgQOwJGL8wtoemckfEmGYC/484v95lVyy1opDPjrhvFpeEzD5lXf9twFKtxiPHY4NXnAPcjXrplYPeGLPPXvtVie3ydYLEuj/qEf6wXL5QLJUr1Vq90Wy1O91efwAZluOHgogkWVEx0XTDtKjtuJ4fhFGcDM/jAsXouG37hOjpk+ZCn77Yh+MZjlijSBpXSKvugkI0Y/8DNMMBJBBFh3vKMwJRhnUbgDOc8poMBMlCVNhRGQ4gQbIQZS3XWQXegF/DMIhDtRkFVB3wwvPB8yEpzqFqZw+GT2aW3TlBGEX+un2LDYQGkuY24PbA5PkQKqx79+Zxfzv9XEGzAr+Cb2HT8wcC5B8Sl6eIC4QSUtLN+yTQdPVAViFLrZGesxFvpc8lGNCew0IK150hOCv5t7bdacuJVbOSyEqc7gwrWZm5i2y+6n4TuSkZCplOhZ6QBQcwwuvwKHFnO+wCHE+ZkUy3QO8qZCCtYE22LJWMtcD3y5BDvIQBc3t5HkpQKzPXp5nYCFcO9Tk/sPLNfI5W/ML0MCMiqHcBta+oDWqJRuQRdQR/hldgfVqPQXsXaP9K29CWdMQeceZdzHsdm1e009LDn196KHmYvxDfnplUjgpkuTvzimWJ5KSlODtNMG/6/CvyakpYd35Fa7wDAAAA") format('woff2-variations'); + unicode-range: U+0000-00FF,U+0131,U+0152-0153,U+02BB-02BC,U+02C6,U+02DA,U+02DC,U+0304,U+0308,U+0329,U+2000-206F,U+20AC,U+2122,U+2191,U+2193,U+2212,U+2215,U+FEFF,U+FFFD; +}@import url("https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.2.0/css/all.min.css"); p {margin: 0;}per chunkper chunkhamming distunpack to floatoptionalDocumentSplit into ChunksChunksText IndicesEmbedding inferenceBinary VectorsQueryBinary EmbeddingFloat EmbeddingMatch-phaseMatchfeaturesRankfeaturesMatchfeatures2nd-phase rankingTop k(GBDT Model)1st-phase ranking All matched(Linear Model)Global-phaseTop kChunk selectionTop n chunksLLM / Answer Generation \ No newline at end of file diff --git a/mintlify-docs/assets/img/tutorials/text_search_baseline_pointwise_listwise_rr.png b/mintlify-docs/assets/img/tutorials/text_search_baseline_pointwise_listwise_rr.png new file mode 100644 index 
0000000000..c8adbb19c5 Binary files /dev/null and b/mintlify-docs/assets/img/tutorials/text_search_baseline_pointwise_listwise_rr.png differ diff --git a/mintlify-docs/assets/img/vespa-cloud-enclave-aws.png b/mintlify-docs/assets/img/vespa-cloud-enclave-aws.png new file mode 100644 index 0000000000..0f7f87278c Binary files /dev/null and b/mintlify-docs/assets/img/vespa-cloud-enclave-aws.png differ diff --git a/mintlify-docs/assets/img/vespa-cloud-enclave-azure.png b/mintlify-docs/assets/img/vespa-cloud-enclave-azure.png new file mode 100644 index 0000000000..dce93b5c4a Binary files /dev/null and b/mintlify-docs/assets/img/vespa-cloud-enclave-azure.png differ diff --git a/mintlify-docs/assets/img/vespa-cloud-enclave-gcp.png b/mintlify-docs/assets/img/vespa-cloud-enclave-gcp.png new file mode 100644 index 0000000000..18c4d14f39 Binary files /dev/null and b/mintlify-docs/assets/img/vespa-cloud-enclave-gcp.png differ diff --git a/mintlify-docs/assets/img/vespa-operator-architecture.png b/mintlify-docs/assets/img/vespa-operator-architecture.png new file mode 100644 index 0000000000..d1602f5862 Binary files /dev/null and b/mintlify-docs/assets/img/vespa-operator-architecture.png differ diff --git a/mintlify-docs/assets/img/vespa-overview-docproc.svg b/mintlify-docs/assets/img/vespa-overview-docproc.svg new file mode 100644 index 0000000000..4578e7780b --- /dev/null +++ b/mintlify-docs/assets/img/vespa-overview-docproc.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/vespa-overview-embeddings-1.svg b/mintlify-docs/assets/img/vespa-overview-embeddings-1.svg new file mode 100644 index 0000000000..a41d3e6b0a --- /dev/null +++ b/mintlify-docs/assets/img/vespa-overview-embeddings-1.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/vespa-overview-embeddings-2.svg b/mintlify-docs/assets/img/vespa-overview-embeddings-2.svg new file mode 100644 index 0000000000..41f027ae2c --- /dev/null +++ 
b/mintlify-docs/assets/img/vespa-overview-embeddings-2.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/vespa-overview-linguistics.svg b/mintlify-docs/assets/img/vespa-overview-linguistics.svg new file mode 100644 index 0000000000..69638142ea --- /dev/null +++ b/mintlify-docs/assets/img/vespa-overview-linguistics.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/vespa-overview-searcher.svg b/mintlify-docs/assets/img/vespa-overview-searcher.svg new file mode 100644 index 0000000000..5cdbf73cc7 --- /dev/null +++ b/mintlify-docs/assets/img/vespa-overview-searcher.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/vespa-overview.svg b/mintlify-docs/assets/img/vespa-overview.svg new file mode 100644 index 0000000000..e70332d35c --- /dev/null +++ b/mintlify-docs/assets/img/vespa-overview.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/mintlify-docs/assets/img/video-thumbs/deploying-a-vespa-searcher.png b/mintlify-docs/assets/img/video-thumbs/deploying-a-vespa-searcher.png new file mode 100644 index 0000000000..f89d3ac230 Binary files /dev/null and b/mintlify-docs/assets/img/video-thumbs/deploying-a-vespa-searcher.png differ diff --git a/mintlify-docs/assets/img/vpc-1.png b/mintlify-docs/assets/img/vpc-1.png new file mode 100644 index 0000000000..796cb73afc Binary files /dev/null and b/mintlify-docs/assets/img/vpc-1.png differ diff --git a/mintlify-docs/assets/img/vpc-2.png b/mintlify-docs/assets/img/vpc-2.png new file mode 100644 index 0000000000..76cd84a932 Binary files /dev/null and b/mintlify-docs/assets/img/vpc-2.png differ diff --git a/mintlify-docs/assets/logos/Vespa-logo-dark-rgb.svg b/mintlify-docs/assets/logos/Vespa-logo-dark-rgb.svg new file mode 100644 index 0000000000..6f3c141bb7 --- /dev/null +++ b/mintlify-docs/assets/logos/Vespa-logo-dark-rgb.svg @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff 
--git a/mintlify-docs/assets/logos/Vespa-logo-white-rgb.svg b/mintlify-docs/assets/logos/Vespa-logo-white-rgb.svg new file mode 100644 index 0000000000..fdb0431d01 --- /dev/null +++ b/mintlify-docs/assets/logos/Vespa-logo-white-rgb.svg @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/mintlify-docs/assets/logos/logo.svg b/mintlify-docs/assets/logos/logo.svg new file mode 100644 index 0000000000..fed667cadf --- /dev/null +++ b/mintlify-docs/assets/logos/logo.svg @@ -0,0 +1,16 @@ + + + + + + + + + + + + + + + + diff --git a/mintlify-docs/assets/logos/vespa-logo-green-rgb.svg b/mintlify-docs/assets/logos/vespa-logo-green-rgb.svg new file mode 100644 index 0000000000..7ba3e2dd20 --- /dev/null +++ b/mintlify-docs/assets/logos/vespa-logo-green-rgb.svg @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/mintlify-docs/docs.json b/mintlify-docs/docs.json new file mode 100644 index 0000000000..0714262745 --- /dev/null +++ b/mintlify-docs/docs.json @@ -0,0 +1,623 @@ +{ + "$schema": "https://mintlify.com/docs.json", + "theme": "linden", + "name": "Vespa Documentation", + "colors": { + "primary": "#61D790", + "light": "#61D790", + "dark": "#61D790" + }, + "favicon": "/favicon.png", + "appearance": { + "default": "dark" + }, + "fonts": { + "heading": { + "family": "Roobert", + "source": "https://vespa.ai/vespa-content/themes/website-wp-theme/fonts/Roobert-Medium.woff", + "format": "woff" + }, + "body": { + "family": "Roobert", + "source": "https://vespa.ai/vespa-content/themes/website-wp-theme/fonts/Roobert-Regular.woff", + "format": "woff" + } + }, + "navigation": { + "tabs": [ + { + "tab": "Home", + "icon": "house", + "pages": ["index"] + }, + { + "tab": "Guides", + "icon": "book-open", + "pages": [ + { + "group": "Vespa Basics", + "pages": [ + "en/basics/deploy-an-application", + "en/basics/applications", + "en/basics/schemas", + "en/basics/writing", + "en/basics/querying", + 
"en/basics/ranking", + "en/basics/operations", + "en/basics/whats-more" + ] + }, + { + "group": "Learn More", + "pages": [ + "en/learn/overview", + "en/learn/llm-help", + "en/learn/features", + "en/learn/tutorials", + "en/learn/glossary", + "en/learn/releases", + "en/learn/tenant-apps-instances", + "en/learn/migrating-to-cloud", + "en/learn/migrating-from-elastic-search", + "en/learn/about-documentation", + "en/learn/contributing" + ] + }, + { + "group": "Applications & Components", + "pages": [ + "en/applications/developer-guide", + "en/applications/ide-support", + "en/applications/deployment", + "en/applications/vespaignore", + "en/applications/containers", + "en/applications/components", + "en/applications/searchers", + "en/applications/document-processors", + "en/applications/request-handlers", + "en/applications/result-renderers", + "en/applications/dependency-injection", + "en/applications/configuring-components", + "en/applications/chaining", + "en/applications/inspecting-structured-data", + "en/applications/web-services", + "en/applications/unit-testing", + "en/applications/testing", + "en/applications/config-system", + "en/applications/processing", + "en/applications/bundles", + "en/applications/using-zookeeper", + "en/applications/http-servers-and-filters", + "en/applications/pluggable-frameworks", + "en/applications/configapi-dev" + ] + }, + { + "group": "Schemas and documents", + "pages": [ + "en/schemas/documents", + "en/schemas/inheritance-in-schemas", + "en/schemas/concrete-documents", + "en/schemas/parent-child", + "en/schemas/structs", + "en/schemas/predicate-fields", + "en/schemas/exposing-schema-information" + ] + }, + { + "group": "Reading and writing", + "pages": [ + "en/writing/reads-and-writes", + "en/writing/document-v1-api-guide", + "en/writing/indexing", + "en/writing/initial-batch-feed", + "en/writing/visiting", + "en/writing/document-api-guide", + "en/writing/partial-updates", + "en/writing/batch-delete", + "en/writing/feed-block", + 
"en/writing/document-routing", + "en/writing/indexing-paged-vectors" + ] + }, + { + "group": "Querying", + "pages": [ + "en/querying/query-api", + "en/querying/query-language", + "en/querying/grouping", + "en/querying/federation", + "en/querying/query-profiles", + "en/querying/vector-search-intro", + "en/querying/nearest-neighbor-search", + "en/querying/approximate-nn-hnsw", + "en/querying/nearest-neighbor-search-guide", + "en/querying/text-matching", + "en/querying/searching-multivalue-fields", + "en/querying/geo-search", + "en/querying/document-summaries", + "en/querying/result-diversity", + "en/querying/page-templates" + ] + }, + { + "group": "Ranking and inference", + "pages": [ + "en/ranking/ranking-intro", + "en/ranking/ranking-expressions-features", + "en/ranking/multivalue-query-operators", + "en/ranking/tensor-user-guide", + "en/ranking/tensor-examples", + "en/ranking/phased-ranking", + "en/ranking/tensorflow", + "en/ranking/onnx", + "en/ranking/xgboost", + "en/ranking/lightgbm", + "en/ranking/wand", + "en/ranking/bm25", + "en/ranking/nativerank", + "en/ranking/cross-encoders", + "en/ranking/reranking-in-searcher", + "en/ranking/significance", + "en/ranking/stateless-model-evaluation" + ] + }, + { + "group": "RAG and embedding", + "pages": [ + "en/rag/rag", + "en/rag/working-with-chunks", + "en/rag/embedding", + "en/rag/binarizing-vectors", + "en/rag/llms-in-vespa", + "en/rag/local-llms", + "en/rag/external-llms", + "en/rag/document-enrichment", + "en/rag/model-hub" + ] + }, + { + "group": "Linguistics and text processing", + "pages": [ + { + "group": "Linguistics", + "pages": [ + "/en/linguistics/linguistics", + "/en/linguistics/linguistics-opennlp", + "/en/linguistics/lucene-linguistics", + "/en/linguistics/linguistics-custom" + ] + }, + "en/linguistics/query-rewriting", + "en/linguistics/troubleshooting-encoding" + ] + }, + { + "group": "Content and elasticity", + "pages": [ + "/en/content/proton", + "/en/content/content-nodes", +
"/en/content/elasticity", + "/en/content/attributes", + "/en/content/consistency", + "/en/content/idealstate", + "/en/content/buckets" + + ] + }, + { + "group": "Performance", + "pages": [ + "en/performance", + "en/performance/practical-search-performance-guide", + "en/performance/sizing-search", + "en/performance/sizing-feeding", + "en/performance/node-resources", + { + "group": "Instance types", + "pages": [ + "en/performance/instance-types/aws-instance-types", + "en/performance/instance-types/gcp-instance-types", + "en/performance/instance-types/azure-instance-types" + ] + }, + "en/performance/topology-and-resizing", + "en/performance/streaming-search", + "en/performance/benchmarking", + "en/performance/benchmarking-cloud", + "en/performance/memory-visualizer", + "en/performance/profiling", + "en/performance/container-tuning", + "en/performance/rate-limiting-searcher", + "en/performance/graceful-degradation", + "en/performance/caches-in-vespa", + "en/performance/container-http", + "en/performance/http2", + "en/performance/feature-tuning", + "en/performance/valgrind" + ] + }, + { + "group": "Operations", + "pages": [ + "en/cloud/quota", + "en/operations/environments", + "en/operations/zones", + "en/operations/production-deployment", + "en/operations/deployment-variants", + "en/operations/automated-deployments", + "en/operations/autoscaling", + { + "group": "Enclave: Bring your own cloud", + "pages": [ + "en/operations/enclave/enclave", + "en/operations/enclave/aws-getting-started", + "en/operations/enclave/aws-architecture", + "en/operations/enclave/azure-getting-started", + "en/operations/enclave/azure-architecture", + "en/operations/enclave/gcp-getting-started", + "en/operations/enclave/gcp-architecture", + "en/operations/enclave/archive", + "en/operations/enclave/operations" + ] + }, + "en/operations/reindexing", + "en/operations/data-management", + "en/operations/cloning", + "en/operations/monitoring", + "en/operations/metrics", + 
"en/operations/notifications", + "en/cloud/support", + "en/operations/deployment-patterns", + "en/operations/private-endpoints", + "en/operations/endpoint-routing", + "en/operations/access-logging", + { + "group": "Artifact archive", + "pages": [ + "en/operations/archive/archive-guide", + "en/operations/archive/archive-guide-aws", + "en/operations/archive/archive-guide-gcp" + ] + }, + "en/operations/deleting-applications", + { + "group": "Self-managed", + "pages": [ + "en/operations/self-managed/admin-procedures", + "en/operations/self-managed/multinode-systems", + "en/operations/self-managed/files-processes-and-ports", + "en/operations/self-managed/node-setup", + "en/operations/self-managed/using-kubernetes-with-vespa", + "en/operations/self-managed/build-install", + "en/operations/self-managed/monitoring", + "en/operations/self-managed/content-node-recovery", + "en/operations/self-managed/configuration-server", + "en/operations/self-managed/live-upgrade", + "en/operations/self-managed/config-sentinel", + "en/operations/self-managed/config-proxy", + "en/operations/self-managed/docker-containers", + "en/operations/self-managed/vespa-gpu-container", + "en/operations/self-managed/cpu-support", + "en/operations/self-managed/slobrok", + "en/operations/self-managed/procedure-change-attribute-index", + "en/operations/self-managed/container", + "en/operations/self-managed/sizing-examples", + "en/operations/self-managed/vespa-support" + ] + }, + { + "group": "Kubernetes", + "pages": [ + "en/operations/kubernetes/vespa-on-kubernetes", + "en/operations/kubernetes/architecture", + { + "group": "Deployment", + "pages": [ + "en/operations/kubernetes/deployment/installation", + "en/operations/kubernetes/deployment/local-deployment", + "en/operations/kubernetes/deployment/ecr-pull-through-cache", + "en/operations/kubernetes/deployment/dev-mode", + "en/operations/kubernetes/deployment/permissions" + ] + }, + { + "group": "Operations", + "pages": [ + 
"en/operations/kubernetes/operations/operations", + "en/operations/kubernetes/operations/upgrades", + "en/operations/kubernetes/operations/delete-vespaset", + "en/operations/kubernetes/operations/monitoring" + ] + }, + { + "group": "Configuration", + "pages": [ + "en/operations/kubernetes/configuration/configure-local-storage-type", + "en/operations/kubernetes/logging", + "en/operations/kubernetes/ingress", + "en/operations/kubernetes/custom-overrides-podtemplate", + "en/operations/kubernetes/tls" + ] + } + ] + } + ] + }, + { + "group": "Security", + "pages": [ + "en/security/security", + "en/security/guide", + "en/security/secret-store", + "en/security/cloudflare-workers", + "en/security/whitepaper", + "en/security/securing-your-vespa-installation", + "en/security/mtls" + ] + }, + { + "group": "Clients", + "pages": [ + "en/clients/vespa-cli", + "en/clients/python-client", + "en/clients/vespa-feed-client", + "en/clients/http-best-practices" + ] + }, + { + "group": "Modules", + "pages": [ + { + "group": "E-commerce", + "pages": [ + "en/modules/e-commerce/multi-currency-filtering", + "en/modules/e-commerce/saved-search", + "en/modules/e-commerce/using-features-together" + ] + } + ] + } + ] + }, + { + "tab": "FAQ", + "icon": "circle-question", + "pages": [ + { + "group": "FAQ", + "pages": ["en/learn/faq"] + } + ] + }, + { + "tab": "Reference", + "icon": "code", + "groups": [ + { + "group": "APIs", + "pages": [ + "en/reference/api/api", + "en/reference/api/query", + "en/reference/api/document-v1", + "en/reference/api/state-v1", + "en/reference/api/deploy-v2", + "en/reference/api/application-v2", + "en/reference/api/config-v2", + "en/reference/api/cluster-v2", + "en/reference/api/metrics-v1", + "en/reference/api/metrics-v2", + "en/reference/api/prometheus-v1" + ] + }, + { + "group": "Applications and components", + "pages": [ + "en/reference/applications/application-packages", + { + "group": "services.xml", + "pages": [ + "en/reference/applications/services/services", + 
"en/reference/applications/services/admin", + "en/reference/applications/services/container", + "en/reference/applications/services/content", + "en/reference/applications/services/docproc", + "en/reference/applications/services/http", + "en/reference/applications/services/processing", + "en/reference/applications/services/search" + ] + }, + "en/reference/applications/deployment", + "en/reference/applications/hosts", + "en/reference/applications/validation-overrides", + "en/reference/applications/components", + "en/reference/applications/config-files", + "en/reference/applications/testing", + "en/reference/applications/testing-java" + ] + }, + { + "group": "Schemas and documents", + "pages": [ + "en/reference/schemas/schemas", + "en/reference/schemas/document-json-format", + "en/reference/schemas/document-field-path" + ] + }, + { + "group": "Reading and writing", + "pages": [ + "en/reference/writing/indexing-language", + "en/reference/writing/document-selector-language" + ] + }, + { + "group": "Querying", + "pages": [ + "en/reference/querying/yql", + "en/reference/querying/simple-query-language", + "en/reference/querying/json-query-language", + "en/reference/querying/grouping-language", + "en/reference/querying/sorting-language", + "en/reference/querying/query-profiles", + "en/reference/querying/semantic-rules", + "en/reference/querying/default-result-format", + "en/reference/querying/page-result-format", + "en/reference/querying/page-templates" + ] + }, + { + "group": "Ranking and inference", + "pages": [ + "en/reference/ranking/ranking-expressions", + "en/reference/ranking/tensor", + "en/reference/ranking/rank-features", + "en/reference/ranking/nativerank", + "en/reference/ranking/string-segment-match", + "en/reference/ranking/rank-feature-configuration", + "en/reference/ranking/rank-types", + "en/reference/ranking/model-files", + "en/reference/ranking/constant-tensor-json-format" + ] + }, + { + "group": "RAG and embedding", + "pages": 
["en/reference/rag/chunking", "en/reference/rag/embedding"] + }, + { + "group": "Operations", + "pages": [ + "en/reference/operations/health-checks", + "en/reference/operations/log-files", + "en/reference/operations/tools", + { + "group": "Metrics", + "pages": [ + "en/reference/operations/metrics/metrics", + "en/reference/operations/metrics/default-metric-set", + "en/reference/operations/metrics/vespa-metric-set", + "en/reference/operations/metrics/metric-units", + "en/reference/operations/metrics/container", + "en/reference/operations/metrics/distributor", + "en/reference/operations/metrics/searchnode", + "en/reference/operations/metrics/storage", + "en/reference/operations/metrics/configserver", + "en/reference/operations/metrics/logd", + "en/reference/operations/metrics/nodeadmin", + "en/reference/operations/metrics/slobrok", + "en/reference/operations/metrics/clustercontroller", + "en/reference/operations/metrics/sentinel" + ] + }, + { + "group": "Self-managed", + "pages": ["en/reference/operations/self-managed/tools"] + } + ] + }, + { + "group": "Security", + "pages": ["en/reference/security/mtls"] + }, + { + "group": "Clients", + "pages": [ + { + "group": "Vespa CLI", + "pages": [ + "en/reference/clients/vespa-cli/vespa", + "en/reference/clients/vespa-cli/vespa_activate", + "en/reference/clients/vespa-cli/vespa_auth", + "en/reference/clients/vespa-cli/vespa_clone", + "en/reference/clients/vespa-cli/vespa_config", + "en/reference/clients/vespa-cli/vespa_curl", + "en/reference/clients/vespa-cli/vespa_deploy", + "en/reference/clients/vespa-cli/vespa_destroy", + "en/reference/clients/vespa-cli/vespa_document", + "en/reference/clients/vespa-cli/vespa_feed", + "en/reference/clients/vespa-cli/vespa_fetch", + "en/reference/clients/vespa-cli/vespa_inspect", + "en/reference/clients/vespa-cli/vespa_log", + "en/reference/clients/vespa-cli/vespa_prepare", + "en/reference/clients/vespa-cli/vespa_prod", + "en/reference/clients/vespa-cli/vespa_query", + 
"en/reference/clients/vespa-cli/vespa_status", + "en/reference/clients/vespa-cli/vespa_test", + "en/reference/clients/vespa-cli/vespa_version", + "en/reference/clients/vespa-cli/vespa_visit" + ] + } + ] + } + ] + }, + { + "tab": "Changelog", + "icon": "clock-rotate-left", + "pages": [ + "en/reference/release-notes/vespa7", + "en/reference/release-notes/vespa8", + "en/reference/release-notes/vespa9" + ] + } + ], + "global": { + "anchors": [ + { + "anchor": "About", + "href": "https://vespa.ai/company/", + "icon": "users" + }, + { + "anchor": "Blog", + "href": "https://blog.vespa.ai/.", + "icon": "newspaper" + } + ] + } + }, + "logo": { + "light": "/logo/light.svg", + "dark": "/logo/dark.svg", + "href": "https://vespa.ai" + }, + "navbar": { + "links": [ + { + "label": "Console login", + "href": "https://console.vespa-cloud.com/", + "icon": "user" + }, + { + "label": " ", + "icon": "github", + "href": "https://github.com/vespa-engine/vespa/" + } + ], + "primary": { + "type": "button", + "label": "Free trial", + "href": "https://vespa.ai/free-trial/" + } + }, + "contextual": { + "options": [ + "copy", + "view", + "chatgpt", + "claude", + "perplexity", + "mcp", + "cursor", + "vscode" + ] + }, + "footer": { + "socials": { + "github": "https://github.com/vespa-engine", + "linkedin": "https://www.linkedin.com/company/vespa-ai/posts/?feedView=all", + "x": "https://x.com/vespaengine", + "youtube": "https://www.youtube.com/channel/UCVXw_f6UHff8-V9FA1LMIiw" + } + } +} diff --git a/mintlify-docs/en/applications/bundles.mdx b/mintlify-docs/en/applications/bundles.mdx new file mode 100644 index 0000000000..96de1eece9 --- /dev/null +++ b/mintlify-docs/en/applications/bundles.mdx @@ -0,0 +1,689 @@ +--- +title: "Bundles" +description: "The Container uses [OSGi](https://osgi.org) to provide a modular platform for developing applications that can be composed of many reusable components. The user can deploy, upgrade and remove these components at runtime." 
+--- + +## OSGi + +OSGi is a framework for modular development of Java applications, where a set of resources called *bundles* can be installed. OSGi allows the developer to control which resources (Java packages) in a bundle that should be available to other bundles. Hence, you can explicitly declare a bundle's public API, and also ensure that internal implementation details remain hidden. + +Unless you're already familiar with OSGi, we recommend reading Richard S. Hall's presentation [Learning to ignore OSGi](https://cwiki.apache.org/confluence/download/attachments/7956/Learning_to_ignore_OSGi.pdf), which explains the most important aspects that you must relate to as a bundle developer. There are other good OSGi tutorials available: + +- [OSGi for Dummies](https://thiloshon.wordpress.com/2020/03/04/osgi-for-dummies/) +- [OSGi Modularity and Services - Tutorial](https://www.vogella.com/tutorials/OSGi/article.html) (You can ignore the part about OSGi services.) + +JDisc uses OSGi's *module* and *lifecycle* layers, and does not provide any functionality from the *service* layer. + +## OSGi bundles + +An OSGi bundle is a regular JAR file with a MANIFEST.MF file that describes its content, what the bundle requires (imports) from other bundles, and what it provides (exports) to other bundles. Below is an example of a typical bundle manifest with the most important headers: + +```java +Bundle-SymbolicName: com.yahoo.helloworld +Bundle-Description: A Hello World bundle +Bundle-Version: 1.0.0 +Export-Package: com.yahoo.helloworld;version="1.0.0" +Import-Package: org.osgi.framework;version="1.3.0" +``` + +The meaning of the headers in this bundle manifest is as follows: + +- `Bundle-SymbolicName` - The unique identifier of the bundle. +- `Bundle-Description` - A human-readable description of the bundle's functionality. +- `Bundle-Version` - Designates a version number to the bundle. 
+- `Export-Package` - Expresses which Java packages contained in a bundle will be made available to the outside world. +- `Import-Package` - Indicates which Java packages will be required from the outside world to fulfill the dependencies needed in a bundle. + +Note that OSGi has a strict definition of version numbers that need to be followed for bundles to work correctly. See the [OSGi javadoc](https://docs.osgi.org/javadoc/r4v42/org/osgi/framework/Version.html#Version(java.lang.String)) for details. As general advice, never use more than three numbers in the version (major, minor, micro). + +## Building an OSGi bundle + +As long as the project was created by following steps in the [Developer Guide](/en/applications/developer-guide), the code is already being packaged into an OSGi bundle by the [Maven bundle plugin](#maven-bundle-plugin). However, if migrating an existing Maven project, change the packaging statement to: + +```xml +<packaging>container-plugin</packaging> +``` + +and add the plugin to the build instructions: + +```xml +<plugin> + <groupId>com.yahoo.vespa</groupId> + <artifactId>bundle-plugin</artifactId> + <!-- Find latest version at search.maven.org/search?q=g:com.yahoo.vespa%20a:bundle-plugin --> + <version>{{site.variables.vespa_version}}</version> + <extensions>true</extensions> + <configuration> + <failOnWarnings>true</failOnWarnings> + </configuration> +</plugin> +``` + +Because OSGi introduces a different runtime environment from what Maven provides when running unit tests, one will not observe any loading and linking errors until trying to deploy the application onto a running Container. Errors triggered at this stage will be the likes of `ClassNotFoundException` and `NoClassDefFoundError`. To debug these types of errors, inspect the stack traces in the [error log](/en/reference/operations/log-files), and refer to [troubleshooting](#troubleshooting). + +[vespa-logfmt](/en/reference/operations/self-managed/tools#vespa-logfmt) with its *--nldequote* option is useful when reading logs. + +The test suite needs to cover deployment of the application bundle to ensure that its dynamic loading and linking issues are covered.
+ + + +## Depending on non-OSGi ready libraries + +Unfortunately, many popular Java libraries have yet to be bundled with the appropriate manifest that makes them OSGi-compatible. The simplest solution to this is to set the scope of the problematic dependency to **compile** in your pom.xml file. This will cause the bundle plugin to package the whole library into your bundle's JAR file. Until the offending library becomes available as an OSGi bundle, it means that your bundle will be bigger (in number of bytes), and that classes of that library cannot be shared across application bundles. + +The practical implication of this feature is that the bundle plugin copies the compile-scoped dependency, and its transitive dependencies, into the final JAR file, and adds a `Bundle-ClassPath` instruction to its manifest that references those dependencies. + +Although this approach works for most non-OSGi libraries, it only works for libraries where the jar file is *self-contained*. If, on the other hand, the library depends on other installed files, it must be treated as if it were a [JNI library](#depending-on-jni-libraries). + +## Depending on JNI Libraries + +This section details alternatives for using native code in the container. + +### OSGi bundles containing native code + +OSGi jars may contain .so files, which can be loaded in the standard way from Java code in the bundle. Note that since only one instance of an .so can be loaded at any time, it is not possible to hot swap a jar containing .so files - when such jars are changed the [new configuration will not take effect until the container is restarted](/en/applications/components#JNI-requires-restart). Therefore, it is often a good idea to package a .so file and its Java API into a separate bundle from the rest of your code to avoid having to restart the container on all code changes.
+ +### Add JNI code to the global classpath + +When the JNI dependency cannot be packaged in a bundle, and you run on an environment where you can install files locally on the container nodes, you can add the dependency to the container's classpath and explicitly export the packages to make them visible to OSGi bundles. + +Add the following configuration in the top level *services* element in [services.xml](/en/reference/applications/services/container): + +```xml + + + + /lib/jars/foo.jar:/path/bar.jar + com.foo,com.bar + + + ... + +``` + +Adding the config at the top level ensures that it's applied to all jdisc clusters. + +The packages are now available and visible, but they must still be imported by the application bundle that uses the library. Here is how to configure the bundle plugin to enforce an import of the packages to the bundle: + +```xml highlight={5-7} + + com.yahoo.vespa + bundle-plugin + true + + com.foo,com.bar + + +``` + +When adding a library to the classpath it becomes globally visible, and exempt from the package visibility management of OSGi. If another bundle contains the same library, there will be class loading issues. + +## Maven bundle plugin + +The *bundle-plugin* is used to build and package components for the [Vespa Container](/en/applications/components) with Maven. Refer to the [multiple-bundles sample app](https://github.com/vespa-engine/sample-apps/tree/master/examples/multiple-bundles) for a practical example. 
+ +The minimal Maven *pom.xml* configuration is: + +```xml highlight={8, 18, 27, 43} + + + 4.0.0 + com.yahoo.example + basic-application + container-plugin {/* Use Vespa packaging */} + 1.0.1 + + +{/* Find latest version at search.maven.org/search?q=g:com.yahoo.vespa */} + {{site.variables.vespa_version}} + + + + + {/* Build the bundles */} + com.yahoo.vespa + bundle-plugin + ${vespa.version} + true + + true + + + {/* Zip the application package */} + com.yahoo.vespa + vespa-application-maven-plugin + ${vespa.version} + + + + packageApplication + + + + + + + + + {/* Vespa dependencies */} + com.yahoo.vespa + container + ${vespa.version} + provided + + + +``` + +To create a deployable [application package](/en/basics/applications), run: + +```bash +$ mvn install package +``` + +The bundle plugin automates generation of configuration classes by invoking the maven step *generate-resources* - read more in [configuring-components.html](/en/applications/configuring-components) + +### Including Your Own Maven Submodules + +You can include your own Maven submodules as dependencies within your Vespa component bundle. This allows you to share code and functionality between different components within your project. + +To include a submodule as a dependency, add it to your bundle's pom.xml in scope *compile*: + +```xml + + your.project.groupId + your-submodule-artifactId + compile + +``` + +Replace `your.project.groupId` with the actual groupId of your project and `your-submodule-artifactId` with the artifactId of your submodule. + +### Including third-party libraries + +Include external dependencies into the bundle by specifying them as dependencies: + +```xml + + org.apache.httpcomponents.client5 + httpclient5 + 5.0.3 + compile + +``` + +All packages in the library will then be available for use. 
+ +If the external dependency is packaged as an OSGi bundle, it can be deployed as-is by setting the scope to *provided*: + +```xml highlight={5} + + org.apache.httpcomponents.client5 + httpclient5-osgi + 5.0-beta2 + provided + +``` + +Then, add the jar to the *components* folder of your application package, along with your own bundles. In this case, only packages exported by the author of the library will be available for use by your bundle (see the section below). + +### Exporting, Importing and Including Packages from Bundles + +OSGi features information hiding — by default all the classes used inside a bundle are invisible from the outside. Also, the bundle will by default only see (all) the packages in the Java and Container + Vespa APIs. If any other package is needed by the bundle, then it must happen in one of three ways: + +- Some additional packages are exported by the container and may be *imported* explicitly by a bundle +- In addition, any deployed bundle may export packages on its own, which may then be imported by another bundle +- Finally, the bundle may include its own JAR libraries + +One can export packages from a bundle by annotating the package. E.g. to export *com.mydomain.mypackage*, create *package-info.java* in the package directory with: + +```java +@ExportPackage(version = @Version(major=1, minor=0, micro=0)) +package com.mydomain.mypackage; +import com.yahoo.osgi.annotation.ExportPackage; +import com.yahoo.osgi.annotation.Version; +``` + +The Maven plugin will place such information in the manifest of the plugin JAR built to be picked up by the Container. + +Note that this may also be used with bundles that do not contain any searchers but libraries used by other searchers - a bundle may just exist to export some libraries and never have any searchers instantiated. + +Bundles may *import* packages (exported by some other bundle or by the container). 
The maven plugin will automatically import any package used from bundles it compiles against (i.e. maven dependencies with scope provided). + +As mentioned above, each exported package has a version associated with it. Similarly, an import of a package has a version range associated with it. The version range determines which exported packages can be used. The range used by the maven plugin is the current version (i.e. the version of the package available at compile time) up to the next major version (not including). + +To learn more about OSGi manifests and bundle packaging (e.g. how to include Java libraries and native code), please refer to the OSGi spec at [the OSGi home page](https://osgi.org). + +More details in [troubleshooting](#troubleshooting). + +### Bundle Plugin Warnings + +The bundle plugin will emit warnings for the following common issues that may cause problems at runtime: + +[WARNING] This project uses packages that are not part of Vespa's public api Only Vespa types that are in Java packages annotated with @PublicApi should be used in application code, as other types are not guaranteed to be stable across Vespa releases. [WARNING] This project does not have 'container' as provided dependency All application bundles must have the com.yahoo.vespa:container artifact as a provided scoped dependency, to ensure that the generated 'Import-Package' OSGi header contains the Java packages provided by the Vespa runtime. [WARNING] Artifacts provided from Vespa runtime are included in compile scope This makes the bundle unnecessarily large and may cause problems at runtime, as these artifacts will be embedded in the bundle. Run mvn dependency:tree to identify the source of transitive dependencies, and add the necessary exclusions in pom.xml. If an artifact must be included, e.g. because a specific version is needed, an exception can be added with the configuration parameter allowEmbeddedArtifacts.
[WARNING] This project defines packages that are also defined in provided scoped dependencies Overlapping Java packages between bundles will usually cause problems at runtime, because the OSGi framework will only be able to resolve classes from one of the bundles. + +### Configuring the Bundle-Plugin + +The bundle plugin can be configured to tailor the resulting bundle to specific needs. + +```xml + + + + com.yahoo.vespa + bundle-plugin + ${vespa.version} + true + + true/false + + true/false + + + + + + + + + + +``` + +| Element | Description | +| :--- | :--- | +| failOnWarnings | If true, the maven build will fail upon warnings for e.g. using Vespa types that are not annotated with [@PublicApi](https://javadoc.io/doc/com.yahoo.vespa/annotations/latest/com/yahoo/api/annotations/PublicApi.html). This should always be set to *true* to ensure that your project will compile successfully on future Vespa releases. Default is *false* | +| allowEmbeddedArtifacts | A comma-separated list of maven artifacts to allow embedding in the bundle, on the format *groupId:artifactId* | +| attachBundleArtifact | Whether to attach the bundle jar artifact to the build. Use this if you want to install and deploy the bundle jar along with the default jar. Default is *false* | +| bundleClassifierName | If *attachBundleArtifact* is true, this will be used as classifier for the bundle jar artifact. Default is *bundle* | +| discApplicationClass | The fully qualified class name of the Application to be started by JDisc | +| discPreInstallBundle | The name of the bundles that jDISC must pre-install | +| bundleVersion | The version of this bundle. Defaults to the Maven project version | +| bundleSymbolicName | The symbolic name of this bundle. 
Defaults to the Maven artifact ID | +| bundleActivator | The fully qualified class name of the bundle activator | +| configGenVersion | The version of *com.yahoo.vespa.configlib.config-class-plugin* that will be used to generate config classes | +| configModels | List of config models | + +### Bundle Plugin Troubleshooting + +A package *p* is imported if all of these hold: + + + + Using a class in *p* directly (i.e. not with reflection) in the bundle + + + + There are no classes in the bundle that are in *p* + + + + There's a bundle that exports *p*, and compiling against this bundle + + To debug, run + + ```bash + $ mvn -X package + ``` + + and look at Defined packages (=packages in the bundle), Exported packages of dependencies, Referenced packages (= packages used). A package is imported if it is in Exported packages and Referenced packages but not in Defined packages. + + + + +## Troubleshooting + +This section describes how to troubleshoot the most common errors when working with bundles: + +- [Bundle reload](#bundle-reload) +- [Could not create component](#could-not-create-component) +- [Could not load class](#could-not-load-class) +- [Class not found](#class-not-found) +- [Slow Container start](#slow-container-start) +- [Unresolved constraint](#unresolved-constraint) +- [Multiple implementations of the same class](#multiple-implementations-of-the-same-class) + +### Bundle reload + +Bundles that are uninstalled between re-configs are logged like this: + +```java +INFO : qrserver Container.com.yahoo.container.core.config.ApplicationBundleLoader +Bundles to schedule for uninstall: [com.yahoo.vespatest.ExtraHitSearcher [67]] +``` + +And in case there are none, it shows the empty set: + +```java +INFO : qrserver Container.com.yahoo.container.core.config.ApplicationBundleLoader +Bundles to schedule for uninstall: [] +``` + +### Could not create component + +The Container fails to start if it cannot load bundles.
Example, using wrong bundle name in the [multiple-bundles](https://github.com/vespa-engine/sample-apps/tree/master/examples/multiple-bundles) sample app: + +```xml + + + - + + +``` + +Looking at what is actually deployed in *multiple-bundles*: + +```bash +$ ls -1 target/*.jar +target/multiple-bundles-1.0.0-deploy.jar +target/multiple-bundles-1.0.0-without-dependencies.jar +target/multiple-bundles-lib-1.0.1-deploy.jar +``` + +Error in log: + +```java expandable +[2020-01-23 14:28:01.367] WARNING : qrserver Container.com.yahoo.container.di.Container + Failed to set up new component graph. Retaining previous component generation. + exception= +java.lang.IllegalArgumentException: Could not create a component with id 'com.mydomain.lib.FibonacciProducer'. +Tried to load class directly, since no bundle was found for spec: multiple-bundles-typo. +If a bundle with the same name is installed, there is a either a version mismatch or the installed bundle's version contains a qualifier string. + at com.yahoo.osgi.OsgiImpl.resolveFromClassPath(OsgiImpl.java:74) + at com.yahoo.osgi.OsgiImpl.resolveClass(OsgiImpl.java:65) + at com.yahoo.container.di.Container.addNodes(Container.java:228) + at com.yahoo.container.di.Container.createComponentsGraph(Container.java:217) + at com.yahoo.container.di.Container.getConfigAndCreateGraph(Container.java:160) + at com.yahoo.container.di.Container.getNewComponentGraph(Container.java:84) + at com.yahoo.container.core.config.HandlersConfigurerDi.getNewComponentGraph(HandlersConfigurerDi.java:145) + at com.yahoo.container.jdisc.ConfiguredApplication.lambda$startReconfigurerThread$1(ConfiguredApplication.java:275) + at java.base/java.lang.Thread.run(Thread.java:834) + +[2020-01-23 14:28:01.367] ERROR : qrserver Container.com.yahoo.container.jdisc.ConfiguredApplication + Reconfiguration failed, your application package must be fixed, unless this is a JNI reload issue: Could not create a component with id 'com.mydomain.lib.FibonacciProducer'. 
Tried to load class directly, since no bundle was found for spec: multiple-bundles-typo. If a bundle with the same name is installed, there is a either a version mismatch or the installed bundle's version contains a qualifier string. + exception= + java.lang.IllegalArgumentException: Could not create a component with id 'com.mydomain.lib.FibonacciProducer'. Tried to load class directly, since no bundle was found for spec: multiple-bundles-typo. If a bundle with the same name is installed, there is a either a version mismatch or the installed bundle's version contains a qualifier string. + at com.yahoo.osgi.OsgiImpl.resolveFromClassPath(OsgiImpl.java:74) + at com.yahoo.osgi.OsgiImpl.resolveClass(OsgiImpl.java:65) + at com.yahoo.container.di.Container.addNodes(Container.java:228) + at com.yahoo.container.di.Container.createComponentsGraph(Container.java:217) + at com.yahoo.container.di.Container.getConfigAndCreateGraph(Container.java:160) + at com.yahoo.container.di.Container.getNewComponentGraph(Container.java:84) + at com.yahoo.container.core.config.HandlersConfigurerDi.getNewComponentGraph(HandlersConfigurerDi.java:145) + at com.yahoo.container.jdisc.ConfiguredApplication.lambda$startReconfigurerThread$1(ConfiguredApplication.java:275) + at java.base/java.lang.Thread.run(Thread.java:834) +``` + +Make sure that the jar files (i.e. bundles) are actually deployed with correct names per *services.xml*. + +### Could not load class + +If a component is added to services.xml, and its class cannot be found in the declared bundle, the container will fail to start. For example: + +```xml + +``` + +The log will contain an error like this: + +```java +java.lang.IllegalArgumentException: Could not load class 'com.example.MissingClass' from bundle my-bundle +``` + +If you see this error, please make sure that the class actually exists in the given bundle. Also, verify that the `id` (or `class`) tag refers to the component class, and not e.g. a java package or the bundle name. 
+ +### Class not found + +All classes that are referred to in a user bundle must either be embedded in the bundle, or imported from another bundle by an `Import-Package` statement in the bundle manifest. When this rule has been breached, we get one of the most commonly seen exceptions when working with OSGi bundles: + +```java +... +exception= +java.lang.NoClassDefFoundError: com/acme/utils/Helper +... +java.lang.ClassNotFoundException: com.acme.utils.Helper not found by my_bundle [29] +``` + +For the [bundle-plugin](#maven-bundle-plugin) to automatically add an Import-Package statement to the bundle's manifest, that package must be exported from another bundle that is declared as a 'provided' scope dependency in *pom.xml*. If the dependency that contains the missing class is under your own control, make sure it's packaged as an OSGi bundle, and [export the package](#exporting-importing-and-including-packages-from-bundles) from that bundle. If not, the simplest way to resolve the issue is to embed the dependency in your own bundle, by setting its scope to 'compile' instead of 'provided'. + +If the strategy above does not resolve the case, it's most likely because the class in question is loaded by reflection, e.g. `Class.forName("com.acme.utils.Helper")`. This is quite common when working with libraries for pluggable frameworks, for which there is a separate [troubleshooting doc](/en/applications/pluggable-frameworks). + +### Slow Container start + +In the vespa log, a container startup looks like: + +```java +[2021-01-07 10:13:35.325] INFO : container Container.com.yahoo.container.core.config.ApplicationBundleLoader +Installed bundles: {[0]org.apache.felix.framework:6.0.3, [1]container-disc:7.335.22 ... +... +[2021-01-07 10:26:57.291] INFO : container Container.com.yahoo.container.jdisc.ConfiguredApplication +Switching to the latest deployed set of configurations and components. 
Application config generation: 1 +``` + +The container is ready at the last log line - note the long startup time. To get more details on what the container is doing at startup, inspect the ComponentGraph debug log. Find the container service name (here: "container"), set debug logging and restart the container: + +```bash +$ vespa-sentinel-cmd list +vespa-sentinel-cmd 'sentinel.ls' OK. +container state=RUNNING mode=AUTO pid=246585 exitstatus=0 id="default/container.0" + +$ vespa-logctl container:com.yahoo.container.di.componentgraph.core debug=on + +$ vespa-stop-services && vespa-start-services + +# Find DEBUG log messages for component creation, like: + +[2021-01-07 10:13:37.006] DEBUG : container Container.com.yahoo.container.di.componentgraph.core.ComponentGraph +Trying the fallback injector to create component of class com.yahoo.container.jdisc.messagebus.SessionCache to inject +into component 'chain.mychain in MbusServer' of type 'com.yahoo.container.jdisc.messagebus.MbusServerProvider'. +[2021-01-07 10:14:14.082] DEBUG : container Container.com.yahoo.container.di.componentgraph.core.ComponentNode +Constructing 'com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry' +[2021-01-07 10:26:54.669] DEBUG : container Container.com.yahoo.container.di.componentgraph.core.ComponentNode +Finished constructing 'com.yahoo.search.query.profile.compiled.CompiledQueryProfileRegistry' +``` + +In this particular example, query profile compilation takes a long time. + +### Unresolved constraint + +If the bundle has an Import-Package for a package that is not available at runtime, the OSGi framework will report an unresolved constraint error. 
The symptom as seen in the log is: + +```java +org.osgi.framework.BundleException: Unresolved constraint in bundle my_bundle [29]: +Unable to resolve 29.0: +missing requirement [29.0] osgi.wiring.package; (osgi.wiring.package=com.acme.utils) +at org.apache.felix.framework.Felix.resolveBundleRevision(Felix.java:3974) +``` + +This means that the missing class resides in a 'provided' dependency referred to from the bundle's *pom.xml*, either directly or transitively. In order to make the dependency available at runtime, there are two options: + +- The easiest is to set the dependency as 'compile' scope (instead of 'provided') to embed it in your own bundle. This works fine in most cases, unless two of the dependencies need two different versions of the same library. +- Add the missing jar file to the `components/` folder of the application package, along with your own bundles. The maven-dependency-plugin has a goal called 'copy-dependencies' to help with this. + +If the missing jar is a transitive dependency, maven can help visualize the dependency graph of the project: + +```bash +$ mvn dependency:tree +``` + +### Multiple implementations of the same class + +When two bundles interact via their public APIs, it is crucial that both bundles resolve each and every participating class to the same `Class` object. 
If not, we will get error messages like: + +```java +java.lang.LinkageError: loader constraint violation: when resolving field +"DATETIME" the class loader (instance of +org/apache/felix/framework/BundleWiringImpl$BundleClassLoaderJava5) of the referring +class, javax/xml/datatype/DatatypeConstants, and the class loader (instance of +<bootloader>) for the field's resolved type, pe/DatatypeConstants, have different Class +objects for that type +``` + +or: + +```java +java.lang.LinkageError: loader constraint violation: loader (instance of <bootloader>) +previously initiated loading for a different type with name "javax/xml/namespace/QName" +``` + +or (less frequently): + +```java +java.lang.ClassCastException: com.acme.utils.Helper cannot be cast to com.acme.utils.Helper +``` + +All these error messages indicate that multiple implementations of one or more classes are used at runtime - possible root causes: + +- Two interacting user bundles embed the same Java package. +- A user bundle embeds a Java package that is exported from one of the JDisc bundles. + +Usually, the "duplicate" package is pulled in by the user bundle transitively from a library dependency. + +#### Multiple implementations example + +Let's take a look at an example resolving the duplicate *javax.xml.namespace.QName* class from the error message above. + +All 'javax.xml' packages in the JDK are exported by the JDisc core bundle. This means that they should be imported by user bundles, instead of embedded inside them. Hence, ensure that there are no classes from packages prefixed by 'javax.xml' in the bundle. Find out which library pulls in the package: + + + + Extract the full component jar, including any embedded jars. One tool that does the job is [rjar](https://github.com/pojosontheweb/rjar/). + + + + Search the folder where the jar was extracted for 'javax.xml' classes: + + ```bash highlight={4} + $ find . | grep "javax/xml/.*\.class" + + ...
+ ./my_bundle-deploy.jar/dependencies/stax-api-1.0.1.jar/javax/xml/namespace/QName.class + ... + ``` + + + + + Find out which libraries that pulled in the offending classes - here it was `stax-api-1.0.1`. Usually, these libraries are not pulled in by the pom as direct dependencies, but rather transitively via another library being used. Use maven's dependency plugin from the application directory to find the direct dependency: + + ```bash highlight={4} + $ mvn dependency:tree -Dverbose + [INFO] +- com.acme.utils:jersey-utils:1.0.0:compile + [INFO] | +- com.sun.jersey:jersey-json:jar:1.13:compile + [INFO] | | +- org.codehaus.jettison:jettison:jar:1.1:compile + [INFO] | | \- stax:stax-api:jar:1.0.1:compile + ``` + + Observe that `stax:stax-api:1.0.1` is pulled in transitively from the direct dependency `com.acme.utils:jersey-utils`. + + + + To exclude `stax:stax-api`, add the appropriate `exclusion` from the direct dependency `com.acme.utils:jersey-utils` in *pom.xml*: + + + ```xml highlight={5-10} + + com.acme.utils + jersey-utils + 1.0.0 + + + stax + stax-api + + + + ``` + + + + +#### Multiple implementations example slf4j-api + +This is similar to the previous example, but logging libraries are maybe the most common problem teams encounter. Here we will see the symptom, use dependency:tree and add an exclusion. 
The symptom: + +```java +java.lang.RuntimeException: An exception occurred while +constructing 'com.acme.utils.Helper in acme-utils' +Caused by: java.lang.LinkageError: loader constraint violation: when resolving method +"org.slf4j.impl.StaticLoggerBinder.getLoggerFactory()Lorg/slf4j/ILoggerFactory;" +the class loader (instance of org/apache/felix/framework/BundleWiringImpl$BundleClassLoaderJava5) of the +current class, org/slf4j/LoggerFactory, and the class loader (instance of +sun/misc/Launcher$AppClassLoader) for the method's defining class, +org/slf4j/impl/StaticLoggerBinder, have different Class objects for the type +org/slf4j/ILoggerFactory used in the signature +at +org.slf4j.LoggerFactory.getILoggerFactory(LoggerFactory.java:299) +at +org.slf4j.LoggerFactory.getLogger(LoggerFactory.java:269) +``` + +Running *mvn dependency:tree* in the previous example gives: + +```txt +[INFO] +- com.yahoo.vespa:container-dev:jar:5.28.29:provided +[INFO] | +- com.yahoo.vespa:jdisc_core:jar:5.28.29:provided +[INFO] | | +- (org.slf4j:slf4j-api:jar:1.7.5:compile - scope updated from provided; omitted for duplicate) +... +[INFO] +- com.acme.utils:smartlib:jar:1.0.0:compile +[INFO] | +- org.slf4j:slf4j-api:jar:1.6.6:compile +``` + +See that slf4j-api is no longer provided from container-dev, which it should be. To fix this, add an exclusion on the offender: + +```xml + + com.acme.utils + smartlib + 1.0.0 + compile + + + org.slf4j + slf4j-api + +``` + +But it still does not work! And we can see why: + +```txt +$ jar -tf mailsearch-docprocs-deploy.jar | grep slf + +dependencies/slf4j-api-1.7.5.jar +``` + +Something still pulls in slf4j... Other candidates: + +```txt +[INFO] \- com.yahoo.vespa:application:jar:5.28.29:test +...
+[INFO] +- com.yahoo.vespa:zkfacade:jar:5.28.29:test +[INFO] | +- org.apache.curator:curator-recipes:jar:2.4.1:test +[INFO] | | +- org.apache.curator:curator-framework:jar:2.4.1:test +[INFO] | | | +- org.apache.curator:curator-client:jar:2.4.1:test +[INFO] | | | | +- (org.slf4j:slf4j-api:jar:1.6.4:test - omitted for conflict with 1.7.5) +... +[INFO] | +- (org.slf4j:slf4j-jdk14:jar:1.7.5:test - omitted for duplicate) +``` + +Added the right excludes for application and used mvn dependency:tree and verified that all references were gone, except the ones for container-dev. Still found: + +```txt +$ jar -tf mailsearch-docprocs-deploy.jar | grep slf +dependencies/slf4j-api-1.7.5.jar +``` + +One can make it work by managing this dependency explicitly - add this at POM top-level: + +```xml + + + + org.slf4j + slf4j-api + 1.7.5 + provided + + + +``` diff --git a/mintlify-docs/en/applications/chaining.mdx b/mintlify-docs/en/applications/chaining.mdx new file mode 100644 index 0000000000..6ec00dc00a --- /dev/null +++ b/mintlify-docs/en/applications/chaining.mdx @@ -0,0 +1,209 @@ +--- +title: "Chained Components" +sidebarTitle: "Chaining" +description: "[Processors](/en/applications/processing), [searcher plug-ins](/en/applications/searchers) and [document processors](/en/applications/document-processors) are chained components. They are executed serially, with each providing some service or transform, and other optionally depending on these. In other words, a chain is a set of components with dependencies. Javadoc: [com.yahoo.component.chain.Chain](https://javadoc.io/doc/com.yahoo.vespa/chain/latest/com/yahoo/component/chain/Chain)" +--- + +It is useful to read the [federation guide](/en/querying/federation) before this document. + +A chained component has three basic differences from a component in general: + +- The named services it *provides* to other components in the chain. 
+- The list of services or checkpoints which the component itself should be *before* in a chain, in other words, its dependents. +- The list of services or checkpoints which the component itself should be *after* in a chain, in other words, its dependencies. + +What a component should be placed before, what it should be placed after and what itself provides, may be either defined using Java annotations directly on the component class, or it may be added specifically to the component declarations in [services.xml](/en/reference/applications/services/container). In general, the implementation should have as many of the necessary annotations as practical, leaving the application specific configuration clean and simple to work with. + +## Ordering Components + +The execution order of the components in a chain is not defined by the order of the components in the configuration. Instead, the order is defined by adding the *ordering constraints* to the components: + +- Any component may declare that it `@Provides` some named functionality (the names are just labels that have no meaning to the container). +- Any component may declare that it must be placed `@Before` some named functionality, +- or that it must be placed `@After` some functionality. + +The container will pick any ordering of a chain consistent with the constraints of the components in the chain. + +Dependencies can be added in two ways. Dependencies which are due to the code should be added as annotations in the code: + +```java highlight={4-6} +import com.yahoo.processing.*; +import com.yahoo.component.chain.dependencies.*; + +@Provides("SourceSelection") +@Before("Federation") +@After("IntentModel") + +public class SimpleProcessor extends Processor { + @Override + public Response process(Request request, Execution execution) { + //TODO: Implement this + } +} +``` + +Multiple functionality names may be specified by using the syntax `@Provides/Before/After({"A", "B"})`. 
+ +Annotations which do not belong in the code may be added in the [configuration](/en/reference/applications/services/container): + +```xml highlight={8} + + + + + + + + ai.vespa.examples.Processor1 + + + + + + + + +``` + +For convenience, components always `Provides` their own fully qualified class name (the package and simple class name concatenated, e.g. `ai.vespa.examples.SimpleProcessor`) and their simple name (that is, only the class name, like `SimpleProcessor` in our searcher case), so it is always possible to declare that one must execute before or after some particular component. This goes for both general processors, searchers and document processors. + +Finally, note that ordering constraints are just that; in particular they are not used to determine if a given search chain, or set of search chains, is “complete”. + +## Chain Inheritance + +As implied by examples above, chains may inherit other chains in *services.xml*. + +```xml + + + + + + + + + + + + + + + +``` + +A chain will include all components from the chains named in the optional `inherits` attribute, exclude from that set all components named in the also optional `excludes` attribute and add all the components listed inside the defining tag. Both `inherits` and `excludes` are space delimited lists of reference names. + +For search chains, there are two built-in search chains which are especially useful to inherit from, `native` and `vespa`. `native` is a basic search chain, containing the basic functionality most systems will need anyway, `vespa` inherits from `native` and adds a few extra searchers which most installations containing Vespa backends will need. + +```xml + + + + + + + + + + + + +``` + +## Unit Tests + +A component should be unit tested in a chain containing the components it depends on. It is not necessary to run the dependency handling framework to achieve that, as the `com.yahoo.component.chain.Chain` class has several constructors which are easy to use while testing. 
+ +```java +Chain c = new Chain(new UselessSearcher("first"), +new UselessSearcher("second"), +new UselessSearcher("third")); +Execution e = new Execution(c, Execution.Context.createContextStub(null)); +Result r = e.search(new Query()); +``` + +The above is a rather useless test, but it illustrates how the basic workflow can be simulated. The constructor will create a chain with supplied searchers in the given order (it will not analyze any annotations). + +## Passing Information Between Components + +When different searchers or document processors depend on shared classes or field names, it is good practice defining the name only in a single place. An [example](/en/applications/searchers#passing-information-between-searchers) in the searcher development introduction illustrates an easy way to do that. + +## Invoking a Specific Search Chain + +The search chain to use can be selected in the request, by adding the request parameter: `searchChain=myChain` + +If no chain is selected in the query, the chain called `default` will be used. If no chain called `default` has been configured, the chain called `native` will be used. The *native* chain is always present and contains a basic set of searchers needed in most applications. Custom chains will usually inherit the native chain to include those searchers. + +The search chain can also be set in a [query profile](/en/querying/query-profiles). 
+ +## Example: Configuration + +Annotations which do not belong in the code may be added in the configuration, here a simple example with [search chains](/en/reference/applications/services/search#chain): + +```xml highlight={9-12} + + + + + + + Cache + Statistics + Logging + SimpleTest + + + + + + +``` + +And for [document processor chains](/en/reference/applications/services/docproc), it becomes: + +```xml highlight={5} + + + + + TextMetrics + + + + + + + +``` + +For searcher plugins the class [com.yahoo.search.searchchain.PhaseNames](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/searchchain/PhaseNames) defines a set of checkpoints third party searchers may use to help order themselves when extending the Vespa search chains. + +Note that ordering constraints are just that; in particular they are not used to determine if a given search chain, or set of search chains, is “complete”. + +## Example: Cache with async write + +Use case: In a search chain, do early return and do further search asynchronously using [ExecutorService](https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/concurrent/ExecutorService). + +Pseudocode: If cache hit (e.g. using Redis), just return cached data. 
If cache miss, return null data and let the following searcher finish further query and write back to cache: + +```java +public Result search(Query query, Execution execution) { + // cache lookup + if (cache_hit) { + return result; + } + else { + execution.search(query); // invoke async cache update searcher next in chain + return result; + } +} +``` diff --git a/mintlify-docs/en/applications/components.mdx b/mintlify-docs/en/applications/components.mdx new file mode 100644 index 0000000000..1e17926658 --- /dev/null +++ b/mintlify-docs/en/applications/components.mdx @@ -0,0 +1,259 @@ +--- +title: "Container Components" +sidebarTitle: "Components" +description: "This document explains the common concepts necessary to develop all types of Container components." +--- + +All components must extend a base class from the Container code module. For example, searchers must extend the class `com.yahoo.search.Searcher`. The main available component types are: + +- [processors](/en/applications/processing) +- [searchers](/en/applications/searchers) +- [document processors](/en/applications/document-processors) +- [search result renderers](/en/applications/result-renderers) +- [provider components](/en/applications/dependency-injection#special-components). + +Searchers and document processors belong to a subclass of components called [chained components](/en/applications/chaining). For an introduction to how the different component types interact, refer to the [overview of component types](/en/reference/applications/components#component-types). + +The components of the search container are usually deployed as part of an [OSGi bundle](/en/applications/bundles). Build the bundles using maven and the [bundle plugin](/en/applications/bundles#maven-bundle-plugin). Refer to the [multiple-bundles sample app](https://github.com/vespa-engine/sample-apps/tree/master/examples/multiple-bundles) for a multi-bundle example. 
+ +## Concurrency + +Components will be executed concurrently by multiple threads. This places an important constraint on all component classes: *non-final instance variables are not safe.* They must be eliminated, or made thread-safe somehow. + +## Resource management + +Components that use threads, file handles or other native resources that need to be released when the component falls out of scope must override a method called `deconstruct`. Here is an example implementation from a component that uses a thread pool named 'executor': + +```java +@Override +public void deconstruct() { + super.deconstruct(); + try { + executor.shutdown(); + executor.awaitTermination(10, TimeUnit.SECONDS); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } +} +``` + +Note that it is always advisable to call the super-method first. Also see [SharedResource.java](https://github.com/vespa-engine/vespa/blob/master/jdisc_core/src/main/java/com/yahoo/jdisc/SharedResource.java) for how to configure [debug options](/en/reference/applications/services/container#jvm) for use in tools like YourKit. This can be used to track component lifetime / (de)construction issues, e.g.: + +```xml + + + +``` + +Read more in [container profiling](/en/performance/profiling#profiling-the-query-container). + +## Dependency injection + +The components might need to access resources, such as other components or config. These are injected directly into the constructor. The following types of constructor dependencies are allowed: + + - [Config objects](/en/applications/configuring-components) + - [Other components](/en/applications/dependency-injection) + - [The Linguistics library](/en/linguistics/linguistics) + - [System info](#the-systeminfo-injectable-component) + +The [Component Reference](/en/reference/applications/components#injectable-components) contains a complete list of built-in injectable components.
+ +If your component class needs more than one public constructor, the one to be used by the container must be annotated with `@com.yahoo.component.annotation.Inject` from [annotations](https://search.maven.org/artifact/com.yahoo.vespa/annotations). + +### The SystemInfo Injectable Component + +This component provides information about the environment that the component is running in, for example + +- The zone in the Vespa Cloud, if applicable. +- The number of nodes in the container cluster, and their indices. +- The index of the node this is running on. + +The two latter can be used e.g. for [bucket testing](/en/applications/testing#feature-switches-and-bucket-tests) new features on a subset of nodes. Please note that the node indices are not necessarily contiguous or starting from zero. + +## Deploying a Component + +The container will create one or more instances of the component, as specified in [the application package](#adding-component-to-application-package). The container will create a new instance of this component only when it is reconfigured, so any data needed by the component can be read and prepared from a constructor in the component. + +See the full API available to components at the [Container Javadoc](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/container/package-summary). + +Once the component passes unit tests, it can be deployed. The steps involved are building the component jar file, adding it to the Vespa application package and deploying the application package. These steps are described in the following sections, using a searcher as example. + +### Building the Plugin .jar + +To build the plugin jar, call `mvn install` in the project directory. It can then be found in the target directory, and will have the suffix *-deploy.jar*. + +Assume for the rest of the document that the artifactId is `com.yahoo.search.example.SimpleSearcher` and the version is `1.0`. 
The plugin built will then have the name *com.yahoo.search.example.SimpleSearcher-1.0-deploy.jar*. + +### Adding the Plugin to the Vespa Application Package + +The previous step should produce a plugin jar file, which may now be deployed to Vespa by adding it to an [application package](/en/basics/applications): A directory containing at minimum *hosts.xml* and *services.xml*. + +- put `com.yahoo.search.example.SimpleSearcher-1.0-deploy.jar` in the `components/` directory under the application package root +- modify [services.xml](/en/reference/applications/services/services) to include the Searcher + +To include the searcher, define a search chain and add the searcher to it. Example: + +```xml + + + + + + + + + + + + + + + + + + + + + + + +``` + +The searcher id above is resolved to the plugin jar we added by the `Bundle-SymbolicName` ([a field in the manifest of the jar file](/en/applications/bundles)), which is determined by the `artifactId`, and to the right class within the bundle by the class name. By keeping the `searcher id`, `class name` and `artifactId` the same, we keep things simple, but more advanced use where this is possible is also supported. This will be explained in later sections. + +For a reference to these tags, see [the search chains reference](/en/reference/applications/services/search#chain). + +Example `hosts.xml`: + +```xml + + + + node1 + + +``` + +By creating a directory containing this `services.xml`, `hosts.xml` and `components/com.yahoo.search.example.SimpleSearcher-1.0-deploy.jar`, that directory becomes a complete application package containing a bundle, which can now be deployed. + +### Deploying the Application Package + +Set up a Vespa instance using the [quick start](/en/basics/deploy-an-application-local). Once the component and the config are added to the application package, it can be [deployed](/en/basics/applications#deploying-applications) by running `vespa deploy`. 
These steps will copy any changed bundles to the nodes in the cluster that need them and switch queries over to running the new component versions. + +This works safely without requiring any processes to be restarted, even if the application package contains changes to classes which are already running queries. The switch is atomic from the point of view of the query - all queries will execute to completion, either using only the components of the last version of the application package or only the new ones, so interdependent changes in multiple searcher components can be deployed without problems. + +#### JNI requires restart + +The exception to the above is bundles containing JNI packages. There can only be one instance of the native library, so such bundles cannot reload. Best practice is to load the JNI library in the constructor, as this will cause the new bundle *not* to load, but continue on the current version. A subsequent restart will load the new bundle. This will hence not cause failures. Alternatively, if the JNI library is initialized lazily (e.g. on first invocation), bundle reloads will succeed, but subsequent invocations of code using the JNI library will fail. Hence, the new version will run, but fail. + +A warning is issued in the log when deploying rather than the normal *Switching to the latest deployed set of handlers* - example: + +```txt +[2016-09-21 14:22:05.387] WARNING : container stderr Cannot load mylib native library +``` + +To minimize restarts, it is recommended to put JNI components in minimal, separate bundles. This will prevent reload of the JNI-bundles, unless the JNI-bundle itself is changed. + +#### Monitoring the active Application + +All containers also provide a built-in handler that outputs JSON formatted information about the active application, including its components and chains (it can also be configured to show [a user-defined version](/en/reference/applications/application-packages#versioning-application-packages)).
The handler answers requests with the path `/ApplicationStatus`. For example, if 'localhost' runs a container with HTTP configured on port 8080: + +```txt +http://localhost:8080/ApplicationStatus +``` + +### Including third-party libraries + +External dependencies [can be included into the bundle](/en/applications/bundles#maven-bundle-plugin). + +### Exporting, importing and including packages in bundles + +[OSGi features information hiding - by default all the classes used inside a bundle are invisible from the outside.](/en/applications/bundles) + +### Global and exported packages + +The JDisc Container has one set of *global* packages. These are packages that are available with no import, and constitute the supported API of the JDisc Container. Backwards incompatible changes are not made to these packages. + +There is also a set of *exported* packages. These are available for import, and include all legacy packages, plus extension packages which are not part of the core API. Note that these are not considered to be "public" APIs, as global packages are, and backwards incompatible changes *can* be made to these packages, or they may be removed. + +The list of exported and global packages is available in the [container-disc pom.xml](https://github.com/vespa-engine/vespa/blob/master/container-disc/pom.xml), in `project/properties/exportedPackages` and `project/properties/globalPackages`. + +### Versions + +All the elements of the search container which may be referenced by an id may be *versioned*, that includes chains, components and query profiles. This allows multiple versions of these elements to be used at the same time, including multiple versions of the same classes, which is handy for [bucket testing](/en/applications/testing#feature-switches-and-bucket-tests) new versions. + +An id or id reference may include a version by using the following syntax: `name:version`. This works with ids in search requests, services.xml, code and query profiles.
+ +A version has the format: + +```txt +major.minor.micro.qualifier +``` + +where major, minor and micro are integers and qualifier is a string. Any right-hand portion of the version string may be skipped. In *versions*, skipped values mean "0" (and *empty* for the qualifier). In *version references* skipped values mean "unspecified". Any unspecified number will be matched to the highest number available, while a qualifier specified *must* be matched exactly if it is specified (qualifiers are rarely used). + +To specify the version of a bundle, specify version in pom.xml (we recommend not using *qualifier*): + +```xml +com.yahoo.example +MyPlugin +major.minor.micro +``` + +This will automatically be used to set the `Bundle-Version` in the bundle manifest. + +For more details, see [component versioning](/en/reference/applications/components#component-versioning). + +## Troubleshooting + +### Container start + +If there is some error in the application package, it will usually be detected during the `vespa prepare` step and cause an error message. However, some classes of errors are only detected once the application is deployed. When redeploying an application, it is therefore recommended to watch the vespa log by running: + +```txt +vespa-logfmt -N +``` + +The new application is active after the INFO message: + +```txt +Switched to the latest deployed set of handlers...; +``` + +If this message does not appear after a reasonable amount of time after completion of `vespa activate`, one will see some errors or warnings instead, that will help debug the application. + +### Component load + +At deployment or container start, components are constructed.
Construction can fail - to debug, enable more logging (replace "container" as needed with container id): + +```txt +$ vespa-logctl container:com.yahoo.container.di.componentgraph.core.ComponentNode debug=on +.com.yahoo.container.di.componentgraph.core.ComponentNode ON ON ON ON ON ON ON OFF +``` + +Look for "Constructing" and "Finished constructing" in *vespa.log* - this identifies components that did not construct. + +Model downloading failures look like the below and are caused by a failure to download the model to the container: + +```json +ERROR container Container.com.yahoo.jdisc.core.StandaloneMain JDisc exiting: Throwable caught: +exception= +java.lang.RuntimeException: Not able to create config builder for payload '{ +"tokenizerPath": "\\"\\" https://huggingface.co/Snowflake/snowflake-arctic-embed-l/raw/main/tokenizer.json \\"\\"", +"transformerModel": "\\"\\" https://huggingface.co/Snowflake/snowflake-arctic-embed-l/resolve/main/onnx/model_int8.onnx \\"\\"", +"transformerMaxTokens": 512, +"transformerInputIds": "input_ids", +"transformerAttentionMask": "attention_mask", +"transformerTokenTypeIds": "token_type_ids", +"transformerOutput": "last_hidden_state", +"normalize": true, +"poolingStrategy": "cls", +"transformerExecutionMode": "sequential", +"transformerInterOpThreads": 1, +"transformerIntraOpThreads": -4, +"transformerGpuDevice": 0 +} +``` + +Check urls / names, and that the model can be downloaded from the network the Vespa Container is running in. diff --git a/mintlify-docs/en/applications/config-system.mdx b/mintlify-docs/en/applications/config-system.mdx new file mode 100644 index 0000000000..244172ea43 --- /dev/null +++ b/mintlify-docs/en/applications/config-system.mdx @@ -0,0 +1,170 @@ +--- +title: "The Config System" +description: "The config system in Vespa is responsible for turning the application package into live configuration of all the nodes, processes and components that realizes the running system.
Here we deep dive into various aspects of how this works." +--- + +## Node configuration + +The problem of configuring nodes can be divided into three parts, each addressed by different solutions: + +- **Node system level configuration:** Configure OS level settings such as time zone as well as user privileges on the node. +- **Package management**: Ensure that the correct set of software packages is installed on the nodes. This functionality is provided by three tools working together. +- **Vespa configuration:** Starts the configured set of processes on each node with their configured startup parameters and provides dynamic configuration to the modules run by these services. *Configuration* here is any data which: + + - can not be fixed at compile time + - is static most of the time + +Note that by these definitions, this allows all the nodes to have the same software packages (disregarding version differences, discussed later), as variations in what services are run on each node and in their behavior is achieved entirely by using Vespa Configuration. This allows managing the complexity of node variations completely within the configuration system, rather than across multiple systems. + +Configuring a system can be divided into: + +- **Configuration assembly:** Assembly of a complete set of configurations for delivery from the inputs provided by the parties involved in configuring the system +- **Configuration delivery:** Definition of individual configurations, APIs for requesting and accessing configuration, and the mechanism for delivering configurations from their source to the receiving components + +This division allows the problem of reliable configuration delivery in large distributed systems to be addressed in configuration delivery, while the complexities of assembling complete configurations can be treated as a vm-local design problem. 
+ +An important feature of Vespa Configuration is the nature of the interface between the delivery and assembly subsystems. The assembly subsystem creates as output a (Java) object model of the distributed system. The delivery subsystem queries this model to obtain concrete configurations of all the components of the system. This allows the assembly subsystem to accept higher level, and simpler to use, abstractions as input and automatically derive detailed configurations with the correct interdependencies. This division insulates the external interface and the components being configured from changes in each other. In addition, the system model provides the home for logic implementing node/component instance variations of configuration. + +## Configuration assembly + +Config assembly is the process of turning the configuration input sources into an object model of the desired system, which can respond to queries for configs given a name and config id. Config assembly for Vespa systems can become complex, because it involves merging information owned by multiple parties: + +- **Vespa operations** own the nodes and control assignment of nodes to services/applications +- **Vespa service providers** own services which host multiple applications running on Vespa +- **Vespa applications** define the final applications running on nodes and shared services + +The current config model assembly procedure uses a single source - the *application package*. The application package is a directory structure containing defined files and subdirectories which together completely define the system - including which nodes belong in the system, which services they should run and the configuration of these services and their components. When the application deployer wants to change the application, [vespa prepare](/en/reference/clients/vespa-cli/vespa_prepare) is issued to a config server, with the application package as argument.
+ +At this point the system model is assembled and validated and any feedback is issued to the deployer. If the deployer decides to make the new configuration active, a [vespa activate](/en/reference/clients/vespa-cli/vespa_activate) is then issued, causing the config server cluster to switch to the new system model and respond with new configs on any active subscriptions where the new system model caused the config to change. This ensures that subscribers get new configs in a timely manner on changes, and that the changes propagated are the minimal set such that small changes to an application package cause correspondingly small changes to the system. + + +![](/assets/img/config-assembly.svg) + + +The config model itself is pluggable, so that service providers may write plugins for assembling a particular service. The plugins are written in Java, and are installed together with the Vespa Configuration. Service plugins define their own syntax for specifying services that may be configured by Vespa applications. This allows the applications to be specified in an abstract manner, decoupled from the configuration that is delivered to the components. + +## Configuration delivery + +Configuration delivery encompasses the following aspects: + +- Definition of configurations +- The component view (API) of configuration +- Configuration delivery mechanism + +These aspects work together to realize the following goals: + +- Eliminate inconsistency between code and configuration. +- Eliminate inconsistency between the desired configuration and the state on each node. +- Limit temporary inconsistencies after reconfiguration. + +The next three subsections discuss the three aspects above, followed by subsections on two special concerns - bootstrapping and system upgrades.
+ +### Configuration definitions + +A *configuration* is a set of simple or array key-values with a name and a type, which can possibly be nested - example: + +```txt +myProperty "myvalue" +myArray[1] +myArray[0].key1 "someValue" +myArray[0].key2 1337 +``` + +The *type definition* (or class) of a configuration object defines and documents the set of fields a configuration may contain with their types and default values. It has a name as well as a namespace. For example, the above config instance may have this definition: + +```txt +namespace=foo.bar + +# Documentation of this key +myProperty string default="foo" + +# etc. +myArray[].key1 string +myArray[].key2 int default=0 +``` + +An individual config typically contains a coherent set of settings regarding some topic, such as *logging* or *indexing*. A complete system consists of many instances of many config types. + +### Component view + +Individual components of a system consume one or more such configs and use their values to influence their behavior. APIs are needed for *requesting* configs and for *accessing* the values of those configs as they are provided. + +*Access* to configs happens through a (Java or C++) class generated from the config definition file. This ensures that any inconsistency between the fields declared in a config type and the expectations of the code accessing it are caught at compile time. The config definition is best viewed as another class with an alternative form of source syntax belonging to the components consuming it. A Maven target is provided for generating such classes from config definition types. + +Components may use two different methods for *requesting* configurations: subscription and dependency injection. + +**Subscription:** The component sets up *ConfigSubscriber*, then subscribes to one or more configs.
This is the simple approach, there are [other ways of](/en/applications/configapi-dev) getting configs too: + +```java +ConfigSubscriber subscriber = new ConfigSubscriber(); +ConfigHandle handle = subscriber.subscribe(MyConfig.class, "myId"); +if (!subscriber.nextConfig()) throw new RuntimeException("Config timed out."); +if (handle.isChanged()) { + String message = handle.getConfig().myKey(); + // ... consume the rest of this config +} +``` + +**Dependency injection:** The component declares its config dependencies in the constructor and subscriptions are set up on its behalf. When changed configs are available a new instance of the component is created. The advantage of this method is that configs are immutable throughout the lifetime of the component such that no thread coordination is required. This method is currently only available in Java using the [Container](/en/applications/containers). + +```java +public MyComponent(MyConfig config) { + String myKey = config.myKey(); + // ... consume the rest of this config +} +``` + +For unit testing, [configs can be created with Builders](/en/applications/configapi-dev#unit-testing), submitted directly to components. + +### Delivery mechanism + +The config delivery mechanism is responsible for ensuring that a new config instance is delivered to subscribing components, each time there is a change to the system model causing that config instance to change. A config subscription is identified by two parameters, the *config definition name and namespace* and the [config id](/en/applications/configapi-dev#config-id) used to identify the particular component instance making the subscription. + +The in-process config library will forward these subscription requests to a node local [config proxy](/en/operations/self-managed/config-proxy), which provides caching and fan-in from processes to node. 
The proxy in turn issues these subscriptions to a node in the configuration server cluster, each of which hosts a copy of the system model and resolves config requests by querying the system model. + +To provide config server failover, the config subscriptions are implemented as long-timeout gets, which are immediately resent when they time out, but conceptually this is best understood as push subscriptions: + + +![](/assets/img/config-delivery.svg) + + +As configs are not stored as files locally on the nodes, there is no possibility of inconsistencies due to local edits, or of nodes coming out of maintenance with a stale configuration. As configuration changes are pushed as soon as the config server cluster allows, time inconsistencies during reconfigurations are minimized, although not avoided as there is no global transaction. + +Application code and config are generally pulled from the config server - it is however possible to use the [url](/en/reference/applications/config-files#url) config type to refer to any resource to download to nodes. + +### Bootstrapping + +Each Vespa node runs a [config-sentinel](/en/operations/self-managed/config-sentinel) process which starts and maintains the services running on the node. + +### System upgrades + +The configuration server will up/downgrade between config versions on the fly on minor upgrades which cause discrepancies between the config definitions requested and those produced by the configuration model. Major upgrades, which involve incompatible changes to the configuration protocol or the system model, require a [procedure](/en/operations/self-managed/config-proxy). + +## Notes + +Find more information for using the Vespa config API in the [reference doc](/en/applications/configapi-dev). + +Vespa Configuration makes the following assumptions about the nodes using it: + +- All nodes have the software packages needed to run the configuration system and any services which will be configured to run on the node. 
This usually means that all nodes have the same software, although this is not a requirement +- All nodes have [VESPA_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables) set +- All nodes know their fully qualified domain name + +Reading this document is not necessary in order to use Vespa or to develop Java components for the Vespa container - for this purpose, refer to [Configuring components](/en/applications/configuring-components). + +## Further reads + +- [Configuration server operations](/en/operations/self-managed/configuration-server) is a good resource for troubleshooting. +- Refer to the [bundle plugin](/en/applications/bundles#maven-bundle-plugin) for how to build an application package with Java components. +- During development on a local instance it can be handy to just wipe the state completely and start over: + + + + [Delete all config server state](/en/operations/self-managed/configuration-server#zookeeper-recovery) on all config servers + + + + Run [vespa-remove-index](/en/reference/operations/self-managed/tools#vespa-remove-index) to wipe content nodes + + + diff --git a/mintlify-docs/en/applications/configapi-dev.mdx b/mintlify-docs/en/applications/configapi-dev.mdx new file mode 100644 index 0000000000..eff0900877 --- /dev/null +++ b/mintlify-docs/en/applications/configapi-dev.mdx @@ -0,0 +1,329 @@ +--- +title: "Cloud Config API" +sidebarTitle: "Java Config API" +description: "This document describes how to use the C++ and Java versions of the Cloud config API (the 'config API'). This API is used internally in Vespa, and reading this document is not necessary in order to use Vespa or to develop Java components for the Vespa container. For this purpose, please refer to [Configuring components](/en/applications/configuring-components) instead." +--- + +Throughout this document, we will use as example an application serving up a configurable message. 
+ +## Creating a Config Definition + +The first thing to do when deciding to use the config API is to define the config you want to use in your application. This is described in the [configuration file reference](/en/reference/applications/config-files). Here we will use the definition `motd.def` from the complete example at the end of the document: + +```text +namespace=myproject + +message string default="NO MESSAGE" +port int default=1337 +``` + +## Generating Source Code and Accessing Config in Code + +Before you can access config in your program you will need to generate source code for the config definition. Simple steps for how you can generate API code and use the API are provided for [Java](/en/applications/configapi-dev#the-java-config-api). See also [javadoc](https://javadoc.io/doc/com.yahoo.vespa/config-lib). + +We also recommend that you read the [general guidelines](#guidelines) for examples of advanced usage and recommendations for how to use the API. + +## Config ID + +The config id specified when requesting config is essentially an identifier of the component requesting config. The config server contains a config object model, which maps a request for a given config name and config id to the correct configproducer instance, which will merge default values from the config definition with config from the object model and config set in `services.xml` to produce the final config instance. + +The config id is given to a service via the VESPA\_CONFIG\_ID environment variable. The [config sentinel](/en/operations/self-managed/config-sentinel) sets the environment variable to the id given by the config model. This id should then be used by the service to subscribe for config. If you are running multiple services, each of them will be assigned a **unique config id** for that service, and a service should not subscribe using any config id other than its own. + +If you need to get config for a service that is not part of the model (i.e. 
it is not specified in the services.xml), but that you want to specify values for in services.xml, use the config id `client`. + +## Schema Compatibility Rules + +A schema incompatibility occurs if the config class (for example `MotdConfig` in the C++ and Java sections above) was built from a different def-file than the one the server is seeing and using to serve config. Some such incompatibilities are automatically handled by the config system, others lead to error. This is useful to know during development/testing of a config schema. + +Let *S* denote a config definition called *motd* which the server is using, and *C* denote a config definition also called *motd* which the client is using, i.e. the one that created `MotdConfig` used when subscribing. The following is the system's behavior: + +| | | +| :--- | :--- | +| Compatible Changes | These schema mismatches are handled automatically by the configserver:
- C is missing a config value that S has: The server will omit that value from the response.
- C has an additional config value with a default value: The server will include that value in the response.
- C and S both have a config value, but the default values differ: The server will use C's default value. | +| Incompatible Changes | These schema mismatches are not handled by the config server, and will typically lead to error in the subscription API because of missing values (though in principle some consumers of config may tolerate them):
- C has an additional config value without a default value: The server will not include anything for that value.
- C has the type of a config value changed, for example from string to int: The server will print an error message, and not include anything for that value. The user must use an entirely new name for the config if such a change must be made. | + +As with any data schema, it is wise to be conservative about changing it if the system will have new versions in the future. For a `def` schema, removing a config value constitutes a semantic change that may lead to problems when an older version of some config subscriber asks for config. In large deployments, the risk associated with this increases, because of the higher cost of a full restart of everything. + +Consequently, one should prefer creating a new config name, to removing a config value from a schema. + +## Creating a Deployable Application Package + +The application package consists of the following files: + +``` +app/services.xml +app/hosts.xml +``` + +The services file contains the services that is handled by the config model plugin. The hosts file contains: +```xml + + + node0 + + +``` + +## Setting Up a Running System + +To get a running system, first install the cloudconfig package, start the config server, then deploy the application: Prepare the application: + +$ vespa prepare /path/to/app/folder +Activate the application:$ vespa activate /path/to/app/folder + +Then, start vespa. This will start the application and pass it its config id via the VESPA\_CONFIG\_ID environment variable. + +## Advanced Usage of the Config API + +For a simple application, having only 1 config may suffice. In a typical server application, however, the number of config settings can become large. Therefore, we **encourage** that you split the config settings into multiple logical classes. This section covers how you can use a ConfigSubscriber to subscribe to multiple configs and how you should group configs based on their dependencies. 
Configs can either be: + +- Independent static configs +- Dependent static configs +- Dependent dynamic configs + +We will give a few examples of how you can cope with these different scenarios. The code examples are given in a pseudo format common to C++ and Java, but they should be easy to convert to their language specific equivalents. + +### Independent Static Configs + +Independent configs means that it does not matter if one of them is updated independently of the other. In this case, you might as well use one ConfigSubscriber for each of the configs, but it might become tedious to check all of them. Therefore, the recommended way is to manage all of these configs using one ConfigSubscriber. In this setup, it is also typical to split the subscription phase from the config check/retrieval part. The subscribing part: + +```c++ C ++ +ConfigSubscriber subscriber; +ConfigHandle::UP fooHandle = subscriber.subscribe(...); +ConfigHandle::UP barHandle = subscriber.subscribe(...); +ConfigHandle::UP bazHandle = subscriber.subscribe(...); +``` +```java Java +ConfigSubscriber subscriber; +ConfigHandle fooHandle = subscriber.subscribe(FooConfig.class, ...); +ConfigHandle barHandle = subscriber.subscribe(BarConfig.class, ...); +ConfigHandle bazHandle = subscriber.subscribe(BazConfig.class, ...); +``` + +And the retrieval part: + +``` +if (subscriber.nextConfig()) { + if (fooHandle->isChanged()) { + // Reconfigure foo + } + if (barHandle->isChanged()) { + // Reconfigure bar + } + if (bazHandle->isChanged()) { + // Reconfigure baz + } +} +``` + +This allows you to perform the config fetch part either in its own thread or as part of some other event thread in your application. + +### Dependent Static Configs + +Dependent configs means that one of your configs depends on the value in another config. The most common is that you have one config which contains the config id to use when subscribing to the second config. 
In addition, your system may need that the configs are updated to the same **generation**. + + +**Note:** + +A generation is a monotonically increasing number which is increased each time an application is deployed with `vespa deploy`. Certain applications may require that all configs are of the same generation to ensure consistency, especially container-like applications. All configs subscribed to by a ConfigSubscriber are guaranteed to be of the same generation. + + +The configs are static in the sense that the config id used does not change. The recommended way to approach this is to use a two phase setup, where you fetch the initial configs in the first phase, and then subscribe to both the initial and derived configs in order to ensure that they are of the same generation. Assume that the InitialConfig config contains two fields named *derived1* and *derived2*: + +```c++ C++ +ConfigSubscriber initialSubscriber; +ConfigHandle::UP initialHandle = subscriber.subscribe(...); +while (!subscriber.nextConfig()); // Ensure that we actually get initial config. +std::auto_ptr initialConfig = initialHandle->getConfig(); + +ConfigSubscriber subscriber; +... = subscriber.subscribe(...); +... = subscriber.subscribe(initialConfig->derived1); +... = subscriber.subscribe(initialConfig->derived1); +``` + +```java Java +ConfigSubscriber initialSubscriber; +ConfigHandle initialHandle = subscriber.subscribe(InitialConfig.class, ...); +while (!subscriber.nextConfig()); // Ensure that we actually get initial config. +InitialConfig initialConfig = initialHandle.getConfig(); + +ConfigSubscriber subscriber; +... = subscriber.subscribe(InitialConfig.class, ...); +... = subscriber.subscribe(DerivedConfig.class, initialConfig.derived1); +... = subscriber.subscribe(DerivedConfig.class, initialConfig.derived1); +``` + +You can then check the configs in the same way as for independent static configs, and be sure that all your configs are of the same generation. 
The reason why you need to create a new ConfigSubscriber is that **once you have called nextConfig(), you cannot add or remove new subscribers**. + +### Dependent Dynamic Configs + +Dynamic configs mean that the set of configs that you subscribe for may change between each deployment. This is the hardest case to solve, and how hard it is depends on how many levels of configs you have. The most common one is to have a set of bootstrap configs, and another set of configs that may change depending on the bootstrap configs (typically in an application that has plugins). To cover this case, you can use a class named `ConfigRetriever`. Currently, it is **only available in the C++ API**. + +The ConfigRetriever uses the same mechanisms as the ConfigSubscriber to ensure that you get a consistent set of configs. In addition, two more classes called `ConfigKeySet` and `ConfigSnapshot` are added. The ConfigRetriever takes in a set of configs used to bootstrap the system in its constructor. This set does not change. It then provides one method, `getConfigs(ConfigKeySet)`. The method returns a ConfigSnapshot of the next generation of bootstrap configs or derived configs. + +To create the ConfigRetriever, you must first populate a set of bootstrap configs: + +```c++ +ConfigKeySet bootstrapKeys; +bootstrapKeys.add(configId); +bootstrapKeys.add(configId); +``` + +The bootstrap configs are typically configs that will always be needed by your application. Once you have defined your set, you can create the retriever and fetch a ConfigSnapshot of the bootstrap configs: + +``` +ConfigRetriever retriever(bootstrapKeys); +ConfigSnapshot bootstrapConfigs = retriever.getConfigs(); +``` + +The ConfigSnapshot contains the bootstrap config, and you may use that to fetch the individual configs. 
You need to provide the config id and the type in order for the snapshot to know which config to look for: + +```xml +if (!bootstrapConfigs.empty()) { + std::auto_ptr bootstrapFoo = bootstrapConfigs.getConfig(configId); + std::auto_ptr bootstrapBar = bootstrapConfigs.getConfig(configId); +``` + +The snapshot returned is empty if the retriever was unable to get the configs. In that case, you can try calling the same method again. + +Once you have the bootstrap configs, you know the config ids for the other components that you should subscribe for, and you can define a new key set. Let's assume that bootstrapFoo contains an array of config ids we should subscribe for. + +```java +ConfigKeySet pluginKeySet; +for (size_t i = 0; i < (*bootstrapFoo).pluginConfigId.size; i++) { + pluginKeySet.add((*bootstrapFoo).pluginConfigId[i]); +} +``` + +In this example we know the type of config requested, but this could be done in another way letting the plugin add keys to the set. + +Now that the derived configs have been added to the pluginKeySet, we can request a snapshot of them: + +```java +ConfigSnapshot pluginConfigs = retriever.getConfigs(pluginKeySet); +if (!pluginConfigs.empty()) { + // Configure each plugin with a config picked from the snapshot. +} +``` +And that's it. When calling the method without any key parameters, the snapshot returned by this method may be empty if **the config could not be fetched within the timeout**, or **the generation of configs has changed**. To check if you should call getBootstrapConfigs() again, you can use the `bootstrapRequired()` method. If it returns true, you will have to call getBootstrapConfigs() again, because the plugin configs have been updated, and you need a new bootstrap generation to match it. If it returns false, you may call getConfigs() again to try and get a new generation of plugin configs. + +We recommend that you use the retriever API if you have a use case like this. 
The alternative is to create your own mechanism using two ConfigSubscriber classes, but this is **not** recommended. + +### Advice on Config Modelling + +Regardless of which of these types of configs you have, it is recommended that you always fetch all the configs you need **before** you start configuring your system. This is because the user may deploy multiple different version of the config that may cause your components to get conflicting config values. A common pitfall is to treat dependent configs as independent, thereby causing inconsistency in your application when a config update for config A arrives before config B. The ConfigSubscriber was created to minimize the possibility of making this mistake, by ensuring that all of the configs comes from the same config reload. + +**Tip:** Set up your entire *tree* of configs in one thread to ensure consistency, and configure your system once all of the configs have arrived. This also maps best to the ConfigSubscriber, since it is not thread safe. + +## The Java config API + +Assumption: a [def file](/en/applications/configapi-dev), which is the schema for one of your configs, is created and put in `src/main/resources/configdefinitions/`. + +To generate source code for the def-file, invoke the `config-class-plugin` from *pom.xml*, in the ``, `` section: + +```xml + + com.yahoo.vespa + config-class-plugin + ${vespa.version} + + + config-gen + + config-gen + + + + +``` + +The generated classes will be saved to `target/generated-sources/vespa-configgen-plugin`, when the `generate-sources` phase of the build is executed. The def-file [`motd.def`](/en/applications/configapi-dev) is used in this tutorial, and a class called `MotdConfig` was generated (in the package `myproject`). It is a subtype of `ConfigInstance`. 
+ +When using only the config system (and not other parts of Vespa or the JDisc container), pull in that by using this in pom.xml: + +```xml + + com.yahoo.vespa + config + ${vespa.version} + provided + +``` + +## Subscribing and getting config + +To retrieve the config in the application, create a `ConfigSubscriber`. A `ConfigSubscriber` is capable of subscribing to one or more configs. The example shown here uses simplified error handling: + +```java +ConfigSubscriber subscriber = new ConfigSubscriber(); +ConfigHandle handle = subscriber.subscribe(MotdConfig.class, "motdserver2/0"); +if (!subscriber.nextConfig()) throw new RuntimeException("Config timed out."); +if (handle.isChanged()) { + String message = handle.getConfig().message(); + int port = handle.getConfig().port(); +} +``` + +Note that `isChanged()` always will be true after the first call to `nextConfig()`, it is included here to illustrate the API. + +In many cases one will do this from a thread which loops the `nextConfig()` call, and reconfigures your application if `isChanged()` is true. + +The second parameter to `subscribe()`, *"motdserver2/0"*, is the [config id](/en/applications/configapi-dev#config-id). + +If one `ConfigSubscriber` subscribes to multiple configs, `nextConfig()` will only return true if the configs are of the same generation, i.e. they are "in sync". + +See the [com.yahoo.config](https://javadoc.io/doc/com.yahoo.vespa/config-lib) javadoc for details. Example: + +```java +ConfigSubscriber subscriber = new ConfigSubscriber(); +ConfigHandle motdHandle = subscriber.subscribe(MotdConfig.class, "motdserver2/0"); +ConfigHandle anotherHandle = subscriber.subscribe(AnotherConfig.class, "motdserver2/0"); +if (!subscriber.nextConfig()) throw new RuntimeException("Config timed out."); +// We now have a synchronized new generation for these two configs. 
+if (motdHandle.isChanged()) { + String message = motdHandle.getConfig().message(); + int port = motdHandle.getConfig().port(); +} +if (anotherHandle.isChanged()) { + String myfield = anotherHandle.getConfig().getMyField(); +} +``` + +## Simplified subscription + +In cases like the first example above, where you only subscribe to one config, you may also subscribe using the `ConfigSubscriber.SingleSubscriber` interface. In this case, you define a `configure()` method from the interface, and call a special `subscribe()`. The method will start a dedicated config fetcher thread for you. The method will throw an exception in the user thread if initial configuration fails, and print a warning in the config thread if it fails afterwards. Example: + +```java +public class MyConfigSubscriber implements ConfigSubscriber.SingleSubscriber { + + public MyConfigSubscriber(String configId) { + new ConfigSubscriber().subscribe(this, MotdConfig.class, configId); + } + + @Override + public void configure(MotdConfig config) { + // configuration logic here + } +} +``` + +The disadvantage to using this is that one cannot implement custom error handling or otherwise track config changes. If needed, use the generic method above. + +## Unit testing config + +When instantiating a [ConfigSubscriber](https://javadoc.io/doc/com.yahoo.vespa/config/latest/com/yahoo/config/subscription/ConfigSubscriber.html), one can give it a [ConfigSource](https://javadoc.io/doc/com.yahoo.vespa/config/latest/com/yahoo/config/subscription/ConfigSource.html). One such source is a `ConfigSet`. It consists of a set of `Builder`s. This is an example of instantiating a subscriber using this
- it uses 2 types of config, that were generated from files `app.def` and `string.def`: + +```java +ConfigSet myConfigs = new ConfigSet(); +AppConfig.Builder a0builder = new AppConfig.Builder().message("A message, 0").times(88); +AppConfig.Builder a1builder = new AppConfig.Builder().message("A message, 1").times(89); +myConfigs.add("app/0", a0builder); +myConfigs.add("app/1", a1builder); +myConfigs.add("bar", new StringConfig.Builder().stringVal("StringVal")); +ConfigSubscriber subscriber = new ConfigSubscriber(myConfigs); +``` + +To help with unit testing, each config type has a corresponding builder type. The `Builder` is mutable whereas the `ConfigInstance` is not. Use this to set up config fixtures for unit tests. The `ConfigSubscriber` has a `reload()` method which is used in tests to force the subscriptions into a new generation. It emulates a `vespa activate` operation after you have updated the `ConfigSet`. + +A full example can be found in [ConfigSetSubscriptionTest.java](https://github.com/vespa-engine/vespa/blob/master/config/src/test/java/com/yahoo/config/subscription/ConfigSetSubscriptionTest.java). diff --git a/mintlify-docs/en/applications/configuring-components.mdx b/mintlify-docs/en/applications/configuring-components.mdx new file mode 100644 index 0000000000..11f6cf4534 --- /dev/null +++ b/mintlify-docs/en/applications/configuring-components.mdx @@ -0,0 +1,138 @@ +--- +title: "Configuring Java components" +description: "Any Java component might require some sort of configuration, be it simple strings or integers, or more complex structures. Because of all the boilerplate code that commonly goes into classes to hold such configuration, this often degenerates into a collection of key-value string pairs (e.g. [javax.servlet.FilterConfig](https://docs.oracle.com/javaee/6/api/javax/servlet/FilterConfig)). To avoid this, Vespa has custom, type-safe configuration to all [Container](/en/applications/containers) components. 
Get started with the [Developer Guide](/en/applications/developer-guide), try the [album-recommendation-java](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation-java) sample application." +--- + +Configurable components in short: + +- Create a [config definition](/en/reference/applications/config-files#config-definition-files) file +- Use the Vespa [bundle plugin](/en/applications/bundles#maven-bundle-plugin) to generate a config class from the definition +- Inject config objects in the application code + +The application code is interfacing with config through the generated code — code and config are always in sync. This configuration should be used for all state which is assumed to stay constant for the *lifetime of the component instance*. Use [deploy](/en/basics/applications) to push and activate code and config changes. + +## Config definition + +Write a [config definition](/en/reference/applications/config-files#config-definition-files) file and place it in the application's `src/main/resources/configdefinitions/` directory, e.g. `src/main/resources/configdefinitions/my-component.def`: + +```text +package=com.mydomain.mypackage + +myCode int default=42 +myMessage string default="" +``` + +## Generating config classes + +Generating config classes is done by the *bundle plugin*: + +```bash +$ mvn generate-resources +``` + +The generated config classes are written to `target/generated-sources/vespa-configgen-plugin/`. In the above example, the config definition file was named *my-component.def* and its package declaration is *com.mydomain.mypackage*. The full name of the generated Java class will be *com.mydomain.mypackage.MyComponentConfig*. + +It is a good idea to generate the config classes first, *then* resolve dependencies and compile in the IDE. 
+ +## Using config in code + +The generated config class is now available for the component through [constructor injection](/en/applications/dependency-injection), which means that the component can declare the generated class as one of its constructor arguments: + +```java +package com.mydomain.mypackage; +public class MyComponent { + private final int code; + private final String message; + @Inject + public MyComponent(MyComponentConfig config) { + code = config.myCode(); + message = config.myMessage(); + } +} +``` + +The Container will create and inject the config instance. To override the default values of the config, [specify](/en/reference/applications/config-files#generic-configuration-in-services-xml) values in `src/main/application/services.xml`, like: + +```xml + + + + 132 + Hello, World! + + + +``` + +and the deployed instance of `MyComponent` is constructed using a corresponding instance of `MyComponentConfig`. + +## Unit testing configurable components + +The generated config class provides a builder API that makes it easy to create config objects for unit testing. Example that sets up a unit test for the `MyComponent` class from the example above: + +```java +import static com.mydomain.mypackage.MyComponentConfig.*; +public class MyComponentTest { + @Test + public void requireThatMyComponentGetsConfig() { + MyComponentConfig config = new MyComponentConfig.Builder() + .myCode(668) + .myMessage("Neighbour of the beast") + .build(); + MyComponent component = new MyComponent(config); + … + } +} +``` + +The config class used here is simple — see a separate example of [building a complex configuration object](/en/applications/unit-testing#unit-testing-configurable-components). + +## Adding files to the component configuration + +This section describes what to do if the component needs larger configuration objects that are stored in files, e.g. machine-learned models, [automata](/en/reference/operations/tools#vespa-makefsa) or large tables. 
Before proceeding, take a look at how to create [provider components](/en/applications/dependency-injection#special-components) — instead of integrating large objects into e.g. a searcher or processor, it might be better to split the resource-demanding part of the component's configuration into a separate provider component. The procedure described below can be applied to any component type. + +Files can be transferred using either [file distribution](/en/applications/deployment#file-distribution) or URL download. File distribution is used when the files are added to the application package. If for some reason this is not convenient, e.g. due to size, origin of file or update frequency, Vespa can download the file and make it available for the component. Both types are set up in the config definition file. File distribution uses the `path` config type, and URL downloading the `url` type. You can also use the `model` type for machine-learned models that can be referenced by both model-id, used on Vespa Cloud, and url/path, used on self-hosted deployments. See [the config file reference](/en/reference/applications/config-files) for details. + +In the following example we will show the usage of all three types. 
Assume this config definition, named `my-component.def`: + +```text +package=com.mydomain.mypackage + +myFile path +myUrl url +myModel model +``` + +The file must reside in the application package, and the path (relative to the application package root) must be given in the component's configuration in `services.xml`: + +```xml + + + + my-files/my-file.txt + /en/reference/query-api-reference.html + + + + +``` + +An example component that uses these files: + +```java +package com.mydomain.mypackage; +import java.io.File; +public class MyComponent { + private final File fileFromFileDistribution; + private final File fileFromUrlDownload; + public MyComponent(MyComponentConfig config) { + pathFromFileDistribution = config.myFile(); + fileFromUrlDownload = config.myUrl(); + modelFilePath = config.myModel(); + } +} +``` + +The `myFile()` and `myModel()` getters return a `java.nio.file.Path` object, while the `myUrl()` getter returns a `java.io.File` object. The container framework guarantees that these files are fully present at the given location before the component constructor is invoked, so they can always be accessed right away. + +When the client asks for config that uses the `url` or `model` config type with a URL, the content will be downloaded and cached on the nodes that need it. If you want to change the content, the application package needs to be updated with a new URL for the changed content and the application [deployed](/en/basics/applications), otherwise the cached content will still be used. This avoids unintended changes to the application if the content of a URL changes. 
diff --git a/mintlify-docs/en/applications/containers.mdx b/mintlify-docs/en/applications/containers.mdx new file mode 100644 index 0000000000..1ecad7ef1c --- /dev/null +++ b/mintlify-docs/en/applications/containers.mdx @@ -0,0 +1,50 @@ +--- +title: "Container clusters" +description: "Vespa's Java container - JDisc, hosts all application components as well as the stateless logic of Vespa itself." +--- + +Which particular components are hosted by a container cluster is configured in services.xml. The main features of JDisc are: + +- HTTP serving out of the box from an embedded Jetty server, and support for plugging in other transport mechanisms. +- Integration with the config system of Vespa which allows components to [receive up-to-date config](/en/applications/configuring-components) (by constructor injection) resulting from application deployment. +- [Dependency injection based on Guice](/en/applications/dependency-injection) (Felix), but extended for configs and component collections. +- A component model based on [OSGi](/en/applications/bundles) which allows components to be (re)deployed to running servers, and to control which APIs they expose to others. +- The features above combine to allow application package changes (changes to components, configuration or data) to be applied by Vespa without disrupting request serving nor requiring restarts. +- Standard component types exist for: + - [general request handling](/en/applications/request-handlers) + - [chained request-response processing](/en/applications/processing) + - [processing document writes](/en/applications/document-processors) + - [intercepting queries and results](/en/applications/searchers) + - [rendering responses](/en/applications/result-renderers) + + Application components can be of any other type as well and do not need to reference any Vespa API to be loaded and managed by the container. +- A general [chain composition](/en/applications/chaining) mechanism for components. 
+ +## Developing Components + +- The JDisc container provides a framework for processing requests and responses, named *Processing* - its building blocks are: + - [Chains](/en/applications/chaining) of other components that are to be executed serially, with each providing some service or transform + - [Processors](/en/applications/processing) that change the request and / or the response. They may also make multiple forward requests, in series or parallel, or manufacture the response content themselves + - [Renderers](/en/applications/processing#response-rendering) that are used to serialize a Processor's response before returning it to a client +- Application Lifecycle and unit testing: + - [Configuring components](/en/applications/configuring-components) with custom configuration + - [Component injection](/en/applications/dependency-injection) allows components to access other application components + - Learn how to [build OSGi bundles](/en/applications/bundles) and how to [troubleshoot](/en/applications/bundles#troubleshooting) classloading issues + - Using [Libraries for Pluggable Frameworks](/en/applications/pluggable-frameworks) from a component may result in class loading issues that require extra setup in the application + - [Unit testing configurable components](/en/applications/unit-testing#unit-testing-configurable-components) +- Handlers and filters: + - [Http servers and security filters](/en/applications/http-servers-and-filters) for incoming connections on HTTP and HTTPS + - [Request handlers](/en/applications/request-handlers) to process incoming requests and generate responses +- Searchers and Document Processors: + - [Searcher](/en/applications/searchers) and [search result renderer](/en/applications/result-renderers) development + - [Document processing](/en/applications/document-processors) + +## Reference documentation + +- [services.xml](/en/reference/applications/services/container) + +## Other related documents + +- [Designing RESTful web 
services](/en/applications/web-services) as Vespa Components +- [healthchecks](/en/reference/operations/health-checks) - using the Container with a VIP +- [Vespa Component Reference](/en/reference/applications/components): The Container's request processing lifecycle diff --git a/mintlify-docs/en/applications/dependency-injection.mdx b/mintlify-docs/en/applications/dependency-injection.mdx new file mode 100644 index 0000000000..9107dd1b76 --- /dev/null +++ b/mintlify-docs/en/applications/dependency-injection.mdx @@ -0,0 +1,140 @@ +--- +title: "Dependency injection" +description: "The Container (a.k.a. JDisc container) implements a dependency injection framework that allows components to declare arbitrary dependencies on configuration and other components in the application. This document explains how to write a container component that depends on another component. See the [reference](/en/reference/applications/components#injectable-components) for a list of injectable components." +--- + +The container relies on auto-injection instead of Guice modules. All components declared in the container cluster are available for injection, and the dependent component only needs to declare the dependency as a constructor parameter. In general, dependency injection involves at least three elements: + +- a dependent consumer, +- a declaration of a component's dependencies, +- an injector that creates instances of classes that implement a given dependency on request. + +Notes: + +- The dependent object describes what software component it depends on to do its work. The injector decides what concrete classes satisfy the requirements of the dependent object, and provides them to the dependent. +- The Container encapsulates the injector, and the consumer and all its dependencies are considered to be components. +- The Container only supports constructor injection (i.e. all dependencies must be declared in a component's constructor). +- Circular dependencies are not supported. 
+ +Refer to the [multiple-bundles sample app](https://github.com/vespa-engine/sample-apps/tree/master/examples/multiple-bundles) for a practical example. + +## Depending on another component + +A component that depends on another is considered to be a *consumer*. A component's dependencies are whatever its `@Inject`-annotated constructor declares as arguments. E.g. the component: + +```java +package com.yahoo.example; +import com.yahoo.component.annotation.Inject; +public class MyComponent { + private final MyDependency dependency; + @Inject + public MyComponent(MyDependency dependency) { + this.dependency = dependency; + } +} +``` + +has a dependency on the class `com.yahoo.example.MyDependency`. To deploy `MyComponent`, register `MyDependency` in `services.xml`: + +```xml + + + + +``` + +Upon deployment, the Container will first instantiate `MyDependency`, and then pass that instance to the constructor of `MyComponent`. Multiple consumers can take the same dependency. One can also [inject configuration](/en/applications/configuring-components) to components. + + +**Note:** + +A component will be reconstructed only when one of its dependencies, configuration, or its class changes - all of which only occur when you re-deploy your application package. Reconstruction is transitive; if component A depends on component B, and component B depends on component C, then a reconfiguration of component B causes a reconfiguration of A, but not of C. Reconfiguration of C causes a reconstruction of both A and B. + + +### Extending components + +When injecting two components where one extends the other, the dependency injection code does not know which of the two to use as the argument for the parent class. To resolve this, inject a `ComponentRegistry` (see below), and look up its entries, like `getComponent(XXX.class.getName())`. + +### Specify the bundle + +The example above assumes the bundle name can be deduced from the class name. 
This is not always the case, and you will get class loading problems like: + +```txt +Caused by: java.lang.IllegalArgumentException: Could not create a component with id +'com.yahoo.example.My'. +Tried to load class directly, since no bundle was found for spec: +com.yahoo.example.Dependency +``` + +To remedy, specify the jar file (i.e. bundle) with the component: + +```txt + + + +``` + +## Depending on all components of a specific type + +Consider the use-case where a component chooses between various strategies, and each strategy is implemented as a separate component. Since the number and type of strategies is unknown when implementing the consumer, it is impossible to make a constructor that lists all of them. This is where the `ComponentRegistry` comes into play. E.g. the following component: + +```java +package com.yahoo.example; +public class MyComponent { + private final ComponentRegistry strategies; + @Inject + public MyComponent(ComponentRegistry strategies) { + this.strategies = strategies; + } +} +``` + +declares a dependency on the set of all components registered in `services.xml` that are instances of the class `Strategy` (including subclasses). The `ComponentRegistry` class provides accessors for components based on their [component id](/en/reference/applications/services/container#component). + +## Special Components + +There are cases where a component cannot be directly injected to its consumers - example: + +- The component must be instantiated via a factory method instead of its constructor +- Each consumer must have a unique instance of the dependency class +- The component uses native resources that must be cleaned up when the component goes out of scope + +For these situations, JDisc supports injection, and optional deconstruction, via its `Provider` interface: + +```java +public interface Provider { +T get(); +void deconstruct(); +} +``` + +`get()` is called by JDisc each time it needs to instantiate the specific component type. 
`deconstruct()` is only called after reconfiguring the system with a new application, where the current provider instance is either removed or replaced due to modified dependencies. + +Following the earlier example, declare a provider for the `MyDependency` class, that returns a new instance for each consumer: + +```java +package com.yahoo.example; +import com.yahoo.container.di.componentgraph.Provider; +public class MyDependencyProvider implements Provider { + @Override + public MyDependency get() { + return new MyDependency(); + } + @Override + public void deconstruct() { } +} +``` + +Using this provider, `services.xml` has two instances of `MyComponent`, each getting a unique instance of `MyDependency`: + +```xml + + + + + +``` + +Upon deployment, the Container will first instantiate `MyDependencyProvider`, and then invoke `MyDependencyProvider.get()` for each instantiation of `MyComponent`. + +A provider can declare constructor dependencies, just like any other component. diff --git a/mintlify-docs/en/applications/deployment.mdx b/mintlify-docs/en/applications/deployment.mdx new file mode 100644 index 0000000000..4d80ca5f7a --- /dev/null +++ b/mintlify-docs/en/applications/deployment.mdx @@ -0,0 +1,116 @@ +--- +title: "Deployment" +description: "In this document we explain various aspects of application deployment in detail. Refer to [application deployment](/en/basics/applications#deploying-applications) for an introduction." +--- + +## Convergence + +After the deployment command has succeeded, the application package will take effect, but this does not complete immediately in the distributed system that is your running application; it happens through a distributed *convergence* process that you can track from the command line or console. Refer to the [deploy reference](/en/reference/applications/application-packages#deploy) for detailed steps run when deploying an application. 
+ +You can get the status of the last deployment by using the status command: + +```shell +$ vespa status deployment +``` + +## Rollback + +Hover over the instance square to the left, click pin, give a reason - this will start the downgrade process: + + +![](/assets/img/pin-version.png) + + +The pinning to a new version starts a new deployment, and can be rolled out as a normal rollout. To speed it up, cancel system and staging test jobs by clicking abort. + + +![](/assets/img/skip-tests.png) + + +Skipping tests is at the application owner's own discretion and risk: + +- A system test on this version has already been run on an earlier deployment. Skipping this can be considered safe, for that reason. +- A staging test has never been run before when rolling back; this path is untested. + +Of the two, the staging test takes longer to run. The user decides whether to skip testing phases or not. With this, a user can control whether to immediately roll back a version including test phases or not, as well as rolling out to production zones in parallel or not. + +After the pin to rollback, make sure to update the code repository so the next deployment is in sync, and remove the pin for later deployments. + +### Follow-up steps + +Generally, to roll back an application package change, deploy again with the previous version to roll back to. The above section describes the fast-track rollback. The alternatives are: + +1. With automation: Revert the code in the source code repository, and let the automation roll out the new version. You can speed up the deployment by skipping tests and clicking "deploy now" in the deployment graph in the console. +2. If you have trouble rebuilding a good package, you can download a previous package from Vespa Cloud: Use the [console](/en/operations/automated-deployments#source-code-repository-integration) to pick the good version, download it and deploy again. 
Hover over the [instance](/en/operations/automated-deployments#block-windows) (normally called "default") to skip the system and staging test to speed up the deployment, if needed. +3. On self-managed instances, regenerate the good version from source for new deployment, see also the [deploy API](/en/reference/api/deploy-v2#rollback) + +## File distribution + +The application package can have components and other large files. When an app is deployed, these files are distributed to the nodes: + +- Components (i.e. bundles) +- Files with type *path* and *url* in config, see [Adding files to the component configuration](/en/applications/configuring-components#adding-files-to-the-component-configuration) +- Machine learned models +- [Constant tensors](/en/reference/schemas/schemas#constant) + +When new components or files specified in config are distributed, the container gets a new file reference, waits for it to be available and switches to new config when all files are available. + + +![](/assets/img/config-delivery.svg) + + +## Deploying remote models + +Most application packages are stored as source code in a code repository. However, some resources are generated or too large to store in a code repository, like models or an [FSA](/en/reference/operations/tools#vespa-makefsa). + +Machine learned models in Vespa are stored in the application package under the *models* directory. This might be inconvenient for some applications, for instance for models that are frequently retrained on some remote system. Also, models might be too large to fit within the constraints of the version control system. + +The solution is to download the models from the remote location during the application package build. 
This is simply implemented by adding a step in *pom.xml* (see [example](https://github.com/vespa-cloud/cord-19-search/blob/main/pom.xml)): + +```xml expandable + + + + org.codehaus.mojo + exec-maven-plugin + 1.4.0 + + + download-model + generate-resources + + exec + + + bin/download_models.sh + + target/application/models + MODEL-URL + + + + + + + +``` + +*bin/download_models.sh* example: + +```txt +#!/bin/bash + +DIR="$1" +URL="$2" + +echo "[INFO] Downloading $URL into $DIR" + +mkdir -p $DIR +pushd $DIR +curl -O $URL +popd +``` + +Any necessary credentials for authentication and authorization should be added to this script, as well as any unpacking of archives (for TensorFlow models for instance). + +Also see the [model](/en/reference/applications/config-files#model) config type to specify resources that should be downloaded by container nodes during convergence. diff --git a/mintlify-docs/en/applications/developer-guide.mdx b/mintlify-docs/en/applications/developer-guide.mdx new file mode 100644 index 0000000000..c9f2bd6ba8 --- /dev/null +++ b/mintlify-docs/en/applications/developer-guide.mdx @@ -0,0 +1,182 @@ +--- +title: "Developer Guide" +description: "This document explains how to develop applications, including basic terminology, tips on using the Vespa Cloud Console, and how to benchmark and size your application. See [deploy a sample application](/en/basics/deploy-an-application) to deploy a basic sample application, and [automated deployments](/en/operations/automated-deployments) on making production deployments safe routine occurrences." +--- + + +## Manual deployments + +Developers will typically deploy their application to the `dev` [zone](/en/operations/zones) during development. 
Each deployment is owned by a *tenant*, and each specified *instance* is a separate copy of the application; this lets developers work on independent copies of the same application, or collaborate on a shared one, as they prefer—more details [here](/en/learn/tenant-apps-instances). These values can be set in the Vespa Cloud UI when deploying, or with each of the build and deploy tools, as shown in the respective getting-started guides. + +Additionally, a deployment may specify a different [zone](/en/operations/zones) to deploy to, instead of the default `dev` zone. + +### Auto downsizing + +Deployments to `dev` are downscaled to one small node by default, so that applications can be deployed there without changing `services.xml`. See [performance testing](#performance-testing) for how to disable auto downsizing using `deploy:environment="dev"`. + +### Availability + +The `dev` zone is a sandbox and not for production serving; it has no uptime guarantees. + +An automated Vespa software upgrade can be triggered at any time, and this may lead to some downtime if you have only one node per cluster (as with the default [auto downsizing](#auto-downsizing)). + +## Performance testing + +For performance testing, to avoid auto downsizing, lock the [resources](/en/reference/applications/services/services) using `deploy:environment="dev"`: + +```xml + + + +``` + +Read more in [benchmarking](/en/performance/benchmarking-cloud) and [variants in services.xml](/en/operations/deployment-variants). + +## Component overview + + +![](/assets/img/vespa-overview.svg) + + +Application packages can contain Java components to be run in container clusters. The most common component types are: + +- [Searchers](/en/applications/searchers), which can modify or build the query, modify the result, implement workflows issuing multiple queries etc. +- [Document processors](/en/applications/document-processors) that can modify incoming write operations. 
+- [Handlers](/en/applications/request-handlers) that can implement custom web service APIs. +- [Renderers](/en/applications/result-renderers) that are used to define custom result formats. + +Components are constructed by dependency injection and are reloaded safely on deployment without restarts. See the [container documentation](/en/applications/containers) for more details. + +See [deploy an application having Java components](/en/basics/deploy-an-application-java), and [troubleshooting](/en/operations/self-managed/admin-procedures#troubleshooting). + +## Developing Components + +The development cycle consists of creating the component, deploying the application package to Vespa, writing tests, and iterating. These steps refer to files in [album-recommendation-java](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation-java): + +| | | +| :--- | :--- | +| **Build** | All the Vespa sample applications use the [bundle plugin](/en/applications/bundles#maven-bundle-plugin) to build the components. | +| **Configure** | A key Vespa feature is code and configuration consistency, deployed using an [application package](/en/basics/applications). This ensures that code and configuration is in sync, and loaded atomically when deployed. This is done by generating config classes from config definition files. In Vespa and application code, configuration is therefore accessed through generated config classes. The Maven target `generate-sources` (invoked by `mvn install`) uses [metal-names.def](https://github.com/vespa-engine/sample-apps/blob/master/album-recommendation-java/app/src/main/resources/configdefinitions/metal-names.def) to generate `target/generated-sources/vespa-configgen-plugin/com/mydomain/example/MetalNamesConfig.java`. After generating config classes, they will resolve in tools like [IntelliJ IDEA](https://www.jetbrains.com/idea/download/). 
| +| **Tests** | Example unit tests are found in [MetalSearcherTest.java](https://github.com/vespa-engine/sample-apps/blob/master/album-recommendation-java/app/src/test/java/ai/vespa/example/album/MetalSearcherTest.java). `testAddedOrTerm1` and `testAddedOrTerm2` illustrate two ways of doing the same test: The first setting up the minimal search chain for [YQL](/en/querying/query-language) programmatically. The second uses [`com.yahoo.application.Application`](https://javadoc.io/doc/com.yahoo.vespa/application/latest/com/yahoo/application/Application), which sets up the application package and simplifies testing. Read more in [unit testing](/en/applications/unit-testing). | + +## Debugging Components + + +**Important:** + +The debugging procedure only works for endpoints with an open debug port - most managed services don't do this for security reasons. + + +Vespa Cloud does not allow debugging over the *Java Debug Wire Protocol (JDWP)* due to the protocol's inherent lack of security measures. If you need interactive debugging, deploy your application to a self-hosted Vespa installation (below) and manually [add the *JDWP* agent to JVM options](#debugging-components). + +You may debug your Java code by requesting either a JVM heap dump or a Java Flight Recorder recording through the [Vespa Cloud Console](https://console.vespa-cloud.com/). Go to your application's cluster overview and select *export JVM artifact* on any *container* node. The process will take up to a few minutes. You'll find the steps to download the dump on the Console once it's completed. Extract the files from the downloaded Zstandard-compressed archive, and use the free [JDK Mission Control](https://www.oracle.com/java/technologies/jdk-mission-control) utility to inspect the dump/recording. 
+ + +![](/assets/img/jvm-dump.png) + + +To debug a [Searcher](/en/applications/searchers) / [Document Processor](/en/applications/document-processors) / [Component](/en/applications/components) running in a self-hosted container, set up a remote debugging configuration in the IDE - IntelliJ example: + + + + Run -> Edit Configurations... + + + + Click `+` to add a new configuration. + + + + Select the "Remote JVM Debug" option in the left-most pane. + + + + Set hostname to the host running the container, change the port if needed. + + + + Set the container's [jvm options](/en/reference/applications/services/container#jvm) to the value in "Command line arguments for remote JVM": + + ```xml + + + + + + ``` + + + + Re-deploy the application, then restart Vespa on the node that runs the container. Make sure the port is published if using a Docker/Podman container, e.g.: + + ```bash + $ docker run --detach --name vespa --hostname vespa-container \ + --publish 127.0.0.1:8080:8080 --publish 127.0.0.1:19071:19071 --publish 127.0.0.1:5005:5005 \ + vespaengine/vespa + ``` + + + Start debugging! Check *vespa.log* for errors. + + + + + + +Find *Debugging a Vespa Searcher* in the vespaengine [youtube channel](https://www.youtube.com/@vespaai)! + + + + +## Developing system and staging tests + +When using Vespa Cloud, system and staging tests are most easily developed using a test deployment in a `dev` zone to run the tests against. Refer to [general testing guide](/en/applications/testing) for a discussion of the different test types, and the [basic HTTP tests](/en/reference/applications/testing) or [Java JUnit tests](/en/reference/applications/testing-java) reference for how to write the relevant tests. + +If using the [Vespa CLI](/en/clients/vespa-cli) to deploy and run [basic HTTP tests](/en/reference/applications/testing), the same commands as in the test reference will just work, provided the CLI is configured to use the `cloud` target. 
+ +### Running Java tests + +With Maven, and [Java Junit tests](/en/reference/applications/testing-java), some additional configuration is required, to infuse the test runtime on the local machine with API and data plane credentials: + +```bash +$ mvn test \ +-D test.categories=system \ +-D dataPlaneKeyFile=data-plane-private-key.pem -D dataPlaneCertificateFile=data-plane-public-cert.pem \ +-D apiKey="$API_KEY" +``` + +The `apiKey` is used to fetch the *dev* instance's endpoints. The data plane key and certificate pair is used by [ai.vespa.hosted.cd.Endpoint](https://github.com/vespa-engine/vespa/blob/master/cloud/tenant-cd-api/src/main/java/ai/vespa/hosted/cd/Endpoint.java) to access the application endpoint. Note that the `-D vespa.test.config` argument is gone; this configuration is automatically fetched from the Vespa Cloud API—hence the need for the API key. + +When running Vespa self-hosted like in the [sample application](/en/basics/deploy-an-application-local), no authentication is required by default, to either API or container, and specifying a data plane key and certificate will instead cause the test to fail, since the correct SSL context is the Java default in this case. + +Make sure the TestRuntime is able to start. As it will init an SSL context, make sure to remove config when running locally, in order to use a default context. Remove properties from *pom.xml* and IDE debug configuration. + +Developers can also set these parameters in the IDE run configuration to debug system tests: + +```txt +-D test.categories=system +-D tenant=my_tenant +-D application=my_app +-D instance=my_instance +-D apiKeyFile=/path/to/myname.mytenant.pem +-D dataPlaneCertificateFile=data-plane-public-cert.pem +-D dataPlaneKeyFile=data-plane-private-key.pem +``` + +## Tips and troubleshooting + +- Vespa Cloud upgrades daily, and applications in `dev` also have their Vespa platform upgraded. 
This usually happens at the opposite time of day of when deployments are made to each instance, and takes some minutes. Deployments without redundancy will be unavailable during the upgrade. +- Failure to deploy, due to authentication (HTTP code 401) or authorization (HTTP code 403), is most often due to wrong configuration of `tenant` and/or `application`, when using command line tools to deploy. Ensure the values set with Vespa CLI or in `pom.xml` match what is configured in the UI. +- In case of data plane failure, remember to copy the public certificate to `src/main/application/security/clients.pem` before building and deploying. This is handled by the Vespa CLI `vespa auth cert` command. +- To run Java [system and staging tests](/en/reference/applications/testing-java) in an IDE, ensure all API and data plane keys and certificates are configured in the IDE as well; not all IDEs pick up all settings from `pom.xml` correctly: + + ```txt + -Dtest.categories=system + -DapiKeyFile=/path-to/tname.pem + -DdataPlaneCertificateFile=/path-to/data-plane-public-cert.pem + -DdataPlaneKeyFile=/path-to/data-plane-private-key.pem + ``` diff --git a/mintlify-docs/en/applications/document-processors.mdx b/mintlify-docs/en/applications/document-processors.mdx new file mode 100644 index 0000000000..786c48e606 --- /dev/null +++ b/mintlify-docs/en/applications/document-processors.mdx @@ -0,0 +1,244 @@ +--- +title: "Document processors" +description: "This document describes how to develop and deploy *Document Processors*, often called *docproc* in this documentation. Document processing is a framework to create [chains](/en/applications/chaining) of configurable [components](/en/applications/components), that read and modify document operations." +--- + +The input source splits the input data into logical units called [documents](/en/schemas/documents). A [feeder application](/en/writing/reads-and-writes) sends the documents into a document processing chain. 
This chain is an ordered list of document processors. Document processing examples range from language detection, HTML removal and natural language processing to mail attachment processing, character set transcoding and image thumbnailing. At the end of the processing chain, extracted data will typically be set in some fields in the document. + +The motivation for document processing is that code and configuration are atomically deployed, as with all Vespa components. It is also easy to build components that access data in Vespa as part of processing. + +To get started, see the [sample application](https://github.com/vespa-engine/sample-apps/tree/master/examples/document-processing). Read [indexing](/en/writing/indexing) to understand deployment and routing. As document processors are chained components just like Searchers, read [Searcher Development](/en/applications/searchers). For reference, see the [Javadoc](https://javadoc.io/doc/com.yahoo.vespa/docproc), and [services.xml](/en/reference/applications/services/docproc). + + +![](/assets/img/vespa-overview-docproc.svg) + + +## Deploying a Document Processor + +Refer to [album-recommendation-docproc](https://github.com/vespa-engine/sample-apps/tree/master/examples/document-processing) to get started, [LyricsDocumentProcessor.java](https://github.com/vespa-engine/sample-apps/blob/master/examples/document-processing/src/main/java/ai/vespa/example/album/LyricsDocumentProcessor.java) is a document processor example. Add the document processor in [services.xml](/en/reference/applications/services/docproc), and then add it to a [chain](#chains). The type of processing done by the processor dictates what chain it should be part of: + +- If it does general data-processing, such as populating some document fields from others, looking up data in external services, etc., it should be added to a general docproc chain. 
+- If, and only if, it does processing required for *indexing* +- or requires this to have already been run — it should be added to a chain which inherits the *indexing* chain, and which is used for indexing by a content cluster. + +An example that adds a general document processor to the "default" chain, and an indexing related processor to the chain for a particular content cluster: + +```xml highlight={4, 8, 18} + + + + + + + + + + + + + + + ... + + + + +``` + +The "default" chain, if it exists, is run by default, before the chain used for indexing. The default indexing chain is called "indexing", and *must* be inherited by any chain that is to replace it. + +To run through any chain, specify a [route](/en/writing/document-routing) which includes the chain. For example, the route `default/chain.my-chain indexing` would route feed operations through the chain "my-chain" in the "default" container cluster, and then to the "indexing" hop, which resolves to the specified indexing chain for each content cluster the document should be sent to. More details can be found in [indexing](/en/writing/document-routing#document-processing): + +## Document Processors + +A document processor is a component extending `com.yahoo.docproc.DocumentProcessor`. All document processors must implement `process()`: + +```txt +public Progress process(Processing processing); +``` + +When the container receives a document operation, it will create a new `Processing`, and add the `DocumentPut`s, `DocumentUpdate`s or `DocumentRemove`s to the `List` accessible through `Processing.getDocumentOperations()`. The latter is useful also where a processing should be stopped by doing `Processing.getDocumentOperations().clear()` before `Progress.DONE`, say for blocklist use, to stop a `DocumentPut/Update`. 
+ +Furthermore, the call stack of the document processing chain in question will be *copied* to `Processing.callStack()`, so that document processors may freely modify the flow of control for this processing without affecting all other processings going on. After creation, the `Processing` is added to an internal queue. + +A worker thread will retrieve a `Processing` from the input queue, and run its document operations through its call stack. A minimal, no-op document processor implementation is thus: + +```java +import com.yahoo.docproc.*; +public class SimpleDocumentProcessor extends DocumentProcessor { + public Progress process(Processing processing) { + return Progress.DONE; + } +} +``` + +The `process()` method should loop through all document operations in `Processing.getDocumentOperations()`, do whatever it sees fit to them, and return a `Progress`: + +```java +public Progress process(Processing processing) { + for (DocumentOperation op : processing.getDocumentOperations()) { + if (op instanceof DocumentPut) { + DocumentPut put = (DocumentPut) op; + // TODO do something to 'put here + } else if (op instanceof DocumentUpdate) { + DocumentUpdate update = (DocumentUpdate) op; + // TODO do something to 'update' here + } else if (op instanceof DocumentRemove) { + DocumentRemove remove = (DocumentRemove) op; + // TODO do something to 'remove' here + } +} +return Progress.DONE; +} +``` + +| Return code | Description | +| :--- | :--- | +| `Progress.DONE` | Returned if a document processor has successfully processed a `Processing`. | +| `Progress.FAILED` | Processing failed and the input message should return a *fatal* failure back to the feeding application, meaning that this application will not try to re-feed this document operation. Return an error message/reason by calling `withReason()`. This result is represented as a `500 Internal Server Error` response in [Document v1](/en/writing/document-v1-api-guide). 
Example: `if (op instanceof DocumentPut) { return Progress.FAILED.withReason("PUT is not supported"); }` | +| `Progress.INVALID_INPUT` | Available since 8.584. Processing failed due to invalid input, like a malformed document operation. This result is represented as a `400 Bad Request` response in [Document v1](/en/writing/document-v1-api-guide). | +| `Progress.LATER` | See [execution model](#execution-model). The document processor wants to release the calling thread and be called again later. This is useful if e.g. calling an external service with high latency. The document processor may then save its state in the `Processing` and resume when called again later. There are no guarantees as to *when* the processor is called again with this `Processing`; it is simply appended to the back of the input queue. By the use of `Progress.LATER`, this is an asynchronous model, where the processing of a document operation does not need to consume one thread for its entire lifespan. Note, however, that the document processors themselves are shared between all processing operations in a chain, and must thus be implemented [thread-safe](#state). | + +| Exception | Description | +| :--- | :--- | +| `com.yahoo.docproc.TransientFailureException` | Processing failed and the input message should return a *transient* failure back to the feeding application, meaning that this application *may* try to re-feed this document operation. | +| `RuntimeException` | Throwing any other `RuntimeException` means same behavior as for `Progress.FAILED`. | + +## Chains + +The call stack mentioned above is another name for a *document processor chain*. Document processor chains are a special case of the general [component chains](/en/applications/chaining) - to avoid confusion some concepts are explained here as well. A document processor chain is nothing more than a list of document processor instances, having an id, and represented as a stack. 
The document processor chains are typically not created for every processing, but are part of the configuration. Multiple ones may exist at the same time; the chain to execute will be specified by the message bus destination of the incoming message. The same document processor instance may exist in multiple document processor chains, which is why the `CallStack` of the `Processing` is responsible for knowing the next document processor to invoke in a particular message. + +The execution order of the document processors in a chain is not specified explicitly, but by [ordering constraints](/en/applications/chaining#ordering-components) declared in the document processors or their configuration. + +## Execution model + +The Document Processing Framework works like this: + + + + A thread from the message bus layer appends an incoming message to an internal priority queue, shared between all document processing chains configured on a node. The priority is set based on the message bus priority of the message. Messages of the same priority are ordered FIFO. + + + + One worker thread from the docproc thread pool picks one message from the head of the queue, deserializes it, copies the call stack (chain) in question, and runs it through the document processors. + + + + Processing finishes if **(a)** the document(s) has passed successfully through the whole chain, or **(b)** a document processor in the chain has returned `Progress.FAILED` or thrown an exception. + + + + The same thread passes the message on to the message bus layer for further transport on to its destination. + + + + +There is a single instance of each document processor chain. In every chain, there is a single instance of each document processor - unless a chain is configured with multiple, identical document processors - this is a rare case. + +As is evident from the model above, multiple worker threads execute the document processors in a chain concurrently. 
Thus, many threads of execution can be going through `process()` in a document processor, at the same time. + +This model places an important constraint on document processor classes: *instance variables are not safe.* They must be eliminated, or made thread-safe somehow. + +Also see [Resource management](/en/applications/components#resource-management), use `deconstruct()` in order to not leak resources. + +### Asynchronous execution + +The execution model outlined above also shows one important restriction: If a document processor performs any high-latency operation in its process() method, a docproc worker thread will be occupied. With all *n* worker threads blocking on an external resource, throughput will be limited. This can be fixed by saving the state in the Processing object, and returning `Progress.LATER`. A document processor doing a high-latency operation should use a pattern like this: + + + + Check a self-defined context variable in Processing for status. Basically, *have we seen this Processing before?* + + + + If no: + + 1. We have been given a Processing object fresh off the network, we have not seen this before. Process it up until the high-latency operation. + 2. Start the high-latency operation (possibly in a separate thread). + 3. Save the state of the operation in a self-defined context variable in the Processing. + 4. Return `Progress.LATER`. This Processing is then appended to the back of the input queue, and we will be called again later. + + + + If yes: + + 1. Retrieve the reference that we set in our self-defined context variable in Processing. + 2. Is the high-latency operation done? If so, return `Progress.DONE`. + 3. Is it not yet done? Return `Progress.LATER` again. + + + +As is evident, this will let the finite set of document processing threads do more work at the same time. 
+ +## State + +Any state in the document processor for the particular Processing should be kept as local variables in the process method, while state which should be shared by all Processings should be kept as member variables. As the latter kind will be accessed by multiple threads at any one time, the state of such member variables must be *thread-safe*. This critical restriction is similar to those of e.g. the Servlet API. Options for implementing a multithread-safe document processor with instance variables: + +1. Use immutable (and preferably final) objects: they never change after they are constructed; no modification to their state occurs after the DocumentProcessor constructor returns. +2. Use a single instance of a thread-safe class. +3. Create a single instance and synchronize access to it across all threads (but this will severely limit scalability). +4. Arrange for each thread to have its own instance, e.g. with a `ThreadLocal`. + +### Processing Context Variables + +`Processing` has a map `String -> Object` that can be used to pass information between document processors. It is also useful when using `Progress.LATER` to save the state of a processing - see [Processing.java](https://github.com/vespa-engine/vespa/blob/master/docproc/src/main/java/com/yahoo/docproc/Processing.java) for `get/setVariable` and more. + +The [sample application](https://github.com/vespa-engine/sample-apps/tree/master/examples/document-processing) uses such context variables, too. + +## Operation ordering + +### Feed ordering + +Ordering of feed operations is not guaranteed. Operations on different documents will be done concurrently and are therefore not ordered. However, Vespa guarantees that operations on the same document are processed in the order they were fed if they enter Vespa at the *same* feed endpoint. + +### Document processing ordering + +Document operations that are produced inside a document processor obey the same rules as at feed time. 
If you either split the input into other documents or into multiple operations to the same document, Vespa will ensure that operations to the same document ID are sequenced and are delivered in the order they enter. + +## (Re)configuring Document Processing + +Consider the following configuration: + +```xml highlight={7-9} + + + + + + + + value + + + + + + +``` + +Changing chain ids, components in a chain, component configuration and schema mapping all take effect after deployment - no restart required. Changing a *cluster name* (i.e. the container id) requires a restart of docproc services after *vespa activate*. + +Note when adding or modifying a processing chain in a running cluster; if at the same time deploying a *new* document processor (i.e. a document processor that was unknown to Vespa at the time the cluster was started), the container must be restarted: + +```txt +$ vespa-sentinel-cmd restart container +``` + +## Class diagram + + +![](/assets/img/document-processing-class-diagram.svg) + + +The framework core supports asynchronous processing, processing one or multiple documents or document updates at the same time, document processors that make dynamic decisions about the processing flow and passing of information between processors outside the document or document update: + +- One or more named `Docproc Services` may be created. One of the services is the *default*. +- A service accepts subclasses of `DocumentOperation` for processing, meaning `DocumentPuts`, `DocumentUpdates` and `DocumentRemoves`. It has a `Call Stack` which lists the calls to make to various `DocumentProcessors` to process each DocumentOperation handed to the service. +- Call Stacks consist of `Calls`, which refer to the Document Processor instance to call. +- Document puts and document updates are processed asynchronously, the state is kept in a `Processing` for its duration (instead of in a thread or process). 
A Document Processor may make some asynchronous calls (typically to remote services) and return to the framework that it should be called again later for the same Processing to handle the outcome of the calls. +- A processing contains its own copy of the Call Stack of the Docproc Service to keep track of what to call next. Document Processors may modify this Call Stack to dynamically decide the processing steps required to process a DocumentOperation. +- A Processing may contain one or more DocumentOperations to be processed as a unit. +- A Processing has a `context`, which is a Map of named values which can be used to pass arguments between processors. +- Processings are prepared to be stored to disk, to allow a high number of ongoing long-term processings per node. diff --git a/mintlify-docs/en/applications/http-servers-and-filters.mdx b/mintlify-docs/en/applications/http-servers-and-filters.mdx new file mode 100644 index 0000000000..9960cca6e9 --- /dev/null +++ b/mintlify-docs/en/applications/http-servers-and-filters.mdx @@ -0,0 +1,199 @@ +--- +title: "Http servers and filters" +description: "This document explains how to set up http servers and filters in the Container. Before proceeding, familiarize with the [Developer Guide](/en/applications/developer-guide)." +--- + +## Set up Http servers + +To accept http requests on e.g. port 8090, add an `http` section with a server to *services.xml*: + +```xml + + + + + + +``` + +To verify that the new server is running, check the default handler on the root path, which will return a list of all http servers: + +```txt +$ curl http://localhost:8090/ +``` + +Adding an `http` section to *services.xml* **disables the default http server** at port 8080. + +Binding to privileged ports (< 1024) is supported. Note that this **only** works when running as a standalone container, and **not** when running as a Vespa cluster. 
+ +### Configure the HTTP Server + +Configuration settings for the server can be modified by setting values for the `jdisc.http.connector` config inside the `server` element: + +```xml + + + + + + false + + + + +``` + +Note that it is not allowed to set the `listenPort` in the http-server config, as it conflicts with the port that is set in the *port* attribute in the *server* element. For a complete list of configuration fields that can be set, refer to the config definition schema in [jdisc.http.connector.def](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def). + +### TLS + +TLS can be configured using either the [ssl](/en/reference/applications/services/http#ssl) or the [ssl-provider](/en/reference/applications/services/http#ssl-provider) element. + +```xml + + + + + /path/to/private-key.pem + /path/to/certificate.pem + /path/to/ca-certificates.pem + want + + TLS_AES_128_GCM_SHA256, + TLS_AES_256_GCM_SHA384, + TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, + TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 + + TLSv1.2,TLSv1.3 + + + + + + + +``` + +Refer to the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) sample application for an example. + +## Set up Filter Chains + +There are two main types of filters: + +- request filters +- response filters + +Request filters run before the handler that processes the request, and response filters run after. They are used for tasks such as authentication, error checking and modifying headers. + +### Using Filter Chains + +Filter chains are set up by using the `request-chain` and `response-chain` elements inside the [filtering](/en/reference/applications/services/http#filtering) element. 
Example setting up two request filter chains, and one response filter chain: + +```xml + + + + + + + + + + + + + + + + + + + +``` + +Filters that should be used in more than one chain must be defined directly in the `filtering` element, as shown with `request-filter1` in the example above. + +To actually use a filter chain, add one or more URI [bindings](/en/reference/applications/services/http#binding): + +```xml + + + + + http://*/* + + + + + http://*/* + + + + +``` + +These bindings say that both the request chain and the response chain should be used when the request URI matches `http://*/*`. So both a request filter chain and a response filter chain can be used on a single request. However, only one request chain will be used if there are multiple request chains that have a binding that matches a request. And vice versa for response chains. Refer to the [javadoc](https://javadoc.io/doc/com.yahoo.vespa/jdisc_core/latest/com/yahoo/jdisc/application/UriPattern.html) for information about which chain will be used in such cases. + +In order to bind a filter chain to a specific *server*, add the server port to the binding: + +```xml + + + http://*:8080/* + http://*:9000/* + +``` + +A request must match a filter chain if any filter is configured. A 403 response is returned for non-matching requests. This semantic can be disabled - see [strict-mode](/en/reference/applications/services/http#filtering). + +#### Excluding Filters from an Inherited Chain + +Say you have a request filter chain that you are binding to most of your URIs. Now, you want to run almost the same chain on another URI, but you need to exclude one of the filters. This is done by adding `excludes`, which takes a space-separated list of filter ids, to the [chain element](/en/reference/applications/services/http#chain). 
Example where a security filter is excluded from an inherited chain for *status.html*: + +```xml + + http://*/status.html + +``` + +### Creating a custom Filter + +Create an [application package](/en/applications/developer-guide) with artifactId `filter-bundle`. Create a new file `filter-bundle/components/src/main/java/com/yahoo/demo/TestRequestFilter.java`: + +```java expandable +package com.yahoo.demo; +import com.yahoo.jdisc.*; +import com.yahoo.jdisc.handler.*; +import com.yahoo.jdisc.http.*; +import com.yahoo.jdisc.http.filter.RequestFilter; +import java.net.*; +import java.nio.ByteBuffer; +public class TestRequestFilter extends AbstractResource implements RequestFilter { + @Override + public void filter(HttpRequest httpRequest, ResponseHandler responseHandler) { + if (isLocalAddress(httpRequest.getRemoteAddress())) { + rejectRequest(httpRequest, responseHandler); + } else { + httpRequest.context().put("X-NOT-LOCALHOST", "true"); + } +} +private boolean isLocalAddress(SocketAddress socketAddress) { + if (socketAddress instanceof InetSocketAddress) { + InetAddress address = ((InetSocketAddress)socketAddress).getAddress(); + return address.isAnyLocalAddress() || address.isLoopbackAddress(); + } else { + return false; +} +} +private void rejectRequest(HttpRequest request, ResponseHandler responseHandler) { + HttpResponse response = HttpResponse.newInstance(request, Response.Status.FORBIDDEN); + ContentChannel channel = responseHandler.handleResponse(response); + channel.write(ByteBuffer.wrap("Not accessible by localhost.".getBytes()), null); + channel.close(null); +} +} +``` + +Build a bundle, and place it in the [application package](/en/basics/applications)'s *components* directory. 
diff --git a/mintlify-docs/en/applications/ide-support.mdx b/mintlify-docs/en/applications/ide-support.mdx new file mode 100644 index 0000000000..28399ef86d --- /dev/null +++ b/mintlify-docs/en/applications/ide-support.mdx @@ -0,0 +1,15 @@ +--- +title: "IDE support" +description: "Vespa provides plugins for working with schemas and rank profiles in IDE's:" +--- + +- VSCode: [VS Code extension](https://marketplace.visualstudio.com/items?itemName=vespaai.vespa-language-support) +- Cursor, code-server and other VS Code compatible IDEs: [VS Code extension in Open VSX registry](https://open-vsx.org/extension/vespaai/vespa-language-support) +- IntelliJ, PyCharm or WebStorm: [Jetbrains plugin](https://plugins.jetbrains.com/plugin/18074-vespa-schema-language-support) +- Vim: [neovim](https://blog.vespa.ai/interns-languageserver/#neovim-plugin) + +If you are working with non-trivial Vespa applications, installing a plugin is highly recommended! + + +![](/assets/img/ide.gif) + diff --git a/mintlify-docs/en/applications/inspecting-structured-data.mdx b/mintlify-docs/en/applications/inspecting-structured-data.mdx new file mode 100644 index 0000000000..4fbcc40ea7 --- /dev/null +++ b/mintlify-docs/en/applications/inspecting-structured-data.mdx @@ -0,0 +1,156 @@ +--- +title: "Inspecting structured data in a Searcher" +description: "The [Data Access API](https://javadoc.io/doc/com.yahoo.vespa/vespajlib/latest/com/yahoo/data/access/package-summary) is used to access structured data such as arrays and weighted sets." 
+--- + +## Use Case: accessing array attributes + +The following illustrates accessing some field that is of array type: + +```java expandable +import com.yahoo.search.*; +import com.yahoo.search.result.*; +import com.yahoo.search.searchchain.*; +import com.yahoo.data.access.*; +@After(PhaseNames.TRANSFORMED_QUERY) +@Before(PhaseNames.BLENDED_RESULT) +public class SimpleTestSearcher extends Searcher { + public Result search(Query query, Execution execution) { + Result r = execution.search(query); + execution.fill(r); + for (Hit hit : r.hits().asList()) { + if (hit.isMeta()) continue; + Object o = hit.getField("titles"); + if (o instanceof Inspectable) { + StringBuilder pasteBuf = new StringBuilder(); + Inspectable field = (Inspectable) o; + Inspector arr = field.inspect(); + for (int i = 0; i < arr.entryCount(); i++) { + pasteBuf.append(arr.entry(i).asString("")); + if (i+1 < arr.entryCount()) { + pasteBuf.append(", "); + } + } + hit.setField("titles", pasteBuf.toString()); + } + } + return r; + } +} +``` + +Here we assume there is a field in our schema like this: + +```text +field titles type array { +indexing: attribute | summary +} +``` + +Again we process each hit, this time traversing the array and building a string which contains all the titles, transforming a field looking like this: + +```json +"titles": [ +"Bond", +"James Bond" +] +``` + +into this output: + +```json +"titles": "Bond, James Bond" +``` + +## Use Case: accessing weighted set attributes + +The following example illustrates accessing data held in a weighted set. Note that the Data Access API doesn't have a "set" or "weighted set" concept; the weighted set is represented as an unordered array of objects where each object has an "item" and a "weight" field. The weight is a long integer value, while the item type will vary according to the field type as declared in the schema. 
+ +```java expandable +import com.yahoo.search.*; +import com.yahoo.search.result.*; +import com.yahoo.search.searchchain.*; +import com.yahoo.data.access.*; +@After(PhaseNames.TRANSFORMED_QUERY) +@Before(PhaseNames.BLENDED_RESULT) +public class SimpleTestSearcher extends Searcher { + public Result search(Query query, Execution execution) { + Result r = execution.search(query); + execution.fill(r); + for (Hit hit : r.hits().asList()) { + processHit(hit); + } + return r; + } + void processHit(Hit hit) { + if (hit.isMeta()) return; + Object o = hit.getField("titles"); + if (o instanceof Inspectable) { + StringBuilder pasteBuf = new StringBuilder(); + Inspectable field = (Inspectable) o; + Inspector arr = field.inspect(); + for (int i = 0; i < arr.entryCount(); i++) { + String sval = arr.entry(i).field("item").asString(""); + long weight = arr.entry(i).field("weight").asLong(0); + pasteBuf.append("title: "); + pasteBuf.append(sval); + pasteBuf.append("["); + pasteBuf.append(weight); + pasteBuf.append("]"); + if (i+1 < arr.entryCount()) { + pasteBuf.append(", "); + } + } + hit.setField("alternates", pasteBuf.toString()); + } + } +} +``` + +Here we assume there is a field in the schema like: + +```txt +field titles type weightedset { +indexing: attribute | summary +} +``` + +Again we process each hit, and format each element of the weighted set, transforming this input: + +```json +"titles": { +"Bond": 15, +"James Bond": 89 +} +``` + +into this output: + +```json +"alternates": "title: Bond[15], title: James Bond[89]" +``` + +## Unit testing with structured data + +For unit testing it is useful to be able to create structured data fields programmatically. 
This can be done using `Slime`: + +```java +import com.yahoo.slime.*; +import com.yahoo.data.access.slime.SlimeAdapter; +// Struct example: +Slime slime = new Slime(); +Cursor struct = slime.setObject(); +struct.setString("foo", "bar"); +struct.setDouble("number", 1.0); +myHit.setField("mystruct", new SlimeAdapter(struct)); +// Array example: +Slime slime = new Slime(); +Cursor array = slime.setArray(); +array.addString("foo"); +array.addString("bar"); +myHit.setField("myarray", new SlimeAdapter(array)); +// Arrays and objects can be arbitrarily nested +// Alternatively, create the slime structure from a JSON string: +Slime slime = SlimeUtils.jsonToSlime(myJsonString.getBytes(StandardCharsets.UTF_8)); +myHit.setField("myfield", new SlimeAdapter(slime.get())); +``` diff --git a/mintlify-docs/en/applications/pluggable-frameworks.mdx b/mintlify-docs/en/applications/pluggable-frameworks.mdx new file mode 100644 index 0000000000..47221b61ee --- /dev/null +++ b/mintlify-docs/en/applications/pluggable-frameworks.mdx @@ -0,0 +1,53 @@ +--- +title: "Using pluggable frameworks" +description: "Many libraries provide pluggable architectures via Service Provider Interfaces (SPI)." +--- + +## Troubleshooting and Configuring the Application + +Libraries for pluggable frameworks rely on loading classes dynamically at runtime, usually via `Class.forName("…")`. If the package of the class that is loaded is not imported by our user bundle, this will result in the following error: + +```java +java.lang.ClassNotFoundException: com.sun.imageio.plugins.jpeg.JPEGImageReaderSpi not found by my-bundle [29] +at +org.apache.felix.framework.BundleWiringImpl.findClassOrResourceByDelegation(BundleWiringImpl.java:1532) +``` + +The example above is from using the [Image I/O framework](https://docs.oracle.com/javase/6/docs/technotes/guides/imageio/). In this case, notice that the missing class is from a `com.sun` package, which is available in the SDK. 
+ +### Importing the Missing Package + +The `ClassNotFoundException` means that the bundle is not importing the package. The [bundle-plugin](/en/applications/bundles#maven-bundle-plugin) will usually not have added an import since the class is only referred to from a string in a `Class.forName("…")` statement. Hence, add an explicit `importPackage` in the bundle's pom.xml: + +```xml highlight={8} + + + + com.yahoo.vespa + bundle-plugin + ... + + com.sun.imageio.plugins.jpeg + ... + + + + +``` + +The `importPackage` configuration option takes a comma-separated list of packages. Adding multiple `importPackage` elements in pom.xml means that only one of them will take effect. + +### Exporting the Missing Package from the Container + +As mentioned, the missing package in this example is part of the SDK. In these cases, we must tell the Container to export the missing package. When running in [cluster mode](/en/operations/self-managed/multinode-systems#aws-ecs), this is done in `services.xml`: + +```xml + + + + com.sun.imageio.plugins.jpeg + + + ... + +``` diff --git a/mintlify-docs/en/applications/processing.mdx b/mintlify-docs/en/applications/processing.mdx new file mode 100644 index 0000000000..a298618554 --- /dev/null +++ b/mintlify-docs/en/applications/processing.mdx @@ -0,0 +1,325 @@ +--- +title: "Request-Response Processing" +description: "*Processing* makes it easy to create low-latency request/response processing applications. It is the recommended way of creating such applications on top of JDisc, but can also be used independently of JDisc. Processing lets you define application behavior by combining Processors performing simple tasks. Processors use a synchronous call model, but the underlying IO may be asynchronous." 
+--- + + +Javadoc: +[com.yahoo.processing.Processor](https://javadoc.io/doc/com.yahoo.vespa/processing/latest/com/yahoo/processing/Processor.html) +[com.yahoo.processing.rendering.Renderer](https://javadoc.io/doc/com.yahoo.vespa/processing/latest/com/yahoo/processing/rendering/Renderer.html) + +## Using processing + +To use processing, add this dependency to *pom.xml*: + +```xml + + com.yahoo.vespa + container + 8.689.26 {/* Find latest version at [search.maven.org/search?q=g:com.yahoo.vespa%20a:container](https://search.maven.org/search?q=g:com.yahoo.vespa%20a:container) */} + provided + +``` + +Or read [how to start a deployable project from scratch](/en/applications/developer-guide). + +## Processors + +A *processor* subclasses Processor and implements a single method: + +```java +package com.mydomain.example; + +import com.yahoo.processing.\*; +import com.yahoo.processing.execution.Execution; +import com.yahoo.processing.test.ProcessorLibrary.StringData; + +public class ExampleProcessor extends Processor { + + @Override + public Response process(Request request, Execution execution) { + // Process the Request: + request.properties().set("foo","bar"); + + // Pass on to the next processor in the chain + Response response=execution.process(request); + + // process the response + response.data().add(new StringData(request,"Hello, world!")); + + return response; + } + +} +``` + +Processors may work on both the request and response, pass on the request one or more times to further processors or create the result data internally or by contacting a remote service. The result data may be a nested composite structure where content is contributed by multiple processors. + +## Chaining Processors + +Processors should carry out a single task and are combined into complete applications. 
This is achieved using Chains: + +```java +Chain myChain=new Chain(new ExampleProcessor(), + new FooProcessor(), + new BarProcessor()); +Response response=new Execution(myChain).process(request); // execute this chain +``` + +This executes the three processors in order. The Execution keeps track of the execution state so the same processor instances may be used in many chains at the same time. When the execution reaches the end of the chain, the execution returns an empty Response to the processor calling it. An AsyncExecution class is provided as a convenience to perform an execution in a separate thread instead. + +In most cases it is more convenient to configure chains and processor instances using external configuration. Chains of processors may be specified in a [processing](/en/reference/applications/services/processing) element in the *[services.xml](/en/reference/applications/services/services)* file in the application package. The compiled processors are added to the application package as [OSGi components](/en/applications/components). Chain configuration allows chains to be defined as *sets* of processors with ordering constraints, such that the global ordering of processors can be figured out by the framework, and set operations on chains can be used to define extensions and variants of chains. + +## Asynchronous Results + +In some cases it is useful to return a Response before all the data in it is available. This allows returning a partial response to clients with low latency even though the complete response contains some data arriving more slowly. The slow data can be added to the Response as a placeholder where actual data will arrive later. 
The processing framework allows waiting or listening for such completion events as [Guava ListenableFutures.](https://guava.dev/releases/snapshot/api/docs/com/google/common/util/concurrent/ListenableFuture.html) + +If *all* data is added to the Response as future placeholders the processing framework becomes completely non-blocking. + +## Dependency Injection + +Processors in real applications will typically depend on some configuration and/or other components to run. Such dependencies should be declared as straightforward constructor arguments to allow them to be injected at construction time. + +The container runtime used to host the processing framework uses a dependency injection framework based on Guice, see [container components](/en/applications/components). + +As a processor may participate in many processing executions at one time, field values in a processing class should usually be immutable after construction is completed. + +## Response Rendering + +A *Renderer* is used to serialize the Response for return to a client. Renderers are subclasses of `com.yahoo.processing.rendering.Renderer`. A convenience superclass which handles waiting for future data in the asynchronous case is provided as `com.yahoo.processing.rendering.AsynchronousSectionedRenderer`. The default renderer, which renders in a simple JSON format, is [com.yahoo.processing.rendering.ProcessingRenderer](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/java/com/yahoo/processing/rendering/ProcessingRenderer.java) and can be subclassed to customize rendering of each kind of Data item. + +Renderers are regular [components](/en/applications/components) which are added to the application package in the [renderer section](/en/reference/applications/services/processing#renderer) of the *services.xml* file. A renderer is selected in the request by setting the `format` parameter in the request to the renderer id. 
+ +## Subclassing of Processing + +The Processing framework is meant to be generic and minimal. In some domains it is useful to employ a richer model of Processors, Requests, Responses and Executions targeted to that domain. An example is the [Search domain](/en/applications/searchers), where Searchers, Queries and Results subclass Processors, Requests and Responses. The Processing framework is designed to allow such subclassing to build richer frameworks on top. + +## Testing Processors with an Application + +A processor can be tested running inside a container. We create a JDisc from *services.xml*: + +```java expandable +import com.yahoo.application.container.JDisc; +import com.yahoo.application.Networking; + +import com.yahoo.processing.Request; +import com.yahoo.processing.Response; + +import com.yahoo.component.ComponentSpecification; + +import org.junit.Test; + +import static org.junit.Assert.assertThat; +import static org.junit.matchers.JUnitMatchers.containsString; + +public class ContainerTest { + @Test + public void testSearch() { + String servicesXml = + "" + + " " + + " " + + " " + + " " + + " " + + ""; + try (JDisc container = JDisc.fromServicesXml(servicesXml, Networking.disable)) { + Response response = container.processing().process(ComponentSpecification.fromString("default"), new Request()); + assertThat(response.data().get(0).toString(), containsString("Hello, world!")); + } + + } +} +``` + +We can also examine which processors are in a chain and their ordering: + +```java +ChainRegistry chains = container.processing().getChains(); +Chain defaultChain = chains.getComponent("default"); +boolean foundExampleProcessor = false; +for (Processor processor: defaultChain.components()) { + if ("ExampleProcessor".equals(processor.getClassName())) + foundExampleProcessor = true; +} + + +assertTrue("No instance of ExampleProcessor found in the default chain", foundExampleProcessor) +``` + +## Selecting a Non-default Processor Chain + +A complete application 
will usually be composed of several processor chains, which may or may not invoke each other. To select a chain configured with an `id` other than "default", add the chain ID as a GET parameter named `chain`. + +In other words, given a chain named "testbed", as in: + +```xml + + + + + + + +``` + + +The chain testbed could be tested from the command line by doing: + +```bash +$ curl http://*hostname*:*port*/processing/?chain=testbed +``` + +## References + +- [Developing web services](/en/applications/web-services). +- [com.yahoo.processing](https://javadoc.io/doc/com.yahoo.vespa/processing/latest/com/yahoo/processing/package-summary.html) javadoc +- [Guava Javadoc](https://guava.dev/releases/snapshot/api/docs/). + +## Common tasks with processing + +This section contains a collection of "how do I" explanations with processing. Most of these pertain to the jDisc binding of Processing, but note that Processing is independent of jDisc and may be invoked programmatically in any environment. + +### Accessing the HTTP request from Processors + +Processors which interface with the network layer may need to access the network level request to access headers or request data, or to make outgoing calls through jDisc. The jDisc request is available through request properties: + +httpRequest = (com.yahoo.container.jdisc.HttpRequest)processingRequest.properties().get("jdisc.request"); + +### Setting response headers from Processors + +Response headers may be added to any Response by adding instances of `com.yahoo.processing.handler.ResponseHeaders` to the Response (ResponseHeaders is a kind of response Data). Multiple instances of this may be added to the Response, and the complete set of headers returned is the union of all such objects. 
Example Processor: + +```java expandable +import com.yahoo.processing.Processor; +import com.yahoo.processing.Request; +import com.yahoo.processing.Response; +import com.yahoo.processing.handler.ResponseHeaders; +import com.yahoo.processing.execution.Execution; + +import java.util.Collections; +import java.util.Map; +import java.util.List; + +public class ResponseHeaderSetter extends Processor { + + private final Map> responseHeaders; + + public ResponseHeaderSetter(Map> responseHeaders) { + this.responseHeaders = Collections.unmodifiableMap(responseHeaders); + } + + @Override + public Response process(Request request, Execution execution) { + Response response = execution.process(request); + response.data().add(new ResponseHeaders(responseHeaders, request)); + return response; + } + +} +``` + +## Example Processors + +This section lists a few example processors which shows some use cases for the asynchronous aspects of the API. + +```java expandable +import com.yahoo.component.chain.Chain; +import com.yahoo.processing.Processor; +import com.yahoo.processing.Request; +import com.yahoo.processing.Response; +import com.yahoo.processing.execution.AsyncExecution; +import com.yahoo.processing.execution.Execution; +import com.yahoo.processing.response.FutureResponse; + +import java.util.\*; + +/\*\* + \* Call a number of chains in parallel + \*/ +public class Federator extends Processor { + + private final List> chains; + + public Federator(Chain … chains) { + this.chains= Arrays.asList(chains); + } + + @Override + public Response process(Request request, Execution execution) { + List futureResponses=new ArrayList(chains.size()); + for (Chain chain : chains) { + futureResponses.add(new AsyncExecution(chain,execution).process(request)); + } + Response response=execution.process(request); + AsyncExecution.waitForAll(futureResponses,1000); + for (FutureResponse futureResponse : futureResponses) { + Response federatedResponse=futureResponse.get(); + 
response.data().add(federatedResponse.data()); + response.mergeWith(federatedResponse); + } + return response; + } +} +``` + +```java expandable +import com.yahoo.processing.\*; +import com.yahoo.processing.execution.Execution; +import com.yahoo.processing.response.\*; +import com.yahoo.processing.test.ProcessorLibrary.StringData; + +/\*\* + \* A data producer which producer data which will receive asynchronously. + \* This is not a realistic, thread safe implementation as only the incoming data + \* from the last created incoming data can be completed. + \*/ +public class AsyncDataProducer extends Processor { + + private IncomingData incomingData; + + @Override + public Response process(Request request, Execution execution) { + DataList dataList = ArrayDataList.createAsync(request); // Default implementation + incomingData=dataList.incoming(); + return new Response(dataList); + } + + /\*\* Called by some other data producing thread, later \*/ + public void completeLateData() { + incomingData.addLast(new StringData(incomingData.getOwner().request(), + "A late hello, world!")); + } + +} +``` + +```java expandable +import com.google.common.util.concurrent.MoreExecutors; +import com.yahoo.component.chain.Chain; +import com.yahoo.processing.\*; +import com.yahoo.processing.execution.\*; + +/\*\* + \* A processor which registers a listener on the future completion of + \* asynchronously arriving data to perform another chain at that point. 
+ \*/ +public class AsyncDataProcessingInitiator extends Processor { + + private final Chain asyncChain; + + public AsyncDataProcessingInitiator(Chain asyncChain) { + this.asyncChain=asyncChain; + } + + @Override + public Response process(Request request, Execution execution) { + Response response=execution.process(request); + response.data().complete().addListener(new RunnableExecution(request, + new ExecutionWithResponse(asyncChain, response, execution)), + MoreExecutors.sameThreadExecutor()); + return response; + } + +} +``` \ No newline at end of file diff --git a/mintlify-docs/en/applications/request-handlers.mdx b/mintlify-docs/en/applications/request-handlers.mdx new file mode 100644 index 0000000000..ca6bbe6381 --- /dev/null +++ b/mintlify-docs/en/applications/request-handlers.mdx @@ -0,0 +1,42 @@ +--- +title: "Request handlers" +description: "This document explains how to implement and deploy a custom request handler." +--- + +In most cases, implementing your own request handlers is unnecessary, as both searchers and processors can access the request data directly. However, there are a few cases where custom request handlers are useful: + + +1. You need to implement a custom REST API. +2. Your application needs to control which parameters are used to route requests to a particular search or processing chain. + +## Implementing a request handler + +Upon receiving a request, the request handler must consume its content, process it, and then return a response. The most convenient way to implement a request handler is by subclassing the [ThreadedHttpRequestHandler](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/container/jdisc/ThreadedHttpRequestHandler). + +This utility base class uses a synchronous API and a multithreaded execution model. 
It also implements a lot of functionality that is needed by most request handlers: + +- queries are automatically written to the access log +- an HTTP date header is added to the response (if your own code adds a date header, it will not be overwritten, though) +- logging of exceptions and queries that time out +- automatic shutdown when an Error is thrown + +### Example request handler implementations + +The [Vespa sample apps](https://github.com/vespa-engine/sample-apps) on GitHub contain a few example request handler implementations: + +| Handler | Description | +| :--- | :--- | +| [DemoHandler](https://github.com/vespa-engine/sample-apps/blob/master/examples/http-api-using-request-handlers-and-processors/src/main/java/ai/vespa/examples/DemoHandler.java) | A handler that modifies a request before dispatching it to the `ProcessingHandler`. This handler is also used in the [HTTP API tutorial](/en/learn/tutorials/http-api). Note that since this depends on ProcessingHandler you must add `processing` to your `container` tag to use it. If you want to issue Queries instead, have com.yahoo.search.searchchain.ExecutionFactory injected instead and use it to create executions and call search/fill on them. | + +## Deploying a request handler + +To deploy a request handler in an application, use the [handler](/en/reference/applications/services/container#handler) element in *services.xml*: + +```xml highlight={2-4} + + + http://*/* + +``` + +A request handler may be bound to zero or more URI patterns by adding a [binding](/en/reference/applications/services/container#binding) element for each pattern. diff --git a/mintlify-docs/en/applications/result-renderers.mdx b/mintlify-docs/en/applications/result-renderers.mdx new file mode 100644 index 0000000000..1e8c68d288 --- /dev/null +++ b/mintlify-docs/en/applications/result-renderers.mdx @@ -0,0 +1,273 @@ +--- +title: "Result renderers" +description: "Vespa provides a default JSON format for query results. 
*Renderers* can be configured to implement custom formats, like binary and text format. Renderers should not be used to implement business logic - that should go in [Searchers](/en/applications/searchers), [Handlers](/en/applications/request-handlers) or [Processors](/en/applications/processing). This guide assumes familiarity with the [Developer Guide](/en/applications/developer-guide)." +--- + +Renderers are implemented by subclassing one of: + +- [com.yahoo.search.rendering.Renderer](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/rendering/Renderer) +- [com.yahoo.search.rendering.SectionedRenderer](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/rendering/SectionedRenderer) +- [com.yahoo.processing.rendering.AsynchronousSectionedRenderer<Result>](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/processing/rendering/AsynchronousSectionedRenderer) + +SectionedRenderer differs from Renderer by providing each part to be rendered in separate steps. It is therefore easier to implement a SectionedRenderer than a regular Renderer. AsynchronousSectionedRenderer has a similar API to SectionedRenderer, but supports asynchronously fetched hit contents, so if supporting slow clients or backends is a priority, this offers some advantages. AsynchronousSectionedRenderer also exposes an OutputStream instead of a Writer, so if the backend data contains data encoded the same way as the output from the container (often UTF-8), performance gains are possible. + +All renderers are [components](/en/applications/components). They are built and deployed like all other container components, and supports [custom config](/en/applications/configuring-components). + +Renderers do *not* need to be thread safe - they can safely use and store state during rendering in member variables. The container supports this by cloning the renderers just before rendering the search result. 
To support cloning correctly, the renderers are required to obey the following contract: + +1. At construction time, only final members shall be initialized, and these must refer to immutable data only. +2. State mutated during rendering shall be initialized in the init method. + +To enable a renderer, add to [services.xml](/en/reference/applications/services/container): + +```xml highlight={6-8} + + + … + + + + + … + + … + +``` + +To use the renderer, add [&presentation.format=\[id\]](/en/reference/api/query#presentation.format) to queries - in this case `&presentation.format=MyRenderer`. + +## Renderer + +The simplest form of a renderer is extending `Renderer`. The `render` method does all the work - the derived class is expected to extract all the entities of interest itself and render them. Simple example: + +```java +public class SimpleRenderer extends Renderer { + @Override + public void render(Writer writer, Result result) throws IOException { + writer.write("The result contains " + result.getHitCount() + " hits."); + } + + @Override + public String getEncoding() { + return "utf-8"; + } + + @Override + public String getMimeType() { + return "text/plain"; + } +} +``` + +More complex example: + +```java expandable +/** + * Render result sets as plain text. First line is whether an error occurred, + * second rendering initialization time stamp, then each line is the ID of each + * document returned, and the last line is time stamp for when the renderer was finished. + */ +public class DemoRenderer extends Renderer { + private String heading; + + /** + * No global, shared state to set. 
+ */ + public DemoRenderer() { + } + + @Override + protected void render(Writer writer, Result result) throws IOException { + if (result.hits().getErrorHit() == null) { + writer.write("OK\n"); + } else { + writer.write("Oops!\n"); + } + writer.write(heading + "\n"); + renderHits(writer, result.hits()); + writer.write("Rendering finished work: " + System.currentTimeMillis() + "\n"); + } + + private void renderHits(Writer writer, HitGroup hits) throws IOException { + for (Iterator i = hits.deepIterator(); i.hasNext();) { + Hit h = i.next(); + if (h.types().contains("summary")) { + String id = h.getDisplayId(); + if (id != null) { + writer.write(id + "\n"); + } + } + } + } + + @Override + public String getEncoding() { + return "utf-8"; + } + + @Override + public String getMimeType() { + return "text/plain"; + } + + /** + * Initialize mutable, per-result set state here. + */ + @Override + public void init() { + long time = System.currentTimeMillis(); + heading = "Renderer initialized: " + time; + } + +} +``` + +## SectionedRenderer + +To create a SectionedRenderer, subclass it and implement all its abstract methods. 
For each non-compound entity such as regular hits and query contexts, there are an associated method with the same name: + +```java +public class DemoRenderer extends SectionedRenderer { + + @Override + public void hit(Writer writer, Hit hit) throws IOException { + writer.write("Hit: " + hit.getField("documentid") + "\n"); + } +} +``` + +For each compound entity, such as hit groups and the result itself, there are pairs of methods, named `begin` and `end`: + +```java +public class DemoRenderer extends SectionedRenderer { + + private int indentation; + + @Override + public void beginHitGroup(PrintWriter writer, HitGroup hitGroup) throws IOException { + writer.write("Begin hit group:" + hitGroup.getId() + "\n"); + ++indentation; + } + + @Override + public void endHitGroup(PrintWriter writer, HitGroup hitGroup) throws IOException { + --indentation; + writer.write("End hit group:" + hitGroup.getId() + "\n"); + } +} +``` + +```text +For a compound entity, a method will be called for each of its members after its `begin`\-method and before its `end`\-method has been called: + + Call sequence + ------------------- +Result { 1. beginResult() + HitGroup { 2. beginHitGroup() + Hit 3. hit() + Hit 4. hit() + Hit 5. hit() + } 6. endHitGroup() +} 7. endResult() +``` + +For [grouping results](/en/querying/grouping), there is a dedicated set of callbacks available: + +- `beginGroup()` / `endGroup()` +- `beginGroupList()` / `endGroupList()` +- `beginHitList()` / `endHitList()` + +All of `Group`, `GroupList` and `HitList` are subclasses of `HitGroup`, and the default implementation of the above methods is provided that calls `beginHitGroup()` and `endHitGroup()`, respectively. Furthermore, since all the attributes of those classes are regular fields as defined by the root `Hit` class, output is made by simply implementing `beginHitGroup()`, `endHitGroup()`, and `hit()`. 
+ +### JSON example + +Read the [default JSON result format](/en/reference/querying/default-result-format) before implementing custom JSON renderers. Example: Render a set of fields containing JSON data as a JSON array. In other words, dump a variable length array containing all available data, ignore everything else and silently ignore error states (i.e. good for prototyping): + +```java expandable +package com.yahoo.mysearcher; + +import com.yahoo.search.Result; +import com.yahoo.search.query.context.QueryContext; +import com.yahoo.search.rendering.SectionedRenderer; +import com.yahoo.search.result.ErrorMessage; +import com.yahoo.search.result.Hit; +import com.yahoo.search.result.HitGroup; + +import java.io.IOException; +import java.io.Writer; +import java.util.Collection; + +public class MyRenderer extends SectionedRenderer { + /** + * A marker variable for the hit rendering to know whether + * the hit being rendered is the first one that is rendered. + */ + boolean firstHit; + + public void init() { + firstHit = true; + } + + @Override + public String getEncoding() { + return "utf-8"; + } + + @Override + public String getMimeType() { + return "application/json"; + } + + @Override + public void beginResult(Writer writer, Result result) throws IOException { + writer.write("["); + } + + @Override + public void endResult(Writer writer, Result result) throws IOException { + writer.write("]"); + } + + @Override + public void error(Writer writer, Collection errorMessages) throws IOException { + // swallows errors silently + } + + @Override + public void emptyResult(Writer writer, Result result) throws IOException { + //write nothing. + } + + @Override + public void queryContext(Writer writer, QueryContext queryContext) throws IOException { + //write nothing. + } + + @Override + public void beginHitGroup(Writer writer, HitGroup hitGroup) throws IOException { + //write nothing. 
+ } + + @Override + public void endHitGroup(Writer writer, HitGroup hitGroup) throws IOException { + //write nothing. + } + + @Override + public void hit(Writer writer, Hit hit) throws IOException { + if (!firstHit) { + writer.write(",\n"); + } + writer.write(hit.toString()); + firstHit = false; + } +} +``` + +## AsynchronousSectionedRenderer<Result> + +This is the same as for the [processing framework](/en/applications/processing#response-rendering). It is conceptually similar to SectionedRenderer, but has no special cases for search results as such. The utility method getResponse() has a parametrized return type, though, so templating the renderer on `Result` takes away some of the hassle. + +Find an example in [DemoRenderer.java](https://github.com/vespa-engine/sample-apps/blob/master/examples/http-api-using-request-handlers-and-processors/src/main/java/ai/vespa/examples/DemoRenderer.java). diff --git a/mintlify-docs/en/applications/searchers.mdx b/mintlify-docs/en/applications/searchers.mdx new file mode 100644 index 0000000000..c2d3bd4951 --- /dev/null +++ b/mintlify-docs/en/applications/searchers.mdx @@ -0,0 +1,427 @@ +--- +title: "Searchers" +description: "The *Container* is the home for all global processing of user actions (represented as queries) and their results. It provides a development and hosting environment for processing [components](/en/applications/components), and a model for composing such components developed by multiple development teams into a functional whole." +--- + +This document describes how to develop and deploy Searcher components. To get started with development, see the [Developer Guide](/en/applications/developer-guide). For reference, see the [Container javadoc](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/package-summary), and the [services.xml reference](/en/reference/applications/services/processing#chain). 
+ +Best practice for queries is to submit the user-generated query as-is to Vespa, then use Searcher components to implement additional logic. Refer to the [Query HTTP API](/en/querying/query-api#http). + + +![](/assets/img/vespa-overview-searcher.svg) + + +## Searchers + +The components of the search container are called *Searchers*. A searcher is a component - usually deployed as part of an OSGi bundle - which extends the class `com.yahoo.search.Searcher`. All Searchers must implement a single method: + +```java +public Result search(Query query, Execution execution); +``` + +When the container receives a request, it will create a Query representing it and execute a configured list of such Searcher components. This is done by calling the `search()` method on the first searcher in the list. That searcher is responsible for passing the call to the next searcher in the list (or not, as it sees fit). This is done by calling `search()` on the Execution given, which keeps track of where we are in the list of Searchers. Hence, this is a no-op searcher implementation: + +```java +public Result search(Query query, Execution execution) { + return execution.search(query); +} +``` + +Eventually the search call will reach the end of the list of searchers. The last searcher in the list may create a Result (somehow), which is now passed back up the call chain until it reaches the top. The container will then translate that Result back to a response to the incoming request. + +As is evident from this description, this is a synchronous model, where each request is processed in a dedicated worker thread until the result is returned. This synchronous model is implemented with [multi-threading of individual searchers](#keeping-state-in-searchers). + +The single searcher method is sufficient to express all kinds of functionality, e.g.: + +- A *query processor* will modify the query, then pass it on to the next searcher. 
+- A *result processor* will pass the query on to get the result, then modify the result before returning it. +- A *result producer* which produces a result by some internal lookup or (more typically) by sending a network request to a backend will translate the query to the desired execution and instantiate and return a Result holding the outcome. +- A *workflow* might pass the query on multiple times in a loop and gradually build up a Result for return from the Results received from each Query execution, or choose to pass a particular Query in an if-else loop etc. + +## Queries and Results + +The **Query** in the search container is the container of all the information needed to create a result to the request, including: + +- The parameters received in the request, including the user's query string, or chosen action. +- The parameters in the chosen [query profile](/en/querying/query-profiles), if any. +- The desired execution, including the boolean query tree. This information is gradually created from the request and query profile by Searcher components. +- Any objects of any type containing information created by Searchers along the way. + +The **Result** encapsulates all the data generated from a Query. The Result contains a composite tree of Hit objects organized in lists called HitGroups (the Result points to the topmost group). Each Hit contains some particular data item which is deemed relevant to the Query. The Hit objects has a general key-value storage, but are also polymorphic to support representing more structured information. See the [inspecting structured data](/en/applications/inspecting-structured-data) documentation for details about handling structured information in a Searcher. + +As Hits may be hierarchically organized into hit lists, the Result object is capable of representing any organization of the results. For example, in a federated system the hits are initially organized in one hit group per source. 
Upstream searchers may reorganize this into something that fits the user's need better, e.g. a single blended group, or one group per likely interpretation of the query, etc. + +## Search Chains + +The lists of searchers mentioned above are called *search chains*. Search chains are a special case of the [general component chains](/en/applications/chaining). A search chain is nothing more than a list of searcher instances having an id. The search chains are typically not created for every query but are part of the configuration. Multiple chains may exist at the same time, and the chain to execute may be specified in the request. If nothing is specified, a default one is used. The same Searcher instance may exist in multiple search chains, which is why the Execution object is responsible for knowing the next Searcher to invoke in a particular request. + +Search chains may also be executed programmatically (typically from a Searcher), synchronously or asynchronously: + +```java +// Get a chain by id +SearchChain myChain = execution.searchChainRegistry().getComponent("myChain"); +// Execute it in the same thread +Result result = new Execution(myChain, execution.context()).search(query); +// ... or in another thread +Execution settings = new Execution(myChain, execution.context()); +FutureResult futureResult = new AsyncExecution(settings).search(query); +FutureResult otherFutureResult = new AsyncExecution(settings).search(otherQuery); +``` + +Asynchronous execution is useful in cases like [federation](/en/querying/federation), where a searcher forks a Query to multiple search chains in parallel, each getting results from a particular source. Also, as in the example, it is allowed to use the same Execution instance to construct multiple AsyncExecution instances, as the state is only copied from the constructor argument. 
+ +The execution order of the searchers in a chain is not specified explicitly, but is determined by [ordering constraints](/en/applications/chaining) declared in the searchers or their configuration. Also read the [search reference](/en/reference/applications/services/search). + +### Writing a Searcher + +Example of a complete searcher: + +```java +package com.yahoo.search.example; +import com.yahoo.search.*; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; +/** +* A searcher adding a new hit. +*/ +public class SimpleSearcher extends Searcher { + public Result search(Query query, Execution execution) { + Result result = execution.search(query); // Pass on to the next searcher to get results + Hit hit = new Hit("test"); + hit.setField("message", "Hello world"); + result.hits().add(hit); + return result; + } +} +``` + +The container will create one or more instances of this class and place them in the desired search chain(s) to serve queries, as specified in [the configuration](/en/applications/components#adding-component-to-application-package). The first line in this searcher forwards the query to whatever is the next searcher in the chain this is a part of. This will eventually produce a Result, which is modified and then passed back to the previous searcher in this chain. The container will create a new instance of this searcher only when it is reconfigured, so any data needed by the searcher can be read and prepared from a constructor in the searcher. Constructors may also accept [configuration](/en/applications/components#dependency-injection), like any other pluggable component. + +Find the full API available to searchers in the [Search Container Javadoc](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/package-summary). 
+ +### Testing a Searcher + +Before there is any point in testing a searcher in a real system, it should pass a set of unit tests which test it in isolation or together with the few searchers it interacts with. To do this, we can write unit tests which programmatically set up a search chain containing the searcher to be tested, the searchers it interacts with (if any) and a searcher which produces mock results appropriate for the tests. Here is a simple example testing the Searcher above: + +```java expandable +package com.yahoo.search.example.test; + +import com.yahoo.search.*; +import com.yahoo.search.searchchain.*; +import com.yahoo.search.example.SimpleSearcher; + +public class SimpleSearcherTestCase extends junit.framework.TestCase { + +public void testBasics() { + // Create chain + Chain searchChain = new Chain(new SimpleSearcher()); + + // Create an empty context, in a running container this would be + // populated with settings used by different searcher. Tests must + // set this according to their own requirements. + Execution.Context context = Execution.Context.createContextStub(null); + Execution execution = new Execution(searchChain, context); + + // Execute it + Result result = execution.search(new Query("search/?query=some test query")); + + // Assert the result has the expected hit by scanning for the ID + assertNotNull(result.hits().get("test")); + } +} +``` + +In this case, no searcher producing mock results is needed because the searcher we are testing does not care what the Result contains. If the search chain ends with a searcher which produces no result, the framework will simply return an empty result, which is what happens in this case. A test adding a mock searcher producing results is shown in [federation](/en/querying/federation#unit-testing-the-result-processor). + +To write unit tests of the whole application package, see the [Developer Guide](/en/applications/developer-guide). 
+ +### Deploying a Searcher + +Once the searcher passes unit tests, it can be deployed to the Vespa system hosting it. The procedure is the same as described in [deploying a component](/en/applications/components#deploying-a-component). First [build the component jar](/en/applications/components#building-the-plugin-jar). To include the searcher in *services.xml*, define a search chain and add the searcher to it - example: + +```xml + + + + + + + + + + + + + + + + + +``` + +This defines the search chain `default`, which will be used in queries when no other chain is explicitly specified. The searcher id above is resolved to the component bundle jar we added by the symbolic name in the manifest, and to the right class within the bundle by the class name. By keeping all these three the same, we keep things simple, but more advanced use where this is possible is also supported, see later sections. + +See the [search chains reference](/en/reference/applications/services/search#chain). + +Example *hosts.xml*: + +```xml + + + + node1 + + +``` + +By creating a directory containing *services.xml*, *hosts.xml* and *components/Simplesearcher.jar*, that directory becomes a complete application package containing a bundle, which can now be deployed to a Vespa instance. + +After deployment, query the application: [http://localhost:8080/search?query=best](http://localhost:8080/search/?query=best). + +### Testing a Searcher with an Application + +A searcher can also be tested running inside a container. 
Create an instance from the *container* part of the *services.xml* file above: + +```java expandable +import com.yahoo.component.ComponentSpecification; +import com.yahoo.application.container.JDisc; +import com.yahoo.application.Networking; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +public class ContainerTest { + @Test + public void testSearch() { + String servicesXml = + "" + + " " + + " " + + " " + + " " + + " " + + ""; + try (JDisc container = JDisc.fromServicesXml(servicesXml, Networking.disabled)) { + Result result = container.search().process(ComponentSpecification.fromString("default"), + new Query("search/?query=test+query")); + assertNotNull(result.hits().get("test")); + } + } +} +``` + +Examine which searchers are in a chain and their ordering: + +```java +ChainRegistry chains = container.searching().getChains(); +Chain defaultChain = chains.getComponent("default"); + +boolean foundSimpleSearcher = false; +for (Searcher searcher: defaultChain.components()) { + if ("com.yahoo.search.example.SimpleSearcher".equals(searcher.getClassName())) + foundSimpleSearcher = true; +} + +assertTrue("No instance of SimpleSearcher found in the default chain", foundSimpleSearcher); +``` + +## Passing information between Searchers + +The query object is used to pass information between searchers. A part of the query is a general property store which may hold any object. Any values set in the request or in the query profile is available through these properties, but in addition searchers may add any objects they create. This is useful when some searcher component is producing information later consumed by some other. 
Example: + +```java +import com.yahoo.search.*; +import com.yahoo.search.searchchain.*; +@Provides(SomeObject.NAME) +public class ProducerSearcher extends Searcher { + public Result search(Query query, Execution execution) { + SomeObject.setTo(query.properties(), new SomeObject(query)); + return execution.search(query); // Pass to next in chain + } +} +``` + +```java +import com.yahoo.search.*; +import com.yahoo.search.searchchain.*; +@After(SomeObject.NAME) +public class ConsumerSearcher extends Searcher { + public Result search(Query query, Execution execution) { + SomeObject someObject = SomeObject.getFrom(query.properties()); + ... + return execution.search(query); // Pass to next in chain + } +} +``` + +```java +import com.yahoo.search.query.Properties; +public final class SomeObject { + public static final String NAME = "SomeObject"; + public static void setTo(Properties properties, SomeObject value) { + properties.set(NAME, value); + } + @SuppressWarnings("unchecked") + public static SomeObject getFrom(Properties properties) { + return (SomeObject) properties.get(NAME); + } +} +``` + +This code illustrates two idioms such searchers should follow when exchanging data: + +- The key to an object should be exactly the same as the short name of the object stored. +- The searcher should declare that it *provides* exactly the same name (and of course the consumers must declare that they need to be *after* the object is provided). + +When it does not cause unwanted dependencies, it is recommended to wrap the property get and put in a (static) `getFrom` and `setTo` method in the stored object, to allow storage and lookup without having to mention the key unnecessarily outside the object. + +Note that the objects are passed as regular in-memory references, so there is no noticeable overhead in this. However, in some situations (like when federating to multiple sources) the query will need to be cloned. The query will then attempt to clone the added properties. 
Those that implement Cloneable will have clone called, the rest will be copied by reference. + + +**Important:** + +It is important that objects added to the query which contain mutable state are **deep cloned** to avoid bugs. + + +On the other hand, cloning objects which should not change is wasteful; they should be copied by reference. Hence, the guidelines are: + +- Objects which should not be modified downstream should enforce immutability when added to the Query, either by not offering any mutator methods, or by being *frozen* (in a state where any mutator call causes an exception). Objects which *enforce* immutability should either not implement Cloneable or should implement a shallow clone. +- Objects which should support downstream modifications **must** implement Cloneable and offer a clone method which performs deep copying. + +### Query Context + +In some cases there is a need for passing information between searchers beyond those who see the same Query object. For this purpose, the Query provides a QueryContext object which provides a shared data view to all Searchers working on the same request. The context provides (among other things) a facility for setting properties (named objects). The context can be accessed safely from all the threads working on a request without incurring synchronization overhead (with some caveats), but provides linear, not constant lookup time. To set and retrieve such properties, use: + +```java +{query|result}.getContext(true).setProperty(name, value) +{query|result}.getContext(true).getProperty(name) +``` + +## Parametrizing Searchers + +It is easy to pass arguments to searchers - any key-value looked up in the query properties in the searcher can be passed as is in the request, or in a query profile. Example: + +```java +String myParameter = query.properties().getString("my.parameter", "defaultValue"); +``` + +This value can be set by adding `&my.parameter=myValue` to the request. 
Guidelines: + +- Names should use camelCase with the first letter in small caps +- Dots are used for nesting and have a special meaning in [query profiles](/en/querying/query-profiles). They exist to aid organization of the space of parameters, which easily grows quite large. Usually, the right thing to do is to create a separate name space for each searcher - i.e. use the same dotted prefix for all parameters, as in `myfeature.a`, `myfeature.b` etc. In addition to helping keep the search API clean, this allows various query profiles containing settings for all values in `myfeature` to be defined and selected at run time, which is often useful. +- To make such parameter APIs easier to use, one should also consider creating a [query profile type](/en/querying/query-profiles#query-profile-types) defining the valid parameters. This will be in the form of an XML file accompanying the bundle. This allows checking and optionally enforcement of validity of request parameter and query profile settings of the parameters. + +Parameters should be used for all query state which it is reasonable and just as cheap to assume may change with every query. Good candidates are e.g. numerical values to algorithms and switches to business logic. + +## Execution model + +In broad strokes, the Container works like this: + + + + The main thread picks up one of the requests waiting in the queue of the input socket + + + + This thread selects the search chain to be used to answer this request, and hands off the actual execution of the chain to a worker thread (there are many such worker threads) + + + + The worker thread calls all searchers in the search chain in turn, starting with the first one + + + + Each searcher returns results + + + + Results are eventually rendered (maybe using a template) into the buffer of the output socket + + + +There is a single instance of each search chain. In every chain, there is a single instance of each searcher. 
(Unless a chain is configured with multiple, identical searchers - this is a rare case.) + +When simultaneous requests arrive for the same search chain, multiple worker threads execute the searchers in that chain. A searcher can therefore be executed concurrently by multiple threads; many threads of execution can be going through the `search()` method, concurrently. + +This model places an important constraint on searcher classes: *instance variables are not safe.* They must be eliminated, or made thread-safe somehow. + +## Keeping state in Searchers + +As the passing of queries and results happen on the call stack, the container will allocate many worker threads to execute queries, using one thread per query until the result is returned. + +This means that any state we wish to keep along in the searcher for this particular query until the result is returned should be kept as local variables in the search method, while state which should be shared by all queries should be kept as member variables. As the latter kind will be accessed by multiple threads at any one time, the state of such member variables must be *multithread safe*. + +This critical restriction is similar to those of e.g. the Servlet API. A quick example should drive the point home: + +```java +public class SafeSearcher extends Searcher { + public Result search(Query query, Execution execution) { + long count = (Long) query.properties().get("Count"); + count++; + return execution.search(query); + } +} +public class UnsafeSearcher extends Searcher { + private long count; + public Result search(Query query, Execution execution) { + count = (Long) query.properties().get("Count"); + count++; // unsafe + return execution.search(query); + } +} +``` + +The second example uses an instance variable, which will be accessed concurrently by multiple threads. Without proper concurrency controls (such as synchronization), such access is inherently unsafe and may yield inconsistent results, and/or data corruption. 
+ +Options for implementing a multithread-safe searcher with instance variables: + +1. Use immutable objects: they never change after they are constructed; no modifications to their state occur after the Searcher constructor returns. +2. Use a single instance of a thread-safe class. +3. Create a single instance and synchronize access to it across all threads (but this will severely limit your scalability). +4. Arrange for each thread to have its own instance, e.g. with a `ThreadLocal`. + +## Multiphase searching + +The model of a single pass fetching results from a Query described in this document is sometimes too simplistic to produce good performance. The search container supports *multiphase searching* to address such cases. With multiphase searching, the hits of the result are first filled with some minimal information. This minimally filled result is sent up the search chain where some of the hits are hopefully removed. When more information is needed, a second fill request is sent down the search chain to fetch more data for just those hits remaining in the result. This can happen in repeated stages, working on progressively smaller sets of hits containing progressively more expensive information. + +The container supports this by offering `fill` methods on execution, which may be called to request more information added to the hits of the result from a searcher. In addition, the backends and backend providers must support multiphase searching (this is currently only the case for internal Vespa clusters). + +All searchers should assume they are operating in a multiphase setup, meaning: + +- Searchers which change the query or contain workflows do not need to do anything +- Searchers which access field information (not just id and relevance) from hits should **always** call either `fill()` to get the default set of fields for each hit type or `fill(summaryClassName)` to get a particular collection of fields known to exist in the backend(s) in question. 
Calling fill on a result which contains already-filled hits is cheap. +- Federating searchers should implement both the regular `search` method and the `fill` method. The fill method must request filling down the source branches which have remaining hits in the result. +- Backend searchers, which wish to support multiphase searching, should initially deliver unfilled hits and implement a `fill` method which fills the hits in the given result belonging to that backend with information from the backend. + + +**Note:** + +[vespa-match-features](https://vinted.engineering/2025/11/06/vespa-match-features/) is a good article on multiphase searching, result fill and match-features. + + +## Error handling + +If your searcher encounters a problem and wants to signal an error, set an error hit in the result object by calling `result.hits().addError(errorMsg)`. + +See the FAQ for [timeouts](/en/learn/faq#how-is-the-query-timeout-computed). + +## Timeouts + +How to gracefully handle a timeout inside a Searcher? `Result result = execution.search(query)` can result in a timeout - when printed: + +```txt +Container.com...vespa.Searcher result: Result: Source 'top-chain': 12: Timed out: Error in execution of chain 'top-chain': Chain timed out. +``` + +This can happen when there is a tree of chains (see [federation](/en/querying/federation#timeout-behavior)), where the main chain calls one chain per source, and one of the source chains times out (i.e. does not return a Result within its deadline). + +It is not generally possible to prevent this from ever happening, but searchers can check `query.getTimeLeft()` before doing time-consuming stuff, and pass `query.getTimeLeft() - a_little` as timeout to processes they initiate (such as network calls) that are able to take a deadline themselves. 
+ +## WordItem + +In a Searcher, one will often use [WordItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/WordItem) to modify the current query, or create a new query based on input query terms, or results from the current query. To keep linguistic settings (e.g. stemming) from the parent query, set `isFromQuery` to true - [example](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/src/main/java/ai/vespa/cloud/docsearch/DocumentationSearcher.java). diff --git a/mintlify-docs/en/applications/testing.mdx b/mintlify-docs/en/applications/testing.mdx new file mode 100644 index 0000000000..dd6274e91b --- /dev/null +++ b/mintlify-docs/en/applications/testing.mdx @@ -0,0 +1,154 @@ +--- +title: "System testing" +description: "A system test suite is an invaluable tool both when developing and maintaining a complex Vespa application. These are functional tests which are run against a deployment of the application package to verify, and use its HTTP APIs to execute feed and query operations which are compared to expected outcomes. Vespa provides two formalizations of this:" +--- + +- [Basic HTTP tests](/en/reference/applications/testing), expressing requests and expected responses as JSON, and run with the [Vespa CLI](/en/clients/vespa-cli). +- [Java JUnit tests](/en/reference/applications/testing-java), for more advanced tests, run as regular Java tests, with some extra configuration. + +These two frameworks also include an upgrade—or staging—test construct for scenarios where the application is upgraded, and state in the backend depends on the old application configuration; as well as a production verification test—basically a health check for production deployments. For system and staging tests, the frameworks provide an easy way to perform HTTP requests against a designated test deployment, separating the tests from the deployment and configuration of the test clusters. 
+ +This document describes how each of these test categories can be run as part of an imagined CI/CD system for safely deploying changes to a Vespa application in a continuous manner. + +Finally, find a section on [A/B-testing / bucket tests](/en/applications/testing#feature-switches-and-bucket-tests) using feature switches. + +## System tests + +System tests are just functional tests that verify a deployed Vespa application behaves as expected when fed and queried. Running a system test is as simple as making a separate deployment with the application package to test, and then running the system test suite, or one or a few of those tests. + + +**Note:** + +Each system test should be self-contained, i.e., it should be possible to run each test in isolation; or all tests, in any order. To achieve this, **system tests should generally start by clearing all documents from the cluster to test.** This is the case with our sample system tests, so take care to not run them against a production cluster. + + +For the most part, system tests must be updated due to changes in the application package. Rarely, an upgrade of the Vespa version may also lead to changed functionality, but within major versions, this should only be new features and bug fixes. In any case, it is a good idea to always run system tests against a dedicated test deployment—both before upgrading the Vespa platform, and the application package—before deploying the change to production. + +### Running system tests + +The [Vespa CLI](/en/clients/vespa-cli) makes it easy to set up a test deployment, and run system and staging tests. 
To run a system test, first set up a test deployment: + + + +```sh +$ vespa deploy --wait 600 +``` + +Run the basic HTTP tests (prefer using this test suite for regular tests) - also see the [example](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/CI-CD/production-deployment-with-tests) application: + + + +```sh +$ vespa test tests/system-test/feed-and-search-test.json +``` + +Example Java API tests (use for complex test cases) - also see the [example](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/CI-CD/production-deployment-with-tests-java) application: + + + +```sh +$ mvn test -D test.categories=system -D vespa.test.config=/path-to/test-config.json +``` + +The test config file used by the test runner in the maven-plugin defines the endpoints for each of the clusters in [services.xml](/en/reference/applications/services/services) as fields under a `localEndpoints` JSON object: + + + +```json +{ + "localEndpoints": { + "query-service": "http://localhost:8080/", + "feed-service" : "http://localhost:8081/" + } +} +``` + +`feed-service` is the endpoint of the container cluster with `<document-api>` in [services.xml](/en/reference/applications/services/services). `query-service` is the endpoint of the container cluster with `<search>` in [services.xml](/en/reference/applications/services/services). + +## Staging tests + +The goal of staging (upgrade) tests is *not* to ensure the new deployment satisfies its functional specifications, as that should be covered by system tests; rather, it is to ensure the upgrade of the application package and/or Vespa platform does not break the application, and is compatible with the behavior expected by existing clients. + +As an example, consider a change in how documents are indexed, e.g., adding a new document processor. A system test would verify this new behavior by feeding a document, and then verifying the document processor modified the document, or perhaps did something else. 
A staging test, on the other hand, would feed the document *before* the document processor was added, and querying for the document after the upgrade could give different results from what the system test would expect. + +Many such changes, which require additional action post-deployment, are also guarded by [validation overrides](/en/reference/applications/validation-overrides), but the staging test is then a great way of figuring out what the exact consequences of the change are, and how to deal with it. + +As opposed to system tests, staging tests are not self-contained, as the state change during upgrade is precisely what is tested. Instead, execution order of any staging tests that modify state, particularly after upgrade, must be controlled. Indeed, some changes will require re-feeding data, and this should then be part of the *staging test* code. Finally, it is also good to verify the expected state prior to upgrade. + +The clients of a Vespa application should be compatible with both the system and staging test expectations, and this dictates the workflow when deploying a breaking change - steps: + + + + The application code and system and staging tests are updated, so tests pass; and clients are updated to reflect the updated test code. + + + + The application is upgraded. + + + + The *staging setup* code is updated to match the new application code. + + Again, it is a good idea to always run staging tests before deployment of every change—be it a change in the application package, or an upgrade of the Vespa platform. + + + + +### Running staging tests + +See [system tests](#system-tests) above for links to example applications. Steps: + + + + A dedicated deployment is made with the *current* setup (package and Vespa version). + + + + *staging setup* code is run to put the test cluster in a particular state—typically one that mimics the state in production clusters. + + + + The deployment is then upgraded to the *new* setup (package and/or Vespa version). 
+ + + + *staging test* code is run to verify the cluster behaves as expected post-upgrade. + + Example using JSON-tests: + + ```sh + # load old application code, deploy it, run setup + $ vespa deploy --wait 600 + $ vespa test tests/staging-setup + + # make changes to the application, deploy it, run tests + $ vespa deploy --wait 120 + $ vespa test tests/staging-test + ``` + + Example using Java tests (see [system tests](#running-system-tests) for *test-config.json*): + + ```sh + # load old application code, deploy it, run setup + $ vespa deploy --wait 600 + $ mvn test -D test.categories=staging-setup -D vespa.test.config=/path-to/test-config.json + + # make changes to the application, deploy it, run tests + $ vespa deploy --wait 120 + $ mvn test -D test.categories=staging -D vespa.test.config=/path-to/test-config.json + ``` + + + + +## Feature switches and bucket tests + +With continuous deployment, it is not practical to hold off releasing a feature until it is done, test it manually until convinced it works and then release it to production. What to do instead? The answer is *feature switches*: release new features to production as they are developed, but include logic which keeps them deactivated until they are ready, or until they have been verified in production with a subset of users. + +*Bucket tests* is the practice of systematically testing new features or behavior for a controlled subset of users. This is common practice when releasing new science models, as they are difficult to verify in test, but can also be used for other features. + +To test new behavior in Vespa, use a combination of [search chains](/en/applications/chaining) and [rank profiles](/en/reference/schemas/schemas#rank-profile), controlled by [query profiles](/en/querying/query-profiles), where one query profile corresponds to one bucket. These features support inheritance to make it easy to express variation without repetition. 
+ +Sometimes a new feature requires [incompatible changes to a data field](/en/reference/schemas/schemas#modifying-schemas). To be able to continuously deploy such changes, it is necessary to create a new field containing the new version of the data. This costs extra resources but less than the alternative: standing up a new system copy with the new data. New fields can be added and populated while the system is live. + +The need for incompatible changes can be reduced by making the semantics of the fields more precise. E.g., if a field is defined as the "quality" of a document, where a higher number means higher quality, a new algorithm which produces a different range and distribution will typically be an incompatible change. However, if the field is defined more precisely as the average time spent on the document once it is clicked, then a new algorithm which produces better estimates of this value will not be an incompatible change. Using precise semantics also has the advantage of making it easier to understand if the use of the data and its statistical properties are reasonable. diff --git a/mintlify-docs/en/applications/unit-testing.mdx b/mintlify-docs/en/applications/unit-testing.mdx new file mode 100644 index 0000000000..c01abb6752 --- /dev/null +++ b/mintlify-docs/en/applications/unit-testing.mdx @@ -0,0 +1,151 @@ +--- +title: "Unit testing" +description: "This document describes how to test application functionality in a local Java VM. See [automated deployments](/en/operations/automated-deployments) for how to create system, staging and production verification tests." +--- + +## Unit testing using Application + +The [Application](https://javadoc.io/doc/com.yahoo.vespa/application/latest/com/yahoo/application/Application.html) class is useful when writing unit tests. Application uses the application package configuration and sets up a container instance for testing. 
The [JDisc](https://javadoc.io/doc/com.yahoo.vespa/application/latest/com/yahoo/application/container/JDisc.html) class that is accessed by the test through [app.getJDisc(clusterName)](https://javadoc.io/page/com.yahoo.vespa/application/latest/com/yahoo/application/Application#getJDisc-java.lang.String-) - this class has methods for using all common [component types](/en/reference/applications/components). + +Refer to [MetalSearcherTest.java](https://github.com/vespa-engine/sample-apps/blob/master/album-recommendation-java/app/src/test/java/ai/vespa/example/album/MetalSearcherTest.java) for example use. Notice how the test disables the network layer in order to run tests in parallel. + + +**Note:** + +`Application` does not set up *content* nodes, only *container*. It is hence fully stateless, and intended for unit testing the functionality of application components. The *ClusterSearcher* will not find any content nodes and log errors if invoked. Write a *System Test* to test end-to-end features like search. + + +For prototyping, enable the network interface, instantiate the Container and run requests using a browser: + +```java +public class ApplicationMain { + @Test + public static void main(String[] args) throws Exception { + try (com.yahoo.application.Application app = com.yahoo.application.Application.fromApplicationPackage( + FileSystems.getDefault().getPath("src/main/application"), + Networking.enable)) { + app.getClass(); + Thread.sleep(Long.MAX_VALUE); + } + } +} +``` + +## Unit Testing Configurable Components + +How to programmatically build configuration instances for unit testing. Read the [Developer Guide](/en/applications/developer-guide) first. + +To be able to write self-contained unit tests using configuration classes generated from a schema, it is necessary to instantiate the configuration without the use of for instance an external services file. Configuration classes contain their own builders which are useful for solving exactly this problem. 
By using builders, the configuration will be created as an immutable, type-safe object, exactly the same as used during deployment. + +### Configuration schema + +Assume the config definition file `demo.def` with the following schema: + +```java +package=com.mydomain.demo + +toplevel[].term string +toplevel[].number int +toplevel[].largenumber long +toplevel[].secondlevel[].name string +toplevel[].secondlevel[].magnitude double + +simplename string +simplenumber int +simplevaluearray[] string + +coordinate.x double +coordinate.y double +coordinate.name string +``` + +In other words, the configuration class will be `com.mydomain.demo.DemoConfig`, and it will contain an array of structures, a couple of top-level primitives (*simplename* and *simplenumber*), an array of primitive values (*simplevaluearray*) and a structure (*coordinate*). + +### Using configuration builders + +All structured objects in the cloud configuration system have their own Builder as a nested class. So, in the above example, one would get `DemoConfig.Builder` for the complete configuration class, `DemoConfig.Toplevel.Builder` for the top-level array, `DemoConfig.Toplevel.Secondlevel.Builder` for the inner array, and `DemoConfig.Coordinate.Builder` for the structure. 
+ +A configuration object, or substructure, is easiest instantiated using a constructor accepting the corresponding *Builder* class, an array of structures should use the constructor accepting an array of *Builder* instances, and an array of primitive values simply accepts a java.util.Collection of the corresponding primitive value class: + +```java expandable +package com.mydomain.demo; +import static org.junit.Assert.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.junit.Test; +import static com.mydomain.demo.DemoConfig.Toplevel; +import static com.mydomain.demo.DemoConfig.Toplevel.Secondlevel; +import static com.mydomain.demo.DemoConfig.Coordinate; +public class DemoTest { + /** + * An example showing how to build a relatively complex, mixed type + * configuration including arrays of primitive elements, nested arrays and + * arrays of structures and so on. + */ + @Test + public final void test() { + // We need use builders to safely create the graph of immutable + // configuration objects. Each generated configuration class contains + // the builder for creating an instance of itself. This pattern is + // repeated for structures. So, in our case, we have four structured + // levels. The complete configuration class, DemoConfig, the top-level, + // nested array, Toplevel, the contained array, Secondlevel and the + // structure Coordinate. This leaves us with using four distinct + // builder classes: DemoConfig.Builder, Toplevel.Builder, + // Secondlevel.Builder and Coordinate.Builder. 
+ // Chained setters are the most used pattern for the builders: + DemoConfig forTesting = new DemoConfig(new DemoConfig.Builder() + .simplename("basic chained setter for the string simplename") + .simplenumber(42) + .toplevel(buildTopLevelArray()) + .simplevaluearray( + Arrays.asList(new String[] { "primitive", "arrays", + "are", "easier", "to", "build", "than", + "arrays", "of", "structures" })) + .coordinate( + new Coordinate.Builder() + .name("have no idea what to call this one") + .x(1e300d).y(1e-300d))); + assertTrue(forTesting != null); // ;) + } + /** + * It is often the more readable solution to use helper methods to build + * configuration arrays. + * + * @return a list of Toplevel.Builder instances + */ + private List buildTopLevelArray() { + // Note how the Builder classes tend to work on Collection classes and + // mutable objects, while the config ready for use is bolted down and + // immutable: + List configArray = new ArrayList(3); + String[] configStrings = new String[] { "a", "b", "c" }; + int[] configNumbers = new int[] { 1, 2, 3 }; + long[] configLargeNumbers = new long[] { 1L + (long) Integer.MAX_VALUE, + 2L + (long) Integer.MAX_VALUE, 3L + (long) Integer.MAX_VALUE }; + for (int i = 0; i < configStrings.length; ++i) { + configArray.add(new Toplevel.Builder().number(configNumbers[i]) + .largenumber(configLargeNumbers[i]).term(configStrings[i]) + .secondlevel(buildSecondLevelArray(2))); + } + return configArray; + } + /** + * Once again, the building of an array is delegated to a helper method + * + * @param subelements + * the length of the returned list + * @return a list of SecondLevel.Builder + */ + private List buildSecondLevelArray(int subelements) { + List builders = new ArrayList( + subelements); + for (int i = 0; i < subelements; ++i) { + builders.add(new Secondlevel.Builder().name(String.valueOf(i)) + .magnitude((double) i)); + } + return builders; + } +} +``` diff --git a/mintlify-docs/en/applications/using-zookeeper.mdx 
b/mintlify-docs/en/applications/using-zookeeper.mdx new file mode 100644 index 0000000000..e5f4b910ac --- /dev/null +++ b/mintlify-docs/en/applications/using-zookeeper.mdx @@ -0,0 +1,59 @@ +--- +title: "Using ZooKeeper" +description: "The Vespa container supports [ZooKeeper](https://zookeeper.apache.org/), which allows distributed synchronization across nodes in a container cluster." +--- + +Once enabled all nodes in a container cluster will automatically form a ZooKeeper ensemble, and participate as servers. Vespa takes care of reconfiguring ZooKeeper members when nodes are added or removed from the container cluster. + + +**Note:** + +Vespa enforces an optimal node limit for clusters with ZooKeeper. Application packages that violate this node count will be rejected. The valid number of nodes is 3, 5 or 7. See [#15762](https://github.com/vespa-engine/vespa/issues/15762) for other node counts. + + +## Configuration + + + + ZooKeeper must be explicitly enabled in the [container cluster configuration](/en/reference/applications/services/container#zookeeper). + + + + The application must specify a dependency on `zkfacade`. Example for `pom.xml`: + + ```xml + + com.yahoo.vespa + zkfacade + [vespa-version] + provided + + ``` + + + + +## Code example + +ZooKeeper features are exposed through [VespaCurator](https://github.com/vespa-engine/vespa/blob/master/zkfacade/src/main/java/com/yahoo/vespa/curator/api/VespaCurator.java). [Inject](/en/applications/dependency-injection) `VespaCurator` to use it. 
[Handler](/en/applications/request-handlers) example: + +```java +public class MyRequestHandler extends ThreadedHttpRequestHandler { + private final VespaCurator curator; + @Inject + public CuratorHandler(Executor executor, VespaCurator curator) { + super(executor); + this.curator = curator; + } + @Override + public HttpResponse handle(HttpRequest httpRequest) { + Path lockPath = Path.fromString("/locks/mylock"); + Duration timeout = Duration.ofSeconds(1); + try (var lock = curator.lock(lockPath, timeout)) { + // Do something while holding lock + } catch (Exception e) { + throw new RuntimeException("Failed to acquire lock " + lockPath, e); + } +} +} +``` diff --git a/mintlify-docs/en/applications/vespaignore.mdx b/mintlify-docs/en/applications/vespaignore.mdx new file mode 100644 index 0000000000..20108f8dc7 --- /dev/null +++ b/mintlify-docs/en/applications/vespaignore.mdx @@ -0,0 +1,42 @@ +--- +title: ".vespaignore" +sidebarTitle: ".vespaignore file" +description: "When deploying an [application package](/en/reference/applications/application-packages) with [Vespa CLI](/en/clients/vespa-cli), a `.vespaignore` file (similar to `.gitignore`) can be added to the package to prevent specific files or path patterns from being included in the deployed package." +--- + + +Ignoring files is useful when the Vespa application directory contains files that are only used for development purposes, and are not directly referenced by the application. + +## Location + +The `.vespaignore` file must be placed at the same level as [services.xml](/en/reference/applications/services/services). Having multiple `.vespaignore` at different path levels is not supported. + +## Example + +This is an example of a `.vespaignore` file that excludes files and directories rarely needed in an application package. 
+ +```txt +# exclude hidden files and readme +.DS_Store +.gitignore +README.md + +# exclude feed input +ext/ + +# exclude auxiliary scripts +*.py +*.sh +``` + +## Format + +The `.vespaignore` format is a subset of the `.gitignore` format, where: + +- Lines starting with `#` are ignored and can be used for comments +- Each non-empty line specifies a path pattern to ignore +- Patterns are relative to `services.xml` +- A pattern can be either a literal string, or a pattern string as consumed by [filepath.Match](https://pkg.go.dev/path/filepath#Match) +- Lines ending with `/` always denote a directory, e.g. the pattern `foo/` will match the directory `foo` (and any files below), but not the file `foo` + +Complex rules, such as negated patterns and recursive globbing (`**`) are not supported. diff --git a/mintlify-docs/en/applications/web-services.mdx b/mintlify-docs/en/applications/web-services.mdx new file mode 100644 index 0000000000..174dcf6fa9 --- /dev/null +++ b/mintlify-docs/en/applications/web-services.mdx @@ -0,0 +1,130 @@ +--- +title: "Developing Web Service Applications" +sidebarTitle: "Developing Web Services" +description: "This document explains how to develop (REST) web service type applications on the container - design options, accessing the request path, returning a status code etc. There are two types of web service APIs:" +--- + +- Fine-grained APIs with closed semantics – for example *return the number of stars of an article* +- Coarse-grained APIs with open semantics – for example *return a page containing the most relevant mixture of stuff for this user and action* + +With coarse-grained APIs, the container can help handle the complexity typically involved in the implementation of such APIs by providing a way to compose and federate components contributing to processing the request and provide and modify the returned data, and a way to allow such requests to start returning before they are finished to reduce latency with large responses. 
This is the [processing](/en/applications/processing) framework (or, in the case of search-like application, the [searcher](/en/applications/searchers) specialization). + +In addition, the [container](/en/reference/applications/components#component-types) features a generic mechanism allowing a [request handler](/en/applications/request-handlers) to be [bound](/en/reference/applications/components#binding) to a URI pattern and invoked to handle all requests matching that pattern. This is useful where there is no need to handle complexity and/or federation of various kinds of data in the response. Both the approaches above are actually implemented as built-in request handlers. + +A custom request handler may be written to parse the url path/method and dispatch to an appropriate chain of processing components. A "main" processing chain may be written to do the same by dispatching to other chains. The simplest way to invoke a specific chain of processors is to forward a query to the `ProcessingHandler` with the request property `chain` set to the name of the chain to invoke: + +```java +import com.yahoo.component.annotation.Inject; +public class DemoHandler extends com.yahoo.container.jdisc.ThreadedHttpRequestHandler { + ... + @Inject + public DemoHandler(Executor executor, ProcessingHandler processingHandler) { + super(executor); + this.processingHandler = processingHandler; + } + ... + @Override + public HttpResponse handle(HttpRequest request) { + HttpRequest processingRequest = new HttpRequest.Builder(request) + .put(com.yahoo.processing.Request.CHAIN, "theProcessingChainIWant") + .createDirectRequest(); + HttpResponse r = processingHandler.handle(processingRequest); + return r; + } + ... 
+} +``` + +## Accessing the HTTP request + +Custom [request handlers](/en/applications/request-handlers) are given a [com.yahoo.container.jdisc.HttpRequest](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/container/jdisc/HttpRequest), with direct access to associated properties and request data. + +In [Processing](/en/applications/processing), the Processors are given a [com.yahoo.processing.Request](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/processing/Request) containing the HTTP URL parameters: + +```java +// url parameters are added to properties +String urlParameter = request.properties().get("urlParameterName"); + +// jdisc request context is added with prefix context +Object contextValue = request.properties().get("context.contextKey"); +``` + +If needed, a Processor can retrieve the entire HTTP request via a utility function: + +```java +import com.yahoo.container.jdisc.HttpRequest; ... + +// Retrieve the underlying HTTP request: +Optional<HttpRequest> httpRequest = HttpRequest.getHttpRequest(request); + +if (httpRequest.isPresent()) { + // The POST data input stream: + InputStream in = httpRequest.get().getData(); + // The HTTP method: + Method method = httpRequest.get().getMethod(); +} +``` + +### Setting the HTTP status and HTTP headers + +In Processing, the return status can be set by adding a special Data item to the Response: + +```java +response.data().add(new com.yahoo.processing.handler.ResponseStatus(404, request)); +``` + +If no such data element is present, the status will be determined by the container. If it contains data able to render, it will be 200, otherwise it will be determined by any ErrorMessage present in the response. + +### Setting response headers from Processors + +Response headers may be added to any Response by adding instances of `com.yahoo.processing.handler.ResponseHeaders` to the Response (ResponseHeaders is a kind of response Data). 
Multiple instances of this may be added to the Response, and the complete set of headers returned is the union of all such objects. Example Processor: + +```java +processingResponse.data().add(new com.yahoo.processing.handler.ResponseHeaders(myHeaders, request)); +``` + +Request handlers may in general set their return status, and manipulate headers directly on the HttpResponse. + +## Queries + +Sometimes all that is needed is letting the standard query framework reply for more paths than standard. This is possible by adding extra [binding](/en/reference/applications/services/search#binding)s inside the `<search>` element in `services.xml`. Writing a custom [request handler](/en/applications/request-handlers) is recommended if the application is a standalone HTTP API, and especially if there are properties used with the same name as those in the [Query API](/en/reference/api/query). A request handler may query the search components running in the same container without any appreciable overhead: + +### Invoking Vespa queries from a component + +To invoke Vespa queries from a component, have an instance of [ExecutionFactory](https://github.com/vespa-engine/vespa/blob/master/container-search/src/main/java/com/yahoo/search/searchchain/ExecutionFactory.java) injected in the constructor and use its API to construct and issue the query. The container this runs in must include the `<search>` tag for the ExecutionFactory to be available. 
Example: + +```java expandable +import com.yahoo.component.annotation.Inject; +import com.yahoo.component.ComponentId; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.component.Chain; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.searchchain.ExecutionFactory; + +public class MyComponent { + + private final ExecutionFactory executionFactory; + + @Inject + public MyComponent(ExecutionFactory executionFactory) { + this.executionFactory = executionFactory; + } + + Result executeQuery(Query query, String chainId) { + Chain searchChain = executionFactory.searchChainRegistry().getChain(new ComponentId(chainId)); + Execution execution = executionFactory.newExecution(searchChain); + query.getModel().setExecution(execution); + return execution.search(query); + } + +} +``` + +ExecutionFactory depends on the search chains, so it cannot be injected into any component which is part of the search chains. But from within a Searcher it is not needed as the Execution passed gives what is needed: + +- Access the search chains: execution.context().searchChainRegistry(). +- Create a new Execution: new Execution(mySearchChain, execution.context()) + +This is the right way since it ties that execution to the one you're in. + +One hence cannot execute a search chain from the search chain component constructor to e.g. refresh a cache. It is impossible since the search chains can't be constructed until this constructor returns. An alternative is to extract the refreshing into a separate component which has both the client and execution factory injected into it. \ No newline at end of file diff --git a/mintlify-docs/en/basics/applications.mdx b/mintlify-docs/en/basics/applications.mdx new file mode 100644 index 0000000000..0044a5bcdf --- /dev/null +++ b/mintlify-docs/en/basics/applications.mdx @@ -0,0 +1,106 @@ +--- +title: Vespa applications +--- + +You use Vespa by deploying an *application* to it. Why applications? 
Because Vespa handles both data and the computations you do over them — together an application. + +An application is specified by an *application package* — a directory with some files. The application package contains *everything* that is needed to run your application: config, schemas, components, ML models, and so on. + +The *only* way to change an application is to make the change in the application package and then deploy it again. Vespa will then safely change the running system to match the new application package revision, without impacting queries, writes, or data. + +## A minimal application package + +You can create a complete application package with just a single file: `services.xml`. This file specifies the clusters that your application should run. It could just be a single stateless cluster — what's called *container* — like this: + +```xml + + + + + + + +``` + +Put this in a file called `services.xml`, and you have created the world's smallest application package. However, this won't do much; usually you want to have a `content` cluster which can store data, maintain indexes, and run the distributed part of queries. You'll also want your container cluster to load the necessary middleware for this. With that we get a services file like this: + +```xml + + + + + + + + + + 2 + + + + + + + + +``` + +This specifies a pretty normal simple Vespa application, but now we need another file: the schema of the document type we'll use. This goes into the directory `schemas/`, so our application package now looks like this: + +```txt +services.xml +schemas/myschema.sd +``` + +The schema file describes a kind of data and the computations (such as ranking/scoring) you want to do over it. 
At minimum it just lists the fields of that data type and whether each field should be indexed: + +```txt +schema myschema { + + document myschema { + + field text type string { + indexing: summary | index + } + + field embedding type tensor(x[384]) { + indexing: attribute | index + } + + field popularity type double { + indexing: summary | attribute + } + + } + +} +``` + +With these two files we have specified a fully functional application that can do text, vector and hybrid search with filtering. + +Rather than creating applications from scratch like this, you can also clone one of our sample applications as a starting point like we did in [getting started](/en/basics/deploy-an-application). + +To read more on schemas, see the [schemas](/en/basics/schemas) guide. To see everything an application package can contain, see the [application package reference](/en/reference/applications/application-packages). + +## Deploying applications + +To create running instances of an application, or make changes to one take effect, you *deploy* it. Deployments to the dev zone and to self-managed clusters set up a single instance, while deployments to production can set up multiple instances in one or more regions. + +To deploy an application package you use the [deploy command](/en/clients/vespa-cli#deployment) in Vespa CLI: + +```bash +vespa deploy . +``` + +This will deploy the application package at the current directory to the current target and the default dev zone (use `vespa deploy -h` to see other options). + +Deployment to production zones use a separate command: + +```bash +vespa prod deploy . +``` + +Production deployments also require an additional file in the application package to specify where it should be deployed: `deployment.xml`. See [production deployment](/en/operations/production-deployment). The recommended way to deploy to production is by setting up a continuous deployment job — see [automated deployments](/en/operations/automated-deployments). 
+ +Deploying a change to an application package is generally safe to do at any time. It does not disrupt queries and writes, and invalid or destructive changes are rejected before taking effect. You can also add tests that verify the application before deployment to production zones. + diff --git a/mintlify-docs/en/basics/deploy-an-application-java.mdx b/mintlify-docs/en/basics/deploy-an-application-java.mdx new file mode 100644 index 0000000000..e5f3e6c492 --- /dev/null +++ b/mintlify-docs/en/basics/deploy-an-application-java.mdx @@ -0,0 +1,110 @@ +--- +title: Deploy an application having Java components +--- + +Follow these steps to deploy a Vespa application which includes Java components to the [dev zone](/en/operations/environments#dev) on Vespa Cloud (for free). + +Alternative versions of this guide: + +- [Deploy an application using pyvespa](https://vespa-engine.github.io/pyvespa/getting-started-pyvespa-cloud.html) - for Python developers +- [Deploy an application without Java components](/en/basics/deploy-an-application) +- [Deploy an application without Vespa CLI](/en/basics/deploy-an-application-shell) +- [Deploy an application locally](/en/basics/deploy-an-application-local) +- [Deploy an application having Java components locally](/en/basics/deploy-an-application-local-java) + + + +**Prerequisites:** + +- [Java 17](https://openjdk.org/projects/jdk/17/). +- [Apache Maven](https://maven.apache.org/install.html) to build the application. 
+ + +Setup: + + + +**Create a [tenant](/en/learn/tenant-apps-instances) on Vespa Cloud:** + + +**Install the [Vespa CLI](/en/clients/vespa-cli)** using [Homebrew](https://brew.sh/): + +```bash +$ brew install vespa-cli +``` + + +**Configure the Vespa client:** + +```bash +$ vespa config set target cloud +$ vespa config set application vespa-team.autotest +``` + + +**Get Vespa Cloud control plane access:** + +```bash +$ vespa auth login +``` + + + +**Clone a sample [application](/en/basics/applications):** + +```bash +$ vespa clone album-recommendation-java myapp && cd myapp +``` + + +**Add a certificate for [data plane access](/en/security/guide#data-plane) to the application:** + +```bash +$ vespa auth cert app +``` + + + + +Steps: + + + +**Build the application:** + +```bash +$ mvn install -f app +``` + + +**[Deploy](/en/basics/applications#deploying-applications) the application:** + +```bash +$ vespa deploy --wait 600 ./app +``` + + +**[Feed](/en/writing/reads-and-writes) [documents](/en/schemas/documents):** + +```bash +$ vespa feed app/src/test/resources/*.json +``` + + +**Run [queries](/en/querying/query-api):** + +```bash +vespa query "select * from music where album contains 'head'" +``` +```bash +vespa query \ + "select * from music where true" \ + "ranking=rank_albums" \ + "ranking.features.query(user_profile)={{cat:pop}:0.8,{cat:rock}:0.2,{cat:jazz}:0.1}" +``` + + + +Congratulations, you have deployed your first Vespa application! Application instances in the [dev zone](/en/operations/environments#dev) will by default keep running for 14 days after the last deployment. You can control this in the [console](https://console.vespa-cloud.com/). + +You can inspect the source code for this app at [sample-apps](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation-java). 
\ No newline at end of file diff --git a/mintlify-docs/en/basics/deploy-an-application-local-java.mdx b/mintlify-docs/en/basics/deploy-an-application-local-java.mdx new file mode 100644 index 0000000000..93bc2dbe8e --- /dev/null +++ b/mintlify-docs/en/basics/deploy-an-application-local-java.mdx @@ -0,0 +1,106 @@ +--- +title: Deploy an application having Java components locally +--- + +Follow these steps to deploy a Vespa application which includes Java components to the [dev zone](/en/operations/environments#dev) on Vespa Cloud (for free). + +Alternative versions of this guide: + +- [Deploy an application using pyvespa](https://vespa-engine.github.io/pyvespa/getting-started-pyvespa-cloud.html) - for Python developers +- [Deploy an application without Java components](/en/basics/deploy-an-application) +- [Deploy an application without Vespa CLI](/en/basics/deploy-an-application-shell) +- [Deploy an application locally](/en/basics/deploy-an-application-local) +- [Deploy an application having Java components locally](/en/basics/deploy-an-application-local-java) + + + +**Prerequisites:** + +- [Java 17](https://openjdk.org/projects/jdk/17/). +- [Apache Maven](https://maven.apache.org/install.html) to build the application. 
+ +Setup: + + + +**Create a [tenant](/en/learn/tenant-apps-instances) on Vespa Cloud:** + + +**Install the [Vespa CLI](/en/clients/vespa-cli)** using [Homebrew](https://brew.sh/): + +```bash +$ brew install vespa-cli +``` + + +**Configure the Vespa client:** + +```bash +$ vespa config set target cloud +$ vespa config set application vespa-team.autotest +``` + + +**Get Vespa Cloud control plane access:** + +```bash +$ vespa auth login +``` + + +**Clone a sample [application](/en/basics/applications):** + +```bash +$ vespa clone album-recommendation-java myapp && cd myapp +``` + + +**Add a certificate for [data plane access](/en/security/guide#data-plane) to the application:** + +```bash +$ vespa auth cert app +``` + + +Steps: + + + +**Build the application:** + +```bash +$ mvn -U -f app package +``` + + +**[Deploy](/en/basics/applications#deploying-applications) the application:** + +```bash +$ vespa deploy --wait 600 ./app +``` +The first deployment may take a few minutes while nodes are provisioned. + + +**[Feed](/en/writing/reads-and-writes) [documents](/en/schemas/documents):** + +```bash +$ vespa feed app/src/test/resources/*.json +``` + + +**Run [queries](/en/querying/query-api):** +```bash +vespa query "select * from music where album contains 'head'" +``` +```bash +vespa query \ + "select * from music where true" \ + "ranking=rank_albums" \ + "ranking.features.query(user_profile)={{cat:pop}:0.8,{cat:rock}:0.2,{cat:jazz}:0.1}" +``` + + + +Congratulations, you have deployed your first Vespa application! Application instances in the [dev zone](/en/operations/environments#dev) will by default keep running for 14 days after the last deployment. You can control this in the [console](https://console.vespa-cloud.com/). + +You can inspect the source code for this app at [sample-apps](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation-java). 
diff --git a/mintlify-docs/en/basics/deploy-an-application-local.mdx b/mintlify-docs/en/basics/deploy-an-application-local.mdx new file mode 100644 index 0000000000..d6ec166d5c --- /dev/null +++ b/mintlify-docs/en/basics/deploy-an-application-local.mdx @@ -0,0 +1,119 @@ +--- +title: Deploy an application locally +--- + +Follow these steps to deploy a Vespa application on your own machine. + +Alternative versions of this guide: + +- [Deploy an application using pyvespa](https://vespa-engine.github.io/pyvespa/getting-started-pyvespa-cloud.html) - for Python developers +- [Deploy an application](/en/basics/deploy-an-application) +- [Deploy an application having Java components](/en/basics/deploy-an-application-java) +- [Deploy an application without Vespa CLI](/en/basics/deploy-an-application-shell) +- [Deploy an application having Java components locally](/en/basics/deploy-an-application-local-java) + +This is tested with _vespaengine/vespa:8.692.16_ container image. + + + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` + - See [Docker Containers](/en/operations/self-managed/docker-containers) for system limits and other settings. + +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 4 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). 
+- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [Github releases](https://github.com/vespa-engine/vespa/releases). + + + + + +**Validate the environment:** + +```bash +$ docker info | grep "Total Memory" + +or + +$ podman info | grep "memTotal" +``` + + +**Install the [Vespa CLI](/en/clients/vespa-cli)** using [Homebrew](https://brew.sh/): + +```bash +$ brew install vespa-cli +``` +Windows/No Homebrew? See the [Vespa CLI page](/en/clients/vespa-cli) to download directly. + + +**Set local target:** +```bash +$ vespa config set target local +``` + + +**Start a Vespa Docker container:** +```bash +docker run --detach --name vespa --hostname vespa-container \ + --publish 8080:8080 --publish 19071:19071 \ + vespaengine/vespa +``` +Alternatively, use `podman` in the command above. + +The port `8080` is published to make the search and feed interfaces accessible from outside the container, `19071` is the deploy-endpoint. Only one container named `vespa` can run at a time, so change the name if needed. See [Docker containers](/en/operations/self-managed/docker-containers) for more insights. 
+ + + +**Clone a sample [application](/en/basics/applications):** +```bash +$ vespa clone album-recommendation myapp && cd myapp +``` + + + +**[Deploy](/en/basics/applications#deploying-applications) the application:** +```bash +$ vespa deploy --wait 300 ./app +``` + + + +**[Feed](/en/writing/reads-and-writes) [documents](/en/schemas/documents):** +```bash +$ vespa feed dataset/documents.jsonl +``` + + +**Run [queries](/en/querying/query-api):** +```bash +vespa query "select * from music where album contains 'head'" +``` +```bash +vespa query \ + "select * from music where true" \ + "ranking=rank_albums" \ + "ranking.features.query(user_profile)={{cat:pop}:0.8,{cat:rock}:0.2,{cat:jazz}:0.1}" +``` + + +**Get documents:** +```bash +vespa document get id:mynamespace:music::a-head-full-of-dreams +``` +```bash +vespa visit +``` + +Get a document by ID, or export all documents - see [/document/v1](/en/writing/document-v1-api-guide) and [vespa visit](/en/writing/visiting). + + + +Congratulations, you have deployed your first Vespa application! \ No newline at end of file diff --git a/mintlify-docs/en/basics/deploy-an-application-shell.mdx b/mintlify-docs/en/basics/deploy-an-application-shell.mdx new file mode 100644 index 0000000000..e020606335 --- /dev/null +++ b/mintlify-docs/en/basics/deploy-an-application-shell.mdx @@ -0,0 +1,129 @@ +--- +title: Deploy an application without Vespa CLI +--- + +This lets you deploy an application to the [dev zone](/en/operations/environments#dev) on Vespa Cloud (for free). 
+ +Alternative versions of this guide: + +- [Deploy an application using pyvespa](https://vespa-engine.github.io/pyvespa/getting-started-pyvespa-cloud.html) - for Python developers +- [Deploy an application](/en/basics/deploy-an-application) +- [Deploy an application having Java components](/en/basics/deploy-an-application-java) +- [Deploy an application locally](/en/basics/deploy-an-application-local) +- [Deploy an application with Java components locally](/en/basics/deploy-an-application-local-java) + + + + +**Prerequisites:** + +- git - or download the files from [album-recommendation](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation) +- zip - or other tool to create a .zip file +- curl - or other tool to send HTTP requests with security credentials +- OpenSSL + + +Steps: + + + +**Create a [tenant](/en/learn/tenant-apps-instances) on Vespa Cloud:** + +Go to [console.vespa-cloud.com](https://console.vespa-cloud.com/) and create your tenant (unless you already have one). + + +**Clone a sample [application](/en/basics/applications):** +```bash +git clone --depth 1 https://github.com/vespa-engine/sample-apps.git && \ + cd sample-apps/album-recommendation +``` +See [sample-apps](https://github.com/vespa-engine/sample-apps) for other sample apps you can clone + + +**Add a certificate for [data plane access](/en/security/guide#data-plane) to the application:** + +On Unix or Mac, use `openssl`: + +```bash +openssl req -x509 -nodes -days 14 -newkey rsa:4096 \ + -subj "/CN=cloud.vespa.example" \ + -keyout data-plane-private-key.pem -out data-plane-public-cert.pem +``` +On Windows, the certificate has to be created with [New-SelfSignedCertificate](https://learn.microsoft.com/en-us/powershell/module/pki/new-selfsignedcertificate) in PowerShell, and then exported to PEM format using [certutil](https://learn.microsoft.com/en-us/windows-server/administration/windows-commands/certutil). 
+ +Once the certificate has been created, add it to the application package. + +```bash +mkdir -p app/security && \ + cp data-plane-public-cert.pem app/security/clients.pem +``` + + + +**Create a deployable application package zip:** +```bash +( cd app && zip -r ../application.zip . ) +``` + + + +**Deploy the application:** + +In the [console](https://console.vespa-cloud.com/), click *Deploy Application*. Use "myapp" as the application name, leave the defaults. Make sure *DEV* is selected, and upload the `application.zip`. Click *Create and deploy*. + +The first deployment may take a few minutes while nodes are provisioned. + + + +**Verify the application endpoint:** + +```bash +ENDPOINT=https://name.myapp.tenant-name.aws-us-east-1c.dev.z.vespa-app.cloud/ +``` +```bash +curl --cert data-plane-public-cert.pem --key data-plane-private-key.pem $ENDPOINT +``` +You can find the endpoint in the console deployment output, set it for later use and test it. You can also [do this in a browser](/en/security/guide#using-a-browser). 
+ + +**[Feed](/en/writing/reads-and-writes) [documents](/en/schemas/documents):** +```bash +curl --cert data-plane-public-cert.pem --key data-plane-private-key.pem \ + -H "Content-Type:application/json" \ + --data-binary @ext/A-Head-Full-of-Dreams.json \ + $ENDPOINT/document/v1/mynamespace/music/docid/a-head-full-of-dreams +``` +```bash +curl --cert data-plane-public-cert.pem --key data-plane-private-key.pem \ + -H "Content-Type:application/json" \ + --data-binary @ext/Love-Is-Here-To-Stay.json \ + $ENDPOINT/document/v1/mynamespace/music/docid/love-is-here-to-stay +``` +```bash +curl --cert data-plane-public-cert.pem --key data-plane-private-key.pem \ + -H "Content-Type:application/json" \ + --data-binary @ext/Hardwired...To-Self-Destruct.json \ + $ENDPOINT/document/v1/mynamespace/music/docid/hardwired-to-self-destruct +``` + + +**Run [queries](/en/querying/query-api):** +```bash +curl --cert data-plane-public-cert.pem --key data-plane-private-key.pem \ + -X POST -H "Content-Type: application/json" --data ' + { + "yql": "select * from music where true", + "ranking": { + "profile": "rank_albums", + "features": { + "query(user_profile)": "{{cat:pop}:0.8,{cat:rock}:0.2,{cat:jazz}:0.1}" + } + } + }' \ + $ENDPOINT/search/ +``` + + + + +Congratulations, you have deployed your first Vespa application! Application instances in the [dev zone](/en/operations/environments#dev) will by default keep running for 14 days after the last deployment. You can control this in the [console](https://console.vespa-cloud.com/). diff --git a/mintlify-docs/en/basics/deploy-an-application.mdx b/mintlify-docs/en/basics/deploy-an-application.mdx new file mode 100644 index 0000000000..3891c93a62 --- /dev/null +++ b/mintlify-docs/en/basics/deploy-an-application.mdx @@ -0,0 +1,111 @@ +--- +title: Deploy an application +description: "Follow these steps to deploy a Vespa application to the [dev zone](/en/operations/environments#dev) on Vespa Cloud (for free)." 
+--- + +{/* If you change this also make the same change in deploy-an-application-{shell,java} */} + +**Alternative versions of this guide:** + +- [Deploy an application using pyvespa](https://vespa-engine.github.io/pyvespa/getting-started-pyvespa-cloud.html) — for Python developers +- [Deploy an application having Java components](/en/basics/deploy-an-application-java) +- [Deploy an application without Vespa CLI](/en/basics/deploy-an-application-shell) +- [Deploy an application locally](/en/basics/deploy-an-application-local) +- [Deploy an application having Java components locally](/en/basics/deploy-an-application-local-java) + +## Setup + + + + Create a [tenant](/en/learn/tenant-apps-instances) on Vespa Cloud: + + Go to [console.vespa-cloud.com](https://console.vespa-cloud.com/) and create your tenant (unless you already have one). + + + + Install the [Vespa CLI](/en/clients/vespa-cli) using [Homebrew](https://brew.sh/): + + ```bash + brew install vespa-cli + ``` + + Windows or no Homebrew? See the [Vespa CLI](/en/clients/vespa-cli) page to download directly. + + + + ```bash + export VESPA_CLI_HOME=$PWD/.vespa + vespa config set target cloud + vespa config set application vespa-team.autotest + ``` + + Use the tenant name from step 1 instead of `vespa-team`, and replace it in other steps in this guide too. + + + + ```bash + vespa auth login + ``` + + Follow the instructions from the command to authenticate. + + + + **Clone a sample [application](/en/basics/applications):** + + ```bash + vespa clone album-recommendation myapp && cd myapp + ``` + + See [sample-apps](https://github.com/vespa-engine/sample-apps) for other sample apps you can clone. + + + + **Add a certificate for [data plane access](/en/security/guide#data-plane) to the application:** + + ```bash + vespa auth cert app + ``` + + It is a good idea to take note of the path to the `.pem` files written here. 
+ + + +Steps: + + + + [Deploy](/en/basics/applications#deploying-applications) the application: + + ```bash + vespa deploy --wait 600 ./app + ``` + + The first deployment may take a few minutes while nodes are provisioned. + + + + [Feed](/en/writing/reads-and-writes) [documents](/en/schemas/documents): + + ```bash + vespa feed dataset/documents.jsonl + ``` + + + + Run [queries](/en/querying/query-api): + + ```bash + vespa query "select * from music where album contains 'head'" + ``` + + ```bash + vespa query \ + "select * from music where true" \ + "ranking=rank_albums" \ + "ranking.features.query(user_profile)={{cat:pop}:0.8,{cat:rock}:0.2,{cat:jazz}:0.1}" + ``` + + + +Congratulations, you have deployed your first Vespa application! Application instances in the [dev zone](/en/operations/environments#dev) will by default keep running for 14 days after the last deployment. You can control this in the [console](https://console.vespa-cloud.com/). \ No newline at end of file diff --git a/mintlify-docs/en/basics/operations.mdx b/mintlify-docs/en/basics/operations.mdx new file mode 100644 index 0000000000..4cb1fadb4d --- /dev/null +++ b/mintlify-docs/en/basics/operations.mdx @@ -0,0 +1,25 @@ +--- +title: Operations +description: "A deployed Vespa application is a self-contained highly available, distributed stateful system. Operating these at scale is difficult, so Vespa automates this to the extent possible in the deployment environment it is running." 
+--- + +| Deployment environment | Automated operations | Suitable for | +| :--- | :--- | :--- | +| Vespa self-managed/open source | Application deployment (single application, single instance), application change (except rolling restarts), data redistribution, failover | Development | +| Vespa Kubernetes Operator | Application deployment (single application, single instance), application change, data redistribution, failover, node provisioning, failed node replacement, node type change, [autoscaling](/en/operations/autoscaling), [endpoint routing](/en/operations/endpoint-routing), encryption | Production in environments outside hyperscalers | +| Vespa Cloud | Application deployment (multiple applications, instances, [regions](/en/operations/zones), clouds), application change, data redistribution, failover, node provisioning, failed node replacement, node type change, [autoscaling](/en/operations/autoscaling), [endpoint routing](/en/operations/endpoint-routing), encryption, Vespa platform and OS upgrades, continuous deployment pipeline with verification, metrics and management console | Development, production on hyperscalers (including in [customer accounts and VPCs](/en/operations/enclave/enclave)) | + +Vespa is designed to enable applications to evolve in production. This includes these aspects: + +- Application package changes are managed by Vespa's built-in control plane to be carried out without impacting queries or writes. If a change cannot be made without impacting queries or writes, it is rejected on deployment (and will require a [validation](/en/reference/applications/validation-overrides) override to be allowed). +- The operations supported by Vespa are those that can be scaled to hundreds of nodes, billions of documents and hundreds of thousands of queries per second. If you can run it on a single machine, you can scale it. +- The hardware resources available in a cluster can be changed both up and down. 
Redistribution will happen automatically in the background, while limiting resource usage to avoid impacting queries and writes. +- When possible (on Vespa Cloud), new revisions of applications are deployed in test zones where they can be verified by application-supplied functional tests before being allowed to progress to production. + +## Performance and scaling + +Content clusters in Vespa can be scaled to any amount of content by adding more nodes (horizontal scaling). Data will redistribute automatically, and there's no need for manual tuning of the process. To scale to large amounts of queries, content clusters can also be scaled by adding multiple *groups* of nodes (vertical scaling). Each group contains a single copy of the corpus and container clusters will automatically load balance over groups. + +A Vespa application can consist of any number of stateless and stateful clusters. On larger applications it can be beneficial to split different functions into separate clusters that can be optimized separately. For example, having one stateless container cluster for feeding and another for querying, or using different content clusters for different data schemas. + +Read more in [elasticity](/en/content/elasticity) and the [performance guide](/en/performance/). diff --git a/mintlify-docs/en/basics/querying.mdx b/mintlify-docs/en/basics/querying.mdx new file mode 100644 index 0000000000..edac9a9a53 --- /dev/null +++ b/mintlify-docs/en/basics/querying.mdx @@ -0,0 +1,91 @@ +--- +title: Querying +description: "An introduction to querying with Vespa." 
+--- + +## Queries + +Queries in Vespa are expressed as a YQL string: a query language similar to SQL for structured data, with additions for vector and full-text search, for example: + +```sql +select * from mySchema where myTextField contains 'someWord' and myNumber > 10.0 +``` + +You can also search multiple fields with one query item (like "contains"), by defining a [fieldset](/en/reference/schemas/schemas#fieldset) in the schema. + +Any nested combination of and/or and so on is supported; see the full syntax in the [query language reference](/en/reference/querying/yql). + +## Query requests + +Queries are sent as HTTP requests, to the endpoint of a container cluster having `<search>` in `services.xml`. The YQL query is sent as the `yql` parameter (HTTP encoded): + +```text +endpoint-url/search/?yql=select+%2A+from+sources+%2A+where+true +``` + +The Vespa CLI can do this for you: + +```bash +vespa query "select * from sources * where true" +``` + +You can add the `-v` option to see the HTTP request that this becomes. + +On Vespa Cloud your application will by default get an mTLS certificate that you use to make requests. If you want to use an access token, you can [add one in the console](/en/security/guide#configuring-tokens). + +## Query request parameters + +In addition to the YQL parameter, you can send other query request parameters to supply data such as user/llm query text, vectors, and parameters controlling the query execution. 
These are added to HTTP requests in the obvious way, and passed to Vespa CLI by adding multiple arguments: + +```bash +vespa query -v "select * from sources * where true" "timeout=100ms" +``` + +You can also send the query parameters [as a JSON payload](/en/querying/query-api#http) instead of as request parameters: + +```bash +curl -H "Content-Type: application/json" \ + --data '{"yql" : "select * from sources * where true"}' \ + endpoint-url/search/ +``` + +To see all the parameters accepted, see the [query API reference](/en/reference/api/query). + +You may end up wanting to set many query parameters in your queries. Instead of passing them in the request, you can create a query profile in the application package containing all the parameters and just specify the profile in the request — see [query profiles](/en/querying/query-profiles). + +## Querying with text + +You can use the `text` YQL operator to retrieve or rank using raw text. This will process the text and (by default) search it with the [WeakAnd](/en/ranking/wand#weakand) text search operator. + +You can pass the text directly, or refer to a separate request parameter (using `@parameter`): + +```bash +vespa query "select * from sources * where title contains text(@query)" \ + "query=Any text, from a human/llm" +``` + +You can set options controlling how the text is to be parsed and matched; see the [text() reference documentation](/en/reference/querying/yql#text). + +## Querying with vectors + +Querying by vectors is done using the `nearestNeighbor` YQL operator, which takes a document and query vector: + +```bash +vespa query 'select * from sources * where {targetHits: 100}nearestNeighbor(my_vector_field, my_query_vector)' \ + ranking=my_rank_profile \ + 'input.query(my_query_vector)'='[1,2,3]' +``` + +Read more in [nearest neighbor search](/en/querying/nearest-neighbor-search). 
+ +You can combine multiple `nearestNeighbor`, `text` and other operators in any way: + +```bash +vespa query "select * from sources * where (({targetHits: 300}nearestNeighbor(my_title_embedding, my_query_vector)) \ + or ({targetHits: 150}nearestNeighbor(my_body_embeddings, my_query_vector)) \ + or title contains text(@query) or body contains text(@query)) \ + and range(title, 0.0, 500.0) and category in ('c1', 'c2') \ + and !(blacklisted=true)" \ + "input.query(my_query_vector)=embed(@query)" \ + "query=Hello, world! " +``` diff --git a/mintlify-docs/en/basics/ranking.mdx b/mintlify-docs/en/basics/ranking.mdx new file mode 100644 index 0000000000..eb73078ab5 --- /dev/null +++ b/mintlify-docs/en/basics/ranking.mdx @@ -0,0 +1,134 @@ +--- +title: Ranking +--- + +*Ranking* in Vespa is the computation that is done on matching documents during query execution. These are specified as [ranking functions](/en/ranking/ranking-expressions-features) in *rank profiles* in the schema. + +The special function named `first-phase` will determine the initial *rank* of the matches, such that the top k can be selected as response to a query: + +```js +rank-profile my-rank-profile { + first-phase { + expression: 0.7 * bm25(text) + 0.3 * attribute(popularity) + } +} +``` + +## Ranking functions and features + +The ranking functions can be any mathematical function combining rank features, including [tensor math](/en/ranking/tensor-user-guide#ranking-with-tensors) and [machine-learned models](#machine-learned-model-inference). + +The rank features these functions can use are of three categories: + +- **Document features**, using `attribute(fieldName)`: any document field which has `attribute` in the indexing statement. +- **Query features**, aka inputs, using `query(name)`: any value sent with the query as an input. When these are tensors (not scalars) they must be declared as an input in the rank profile. 
+- **Match features**: a built-in feature which says something about how well a query and document matches, e.g. bm25 or closeness. + +Refer to the [full list of rank features](/en/reference/ranking/rank-features). + +Query features (inputs) that are tensors must be declared in the rank profile: + +```js +rank-profile my-rank-profile { + inputs { + query(user_context) tensor(x[3]) + } + first-phase { + expression: bm25(text) + sum(query(user_context) * attribute(document_context)) + } +} +``` + +This is also how the type of query vectors in vector search are declared. + +## Rank profiles + +A schema can have any number of rank profiles specifying computations and ranking for different use cases, experiments, and so on. Queries select one using the [ranking.profile](/en/reference/api/query#ranking.profile) parameter in requests or a [query profile](/en/querying/query-profiles). If no profile is specified in the request, the one called `default` is used, and if that isn't specified in the schema, a default one ranking by the [nativeRank](/en/ranking/nativerank) feature is used. Another built-in rank profile `unranked` is also always available. Specifying this boosts serving performance in queries which do not need ranking because ordering is not important or [explicit field sorting](/en/reference/querying/sorting-language) is used. + +To avoid very long schema files, rank profiles can also be specified in their own files in the application package, named `schemas/[schema-name]/[profile-name].profile`. See the [schema reference](/en/reference/schemas/schemas#rank-profile) for documentation of all the content of rank profiles. + +Rank profiles can inherit other profiles to avoid duplication, as in `rank-profile myProfile inherits default, another`. 
+ +## Phased ranking + +In addition to first-phase which specifies the initial ranking that will be applied on all matching documents during matching, rank profiles can also specify functions that will be applied to *rerank* the top k documents before returning the final result. This is useful to direct more computation towards the most promising candidate documents: + +```js +schema myapp { + + rank-profile my-rank-profile { + + first-phase { + expression { + attribute(quality) * freshness(timestamp) + bm25(title) + } + } + + second-phase { + expression: xgboost(my_xgboost_reranker) + total-rerank-count: 1000 # Over all nodes + } + + global-phase { + expression: sum(onnx(my_large_onnx_model)) + rerank-count: 20 + } + + } + +} +``` + +The `second-phase` expression is executed locally on the content node, using local data. This is efficient on thousands of candidates. The `global-phase` expression is executed on the global result set after merging, in the container node and is best used for any very expensive and high quality final reranking. See [phased ranking](/en/ranking/phased-ranking) for details. + +## Ranking functions + +A rank profile can define any number of functions which can be used in other ranking expressions or (when taking no arguments) be returned with results. + +```js expandable +schema myapp { + + rank-profile my-rank-profile { + + function clickProbability() { + expression: xgboost('myClickModel') + } + + function textRanking(field) { + expression: 0.7 * bm25(field) + 0.3 * nativeProximity(field) + } + + first-phase { + expression { + 0.1 * clickProbability() + 0.2 * closeness(embeddingsField) + + 0.3 * textRanking(titleField) + + 0.4 * textRanking(bodyField) + } + } + + summary-features { + clickProbability() # Returned with every matched document + } + + } + +} +``` + +Read more in [ranking expressions and functions](/en/ranking/ranking-expressions-features). 
+ +## Layered ranking + +In addition to ranking *documents*, a rank profile can also rank and select array elements within documents. This is most commonly used to select individual chunks within documents in RAG applications — see [working with chunks](/en/rag/working-with-chunks#layered-ranking-selecting-chunks-to-return). + +## Machine-Learned model inference + +The best quality is achieved by learning relevance functions using machine learning from a training set. Vespa lets you use machine-learned models in these formats in distributed ranking (first- and second phase): + +- [ONNX](/en/ranking/onnx), allowing importing models from ML frameworks like Tensorflow, PyTorch and scikit-learn. +- [XGBoost](/en/ranking/xgboost) +- [LightGBM](/en/ranking/lightgbm) + +As these are exposed as rank features, they can be used in ranking expressions exactly like any other rank feature. + diff --git a/mintlify-docs/en/basics/schemas.mdx b/mintlify-docs/en/basics/schemas.mdx new file mode 100644 index 0000000000..1577338dff --- /dev/null +++ b/mintlify-docs/en/basics/schemas.mdx @@ -0,0 +1,103 @@ +--- +title: Schemas +description: "This is an introduction to schemas in Vespa. You can find all the details in the [schema reference](/en/reference/schemas/schemas)." +--- + +A schema defines a type of data and what we want to compute over it. An application package can contain multiple schemas for different kinds of data. Each content cluster specified in `services.xml` refers to the schemas that should be stored and indexed in that cluster. Schemas can inherit other schemas to avoid repeating common content. + +Schemas are placed in files named the same as the schema, with the ending `.sd` (for schema definition), in the `schemas/` directory of the application package. 
+ +## Document fields + +A schema contains a document type, which is a named collection of fields: + +```js +schema mySchema { + + document mySchema { + + field myField type string { + indexing: summary | index + } + + ... more fields + + } + +} +``` + +Each field has a type, a way it should be processed and indexed, and optionally other settings. The main decision you make is how the field should be used in queries, determined by the `indexing` statement: + +- `indexing: summary`: The field should be available in query responses ([document summaries](/en/querying/document-summaries)). +- `indexing: index`: If a string: create a full-text on-disk index. If a tensor: create an HNSW vector index (requires `attribute` in addition). +- `indexing: attribute`: For any field type: make the field value available for structured search (exact, range, regexp etc.), ranking, sorting, grouping, and aggregation in the [in-memory column store](/en/content/attributes). Suitable for structured data. +- `indexing: attribute` and `attribute: fast-search`: As above, but in addition, create an index over this data to make it an efficient filter. Suitable for structured fields that are used as strong filters in queries. + +The indexing statement can contain multiple expressions separated by a pipe character, and these can also preprocess the value, so the pipe should be read as passing to the next expression, as on Unix. See the [reference](/en/reference/schemas/schemas#field) for all the types and content of fields. + +When a schema is defined and added to a content cluster, you can [write data](/en/basics/writing) according to it, and [query](/en/basics/querying) using the attributes and indexed fields in it. Indexing always happens automatically in real time. + +## Synthetic fields + +The document type in the schema defines the fields that you can put and get (read and write) for that document type. 
However, sometimes you want to take an input field and process it in some way before it is stored/indexed. To do that, you can create additional synthetic fields outside the document in the schema (for example using the [embed](/en/rag/embedding) function): + +```js +schema mySchema { + + document mySchema { + + field myField type string { + indexing: summary | index + } + + ... + + } + + field mySyntheticField type tensor(x[386]) { + indexing: input myField | embed | attribute | index + } + +} +``` + +## Rank profiles + +A *rank profile* specifies what should be computed over the data described by the schema, and how the documents of it should be ranked to select the ones to return in a query response: + +```js +schema mySchema { + + ... + + rank-profile hybrid { + + first-phase { + expression { + 0.3 * bm25(myText) + + 0.5 * closeness(myEmbedding) + + 0.2 * attribute(popularity) + } + } + + } + +} +``` + +A schema can have any number of rank profiles for different use cases, experiments and so on, and each can have multiple functions that compute some value to be returned or used in ranking. In addition to simple math functions like the above these can also be machine-learned models. See [ranking](/en/basics/ranking) for more. + +## Working with schemas + +Schemas may become thousands of lines, with inheritance, multiple rank functions calling each other and so on. The most efficient way of working with them is to use an IDE and install the Vespa plugin to get syntax highlighting, completions and navigation — see [IDE support](/en/applications/ide-support). + +What happens if you change the schema of a running application? + +- **Adding new fields**: No problem; the new field will be added and have no value until data is written to it. 
+- **Changing how a field is indexed**: This will automatically cause a background reindexing on Vespa Cloud, but in the meantime there may be inconsistency in how the field is used in queries and writes, so in production it is sometimes preferable to create a new field instead. +- **Removing a field**: Data and indexes are removed for the field. +- **Changing the type of a field**: Existing data and indexes are removed for the field. For this reason, it is often preferable to add a new field instead, populate it, switch usages to the new field, then remove the old. + +You can find the details in [modifying schemas](/en/reference/schemas/schemas#modifying-schemas). + diff --git a/mintlify-docs/en/basics/whats-more.mdx b/mintlify-docs/en/basics/whats-more.mdx new file mode 100644 index 0000000000..089a4ce5ca --- /dev/null +++ b/mintlify-docs/en/basics/whats-more.mdx @@ -0,0 +1,17 @@ +--- +title: What's more +description: "The Vespa basics articles introduce the central concepts in Vespa, but can't cover everything needed to build complete applications." +--- + +Some additional important features are: + +- **[Grouping and aggregation](/en/querying/grouping)** (faceting): Grouping in the query language lets you specify hierarchical groupings and aggregations that will be performed over all the matches to a query distributed over all participating nodes. +- **Streaming search**: In applications where queries search fixed small subsets of all data (such as a user or tenant) it is not cost-effective to build indexes. For these use cases Vespa supports a [streaming mode](/en/performance/streaming-search) which delivers low-latency search without the cost of maintaining indexes or even keeping data in memory. 
+- **Application components**: Applications can include Java components that implement application logic, such as intercepting queries and results and implementing custom workflows ([Searchers](/en/applications/searchers)), modifying write operations ([document processors](/en/applications/document-processors)), and implementing custom APIs ([handlers](/en/applications/request-handlers)). +- **Parent-child relations**: Joins are not supported in Vespa because they wouldn't scale, but the special case where one side of the join is much smaller than the other is supported. This is called a [parent-child relation](/en/schemas/parent-child). +- **Federation**: Most applications federate over multiple types of content. Vespa will federate over all schemas and clusters by default, and includes a [federation framework](/en/querying/federation) which lets you define application-specific schemes to formulate queries to each content type, include content from other services, combine content in application-specific ways and so on. +- **Predicate fields**: Sometimes it is useful to allow documents to specify when they should be matched, as conditions on properties sent with the query, for example to let content target specific kinds of users. This can be done using [predicate fields](/en/schemas/predicate-fields). +- **Geo search**: By using [geo fields](/en/querying/geo-search), you can find documents within a given area, use distance to the query in ranking, or even retrieve by distance to a path for route planning. +- **Mutable attributes**: By defining [mutable attributes](/en/reference/schemas/schemas#mutate) on the documents, applications can collect statistics in real time on each document to track how often they are matched, ranked, and returned in results. + +Read more in the full [features](/en/learn/features) list. 
diff --git a/mintlify-docs/en/basics/writing.mdx b/mintlify-docs/en/basics/writing.mdx new file mode 100644 index 0000000000..45cce4a74c --- /dev/null +++ b/mintlify-docs/en/basics/writing.mdx @@ -0,0 +1,70 @@ +--- +title: Writing +description: "This is an introduction to writing data into Vespa." +--- + +## Documents + +Once you have added one or more schemas to an application, and have added `<document-api>` in `services.xml` to the container cluster you want to handle writes, you can send writes following those schemas. A document is written as a JSON map containing a value for each field: + +```json +{ + "put": "id:my-namespace:my-documenttype::my-id-string", + "fields": { + "myTextField": "Hello world!", + "myNumericAttribute": 13.8, + "myEmbedding": [0.3, 1.45, 1.03] + } +} +``` + +Each document has an id, which has two parts which can be decided freely: + + - The **namespace**, which is just a string used to avoid name collisions if you have multiple kinds of clients deciding ids and not used for any other purpose + - The **id string**, which can be any string you want, for example a product id or a url + +Fields can remain empty; you do not need to set a value for every field defined in the document type. + +You can find complete information on the document format in the [document JSON format reference](/en/reference/schemas/document-json-format). + +## Writing documents + +Documents are written to your application instance's *write endpoint*, using the [document/v1](/en/writing/document-v1-api-guide) HTTP API. You can use the API directly, or use one of the clients provided by Vespa: + +- **Command line, with [Vespa CLI](/en/clients/vespa-cli)**: [`vespa feed`](/en/clients/vespa-cli#documents) to feed one or many documents to Vespa. 
+- **Python, with [PyVespa](https://vespa-engine.github.io/pyvespa/)**: [`application.feed_iterable(...)`](https://vespa-engine.github.io/pyvespa/reads-writes.html#feeding-operations-from-a-file) +- **Java, with the [Java Feed Client](/en/clients/vespa-feed-client)**: [`myFeedClient.put(id, json, params)`](/en/clients/vespa-feed-client#example-java) + +Documents can also be removed, retrieved, and updated using the same API and clients. + +## Updating documents + +Documents can be fully replaced by a new version by writing them again, but you can also update any individual fields of existing documents. This is especially useful for updating attribute fields such as behavior signals or prices at high throughput, without impacting other fields and indexes. + +Updates are sent in the same way as document puts; it's just the format that's different: + +```json +{ + "update": "id:my-namespace:my-documenttype::my-id-string", + "fields": { + "myTextField": { + "assign": "Some new value" + } + } +} +``` + +Updates can also increment numerical values, add to arrays and tensors, etc. Read more in the [partial update guide](/en/writing/partial-updates). + +## Writes are streamed and realtime + +Write operations to Vespa are streamed (using HTTP/2), and processed asynchronously. There is no need for a separate batch API to feed with the maximal throughput a system can handle; servers will push back by responding more slowly when they are close to saturation, and clients use this signal to back off, allowing them to dynamically converge at the maximal throughput a system can handle. + +The write operations to Vespa are always applied in real time: when a write operation is asynchronously acknowledged, the write operation is persisted, fully processed and the result is visible in all subsequent queries. Vespa achieves this by a unique index design, combining in-memory mutable structures with (for full-text) disk-backed posting lists. 
+ +Read more in the [feed sizing doc](/en/performance/sizing-feeding). + +## The document API can also return documents + +In addition to supporting writes, the document/v1 HTTP API can also return single documents by id (get), and stream any selection of a document corpus (visit). Visiting is used for background and one-time jobs such as backup and scraping content for offline machine learning. It is designed to have minimal impact on the running system rather than returning with low latency. Read more in [the document/v1 guide](/en/writing/document-v1-api-guide#data-dump). + diff --git a/mintlify-docs/en/clients/http-best-practices.mdx b/mintlify-docs/en/clients/http-best-practices.mdx new file mode 100644 index 0000000000..c0d7d7c236 --- /dev/null +++ b/mintlify-docs/en/clients/http-best-practices.mdx @@ -0,0 +1,43 @@ +--- +title: HTTP Best Practices +sidebarTitle: "HTTP best practices" +--- + +## Always re-use connections +As connections to a JDisc container cluster are terminated at the individual container nodes, the cost of connection overhead will impact their serving capability. This is especially important for HTTPS/TLS as full TLS handshakes are expensive in terms of CPU cycles. A handshake also entails multiple network round-trips that certainly degrade request latency for new connections. A client instance should therefore re-use HTTPS connections if possible for subsequent requests. + +Note that some client implementations may not re-use connections by default. For instance *Apache HttpClient (Java)* [will by default not re-use connections when configured with a client X.509 certificate](https://stackoverflow.com/a/13049131/1615280). Most programmatic clients require the response content to be fully consumed/read for a connection to be reused. + +## Use multiple connections +Clients performing feed/query must use a sufficient number of connections to spread the load evenly among all containers in a cluster. 
This is due to container clusters being served through a layer 4 load balancer (*Network Load Balancer*). +Too few connections overall may result in an unbalanced workload, and some containers may not receive any traffic at all. This aspect is particularly relevant for applications with large container clusters and/or few client instances. + +## Be aware of server-initiated connection termination +Vespa Cloud will terminate idle connections after a timeout and active connections after a max age threshold is exceeded. +The latter is performed gracefully through mechanisms in the HTTP protocol. +* *HTTP/1.1*: A `Connection: close` header is added to the response for the subsequent request received after timeout. +* *HTTP/2*: A `GOAWAY` frame with error code `NO_ERROR (0x0)` is returned for the subsequent request received after timeout. Be aware that some client implementations may not handle this scenario gracefully. + +Both the idle timeout and max age threshold are aggressive in order to regularly rebalance traffic. This ensures that new container nodes quickly receive traffic from existing client instances, for example when new resources are introduced by the [autoscaler](/en/operations/autoscaling). + +To avoid connection termination issues, clients should configure client-side idle timeouts to **less than 30 seconds** and connection TTL (max age) to **less than 45 seconds**. Proactively closing connections before the server does helps prevent errors caused by server-initiated terminations. Connections should still be reused for subsequent requests — these timeouts control when idle or long-lived connections are recycled, not disabled. Disabling connection reuse entirely would incur the cost of a new TCP connection with TLS handshake for every request. + +## Prefer HTTP/2 +We recommend *HTTP/2* over *HTTP/1.1*. *HTTP/2* multiplexes multiple concurrent requests over a single connection, +and its binary protocol is more compact and efficient. 
+See Vespa's documentation on [HTTP/2](/en/performance/http2) for more details. + +## Be deliberate with timeouts and retries +Make sure to configure your clients with sensible timeouts and retry policies. +Too low timeouts combined with aggressive retries may cause havoc on your Vespa application if latency increases due to overload. + +Handle *transient failures* and *partial failures* through a retry strategy with backoff, for instance *capped exponential backoff* with a random *jitter*. +Consider implementing a [*circuit-breaker*](https://martinfowler.com/bliki/CircuitBreaker.html) for failures persisting over a longer time-span. + +Only retry requests on *server errors* - not on *client errors*. A client should typically not retry requests after receiving a `400 Bad Request` response, or retry a TLS connection after handshake fails with client's X.509 certificate being expired. + +Be careful when handling 5xx responses, especially `503 Service Unavailable` and `504 Gateway Timeout`. These responses typically indicate an overloaded system, and blindly retrying without backoff will only worsen the situation. Clients should reduce overall throughput when receiving such responses. + +The same principle applies to `429 Too Many Requests` responses from the [Document v1 API](/en/writing/document-v1-api-guide), which indicate that the client is exceeding the system's feed capacity. Clients should implement strategies such as reducing the request rate by a specific percentage, introducing exponential backoff, or pausing requests for a short duration before retrying. These adjustments help prevent further overload and allow the system to recover. + +For more general advice on retries and timeouts see *Amazon Builder's Library*'s [excellent article](https://aws.amazon.com/builders-library/timeouts-retries-and-backoff-with-jitter/) on the subject. 
diff --git a/mintlify-docs/en/clients/python-client.mdx b/mintlify-docs/en/clients/python-client.mdx new file mode 100644 index 0000000000..66d0f12153 --- /dev/null +++ b/mintlify-docs/en/clients/python-client.mdx @@ -0,0 +1,4 @@ +--- +title: "Python Client (pyvespa)" +url: "https://vespa-engine.github.io/pyvespa/" +--- \ No newline at end of file diff --git a/mintlify-docs/en/clients/vespa-cli.mdx b/mintlify-docs/en/clients/vespa-cli.mdx new file mode 100644 index 0000000000..8848d85a27 --- /dev/null +++ b/mintlify-docs/en/clients/vespa-cli.mdx @@ -0,0 +1,218 @@ +--- +title: "Vespa CLI" +--- + + +Vespa CLI is the command-line client for Vespa. It is a single binary without any runtime dependencies and is available for Linux, macOS and Windows. With Vespa CLI you can: + +- Clone the [sample applications](https://github.com/vespa-engine/sample-apps/) repository +- Deploy your application to a Vespa installation running locally or remotely +- Deploy your application on [Vespa Cloud](/) +- Feed and [query](/en/querying/query-language#query-examples) documents +- Send custom requests with automatic authentication +- Automate deployment operations with [vespa auth api-key](/en/reference/clients/vespa-cli/vespa_auth_api-key) + +Install Vespa CLI: + +- Homebrew: `brew install vespa-cli` +- Mise: `mise use vespa-cli` +- [Download from GitHub](https://github.com/vespa-engine/vespa/releases) + +To learn the basics on how to use Vespa CLI, see the [quick start guide](/en/basics/deploy-an-application-local) or the [cheat sheet below](#cheat-sheet). + +See the [reference documentation](/en/reference/clients/vespa-cli/vespa) for documentation of individual Vespa CLI commands and their options. This documentation is also bundled with CLI and accessible through `vespa help +<command>` or `man vespa-<command>`. 
+ +MTLS keypair location: + +```bash +$ ls -l .vespa/mytenant.myapp.default/ +total 16 +-rw-r--r-- 1 name staff 3273 Nov 7 08:02 data-plane-private-key.pem +-rw-r--r-- 1 name staff 1697 Nov 7 08:02 data-plane-public-cert.pem +``` + +The `.vespa` directory should be in the home directory or cwd. Remember to run `vespa config set target cloud` for Vespa Cloud endpoints. + +## Cheat sheet + +### Install, configure and run + +```bash +# Install - make sure to upgrade frequently for new features +$ brew install vespa-cli +$ brew upgrade vespa-cli + +# Set home dir to a writeable directory - useful in some container contexts +$ export VESPA_CLI_HOME=/tmp + +# export a token value for dataplane access +$ export VESPA_CLI_DATA_PLANE_TOKEN='value-of-token' + +# Get help +$ vespa document put --help +``` + +### Login and init + +```bash +# Use endpoints on localhost +$ vespa config set target local + +# Use Vespa Cloud +$ vespa config set target cloud + +# Use a browser to log into Vespa Cloud +$ vespa auth login + +# Configure application instance +$ vespa config set application vespa-team.vespacloud-docsearch.default + +# Configure application instance, override global configuration (write to local .vespa) +$ vespa config set --local application vespa-team.vespacloud-docsearch.other +``` + +### Deployment + +```bash expandable +# Deploy an application package from cwd +$ vespa deploy + +# Deploy to a specific zone +$ vespa deploy -z dev.aws-us-east-1c + +# Get the deployed application package as a .zip-file +$ vespa fetch + +# Deploy an application package from cwd to a prod zone with CD pipeline in Vespa Cloud using deployment.xml +$ vespa prod deploy + +# Track deployment to Vespa Cloud status +$ vespa status + +# Validate endpoint status, get endpoint only +$ vespa status --format=plain + +# Remove a deployment from Vespa Cloud +$ vespa destroy -a vespa-team.vespacloud-docsearch.other +``` + +### Documents + +```bash expandable +# Put a document from file +$ vespa 
document put file-with-one-doc.json + +# Put a document +$ vespa document put id:mynamespace:music::a-head-full-of-dreams --data ' +{ + "fields": { + "album": "A Head Full of Dreams", + "artist": "Coldplay" + } +}' + +# Put a document, ID in JSON +$ vespa document put --data ' +{ + "put": "id:mynamespace:music::a-head-full-of-dreams", + "fields": { + "album": "A Head Full of Dreams", + "artist": "Coldplay" + } +}' + +# Update a document +$ vespa document update id:mynamespace:music::a-head-full-of-dreams --data ' +{ + "fields": { + "album": { + "assign": "A Head Full of Thoughts" + } + } +}' + +# Get one or more documents +$ vespa document get id:mynamespace:music::a-head-full-of-dreams +$ vespa document get id:mynamespace:music::a-head-full-of-dreams id:mynamespace:music::when-we-all-fall-asleep-where-do-we-go + +# Delete a document +$ vespa document remove id:mynamespace:music::a-head-full-of-dreams + +# Feed multiple documents or feed from stdin +$ vespa feed *.jsonl +$ cat docs.json | vespa feed - + +# Feed to Vespa Cloud +$ vespa feed --application mytenant.myapp --target https://b123e1db.b68a1234.z.vespa-app.cloud feedfile.json + +# Print successful and failed operations: +$ vespa feed --verbose docs.json + +# Display a periodic summary every 3 seconds while feeding: +$ vespa feed --progress=3 docs.json + +# Export all documents in "doc" schema, using "default" container cluster +$ vespa visit --zone prod.aws-us-east-1c --cluster default --selection doc + +# Export slice 0 of 10 - approximately 10% of the documents +$ vespa visit --slices 10 --slice-id 0 + +# List IDs - great for counting total number of documents +$ vespa visit --field-set "[id]" + +# Export fields "title" and "term_count" from "doc" schema +$ vespa visit --field-set "doc:title,term_count" + +# Export documents using a selection string +$ vespa visit --selection 'doc.last_updated > now() - 86400' + +# Export all documents in "doc" schema, in "open" namespace +$ vespa visit --selection 'doc 
AND id.namespace == "open"' + +# Export a specific document, including synthetic (generated) fields +$ vespa visit --selection 'id == "id:en:doc::doc-en-7764"' --field-set '[all]' + +# Copy documents from one cluster to another: +$ vespa visit --target http://localhost:8080 | vespa feed --target http://localhost:9090 - +``` + + +Notes: + +- The input files for `vespa feed` contain either a JSON array of feed operations, or one JSON operation per line ([JSONL](https://jsonlines.org/)). +- The [`<document-api>`](/en/reference/applications/services/container#document-api) must be enabled in the container before documents can be fed or accessed - see [example](https://github.com/vespa-engine/sample-apps/blob/master/album-recommendation/app/services.xml). +- For automation, see example usage in a [GitHub Action](https://github.com/vespa-engine/documentation/blob/master/.github/workflows/feed.yml). This action uses security credentials in `VESPA_CLI_DATA_PLANE_CERT` and `VESPA_CLI_DATA_PLANE_KEY` for easy security management in GitHub. 
+ + +### Queries + +```bash expandable +# Query for all documents in all schemas / sources +$ vespa query 'yql=select * from sources * where true' + +# YQL parameter is assumed if missing - this is equivalent to the above +$ vespa query 'select * from sources * where true' + +# Query with an extra query API parameter +$ vespa query 'select * from music where album contains "head"' \ + hits=5 + +# Use verbose to print a curl equivalent, too +$ vespa query -v 'select * from music where album contains "head"' hits=5 + +# Query a different port (after modifying http server port) +$ vespa query 'select * from sources * where true' -t 'http://127.0.0.1:9080' + +# Use a query file - useful for large queries, e.g., when using query vectors +$ vespa query --file queries-vector.json +``` +Example query file: + +```json +{ + "yql": "select product_id, title from products where {totalTargetHits: 200}nearestNeighbor(dense_embedding, q_vector)", + "input.query(q_vector)": [-0.050548091530799866, ... ,0.028366032987833023], + "ranking": "vector_distance" +} +``` \ No newline at end of file diff --git a/mintlify-docs/en/clients/vespa-feed-client.mdx b/mintlify-docs/en/clients/vespa-feed-client.mdx new file mode 100644 index 0000000000..47b8f2a2ff --- /dev/null +++ b/mintlify-docs/en/clients/vespa-feed-client.mdx @@ -0,0 +1,145 @@ +--- +title: "vespa-feed-client" +sidebarTitle: "Java feed client" +--- + +- Java library and command line client for feeding document operations using [Document v1](/en/writing/document-v1-api-guide) over [HTTP/2](/en/performance/http2). +- Asynchronous, high-performance Java implementation, with retries and dynamic throttling. +- Supports a JSON array of feed operations, as well as [JSONL](https://jsonlines.org): one operation JSON per line. + +## Installing + +### Java library + +The Java library is available as a [Maven JAR artifact](https://search.maven.org/search?q=g:com.yahoo.vespa%20a:vespa-feed-client) at Maven Central. It requires minimum JDK17. 
+ +Find an example application using this client at [client-java](https://github.com/vespa-engine/sample-apps/blob/master/examples/clients/client-java/README.md). + +### Command line client + +Two alternatives: + +- Install [_vespa-clients_/_vespa_](/en/operations/self-managed/build-install) RPM package. +- Download [vespa-feed-client **zip** artifact](https://search.maven.org/artifact/com.yahoo.vespa/vespa-feed-client-cli) from Maven Central. + +Download example: + +```bash +$ F_REPO="https://repo1.maven.org/maven2/com/yahoo/vespa/vespa-feed-client-cli" && \ + F_VER=$(curl -Ss "${F_REPO}/maven-metadata.xml" | sed -n 's/.*\(.*\)<.*>/\1/p') && \ + curl -SsLo vespa-feed-client-cli.zip ${F_REPO}/${F_VER}/vespa-feed-client-cli-${F_VER}-zip.zip && \ + unzip -o vespa-feed-client-cli.zip +``` + +## Enable feed endpoint in Vespa + +Requirements: + +- [Document API must be enabled on container](/en/reference/applications/services/container#document-api). + +HTTP/2 over [TLS](/en/reference/applications/services/http#ssl) is optional but recommended from a security perspective. + +Example _services.xml_ with TLS: + +```xml highlight= {4-13} + + + + + + + /path/to/private-key.pem + /path/to/certificate.pem + /path/ca-certificates.pem + + + + + + + +``` + +Example _services.xml_ without TLS: + +```xml highlight= {4} + + + + + + +``` + +## Using the client + +The Javadoc for the programmatic API is available at [javadoc.io](https://javadoc.io/doc/com.yahoo.vespa/vespa-feed-client-api). See output of `$ vespa-feed-client --help` for usage. + +Use `--speed-test` for bandwidth testing. + +### Example Java + +Add _vespa-feed-client_ as dependency to your Maven (or other build system using Maven for dependency management): + +``` + + com.yahoo.vespa + vespa-feed-client + 8.689.26 + +``` + +Code examples are listed in the [vespa-feed-client source code](https://github.com/vespa-engine/vespa/tree/master/vespa-feed-client-api/src/test/java/ai/vespa/feed/client/examples) on GitHub. 
+ +- [JsonFileFeederExample.java](https://github.com/vespa-engine/vespa/blob/master/vespa-feed-client-api/src/test/java/ai/vespa/feed/client/examples/JsonFileFeederExample.java) +- [JsonStreamFeederExample.java](https://github.com/vespa-engine/vespa/blob/master/vespa-feed-client-api/src/test/java/ai/vespa/feed/client/examples/JsonStreamFeederExample.java) +- [SimpleExample.java](https://github.com/vespa-engine/vespa/blob/master/vespa-feed-client-api/src/test/java/ai/vespa/feed/client/examples/SimpleExample.java) + +### Example command line + +HTTP/2 over TLS: + +```bash +$ vespa-feed-client \ + --connections 4 \ + --certificate cert.pem --private-key key.pem --ca-certificates ca.pem \ + --file /path/to/json/file \ + --endpoint https://container-endpoint:443/ +``` + +The input must be either a proper JSON array or a series of JSON feed operations ([JSONL](https://jsonlines.org)), in the format described for the Vespa feed client [here](../reference/schemas/document-json-format#document-operations). + +HTTP/2 without TLS: + +```bash +$ vespa-feed-client \ + --connections 4 \ + --file /path/to/json/file \ + --endpoint http://container-endpoint:8080/ +``` + +## Tuning for multi-worker pipelines + +A common pattern is feeding from an [Apache Beam](https://beam.apache.org/) topology (e.g., [Google Cloud Dataflow](https://docs.cloud.google.com/dataflow/docs/overview)). It is important to balance the number of workers and the connection settings. + +As each of the workers initializes its own `FeedClient` instance, the default settings can create too many connections. In this example we assume 128 workers and 10 Vespa Container nodes. With defaults (8 connections per endpoint, 128 max streams per connection), 128 workers open 1,024 connections - each requiring a TLS handshake to the endpoint - which is a major source of container CPU overhead. 
+ +Recommended configuration per worker ([Javadoc](https://javadoc.io/doc/com.yahoo.vespa/vespa-feed-client-api/latest/ai/vespa/feed/client/package-summary.html)): + +```java +FeedClient client = FeedClientBuilder.create(endpoint) + .setConnectionsPerEndpoint(1) + .setMaxStreamPerConnection(maxStreams) + .setInitialInflightFactor(factor) + .build(); +``` + +- `setConnectionsPerEndpoint(1)`: One connection per worker gives 128 total, which is more than sufficient for 10 container nodes. +- `setMaxStreamPerConnection(maxStreams)`: Calculate based on the target feed rate and total number of workers. For example, if the target is 50k docs/sec across 128 workers, each worker needs ~390 docs/sec. With typical per-document latency of 5-10 ms, each worker needs ~2-4 concurrent streams. +- `setInitialInflightFactor(factor)`: The dynamic throttler starts at a low inflight count and slowly ramps up via random walk. If you observe slow ramp-up at the start of a feed job, set this to a higher value (e.g., 4-8) to start closer to the optimal inflight level. The factor multiplies the minimum inflight (2 x connectionsPerEndpoint x endpoints), so with 1 connection and factor 8, you'd start at 16 inflight instead of 2. + + + **Important:** Each worker should create a single `FeedClient` instance and reuse it for the lifetime of the worker. Creating new instances per batch or per document group defeats connection reuse and prevents the throttler from converging. + + +Also, use vespa-feed-client 8.657 or later, for the latest improvements to connection handling and stability. 
\ No newline at end of file diff --git a/mintlify-docs/en/cloud/image.png b/mintlify-docs/en/cloud/image.png new file mode 100644 index 0000000000..497d81f650 Binary files /dev/null and b/mintlify-docs/en/cloud/image.png differ diff --git a/mintlify-docs/en/cloud/quota.mdx b/mintlify-docs/en/cloud/quota.mdx new file mode 100644 index 0000000000..33d1e7bae6 --- /dev/null +++ b/mintlify-docs/en/cloud/quota.mdx @@ -0,0 +1,15 @@ +--- +title: Quota +description: "Tenants in Vespa Cloud have a quota that limits the amount of resources a tenant can use. The quota is expressed as *$/hour*, and is based on the maximum possible cost for a Vespa application." +--- + +That means, if you are using [autoscaling](/en/operations/autoscaling), the quota it will use is based on the maximum configured size of the application. + +You can see how much quota your applications are using in the Vespa Cloud console. The quota a tenant has depends on the [plan](https://vespa.ai/pricing/) the tenant is on: + +| Plan | Quota | +|:-----|:-----| +| Trial | \$2/hour | +| All other plans | \$10/hour | + +Contact [Support](https://vespa.ai/support) to change the quota. \ No newline at end of file diff --git a/mintlify-docs/en/cloud/support.mdx b/mintlify-docs/en/cloud/support.mdx new file mode 100644 index 0000000000..112e476f55 --- /dev/null +++ b/mintlify-docs/en/cloud/support.mdx @@ -0,0 +1,60 @@ +--- +title: "Cloud and Enterprise support" +sidebarTitle: "Support" +--- + +Support options and other resources like status tracking are found at [Vespa Support](https://vespa.ai/support/). + +## Create a support case + +Open a support case using the support portal at [support.vespa.ai](https://support.vespa.ai/). + +Use this for: + +- Production support (reads and writes) +- Deployment support (making changes to applications) +- Technical support (general, including user access) +- Feature requests + +Use the support portal to track your ongoing cases. 
+ +In case of any problems with the support portal itself, mail [support@vespa.ai](mailto:support@vespa.ai). + +You must be a [registered user](https://console.vespa-cloud.com/link/tenant/account/users) in your organization's [tenant](/en/learn/tenant-apps-instances.html) in the Vespa Console to create a support case. + +## Escalate a support case + +Support response times are defined for the different [support levels](https://cloud.vespa.ai/price-calculator?_gl=1*yoo5sb*_gcl_au*ODE0ODM4MTI2LjE3Nzk3MjQ3OTY.). + +To escalate a support case for response within defined SLA, first create the case, then use "Escalate to oncall" to page the Vespa Team: + + +![Example support case](/en/cloud/image.png) + + +An escalation will be acknowledged in the support case ticket within support SLA. Note that non-escalated cases will be handled during regular business hours. + +Depending on support level, your organization might have a shared Slack channel with the Vespa Team. Such a channel does not have an SLA, and does not replace the need to create a support ticket. The Slack channel is used on a best-effort basis and can be a useful tool in the support case process. + +## Incident management + +Depending on the severity of a support case, the Vespa Team might create an incident. + +A customer can request the incident process to be initiated in a support case. + +### Incident process + +An incident creation triggers the incident process. When the incident is resolved, a root cause analysis (RCA) is performed. + +During an incident, the support case is updated with relevant status at regular intervals: + +- The teams can mutually agree to use a shared Slack-channel for status and coordinated work - in this case, the support case will have a link to the Slack channel +- The teams can mutually agree on next steps and timing, in the support case ticket or Slack channel. + +### Post-mortem + +The incident process includes a post-mortem event. 
Post-mortems are held weekly, on cases that are closed at least two days before the post-mortem, and all relevant information for the post-mortem is made available. + +Post-mortems are internal to Vespa.ai. A customer can request a joint post-mortem meeting after the Vespa.ai-internal post-mortem is completed. + +A Post-mortem report is shared with the customer within 7 days of the post-mortem event. \ No newline at end of file diff --git a/mintlify-docs/en/content/attributes.mdx b/mintlify-docs/en/content/attributes.mdx new file mode 100644 index 0000000000..dd4e15ddf7 --- /dev/null +++ b/mintlify-docs/en/content/attributes.mdx @@ -0,0 +1,292 @@ +--- +title: "Document attributes" +description: "An *attribute* is a [schema](/en/reference/schemas/schemas#attribute) keyword, specifying the indexing for a field:" +--- + +```txt +field price type int { +indexing: attribute +} +``` + +Attribute properties and use cases: + +- Flexible [match modes](/en/reference/schemas/schemas#match) including exact match, prefix match, and case-sensitive matching, but not text matching (tokenization and linguistic processing). +- High sustained update rates (avoiding read-apply-write patterns). Any mutating operation against an attribute field is written to Vespa's [transaction log](/en/content/proton#transaction-log) and persisted, but appending to the log is sequential access, not random. Read more in [partial updates](/en/writing/partial-updates). +- Instant query updates - values are immediately searchable. +- [Document Summaries](/en/querying/document-summaries) are memory-only operations if all fields are attributes. +- [Numerical range queries](/en/reference/querying/yql#numeric). + + ```txt + where price > 100 + ``` + +- [Grouping](/en/querying/grouping) - aggregate results into groups - it is also great for generating diversity in results. 
+ + ```txt + all(group(customer) each(max(3) each(output(summary())))) + ``` + +- [Ranking](/en/basics/ranking) - use attribute values directly in rank functions. + + ```txt + rank-profile rank_fresh { + first-phase { + expression { freshness(timestamp) } + } + } + ``` + +- [Sorting](/en/reference/querying/sorting-language) - order results by attribute value. + + ```txt + order by price asc, release_date desc + ``` + +- [Parent/child](/en/schemas/parent-child) - import attribute values from global parent documents. + + ```txt + import field advertiser_ref.name as advertiser_name {} + ``` + +The other field option is *index* - use [index](/en/content/proton#index) for fields used for [text search](/en/querying/text-matching), with [stemming](/en/linguistics/linguistics-opennlp#stemming) and [normalization](/en/linguistics/linguistics-opennlp#normalization). + +An attribute is an in-memory data structure. Attributes speed up query execution and [document updates](/en/writing/partial-updates), trading off memory. As data structures are regularly optimized, consider both static and temporary resource usage - see [attribute memory usage](#attribute-memory-usage) below. Use attributes in document summaries to limit access to storage to generate result sets. + + +![](/assets/img/attributes-update.svg) + + +Configuration overview: + +| | || +| --- | --- | --- | +| **fast-search** | Also see the [reference](/en/reference/schemas/schemas#attribute). Add an [index structure](#index-structures) to improve query performance: ``` field titles type array { indexing : summary \| attribute attribute: fast-search }``` | +| **fast-access** | For high-throughput updates, all nodes with a replica should have the attribute loaded in memory. Depending on replication factor and other configuration, this is not always the case. 
Use [fast-access](/en/reference/schemas/schemas#attribute) to increase feed rates by having replicas on all nodes in memory - see the [reference](/en/reference/schemas/schemas#attribute) and [sizing feeding](/en/performance/sizing-feeding). ``` field titles type array { indexing : summary \| attribute attribute: fast-access }``` | +| **distance-metric** | Features like [nearest neighbor search](/en/querying/nearest-neighbor-search) require a [distance-metric](/en/reference/schemas/schemas#distance-metric), and can also have an `hnsw` index to speed up queries. Read more in [approximate nearest neighbor](/en/querying/approximate-nn-hnsw). Pay attention to the field's `index` setting to enable the index: ``` field image_sift_encoding type tensor(x\[128\]) { indexing: summary \| attribute \| index attribute { distance-metric: euclidean } index { hnsw { max-links-per-node: 16 neighbors-to-explore-at-insert: 500 } } }``` | + +The attribute field's data type decides which data structures are used by the attribute to store values for that field across all documents on a content node. For some data types, a combination of data structures is used: + +- *Attribute Multivalue Mapping* stores arrays of values for array and weighted set types. +- *Attribute Enum Store* stores unique strings for all string attributes and unique values for attributes with [fast-search](/en/content/attributes#fast-search). +- *Attribute Tensor Store* stores tensor values for all tensor attributes. + +In the following illustration, a row represents a document, while a named column represents an attribute. + + +![](/assets/img/attributes.svg) + + +Attributes can be: + +| Type | Size | Description | +| :--- | :--- | :--- | +| Single-valued | Fixed | Like the "A" attribute, example `int`. The element size is the size of the type, like 4 bytes for an integer. A memory buffer (indexed by Local ID) holds all values directly. | +| Multi-valued | Fixed | Like the "B" attribute, example `array`. 
A memory buffer (indexed by Local ID) is holding references (32 bit) to where in the *Multivalue Mapping* the arrays are stored. The *Multivalue Mapping* consists of multiple memory buffers, where arrays of the same size are co-located in the same buffer. | +| Multi-valued | Variable | Like the "B" attribute, example `array`. A memory buffer (indexed by Local ID) is holding references (32 bit) to where in the *Multivalue Mapping* the arrays are stored. The unique strings are stored in the *Enum Store*, and the arrays in the *Multivalue Mapping* stores the references (32 bit) to the strings in the *Enum Store*. The *Enum Store* consists of multiple memory buffers. | +| Single-valued | Variable | Like the "C" attribute, example `string`. A memory buffer (indexed by Local ID) is holding references (32 bit) to where in the *Enum Store* the strings are stored. | +| Tensor | Fixed / Variable | Like the "D" attribute, example `tensor(x{},y[64])`. A memory buffer (indexed by Local ID) is holding references (32 bit) to where in the *Tensor Store* the tensor values are stored. The memory layout in the *Tensor Store* depends on the tensor type. | + +The "A", "B", "C" and "D" attribute memory buffers have attribute values or references in Local ID (LID) order - see [document meta store](#document-meta-store). + +When updating an attribute, the full value is written. This also applies to [multivalue](/en/basics/schemas#document-fields) fields - example adding an item to an array: + +1. Space for the new array is reserved in a memory buffer +2. The current value is copied +3. The new element is written + +This means that larger fields will copy more data at updates. It also implies that updates to [weighted sets](/en/reference/schemas/schemas#weightedset) are faster when using numeric keys (less memory and easier comparisons). + +Data stored in the *Multivalue Mapping*, *Enum Store* and *Tensor Store* is referenced using 32 bit references. 
This address space can become full, and then feeding is blocked - [learn more](/en/writing/feed-block). For array or weighted set attributes, the max limit on the number of documents that can have the same number of values is approx 2 billion per node. For string attributes or attributes with [fast-search](/en/content/attributes#fast-search), the max limit on the number of unique values is approx 2 billion per node. + +## Index structures + +Without `fast-search`, attribute access is a memory lookup, being one value or all values, depending on query execution. An attribute is a linear array-like data structure - matching documents potentially means scanning *all* attribute values. + +Setting [fast-search](/en/reference/schemas/schemas#attribute) creates an index structure for quicker lookup and search. This consists of a [dictionary](/en/reference/schemas/schemas#dictionary) pointing to posting lists. This uses more memory, and also more CPU when updating documents. It increases steady state memory usage for all attribute types and also adds initialization overhead for numeric types. + +The default dictionary is a b-tree of attribute *values*, pointing to an *occurrence* b-tree (posting list) of local doc IDs for each value, exemplified in the A-attribute below. Using `dictionary: hash` on the attribute generates a hash table of attribute values pointing to the posting lists, as in the C-attribute (short posting lists are represented as arrays instead of b-trees): + + +![](/assets/img/attributes-indexes.svg) + + +Notes: + +- If a value occurs in many documents, the *occurrence* b-tree grows large. For such values, a boolean-occurrence list (i.e. bitvector) is generated in addition to the b-tree. +- Setting `fast-search` is not observable in the files on disk, other than size. +- `fast-search` causes a memory increase even for empty fields, due to the extra index structures created. E.g. 
single value fields will have the "undefined value" when empty, and there is a posting list for this value. +- The *value* b-tree enables fast range-searches in numerical attributes. This is also available for `hash`-based dictionaries, but slower as a full scan is needed. + +Using `fast-search` has many implications, read more in [when to use fast-search](/en/performance/feature-tuning#when-to-use-fast-search-for-attribute-fields). + +## Attribute memory usage + +Attribute structures are regularly optimized, and this causes temporary resource usage - read more in [maintenance jobs](/en/content/proton#proton-maintenance-jobs). The memory footprint of an attribute depends on a few factors, data type being the most important: + +- Numeric (int, long, byte, and double) and Boolean (bit) types - fixed length and fixed cost per document +- String type - the footprint depends on the length of the strings and how many unique strings need to be stored. + +Collection types like array and weighted sets increase the memory usage somewhat, but the main factor is the average number of values per document. String attributes are typically the largest attributes, and require the most memory during initialization - use boolean/numeric types where possible. Example, refer to formulas below: + +```js +schema foo { +document bar { + field titles type array { + indexing: summary | attribute + } + } +} +``` + +- Assume average 10 values per document, average string length 15, 100k unique strings and 20M documents. +- Steady state memory usage is approx 1 GB \(20M\*4\*(6/5) + 20M\*10\*4\*(6/5) + 100k\*(15+1+4+4)\*(6/5)\). +- During initialization (loading attribute from disk) an additional 2.4 GB is allocated \(20M\*10\*(4+4+4)\), for each value: + +- local document ID +- enum value +- weight +- Increasing the average number of values per document to 20 (double) will also double the memory footprint during initialization (4.8 GB). 
+ +When doing the capacity planning, keep in mind the maximum footprint, which occurs during initialization. For the steady state footprint, the number of unique values is important for string attributes. + +Check the [Example attribute sizing spreadsheet](/assets/attribute-memory-Vespa.xls), with various data types and collection types. It also contains estimates for how many documents a 48 GB RAM node can hold, taking initialization into account. + +[Multivalue](/en/basics/schemas#document-fields) attributes use an adaptive approach in how data is stored in memory, and up to 2 billion documents per node is supported. + + +**Pro-tip:** + +The proton */state/v1/* interface can be explored for attribute memory usage. This is an undocumented debug-interface, subject to change at any moment - example: *http://localhost:19110/state/v1/custom/component/documentdb/music/subdb/ready/attribute/artist* + + +## Attribute file usage + +Attribute data is stored in two locations on disk: + +- The attribute store in memory, which is regularly flushed to disk. At startup, the flushed files are used to quickly populate the memory structures, resulting in a much quicker startup compared to generating the attribute store from the source in the document store. The attribute store will temporarily double its disk usage when generating a new flush file, see [attribute flush](/en/content/proton#attribute-flush). +- The document store on disk. Documents here are used to (re)generate index structures, as well as being the source for replica generation across nodes. Note that the attribute data is stored in the document store regardless of the [summary](/en/querying/document-summaries) configuration. + +The different field types use various data types for storage, see below, a conservative rule of thumb for steady-state disk usage is hence twice the data size. + +## Sizing + +Attribute sizing is not an exact science but rather an approximation. The reason is that they vary in size. 
Both the number of documents, number of values, and uniqueness of the values are variable. The components of the attributes that occupy memory are: + +| Abbreviation | Concept | Comment | +| :--- | :--- | :--- | +| D | Number of documents | Number of documents on the node, or rather the maximum number of local document IDs allocated | +| V | Average number of values per document | Only applicable for arrays and weighted sets | +| U | Number of unique values | Only applies for strings or if [fast-search](/en/reference/schemas/schemas#attribute) is set | +| FW | Fixed data width | sizeof(T) for numerics, 1 byte for strings, 1 bit for boolean | +| WW | Weight width | Width of the weight in a weighted set, 4 bytes. 0 bytes for arrays. | +| EIW | Enum index width | Width of the index into the enum store, 4 bytes. Used by all strings and other attributes if [fast-search](/en/reference/schemas/schemas#attribute) is set | +| VW | Variable data width | strlen(s) for strings, 0 bytes for the rest | +| PW | Posting entry width | Width of a posting list entry, 4 bytes for singlevalue, 8 bytes for array and weighted sets. Only applies if [fast-search](/en/reference/schemas/schemas#attribute) is set. | +| PIW | Posting index width | Width of the index into the store of posting lists; 4 bytes | +| MIW | Multivalue index width | Width of the index into the multivalue mapping; 4 bytes | +| ROF | Resize overhead factor | Default is 6/5. This is the average overhead in any dynamic vector due to resizing strategy. Resize strategy is 50% indicating that structure is 5/6 full on average. | + +### Components + +| Component | Formula | Approx Factor | Applies to | +| :--- | :--- | :--- | :--- | +| Document vector | D * ((FW or EIW) or MIW) | ROF | FW for singlevalue numeric attributes and MIW for multivalue attributes. 
EIW for singlevalue string or if the attribute is singlevalue fast-search | +| Multivalue mapping | D * V * ((FW or EIW) + WW) | ROF | Applicable only for array or weighted sets. EIW if string or fast-search | +| Enum store | U * ((FW + VW) + 4 + ((EIW + PIW) or EIW)) | ROF | Applicable for strings or if fast-search is set. (EIW + PIW) if fast-search is set, EIW otherwise. | +| Posting list | D * V * PW | ROF | Applicable if fast-search is set | + +### Variants + +| Type | Components | Formula | +| :--- | :--- | :--- | +| Numeric singlevalue plain | Document vector | D * FW * ROF | +| Numeric multivalue value plain | Document vector, Multivalue mapping | D * MIW * ROF + D * V * (FW+WW) * ROF | +| Numeric singlevalue fast-search | Document vector, Enum store, Posting List | D * EIW * ROF + U * (FW+4+EIW+PIW) * ROF + D * PW * ROF | +| Numeric multivalue value fast-search | Document vector, Multivalue mapping, Enum store, Posting List | D * MIW * ROF + D * V * (EIW+WW) * ROF + U * (FW+4+EIW+PIW) * ROF + D * V * PW * ROF | +| Singlevalue string plain | Document vector, Enum store | D * EIW * ROF + U * (FW+VW+4+EIW) * ROF | +| Singlevalue string fast-search | Document vector, Enum store, Posting List | D * EIW * ROF + U * (FW+VW+4+EIW+PIW) * ROF + D * PW * ROF | +| Multivalue string plain | Document vector, Multivalue mapping, Enum store | D * MIW * ROF + D * V * (EIW+WW) * ROF + U * (FW+VW+4+EIW) * ROF | +| Multivalue string fast-search | Document vector, Multivalue mapping, Enum store, Posting list | D * MIW * ROF + D * V * (EIW+WW) * ROF + U * (FW+VW+4+EIW+PIW) * ROF + D * V * PW * ROF | +| Boolean singlevalue | Document vector | D * FW * ROF | + +## Paged attributes + +Regular attribute fields are guaranteed to be in-memory, while the [paged](/en/reference/schemas/schemas#attribute) attribute setting allows paging the attribute data out of memory to disk. 
The `paged` setting is *not* supported for the following types: + +- [tensor](/en/reference/schemas/schemas#tensor) with [fast-rank](/en/reference/schemas/schemas#attribute). +- [predicate](/en/reference/schemas/schemas#predicate). + +For attribute fields using [fast-search](/en/reference/schemas/schemas#attribute), the memory needed for dictionary and index structures are never paged out to disk. + +Using the `paged` setting for attributes is an alternative when there are memory resource constraints and the attribute data is only accessed by a limited number of hits per query during ranking. E.g. a dense tensor attribute which is only used during a [re-ranking phase](/en/ranking/phased-ranking), where the number of attribute accesses are limited by the re-ranking phase count. + +For example using a second phase [total-rerank-count](/en/reference/schemas/schemas#secondphase-total-rerank-count) of 100 will limit the maximum number of page-ins/disk access per query to 100. Running at 100 QPS would need up to 10K disk accesses per second. This is the worst case if none of the accessed attribute data were paged into memory already. This depends on access locality and memory pressure (size of the attribute data versus available memory). + +In this example, we have a dense tensor with 1024 [int8](/en/reference/ranking/tensor#tensor-type-spec) values. The tensor attribute is only accessed during re-ranking (second-phase ranking expression): + +```txt +schema foo { + document foo { + field tensordata type tensor(x[1024]) { + indexing: attribute + attribute: paged + } + } + rank-profile foo { + first-phase {} + second-phase { + total-rerank-count: 100 + expression: sum(attribute(tensordata)) + } + } +} +``` + +For some use cases where serving latency SLA is not strict and query throughput is low, the `paged` attribute setting might be a tuning alternative, as it allows storing more data per node. 
+ +### Paged attributes disadvantages + +The disadvantages of using *paged* attributes are many: + +- Unpredictable query latency as attribute access might touch disk. Limited queries per second throughput per node (depends on the locality of document re-ranking requests). +- Paged attributes are implemented by file-backed memory mappings. The performance depends on the [Linux virtual memory management](https://tldp.org/LDP/tlk/mm/memory.html) ability to page data in and out. Using many threads per search/high query throughput might cause high system (kernel) CPU and system unresponsiveness. +- The content node's total memory utilization will be close to 100% when using paged attributes. It's up to the Linux kernel to determine what part of the attribute data is paged into memory based on access patterns. A good understanding of how the Linux virtual memory management system works is recommended before enabling paged attributes. +- The [memory usage metrics](/en/performance/sizing-search#metrics-for-vespa-sizing) from content nodes are not reflecting the reality when using paged attributes. They can indicate a usage that is much higher than the available memory on the node. This is because attribute memory usage is reported as the amount of data contained in the attribute, and whether this data is paged out to disk is controlled by the Linux kernel. +- Using paged attributes doubles the disk usage of attribute data. For example if the original attribute size is 92 GB (100M documents of the above 1024 int8 per document schema), using the `paged` setting will double the attribute disk usage to close to 200 GB. +- Changing the `paged` setting (e.g. removing the option) on a running system might cause hard out-of-memory situations as without `paged`, the content nodes will attempt loading the attribute into memory without the option for page outs. 
+- Using a paged attribute in [first-phase](/en/ranking/phased-ranking) ranking can result in extremely high query latency if a large amount of the corpus is retrieved by the query. The number of disk accesses will, in the worst case, be equal to the number of hits the query produces. A similar problem can occur if running a query that searches a paged attribute. +- Using `paged` in combination with [HNSW indexing](/en/querying/approximate-nn-hnsw) is *strongly* discouraged. *HNSW* indexing also searches and reads tensors during indexing, causing random access during feeding. Once the system memory usage reaches 100%, the Linux kernel will start paging pages in and out of memory. This can cause a high system (kernel) CPU and slows down HNSW indexing throughput significantly. + +## Mutable attributes + +[Mutable attributes](/en/reference/schemas/schemas#mutate) is document metadata for matching and ranking performance per document. + +The attribute values are mutated as part of query execution, as defined in rank profiles - see [rank phase statistics](/en/ranking/phased-ranking#rank-phase-statistics) for details. + +## Document meta store + +The document meta store is an in-memory data structure for all documents on a node. It is an *implicit attribute*, and is [compacted](/en/content/proton#lid-space-compaction) and [flushed](/en/content/proton#attribute-flush). Memory usage for applications with small documents / no other attributes can be dominated by this attribute. + +The document meta store scales linearly with number of documents - using approximately 30 bytes per document. 
The metric *content.proton.documentdb.ready.attribute.memory_usage.allocated_bytes* for `"field": "[documentmetastore]"` is the size of the document meta store in memory - use the [metric API](/en/reference/api/state-v1#state-v1-metrics) to find the size - in this example, the node has 9M ready documents with 52 bytes in memory per document: + +```json highlight= {10,14} +{ + "name": "content.proton.documentdb.ready.attribute.memory_usage.allocated_bytes", + "description": "The number of allocated bytes", + "values": { + "average": 4.69736008E8, + "count": 12, + "rate": 0.2, + "min": 469736008, + "max": 469736008, + "last": 469736008 + }, + "dimensions": { + "documenttype": "doctype", + "field": "[documentmetastore]" + } +}, +``` + +The above is for the *ready* documents, also check *removed* and *notready* - refer to [sub-databases](/en/content/proton#sub-databases). diff --git a/mintlify-docs/en/content/buckets.mdx b/mintlify-docs/en/content/buckets.mdx new file mode 100644 index 0000000000..beb55d1346 --- /dev/null +++ b/mintlify-docs/en/content/buckets.mdx @@ -0,0 +1,98 @@ +--- +title: "Buckets" +description: "The content layer splits the document space into chunks called *buckets*, and algorithmically maps documents to buckets by their id. The cluster automatically splits and joins buckets to maintain a uniform distribution across all nodes and to keep bucket sizes within configurable limits." +--- + +Documents have string identifiers that maps to a 58 bit numeric location. A bucket is defined as all the documents that shares a given amount of the least significant bits within the location. The amount of bits used controls how many buckets will exist. For instance, if a bucket contains all documents whose 8 LSB bits is 0x01, the bucket can be split in two by using the 9th bit in the location to split them. Similarly, buckets can be joined by requiring one less bit in common. + +## Distribution + +Distribution happens in several layers. 
+ + - Documents map to 58 bit numeric locations. + - Locations map to buckets + - Buckets map to distributors responsible for handling requests related to those buckets. + - Buckets map to content nodes responsible for storing replicas of buckets. + +### Document to location distribution + +Document identifiers use [document identifier schemes](/en/schemas/documents) to map documents to locations. This way it is possible to co-locate data within buckets by enforcing some documents to have common LSB bits. Specifying a group or numeric value with the n and g options overrides the 32 LSB bits of the location. Only use when required, e.g. when using streaming search for personal search. + +### Location to bucket distribution + +The cluster state contains a distribution bit count, which is the amount of location bits to use to generate buckets which can be mapped to distributors. + +The cluster state may change the number of distribution bits to adjust the number of buckets distributed at this level. When adding more nodes to the cluster, the number of buckets increases in order for the distribution to remain uniform. + +Altering the distribution bit count causes a redistribution of all buckets. + +If locations have been overridden to co-localize documents into few units, the distribution of documents into these buckets may be skewed. + +### Bucket to distributor distribution + +Buckets are mapped to distributors using the ideal state algorithm. + +### Bucket to content node distribution + +Buckets are mapped to content nodes using the ideal state algorithm. As the content nodes persist data, changing bucket ownership takes more time/resources than on the distributors. + +There is usually a replica of a bucket on the same content node as the distributor owning the bucket, as the same algorithm is used. 
+ +The distributors may split the buckets further than the distribution bit count indicates, allowing more units to be distributed among the content nodes to create a more even distribution, while not affecting routing from client to distributors. + +## Maintenance operations + +The content layer defines a set of maintenance operations to keep the cluster balanced. Distributors schedule maintenance operations and issue them to content nodes. Maintenance operations are typically not high priority requests. Scheduling a maintenance operation does not block any external operations. + +| | | +| :--- | :--- | +| **Split bucket** | Split a bucket in two, by enforcing the documents within the new buckets to have more location bits in common. Buckets are split either because they have grown too big, or because the cluster wants to use more distribution bits. | +| **Join bucket** | Join two buckets into one. If a bucket has been previously split due to being large, but documents have now been deleted, the bucket can be joined again. | +| **Merge bucket** | If there are multiple replicas of a bucket, but they do not store the same set of versioned documents, _merge_ is used to synchronize the replicas. A special case of a merge is a one-way merge, which may be done if some of the replicas are to be deleted right after the merge. Merging is used not only to fix inconsistent bucket replicas, but also to move buckets between nodes. To move a bucket, an empty replica is created on the target node, a merge is executed, and the source bucket is deleted. | +| **Create bucket** | This operation exist merely for the distributor to notify a content node that it is now to store documents for this bucket too. This allows content nodes to refuse operations towards buckets it does not own. The ability to refuse traffic is a safeguard to avoid inconsistencies. 
If a client talks to a distributor that is no longer working correctly, we rather want its requests to fail than to alter the content cluster in strange ways. | +| **Delete bucket** | Drop stored state for a bucket and reject further requests for it | +| **(De)activate bucket** | Activate bucket for search results - refer to [bucket management](/en/content/proton#bucket-management) | +| **Garbage collections** | If configured, documents are periodically garbage collected through background maintenance operations. | + +### Bucket split size + +The distributors may split existing buckets further to keep bucket sizes at manageable levels, or to ensure more units to split among the backends and their partitions. + +Using small buckets, the distribution will be more uniform and bucket operations will be smaller. Using large buckets, less memory is needed for metadata operations and bucket splitting and joining is less frequent. + +The size limits may be altered by configuring [bucket splitting](/en/reference/applications/services/content#bucket-splitting). + +## Document to bucket distribution + +Each document has a document identifier following a document identifier [uri scheme](/en/schemas/documents). From this scheme a 58 bit numeric _location_ is generated. Typically, all the bits are created from an MD5 checksum of the whole identifier. + +Schemes specifying a _groupname_, will have the LSB bits of the location set to a hash of the _groupname_. Thus, all documents belonging to that group will have locations with similar least significant bits, which will put them in the same bucket. If buckets end up split far enough to use more bits than the hash bits overridden by the group, the data will be split into many buckets, but each will typically only contain data for that group. + +MD5 checksums maps document identifiers to random locations. This creates a uniform bucket distribution, and is default. 
For some use cases, it is better to co-locate documents, optimizing grouped access - an example is personal documents. By enforcing some documents to map to similar locations, these documents are likely to end up in the same actual buckets. There are several use cases where this may be useful: + +- When migrating documents for some entity between clusters, this may be implemented more efficiently if the entity is contained in just a few buckets rather than having documents scattered around all the existing buckets. +- If operations to the cluster are clustered somehow, clustering the documents equally in the backend may make better use of caches. For instance, if a service stores data for users, and traffic is typically created for users at short intervals while the users are actively using the service, clustering user data may allow a lot of the user traffic to be easily cached by generic bucket caches. + +If the `n=` option is specified, the 32 LSB bits of the given number override the 32 LSB bits of the location. If the `g=` option is specified, a hash is created of the group name, and the hash value is then used as if it were specified with `n=`. When the location is calculated, it is mapped to a bucket. Clients map locations to buckets using [distribution bits](#location-to-bucket-distribution). + +Distributors map locations to buckets by searching their bucket database, which is sorted in inverse location order. The common case is that there is exactly one matching bucket. If there are several, there is currently inconsistent bucket splitting. If there are none, the distributor will create a new bucket for the request if it is a request that may create new data. Typically, new buckets are created already split according to the distribution bit count. + +Content nodes should rarely need to map documents to buckets, as distributors specify bucket targets for all requests.
However, as external operations are not queued during bucket splits and joins, the content nodes remap operations to avoid having to fail them due to a bucket having recently been split or joined. + +### Limitations + +One basic limitation to the document to location mapping is that it may never change. If it changes, then documents will suddenly be in the wrong buckets in the cluster. This would violate a core invariant in the system, and is not supported. + +To allow new functionality, document identifier schemes may be extended or created that map to locations in new ways, but the already existing ones must map the same way as they have always done. + +Current document identifier schemes typically allow the 32 least significant bits to be overridden for co-localization, while the remaining 26 bits are reserved for bits created from the MD5 checksum. + +### Splitting + +When there are enough documents co-localized to the same bucket, causing the bucket to be split, it will typically need to split past the 32 LSB. At this split-level and beyond, there is no longer a 1-1 relationship between the node owning the bucket and the nodes its replica data will be stored on. + +The effect of this is that documents sharing a location will be spread across nodes in the entire cluster once they reach a certain size. This enables efficient parallel processing. + +## Bucket space + +Buckets exist in the _default_ or _global_ bucket space. \ No newline at end of file diff --git a/mintlify-docs/en/content/consistency.mdx b/mintlify-docs/en/content/consistency.mdx new file mode 100644 index 0000000000..b6af387670 --- /dev/null +++ b/mintlify-docs/en/content/consistency.mdx @@ -0,0 +1,113 @@ +--- +title: "Vespa Consistency Model" +sidebarTitle: "Consistency Model" +description: "Vespa offers configurable data redundancy with eventual consistency across replicas. It's designed for high efficiency under workloads where eventual consistency is an acceptable tradeoff.
This document aims to go into some detail on what these tradeoffs are, and what you, as a user, can expect." +--- + +## Vespa and CAP + +Vespa may be considered a limited subset of AP under the [CAP theorem](https://en.wikipedia.org/wiki/CAP_theorem). + +Under CAP, there is a fundamental limitation of whether any distributed system can offer guarantees on consistency (C) or availability (A) in scenarios where nodes are partitioned (P) from each other. Since there is no escaping that partitions can and will happen, we talk of systems that are _either_ CP or AP. + +Consistency (C) in CAP implies that reads and writes are strongly consistent, i.e. the system offers _linearizability_. Weaker forms such as causal consistency or "read your writes" consistency are _not_ sufficient. As mentioned initially, Vespa is an eventually consistent data store and therefore does not offer this property. In practice, Consistency requires the use of a majority consensus algorithm, which Vespa does not currently use. + +Availability (A) in CAP implies that _all requests_ receive a non-error response regardless of how the network may be partitioned. Vespa is dependent on a centralized (but fault tolerant) node health checker and coordinator. A network partition may take place between the coordinator and a subset of nodes. Operations to nodes in this subset aren't guaranteed to succeed until the partition heals. As a consequence, Vespa is not _guaranteed_ to be strongly available, so we treat this as a "limited subset" of AP (though this is not technically part of the CAP definition). + +In _practice_, the best-effort semantics of Vespa have proven to be both robust and highly available in common datacenter networks. + +## Write durability and consistency + +When a client receives a successful [write](/en/writing/reads-and-writes) response, the operation has been written and synced to disk. The replication level is configurable.
Operations are by default written on _all_ available replica nodes before sending a response. "Available" here means being Up in the [cluster state](/en/content/content-nodes#cluster-state), which is determined by the fault-tolerant, centralized Cluster Controller service. If a cluster has a total of 3 nodes, 2 of these are available and the replication factor is 3, writes will be ACKed to the client if both the available nodes ACK the operation. + +On each replica node, operations are persisted to a write-ahead log before being applied. The system will automatically recover after a crash by replaying logged operations. Writes are guaranteed to be synced to durable storage prior to sending a successful response to the client, so acknowledged writes are retained even in the face of sudden power loss. + +If a client receives a failure response for a write operation, the operation may or may not have taken place on a subset of the replicas. If not all replicas could be written to, they are considered divergent (out of sync). The system detects and reconciles divergent replicas. This happens without any required user intervention. + +Each document write assigns a new wall-clock timestamp to the resulting document version. As a consequence, configure servers with NTP to keep clock drift as small as possible. Large clock drifts may result in timestamp collisions or unexpected operation orderings. + +Vespa has support for conditional writes for individual documents through test-and-set operations. Multi-document transactions are not supported. + +After a successful response, changes to the search indexes are immediately visible by default. + +## Read consistency + +Reads are consistent on a best-effort basis and are not guaranteed to be linearizable. + +When using a [Get](/en/reference/api/document-v1#get) or [Visit](/en/writing/visiting) operation, the client will never observe a partially updated document. For these read operations, writes behave as if they are atomic. 
+ +Searches may observe partial updates, as updates are not atomic across index structures. This can only happen _after_ a write has started, but _before_ it's complete. Once a write is complete, all index updates are visible. + +Searches may observe transient loss of coverage when nodes go down. Vespa will restore coverage automatically when this happens. How fast this happens depends on the configured [searchable-copies](/en/reference/applications/services/content#searchable-copies) value. + +If replicas diverge during a Get, Vespa performs a read-repair. This fetches the requested document from all divergent replicas. The client then receives the version with the newest timestamp. + +If replicas diverge during a Visit, the behavior is slightly different between the Document V1 API and [vespa-visit](/en/reference/operations/self-managed/tools#vespa-visit): + +- Document V1 will prefer immediately visiting the replica that contains the most documents. This means it's possible for a subset of documents in a bucket to not be returned. +- `vespa-visit` will by default retry visiting the bucket until it is in sync. This may take a long time if large parts of the system are out of sync. + +The rationale for this difference in behavior is that Document V1 is usually called in a real-time request context, whereas `vespa-visit` is usually called in a background/batch processing context. + +Visitor operations iterate over the document corpus in an implementation-specific order. Any given document is returned in the state it was in at the time the visitor iterated over the data bucket containing the document. This means there is no snapshot isolation—a document mutation happening concurrently with a visitor may or may not be reflected in the returned document set, depending on whether the mutation happened before or after iteration of the bucket containing the document. + +## Replica reconciliation + +Reconciliation is the act of bringing divergent replicas back into sync. 
This usually happens after a node restarts or fails. It will also happen after network partitions. + +Unlike several other eventually consistent databases, Vespa doesn't use distributed replica operation logs. Instead, reconciling replicas involves exchanging sets of timestamped documents. Reconciliation is complete once the union set of documents is present on all replicas. Metadata is checksummed to determine whether replicas are in sync with each other. + +When reconciling replicas, the newest available version of a document will "win" and become visible. This version may be a remove (tombstone). Tombstones are replicated in the same way as regular documents. + +Reconciliation happens at the document level, not at the field level. I.e. there is no merging of individual fields across different versions. + +If a test-and-set operation updates at least one replica, it will eventually become visible on the other replicas. + +The reconciliation operation is referred to as a "merge" in the rest of the Vespa documentation. + +Tombstone entries have a configurable time-to-live before they are compacted away. Nodes that have been partitioned away from the network for a longer period of time than this TTL should ideally have their indexes removed before being allowed back into the cluster. If not, there is a risk of resurrecting previously removed documents. Vespa does not currently detect or handle this scenario automatically. + +See the documentation on [data-retention-vs-size](/en/operations/self-managed/admin-procedures#data-retention-vs-size). + +## Q/A + + + +When the distributor process that is responsible for a particular data bucket receives a Get operation, it checks its locally cached replica metadata state for inconsistencies. + +If all replicas have consistent metadata, the operation is routed to a single replica—preferably located on the same host as the distributor, if present. This is the normal case when the bucket replicas are in sync.
+ +If there is at least one replica metadata mismatch, the distributor automatically initiates a read-repair process: + +1. The distributor splits the bucket replicas into subsets based on their metadata, where all replicas in each subset have the same metadata. It then sends a lightweight metadata-only Get to one replica in each subset. The core assumption is that all these replicas have the same set of document versions, and that it suffices to consult one replica in the set. If a metadata read fails, the distributor will automatically fail over to another replica in the subset. +2. It then sends one full Get to a node in the replica set that returned the _highest_ timestamp. + +This means that if you have 100 replicas and 1 has different metadata from the remaining 99, only 2 nodes in total will be initially queried, and only 1 will receive the actual (full) Get read. + +Similar algorithms are used by other operations that may trigger read/write-repair. + + + +Unfortunately not. Vespa does not offer any cross-document transactions, so in this case strong consistency implies single-object _linearizability_ (as opposed to _strict serializability_ across multiple objects). Linearizability requires the ability to reach a majority consensus amongst a particular known and stable configuration of replicas (side note: replica sets can be reconfigured in strongly consistent algorithms like Raft and Paxos, but such a reconfiguration must also be threaded through the consensus machinery). + +The active replica set for a given data bucket (and thus the documents it logically contains) is ephemeral and dynamic based on the nodes that are currently available in the cluster (as seen from the cluster controller). This precludes having a stable set of replicas that can be used for reaching majority decisions. + +See also [Vespa and CAP](#vespa-and-cap). 
+ + + +Stale document versions may be returned when all replicas containing the most recent document version have become unavailable. + +Example scenario (for simplicity—but without loss of generality—assuming redundancy 1) in a cluster with two nodes `{A, B}`: + +1. Document X is stored in a replica on node A with timestamp 100. +2. Node A goes down; node B takes over ownership. +3. A write request is received for document X; it is stored on node B with timestamp 200 and ACKed to the client. +4. Node B goes down. +5. Node A comes back up. +6. A read request arrives for document X. The only visible replica is on node A, which ends up serving the request. +7. The document version at timestamp 100 is returned to the client. + +Since the write at `t=200` _happens-after_ the write at `t=100`, returning the version at `t=100` violates linearizability. + + \ No newline at end of file diff --git a/mintlify-docs/en/content/content-nodes.mdx b/mintlify-docs/en/content/content-nodes.mdx new file mode 100644 index 0000000000..eb6f19741a --- /dev/null +++ b/mintlify-docs/en/content/content-nodes.mdx @@ -0,0 +1,330 @@ +--- +title: "Content nodes and states" +--- + + +![](/assets/img/elastic-feed.svg) + + +Content cluster processes are *distributor*, *proton* and *cluster controller*. + +The distributor calculates the correct content node using the distribution algorithm and the [cluster state](#cluster-state). With no known cluster state, the client library will send requests to a random node, which replies with the updated cluster state if the node was incorrect. Cluster states are versioned, such that clients hitting outdated distributors do not override updated states with old states. + +The [distributor](#distributor) keeps track of which content nodes that stores replicas of each bucket (maximum one replica each), based on [redundancy](/en/reference/applications/services/content#redundancy) and information from the *cluster controller*. 
A bucket maps to one distributor only. A distributor keeps a bucket database with bucket metadata. The metadata holds which content nodes store replicas of the buckets, the checksum of the bucket content and the number of documents and meta entries within the bucket. Each document is algorithmically mapped to a bucket and forwarded to the correct content nodes. The distributors detect whether there are enough bucket replicas on the content nodes and add/remove as needed. Write operations wait for replies from every replica and fail if fewer than *redundancy* replicas are persisted within the timeout. + +The [cluster controller](#cluster-controller) manages the state of the distributor and content nodes. This *cluster state* is used by the document processing chains to know which distributor to send documents to, as well as by the distributor to know which content nodes should have which bucket. + +## Cluster state + +There are three kinds of state: [unit state](/en/reference/api/cluster-v2#state-unit), [user state](/en/reference/api/cluster-v2#state-user) and [generated state](/en/reference/api/cluster-v2#state-generated) (a.k.a. *cluster state*). + +For new cluster states, the cluster state version is incremented, and the new cluster state is broadcast to all nodes. There is a minimum time between each cluster state change. + +It is possible to set a minimum capacity for the cluster state to be `up`. + +If a cluster has so many nodes unavailable that it is considered down, the state of each node is irrelevant, and thus new cluster states will not be created and broadcast before enough nodes are back for the cluster to come back up. A cluster state indicating the entire cluster is down may thus have outdated data on the node level. + +## Cluster controller + +The main task of the cluster controller is to maintain the [cluster state](#cluster-state).
This is done by *polling* nodes for state, *generating* a cluster state, which is then *broadcast* to all the content nodes in the cluster. Note that clients do not interface with the cluster controller - they get the cluster state from the distributors - [details](#distributor). + + +| Task | Description | +| :--- | :--- | +| Node state polling | The cluster controller polls nodes, sending the current cluster state. If the cluster state is no longer correct, the node returns correct information immediately. If the state is correct, the request lingers on the node, such that the node can reply to it immediately if its state changes. After a while, the cluster controller will send a new state request to the node, even with one pending. This triggers a reply to the lingering request and makes the new one linger instead. Hence, nodes have a pending state request.
During a controlled shutdown, the node starts the shutdown process by replying to the pending state request, reporting that it is now stopping.
**Note:** As controlled restarts or shutdowns are implemented as TERM signals from the config-sentinel, the cluster controller is not able to distinguish between controlled and other shutdowns. | +| Cluster state generation | The cluster controller translates unit and user states into the generated cluster state | +| Cluster state broadcast | When node unit states are received, a cluster controller internal cluster state is updated. New cluster states are distributed with a minimum interval between them. There is also a grace period per unit state - e.g., distributors and content nodes that are on the same node often stop at the same time.
The version number is incremented, and the new cluster state is broadcast.
If cluster state version is reset, distributors and content node processes may have to be restarted in order for the system to converge to the new state. Nodes will reject lower cluster state versions to prevent race conditions caused by overlapping cluster controller leadership periods. | + +See [cluster controller configuration](/en/operations/self-managed/admin-procedures#cluster-controller-configuration). + +### Master election + +Vespa can be configured with one cluster controller. Reads and writes will work well in case of cluster controller down, but other changes to the cluster (like a content node going down) will not be handled. It is hence recommended to configure a set of cluster controllers. + +The cluster controller nodes elect a master, which does the node polling and cluster state broadcast. The other cluster controller nodes only exist to do master election and potentially take over if the master dies. + +All cluster controllers will vote for the cluster controller with the lowest index that says it is ready. If a cluster controller has more than half of the votes, it will be elected master. As a majority vote is required, the number of cluster controllers should be an odd number of 3 or greater. A fresh master will not broadcast states before a transition time is passed, allowing an old master to have some time to realize it is no longer the master. + +## Distributor + +Buckets are mapped to distributors using the [ideal state algorithm](/en/content/idealstate). As the cluster state changes, buckets are re-mapped immediately. The mapping does not overlap - a bucket is owned by one distributor. + +Distributors do not persist the bucket database, the bucket-to-content-node mapping is kept in memory in the distributor. Document count, persisted size and a metadata checksum per bucket is stored as well. 
At distributor (re)start, content nodes are polled for bucket information, and return which buckets are owned by this distributor (using the ideal state algorithm). There is no centralized bucket directory node. Likewise, at any distributor cluster state change, content nodes are polled for bucket handover - a distributor will then handle a new set of buckets. + +Document operations are mapped to content nodes based on bucket locations - each put/update/get/remove is mapped to a [bucket](/en/content/buckets) and sent to the right content nodes. To manage the document set as it grows and nodes change, buckets move between content nodes. + +Document API clients (i.e. container nodes with [document-api](/en/reference/applications/services/container#document-api)) do not communicate directly with the cluster controller, and do not know the cluster state at startup. Clients therefore start out by sending requests to a random distributor. If the document operation hits the wrong distributor, `WRONG_DISTRIBUTION` is returned, with the current cluster state in the response. `WRONG_DISTRIBUTION` is hence expected and normal at cold start / state change events. + +### Timestamps + +[Write operations](/en/writing/reads-and-writes) have a *last modified time* timestamp assigned when passing through the distributor. The timestamp is guaranteed to be unique within the [bucket](/en/content/buckets) where it is stored. The timestamp is used by the content layer to decide which operation is newest. These timestamps can be used when [visiting](/en/writing/visiting), to process/retrieve documents within a given time range. To guarantee unique timestamps, they are in microseconds - the microsecond part is generated to avoid conflicts with other documents. + +If documents are migrated *between* clusters, the target cluster will have new timestamps for their entries. 
Also, when [reprocessing documents](/en/applications/document-processors) *within* a cluster, documents will have new timestamps, even if not modified. + +### Ordering + +The Document API uses the [document ID](/en/schemas/documents#document-ids) to order operations. A Document API client ensures that only one operation per document is pending at the same time. This ensures that if a client sends multiple operations for the same document, they will be processed in a defined order. This is done by queueing pending operations *locally* at the client. + + +**Note:** + +If sending two write operations to the same document, and the first operation fails, the enqueued operation is sent. In other words, the client does not assume there exists any kind of dependency between separate operations to the same document. If you need to enforce this, use [test-and-set conditions](/en/writing/document-v1-api-guide#conditional-writes) for writes. + + +If *different* clients have pending operations on the same document, the order is unspecified. + +### Maintenance operations + +Distributors track which content nodes have which buckets in their bucket database. Distributors then use the [ideal state algorithm](/en/content/idealstate) to generate bucket *maintenance operations*. A stable system has all buckets located per the ideal state: + +- If buckets have too few replicas, new replicas are generated on other content nodes. +- If the replicas differ, a bucket merge is issued to get replicas consistent. +- If a bucket has too many replicas, the superfluous replicas are deleted. Buckets are merged, if inconsistent, before deletion. +- If two buckets exist, such that both may contain the same document, the buckets are split or joined to remove such overlapping buckets. Read more on [inconsistent buckets](/en/content/buckets). +- If buckets are too small/large, they will be joined or split. + +The maintenance operations have different priorities. 
If no maintenance operations are needed, the cluster is said to be in the *ideal state*. The distributors synchronize maintenance load with user load, e.g. to remap requests to other buckets after bucket splitting and joining. + +### Restart + +When a distributor stops, it will try to respond to any pending cluster state request first. New incoming requests after shutdown is commenced will fail immediately, as the socket is no longer accepting requests. Cluster controllers will thus detect processes stopping almost immediately. + +The cluster state will be updated with the new state internally in the cluster controller. Then the cluster controller will wait for maximum [min_time_between_new_systemstates](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/fleetcontroller.def) before publishing the new cluster state - this to reduce short-term state fluctuations. + +The cluster controller has the option of setting states to make other distributors take over ownership of buckets, or mask the change, making the buckets owned by the distributor restarting unavailable for the time being. + +If the distributor transitions from `up` to `down`, other distributors will request metadata from the content nodes to take over ownership of buckets previously owned by the restarting distributor. Until the distributors have gathered this new metadata from all the content nodes, requests for these buckets can not be served, and will fail back to client. When the restarting node comes back up and is marked `up` in the cluster state again, the additional nodes will discard knowledge of the extra buckets they previously acquired. + +For requests with timeouts of several seconds, the transition should be invisible due to automatic client resending. Requests with a lower timeout might fail, and it is up to the application whether to resend or handle failed requests. + +Requests to buckets not owned by the restarting distributor will not be affected. 
+ +## Content node + +The content node runs *proton*, which is the query backend. + +### Restart + +When a content node does a controlled restart, it marks itself in the `stopping` state and rejects new requests. It will process its pending request queue before shutting down. Consequently, client requests are typically unaffected by content node restarts. The currently pending requests will typically be completed. New copies of buckets will be created on other nodes, to store new requests in appropriate redundancy. This happens whether node transitions through `down` or `maintenance` state. The difference being that if transitioning through `maintenance`, the distributor will not start any effort of synchronizing new copies with existing copies. They will just store the new requests until the maintenance node comes back up. + +When starting, content nodes will start with gathering information on what buckets it has data stored for. While this is happening, the service layer will expose that it is `down`. + +## Metrics + +| Metric | Description | +| :--- | :--- | +| .idealstate.idealstate_diff | This metric tries to create a single value indicating distance to the ideal state. A value of zero indicates that the cluster is in the ideal state. Graphed values of this metric gives a good indication for how fast the cluster gets back to the ideal state after changes. Note that some issues may hide other issues, so sometimes the graph may appear to stand still or even go a bit up again, as resolving one issue may have detected one or several others. | +| .idealstate.buckets_toofewcopies | Specifically lists how many buckets have too few copies. Compare to the *buckets* metric to see how big a portion of the cluster this is. | +| .idealstate.buckets_toomanycopies | Specifically lists how many buckets have too many copies. Compare to the *buckets* metric to see how big a portion of the cluster this is. | +| .idealstate.buckets | The total number of buckets managed. 
Used by other metrics reporting bucket counts to know how big a part of the cluster they relate to. | +| .idealstate.buckets_notrusted | Lists how many buckets have no trusted copies. Without trusted buckets operations against the bucket may have poor performance, having to send requests to many copies to try and create consistent replies. | +| .idealstate.delete_bucket.pending | Lists how many buckets that needs to be deleted. | +| .idealstate.merge_bucket.pending | Lists how many buckets there are, where we suspect not all copies store identical document sets. | +| .idealstate.split_bucket.pending | Lists how many buckets are currently being split. | +| .idealstate.join_bucket.pending | Lists how many buckets are currently being joined. | +| .idealstate.set_bucket_state.pending | Lists how many buckets are currently altered for active state. These are high priority requests which should finish fast, so these requests should seldom be seen as pending. | + +Example, using the [quickstart](/en/basics/deploy-an-application-local) - find the distributor port (look for HTTP): + +```txt +$ docker exec vespa vespa-model-inspect service distributor + +distributor @ vespa-container : content +music/distributor/0 + tcp/vespa-container:19112 (MESSAGING) + tcp/vespa-container:19113 (STATUS RPC) + tcp/vespa-container:19114 (STATE STATUS HTTP) +``` + +Get the metric value: + +```txt +$ docker exec vespa curl -s http://localhost:19114/state/v1/metrics | jq . | \ + grep -A 10 idealstate.merge_bucket.pending + + "name": "vds.idealstate.merge_bucket.pending", + "description": "The number of operations pending", + "values": { + "average": 0, + "sum": 0, + "count": 1, + "rate": 0.016666, + "min": 0, + "max": 0, + "last": 0 + }, +``` + +## /cluster/v2 API examples + +Examples of state manipulation using the [/cluster/v2 API](/en/reference/api/cluster-v2). 
+ +List content clusters: + +```txt +$ curl http://localhost:19050/cluster/v2/ +``` + +```json +{ + "cluster": { + "music": { + "link": "/cluster/v2/music" + }, + "books": { + "link": "/cluster/v2/books" + } + } +} +``` + +Get cluster state and list service types within cluster: + +```txt +$ curl http://localhost:19050/cluster/v2/music +``` + +```json +{ + "state": { + "generated": { + "state": "state-generated", + "reason": "description" + } + }, + "service": { + "distributor": { + "link": "/cluster/v2/music/distributor" + }, + "storage": { + "link": "/cluster/v2/music/storage" + } + } +} +``` + +List nodes per service type for cluster: + +```txt +$ curl http://localhost:19050/cluster/v2/music/storage +``` + +```json +{ + "node": { + "0": { + "link": "/cluster/v2/music/storage/0" + }, + "1": { + "link": "/cluster/v2/music/storage/1" + } + } +} +``` + +Get node state: + +```txt +$ curl http://localhost:19050/cluster/v2/music/storage/0 +``` + +```json +{ + "attributes": { + "hierarchical-group": "group0" + }, + "state": { + "generated": { + "state": "up", + "reason": "" + }, + "unit": { + "state": "up", + "reason": "" + }, + "user": { + "state": "up", + "reason": "" + } + }, + "metrics": { + "bucket-count": 0, + "unique-document-count": 0, + "unique-document-total-size": 0 + } +} +``` + +Get all nodes, including topology information (see `hierarchical-group`): + +```txt +$ curl http://localhost:19050/cluster/v2/music/?recursive=true +``` + +```json expandable +{ + "state": { + "generated": { + "state": "up", + "reason": "" + } + }, + "service": { + "storage": { + "node": { + "0": { + "attributes": { + "hierarchical-group": "group0" + }, + "state": { + "generated": { + "state": "up", + "reason": "" + }, + "unit": { + "state": "up", + "reason": "" + }, + "user": { + "state": "up", + "reason": "" + } + }, + "metrics": { + "bucket-count": 0, + "unique-document-count": 0, + "unique-document-total-size": 0 + } + } + } + } + } +} +``` + +Set node user state: + +```txt 
+curl -X PUT -H "Content-Type: application/json" --data ' + { + "state": { + "user": { + "state": "retired", + "reason": "This node will be removed soon" + } + } + }' \ + http://localhost:19050/cluster/v2/music/storage/0 +``` + +```json +{ + "wasModified": true, + "reason": "ok" +} +``` + +## Further reading + +- Refer to [administrative procedures](/en/operations/self-managed/admin-procedures) for configuration and state monitoring / management. +- Try the [Multinode testing and observability](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode) sample app to get familiar with interfaces and behavior. diff --git a/mintlify-docs/en/content/elasticity.mdx b/mintlify-docs/en/content/elasticity.mdx new file mode 100644 index 0000000000..e6297c22d8 --- /dev/null +++ b/mintlify-docs/en/content/elasticity.mdx @@ -0,0 +1,141 @@ +--- +title: "Elasticity" +description: "Vespa clusters can be grown and shrunk while serving queries and writes. Documents in content clusters are automatically redistributed on changes to maintain an even distribution with minimal data movement. To resize, just change the [nodes](/en/reference/applications/services/services#nodes) and redeploy the application - no restarts needed." +--- + + +![](/en/content/images/elastic-grow.png) + + +Documents are managed by Vespa in chunks called [buckets](#buckets). The size and number of buckets are completely managed by Vespa and there is never any need to manually control sharding. + +The elasticity mechanism is also used to recover from a node loss: New replicas of documents are created automatically on other nodes to maintain the configured redundancy. Failed nodes is therefore not a problem that requires immediate attention - clusters will self-heal from node failures as long as there are sufficient resources. 
+ + +![](/en/content/images/elastic-fail.png) + + +When you want to remove nodes from a content cluster, you can have the system migrate data off them in an orderly fashion prior to removal. This is done by marking nodes as *retired*. This is useful to remove nodes that should be retired, but also to migrate a cluster to entirely new nodes while online: Add the new nodes, mark the old nodes retired, wait for the data to be redistributed and remove the old nodes. + +The auto-elasticity is configured for a normal fail-safe operation, but there are tradeoffs like recovery speed and resource usage. Learn more in [procedures](/en/operations/self-managed/admin-procedures#content-cluster-configuration). + +## Adding nodes + +To add or remove nodes from a content cluster, just change the `nodes` tag of the [content](/en/reference/applications/services/content) cluster in [services.xml](/en/reference/applications/services/services) and [redeploy](/en/basics/applications#deploying-applications). Read more in [procedures](/en/operations/self-managed/admin-procedures). + +When adding a new node, a new *ideal state* is calculated for all buckets. The buckets mapped to the new node are moved, the superfluous are removed. See redistribution example - add a new node to the system, with redundancy n=2: + + +![](/assets/img/add-node-move-buckets.svg) + + +The distribution algorithm generates a random node sequence for each bucket. In this example with n=2, replicas map to the two nodes sorted first. The illustration shows how placement onto two nodes changes as a third node is added. The new node takes over as primary for the buckets where it got sorted first, and as secondary for the buckets where it got sorted second. This ensures minimal data movement when nodes come and go, and allows capacity to be changed easily. + +No buckets are moved between the existing nodes when a new node is added. Based on the pseudo-random sequences, some buckets change from primary to secondary, or are removed. 
Multiple nodes can be added in the same deployment. + +## Removing nodes + +Whether a node fails or is *retired*, the same redistribution happens. If the node is retired, replicas are generated on the other nodes and the node stays up, but with no active replicas. Example of redistribution after node failure, n=2: + + +![](/assets/img/lose-node-move-buckets.svg) + + +Here, node 2 fails. This node held the active replicas of bucket 2 and 6. Once the node fails the secondary replicas are set active. If they were already in a *ready* state, they start serving queries immediately, otherwise they will index replicas, see [searchable-copies](/en/reference/applications/services/content#searchable-copies). All buckets that no longer have secondary replicas are merged to the remaining nodes according to the ideal state. + +## Grouped distribution + +Nodes in content clusters can be placed in [groups](/en/reference/applications/services/content#group). A group of nodes in a content cluster will have one or more complete replicas of the entire document corpus. + + +![](/en/content/images/query-groups.png) + + +This is useful in the cases listed below: + +| | | +| :--- | :--- | +| **Cluster upgrade** | With multiple groups it becomes safe to take out a full group for upgrade instead of just one node at a time. [Read more](/en/operations/self-managed/live-upgrade). +| **Query throughput** | Applications with high query rates and/or high static query cost can use groups to scale to higher query rates since Vespa will automatically send a query to just a single group. [Read more](/en/performance/sizing-search) +| **Topology** | By using groups you can control replica placement over network switches or racks to ensure there is redundancy at the switch and rack level. + +Tuning group sizes and node resources enables applications to easily find the latency/cost sweet spot, the elasticity operations are automatic and queries and writes work as usual with no downtime. 
+ + +### Pinning groups + +While each group contains the same data, they may not return exactly the same search results since each node in each group sees a unique subset of the group's data written in a unique order. This can lead to some inconsistency when users are paging over multiple result pages. To avoid this, Vespa supports *pinning* to a particular search group: Results contain a `searchGroup` field at the top level of the result (root.fields.searchGroup) containing the integer index of the group that produced the result. This can be passed back in the query for the next page as the `model.searchGroup` query parameter. + +Note that: + +- The searchGroup field is only present in results where it is informative and unique: It will not be present when searching content clusters without multiple groups, or when composed of hits from multiple grouped queries. +- The model.searchGroup parameter is a soft preference and always safe to pass: If the group is unavailable or non-existent another group will be used. + +## Changing topology + +A Vespa elasticity feature is the ability to change topology (i.e. grouped distribution) without service disruption. This is a live change, and will auto-redistribute documents to the new topology. + +Also read [topology change](/en/operations/self-managed/admin-procedures#topology-change) if running Vespa self-hosted - the below steps are general for all hosting options. + +### Replicas + +When changing topology, pay attention to the [min-redundancy](/en/reference/applications/services/content#min-redundancy) setting - this setting configures a *minimum* number of replicas in a cluster, the *actual* number is topology dependent - example: + +A flat cluster with min-redundancy n=2 and 15 nodes is changed into a grouped cluster with 3 groups with 5 nodes each (total node count and n are kept unchanged). 
In this case, the actual redundancy will be 3 after the change, as each of the 3 groups will have at least 1 replica for full query coverage. The practical consequence is that disk and memory requirements per node *increase* due to the change to topology. It is therefore important to calculate the actual replica count before reconfiguring topology. + +### Query coverage + +Changing topology might cause query coverage loss in the transition, unless steps are taken in the right order. If full coverage is not important, just make the change and wait for document redistribution to complete. + +To keep full query coverage, make sure not to change both group size and number of groups at the same time: + +1. To add nodes for more data, or to have less data per node, increase group size. E.g., in a 2-group cluster with 8 nodes per group, add 4 nodes for a 25% capacity increase with 10 nodes per group. +2. If the goal is to add query capacity, add one or more groups, with the same node count as existing group(s). A flat cluster is the same as one group - if the flat cluster has 8 nodes, change to a grouped cluster with 2 groups of 8 nodes per group. This will add an empty group, which is put in query serving once populated. + +In short, if the end-state means both changing number of groups and node count per group, do this as separate steps, as a combination of the above. Between each step, wait for document redistribution to complete using the `merge_bucket.pending` metric - see [example](/en/writing/initial-batch-feed). + +## Buckets + +To manage documents, Vespa groups them in *buckets*, using hashing or hints in the [document ID](/en/schemas/documents). + +A document Put or Update is sent to all replicas of the bucket with the document. If bucket replicas are out of sync, a bucket merge operation is run to re-sync the bucket. A bucket contains [tombstones](/en/operations/self-managed/admin-procedures#data-retention-vs-size) of recently removed documents. 
+ +Buckets are split when they grow too large, and joined when they shrink. This is a key feature for high performance in small to large instances, and eliminates the need for downtime or manual operations when scaling. Buckets are purely a content management concept, and data is not stored or indexed in separate buckets, nor do queries relate to buckets in any way. Read more in [buckets](/en/content/buckets). + +## Ideal state distribution algorithm + +The [ideal state distribution algorithm](/en/content/idealstate) uses a variant of the [CRUSH algorithm](https://ceph.io/assets/pdfs/weil-crush-sc06.pdf) to decide bucket placement. It makes a minimal number of documents move when nodes are added or removed. Central to the algorithm is the assignment of a node sequence to each bucket: + + +![](/assets/img/bucket-node-sequence.svg) + + +Steps to assign a bucket to a set of nodes: + + + +Seed a random generator with the bucket ID to generate a pseudo-random sequence of numbers. Using the bucket ID as seed will then always generate the same sequence for the bucket. + + +Order the nodes by [distribution-key](/en/reference/applications/services/content#node) and assign the random numbers in that order. E.g. a node with distribution-key 0 will get the first random number, node 1 the second. + + +Sort the node list by the random number. + + +Select nodes in descending random number order - above, node 1, 3 and 0 will store bucket 0x3c000000000000a0 with n=3 (redundancy). For n=2, node 1 and 3 will store the bucket. This specification of where to place a bucket is called the bucket's *ideal state*. + + + +Repeat this for all buckets in the system. + +## Consistency + +Consistency is maintained at bucket level. Content nodes calculate local checksums based on the bucket contents, and the distributors compare checksums across the bucket replicas. A *bucket merge* is issued to resolve inconsistency, when detected. 
While there are inconsistent bucket replicas, operations are routed to the "best" replica. + +As buckets are split and joined, it is possible for replicas of a bucket to be split at different levels. A node may have been down while its buckets have been split or joined. This is called *inconsistent bucket splitting*. Bucket checksums can not be compared across buckets with different split levels. Consequently, content nodes do not know whether all documents exist in enough replicas in this state. Due to this, inconsistent splitting is one of the highest maintenance priorities. After all buckets are split or joined back to the same level, the content nodes can verify that all the replicas are consistent and fix any detected issues with a merge. [Read more](/en/content/consistency). + +## Further reading + +- [content nodes](/en/content/content-nodes) +- [proton](/en/content/proton) - see *ready* state diff --git a/mintlify-docs/en/content/idealstate.mdx b/mintlify-docs/en/content/idealstate.mdx new file mode 100644 index 0000000000..8a03c1020c --- /dev/null +++ b/mintlify-docs/en/content/idealstate.mdx @@ -0,0 +1,246 @@ +--- +title: "Distribution algorithm" +--- + +The distribution algorithm decides what nodes should be responsible for a given bucket. This is used directly in the clients to calculate distributor to talk to. Content nodes need time to move buckets when the distribution is changing, so routing to content nodes is done using tracked current state. The distribution algorithm decides which content nodes is wanted to store the bucket copies though, and due to this, the algorithm is also referred to as the ideal state algorithm. + +The input to the distribution algorithm is a bucket identifier, together with knowledge about what nodes are available, and what their capacities are. + +The output of the distribution algorithm is a sorted list of the available nodes. The first node in the order is the node most preferred to handle a given bucket. 
Currently, the highest order distributor node will be the owning distributor, and the redundancy factor decides how many of the highest order content nodes are preferred to store copies for a bucket. + +To enable minimal transfer of buckets when the list of available nodes changes, the removal or addition of nodes should not alter the sort order of the remaining nodes. + +Desired qualities for the ideal state algorithm: + +| | | +| :--- | :--- | +| **Minimal reassignment on cluster state change** | - If a node goes down, only buckets that resided on that node should be reassigned.
- If a node comes up, only buckets that are moved to the new node should relocate.
- Increasing the capacity of a single node should only move buckets to that node.
- Reducing the capacity of a single node should only move buckets away from that node. | +| **No skew in distribution** | - Nodes should get an amount of data relative to their capacity. | +| **Lightweight** | - A simple algorithm that is easy to understand is a plus. Being lightweight to calculate is also a plus, giving more options of how to use it, without needing to cache results. | + +## Computational cost + +When considering how efficient the algorithm have to be, it is important to consider how often we need to calculate the ideal locations. Calculations are needed for the following tasks: + +- A client needs to map buckets to the distributors. If there are few buckets existing, all the results can be cached in clients, but for larger clusters, a lot of buckets may need to exist to create an even distribution, and caching becomes more memory intensive. Preferably the computational cost is cheap enough, such that no caching is needed. Currently, no caching is done by clients, but there is typically less than a million buckets, so caching all results would still have been viable. +- Distributors need to calculate ideal state for a single bucket to verify that incoming operations are mapped to the correct distributor (clients have cluster state matching the distributor). This could be eliminated for buckets pre-existing in the bucket database, which would be true in most all cases. Currently, calculation is done for all requests. +- Distributors need to calculate correct content nodes to create bucket copies on when operations to currently non-existing buckets come in. This is typically only something happening at the start of the cluster lifetime though. Normally buckets are created through splitting or joining existing buckets. +- Distributors need to calculate ideal state to check if any maintenance operations need to be done for a bucket. 
+- Content nodes need to calculate ideal state for a single bucket to verify that the correct distributor sent the request. This could be cached or served through bucket database but currently there is no need. + +As long as the algorithm is cheap, we can avoid needing to cache the result. The cache will then not limit scalability, and we have fewer dependencies and less complexity within the content layer. The current algorithm has shown itself cheap enough, such that little caching has been needed. + +## A simple example: Modulo + +A simple approach would be to use a modulo operation to find the most preferred node, and then just order the nodes in configured order from there, skipping nodes that are currently not available: + + +$$ +most\ preferred\ node = bucket\ \%\ nodecount +$$ + +Properties: + +- Computationally lightweight and easy to understand. +- Perfect distribution among nodes. +- Total redistribution on state change. + +By just skipping currently unavailable nodes, nodes can go down and up with minimal movement. However, if the number of configured nodes changes, practically all buckets will be redistributed. As the content layer is intended to be scalable, this breaks with one of the intentions and this algorithm has thus not been considered. + +## Weighted random election + +This is the algorithm that is currently used for distribution in the content layer, as it fits our use case well. + +To avoid a total redistribution on state change, the mapping can not be heavily dependent on the number of nodes in the cluster. By using random numbers, we can distribute the buckets randomly between the nodes, in such a fashion that altering the cluster state has a small impact. As we need the result to be reproducible, we obviously need to use a pseudo-random number generator and not real random numbers. + +The idea is as follows. To find the location of a given bucket, seed a random number generator with the bucket identifier, then draw one number for each node. 
The drawn numbers will then decide upon the preferred node order for that specific bucket. + +For this to be reproducible, all nodes need to draw the same numbers each time. Each node is assigned a distribution key in the configuration. This key decides what random number the node will be assigned. For instance, a node with distribution key 13 will be assigned the 14th random number generated (as the first will go to the node with key 0). The existence of this node then also requires us to always generate at least 14 random numbers to do the calculation. + +Thus, one may end up calculating random numbers for nodes that are currently not available, either because they are temporarily down, or because the configuration has left holes in the distribution key space. It is recommended not to leave too large holes in the distribution key space, to avoid wasting computation on unused keys. + +Using this approach, if you add another node to the cluster, it will roll for each bucket. It should thus steal ownership of some of the buckets. As all the numbers are random, it will steal buckets from all the other nodes, thus, given that the bucket count is large compared to the number of nodes, it will steal on average 1/n of the buckets from each pre-existing node, where n is the number of nodes in the current cluster. Likewise, if a node is removed from the cluster, the remaining nodes will divide the extra load between them. + +### Weighting nodes + +By enforcing all the numbers drawn to be floating point numbers between 0 and 1, we can introduce node weights using the following formula: + +$$ +r^{1/c} +$$ + +Where r is the floating point number between 0 and 1 that was drawn for a given node, and c is the node capacity, which is the weight of the node. The proof is not included here, but this will end up giving each node on average an amount of data that is relative to its capacity. 
That is, for any two nodes X and Y, the number of buckets given to X should be equal to the number of buckets given to Y multiplied by capacity(X)/capacity(Y). (Given perfect random distribution) + +Altering the weight in a running system will also create a minimal redistribution of data. If we reduce the capacity of a node, all of that node's numbers will be reduced, and some of its buckets will be taken over by the other nodes, and vice versa if the capacity is increased. Properties: + +- Minimum data movement on state changes. +- Some skew, depending on how good the random number generator is, the amount of nodes we have to divide buckets between, and the number of buckets we have to divide between them. +- Fairly cheap to compute given a reasonable amount of nodes, and an inexpensive pseudo-random number generator. + +### Distribution skew + +The algorithm does generate a bit of skew in the distribution, as it is essentially random. The following attributes decrease the skew: + +- Having more buckets to distribute. +- Having fewer targets (nodes and partitions) to distribute buckets to. +- Having a more uniform pseudo-random function. + +The more buckets exist, the more metadata needs to be tracked in the distributors though, and operations that want to scan all the buckets will take longer. Additionally, the backend may want buckets above a given size to improve performance, storage efficiency or similar. Consequently, we typically want to enforce enough buckets for a decent distribution, but not more. + +When the number of nodes increases, more buckets need to exist to keep the distribution even. If the number of nodes is doubled, the number of buckets must typically more than double to keep the distribution equally even. Thus, this scales worse than linear. It does not scale much worse though, and this has not proved to be a practical problem for the cluster sizes we have used up until now.
(A cluster size of a thousand nodes does not seem to be any issue here) + +Having a good and uniform pseudo-random function makes the distribution more even. However, this may require more computationally heavy generators. Currently, we are using a simple and fast algorithm, and it has proved more than sufficient for our needs. + +The distribution to distributors is done to create an even distribution between the nodes. The distributors are free to split the buckets further if the backend wants buckets to contain less data. They can not use fewer buckets than are needed for distribution though. By using a minimum amount of buckets for distribution, the distributors have more freedom to control sizes of buckets. + +### Distribution waste + +To measure how many buckets are needed to create a decent distribution, a metric is needed. We have defined a waste metric for this purpose as follows: + +Distribute the buckets to all the units. Assume the sizes of all units are identical. Assume the unit with the most buckets assigned to it is at 100% capacity. The wasted space is the percentage of unused capacity compared to the total capacity. + +This definition seems useful as a cluster is considered at full capacity once one of its partitions is at full capacity. Having one node with more buckets than the rest is thus damaging, while having one node with fewer buckets than the rest is just fine. + +Example: There are 4 nodes distributing 18 units. The node with the most units has 6. Distribution waste is `100% * (4 * 6 - 18) / (4 * 6) = 25%`. + +Below we have calculated waste based on number of nodes and the amount of buckets to distribute between them. Bits refer to distribution bits used. A distribution bit count of 16 indicates that there will be $2^{16}$ buckets. + +The calculations assume all buckets have the same size. This is normally close to true as documents are randomly assigned to buckets.
There will be lots of buckets per node too, so a little variance typically evens out fairly well. + +The tables below assume only one partition exist on each node. If you have 4 partitions on 16 nodes, you should rather use the values for `4 * 16 = 64` nodes. + +A higher redundancy factor indicates more buckets to distribute between the same amount of nodes, resulting in a more even distribution. Doubling the redundancy has the same effect as adding one to the distribution bit count. To get values for redundancy 4, the redundancy 2 values can be used, and then the waste will be equal to the value with one less distribution bit used. + +### Calculated waste from various cluster sizes + +A value of 1 indicates 100% waste. A value of 0.1 indicates 10% waste. A waste below 1 % is shown green, below 10% as yellow and below 30% as orange. Red indicates more than 30% waste. + +#### Distribution with redundancy 1: + +| Bits \ Nodes | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | +| 1 | 0.0000 | 0.0000 | 0.3333 | 0.5000 | 0.6000 | 0.6667 | 0.7143 | 0.7500 | 0.7778 | 0.8000 | 0.8182 | 0.8333 | 0.8462 | 0.8571 | 0.8667 | +| 2 | 0.0000 | 0.3333 | 0.3333 | 0.5000 | 0.2000 | 0.3333 | 0.4286 | 0.5000 | 0.5556 | 0.6000 | 0.6364 | 0.6667 | 0.6923 | 0.7143 | 0.7333 | +| 3 | 0.0000 | 0.2000 | 0.1111 | 0.3333 | 0.2000 | 0.3333 | 0.6190 | 0.6667 | 0.8222 | 0.8400 | 0.8545 | 0.8333 | 0.6923 | 0.7143 | 0.7333 | +| 4 | 0.0000 | 0.1111 | 0.1111 | 0.3333 | 0.3600 | 0.3333 | 0.4286 | 0.5000 | 0.7778 | 0.8000 | 0.8182 | 0.8095 | 0.6923 | 0.7143 | 0.6444 | +| 5 | - | 0.0588 | 0.1111 | 0.2727 | 0.2889 | 0.4074 | 0.2381 | 0.3333 | 0.8129 | 0.8316 | 0.8469 | 0.8519 | 0.8359 | 0.8367 | 0.8359 | +| 6 | - | 0.0000 | 0.0725 | 0.1579 | 0.1467 | 0.1111 | 0.1688 | 0.3846 | 0.7037 | 0.7217 | 0.7470 | 0.7460 | 0.7265 | 0.6952 | 0.6718 | +| 7 | - | 0.0725 | 0.0519 | 0.0857 
| 0.0857 | 0.1111 | 0.2050 | 0.2000 | 0.4530 | 0.4667 | 0.5152 | 0.5152 | 0.4530 | 0.3905 | 0.3436 | +| 8 | - | 0.0000 | 0.0078 | 0.0725 | 0.0857 | 0.0922 | 0.1293 | 0.1351 | 0.1634 | 0.1742 | 0.1688 | 0.2381 | 0.2426 | 0.2967 | 0.3173 | +| 9 | - | 0.0039 | 0.0192 | 0.1467 | 0.1607 | 0.1203 | 0.1080 | 0.1111 | 0.1380 | 0.1322 | 0.1218 | 0.1795 | 0.1962 | 0.2381 | 0.2580 | +| 10 | - | 0.0019 | 0.0275 | 0.0922 | 0.0898 | 0.0623 | 0.0741 | 0.0922 | 0.1111 | 0.1018 | 0.1218 | 0.1203 | 0.1438 | 0.1688 | 0.1675 | +| 11 | - | 0.0019 | 0.0234 | 0.0430 | 0.0385 | 0.0248 | 0.0248 | 0.0483 | 0.0636 | 0.0648 | 0.0737 | 0.0725 | 0.0894 | 0.0800 | 0.0958 | +| 12 | - | - | 0.0121 | 0.0285 | 0.0282 | 0.0121 | 0.0149 | 0.0571 | 0.0577 | 0.0562 | 0.0549 | 0.0412 | 0.0510 | 0.0439 | 0.0616 | +| 13 | - | - | 0.0074 | 0.0019 | 0.0070 | 0.0177 | 0.0304 | 0.0303 | 0.0337 | 0.0189 | 0.0252 | 0.0358 | 0.0409 | 0.0501 | 0.0385 | +| 14 | - | - | 0.0041 | 0.0024 | 0.0037 | 0.0027 | 0.0145 | 0.0073 | 0.0101 | 0.0130 | 0.0220 | 0.0234 | 0.0290 | 0.0248 | 0.0195 | +| 15 | - | - | 0.0019 | 0.0021 | 0.0036 | 0.0083 | 0.0059 | 0.0056 | 0.0101 | 0.0097 | 0.0123 | 0.0163 | 0.0150 | 0.0186 | 0.0173 | +| 16 | - | - | 0.0010 | 0.0007 | 0.0010 | 0.0030 | 0.0049 | 0.0039 | 0.0085 | 0.0072 | 0.0097 | 0.0108 | 0.0135 | 0.0141 | 0.0115 | +| 17 | - | - | - | - | - | 0.0030 | 0.0033 | 0.0024 | 0.0036 | 0.0030 | 0.0055 | 0.0091 | 0.0135 | 0.0156 | 0.0143 | +| 18 | - | - | - | - | - | - | 0.0019 | - | 0.0029 | 0.0027 | 0.0043 | 0.0040 | 0.0066 | 0.0061 | 0.0060 | +| 19 | - | - | - | - | - | - | - | - | 0.0019 | - | 0.0021 | 0.0030 | 0.0023 | 0.0031 | 0.0042 | +| 20 | - | - | - | - | - | - | - | - | - | - | - | 0.0029 | 0.0025 | 0.0037 | 0.0044 | +| 21 | - | - | - | - | - | - | - | - | - | - | - | - | 0.0026 | 0.0035 | 0.0040 | + +#### Distribution with redundancy 2: + +| Bits \ Nodes | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | +| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | 
:--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | +| 1 | 0.0000 | 0.0000 | 0.3333 | 0.5000 | 0.6000 | 0.6667 | 0.4286 | 0.5000 | 0.5556 | 0.6000 | 0.6364 | 0.6667 | 0.6923 | 0.7143 | 0.7333 | +| 2 | 0.0000 | 0.0000 | 0.3333 | 0.3333 | 0.2000 | 0.3333 | 0.4286 | 0.5000 | 0.5556 | 0.6000 | 0.6364 | 0.6667 | 0.6923 | 0.4286 | 0.4667 | +| 3 | 0.0000 | 0.0000 | 0.1111 | 0.2000 | 0.2000 | 0.3333 | 0.4286 | 0.5000 | 0.7037 | 0.7333 | 0.7576 | 0.7778 | 0.7949 | 0.7714 | 0.7333 | +| 4 | 0.0000 | 0.0000 | 0.1111 | 0.2000 | 0.2000 | 0.3333 | 0.3469 | 0.2000 | 0.7460 | 0.7714 | 0.7762 | 0.7778 | 0.7949 | 0.7714 | 0.7630 | +| 5 | - | - | 0.0725 | 0.1579 | 0.2471 | 0.2381 | 0.2967 | 0.2727 | 0.7265 | 0.7538 | 0.7673 | 0.7778 | 0.7949 | 0.7922 | 0.7968 | +| 6 | - | - | 0.0519 | 0.1111 | 0.1742 | 0.1467 | 0.2050 | 0.2381 | 0.6908 | 0.7023 | 0.7016 | 0.7117 | 0.7265 | 0.7229 | 0.7247 | +| 7 | - | - | 0.0303 | 0.0154 | 0.0340 | 0.0303 | 0.0857 | 0.1111 | 0.4921 | 0.4880 | 0.4828 | 0.4797 | 0.5077 | 0.4622 | 0.4667 | +| 8 | - | - | 0.0078 | 0.0303 | 0.0248 | 0.0623 | 0.0857 | 0.0725 | 0.0970 | 0.1322 | 0.1049 | 0.1293 | 0.1620 | 0.1873 | 0.2242 | +| 9 | - | - | 0.0019 | 0.0266 | 0.0519 | 0.0466 | 0.0682 | 0.0791 | 0.0824 | 0.0519 | 0.0691 | 0.0519 | 0.0623 | 0.0741 | 0.0898 | +| 10 | - | - | 0.0063 | 0.0173 | 0.0154 | 0.0275 | 0.0116 | 0.0340 | 0.0558 | 0.0294 | 0.0452 | 0.0466 | 0.0567 | 0.0501 | 0.0584 | +| 11 | - | - | 0.0078 | 0.0049 | 0.0154 | 0.0177 | 0.0149 | 0.0210 | 0.0275 | 0.0177 | 0.0252 | 0.0303 | 0.0305 | 0.0344 | 0.0317 | +| 12 | - | - | - | 0.0073 | 0.0112 | 0.0192 | 0.0231 | 0.0312 | 0.0296 | 0.0177 | 0.0278 | 0.0358 | 0.0245 | 0.0312 | 0.0385 | +| 13 | - | - | - | 0.0061 | 0.0049 | 0.0096 | 0.0112 | 0.0201 | 0.0218 | 0.0088 | 0.0077 | 0.0199 | 0.0138 | 0.0304 | 0.0317 | +| 14 | - | - | - | 0.0059 | 0.0058 | 0.0058 | 0.0057 | 0.0092 | 0.0128 | 0.0082 | 0.0139 | 0.0081 | 0.0096 | 0.0199 | 0.0213 | +| 15 | - | - | - | - | 0.0014 | 0.0039 | 0.0052 | 0.0034 | 
0.0051 | 0.0085 | 0.0044 | 0.0072 | 0.0107 | 0.0101 | 0.0082 | +| 16 | - | - | - | - | 0.0016 | 0.0030 | 0.0026 | 0.0036 | 0.0065 | 0.0051 | 0.0061 | 0.0084 | 0.0065 | 0.0083 | 0.0100 | +| 17 | - | - | - | - | - | - | 0.0010 | 0.0020 | 0.0028 | - | 0.0040 | 0.0049 | 0.0067 | 0.0071 | 0.0062 | +| 18 | - | - | - | - | - | - | - | - | 0.0032 | - | 0.0024 | - | 0.0034 | 0.0056 | 0.0041 | +| 19 | - | - | - | - | - | - | - | - | - | - | - | - | 0.0025 | 0.0018 | - | + +#### Distribution with redundancy 2: + +| Bits \ Nodes | 16 | 20 | 32 | 48 | 64 | 100 | 128 | 160 | 200 | 256 | 350 | 500 | 800 | 1000 | 5000 | +| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | +| 8 | 0.2000 | 0.3081 | 0.2727 | 0.5152 | 0.5294 | 0.5733 | 0.6364 | 0.7091 | 0.7673 | 0.8000 | 0.8537 | 0.8862 | 0.8933 | 0.8976 | 0.9659 | +| 9 | 0.0725 | 0.2242 | 0.1795 | 0.1795 | 0.3043 | 0.3173 | 0.3846 | 0.5077 | 0.5345 | 0.6364 | 0.7340 | 0.7952 | 0.8400 | 0.8720 | 0.9317 | +| 10 | 0.0725 | 0.1322 | 0.1233 | 0.2099 | 0.1579 | 0.2415 | 0.3333 | 0.5733 | 0.4611 | 0.5789 | 0.6558 | 0.7269 | 0.8293 | 0.8425 | 0.8976 | +| 11 | 0.0340 | 0.0857 | 0.0922 | 0.1111 | 0.1233 | 0.1969 | 0.2558 | 0.5937 | 0.5643 | 0.5897 | 0.5965 | 0.6099 | 0.6587 | 0.7591 | 0.8830 | +| 12 | 0.0448 | 0.0385 | 0.0623 | 0.1065 | 0.0986 | 0.1285 | 0.3725 | 0.3831 | 0.4064 | 0.4074 | 0.4799 | 0.4880 | 0.5124 | 0.8328 | 0.8976 | +| 13 | 0.0340 | 0.0328 | 0.0554 | 0.0699 | 0.0623 | 0.0948 | 0.1049 | 0.2183 | 0.2344 | 0.3191 | 0.3498 | 0.4539 | 0.5733 | 0.6656 | 0.8870 | +| 14 | 0.0140 | 0.0189 | 0.0376 | 0.0452 | 0.0466 | 0.0717 | 0.0986 | 0.1057 | 0.1047 | 0.2242 | 0.2853 | 0.2798 | 0.4064 | 0.4959 | 0.8830 | +| 15 | 0.0094 | 0.0118 | 0.0385 | 0.0268 | 0.0331 | 0.0638 | 0.0708 | 0.0775 | 0.0898 | 0.1322 | 0.2133 | 0.2104 | 0.3550 | 0.4446 | 0.8752 | +| 16 | 0.0097 | 0.0081 | 0.0380 | 0.0303 | 0.0362 | 0.0577 | 0.0501 | 0.0627 | 0.0717 | 0.1033 | 0.1733 | 0.1678 | 0.2586 | 
0.3101 | 0.8511 | +| 17 | 0.0075 | 0.0066 | 0.0346 | 0.0293 | 0.0154 | 0.0258 | 0.0466 | 0.0546 | 0.0704 | 0.1041 | 0.1469 | 0.1983 | 0.2702 | 0.2972 | 0.7740 | +| 18 | 0.0053 | 0.0057 | 0.0098 | 0.0098 | 0.0122 | 0.0149 | 0.0238 | 0.0300 | 0.0394 | 0.0353 | 0.0434 | 0.0553 | 0.0611 | 0.1782 | 0.6334 | +| 19 | - | 0.0022 | 0.0050 | 0.0162 | 0.0098 | 0.0133 | 0.0149 | 0.0220 | 0.0242 | 0.0252 | 0.0333 | 0.0398 | 0.0495 | 0.0999 | 0.5145 | +| 20 | - | - | 0.0030 | 0.0107 | 0.0088 | 0.0098 | 0.0144 | 0.0140 | 0.0148 | 0.0203 | 0.0195 | 0.0255 | 0.0348 | 0.1133 | 0.4481 | +| 21 | - | - | 0.0043 | 0.0063 | 0.0051 | 0.0074 | 0.0079 | 0.0085 | 0.0086 | 0.0113 | 0.0147 | 0.0170 | 0.0237 | 0.1068 | 0.4422 | +| 22 | - | - | - | 0.0026 | 0.0035 | 0.0037 | 0.0082 | 0.0061 | 0.0077 | 0.0087 | 0.0101 | 0.0134 | 0.0193 | 0.1140 | 0.4635 | +| 23 | - | - | - | 0.0019 | - | 0.0026 | 0.0080 | 0.0055 | 0.0056 | 0.0057 | 0.0063 | 0.0096 | 0.0155 | 0.1294 | 0.4982 | +| 24 | - | - | - | 0.0013 | - | - | 0.0074 | 0.0060 | 0.0058 | 0.0053 | 0.0049 | 0.0068 | 0.0112 | 0.0471 | 0.3219 | +| 25 | - | - | - | - | - | - | - | - | - | 0.0043 | 0.0043 | 0.0058 | 0.0067 | 0.0512 | 0.2543 | +| 26 | - | - | - | - | - | - | - | - | - | - | 0.0040 | 0.0042 | 0.0043 | 0.0051 | 0.0210 | +| 27 | - | - | - | - | - | - | - | - | - | - | - | - | 0.0028 | 0.0157 | 0.0814 | + +### Default number of distribution bits used + +Note that changing the amount of distribution bits used will change what buckets exist, which will change the distribution considerably. We thus do not want to alter the distribution bit count too often. + +Ideally, the users would be allowed to configure minimal and maximal acceptable waste, and the current amount of distribution bits could then just be calculated on the fly. But as computing the waste values above are computational heavy, especially with many nodes and many distribution bits, currently only a couple of profiles are available for you to configure. 
+ +**Vespa Cloud note:** Vespa Cloud locks distribution bit count to 16. This is because Vespa Cloud offers auto-scaling of nodes, and such a scaling decision should not implicitly lead to a full redistribution of data by crossing a distribution bit node count boundary. 16 bits strikes a good balance of low skew and high performance for most production deployments. + +#### Loose mode (default) + +The loose mode allows for more waste, allowing the amount of nodes to change considerably without altering the distribution bit counts. + +| Node count | 1-4 | 5-199 | 200-> | +| :--- | :--- | :--- | :--- | +| Distribution bit count | 8 | 16 | 24 | +| Max calculated waste *) | 3.03 % | 7.17 % | ? | +| Minimum buckets/node **) | 256 - 64 | 13108 - 329 | 83886 - | + +#### Strict mode (not default) + +The strict mode attempts to keep the waste below 1.0 %. When it needs to increase the bit count it increases the bit count significantly to allow considerably more growth before having to adjust the count again. + +| Node count | 1-4 | 5-14 | 15-199 | 200-799 | 800-1499 | 1500-4999 | 5000-> | +| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | +| Distribution bit count | 8 | 16 | 21 | 25 | 28 | 30 | 32 | +| Max calculated waste *) | 3 % | 0.83 % | 0.86 % | 0.67 % | ? | ? | ? | +| Minimum buckets/node **) | 256 - 64 | 13107 - 4681 | 139810 - 10538 | 167772 - 41995 | 335544 - 179076 | 715827 - 214791 | 858993 - | + +*) Max calculated waste, given redundancy 2 and the max node count in the given range, as shown in the table above. (Note that this assumes equal-sized buckets, and that every possible bucket exists. In a real system there will be random variation). + +**) Given a node count and distribution bits, there is a minimum number of buckets enforced to exist. However, splitting due to bucket size may increase this count beyond this number. This value shows the maximum value of the minimum.
(That is, the number of buckets per node enforced for the lowest node count in the range.) Ideally one wants to have few buckets enforced by distribution and rather let bucket size split buckets, as that leaves more freedom to users. + +## Q/A + +Q: Why is the distribution of bucket replicas different between groups, even when each group has the same number of nodes? + +A: This is both expected and intentional—to see why we must look at how the ideal state algorithm works. + +As previously outlined, the ideal state algorithm requires 3 distinct inputs: + +1. The ID of the bucket to be replicated across content nodes. +2. The set of all nodes (i.e. unique distribution keys) in the cluster *across* all groups, and their current availability state (Down, Up, Maintenance etc.). +3. The cluster topology and replication configuration. The topology includes knowledge of all groups. + +From this the algorithm returns a deterministic, ordered sequence of nodes (i.e. distribution keys) across all configured groups. The ordering of nodes is given by their individual pseudo-random node *score*, where higher scoring nodes are considered more *ideal* for storing replicas for a given bucket. The set of nodes in this sequence respects the constraints given by the configured group topology and replication level. + +When computing node scores within a group, the *absolute* distribution keys are used rather than a node's *relative* ordering within the group. This means the individual node scores—and consequently the distribution of bucket replicas—within one group are different (with a very high probability) from those in all other groups. + +What the ideal state algorithm ensures is that there exists a deterministic, configurable number of replicas per bucket within each group and that they are evenly distributed across each group's nodes—the exact mapping can be considered an unspecified "implementation detail".
+ +The rationale for using absolute distribution keys rather than relative ordering is closely related to the earlier discussion about why [modulo distribution](/en/content/idealstate#a-simple-example-modulo) is a poor choice. Let $N_g \gt 1$ be the number of nodes in a given group: + +- A relative ordering means that removing—or just reordering—a single node from the configuration can potentially lead to a full redistribution of all data within that group, not just $\frac{1}{N_g}$ of the data. Imagine for instance moving a node from being first in the group to being the last. +- If we require nodes with the same relative index in each group to store the same data set (i.e. a row-column strategy), this immediately suffers in failure scenarios even when just a single node becomes unavailable. Data coverage in the group remains reduced until the node is replaced, as no other nodes can take over responsibility for the data. This is because removing the node leads to the problem in the previous point, where a disproportionally large amount of data must be moved due to the relative ordering changing. With the ideal state algorithm, the remaining nodes in the group will transparently assume ownership of the data, with each node receiving an expected $( \frac{1}{N_g - 1} )$ of the unavailable node's buckets. 
+ \ No newline at end of file diff --git a/mintlify-docs/en/content/images/elastic-fail.png b/mintlify-docs/en/content/images/elastic-fail.png new file mode 100644 index 0000000000..d87fa40cdf Binary files /dev/null and b/mintlify-docs/en/content/images/elastic-fail.png differ diff --git a/mintlify-docs/en/content/images/elastic-grow.png b/mintlify-docs/en/content/images/elastic-grow.png new file mode 100644 index 0000000000..fd2e14789f Binary files /dev/null and b/mintlify-docs/en/content/images/elastic-grow.png differ diff --git a/mintlify-docs/en/content/images/query-groups.png b/mintlify-docs/en/content/images/query-groups.png new file mode 100644 index 0000000000..b0f5332f3a Binary files /dev/null and b/mintlify-docs/en/content/images/query-groups.png differ diff --git a/mintlify-docs/en/content/proton.mdx b/mintlify-docs/en/content/proton.mdx new file mode 100644 index 0000000000..120c80d6cf --- /dev/null +++ b/mintlify-docs/en/content/proton.mdx @@ -0,0 +1,464 @@ +--- +title: "Proton" +sidebarTitle: "Content clusters" +description: "Proton is Vespa's search core and runs on each content node as the *vespa-proton-bin* process. Proton maintains disk and memory structures for documents (organized per document type), handles [read and write operations](/en/writing/reads-and-writes#operations), and execution of [queries](#queries). As the document data is dynamic, disk and memory structures are periodically optimized by [maintenance jobs](#proton-maintenance-jobs)." +--- + +The content node has a *bucket management system* which sends requests to a set of *document databases*, which each consists of three *sub-databases* `ready`, `not ready` and `removed`: + + +![](/assets/img/proton-feed.svg) + + + +### Bucket management + +When the node starts up it first needs to get an overview of what documents and buckets it has. Once metadata for all buckets is known, the content nodes transition from down to up state. 
As the distributors want quick access to bucket metadata, the content node maintains an in-memory bucket database to efficiently serve these requests. The state of the bucket database can always be reconstructed from the durably persisted search node state, but this is expensive and therefore only happens at process startup time. + +This database is considered the source of truth for the state of the node's bucket metadata for the duration of the process's lifetime. As incoming operations mutate the state of the documents on the node, it is critical that the database is always kept in sync with these changes. + +### Persistence threads and operation dispatching + +A content node has a pool of *persistence threads* that is created at startup and remains fixed in size for the lifetime of the process. It is the responsibility of the persistence threads to schedule incoming write and read operations received by the content node, dispatch these to the search core, and ensure the bucket database remains in sync with changes caused by write operations. + +Unless explicitly configured, the size of the thread pool is automatically set based on the number of CPU cores available. + +Persistence threads are backed by a *persistence queue*. Read/write-operations received by the RPC subsystem are pushed onto this queue. The queue is operation deadline-aware; if an operation has exceeded its deadline while enqueued, it is immediately failed back to the sender without being executed. This avoids a particular failure scenario where a heavily loaded node spends more and more time processing already doomed operations, due to not being able to drain its queue quickly enough. + +All operations bound for a particular data bucket (such as Puts, Gets, etc.) execute in the context of a *bucket lock*. Locks are *shared* for reads and *exclusive* for writes.
This means that multiple read operations can execute in parallel for the same bucket, but only one write operation can execute for a bucket at any given time (and no reads can be started concurrently with existing writes for a bucket, and vice versa). Note that some of these locking restrictions can be relaxed when it's safe to do so—see [performance optimizations](#performance-optimizations) for details. + +If a persistence thread tries to pop an operation from the queue and sees that the bucket it's bound for is already locked, it will leave the operation in place in the queue and try the next operation(s) instead. This means that although the queue acts as a FIFO for client operations towards a *single* bucket, this is not the case across *multiple* buckets. + +#### Write operations + +Write operations are dispatched as *asynchronous*—i.e., non-blocking—tasks to the search core. This increases parallelism by freeing up persistence threads to handle other operations, and a deeper pipeline enables the search core to optimize transaction log synchronization and batching of data structure updates. + +Since a deeper pipeline comes at the potential cost of increased latency when many operations are in flight, the maximum number of concurrent asynchronous operations is bounded by an adaptive persistence throttling mechanism. The throttler will dynamically scale the window of concurrency until it reaches a saturation point where further increasing the window size also results in increased operation latencies. When the number of in-flight operations hits the current maximum, persistence threads will not dispatch any more writes until the number goes down. Reads can still be processed during this time. + +An asynchronous write-task holds on to the exclusive bucket lock for the duration of its lifetime. 
Once the search core completes the write operation, the bucket database is updated with the new metadata state of the bucket (which reflects the side effects of the write) prior to releasing the lock. An operation reply is then generated and sent back via the RPC subsystem. + +#### Read operations + +Read operations are always evaluated *synchronously*—i.e. blocking—by persistence threads. To avoid having potentially expensive maintenance read operations (such as those used for [replica reconciliation](/en/content/consistency#replica-reconciliation)) block client operations for prolonged amounts of time, a subset of the persistence threads are *not* allowed to process such maintenance operations. + +Note that the condition evaluation step of a test-and-set write is considered a *read* sub-operation and is therefore done synchronously. Since it's part of a write operation, it happens atomically in the context of the exclusive lock of the higher-level operation. + +#### Performance optimizations + +To reduce thread context switches, some write operations may bypass the persistence thread queues and be directly asynchronously dispatched to the search core from the RPC thread the operation was received at. Such operations must still successfully acquire the appropriate exclusive bucket lock—if the lock cannot be immediately acquired the operation is pushed onto the persistence queue instead. + +To reduce lock contention and thread wakeups, smaller numbers of persistence threads are grouped together in *stripes* that share a dedicated per-stripe persistence queue. Operations are routed deterministically to a particular stripe based on their bucket ID, meaning that stripes operate on non-overlapping parts of the bucket space. Together, the many stripes and queues form one higher-level *logical* queue that covers the entire bucket space. 
+ +If the queue contains multiple *non-conflicting* write operations to the same bucket, these may be dispatched in parallel in the context of the *same* write lock. This avoids having to wait for an entire lock-execute-unlock roundtrip prior to dispatching the next write for the same bucket. An example of conflicting writes is multiple Puts to the same document ID. The maximum number of operations dispatched in parallel is implementation-defined. + +### Document database + +Each document database is responsible for a single document type. It has a component called FeedHandler which takes care of incoming documents, updates, and remove requests. All requests are first written to a [transaction log](#transaction-log), then handed to the appropriate sub-database, based on the request type. + +### Sub-databases + +There are three types of sub-databases, each with its own [document meta store](/en/content/attributes#document-meta-store) and [document store](#document-store). The document meta store holds a map from the document ID to a local ID. This local id is used to address the document in the document store. The document meta store also maintains information on the state of the buckets that are present in the sub-database. + +The sub-databases are maintained by the *Maintenance Controller*. The document distribution changes as the system is resized. When the number of nodes in the system changes, the Maintenance Controller will move documents between the Ready and Not Ready sub-databases to reflect the new distribution. When an entry in the Removed sub-database gets old, it is purged. The sub-databases are: + + +||| +| :--- | :--- | +| **Not Ready** | Holds the redundant documents that are not searchable, i.e. the not ready documents. Documents that are not ready are only stored, not indexed. It takes some processing to move from this state to the ready state. | +| **Ready** | Maintains attributes and indexes of all ready documents and keeps them searchable. 
One of the ready copies is active while the rest are not active:

**Active**
There should always be exactly one active copy of each document in the system, though intermittently there may be more. These documents produce results when queries are evaluated.

**Not Active**
The ready copies that are not active are indexed but will not produce results. By being indexed, they are ready to take over immediately if the node holding the active copy becomes unavailable. Read more in searchable-copies. | +|**Removed**|Keeps track of documents that have been removed. The id and timestamp for each document are kept. This information is used when buckets from two nodes are merged. If the removed document exists on another node but with a different timestamp, the most recent entry prevails.| + +## Transaction log + +Content nodes have a transaction log to persist mutating operations. The transaction log persists operations by file append. Having a transaction log simplifies proton's in-memory index structures and enables steady-state high performance, read more below. + +All operations are written and synced to the [transaction log](/en/content/proton#transaction-log). This is sequential (not random) IO, but might impact overall feed performance if running on NAS attached storage where the sync operation has a much higher cost than on local attached storage (e.g., SSD). See [sync-transactionlog](/en/reference/applications/services/content#sync-transactionlog). + +By default, proton will [flush components](/en/reference/applications/services/content#flush-on-shutdown) like attribute vectors and memory index on shutdown, for quicker startup after scheduled restarts. + +## Document store + +Documents are stored as compressed serialized blobs in the *document store*. Put, update and remove operations are persisted in the [transaction log](#transaction-log) before updating the document in the document store. The operation is acked to the client and the result of the operation is immediately seen in search results. 
+ +Files in the document store are written sequentially, and occur in pairs - example: + +```bash +-rw-r--r-- 1 owner users 4133380096 Aug 10 13:36 1467957947689211000.dat +-rw-r--r-- 1 owner users 71192112 Aug 10 13:36 1467957947689211000.idx +``` + +The [maximum size](/en/reference/applications/services/content#summary-store-logstore-maxfilesize) (in bytes) per .dat file on disk can be set using the following: + +```xml highlight= {9} + + + + + + + + + 8000000000 +``` + +Notes: + +- The files are written in sequence. *proton* starts with one pair and grows it until *maxfilesize*. Once full, a new pair is started. +- This means every pair is immutable, except for the last pair, which is still being written to. + +- Documents exist in multiple versions in multiple files. Older versions are compacted away when a pair is scheduled to be the new active pair - obsolete versions are removed, leaving only the active document version in a new file pair - which is the new active pair. +- Read more on implications of setting *maxfilesize* in [proton maintenance jobs](/en/content/proton#document-store-compaction). +- Files are written in [chunks](/en/reference/applications/services/content#summary-store-logstore-chunk), using compression settings. + +## Defragmentation + +[Document store compaction](#document-store-compaction) defragments and sorts document store files. It removes stale versions of documents (i.e. old versions of updated documents). It is triggered when the disk bloat of the document store is larger than the total disk usage of the document store times [diskbloatfactor](/en/reference/applications/services/content#flushstrategy-native-total-diskbloatfactor). Refer to [summary tuning](/en/reference/applications/services/content#summary) for details. + +Defragmentation status is best observed by tracking the [max_bucket_spread](/en/reference/operations/metrics/searchnode#content_proton_documentdb_ready_document_store_max_bucket_spread) metric over time.
A sawtooth pattern is normal for corpora that change over time. The [document_store_compact](/en/reference/operations/metrics/searchnode#content_proton_documentdb_job_document_store_compact) metric tracks when proton is running the document store compaction job. Compaction settings can be set too tight, in that case, the metric is always, or close to, 1. + +When benchmarking, it is important to set the correct compaction settings, and also make sure that proton has compacted files since (can take hours), and is not actively compacting (*document_store_compact* should be 0 most of the time). + + + +**Note:** + +There is no bucket-compaction across files - documents will not move between files. + + +Optimized reads using chunks + +As documents are clustered within the .dat file, proton optimizes reads by reading larger chunks when accessing documents. When visiting, documents are read in *bucket* order. This is the same order as the defragmentation jobs use. + +The first document read in a visit operation for a bucket will read a chunk from the .dat file into memory. Subsequent document accesses are served by a memory lookup only. The chunk size is configured by [maxsize](/en/reference/applications/services/content#summary-store-logstore-chunk-maxsize): + +```xml highlight= {9} + + + + + + + + + 16384 + + +``` + +There can be 2^22=4M chunks. This sets a minimum chunk size based on *maxfilesize* - e.g. an 8G file can have minimum 2k chunk size. Finally, the bucket size is configured by setting [bucket-splitting](/en/reference/applications/services/content#bucket-splitting): + +```xml highlight= {3} + + + +``` + +The following are the relevant sizing units: + +.dat file size - *maxfilesize*. Larger files give fewer files and so better locality, but compaction requires more memory and more time to complete. chunk size - *maxsize*. Smaller chunks give less wasted IO bytes but more IO operations. bucket size - *bucket-splitting*. 
Larger buckets give fewer buckets and better locality to nodes and files, but incur more overhead during content layer bucket maintenance operations. Overhead can be treated as linear in both CPU, memory and network usage with the bucket size. + +### Memory usage + +The document store has a mapping in memory from local ID (LID) to position in a document store file (.dat). Part of this mapping is persisted in the .idx-file paired to the .dat file. The memory used by the document store is linear with the number of documents and updates to these. + +The metric [content.proton.documentdb.ready.document_store.memory_usage.allocated_bytes](/en/reference/operations/metrics/searchnode#content_proton_documentdb_ready_document_store_memory_usage_allocated_bytes) gives the size in memory - use the [metric API](/en/reference/api/state-v1#state-v1-metrics) to find it. A rule of thumb is 12 bytes per document. + +## Attributes + +[Attribute](/en/content/attributes) fields are in-memory fields used for matching, ranking, sorting and grouping. Each attribute is a separate component that consists of a set of [data structures](/en/content/attributes#data-structures) to store values for that field across all documents in a sub-database. Attributes are managed by the Ready sub-database. Some attributes can also be managed by the Not Ready sub-database, see [high-throughput updates](/en/content/attributes#fast-access) for details. + +## Index + +Index fields are string fields, used for text search. Other field types are [attributes](/en/content/attributes) and [summary fields](/en/querying/document-summaries). + +The Index in the Ready sub-database consists of a memory index and one or more disk indexes. Mutating document operations are applied to the memory index, which is [flushed](#memory-index-flush) regularly. Flushed memory indexes are [merged](#disk-index-fusion) with the primary disk index. 
+ +Proton stores position information in text indices by default, for proximity relevance - `posocc` (below). All the occurrences of a term are stored in the posting list, with its position. This provides superior ranking features, but is somewhat more expensive than just storing a single occurrence per document. For most applications, it is the correct tradeoff, since most of the cost is usually elsewhere and relevance is valuable. + +Applications that only need occurrence information for filtering can use [rank: filter](/en/reference/schemas/schemas#rank) to optimize query performance, using only `boolocc`\-files (below). + +The memory index has a dictionary per index field. This contains all unique words in that field, with mapping to posting lists with position information. The position information is used during ranking, see [nativeRank](/en/ranking/nativerank) for details on how a text match score is calculated. + +The disk index stores the content of each index field in separate folders. Each folder contains: + +- Dictionary. Files: `dictionary.pdat`, `dictionary.spdat`, `dictionary.ssdat`. +- Compressed posting lists with position information. File: `posocc.dat.compressed`. +- Posting lists with only occurrence information (bitvector). These are generated for common words. Files: `boolocc.bdat`, `boolocc.idx`. + +Example: + +```bash +$ pwd +/opt/vespa/var/db/vespa/search/cluster.mycluster/n1/documents/myschema/0.ready/index/index.flush.1/myfield +$ ls -la +total 7632 +drwxr-xr-x 2 org users 145 Oct 29 06:09 . +drwxr-xr-x 74 org users 4096 Oct 29 06:11 .. 
+-rw-r--r-- 1 org users 4096 Oct 29 06:11 boolocc.bdat +-rw-r--r-- 1 org users 4096 Oct 29 06:11 boolocc.idx +-rw-r--r-- 1 org users 8192 Oct 29 06:11 dictionary.pdat +-rw-r--r-- 1 org users 8192 Oct 29 06:11 dictionary.spdat +-rw-r--r-- 1 org users 4120 Oct 29 06:11 dictionary.ssdat +-rw-r--r-- 1 org users 7778304 Oct 29 06:11 posocc.dat.compressed +``` + +Note that `boolocc`\-files are empty if the number of occurrences is small, like in the example above. + +## Proton maintenance jobs + +The memory and disk data structures used in Proton are periodically optimized by a set of maintenance jobs. These jobs are automatically executed, and some can be tuned in [flush strategy tuning](/en/reference/applications/services/content#flushstrategy). All jobs are described in the table below. + +There is only one instance of each job at a time - e.g., attributes are flushed in sequence. When a job is running, its metric is set to 1 - otherwise 0. Use this to correlate observed performance or resource usage with job runs - see *Run metric* below. + +The *temporary* resources used when jobs are executed are described in *CPU*, *Memory* and *Disk*. The memory and disk usage metrics of components that are optimized by the jobs are described in *Metrics* (with *Metric prefix*). For a list of all available Proton metrics, refer to the searchnode metrics in the [Vespa Metric Set](/en/reference/operations/metrics/vespa-metric-set#searchnode-metrics). Metrics are available at the [Metrics API](/en/operations/metrics). + + +| Job | Description | +| :--- | :--- | +| CPU | Little - one thread flushes to disk | +| Memory | Little - some temporary use | +| Disk | A new file is written too, so 2x the size of an attribute on disk until the old flush file is deleted. | +| Run metric | content.proton.documentdb.job.attribute_flush | +| content.proton.documentdb.[ready|notready].attribute.memory_usage. 
| +| Metrics | allocated_bytes.average + used_bytes.average + dead_bytes.average + onhold_bytes.average | +| CPU | Little - one thread flushes to disk | +| Memory | Little | +| Disk | Creates a new disk index, size of the memory index. | +| Run metric | content.proton.documentdb.job.memory_index_flush | +| Metric prefix | content.proton.documentdb.index.memory_usage. | +| Metrics | allocated_bytes.average + used_bytes.average + dead_bytes.average + onhold_bytes.average | +| CPU | Multiple threads merge indices, configured as a function of + feeding concurrency - + refer to this for details | +| Memory | Little | +| Disk | Creates a new index while serving from the current: 2x temporary disk usage for the given index. | +| Run metric | content.proton.documentdb.job.disk_index_fusion | +| CPU | Little | +| Memory | Little | +| Disk | Little | +| Run metric | content.proton.documentdb.job.document_store_flush | +| CPU | Little - one thread reads one file, sorts and writes a new file | +| Memory | Holds a document store file in memory plus memory for sorting the file. + Note: This is important on hosts with little memory! + Reduce maxfilesize to increase the number of files and use less temporary memory for compaction. | +| Disk | A new file is written while the current is serving, max temporary usage is 2x. | +| Run metric | content.proton.documentdb.job.document_store_compact | +| Metric prefix | content.proton.documentdb.[ready|notready|removed].document_store. | +| Metrics | disk_usage.average + disk_bloat.average + max_bucket_spread.average + memory_usage.allocated_bytes.average + memory_usage.used_bytes.average + memory_usage.dead_bytes.average + memory_usage.onhold_bytes.average | +| CPU | CPU similar to feeding. 
+ Consumes capacity from the write threads, so has feeding impact | +| Memory | As feeding - e.g., the attribute memory usage and memory index in the ready sub-database will grow | +| Disk | As feeding | +| Run metric | content.proton.documentdb.job.bucket_move | +| CPU | Like feeding - add and remove documents | +| Memory | Little | +| Disk | 0 | +| Run metric | content.proton.documentdb.job.lid_space_compact | +| Metric prefix | content.proton.documentdb.[ready|notready|removed].lid_space. | +| Metrics | lid_limit.last + lid_bloat_factor.average + lid_fragmentation_factor.average | +| CPU | Little | +| Memory | Little | +| Disk | Little | +| Run metric | content.proton.documentdb.job.removed_documents_prune | + + +## Retrieving documents + +Retrieving documents is done by specifying an id to *get*, or use a [selection expression](/en/reference/writing/document-selector-language) to *visit* a range of documents - refer to the [Document API](/en/reference/api/api). Overview: + + +![Retrieving documents](/assets/img/elastic-visit-get.svg) + + +| | | +| :--- | :--- | +| **Get** | When the content node receives a get request, it scans through all the document databases, and for each one, it checks all three sub-databases. Once the document is found, the scan is stopped and the document returned. If the document is found in a Ready sub-database, the document retriever will apply any changes that are stored in the [attributes](/en/content/attributes) before returning the document. | +| **Visit** | A visit request creates an iterator over each candidate bucket. This iterator will retrieve matching documents from all sub-databases of all document databases. As for get, attribute values are applied to document fields in the Ready sub-database. | + +## Queries + +Queries have a separate pathway through the system. They do not use the distributor, nor do they go through the content node persistence threads. 
They are orthogonal to the elasticity set up by the storage and retrieval described above. How queries move through the system: + + +![Queries](/assets/img/proton-query.svg) + + +A query enters the system through the *QR-server (query rewrite server)* in the [Vespa Container](/en/applications/containers). The QR-server issues one query per document type to the search nodes: + + +| | | +| :--- | :--- | +| **Container** | The Container knows all the document types and rewrites queries as a collection of queries, one for each type. Queries may have a [restrict](/en/reference/api/query#model.restrict) parameter, in which case the container will send the query only to the specified document types. It sends the query to content nodes and collects partial results. It pings all content nodes every second to know whether they are alive, and keeps open TCP connections to each one. If a node goes down, the elastic system will make the documents available on other nodes. | +| **Content node matching** | The *match engine* receives queries and routes them to the right document database based on the document type. The query is passed to the *Ready* sub-database, where the searchable documents are. Based on information stored in the document meta store, the query is augmented with a blocklist that ensures only *active* documents are matched. | + +## /state/v1 API + +Besides the common endpoints documented in the [/state/v1 API reference](/en/reference/api/state-v1), Proton has additional endpoints as part of the /state/v1 API that expose information about the internal state of a search node. This API is available at `http://host:stateport/state/v1/`. + +Run [vespa-model-inspect](/en/reference/operations/self-managed/tools#vespa-model-inspect) to find the JSON HTTP stateport: + +``` +vespa-model-inspect service searchnode +``` + +### Initialization Progress API + +The initialization progress can be found by HTTP GET at `http://host:stateport/state/v1/initialization`. 
This endpoint becomes available early during initialization of Proton when other endpoints are not yet available. It gives a human-readable overview of the document databases and their attributes being loaded. Note that this is **not** a stable API, and it will expand and change between releases. + +Example `state/v1/initialization`: + +```json expandable +{ + "state": "initializing", + "current_time": "1758873251.933488", + "start_time": "1758873249.715624", + "load": 1, + "replay_transaction_log": 0, + "online": 0, + "dbs": [ + { + "state": "load", + "start_time": "1758873249.936939", + "name": "dbname", + "ready_subdb": { + "loaded_attributes": [ + { + "state": "loaded", + "start_time": "1758873249.941415", + "name": "int_field", + "end_time": "1758873249.942051" + }, + { + "state": "loaded", + "start_time": "1758873249.941498", + "name": "string_field", + "end_time": "1758873249.944647" + } + ], + "loading_attributes": [ + { + "state": "reprocessing", + "start_time": "1758873249.941555", + "name": "tensor_field", + "reprocess_progress": "6.061879", + "reprocess_start_time": "1758873249.993847" + } + ], + "queued_attributes": [ + + ] + } + } + ] +} +``` + +### Custom Component State API + +The custom component status can be found by HTTP GET at `http://host:stateport/state/v1/custom/component`. It gives an overview of the relevant search node components and their internal state. Note that this is **not** a stable API, and it will expand and change between releases. 
+ +Example `state/v1/custom/component`: + +```json expandable +{ + "documentdb": { + "mydoctype": { + "documentType": "mydoctype", + "status": { + "state": "ONLINE", + "configState": "OK" + }, + "documents": { + "active": 10, + "ready": 10, + "total": 10, + "removed": 0 + }, + "url": "http://host:stateport/state/v1/custom/component/documentdb/mydoctype" + } + }, + "threadpools": { + "url": "http://host:stateport/state/v1/custom/component/threadpools" + }, + "matchengine": { + "status": { + "state": "ONLINE" + }, + "url": "http://host:stateport/state/v1/custom/component/matchengine" + }, + "flushengine": { + "url": "http://host:stateport/state/v1/custom/component/flushengine" + }, + "tls": { + "url": "http://host:stateport/state/v1/custom/component/tls" + }, + "hwinfo": { + "url": "http://host:stateport/state/v1/custom/component/hwinfo" + }, + "resourceusage": { + "url": "http://host:stateport/state/v1/custom/component/resourceusage", + "disk": 0.25, + "memory": 0.35, + "attribute_address_space": 0 + }, + "session": { + "search": { + "url": "http://host:stateport/state/v1/custom/component/session/search", + "numSessions": 0 + } + } +} +``` + +Example `state/v1/custom/component/documentdb/mydoctype`: + +```json expandable +{ + "documentType": "mydoctype", + "status": { + "state": "ONLINE", + "configState": "OK" + }, + "documents": { + "active": 10, + "ready": 10, + "total": 10, + "removed": 0 + }, + "subdb": { + "removed": { + "url": "http://host:stateport/state/v1/custom/component/documentdb/mydoctype/subdb/removed" + }, + "ready": { + "url": "http://host:stateport/state/v1/custom/component/documentdb/mydoctype/subdb/ready" + }, + "notready": { + "url": "http://host:stateport/state/v1/custom/component/documentdb/mydoctype/subdb/notready" + } + }, + "threadingservice": { + "url": "http://host:stateport/state/v1/custom/component/documentdb/mydoctype/threadingservice" + }, + "bucketdb": { + "url": 
"http://host:stateport/state/v1/custom/component/documentdb/mydoctype/bucketdb", + "numBuckets": 1 + }, + "maintenancecontroller": { + "url": "http://host:stateport/state/v1/custom/component/documentdb/mydoctype/maintenancecontroller" + } +} +``` diff --git a/mintlify-docs/en/examples/assets/billion-vector-2vcpu.png b/mintlify-docs/en/examples/assets/billion-vector-2vcpu.png new file mode 100644 index 0000000000..98172aac9e Binary files /dev/null and b/mintlify-docs/en/examples/assets/billion-vector-2vcpu.png differ diff --git a/mintlify-docs/en/examples/assets/billion-vector-8vcpu.png b/mintlify-docs/en/examples/assets/billion-vector-8vcpu.png new file mode 100644 index 0000000000..442eaee987 Binary files /dev/null and b/mintlify-docs/en/examples/assets/billion-vector-8vcpu.png differ diff --git a/mintlify-docs/en/examples/assets/billion-vector-feed-queries.png b/mintlify-docs/en/examples/assets/billion-vector-feed-queries.png new file mode 100644 index 0000000000..14a5ad3914 Binary files /dev/null and b/mintlify-docs/en/examples/assets/billion-vector-feed-queries.png differ diff --git a/mintlify-docs/en/examples/billion-scale-image-search.mdx b/mintlify-docs/en/examples/billion-scale-image-search.mdx new file mode 100644 index 0000000000..d025cbf648 --- /dev/null +++ b/mintlify-docs/en/examples/billion-scale-image-search.mdx @@ -0,0 +1,450 @@ +--- +title: "Billion Scale Image Search" +--- + +This sample application combines two sample applications to implement +cost-efficient large scale image search over multimodal AI powered vector representations; +[text-image-search](https://github.com/vespa-engine/sample-apps/tree/master/text-image-search) and +[billion-scale-vector-search](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-vector-search). + +## The Vector Dataset +This sample app use the [LAION-5B](https://laion.ai/blog/laion-5b/) dataset, + the biggest open accessible image-text dataset in the world. 
+ +> Large image-text models like ALIGN, BASIC, Turing Bletchly, FLORENCE & GLIDE have +> shown better and better performance compared to previous flagship models like CLIP and DALL-E. +> Most of them had been trained on billions of image-text pairs and unfortunately, no datasets of this size had been openly available until now. +> To address this problem we present LAION 5B, a large-scale dataset for research purposes +> consisting of 5,85B CLIP-filtered image-text pairs. 2,3B contain English language, +> 2,2B samples from 100+ other languages and 1B samples have texts that do not allow a certain language assignment (e.g. names ). + +The LAION-5B dataset was used to train the popular text-to-image generative StableDiffusion model. + + +Note the following about the LAION 5B dataset: + +> Be aware that this large-scale dataset is un-curated. +> Keep in mind that the un-curated nature of the dataset means that collected +> links may lead to strongly discomforting and disturbing content for a human viewer. + + + +The released dataset does not contain image data itself, +but [CLIP](https://openai.com/research/clip) encoded vector representations of the images, +and metadata like `url` and `caption`. + +## Use cases + +The app can be used to implement several use cases over the LAION dataset, or adapted to your large-scale vector dataset: + +- Search with a free text prompt over the `caption` or `url` fields in the LAION dataset using Vespa's standard text-matching functionality. +- CLIP retrieval, using vector search, given a text prompt, search the image vector representations (CLIP ViT-L/14), for example for 'french cat'. +- Given an image vector representation, search for similar images in the dataset. This can for example +be used to take the output image of StableDiffusion to find similar images in the training dataset. + +All this combined using [Vespa's query language](/en/querying/query-language), + and also in combination with filters. 
+ +## Vespa Primitives Demonstrated + +The sample application demonstrates many Vespa primitives: + +- Importing an [ONNX](https://onnx.ai/)-exported version of [CLIP ViT-L/14](https://github.com/openai/CLIP) +for [accelerated inference](https://blog.vespa.ai/stateful-model-serving-how-we-accelerate-inference-using-onnx-runtime/) +in [Vespa stateless](/en/learn/overview) containers. +The exported CLIP model encodes a free-text prompt to a joint image-text embedding space with 768 dimensions. +- [HNSW](/en/querying/approximate-nn-hnsw) indexing of vector centroids drawn +from the dataset, and combination with classic Inverted File as described in +[Billion-scale vector search using hybrid HNSW-IF](https://blog.vespa.ai/vespa-hybrid-billion-scale-vector-search/). +- Decoupling of vector storage and vector similarity computations. The stateless layer performs vector +similarity computation over the full precision vectors. +By using Vespa's support for accelerated inference with [onnxruntime](https://onnxruntime.ai/), +moving the majority of the vector compute to the stateless layer +allows for faster auto-scaling with daily query volume changes. +The full precision vectors are stored in Vespa's summary log store, using lossless compression (zstd). +- Dimension reduction with PCA - The centroid vectors are compressed from 768 dimensions to 128 dimensions. This allows indexing 6x more +centroids on the same instance type due to the reduced memory footprint. With Vespa's support for distributed search, coupled with powerful +high memory instances, this allows Vespa to scale cost efficiently to trillion-sized vector datasets. +- The trained PCA matrix and matrix multiplication which projects the 768-dim vectors to 128-dimensions is +evaluated in Vespa using accelerated inference, both at indexing time and at query time. The PCA weights are represented also using ONNX. +- Phased ranking. 
+The image embedding vectors are also projected to 128 dimensions, stored using +memory mapped [paged attribute tensors](/en/content/attributes#paged-attributes). +Full precision vectors are stored on disk in Vespa summary store. +The first-phase coarse search ranks vectors in the reduced vector space, per node, and results are merged from all nodes before +the final ranking phase in the stateless layer. +The final ranking phase is implemented in the stateless container layer using [accelerated inference](https://blog.vespa.ai/stateful-model-serving-how-we-accelerate-inference-using-onnx-runtime/). +- Combining approximate nearest neighbor search with [filters](https://blog.vespa.ai/constrained-approximate-nearest-neighbor-search/), filtering +can be on url, caption, image height, width, safety probability, NSFW label, and more. +- Hybrid ranking, both textual sparse matching features and the CLIP similarity, can be used when ranking images. +- Reduced tensor cell precision. The original LAION-5B dataset uses `float16`. The app uses Vespa's support for `bfloat16` tensors, + saving 50% of storage compared to full `float` representation. +- Caching, both reduced vectors (128) cached by the OS buffer cache, and full version 768 dims are cached using Vespa summary cache. +- Query-time vector de-duping and diversification of the search engine result page using document to document similarity instead of query to document similarity. Also +accelerated by stateless model inference. +- Scale, from a single node deployment to multi-node deployment using managed [Vespa Cloud](/), +or self-hosted on-premise. 
+ + +## Stateless Components +The app contains several [container components](/en/applications/components): + +- [RankingSearcher](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/java/ai/vespa/examples/searcher/RankingSearcher.java) implements the last stage ranking using +full-precision vectors using an ONNX model for accelerated inference. +- [DedupingSearcher](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/java/ai/vespa/examples/searcher/DeDupingSearcher.java) implements run-time de-duping after Ranking, using +document to document similarity matrix, using an ONNX model for accelerated inference. +- [DimensionReducer](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/java/ai/vespa/examples/DimensionReducer.java) PCA dimension reducing vectors from 768-dims to 128-dims. +- [AssignCentroidsDocProc](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/java/ai/vespa/examples/docproc/AssignCentroidsDocProc.java) searches the HNSW graph content cluster +during ingestion to find the nearest centroids of the incoming vector. +- [SPANNSearcher](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/java/ai/vespa/examples/searcher/SPANNSearcher.java) + +## Deploying this app +These steps demonstrate the app using a smaller subset of the LAION-5B vector dataset, suitable +for playing around with the app on a laptop. + +**Requirements:** + +- [Docker](https://www.docker.com/) Desktop installed and running. 6GB available memory for Docker is recommended. 
+ Refer to [Docker memory](/en/operations/self-managed/docker-containers#memory) + for details and troubleshooting +- Alternatively, deploy using [Vespa Cloud](#deployment-note) +- Operating system: Linux, macOS or Windows 10 Pro (Docker requirement) +- Architecture: x86_64 or arm64 +- [Homebrew](https://brew.sh/) to install [Vespa CLI](/en/clients/vespa-cli), or download + a vespa cli release from [GitHub releases](https://github.com/vespa-engine/vespa/releases). +- Java 17 installed. +- Python3 and numpy to process the vector dataset +- [Apache Maven](https://maven.apache.org/install.html) - this sample app uses custom Java components and Maven is used to build the application. + +Verify Docker Memory Limits: + + +```bash +$ docker info | grep "Total Memory" +or +$ podman info | grep "memTotal" +``` + + +Install [Vespa CLI](/en/clients/vespa-cli): + +```bash +$ brew install vespa-cli +``` + + +For local deployment using docker image: + +```bash +$ vespa config set target local +``` + + +Use the [multi-node high availability](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) +template for inspiration for multi-node, on-premise deployments. + +Pull and start the vespa docker container image: + +```bash +$ docker pull vespaengine/vespa +$ docker run --detach --name vespa --hostname vespa-container \ + --publish 127.0.0.1:8080:8080 --publish 127.0.0.1:19071:19071 \ + vespaengine/vespa +``` + + +Verify that the configuration service (deploy api) is ready: + +```bash +$ vespa status deploy --wait 300 ./app +``` + + +Download this sample application: + +```bash +$ vespa clone billion-scale-image-search myapp && cd myapp +``` + +Setup: + + + +**Create a [tenant](/en/learn/tenant-apps-instances) on Vespa Cloud:** + + Go to [console.vespa-cloud.com](https://console.vespa-cloud.com/) and create your tenant (unless you already have one). 
+ + +**Install the [Vespa CLI](/en/clients/vespa-cli)** using [Homebrew](https://brew.sh/): +```bash +$ brew install vespa-cli +``` + Windows/No Homebrew? See the [Vespa CLI page](/en/clients/vespa-cli) to download directly. + + +**Configure the Vespa client:** +```bash +$ vespa config set target cloud +$ vespa config set application vespa-team.autotest +``` + Use the tenant name from step 1 instead of "vespa-team", and replace in other steps in this example guide, too. + + +**Get Vespa Cloud control plane access:** +```bash +$ vespa auth login +``` + Follow the instructions from the command to authenticate. + + +**Clone a sample [application](/en/basics/applications):** +```bash +$ vespa clone billion-scale-image-search myapp && cd myapp +``` + See [sample-apps](https://github.com/vespa-engine/sample-apps) for other sample apps you can clone. + + +**Add a certificate for [data plane access](/en/security/guide#data-plane) to the application:** +```bash +$ vespa auth cert app +``` + It is a good idea to take note of the path to the `.pem` files written here. + + + +## Download Vector + Metadata + +These instructions use the first split file (0000) of a total of 2314 files in the LAION2B-en split. +Download the vector data file: + + +```bash +$ curl --http1.1 -L -o img_emb_0000.npy \ + https://the-eye.eu/public/AI/cah/laion5b/embeddings/laion2B-en/img_emb/img_emb_0000.npy +``` + + +Download the metadata file: + + +```bash +$ curl -L -o metadata_0000.parquet \ + https://the-eye.eu/public/AI/cah/laion5b/embeddings/laion2B-en/laion2B-en-metadata/metadata_0000.parquet +``` + + +Install python dependencies to process the files: + + +```bash +$ python3 -m pip install pandas numpy requests mmh3 pyarrow +``` + + +Generate centroids, this process randomly selects vectors from the dataset to represent +centroids. Performing an incremental clustering can improve vector search recall and allow +indexing fewer centroids. For simplicity, this tutorial uses random sampling. 
+ + +```bash +$ python3 app/src/main/python/create-centroid-feed.py img_emb_0000.npy > centroids.jsonl +``` + + +Generate the image feed, this merges the embedding data with the metadata and creates a Vespa +jsonl feed file, with one json operation per line. + + +```bash +$ python3 app/src/main/python/create-joined-feed.py metadata_0000.parquet img_emb_0000.npy > feed.jsonl +``` + + +To process the entire dataset, we recommend starting several processes, each operating on separate split files +as the processing implementation is single-threaded. + + +## Build and deploy Vespa app + +`src/main/application/models` has three small ONNX models: + +- `vespa_innerproduct_ranker.onnx` for vector similarity (inner dot product) between the query and the vectors +in the stateless container. +- `vespa_pairwise_similarity.onnx` for matrix multiplication between the top retrieved vectors. +- `pca_transformer.onnx` for dimension reduction, projecting the 768-dim vector space to a 128-dimensional space. + +These `ONNX` model files are generated by specifying the compute operation using [pytorch](https://pytorch.org/) and using `torch`'s +ability to export the model to [ONNX](https://onnx.ai/) format: + +- [ranker_export.py](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/python/ranker_export.py) +- [similarity_export.py](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/python/similarity_export.py) +- [pca_transformer_export.py](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/python/pca_transformer_export.py) + +Build the sample app (make sure you have JDK 17, verify with `mvn -v`): - This step +also downloads a pre-exported ONNX model for mapping the prompt text to the CLIP vector embedding space. + + +```bash +$ mvn clean package -U -f app +``` + + +Deploy the application. 
This step deploys the application package built in the previous step: + + +```bash +$ vespa deploy --wait 300 ./app +``` + + +#### Deployment note +It is possible to deploy this app to +[Vespa Cloud](/en/basics/deploy-an-application-java). +For Vespa cloud deployments to the [dev env](/en/operations/zones) +replace the [src/main/application/services.xml](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/application/services.xml) with +[src/main/application/services-cloud.xml](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/application/services-cloud.xml) - +the cloud deployment uses dedicated clusters for `feed` and `query`. + +Wait for the application endpoint to become available: + + +```bash +$ vespa status --wait 300 +``` + + +Run [Vespa System Tests](/en/reference/applications/testing), +which runs a set of basic tests to verify that the application is working as expected: + +```bash +$ vespa test app/src/test/application/tests/system-test/feed-and-search-test.json +``` + + +The _centroid_ vectors **must** be indexed first: + + +```bash +$ vespa feed centroids.jsonl +$ vespa feed feed.jsonl +``` + + +Track number of documents while feeding: + + +```bash +$ vespa query 'yql=select * from image where true' \ + hits=0 \ + ranking=unranked +``` + + + +## Fetching data + +Fetch a single document using [document api](/en/reference/api/document-v1): + + +```bash +$ vespa document get \ + id:laion:image::5775990047751962856 +``` + + +The response contains all fields, including the full vector representation and the +reduced vector, plus all the metadata. Everything represented in the same +[schema](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/application/schemas/image.sd). 
+ + +## Query the data +The following provides a few query examples, +`prompt` is a run-time query parameter which is used by the +[CLIPEmbeddingSearcher](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search/app/src/main/java/ai/vespa/examples/searcher/CLIPEmbeddingSearcher.java) +which will encode the prompt text into a CLIP vector representation using the embedded CLIP model: + + +```bash +$ vespa query \ + 'yql=select documentid, caption, url, height, width from image where nsfw contains "unlikely"'\ + 'hits=10' \ + 'prompt=two dogs running on a sandy beach' +``` + + +Results are filtered by a constraint on the `nsfw` field. Note that even if the image is classified +as `unlikely` the image content might still be explicit as the NSFW classifier is not 100% accurate. + +The returned images are ranked by CLIP similarity (the score is found in each hit's `relevance` field). + +The following query adds another filter, restricting the search so that only images crawled from urls with `shutterstock.com` +are retrieved. 
+ + +```bash +$ vespa query \ + 'yql=select documentid, caption, url, height, width from image where nsfw contains "unlikely" and url contains "shutterstock.com"'\ + 'hits=10' \ + 'prompt=two dogs running on a sandy beach' +``` + + +Another query restricts the search further, adding a phrase constraint `caption contains phrase("sandy", "beach")`: + + +```bash +$ vespa query \ + 'yql=select documentid, caption, url, height, width from image where nsfw contains "unlikely" and url contains "shutterstock.com" and caption contains phrase("sandy", "beach")'\ + 'hits=10' \ + 'prompt=two dogs running on a sandy beach' +``` + + +Regular query, matching over the `default` fieldset, searching the `caption` and the `url` field, ranked by +the `text` ranking profile: + + +```bash +$ vespa query \ + 'yql=select documentid, caption, url from image where nsfw contains "unlikely" and userQuery()'\ + 'hits=10' \ + 'query=two dogs running on a sandy beach' \ + 'ranking=text' +``` + + +The `text` [rank](/en/basics/ranking) profile uses +[nativeRank](/en/ranking/nativerank), one of Vespa's many +text matching rank features. + +## Non-native hyperparameters +There are several non-native query request +parameters that control the vector search accuracy and performance tradeoffs. These +can be set with the request, e.g., `/search/&spann.clusters=12`. + +- `spann.clusters`, default `64`, the number of centroids in the reduced vector space used to restrict the image search. +A higher number improves recall, but increases computational complexity and disk reads. +- `rank-count`, default `1000`, the number of vectors that are fully re-ranked in the container using the full vector representation. +A higher number improves recall, but increases the computational complexity and network usage. +- `collapse.enable`, default `true`, controls de-duping of the top ranked results using image to image similarity. 
+- `collapse.similarity.max-hits`, default `1000`, the number of top-ranked hits to perform de-duping of. Must be less than `rank-count`. +- `collapse.similarity.threshold`, default `0.95`, how similar a given image to image must be before it is considered a duplicate. + +## Areas of improvement +There are several areas that could be improved. + +- CLIP model. The exported text transformer model uses fixed sequence length (77), this wastes computations and makes +the model a lot slower than it has to be for shorter sequence lengths. A dynamic sequence length would +make encoding short queries a lot faster than the current model. +It would also be interesting to use the text encoder as a teacher and train a smaller distilled model using a different architecture (for example based on smaller MiniLM models). +- CLIP query embedding caching. The CLIP model is fixed and only uses the text input. Caching the map from text to +embedding would save resources. + +## Shutdown and remove the container: + + +```bash +$ vespa destroy --force +``` + + diff --git a/mintlify-docs/en/examples/billion-scale-vector-search.mdx b/mintlify-docs/en/examples/billion-scale-vector-search.mdx new file mode 100644 index 0000000000..7439018cbc --- /dev/null +++ b/mintlify-docs/en/examples/billion-scale-vector-search.mdx @@ -0,0 +1,290 @@ +--- +title: "SPANN Billion Scale Vector Search" +--- + + +The SPANN (Space Partitioned ANN) approach for approximate nearest neighbor search is described in +[SPANN: Highly-efficient Billion-scale Approximate Nearest Neighbor Search](https://arxiv.org/abs/2111.08566). +SPANN uses a hybrid combination of graph and inverted index methods for approximate nearest neighbor search. + +We recommend you read [Billion-scale vector search using hybrid HNSW-IF](https://blog.vespa.ai/vespa-hybrid-billion-scale-vector-search/) +for details on how SPANN is implemented using Vespa, before running this example application. 
+Excerpt: + +> SPANN searches for the k closest centroid vectors of the query vector in the in-memory ANN search data structure. +> Then, it reads the k associated posting lists for the retrieved centroids +> and computes the distance between the query vector and the vector data read from the posting list: + + +![](https://blog.vespa.ai/assets/2022-06-07-vespa-spann-billion-scale-vector-search/spann-posting-lists.excalidraw.png) + + +This sample application demonstrates how to represent SPANN using Vespa. + +Setup: + + + +**Create a [tenant](/en/learn/tenant-apps-instances) on Vespa Cloud:** + + Go to [console.vespa-cloud.com](https://console.vespa-cloud.com/) and create your tenant (unless you already have one). + + +**Install the [Vespa CLI](/en/clients/vespa-cli)** using [Homebrew](https://brew.sh/): +```bash +$ brew install vespa-cli +``` + Windows/No Homebrew? See the [Vespa CLI page](/en/clients/vespa-cli) to download directly. + + +**Configure the Vespa client:** +```bash +$ vespa config set target cloud +$ vespa config set application vespa-team.autotest +``` + Use the tenant name from step 1 instead of "vespa-team", and replace in other steps in this example guide, too. + + +**Get Vespa Cloud control plane access:** +```bash +$ vespa auth login +``` + Follow the instructions from the command to authenticate. + + +**Clone a sample [application](/en/basics/applications):** +```bash +$ vespa clone billion-scale-vector-search myapp && cd myapp +``` + See [sample-apps](https://github.com/vespa-engine/sample-apps) for other sample apps you can clone. + + +**Add a certificate for [data plane access](/en/security/guide#data-plane) to the application:** +```bash +$ vespa auth cert app +``` + It is a good idea to take note of the path to the `.pem` files written here. + + + +## Download Vector Data +This sample app uses the Microsoft SPACEV vector dataset from [big-ann-benchmarks.com](https://big-ann-benchmarks.com/). 
+It uses the first 10M vectors of the 100M slice sample. +This sample file is about 1GB (10M vectors): + +```bash +$ curl -L -o spacev10m_base.i8bin \ + https://data.vespa-cloud.com/sample-apps-data/spacev10m_base.i8bin +``` + + +Install dependencies and create the feed files for the first 10M vectors from the 100M sample: + +```bash +$ pip3 install numpy requests tqdm +``` + + +```bash +$ python3 app/src/main/python/create-vespa-feed.py spacev10m_base.i8bin +``` + +Output: +* `graph-vectors.jsonl` +* `if-vectors.jsonl` + + +## Build and deploy Vespa app +Build the application: + +```bash +$ mvn clean package -U -f app +``` + + +Deploy the application: + +```bash +$ vespa deploy --wait 900 ./app +``` + + +Wait for the application endpoint to become available: + +```bash +$ vespa status --wait 300 +``` + + +Test [basic functionality](https://github.com/vespa-engine/sample-apps/blob/master/billion-scale-vector-search/app/src/test/application/tests/system-test/feed-and-search-test.json): + +```bash +$ vespa test app/src/test/application/tests/system-test/feed-and-search-test.json +``` + +See [CD tests](/en/operations/automated-deployments#cd-tests) for details. + +## Feed data +The _graph_ vectors must be fed before the _if_ vectors: + +```bash +$ vespa feed graph-vectors.jsonl +``` + + +```bash +$ vespa feed if-vectors.jsonl +``` + + +Now is a good time to open the +Vespa Cloud Dashboard +to track progress. + +Refer to [`<resources>`](https://github.com/vespa-engine/sample-apps/blob/master/billion-scale-vector-search/app/src/main/application/services.xml) +configuration to manage the feeding speed - more CPU is better, e.g.: +``` + +``` +Use the [instance type reference](/en/performance/instance-types/aws-instance-types) to find good combinations. +Run time for a 2 VCPU deployment vs. 8 VCPU: + + +![duration 2vcpu](/en/examples/assets/billion-vector-2vcpu.png) + +
+ +![duration 8vcpu](/en/examples/assets/billion-vector-8vcpu.png) + + +Observe the feed and query phases (below) of this guide: + + +![feed and queries](/en/examples/assets/billion-vector-feed-queries.png) + + +## Recall Evaluation +Download the query vectors and the ground truth for the 10M first vectors: + +```bash +$ curl -L -o query.i8bin \ + https://github.com/microsoft/SPTAG/raw/main/datasets/SPACEV1B/query.bin +$ curl -L -o spacev10m_gt100.i8bin \ + https://data.vespa-cloud.com/sample-apps-data/spacev10m_gt100.i8bin +``` + +Find the path to the credentials from the `vespa auth cert` step above, like + +```txt +/Users/username/.vespa/tenant_name.autotest.default/data-plane-public-cert.pem +``` + +Replace the two filenames in the command below. +(This is not needed when running a [local test](#local-test-with-oci-image)) + +Run first 1K queries and evaluate recall@10. A higher number of clusters gives higher recall: + +```bash +$ ENDPOINT=$(vespa status --format=plain) +$ python3 app/src/main/python/recall.py \ + --endpoint ${ENDPOINT}/search/ \ + --query_file query.i8bin \ + --query_gt_file spacev10m_gt100.i8bin \ + --certificate $PWD/../.vespa/vespa-team.autotest.default/data-plane-public-cert.pem \ + --key $PWD/../.vespa/vespa-team.autotest.default/data-plane-private-key.pem +``` + + +See the [blog post](https://blog.vespa.ai/vespa-hybrid-billion-scale-vector-search/#hnsw-if-accuracy) +for details about this script. + + +```bash +$ vespa destroy --force +``` + + + +## Local test with OCI image + + + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` + - See [Docker Containers](/en/operations/self-managed/docker-containers.html) for system limits and other settings. 
+- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 4 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). +- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [Github releases](https://github.com/vespa-engine/vespa/releases). +- [Java 17](https://openjdk.org/projects/jdk/17/). +- [Apache Maven](https://maven.apache.org/install.html) is used to build the application. + + + + +Verify memory Limits: + +```bash +$ docker info | grep "Total Memory" +``` +or + +```bash +$ podman info | grep "memTotal" +``` + + +Install [Vespa CLI](../clients/vespa-cli.html): + +```bash +$ brew install vespa-cli +``` + + +For local deployment: + +```bash +$ vespa config set target local +``` + + +Download this sample application: + +```bash +$ vespa clone billion-scale-vector-search myapp && cd myapp +``` + + +Pull and start the Vespa image: + +```bash +$ docker pull vespaengine/vespa +$ docker run --detach --name vespa --hostname vespa-container \ + --publish 127.0.0.1:8080:8080 --publish 127.0.0.1:19071:19071 \ + vespaengine/vespa +``` + + +Verify that the configuration service (deploy api) is ready: + +```bash +$ vespa status deploy --wait 300 +``` + + +At this point, you can continue the guide from [download vector data](#download-vector-data). 
+ +### Cleanup +When done, remove the container: + +```bash +$ docker rm -f vespa +``` + diff --git a/mintlify-docs/en/examples/rag-blueprint.mdx b/mintlify-docs/en/examples/rag-blueprint.mdx new file mode 100644 index 0000000000..5a9ae41949 --- /dev/null +++ b/mintlify-docs/en/examples/rag-blueprint.mdx @@ -0,0 +1,110 @@ +--- +title: "The RAG Blueprint" +--- + +Vespa is the [platform of choice](https://blog.vespa.ai/perplexity-builds-ai-search-at-scale-on-vespa-ai/) +for large scale RAG applications like Perplexity. +It gives you all the features you need, but putting them all together can be a challenge. + +This open source sample application contains all the elements you need to create a RAG application that + +* delivers state-of-the-art quality, and +* scales to any amount of data, query load, and complexity. + +This README provides the steps to create and run your own application based on the blueprint. +Refer to the [RAG Blueprint tutorial](/en/learn/tutorials/rag-blueprint.html) for more in-depth explanations, +or try out the [Python notebook](https://vespa-engine.github.io/pyvespa/examples/rag-blueprint-vespa-cloud.html). + +Setup: + + + +**Create a [tenant](/en/learn/tenant-apps-instances) on Vespa Cloud:** + Go to [console.vespa-cloud.com](https://console.vespa-cloud.com/) and create your tenant (unless you already have one). + + +**Install the [Vespa CLI](/en/clients/vespa-cli)** using [Homebrew](https://brew.sh/): +```bash +$ brew install vespa-cli +``` + Windows/No Homebrew? See the [Vespa CLI page](/en/clients/vespa-cli) to download directly. + + +**Configure the Vespa client:** +```bash +$ vespa config set target cloud +$ vespa config set application vespa-team.autotest +``` + Use the tenant name from step 1 instead of "vespa-team", and replace in other steps in this example guide, too. + + +**Get Vespa Cloud control plane access:** +```bash +$ vespa auth login +``` + Follow the instructions from the command to authenticate. 
+ + +**Clone a sample [application](/en/basics/applications):** +```bash +$ vespa clone rag-blueprint myapp && cd myapp +``` +See [sample-apps](https://github.com/vespa-engine/sample-apps) for other sample apps you can clone. + + +**Add a certificate for [data plane access](/en/security/guide#data-plane) to the application:** +```bash +$ vespa auth cert app +``` +It is a good idea to take note of the path to the `.pem` files written here. + + + + +## Test the application + + +```bash +$ vespa deploy --wait 900 ./app +``` + + +Feed some documents, this will also chunk and embed so it takes about 3 minutes: + + +```bash +$ vespa feed dataset/docs.jsonl +``` + + +Now you can issue queries: + + +```bash +$ vespa query 'query=yc b2b sales' +``` + + + +```bash +$ vespa destroy --force +``` + + +**TIP:** + +Add "-v" to see the HTTP request this becomes. + + +Congratulations! You have now created a RAG application that can scale to billions of documents and thousands +of queries per second, while delivering state-of-the-art quality. + +## Explore more + +What do you want to do next? + +- To learn what this application can do, look at the files in your app/ dir. 
+- [Run your application locally using Docker](https://github.com/vespa-engine/sample-apps/blob/master/rag-blueprint/deploy-locally.md) +- [Using query profiles to define behavior for different use cases](https://github.com/vespa-engine/sample-apps/blob/master/rag-blueprint/query-profiles.md) +- [Evaluate and improve relevance of the data returned](https://github.com/vespa-engine/sample-apps/blob/master/rag-blueprint/relevance.md) +- [Do LLM generation inside the application](https://github.com/vespa-engine/sample-apps/blob/master/rag-blueprint/relevance.md) diff --git a/mintlify-docs/en/learn/about-documentation.mdx b/mintlify-docs/en/learn/about-documentation.mdx new file mode 100644 index 0000000000..5fb452635e --- /dev/null +++ b/mintlify-docs/en/learn/about-documentation.mdx @@ -0,0 +1,68 @@ +--- +title: About this documentation +description: "The Vespa documentation [https://docs.vespa.ai](https://docs.vespa.ai) provides all the information required to use all Vespa features and deploy them in any supported environment." +--- + +It is split into guides and tutorials, which explain features and how to use them to solve problems, and reference documentation, which lists complete information about all features and APIs. + +## Applicability + +The Vespa platform is open source, and can be deployed in self-managed systems and on the Vespa Cloud service. Some add-ons (but no core functionality) are only available under a commercial license. + +Documents that describe functionality with such limited applicability are clearly marked by one or more of the following chips: + +| | | +| :--- | :--- | +| **Vespa Cloud** | Only applicable to Vespa Cloud deployments. | +| **Self-managed** | Only applicable to self-managed deployments. | +| **Enterprise** | Not open source: Available commercially only (both self-managed and on cloud unless also marked by one of the other chips above). 
| + +For clarity, any document *not* marked with any of these chips describes functionality that is open source and available both on Vespa Cloud and self-managed deployments. + +## Contributing + +If you find errors or want to improve the documentation, [create an issue](https://github.com/vespa-engine/vespa/issues) or [contribute a fix](/en/learn/contributing). See the [README](https://docs.vespa.ai/README.md) before contributing. + +## Notation + +*Italic* is used for: + +- Pathnames, filenames, program names, hostnames, and URLs +- New terms where they are defined + +`Constant Width` is used for: + +- Programming language elements, code examples, keywords, functions, classes, interfaces, methods, etc. +- Commands and command-line output + +Commands meant to be run on the command line are shown like this, prepended by a $ for the prompt: + +```bash +$ export PATH=$VESPA_HOME/bin:$PATH # how to highlight text in pre +``` + +Notes and other Important pieces of information are shown like: + + +**Note:** + +Some info here + + + +**Important:** + +Important info here + + + +**Warning:** + +Warning here + + + +**Deprecation:** + +Deprecation warning here + diff --git a/mintlify-docs/en/learn/contributing.mdx b/mintlify-docs/en/learn/contributing.mdx new file mode 100644 index 0000000000..476b071522 --- /dev/null +++ b/mintlify-docs/en/learn/contributing.mdx @@ -0,0 +1,28 @@ +--- +title: Contributing to Vespa +description: "Contributions to [Vespa](https://github.com/vespa-engine/vespa) and the [Vespa documentation](https://github.com/vespa-engine/documentation) are welcome. This document tells you what you need to know to contribute." +--- + +## Open development + +All work on Vespa happens directly on GitHub, using the [GitHub flow model](https://docs.github.com/en/get-started/quickstart/github-flow). We release the master branch a few times a week, and you should expect it to almost always work. 
In addition to the [builds seen on factory.vespa.ai](https://factory.vespa.ai) we have a large acceptance and performance test suite which is also run continuously. + +### Pull requests + +All pull requests are reviewed by a member of the Vespa Committers team. You can find a suitable reviewer in the OWNERS file upward in the source tree from where you are making the change (the OWNERS have a special responsibility for ensuring the long-term integrity of a portion of the code). If you want to become a committer/OWNER, making some quality contributions is the way to start. + +We require all pull request checks to pass. + +## Versioning + +Vespa uses semantic versioning - see [vespa versions](/en/learn/releases). Notice in particular that any Java API in a package having a @PublicAPI annotation in the package-info file cannot be changed in an incompatible way between major versions: Existing types and method signatures must be preserved (but can be marked deprecated). + +## Issues + +We track issues in [GitHub issues](https://github.com/vespa-engine/vespa/issues). It is fine to submit issues also for feature requests and ideas, whether you intend to work on them or not. + +There is also a [ToDo list](https://github.com/vespa-engine/vespa/blob/master/TODO.md) for larger things which no one is working on yet. + +## Community + +If you have questions, want to share your experience or help others, please join our community on the [Vespa Slack](https://slack.vespa.ai), or see Vespa on [Stack Overflow](http://stackoverflow.com/questions/tagged/vespa). diff --git a/mintlify-docs/en/learn/faq.mdx b/mintlify-docs/en/learn/faq.mdx new file mode 100644 index 0000000000..6a53f10e4f --- /dev/null +++ b/mintlify-docs/en/learn/faq.mdx @@ -0,0 +1,749 @@ +--- +title: "FAQ - frequently asked questions" +sidebarTitle: "Frequently asked questions" +description: "Refer to Vespa Support for more support options." 
+--- + +## Ranking + + + +[Ranking](/en/basics/ranking) is maybe the primary Vespa feature - we like to think of it as scalable, online computation. A rank profile is where the application's logic is implemented, supporting simple types like `double` and complex types like `tensor`. Supply ranking data in queries in query features (e.g. different weights per customer), or look up in a [Searcher](/en/applications/searchers). Typically, a document (e.g. product) "feature vector"/"weights" will be compared to a user-specific vector (tensor). + + + +Vespa doesn't have specific support for storing customer data as such. You can store this data as a separate document type in Vespa and look it up before passing the query, or store this customer meta-data as part of the other meta-data for the customer (i.e. login information) and pass it along the query when you send it to the backend. Find an example on how to look up data in [album-recommendation-docproc](https://github.com/vespa-engine/sample-apps/tree/master/examples/document-processing). + + + +Create a tensor in the ranking function from arrays or weighted sets using `tensorFrom...` functions - see [document features](/en/reference/ranking/rank-features#document-features). + + + +Pass a ranking feature like `query(threshold)` and use an `if` statement in the ranking expression - see [retrieval and ranking](/en/ranking/ranking-intro#retrieval-and-ranking). Example: + +```txt +rank-profile drop-low-score { + function my_score() { + expression: ..... #custom first phase score + } + rank-score-drop-limit:0.0 + first-phase { + if(my_score() < query(threshold), -1, my_score()) + } +} +``` + + + +Rank expressions are not evaluated lazily. No, this would require lambda arguments. Only doubles and tensors are passed between functions. 
+ +Example: + +```txt +function inline foo(tensor, defaultVal) { + expression: if (count(tensor) == 0, defaultVal, sum(tensor)) +} + +function bar() { + expression: foo(tensor, sum(tensor1 * tensor2)) +} +``` + + + +Yes, this can be accomplished by configuring [match-phase](/en/reference/schemas/schemas#match-phase) in the rank profile, or by adding a range query item using *hitLimit* to the query tree, see [capped numeric range search](/en/reference/querying/yql#numeric). Both methods require an *attribute* field with *fast-search*. The capped range query is faster, but beware that if there are other restrictive filters in the query, one might end up with 0 hits. The additional filters are applied as a post filtering step over the hits from the capped range query. *match-phase*, on the other hand, is safe to use with filters or other query terms, and also supports diversification which the capped range query term does not support. + + + +If a ranking profile produces NaNs or Infinities - which are impossible to represent as a number in JSON - the strings "Infinity" or "-Infinity", (NaN becomes "-Infinity") are returned in result sets, and client libraries might handle that by default (e.g., Golang). + +The returned [relevance](/en/reference/querying/default-result-format#relevance) for a hit can become "-Infinity" instead of a double: + +- The [ranking](/en/basics/ranking) expression used a feature which became `NaN` (Not a Number). For example, `log(0)` would produce `-Infinity`. Use [isNan](/en/reference/ranking/ranking-expressions#isnan-x) to guard against this. +- Surfacing low scoring hits using [grouping](/en/querying/grouping), that is, rendering low ranking hits with `each(output(summary()))` that are outside what Vespa computed and caches on a heap. This is controlled by the [total-keep-rank-count](/en/reference/schemas/schemas#total-keep-rank-count) parameter. 
+- Using unset fields in the ranking function + +Resolve this by one or more of: + +- Extend the client code to specifically handle these strings +- Make sure the field is set to some value for all documents +- Add a default value for the field when accessing it in your rank profile: `if (isNan(attribute(last_update)), 0, attribute(last_update))` +- Add a final guard in the ranking expressions coercing to some small number, making non-finite scores sink to the bottom while remaining a valid number: + +```txt +function finite_or_sentinel(x) { + expression: if (isNan(x - x), -1e9, x) +} +``` + +- Use CBOR instead of JSON - using a binary format can represent NaNs and Infinities without issues, and it can also be faster/more efficient + + + +To hard-code documents to positions in the result set, see the [pin results example](/en/ranking/multivalue-query-operators#pin-results-example). + + + +## Documents + + + +There is a [maximum document size](/en/reference/applications/services/container#document-api) of 100 MiB, which is configurable per content cluster in services.xml. + + + +No enforced limit, except resource usage (memory). + + + +E.g. a product is offered in a list of stores with a quantity per store. Use [multivalue fields](/en/querying/searching-multivalue-fields) (array of struct) or [parent child](/en/schemas/parent-child). Which one to choose depends on the use case; see the discussion in the latter link. + + + +E.g. price and quantity available per store may often change vs the actual product attributes. Vespa supports [partial updates](/en/writing/reads-and-writes) of documents. Also, the parent/child feature is implemented to support use-cases where child elements are updated frequently, while a more limited set of parent elements are updated less frequently. + + + +See the [Vespa Consistency Model](/en/content/consistency). Vespa is not transactional in the traditional sense, it doesn't have strict ACID guarantees. 
Vespa is designed for high performance use-cases with eventual consistency as an acceptable (and to some extent configurable) trade-off. + + + +Wildcard fields are not supported in vespa. Workaround would be to use maps to store the wildcard fields. Map needs to be defined with `indexing: attribute` and hence will be stored in memory. Refer to [map](/en/reference/schemas/schemas#map). + + + +Implement a [document processor](/en/applications/document-processors) for this. + + + +Set a selection criterion on the `document` element in `services.xml`. The criterion selects documents to keep. I.e. to purge documents "older than two weeks", the expression should be "newer than two weeks". Read more about [document expiry](/en/schemas/documents#document-expiry). + + + +Changing redundancy is a live and safe change (assuming there is headroom on disk / memory - e.g. from 2 to 3 is 50% more). The time to migrate will be quite similar to what it took to feed initially - a bit hard to say generally, and depends on IO and index settings, like if building an HNSW index. To monitor progress, take a look at the [multinode](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode) sample application for the _clustercontroller_ status page - this shows buckets pending, live. Finally, use the `.idealstate.merge_bucket.pending` metric to track progress - when 0, there are no more data syncing operations - see [monitor distance to ideal state](/en/operations/self-managed/admin-procedures#monitor-distance-to-ideal-state). Nodes will work as normal during data sync, and query coverage will be the same. + + + +It does not, _namespace_ is a mechanism to split the document space into parts that can be used for document selection - see [documentation](/en/schemas/documents#namespace). The namespace is not indexed and cannot be searched using the query api, but can be used by [visiting](/en/writing/visiting). 
+ + + +There are multiple things that can cause this, see [visiting troubleshooting](/en/writing/visiting#troubleshooting). + + + +Run a query like `vespa query "select * from sources * where true"` and see the `totalCount` field. Alternatively, use metrics or `vespa visit` - see [examples](/en/writing/batch-delete#example). + + + +Not in the field definition, but it's possible to do this with the [choice](/en/writing/indexing#choice-example) expression in an indexing statement. + + + +## Query + + + +Facets are called grouping in Vespa. Groups can be multi-level. + + + +Add filters to the query using [YQL](/en/querying/query-language) using boolean, numeric and [text matching](/en/querying/text-matching). Query terms can be annotated as filters, which means that they are not highlighted when bolding results. + + + +One way is to describe items using tensors and query for the [nearest neighbor](/en/reference/querying/yql#nearestneighbor) - using full precision or approximate (ANN) - the latter is used when the set is too large for an exact calculation. Apply filters to the query to limit the neighbor candidate set. Using [dot products](/en/ranking/multivalue-query-operators) or [weak and](/en/ranking/wand) are alternatives. + + + +Vespa does not have a stop-word concept inherently. See the [sample app](https://github.com/vespa-engine/sample-apps/pull/335/files) for how to use [filter terms](/en/reference/querying/yql#annotations). [Tripling the query performance of lexical search](https://blog.vespa.ai/tripling-the-query-performance-of-lexical-search/) is a good blog post on this subject. + + + +Trying to request more than 400 hits in a query, getting this error: `{'code': 3, 'summary': 'Illegal query', 'message': '401 hits requested, configured limit: 400.'}`. + +- To increase max result set size (i.e. allow a higher [hits](/en/reference/api/query#hits)), configure `maxHits` in a [query profile](/en/reference/api/query#queryprofile), e.g. 
`500` in `search/query-profiles/default.xml` (create as needed). The [query timeout](/en/reference/api/query#timeout) can be increased, but it will still be costly and likely impact other queries - large limit more so than a large offset. It can be made cheaper by using a smaller [document summary](/en/querying/document-summaries), and avoiding fields on disk if possible. +- Using _visit_ in the [document/v1/ API](/en/writing/document-v1-api-guide) is usually a better option for dumping all the data. + + + +See the [UserProfileSearcher](https://github.com/vespa-engine/sample-apps/blob/master/news/app-6-recommendation-with-searchers/src/main/java/ai/vespa/example/UserProfileSearcher.java) for how to create a new query to fetch data - this creates a new Query, sets a new root and parameters - then `fill`s the Hits. + + + +See the sub-query question above, in addition add something like: +```java expandable +public class ConfigCacheRefresher extends AbstractComponent { + + private final ScheduledExecutorService configFetchService = Executors.newSingleThreadScheduledExecutor(); + private Chain searcherChain; + + void initialize() { + Runnable task = () -> refreshCache(); + configFetchService.scheduleWithFixedDelay(task, 1, 1, TimeUnit.MINUTES); + searcherChain = executionFactory.searchChainRegistry().getChain(new ComponentId("configDefaultProvider")); + } + + public void refreshCache() { + Execution execution = executionFactory.newExecution(searcherChain); + Query query = createQuery(execution); + + public void deconstruct() { + super.deconstruct(); + try { + configFetchService.shutdown(); + configFetchService.awaitTermination(1, TimeUnit.MINUTES); + }catch(Exception e) {..} + } +} +``` + + + +Yes, using the [in query operator](/en/reference/querying/yql#in). Example: +```sql +select * from data where user_id in (10, 20, 30) +``` +The best article on the subject is [multi-lookup set filtering](/en/performance/feature-tuning#multi-lookup-set-filtering). 
Refer to the [in operator example](/en/ranking/multivalue-query-operators#in-example) on how to use it programmatically in a [Java Searcher](/en/applications/searchers). + + + +Use the [in query operator](/en/reference/querying/yql#in). Example: +```sql +select * from data where category in ('cat1', 'cat2', 'cat3') +``` +See [multi-lookup set filtering](#is-it-possible-to-query-vespa-using-a-list-of-document-ids) above for more details. + + + +Count all documents using a query like [select * from doc where true](/en/querying/query-language) - this counts all documents from the "doc" source. Using `select * from doc where true limit 0` will return the count and no hits, alternatively add [hits=0](/en/reference/api/query#hits). Pass [ranking.profile=unranked](/en/reference/api/query#ranking.profile) to make the query less expensive to run. If an _estimate_ is good enough, use [hitcountestimate=true](/en/reference/api/query#hitcountestimate). + + + +Yes - a deployment warning with _This may lead to recall and ranking issues_ is emitted when fields with conflicting tokenization are put in the same [fieldset](/en/reference/schemas/schemas#fieldset). This is because a given query item searching one fieldset is tokenized just once, so there's no right choice of tokenization in this case. If you have text that you want to apply to multiple fields with different tokenization, include the text multiple times in the query: +```sql +select * from sources * where fieldsetOrField1 contains text(@query) or fieldsetOrField2 contains text(@query) +``` +More details on [stack overflow](https://stackoverflow.com/questions/72784136/why-vepsa-easily-warning-me-this-may-lead-to-recall-and-ranking-issues). + + + +Symptoms — can appear when a term's DF differs substantially between member fields: +- Poor recall for queries mixing a common term with a rare one (e.g. `"the cure"`, `"the X"`). [weakAnd](/en/ranking/wand) may drop the common term, so good matches never surface. 
+- `term(n).significance` and `fieldMatch(field).significance` read identical across member fields in rank-feature dumps — even in fields where the term is actually rare. + +Cause: when a term matches a [fieldset](/en/reference/schemas/schemas#fieldset) (including the implicit `default` used by [userQuery()](/en/reference/querying/yql#userquery)), Vespa aggregates the document frequency across all member fields. + +If the DF differs substantially between members, the high-DF field dominates and pulls the term's significance down for the whole fieldset. + +**Example:** + +With `fieldset default { fields: title, artist }`, `"the"` is common in `title` (countless _"The Watcher"_, _"The Best Of the ..."_) but rare in `artist`. + +Its aggregated significance is pulled down toward the `title` DF, so searching for the artist `"The Cure"` loses the signal from `"the"`. + +The same aggregated DF drives every DF/IDF feature: [bm25](/en/ranking/bm25), [nativeRank](/en/ranking/nativerank), `term(n).significance`, `fieldMatch.significance`. + +Matches that survive retrieval are scored using the aggregated DF rather than per-field statistics. + +Fix: rewrite as OR'd [userInput](/en/reference/querying/yql#userinput) clauses with a [defaultIndex](/en/reference/querying/yql#defaultindex) annotation per field. Each field then uses its own DF: + +*Combined-fieldset DF:* + +```php +vespa query 'select * from sources * where userQuery()' \ + query='the cure' +``` + +*Per-field DF:* + +```bash +vespa query 'select * from sources * where ({defaultIndex:"title"}userInput(@q)) or ({defaultIndex:"artist"}userInput(@q))' \ + q='the cure' +``` + + +**Important** + +BM25 and significance feature values shift scale when switching to per-field DF. Retrain any learned ranker on features collected with the new query formulation. + + + + +Find query timeout details in the [Query API Guide](/en/querying/query-api#timeout) and the [Query API Reference](/en/reference/api/query#timeout). 
+ + + +Backslash is used to escape special characters in YQL. For example, to query with a literal backslash, which is useful in regular expressions, you need to escape it with another backslash: \\. Unescaped backslashes in YQL will lead to "token recognition error at: '\'". + +In addition, Vespa CLI unescapes double backslashes to single (while single backslashes are left alone), so if you query with Vespa CLI you need to escape with another backslash: \\\\. The same applies to strings in Java. + +Also note that both log messages and JSON results escape backslashes, so any \ becomes \\. + + + +E.g. two select queries with slightly different filtering conditions, with a limit operator for each of the subqueries. This makes it impossible to do via OR conditions to select both collections of documents - something equivalent to: + +SELECT 1 AS x UNION ALL SELECT 2 AS y; + +This isn’t possible, you need to run two queries. Alternatively, split a single incoming query into two running in parallel in a [Searcher](/en/applications/searchers) - example: + +```java +FutureResult futureResult = new AsyncExecution(settings).search(query); +FutureResult otherFutureResult = new AsyncExecution(settings).search(otherQuery); +``` + + + +There is no index or attribute data structure that allows efficient _searching_ for documents where an array field has a certain number of elements or items. The _grouping language_ has a [size()](/en/reference/querying/grouping-language#list-expressions) operator that can be used in queries. + + + +The [visiting](/en/writing/visiting#analyzing-field-values) API using document selections supports it, with a linear scan over all documents. If the field is an _attribute_ one can query using grouping to identify NaN values, see count and list [fields with NaN](/en/querying/grouping#count-fields-with-nan).
+ + + +See the [random.match](/en/reference/ranking/rank-features#random.match) rank feature - example: + +```txt +rank-profile random { + first-phase { + expression: random.match + } +} +``` + +Run queries, seeding the random generator: + +```bash +$ vespa query 'select * from music where true' \ + ranking=random \ + rankproperty.random.match.seed=2 +``` + + + +See [result diversity](/en/querying/result-diversity) for strategies on how to create result sets from different sources. + + + +If you want to search for the most dissimilar items, you can with angular distance multiply your `clip_query_embedding` by the scalar -1. Then you are searching for the points that are closest to the point which is the farthest away from your `clip_query_embedding`. + +Also see a [pyvespa example](https://vespa-engine.github.io/pyvespa/examples/pyvespa-examples#Neighbors). + + + +## Feeding + + + +The best option is to use `--verbose` option, like `vespa feed --verbose myfile.jsonl` - see [documentation](/en/clients/vespa-cli#documents). A common problem is a mismatch in schema names and [document IDs](/en/schemas/documents#document-ids) - a schema like: +```yaml +schema article { + document article { + ... + } +} +``` + +will have a document feed like: + +```json +{"put": "id:mynamespace:article::1234", "fields": { ... }} +``` + +Note that the [namespace](/en/learn/glossary#namespace) is not mentioned in the schema, and the schema name is the same as the document name. + + + +This configuration is a combination of content and container cluster configuration, see [indexing](/en/writing/indexing) and [feed troubleshooting](/en/operations/self-managed/admin-procedures#troubleshooting). + + + +This is often a problem if using [document expiry](/en/schemas/documents#document-expiry), as documents already expired will not be persisted, they are silently dropped and ignored. Feeding stale test data with old timestamps in combination with document-expiry can cause this behavior. 
+ + + +Using too many HTTP clients can generate a 429 response code. The Vespa sample apps use [vespa feed](/en/clients/vespa-cli#documents) which uses HTTP/2 for high throughput - it is better to stream the feed files through this client. + + + +Vespa does not have a Kafka connector. Refer to third-party connectors like [kafka-connect-vespa](https://github.com/vinted/kafka-connect-vespa). + + + +## Text Search + + + +E.g. integrating NER, word sense disambiguation, specific intent detection. Vespa supports these things well: +- [Query (and result) processing](/en/applications/searchers) +- [Document processing](/en/applications/document-processors) and document processors working on semantic annotations of text + + + +E.g. instead of using terms or n-grams as the unit, we might use terms with specific word senses - e.g. bark (dog bark) vs. bark (tree bark), or BCG (company) vs. BCG (vaccine name). Creating a new index *format* means changing the core. However, for the examples above, one just needs control over the tokens which are indexed (and queried). That is easily done in some Java code. The simplest way to do this is to plug in a [custom tokenizer](/en/linguistics/linguistics). That gets called from the query parser and bundled linguistics processing [Searchers](/en/applications/searchers) as well as the [Document Processor](/en/applications/document-processors) creating the annotations that are consumed by the indexing operation. Since all of these are Searchers and Docprocs, which you can replace and/or surround with custom components before and after, you can also take full control over these things without modifying the platform itself. + + + +It provides the building blocks but not an out-of-the-box solution. We can write a [Searcher](/en/applications/searchers) to detect query-side entities and rewrite the query, and a [DocProc](/en/applications/document-processors) if we want to handle them in some special way on the indexing side.
+ + + +You can write a document processor for text extraction, Vespa does not provide it out of the box. + + + +[Imported fields](/en/schemas/parent-child) from parent documents are defined as [attributes](/en/content/attributes), and have limited text match modes (i.e. `indexing: index` cannot be used). [Details](https://stackoverflow.com/questions/71936330/parent-child-mode-cannot-be-searched-by-parent-column). + + + +## Semantic search + + + +If you have added vectors to your documents and queries, and see that the rank feature closeness(field, yourEmbeddingField) produces 1.0 for all documents, you are likely using [distance-metric](/en/reference/schemas/schemas#distance-metric): innerproduct/prenormalized-angular, but your vectors are not normalized, and the solution is normally to switch to [distance-metric: angular](/en/reference/schemas/schemas#angular) or use [distance-metric: dotproduct](/en/reference/schemas/schemas#dotproduct) (available from Vespa 8.170.18). + +With non-normalized vectors, you often get negative distances, and those are capped to 0, leading to closeness 1.0. Some embedding models, such as models from sbert.net, claim to output normalized vectors but might not. + + + +## Programming Vespa + + + +Plugins have to run in the JVM - [jython](https://www.jython.org/) might be an alternative, however Vespa Team has no experience with it. Vespa does not have a language like [painless](https://www.elastic.co/guide/en/elasticsearch/reference/current/modules-scripting-painless) - it is more flexible to write application logic in a JVM-supported language, using [Searchers](/en/applications/searchers) and [Document Processors](/en/applications/document-processors). + + + +A [Searcher](/en/applications/searchers) intercepts a query and/or result. 
To get a number of documents by id in a Searcher or other component like a [Document processor](/en/applications/document-processors), you can have an instance of [com.yahoo.documentapi.DocumentAccess](/en/reference/applications/components#injectable-components) injected and use that to get documents by id instead of the HTTP API. + + + +Vespa uses Java 17 - it will support 20 some time in the future. + + + +Use `System.out.println` to write text to the [vespa.log](/en/reference/operations/log-files). + + + +## Performance + + + +Vespa has a near real-time indexing core with typically sub-second latencies from document ingestion to being indexed. This depends on the use-case, available resources and how the system is tuned. Some more examples and thoughts can be found in the [scaling guide](/en/performance/sizing-search). + + + +Vespa does not have a concept of "batch ingestion" as it contradicts many of the core features that are the strengths of Vespa, including [serving elasticity](/en/content/elasticity) and sub-second indexing latency. That said, we have numerous use-cases in production that do high throughput updates to large parts of the (sometimes entire) document set. In cases where feed throughput is more important than indexing latency, you can tune this to meet your requirements. Some of this is detailed in the [feed sizing guide](/en/performance/sizing-feeding). + + + +Yes. The [content node](/en/content/proton) is implemented in C++ and not memory constrained other than what the operating system does. + + + +If the replicas are in sync the request is only sent to the primary content node. Otherwise, it's sent to several nodes, depending on replica metadata. Example: if a bucket has 3 replicas A, B, C and A & B both have metadata state X and C has metadata state Y, a request will be sent to A and C (but not B since it has the same state as A and would therefore not return a potentially different document). 
+ + + +[Attribute](/en/content/attributes) (with or without `fast-search`) is always in memory, but does not support tokenized matching. It is for structured data. [Index](/en/basics/schemas#document-fields) (where there’s no such thing as fast-search since it is always fast) is in memory to the extent there is available memory and supports tokenized matching. It is for unstructured text. + +It is possible to guarantee that fields that are defined with `index` have both the dictionary and the postings in memory by changing from `mmap` to `populate`, see [index > io > search](/en/reference/applications/services/content#index-io-search). Make sure that the content nodes run on nodes with plenty of memory available, during index switch the memory footprint will 2x. Familiarity with Linux tools like `pmap` can help diagnose what is mapped and if it’s resident or not. + +Fields that are defined with `attribute` are in-memory, fields that have both `index` and `attribute` have separate data structures, queries will use the default mapped on disk data structures that supports `text` matching, while grouping, summary and ranking can access the field from the `attribute` store. + +A Vespa query is executed in two phases as described in [sizing search](/en/performance/sizing-search), and summary requests can touch disk (and also uses `mmap` by default). Due to their potential size there is no populate option here, but one can define [dedicated document summary](/en/querying/document-summaries#performance) containing only fields that are defined with `attribute`. + +The [practical performance guide](/en/performance/practical-search-performance-guide) can be a good starting point as well to understand Vespa query execution, difference between `index` and `attribute` and summary fetching performance. 
+ + + +Deleting documents, by using the [document API](/en/writing/reads-and-writes) or [garbage collection](/en/schemas/documents#document-expiry) will increase the capacity on the content nodes. However, this is not necessarily observable in system metrics - this depends on many factors, like what kind of memory is released, when [flush](/en/content/proton#proton-maintenance-jobs) jobs are run and document [schema](/en/basics/schemas). + +In short, Vespa is not designed to release memory once used. It is designed for sustained high throughput, low latency, keeping maximum memory used under control using features like [feed block](/en/writing/feed-block). + +When deleting documents, one can observe a slight increase in memory. A deleted document is represented using a [tombstone](/en/operations/self-managed/admin-procedures#content-cluster-configuration), that will later be removed, see [removed-db-prune-age](/en/reference/applications/services/content#removed-db-prune-age). When running garbage collection, the summary store is scanned using mmap and both VIRT and page cache memory usage increase. + +Read up on [attributes](/en/content/attributes) to understand more of how such fields are stored and managed. [Paged attributes](/en/content/attributes#paged-attributes) trade off memory usage vs. query latency for a lower max memory usage. + + + +A field is of type _index_ or _attribute_ - [details](/en/querying/text-matching#index-and-attribute). + +Fields with _index_ use no incremental memory at deployment, if the field has no value. + +Fields with _attribute_ use memory, even if the field value is not set. + +Attributes are optimized for random access: To be able to jump to the value of any document in O(1) time. That requires allocating a constant amount of memory (the value, or a pointer) per document, regardless of whether there is a value.
In short, knowing that a value is unset is a value in itself for attributes, so deploying new fields or new schemas with attributes will cause an incremental increase in memory. Applications with many unused schemas and fields can factor this in when sizing for memory. Refer to [attributes](/en/content/attributes#attribute-memory-usage) for details. + + + +[Autoscaling](/en/operations/autoscaling) is the best guide to understand how to size and autoscale the system. Container clusters are stateless and can be autoscaled more quickly than content clusters. + + + +It is not possible to autoscale content clusters for 8x load increase in 5 minutes, as this requires both provisioning and data migration. Such use cases are best discussed with the Vespa Team to understand the resource bottlenecks, tradeoffs and mitigations. Also read [Graceful Degradation](/en/performance/graceful-degradation). + + + +It depends. Vespa aims to adapt to resources (like auto thread config based on virtual node thread count) and actual use (when to run maintenance jobs like compaction), but there are tradeoffs that applications owners can/should make. Start off by reading the [Vespa Serving Scaling Guide](/en/performance/sizing-search), then run [benchmarks](/en/performance/benchmarking-cloud) and use the [dashboards](/en/operations/monitoring). + + + +## Administration + + + +Yes, deployment is using this web service API, which allows you to create an edit session from the currently deployed package, make modifications, and deploy (prepare+activate) it: [deploy-rest-api-v2](/en/reference/api/deploy-v2). However, this is only useful in cases where you want to avoid transferring data to the config server unnecessarily. When you resend everything, the config server will notice that you did not actually change e.g. the node configs and avoid unnecessary noop changes. 
+ + + +[Elasticity](/en/content/elasticity) is a core Vespa strength - easily add and remove nodes with minimal (if any) serving impact. The exact time needed depends on how much data will need to be migrated in the background for the system to converge to [ideal data distribution](/en/content/idealstate). + + + +You will need to load balance incoming requests between the nodes running the [stateless Java container cluster(s)](/en/learn/overview). This can typically be done using a simple network load balancer available in most cloud services. This is included when using [Vespa Cloud](/), with an HTTPS endpoint that is already load balanced - both locally within the region and globally across regions. + + + +[Search sizing](/en/performance/sizing-search) is the intro to this. Topology matters, and this is much used in the high-volume Vespa applications to optimise latency vs. cost. + + + +With [Vespa Cloud](/), we do automated background upgrades daily without noticeable serving impact. If you host Vespa yourself, you can do this, but need to implement the orchestration logic necessary to handle this. The high level procedure is found in [live-upgrade](/en/operations/self-managed/live-upgrade). + + + +[Vespa Cloud](/en/operations/zones) has integrated support - query a global endpoint. Writes will have to go to each zone. There is no auto-sync between zones. + + + +Building indexes offline requires the partition layout to be known in the offline system, which is in conflict with elasticity and auto-recovery (where nodes can come and go without service impact). It is also at odds with realtime writes. For these reasons, it is not recommended, and not supported. + + + +Use [visiting](/en/writing/visiting) to dump all or a subset of the documents. See [data-management-and-backup](/en/operations/data-management) for more information. + + + +Failure response will be given in case the document is not written on some replica nodes. 
+ + + +Yes, it will be available, eventually. Also try [Multinode testing and observability](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode). + + + +Yes just add a `deleted` attribute, add [fast-search](/en/content/attributes#fast-search) on it and create a searcher which adds an `andnot deleted` item to queries. + + + +You can set a [transition-time](/en/reference/applications/services/content#transition-time) in services.xml to configure the cluster controller how long a node is to be kept in maintenance mode before being automatically marked down. + + + +Grouped distribution is used to reduce search latency. Content is distributed to a configured set of groups, such that the entire document collection is contained in each group. Setting the redundancy and searchable-copies equal to the number of groups ensures that data can be queried from all groups. + + + +Refer to [#17898](https://github.com/vespa-engine/vespa/issues/17898) for a discussion of options. + + + +Use [/state/v1/version](/en/reference/api/state-v1#state-v1-version) to find Vespa version. + + + +See [rollback](/en/applications/deployment#rollback) for options. + + + +## Troubleshooting + + + +If deployment fails with error message "Deployment failed, code: 413 ("Payload Too Large.")" you might need to increase the config server's JVM heap size. The config server has a default JVM heap size of 2 Gb. When deploying an app with e.g. large models this might not be enough, try increasing the heap to e.g. 4 Gb when executing 'docker run ...' by adding an environment variable to the command line: + +```bash +docker run --env VESPA_CONFIGSERVER_JVMARGS=-Xmx4g +``` + + + +When deploying an application package, with some kind of error, the endpoints might fail, like: +```bash +$ vespa deploy --wait 300 + +Uploading application package ... done + +Success: Deployed target/application.zip + +Waiting up to 5m0s for query service to become available ... 
+Error: service 'query' is unavailable: services have not converged +``` +Another example: + +```text +[INFO] [03:33:48] Failed to get 100 consecutive OKs from endpoint ... +``` + +There are many ways this can fail, the first step is to check the Vespa Container: + +```bash +$ docker exec vespa vespa-logfmt -l error + +[2022-10-21 10:55:09.744] ERROR container +Container.com.yahoo.container.jdisc.ConfiguredApplication +Reconfiguration failed, your application package must be fixed, unless this is a JNI reload issue: +Could not create a component with id 'ai.vespa.example.album.MetalSearcher'. +Tried to load class directly, since no bundle was found for spec: album-recommendation-java. +If a bundle with the same name is installed, +there is a either a version mismatch or the installed bundle's version contains a qualifier string. +... +``` + +[Bundle plugin troubleshooting](/en/applications/bundles#bundle-plugin-troubleshooting) is a good resource to analyze Vespa container startup / bundle load problems. + + + +Using an M1 MacBook Pro / AArch64 makes the Docker run fail: + +```txt +WARNING: The requested image’s platform (linux/amd64) does not match the detected host platform (linux/arm64/v8) +and no specific platform was requested +``` + +Make sure you are running a recent version of the Docker image, do `docker pull vespaengine/vespa`. + + + +Make sure all [Config servers](/en/operations/self-managed/configuration-server#troubleshooting) are started, and are able to establish ZooKeeper quorum (if more than one) - see the [multinode](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode) sample application. Validate that the container has [enough memory](/en/operations/self-managed/docker-containers). + + + +The Config Server cluster with 3 nodes fails to start. 
The ZooKeeper cluster the Config Servers use waits for hosts on the network, the hosts wait for ZooKeeper in a catch 22 - see [sampleapp troubleshooting](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations#troubleshooting). + + + +Use [vespa-logfmt](/en/reference/operations/self-managed/tools#vespa-logfmt) to dump logs. If Vespa is running in a local container (named "vespa"), run `docker exec vespa vespa-logfmt`. + + + +See [encoding troubleshooting](/en/linguistics/troubleshooting-encoding) for how to handle and remove control characters from the document feed. + + + +## Login, Tenants and Plans + + + +[Deploy an application](/en/basics/deploy-an-application) to create a tenant and start your [free trial](https://vespa.ai/free-trial/). This tenant can be your personal tenant, or shared with others. It can not be renamed. + + + +If the tenant is already created, add more users to it. In the Vespa Cloud Console, open [**Account > Users**](https://console.vespa-cloud.com/link/tenant/account/users). From this view you can manage users in the tenant, and their roles - from here, you can add/set tenant admins. + + + +When starting the free trial, you are asked to accept Terms of Service. For paid plans, this is covered by the contract. + + + +In the console, open [**Account > Billing**](https://console.vespa-cloud.com/link/tenant/account/billing) to enter information required for billing. Use [Vespa Support](https://vespa.ai/support/) if you need to provide this information without console login. + + + +Yes, contact [Vespa Support](https://vespa.ai/support/) to set it up. + + + +## Vespa Cloud Operations + + + +See [node resources](/en/performance/node-resources) to assess current and auto-suggested resources and [autoscaling](/en/operations/autoscaling) for how to automate. + + + +Managing resources is easy, as most changes are automated. 
Adding / removing / changing nodes starts automated data migration, see [elasticity](/en/content/elasticity). + + + +Schema changes might require data reindexing, which is automated, but takes some time. Other schema changes require data refeed - [details](/en/reference/schemas/schemas#modifying-schemas) + + + +Use the [Memory Visualizer](/en/performance/memory-visualizer) to evaluate how memory is allocated to the fields. Fields can be `index`, `attribute` and `summary`, and combinations of these, with settings like `fast-search` that affects memory usage. [Attributes](/en/content/attributes) is a great read for understanding Vespa memory usage. + + + +Listing archived objects can fail, e.g. `gsutil -u my_project ls gs://vespa-cloud-data-prod-gcp-us-central1-f-12345f/my_tenant` can fail with `AccessDeniedException: 403 me@mymail.com does not have serviceusage.services.use access to the Google Cloud project. Permission \'serviceusage.services.use\' denied on resource (or it may not exist).` This can be due to missing rights on your Google project (my_project in the example above) \- from the Google documentation: _"The user account accessing the Cloud Storage Bucket must be granted the Service Usage Consumer role (see [https://cloud.google.com/service-usage/docs/access-control](https://cloud.google.com/service-usage/docs/access-control)) in order to charge the specified user project for the bucket usage cost"_ + + + +Vespa Cloud applications have a Prometheus endpoint. Find guides for how to integrate with Grafana and AWS Cloudwatch at [monitoring](/en/operations/monitoring). + + + +Vespa Cloud has detailed dashboards linked from the _monitoring_ tab in the Console, one for each zone the instance is deployed to. + + + +Vespa is normally upgraded daily. There are exceptions, like holidays and weekends. During upgrades, nodes are stopped one-by-one per cluster. As all clusters have one redundant node, serving and write traffic is not impacted by upgrades. 
Before the upgrade, the application's [system and staging tests](/en/operations/automated-deployments) are run, halting the upgrade if they fail. Documents are re-migrated to the upgraded node before doing the next node, see [Elastic Vespa](/en/content/elasticity) for details. + + + +Issues like Feed Blocked, Deployment and Deprecation warnings show up in the console. There are no warnings on redundancy level / searchable copies, as redundant document buckets are activated for queries automatically, and auto data-migration kicks in for node failures / replacements. + + + +- Schema changes that [require service restart](/en/reference/schemas/schemas#changes-that-require-restart-but-not-re-feed) are handled automatically by Vespa Cloud. A deployment job involves waiting for these to complete. +- Schema changes that [require reindexing](/en/reference/schemas/schemas#changes-that-require-reindexing) of data require a validation override, and will trigger automatic reindexing. Status can be tracked in the console application view. Vespa Cloud also periodically re-indexes all data, with minimal resource usage, to account for changes in linguistics libraries. +- Schema changes that [require refeeding](/en/reference/schemas/schemas#changes-that-require-re-feed) data require a validation override, and the user must refeed the data after deployment. + + + +The management of data stored in an application running on Vespa Cloud is the responsibility of the application owner and, as such, Vespa Cloud does not have any retention policy for this data as long as it is stored by the application. + +The following data retention policies apply to Vespa Cloud: +- After a node previously allocated to an application has been deallocated (e.g. due to the application being deleted by the application owner), all application data will be deleted within _four hours_.
+- All application log data will be deleted from Vespa servers after no more than _30 days_ (most often sooner) dependent on log volume, allocated disk resources, etc. *PLEASE NOTE:* This is the theoretical maximum retention time - see [archive guide](/en/operations/archive/archive-guide) for how to ensure access to your application logs. + + + +Yes, Vespa.ai has a SOC 2 attestation: [Trust Center](https://trust.vespa.ai). + + + +Read more in [GDPR](https://cloud.vespa.ai/en/gdpr?_gl=1*1uexiwi*_gcl_au*ODE0ODM4MTI2LjE3Nzk3MjQ3OTY.). + + + +Vespa is most often used for queries in data written from the information sources, although it can also be used without data, e.g. for model serving. It is the application owner that writes the integration with Vespa Cloud to write data. + + + +Vespa Cloud uses the following Cloud providers: +- AWS EC2 instances, with local or remote storage +- GCP Compute instances, with local or remote storage +- Azure Compute instances, with local or remote storage + +The storage devices are encrypted per Cloud provider, at rest. + + + +See the [security guide](/en/security/guide) for roles and permissions. The Vespa Cloud Console has a log view tool, and logs / access logs can be exported to the customer's AWS account easily. Deployment operations are tracked in the deployment view, with a history. Vespa Cloud Operators do not have node access, unless specifically granted by the customer, audit logged. + + + +At termination, all application instances are removed, with data, before the tenant can be deactivated. + + + +In `dev` zones we use shared resources hence have more than one node on each host/instance. In order to provide a best possible overall responsiveness we do not restrict CPU resources for the individual application nodes. 
+ + + diff --git a/mintlify-docs/en/learn/features.mdx b/mintlify-docs/en/learn/features.mdx new file mode 100644 index 0000000000..11bd68f111 --- /dev/null +++ b/mintlify-docs/en/learn/features.mdx @@ -0,0 +1,85 @@ +--- +title: Features +--- + +## What is Vespa? + +Vespa is a platform for applications which need low-latency computation over large data sets. It allows you to write and persist any amount of data, and execute high volumes of queries over the data which typically complete in tens of milliseconds. + +Queries can use both structured filter conditions, text and nearest neighbor vector search to select data. All the matching data is then ranked according to ranking functions - typically machine learned - to implement such use cases as search relevance, recommendation, targeting and personalization. + +All the matching data can also be grouped into groups and subgroups where data is aggregated for each group to implement features like graphs, tag clouds, navigational tools, result diversity and so on. + +Application specific behavior can be included by adding Java components for processing queries, results and writes to the application package. + +Vespa is real time. It is architected to maintain constant response times with any data volume by executing queries in parallel over many data shards and cores, and with added query volume by executing queries in parallel over many copies of the same data (groups). It is optimized to return responses in tens of milliseconds. Writes to data become visible in a few milliseconds and can be handled at a rate of thousands to tens of thousands per node per second. + +A lot of work has gone into making Vespa easy to set up and operate. Any Vespa application - from single node systems to systems running on hundreds of nodes in data centers - is fully configured by a single artifact called an *application package*. 
Low level configuration of nodes, processes and components is done by the system itself based on the desired traits specified in the application package. + +Vespa is scalable. System sizes up to hundreds of nodes handling tens of billions of documents, and tens of thousands of queries per second are not uncommon, and no harder to set up and modify than single node systems. Since all system components, as well as stored data, are redundant and self-correcting, hardware failures are not operational emergencies and can be handled by re-adding capacity when convenient. + +Vespa is self-repairing and dynamic. When machines are lost or new ones added, data is automatically redistributed over the machines, while continuing to serve and accept writes to the data. Changes to configuration and Java components can be made while serving by deploying a changed application package - no downtime or restarts required. + +## Features + +This section provides an overview of the main features of Vespa. The remainder of the documentation goes into full detail. + +### Data and writes + +- Documents in Vespa may be added, replaced, modified (single fields or any subset) and removed. +- Writes are acknowledged back to the client issuing them when they are durable and visible in queries, in a few milliseconds. +- Writes can be issued at a sustained volume of thousands to tens of thousands per node per second while serving queries. +- Data is replicated with a configurable redundancy. +- An even data distribution, with the desired redundancy, is automatically maintained when nodes are added, removed or lost unexpectedly. +- Data corruption is automatically repaired from an uncorrupted replica of the data. +- Data is written over a simple HTTP/2 API, or (for high volume) using a small, standalone client. +- Document data schemas allow fields of any of the usual primitive types as well as collections, structs and tensors. +- Any number of data schemas can be used at the same time. 
+- Documents may reference each other and fields from referenced documents may be used in queries without performance penalty. +- Write operations can be processed by adding custom Java components. +- Data can be streamed out of the system for batch reprocessing. + +### Queries + +- Queries may contain any combination of structured filters, free text and vector search operators. +- Queries may contain large tensors and vectors (to represent e.g. a user). +- Queries choose how results should be ranked and specify how they should be organized (see sections below). +- Queries and results may be processed by adding custom Java components - or any HTTP request may be turned into a query by custom request handlers. +- Query response times are typically in tens of milliseconds and can be maintained given any load and data size by adding more hardware. +- A *streaming search* mode is available where search/selection is only supported on predefined groups of documents (e.g. a user's documents). In this mode each node can store and serve billions of documents while maintaining low response times. + +### Ranking and inference + +- All results are ranked using a configured ranking function, selected in the query. +- A ranking function may be any mathematical function over scalars or tensors (multidimensional arrays). +- Scalar functions include an "if" function to express business logic and decision trees. +- Tensor functions include a powerful set of primitives and composite functions which allows expression of advanced machine-learned ranking functions such as deep neural nets. +- Functions can also refer to ONNX models invoked locally on the content nodes. +- Multiple ranking phases are supported to allocate more CPU to ranking promising candidates. +- A powerful set of text ranking features using positional information from the documents is provided out of the box. +- Other ranking features include 2D distance and freshness. 
+ +### Organizing data and presenting results + +- Matches to a query can be grouped and aggregated according to a specification in the query. +- All the matches are included, even though they reside on multiple machines executing in parallel. +- Matches can be grouped by a unique value or by a numerical bucket. +- Any level of groups and subgroups is supported, and multiple parallel groupings can be specified in one query. +- Data can be aggregated (counted, averaged etc.) and selected within each group and subgroup. +- Any selection of data from documents can be included with the final result returned to the client. +- Search engine style keyword highlighting in matching fields is supported. + +## Configuration and operations + +- Vespa can be installed using rpm files or a Docker image - on personal laptops, owned datacenters or in AWS. +- An application of Vespa is fully specified as a separate buildable artifact: An *application package* - individual machines or processes need never be configured individually. +- Systems may contain multiple clusters of each type (stateless and stateful), each containing any number of nodes. +- Systems of any size may be specified by two short configuration files in the application package. +- Document schemas, Java components and ranking functions/models are also configured in the application package. +- An application package is deployed as a single unit to Vespa to realize the system desired by the application. +- Most application changes (including Java component changes) can be performed by deploying a changed application package - the system will manage its own change process while serving and handling writes. +- Most document schema changes (excluding field type changes) can be made while the system is live. +- Application package changes are validated on deployment to prevent destructive changes to live systems. +- Vespa has no single points of failure and automatically routes around failing nodes. 
+- System logs are collected to a central server in real time. +- Selected metrics may be emitted to a third-party metrics/alerting system from all the nodes. diff --git a/mintlify-docs/en/learn/glossary.mdx b/mintlify-docs/en/learn/glossary.mdx new file mode 100644 index 0000000000..23767a5b48 --- /dev/null +++ b/mintlify-docs/en/learn/glossary.mdx @@ -0,0 +1,222 @@ +--- +title: Glossary +description: "This is a glossary of both Vespa-specific terminology, and general terms useful in this context." +--- + +- **Application** + + The unit of deployment and management. It can contain any number of clusters and schemas etc., but all are deployed together. The files defining the application are called the [Application Package](/en/basics/applications). + +- **Attribute** + + An attribute is a field with properties other than an indexed field. Attribute fields have flexible match modes, including exact match, prefix match, and case-sensitive matching. Attributes enable high sustained update rates by writing directly to memory without disk access. Features like Grouping, Sorting, and [Parent/Child](/en/learn/glossary#parent-child) use attributes. + +- **Boolean Search** + + Use [Predicate fields](/en/schemas/predicate-fields) to match queries to a set of boolean constraints in documents. The typical use case is to have a set of boolean constraints representing advertisements, specifying their target groups. Example: `hobby in [Music, Hiking] and age in [20..30]`. + +- **Cluster** + + A set of homogeneous nodes which all perform the same task. Vespa has two types: Container clusters are stateless, and content clusters store and process the data. + +- **Component** + + Components extend a base class from the Container code module; some are [Chained](/en/applications/chaining) for execution. 
The component types are: + + - [Processors](/en/applications/processing#processors) + - [Searchers](/en/learn/glossary#searcher) + - [Document Processors](/en/learn/glossary#document-processor) + - [Search Result Renderers](/en/applications/result-renderers) + - [Provider Components](/en/applications/dependency-injection#special-components) + +- **Configuration Server** + + The configuration server hosts most of the control plane of Vespa, where application packages are deployed to - often shortened to "config server". Config servers are deployed as one or in a cluster - see [overview](/en/learn/overview). The config server serves configuration for all Vespa processes, and is normally the first cluster started. + +- **Container** + + Vespa's Java container, hosting all application components as well as the stateless logic of Vespa itself. Read more in [Container](/en/applications/containers). Not to be confused with [Docker Containers](/en/learn/glossary#docker). + +- **Content Node** + + Content nodes are stateful and hold the document and index data - see [content nodes](/en/content/content-nodes). These nodes implement Vespa's [elasticity](/en/content/elasticity) for seamless data migration and scaling. + +- **Control Plane** + + The deploy-commands are Vespa's control plane. The control plane is often secured with other credentials than the [data plane](/en/learn/glossary#data-plane). Often low throughput and used by automation like GitHub Actions to deploy new versions of application packages. + +- **Data Plane** + + Document and Query APIs make up the Vespa data plane. Also see [control plane](/en/learn/glossary#control-plane). Often high throughput / low latency, as this is user-serving. + +- **Deploy** + + `deploy` is a control-plane command to upload and activate a new version of an [application package](/en/learn/glossary#application). + +- **Deployment** + + A deployment is a running Vespa application, created by using [deploy](/en/learn/glossary#deploy). 
+ +- **Diversity** + + Result diversity means having diverse results in the result set. As an example, not return the n highest ranking results, but eliminate similar hits, e.g. from the same domain. Refer to [diversity](/en/reference/schemas/schemas#diversity) and [grouping](/en/querying/grouping) for features to eliminate similar hits or group them together. + +- **Docker** + + Vespa is available as a container image from [hub.docker.com](https://hub.docker.com/r/vespaengine/vespa). Products to run this image include Docker, Podman and runC, and it enables users to run Vespa in a well-defined environment on multiple platforms. Read more in [Docker Containers](/en/operations/self-managed/docker-containers). + +- **Document** + + Vespa models data as documents. A document has a string identifier, set by the application, unique across all documents. A document is a set of key-value pairs. A document has a [Schema](/en/learn/glossary#schema). Read more in [Documents](/en/schemas/documents). + +- **Document frequency (normalized)** + + The *document frequency* of a term captures how often the term occurs in the document corpus relative to the total number of documents. For ranking purposes this value is always normalized so that it is in the range [0, 1]. For example, if a term occurs in 600 out of 1000 documents, its normalized document frequency will be \(600/1000 = 0.6\). + + From an information retrieval perspective, the normalized document frequency gives a measure of how common (or rare) a term is. Query terms that occur rarely (thus having a low document frequency) are usually expected to be more *relevant* to the query, since they are more specific. On the other end, very common terms (with high document frequency) are often considered to be "stopwords" (such as "the", "an" etc.), and are expected to have a low contribution to query relevance. 
This is directly related to [inverse document frequency](https://en.wikipedia.org/wiki/Tf%E2%80%93idf#Inverse_document_frequency), which is used by classic text ranking algorithms such as [tf-idf](https://en.wikipedia.org/wiki/Tf%E2%80%93idf) and [BM25](/en/ranking/bm25). + +- **Document summary** + + A [document summary](/en/querying/document-summaries) is the information that is shown for each document in a query result. What information to include is determined by a document summary class: A named set of fields with config on which information they should contain. When Vespa stores a document, it is written to the [document store](/en/content/proton#document-store) and used to generate summaries. The document store is scanned when using [streaming search](/en/performance/streaming-search). + +- **Document Processor** + + Document processing is a framework to create chains of configurable [Components](/en/learn/glossary#component) that read and modify document operations. A Document Processor uses `getFieldValue()` and `setFieldValue()` to process fields, alternatively using generated code from [Concrete Documents](/en/schemas/concrete-documents). + +- **Document Type** + + The data type part of a [Schema](/en/learn/glossary#schema) - a collection of fields. + +- **Elasticity** + + Vespa's clusters are elastic - a user can add or remove nodes on running applications without service disruption. For the stateful content nodes, this causes data sync between nodes for uniform distribution, with minimal data re-distribution. Read more in [Elasticity](/en/content/elasticity). + +- **Enclave** + + Vespa Cloud Enclave is a feature to run your Vespa application in Vespa Cloud in your own AWS or GCP account, see the [Enclave documentation](/en/operations/enclave/enclave). 
+ +- **Embedding** + + A common technique in modern big data serving applications is to map the subject data - say, text or images - to points in an abstract vector space and then do computation in that vector space. For example, retrieve similar data by finding nearby points in the vector space, or using the vectors as input to a neural net. This mapping is usually referred to as *embedding*, and Vespa provides [built-in support](/en/rag/embedding) for this. + +- **Estimated hit ratio** + + When Vespa plans how a query should be evaluated in the most efficient way possible, one of the most important pieces of information is how many *hits* different parts of the query will produce. The estimated hit ratio is a normalized number in the range [0, 1] that states the proportion of documents that is expected to match a given part of the query. + + For example, a query with an `AND` operator over multiple terms will benefit by having the query planner place the term with the *lowest* estimated hit ratio *first* in the AND's evaluation order. This is because that term will be the cheapest to evaluate (least number of candidate documents to iterate over), and all other terms can be excluded as a possible match if it doesn't match. + +- **Federation** + + The [Container](/en/learn/glossary#container) allows multiple sources of data to be [federated](/en/querying/federation) to a common search service. The sources of data may be both search clusters, or external services, backed by Vespa or any other kind of service. The container may be used as a pure federation platform by setting up a system consisting solely of container nodes federating to external services. + +- **Field** + + Documents have [Fields](/en/basics/schemas#document-fields). A field has a type, and a field contained in a document can be written to, read from and queried. A field can also be generated (i.e. 
a synthetic field) - in this case, the field definition is outside the document - [example](/en/writing/indexing#date-indexing). A field can be singlevalue, like a string, or multivalue, like an array of strings. + +- **Fieldset** + + The term *fieldset* has two meanings in Vespa: + +- **A collection of fields that are queried together - configured in the [schema](/en/reference/schemas/schemas#fieldset):** + + ```sql + fieldset myset { + fields: artist, title, album + } + ``` + +- **A collection of fields to return for a GET or VISIT operation, see the [guide](/en/schemas/documents#fieldsets):** + + ```text + $ vespa visit --field-set restaurant:name,rating + ``` + +- **Garbage Collection** + + Use a [Document Selection](/en/reference/applications/services/content#document) to [auto-expire](/en/schemas/documents#document-expiry) documents by time or any other criterion. + +- **Grouping** + + Vespa Grouping is a list processing language which describes how the query hits should be grouped, aggregated and presented in result sets. A grouping statement takes the list of all matches to a query as input and groups/aggregates it, possibly in multiple nested and parallel ways to produce the output. [Read more](/en/querying/grouping). + +- **Handler** + + Also called *Request Handler*. A handler is a [Component](/en/learn/glossary#component) used to build API endpoints on the [Container](/en/learn/glossary#container). Find documentation at [developing request handlers](/en/applications/request-handlers), and [example use](https://github.com/vespa-engine/sample-apps/tree/master/model-inference/src/main/java/ai/vespa/example). + +- **Indexing** + + The process of creating index structures. This includes routing document writes to indexing processors, [processing](/en/writing/indexing) documents and writing the documents to content clusters. Settings like [streaming search](/en/learn/glossary#streaming-search) do not create indices, to optimize resource usage. 
+ +- **Instance** + + *Instance* is always "default" in Vespa.ai (i.e. there is only one) - managed services like [Vespa Cloud](/) support multiple, [read more](/en/learn/tenant-apps-instances). An instance is a deployment of an application for a given purpose, like production serving - multiple instances of an application can be used to support more use cases like integration testing. + +- **Namespace** + + A segment of [document IDs](/en/learn/glossary#document) which helps you generate unique IDs even if you have multiple sources of unique values. Namespace can be used to [Visit](/en/learn/glossary#visit) a subspace of the corpus. + +- **Nearest neighbor search** + + [Nearest neighbor search](/en/querying/nearest-neighbor-search), or [vector search](/en/querying/vector-search-intro), is a technique used to find the closest data points to a given query point in a high-dimensional vector space - see [distance metric](/en/querying/nearest-neighbor-search#distance-metrics-for-nearest-neighbor-search). It can be exact or approximate. + + This is supported in Vespa using the [nearestNeighbor](/en/reference/querying/yql) query operator. + +- **Node** + + A Node is a host / container instance running one or more [Services](/en/learn/glossary#service). The mapping from logical to actual name is configured in [hosts.xml](/en/reference/applications/hosts). + +- **Parent / Child** + + Using document references, documents can have [parent/child](/en/schemas/parent-child) relationships. Use this to join data by importing fields from parent documents. Parent documents are replicated to all nodes in the cluster. + +- **Partial Update** + + A partial update is an update to one or more fields in a document. It also includes updating all index structures, so the effect of the partial update is immediately observable in queries. Partial updates do not require the full document, and enable a high write throughput with memory-only operations. 
[Read more](/en/writing/partial-updates). + +- **Posting List** + + A posting list is a fundamental data structure in information retrieval and search engines. It is used in inverted indexes to store the occurrences of a term in a collection of documents. [Read more](/en/performance/feature-tuning#posting-lists). + +- **Quantization** + + Quantization is the process of constraining an input from a continuous or otherwise large set of values (such as the real numbers) to a discrete set (such as the integers). It is a way to reduce memory and CPU usage for [tensor operations](/en/learn/glossary#tensor) in [nearest neighbor search](/en/learn/glossary#nearest-neighbor-search), to improve throughput or latencies. + +- **Query** + + Use the [Query API](/en/querying/query-api) to query the corpus. Queries are written in [YQL](/en/reference/querying/yql), or can be created programmatically in a [Searcher](/en/learn/glossary#searcher). Configure query execution in a [Query Profile](/en/querying/query-profiles). + +- **Ranking** + + Ranking is where Vespa does computing, or inference over documents. The computations to be done are expressed in functions called [Ranking Expressions](/en/ranking/ranking-expressions-features#ranking-expressions), bundled into [Rank Profiles](/en/basics/ranking#rank-profiles) defined in a [Schema](/en/learn/glossary#schema). These can range from simple math expressions combining some rank features, to tensor expressions or large machine-learned models. Ranking can be single- or [multiphased](/en/ranking/phased-ranking). + +- **Schema** + + A description of a particular type of data and how to process/rank it. See the [Schema guide](/en/basics/schemas). + +- **Searcher** + + A searcher is a [Component](/en/learn/glossary#component) - usually deployed as part of an OSGi bundle. All Searchers must implement a single method `search(query)`. Developers implement application query logic in Searchers - [read more](/en/applications/searchers). 
+ +- **Semantic search** + + Semantic search denotes search with meaning, as distinguished from lexical search where the search engine looks for literal matches of the query words. Read [Revolutionizing Semantic Search with Multi-Vector HNSW Indexing](https://blog.vespa.ai/semantic-search-with-multi-vector-indexing/) for more details on semantic search, pointers to resources, and how to implement it. + +- **Service** + + A Service runs in a [Cluster](/en/learn/glossary#cluster) of container or content nodes, configured in [services.xml](/en/reference/applications/services/services). + +- **Streaming search** + + [Streaming search](/en/performance/streaming-search) is querying fields that do not have an index structure. The indexing cost is minimal as no index is generated. A query is hence a scan over all data, and normally slower than using index structures. Streaming search is used for applications like personal search, where the searched data volume is small. It can be a powerful option to drastically limit memory use in nearest-neighbor applications where the possible neighbor set is orders of magnitude smaller than the total. + +- **Tenant** + + An organizational unit that owns [applications](/en/learn/glossary#application). In Vespa.ai APIs, *tenant* and *application* are always "default", and a Vespa system has exactly one tenant and one application. On [Vespa Cloud](/), multiple tenants and applications are supported - [read more](/en/learn/tenant-apps-instances). + +- **Tensor** + + A [Tensor](/en/ranking/tensor-user-guide) is a data structure which generalizes scalars, vectors and matrices to any number of dimensions: A scalar is a tensor of rank 0, a vector is a tensor of rank 1, a matrix is a tensor of rank 2. Tensors consist of a set of scalar valued cells, with each cell having a unique address. A cell's address is specified by its index or label in all the dimensions of that tensor. 
The number of dimensions in a tensor is the rank of the tensor; each dimension can be either mapped or indexed. + +- **Visit** + + [Visit](/en/writing/visiting) is a feature to efficiently get or process a set of / all documents, identified by a [Document Selection Expression](/en/reference/writing/document-selector-language). Visit iterates over all, or a set of, buckets and sends documents to a (set of) targets. diff --git a/mintlify-docs/en/learn/llm-help.mdx b/mintlify-docs/en/learn/llm-help.mdx new file mode 100644 index 0000000000..0b239f2d21 --- /dev/null +++ b/mintlify-docs/en/learn/llm-help.mdx @@ -0,0 +1,55 @@ +--- +title: Getting help from LLMs +description: "This page describes some of the ways that you can get help from large language models (LLMs) when developing a Vespa application." +--- + +From our experience, providing the right context to the LLM is essential to get good results when asking questions about Vespa. + +## Markdown version of documentation pages + +Every page of the documentation is available in Markdown format, by changing the URL from `.html` to `.html.md`. There is also a link to the markdown version in the top right corner of each page. + +This can for example be used to copy/paste relevant markdown documentation page(s) into your AI tool of choice when working with LLMs on particular topics. + +## llms.txt + +We provide an [llms.txt](https://docs.vespa.ai/llms.txt) file that can serve as a top-level entrypoint for an LLM. It includes a top-level overview, the architecture, and the title of and link to the Markdown version of every documentation page. + +See [llmstxt.org](https://llmstxt.org/) for more information about the format. + +### Example usage + +The [llms.txt](https://docs.vespa.ai/llms.txt) file can be downloaded with: + +```bash +curl -O https://docs.vespa.ai/llms.txt +``` + +This file can then be used as an entrypoint when working with LLMs, either through an IDE, CLI or a chat interface. 
If the LLM has a tool available that allows it to fetch the referenced URLs, it can fetch the content of the desired pages as needed. + +We also provide [llms-full.txt](https://docs.vespa.ai/llms-full.txt) which contains the _full_ content of all documentation pages in markdown format. This file is relatively large (almost 0.5M words as of Oct 2025), so use accordingly. + +## MCP Server + +### Public Vespa MCP server + +We don't provide any official [MCP](https://modelcontextprotocol.io/) server at this time, but will update this page as soon as we do. + +### Personal MCP server + +Users can enable MCP server capabilities in their own Vespa apps. This can be done by adding `McpRequestHandler` to `services.xml` with one or more `McpSpecProvider` components. + +A pre-built `McpSearchSpecProvider` already exists, and a usage example can be found in [this sample app](https://github.com/vespa-engine/sample-apps/tree/master/examples/mcp-server-app). This exposes Vespa search to LLMs via the `/mcp/` endpoint. + +Users can add more tools by implementing `McpSpecProvider` and adding the components in `services.xml`. + +#### Example MCP config + +Add this to `services.xml`: + +```xml + + + http://*/mcp/* + +``` diff --git a/mintlify-docs/en/learn/migrating-from-elastic-search.mdx b/mintlify-docs/en/learn/migrating-from-elastic-search.mdx new file mode 100644 index 0000000000..a68cb9ca1f --- /dev/null +++ b/mintlify-docs/en/learn/migrating-from-elastic-search.mdx @@ -0,0 +1,239 @@ +--- +title: Migrating from Elasticsearch +description: "This is a guide for how to move data from Elasticsearch to Vespa. By the end of this guide you will have exported documents from Elasticsearch, generated a deployable Vespa application package and tested this with documents and queries." 
+--- + + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` + - See [Docker Containers](/en/operations/self-managed/docker-containers) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). +- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [Github releases](https://github.com/vespa-engine/vespa/releases). +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with Podman or [Docker](https://docs.docker.com/engine/install/) installed. See [Docker Containers](../operations/self-managed/docker-containers) for system limits and other settings. + + +To get started, [sign up](https://vespa.ai/free-trial/) to get an endpoint to deploy to. Set the *tenant name* from the signup: + +```bash +$ export TENANT_NAME=vespa-team # Replace with your tenant name +``` + +Alternatively, [test with local deployment](#test-with-local-deployment). + +## Feed a sample Elasticsearch index + +This section sets up an index with 1000 sample documents using [getting-started-index](https://www.elastic.co/guide/en/elasticsearch/reference/7.9/getting-started-index). Skip this part if you already have an index. 
Wait for Elasticsearch to start: + +```bash +$ docker network create --driver bridge esnet + +$ docker run -d --rm --name esnode --network esnet -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" \ + docker.elastic.co/elasticsearch/elasticsearch:7.10.2 + +$ while [[ "$(curl -s -o /dev/null -w ''%{http_code}'' localhost:9200)" != "200" ]]; do sleep 1; echo 'waiting ...'; done +``` + +Download test data, and feed it to the Elasticsearch instance: + +```bash +$ curl 'https://raw.githubusercontent.com/elastic/elasticsearch/7.10/docs/src/test/resources/accounts.json' \ + > accounts.json + +$ curl -H "Content-Type:application/json" --data-binary @accounts.json 'localhost:9200/bank/_bulk?pretty&refresh' +``` + +Verify that the index has 1000 documents: + +```bash +$ curl 'localhost:9200/_cat/indices?v' +``` + +## Export documents from Elasticsearch + +This guide uses [ElasticDump](https://github.com/elasticsearch-dump/elasticsearch-dump) to export the index contents and the index mapping. Export the documents and mappings, then delete the Docker network and the Elasticsearch container: + +```bash +$ docker run --rm --name esdump --network esnet -v "$PWD":/dump -w /dump elasticdump/elasticsearch-dump \ + --input=http://esnode:9200/bank --output=bank_data.json --type=data + +$ docker run --rm --name esdump --network esnet -v "$PWD":/dump -w /dump elasticdump/elasticsearch-dump \ + --input=http://esnode:9200/bank --output=bank_mapping.json --type=mapping + +$ docker rm -f esnode && docker network remove esnet +``` + +## Generate Vespa documents and Application Package + +[ES_Vespa_parser.py](https://github.com/vespa-engine/vespa/tree/master/config-model/src/main/python/ES_Vespa_parser.py) is provided for conversion of Elasticsearch data and index mappings to Vespa data and configuration. It is a basic script with minimal error checking - it is designed for a simple export, modify this as needed for your application's needs. 
Generate Vespa documents and configuration: + +```bash +$ curl 'https://raw.githubusercontent.com/vespa-engine/vespa/master/config-model/src/main/python/ES_Vespa_parser.py' \ + > ES_Vespa_parser.py + +$ python3 ./ES_Vespa_parser.py --application_name bank bank_data.json bank_mapping.json +``` + +This generates documents in *documents.json* (see [JSON format](/en/reference/schemas/document-json-format)) where each document has IDs like this `id:bank:_doc::1`. It also generates a *bank* folder with an [application package](/en/basics/applications): + +```txt +/bank + │ + ├── documents.json + ├── hosts.xml + ├── services.xml + └── /schemas + └── _doc.sd +``` + +Enter the application package directory: + +```bash +$ cd bank +``` + +## Deploy + +Install [Vespa CLI](/en/clients/vespa-cli). In this example we use [Homebrew](https://brew.sh/), you can also download from [GitHub](https://github.com/vespa-engine/vespa/releases): + +```bash +$ brew install vespa-cli +``` + +Configure for Vespa Cloud deployment, log in and add credentials: + +```bash +$ vespa config set target cloud +$ vespa config set application $TENANT_NAME.myapp.default +``` + +```bash +$ vespa auth login +``` + +```bash +$ vespa auth cert +``` + +Also see [getting started](/en/basics/deploy-an-application) guide. 
Deploy the application package: + +```bash +$ vespa deploy --wait 300 +``` + +Index the documents exported from Elasticsearch: + +```bash +$ vespa feed documents.json +``` + +## Interfacing with Vespa + +Export all documents: + +```bash +$ vespa visit +``` + +Get a document: + +```bash +$ vespa document get id:bank:_doc::1 +``` + +Count documents, find `"totalCount":1000` in the output: + +```bash +$ vespa query 'select * from _doc where true' +``` + +Run a simple query against the *firstname* field: + +```bash +$ vespa query 'select firstname,lastname from _doc where firstname contains "amber"' +``` + +## Next steps + +Review the differences in document records, Vespa to the right: + +Elasticsearch: + +```json +{ + "_index": "bank", + "_type": "_doc", + "_id": "1", + "_score": 1, + "_source": { + "account_number": 1, + "balance": 39225, + "firstname": "Amber", + "lastname": "Duke", + "age": 32, + "gender": "M", + "address": "880 Holmes Lane", + "employer": "Pyrami", + "email": "amberduke@pyrami.com", + "city": "Brogan", + "state": "IL" + } +} +``` + +Vespa: + +```json +{ + "put": "id:bank:_doc::1", + "fields": { + "account_number": 1, + "balance": 39225, + "firstname": "Amber", + "lastname": "Duke", + "age": 32, + "gender": "M", + "address": "880 Holmes Lane", + "employer": "Pyrami", + "email": "amberduke@pyrami.com", + "city": "Brogan", + "state": "IL" + } +} +``` + +The [id](/en/schemas/documents#document-ids) field `id:bank:_doc::1` is composed of: + +- namespace: `bank` +- schema: `_doc` +- id: `1` + +Read more in [Documents](/en/schemas/documents) and [Schemas](/en/basics/schemas). The schema is the key Vespa configuration file where field types and [ranking](/en/basics/ranking) are configured. 
The schema (found in `schemas/_doc.sd`) also has [indexing](/en/basics/schemas#document-fields) settings, example: + +```txt +search _doc { + document _doc { + field account_number type long { + indexing: summary | attribute + } + field address type string { + indexing: summary | index + } + ... + } +} +``` + +These settings impact both performance and how fields are matched. For example, the *account_number* above is using the *attribute* keyword, which makes the field available for [sorting](/en/reference/querying/sorting-language), [ranking](/en/basics/ranking), [grouping](/en/querying/grouping), but which by default does not have data structures for fast search. Read more in [attributes](/en/content/attributes) and [practical search performance guide](/en/performance/practical-search-performance-guide). + +## Test with local deployment + +To run the steps above, using a local deployment, follow the steps in the [quickstart](/en/basics/deploy-an-application-local) to start a local container running Vespa. Then, deploy the application package from the *bank* folder. diff --git a/mintlify-docs/en/learn/migrating-to-cloud.mdx b/mintlify-docs/en/learn/migrating-to-cloud.mdx new file mode 100644 index 0000000000..8ec6fd9b77 --- /dev/null +++ b/mintlify-docs/en/learn/migrating-to-cloud.mdx @@ -0,0 +1,261 @@ +--- +title: Migrating to Vespa Cloud +description: "Migrating a Vespa application to Vespa Cloud is straightforward, as applications on Vespa Cloud support all the same features as your self-hosted Vespa instances; you just gain some new capabilities and avoid the operational work." +--- + +The high-level process is as follows: + + + + Functional validation using the [dev](/en/operations/environments#dev) environment (this guide). + + + Deployment to a [prod](/en/operations/environments#prod) zone. 
+ + + +The rest of this guide assumes you have a [tenant](/en/learn/tenant-apps-instances) ready for deployment: + +```bash +$ export VESPA_TENANT_NAME=mytenant +``` + + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. +- Alternatively, start the Podman daemon: + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` +- See [Docker Containers](/en/operations/self-managed/docker-containers) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). +- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [GitHub releases](https://github.com/vespa-engine/vespa/releases). + + + +**Note:** + +[Vespa Cloud Enclave](/en/operations/enclave/enclave) users: Run the enclave setup steps first. + + + + + An [application package](/en/basics/applications) from a self-hosted system can be deployed with minor modifications to the Vespa Cloud `dev` environment. + + The root of an application package might look like this: + + ```txt + ├── schemas + │ └── doc.sd + └── services.xml + ``` + + There are often more files; the above is a minimum. 
This is the root of the application package - make this the current working directory: + + ```bash + $ cd /location/of/app/package + ``` + + + + Make sure the [Vespa CLI](/en/clients/vespa-cli) is installed: + + ```bash + $ vespa + Usage: + vespa [flags] + vespa [command] + ``` + + + + Configure the local environment and log in to Vespa Cloud: + + ```bash + $ vespa config set target cloud && \ + vespa config set application $VESPA_TENANT_NAME.myapp && \ + vespa auth login + ``` + + + + Create and get security credentials: + + ```bash + $ vespa auth cert + ``` + + This will add the `security` directory to the application package, and add a public certificate to it: + + ```txt + ├── schemas + │ └── doc.sd + ├── security + │ └── clients.pem + └── services.xml + ``` + + The command also installs a key/certificate pair in the Vespa CLI home directory, see [vespa auth cert](/en/reference/clients/vespa-cli/vespa_auth_cert). This pair is used in subsequent accesses to the data plane for document and query operations. + + + + + **Note:** + + Skip this step unless you are using [Vespa Cloud Enclave](/en/operations/enclave/enclave). + + + Add [deployment.xml](/en/reference/applications/deployment#deployment) with your cloud provider account - This ensures the deployment uses resources from the correct account - examples: + + ```xml + + + + ``` + + ```xml + + + + ``` + + The application package should look like: + + ```txt + ├── deployment.xml + ├── schemas + │ └── doc.sd + ├── security + │ └── clients.pem + └── services.xml + ``` + + + + `hosts.xml` is not used in Vespa Cloud, remove it. + + + + Edit the `` configuration in `services.xml` - from: + + ```xml + + + + + + + + + + ``` + + to: + + ```xml + + + + + + + + + ``` + + In short, this is the Vespa Cloud syntax for resource specifications. 
+ + Example, migrating from a cluster using `c7i.2xlarge` instance type, with a 200G disk - from the AWS specifications: + + ```txt + c7i.2xlarge 8 16 EBS-Only + ``` + + Equivalent Vespa Cloud configuration: + + ```xml + + ``` + + Repeat this for all clusters in `services.xml`. Notes: + + 1. As you are now migrating to the `dev` environment, what is _actually_ deployed is a minimized version. The configuration changes above are easily tested in this environment. + 2. Using `count=2` is best practice at this point. + 3. Resources must match a node instance type at the cloud provider(s) you are deploying to, see [AWS flavors](/en/performance/instance-types/aws-instance-types), [GCP flavors](/en/performance/instance-types/gcp-instance-types), and [Azure flavors](/en/performance/instance-types/azure-instance-types). + + + + At this point, the local environment and the application package are ready for deployment: + + ```bash + $ vespa deploy --wait 600 + ``` + + Please note that a first-time deployment normally takes a few minutes, as resources are provisioned. + + At this point, we recommend opening the console to observe the deployed application. The link will be `https://console.vespa-cloud.com/tenant/mytenant/application/myapp/dev/instance/default` (replace with your own names) - this is also easily found in the console main page: + + + ![dev view](/assets/img/free-trial.png) + + + Refer to [vespa8 release notes](/en/reference/release-notes/vespa8) for troubleshooting in case the deployment fails, based on a Vespa 7 (or earlier) version. 
+ + + + The endpoints are shown in the console; one can also list them like: + + ```bash + $ vespa status query + Container default at https://aa1c1234.b225678e.z.vespa-app.cloud/ is ready + ``` + + Test the query endpoint, expect `totalCount: 0`: + + ```bash + $ vespa query 'select * from sources * where true' + ``` + + ```json + { + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 0 + }, + ``` + + In the `services.xml` examples at the start of this guide, both `<search>` and `<document-api>` are configured in the same cluster, named `default`. In case of multiple container clusters, select the one configured with `<search>`: + + ```bash + vespa query 'select * from sources * where true' --cluster myquerycluster + ``` + + Finally, feed a document to the cluster (this is the cluster configured with `<document-api>`): + + ```bash + vespa feed mydoc.jsonl --cluster myfeedcluster + ``` + + Redo the query and observe nonzero `totalCount`. + + + +## Next steps + +This is the final step in the functional validation. Please note: + + +**Note:** + +Deployments to `dev` expire after 7 days of inactivity, i.e., 7 days after the last deployment. **This applies to all plans**. Use the Vespa Console to extend the expiry period, or redeploy the application to add 7 more days. + + +- Read more about the [dev](/en/operations/environments#dev) environment +- Feed (a subset of) the data and validate that queries and other API accesses work as expected. +- At the end of the validation process, continue to [production deployment](/en/operations/production-deployment) to set up in production zones. diff --git a/mintlify-docs/en/learn/overview.mdx b/mintlify-docs/en/learn/overview.mdx new file mode 100644 index 0000000000..164b96bada --- /dev/null +++ b/mintlify-docs/en/learn/overview.mdx @@ -0,0 +1,70 @@ +--- +title: Vespa Overview +description: "Vespa is a platform for applications which need low-latency computation over large data sets. 
It stores and indexes your structured, text and vector data so that queries, selection, processing and machine-learned model inference over the data can be performed quickly at serving time at any scale. Functionality can be customized and extended with application components hosted within Vespa. This document is an overview of the features and main components of Vespa." +--- + +## Introduction + +Vespa allows application developers to create applications that scale to large amounts of data and high loads without sacrificing latency or reliability. A Vespa application consists of a number of *stateless Java container clusters* and zero or more *content* clusters storing data. + + +![Vespa Overview](/assets/img/vespa-overview.svg) + + +The [stateless **container** clusters](/en/applications/containers) host components which process incoming data and/or queries and their responses. These components provide functionality belonging to the platform like indexing transformations and the global stages of query execution, but can also include the middleware logic of the application. Application developers can configure their Vespa system with a single stateless cluster which performs all such functions, or create different clusters for each kind of task. The container clusters then pass queries and data operations on to the appropriate nodes in the content clusters. If the application uses data it does not own, you can add components to access data from external services as well. + +[**Content** clusters](/en/content/elasticity) in Vespa are responsible for storing data and executing queries and inferences over the data. Queries can range from simple data lookups for content serving to complex conditions for selecting the relevant data, ranking it using machine-learned models, and grouping and aggregating the data across all nodes participating in the query. 
All the operations provided by Vespa scale to more content, more expensive inference, and higher query volume simply by adding more nodes to the content clusters. + +When changing the nodes of a content cluster for scaling or on node failure, content clusters automatically re-balance data in the background to maintain a balanced distribution at the configured redundancy level. Faulty nodes are also automatically removed from the serving path to avoid any impact to queries and writes (failover). + +After intermediate processing in a container cluster, data is written to content clusters. Writes are persistent and visible in all queries after receiving an ack on the write message, after a few milliseconds. Each write is guaranteed to either succeed or provide a failure response within a given time limit, and writes scale linearly with the available resources, indefinitely. In addition to rewriting and removing entire documents, writes may selectively modify only individual document fields. Writes can be sent directly over HTTP/2, or by using a Java client — refer to the [API documentation](/en/reference/api/api). + +Each document instance stored in Vespa is of a type defined in a configured [schema](/en/basics/schemas), which defines the document fields and how to store and index them, as well as the ranking and inference profiles that belong to the document type. Applications can contain any number of schemas for different data types, and configure them to be stored either in the same or multiple content clusters. + +Container and content clusters handle all the end user traffic of a Vespa application, but there's also a third type of cluster, the *admin and config clusters*. These set up and manage the other clusters in the application according to configuration, and manage the process of changing the clusters safely without disruption to traffic when the configuration changes. 
+ +A Vespa application is completely specified by an [*application package*](/en/basics/applications), which is a directory structure containing a declaration of the clusters to run as part of the application, the content schemas, any machine-learned models and Java components, and other configuration or data files needed by various features. Application developers create a running application from their application package by *deploying* it to any node in the config cluster. Changes to a running application are made in the same way: by changing the application package and deploying again. Once Vespa is installed and started on a node, it is managed by the config system such that the entire system can be treated as a single unit, and application owners do not need to perform any administration tasks locally on the nodes running the application. It is also possible to configure nodes as *log servers* on Vespa. These will collect logs in real time from all the nodes of the application. By default, the first node in the config server cluster performs this role. + +The rest of this document provides some more detail on the functions Vespa performs. + +## Vespa operations + +Vespa accepts the following operations: + +- Writes: Put (add and replace) and remove documents, and update fields in these. +- Lookup of a document (or some subset of it) by id. +- [Queries](/en/querying/query-api): [*Select*](/en/querying/query-language) documents whose fields match conditions, which search free-text fields, structured data or [vector spaces (ANN)](/en/querying/nearest-neighbor-search). Any number of such conditions can be combined freely in boolean trees to define the full query to be executed. Vespa will compute a query plan over the conditions which executes them efficiently with any number of conditions, such as filters combined with ANN conditions. 
Matches to a query can be passed through an inference step which can compute any business logic or machine-learned model expressed as a [ranking expression](/en/reference/ranking/ranking-expressions) or [ONNX model](/en/ranking/onnx). Optionally, the highest scoring matches can also run through a second stage of this, to spend more computational resources on promising candidates. The final documents are ordered according to their score from these inferences ([*ranking*](/en/basics/ranking)), or by explicit [*sorting*](/en/reference/querying/sorting-language). Matches to queries can be [*grouped*](/en/querying/grouping) hierarchically by field values, where each group can contain aggregated values over the data in the group. This can be used to calculate values for, e.g., navigation aids, tag clouds, graphs or for clustering in a distributed fashion without having to transfer the distributed data to a single container node. +- Data dumps: Content matching some criterion can be streamed out for background reprocessing, backup, etc., by using the [*visit*](/en/writing/visiting) operation. +- [Any other custom network request](/en/reference/applications/components) which can be handled by application components deployed on a container cluster. + +## The stateless container + +[Container clusters](/en/applications/containers) host the application components which employ the operations listed above and process their return data. Vespa provides a set of components out of the box, together with component infrastructure: dependency injection, with added support for injection of config from the admin server or the application package; a component model based on OSGi; a shared mechanism to chain components into handler chains for modularity as well as metrics and logging. The container also provides the network layer for handling and issuing remote requests - HTTP is provided out of the box, and other protocols/transports can be transparently plugged in as components. 
+ +Developers can make changes to components (and of course their configuration) simply by redeploying their application package - the system takes care of copying the components to the nodes of the cluster and loading/unloading components impacting request serving or restarting nodes. + +## Content clusters + +[Content clusters](/en/content/elasticity) store data and maintain distributed indices of data for searches and selects. Data is replicated over multiple nodes, with a number of copies specified by the application, such that the cluster can automatically repair itself on loss of a node or a disk. Using the same mechanism, clusters can also be grown or shrunk while online, simply by changing the set of available nodes declared in the application package. + +Lookup of an individual document is routed directly to a node storing that document, while queries are spread over a subset of nodes which contain the queried documents. Complex queries are performed as distributed algorithms with multiple steps back and forth between the container and the content nodes; this is to achieve the low latency which is one of the main design goals of Vespa. + +## Administration clusters and developer support + +The [single configuration cluster](/en/basics/applications) controls all the other clusters of the system. + +A config server derives the low level configuration of each individual cluster, node and process, such that the application developer can specify the desired system on a higher level without worrying about its detailed realization. Whenever the application package is redeployed, the system will compute the necessary changes in configuration and manage the process of moving safely from the current to the new configuration without disrupting queries or writes. 
+ +Other admin clusters in Vespa are the cluster controller cluster (controls one or more content clusters), logserver cluster (logserver holds log archive for logs from all nodes in the application) and service location brokers (slobroks, which are a name service used by some services in Vespa). + +### Application packages + +Application packages may be [changed, redeployed](/en/reference/api/deploy-v2) and [inspected](/en/reference/api/config-v2) over an HTTP REST API, or through a [command line interface](/en/clients/vespa-cli#deployment). The administration cluster runs over [ZooKeeper](https://zookeeper.apache.org) to make changes to configuration singular and consistent, and to avoid having a single point of failure. + +An application package looks the same, and is deployed the same way, whether it specifies a large system with hundreds of nodes or a single node running all services. The only change needed is to the lists of nodes making up the cluster. The container clusters may also be started within a single Java VM by "deploying" the application package from a method call. This is useful for testing applications in an IDE and in unit tests. Application packages with components can be [developed](/en/applications/developer-guide) in an IDE using Maven starting from sample applications. + +## Summary + +Vespa allows functionally rich and highly available applications to be developed to scale and perform to high standards without burdening developers with the considerable low level complexity this requires. It allows developers to evolve and grow their applications over time without taking the system offline, and lets them avoid complex data and page precomputing schemes which lead to stale data that cannot be personalized, since this often requires complex queries to complete in real user time over data which is constantly changing at the same time. 
+ +For more details, read [Vespa Features](/en/learn/features), or try to [deploy an application](/en/basics/deploy-an-application). diff --git a/mintlify-docs/en/learn/releases.mdx b/mintlify-docs/en/learn/releases.mdx new file mode 100644 index 0000000000..08cb079a2d --- /dev/null +++ b/mintlify-docs/en/learn/releases.mdx @@ -0,0 +1,40 @@ +--- +title: Releases +description: "Vespa is released every Monday through Thursday. Each public release has passed all functional and performance tests, and all cloud applications are automatically upgraded to it." +--- + +For each Vespa release, the following artifacts are provided: + +- [Java artifacts for building Vespa applications on Maven Central](https://search.maven.org/artifact/com.yahoo.vespa/parent) +- [Vespa RPMs on Fedora Copr](https://copr.fedorainfracloud.org/coprs/g/vespa/vespa/) +- [Container images on Docker Hub](https://hub.docker.com/repository/docker/vespaengine/vespa) + +Releases: + +- [Vespa 7](/en/reference/release-notes/vespa7) +- [Vespa 8](/en/reference/release-notes/vespa8) + +Use the [Vespa Factory](https://factory.vespa.ai/releases) to inspect the commits in each release: + + +![Screenshot of commit list diff per release](/assets/commits-release.png) + + +## Versions + +Vespa uses [semantic versioning](https://semver.org/). Each release is backwards compatible and supports live migration on running systems, provided they are running a version which is less than 2 months old. It is therefore a minor version number change. All new features are released on such minor versions. Every second year or so we make a major version change which removes previously deprecated functionality. + +Java APIs, web service APIs and all application package constructs are supported through a major release and only removed on a new release if they are already marked deprecated. 
+ +Use of deprecated Java APIs will cause a warning on compilation, and use of deprecated application package constructs will cause a deprecation warning on deployment. Note that Java APIs come in two categories: + +- *Public APIs* carry the compatibility guarantee and are visible from your code as well as in the Javadoc +- *Exported APIs* are also visible from your code, but are not in the public Javadoc and carry no compatibility guarantee + +Check the Javadoc list to verify that you are using public packages. + +In addition, some public Java classes and methods are marked with the `com.yahoo.api.annotations.Beta` tag. These are under development and may still change before they stabilize. + +## Stored Data + +Data written to Vespa is compatible between adjacent releases. For self-hosted systems, it may be necessary to upgrade through each minor release rather than in larger leaps to ensure Vespa can read existing data. This is a good practice in any case. diff --git a/mintlify-docs/en/learn/tenant-apps-instances.mdx b/mintlify-docs/en/learn/tenant-apps-instances.mdx new file mode 100644 index 0000000000..c458b86474 --- /dev/null +++ b/mintlify-docs/en/learn/tenant-apps-instances.mdx @@ -0,0 +1,30 @@ +--- +title: Tenants, Applications and Instances +description: "When registering for Vespa Cloud, a tenant is created. The tenant is the billable unit, and most often represents an organization or a project. A tenant has one or more applications with one or more instances." +--- + + +![A tenant can have multiple applications with multiple instances each](/assets/img/tenants-apps-instances.svg) + + +Instances are used for different use cases, and are deployed to a set of [zones](/en/operations/zones) - example: + + +![An application can be deployed to multiple zones](/assets/img/instances-zones.svg) + + +The *Application* has a "default" instance serving queries from two *production* zones. 
It has an "integration" instance with another dataset, used by other applications to integrate against a production-like, stable interface. Finally, a developer has deployed the "bob" instance to a *dev* zone to further develop plugin code. + +Deployments to production zones are specified in [deployment.xml](/en/reference/applications/deployment). Deployments to the manual *dev* zones are normally done directly from a developer computer for rapid code and config development. Read more in [Automated deployments](/en/operations/automated-deployments). + +The service configuration is specified in [services.xml](/en/reference/applications/services/services) and is composed of individually sized *clusters*. A cluster is deployed to a set of *nodes* with *resources* specified. + +One or more users may be members of the tenant. A user is given roles in the tenant based on their access level. The roles are *Administrator* for tenant-level management like adding new members and updating billing data, *Developer* for managing applications, and one for *read-only* access. + +## Lifecycle + +The tenant name cannot be changed - create a new tenant, or contact Vespa Support. + +Tenants in trial are auto-expired once trial is completed. Move to a paid plan to keep applications and data. + +It is not possible to auto-migrate applications and data between tenants. To move an application to a new tenant, re-deploy the application with the new tenant name, see [cloning applications and data](/en/operations/cloning). diff --git a/mintlify-docs/en/learn/tutorials.mdx b/mintlify-docs/en/learn/tutorials.mdx new file mode 100644 index 0000000000..2a7188ff76 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials.mdx @@ -0,0 +1,64 @@ +--- +title: Tutorials and use cases +--- + +### Text search + +- [Tutorial: Text Search](/en/learn/tutorials/text-search) A text search tutorial and introduction to text ranking with Vespa using traditional information retrieval techniques like BM25. 
+- [Tutorial: Improving Text Search with Machine Learning](/en/learn/tutorials/text-search-ml) This tutorial builds on the text search tutorial but introduces Learning to Rank to improve relevance. + + +### Vector Search + +Learn how to use Vespa Vector Search in the [practical nearest neighbor search guide](/en/querying/nearest-neighbor-search-guide). It uses Vespa's support for [nearest neighbor search](/en/querying/nearest-neighbor-search), there is also support for fast [approximate nearest neighbor search](/en/querying/approximate-nn-hnsw) in Vespa. The guide covers combining vector search with filters and how to perform hybrid search, combining retrieval over inverted index structures with vector search. + +### Hybrid Search + +[Tutorial: Hybrid Text Search](/en/learn/tutorials/hybrid-search) A search tutorial and introduction to hybrid text ranking with Vespa, combining BM25 with text embedding models. + +### RAG (Retrieval-Augmented Generation) + +- [Tutorial: The RAG Blueprint](/en/learn/tutorials/rag-blueprint) A tutorial that provides a blueprint for building high-quality RAG applications with Vespa. Includes evaluation and learning-to-rank (LTR). +- [Retrieval-augmented generation (RAG) in Vespa](/en/rag/rag) + +### Combining search and recommendation: The News tutorial + +Follow this series to learn how to build a complete application supporting both content recommendation/personalization, navigation, and search. 
+ +- [News 1: Getting Started](/en/learn/tutorials/news-1-deploy-an-application) +- [News 2: Application Packages, Feeding, Query](/en/learn/tutorials/news-2-basic-feeding-and-query) +- [News 3: Sorting, Grouping and Ranking](/en/learn/tutorials/news-3-searching) +- [News 4: Embeddings](/en/learn/tutorials/news-4-embeddings) +- [News 5: Partial Updates, ANNs, Filtering](/en/learn/tutorials/news-5-recommendation) +- [News 6: Custom Searchers, Document Processors](/en/learn/tutorials/news-6-recommendation-with-searchers) +- [News 7: Parent-Child, Tensor Ranking](/en/learn/tutorials/news-7-recommendation-with-parent-child) + +### ML Model Serving + +Learn how to use Vespa for ML model serving in [Stateless Model Evaluation](/en/ranking/stateless-model-evaluation.html). Vespa supports running inference with models from many popular ML frameworks, which can be used for ranking, query classification, question answering, multi-modal retrieval, and more. + +- [Ranking with ONNX models](/en/ranking/onnx) Export models from popular deep learning frameworks such as PyTorch to ONNX format for serving in Vespa. Vespa integrates with ONNX-Runtime for accelerated inference. +- [Ranking with LightGBM models](/en/ranking/lightgbm) +- [Ranking with XGBoost models](/en/ranking/xgboost) +- [Ranking with TensorFlow models](/en/ranking/tensorflow) + +### Embedding Model Inference + +Vespa supports integrating [embedding](/en/rag/embedding) models, which avoids transferring large amounts of embedding vector data over the network and allows for efficient serving of embedding models. + +- [Huggingface Embedder](/en/rag/embedding) Use single-vector embedding models from Hugging Face. +- [ColBERT Embedder](/en/rag/embedding) Use multi-vector embedding models. +- [Splade Embedder](/en/rag/embedding) Use sparse learned single vector embedding models. 
+ + +### E-Commerce + +The [e-commerce shopping sample application](/en/learn/tutorials/e-commerce) demonstrates Vespa grouping, true in-place partial updates, custom ranking, and more. + +### Building a custom HTTP API + +The [HTTP API tutorial](/en/learn/tutorials/http-api) shows how to build a custom HTTP API in an application. + +### More examples and sample applications + +There are many examples and starting applications on [GitHub](https://github.com/vespa-engine/sample-apps/) and [Pyvespa examples](https://vespa-engine.github.io/pyvespa/index.html). diff --git a/mintlify-docs/en/learn/tutorials/e-commerce.mdx b/mintlify-docs/en/learn/tutorials/e-commerce.mdx new file mode 100644 index 0000000000..7bc18f1219 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/e-commerce.mdx @@ -0,0 +1,69 @@ +--- +title: "Use Case - shopping" +--- + +The [e-commerce, or shopping, use case](https://github.com/vespa-engine/sample-apps/tree/master/use-case-shopping) is an example of an e-commerce site complete with sample data and a web front end to browse product data and reviews. To quick start the application, follow the instructions in the [README](https://github.com/vespa-engine/sample-apps/blob/master/use-case-shopping/README.md) in the sample app. + + + +![Shopping sample app screenshot](/assets/img/shopping-1.png) + + + +To browse the application, navigate to [localhost:8080/site](http://localhost:8080/site). This site is implemented through a custom [request handler](/en/applications/request-handlers) and is meant to be a simple example of creating a front end / middleware that sits in front of the Vespa back end. As such it is fairly independent of Vespa features, and the code is designed to be fairly easy to follow and as non-magical as possible. All the queries against Vespa are sent as HTTP requests, and the JSON results from Vespa are parsed and rendered. 
+ +This sample application is built around the Amazon product data set found at [https://cseweb.ucsd.edu/~jmcauley/datasets.html](https://cseweb.ucsd.edu/~jmcauley/datasets.html). A small sample of this data is included in the sample application, and full data sets are available from the above site. This sample application contains scripts to convert from the data set format to Vespa format: [convert_meta.py](https://github.com/vespa-engine/sample-apps/blob/master/use-case-shopping/convert_meta.py) and [convert_reviews.py](https://github.com/vespa-engine/sample-apps/blob/master/use-case-shopping/convert_reviews.py). See [README](https://github.com/vespa-engine/sample-apps/tree/master/use-case-shopping#readme) for example use. + +When feeding reviews, there is a custom [document processor](/en/applications/document-processors) that intercepts document writes and updates the parent item with the review rating, so the aggregated review rating is kept stored with the item - see [ReviewProcessor](https://github.com/vespa-engine/sample-apps/blob/master/use-case-shopping/src/main/java/ai/vespa/example/shopping/ReviewProcessor.java). This is more an example of a custom document processor than a recommended way to do this, as feeding the reviews more than once will result in inflated values. To do this correctly, one should probably calculate this offline so a re-feed does not cause unexpected results. + + + +### Highlighted features + +* [Multiple document types](/en/basics/schemas) + +Vespa models data as documents, which are configured in schemas that define how documents should be stored, indexed, ranked, and searched. In Vespa, you can have multiple document types, and you define in `services.xml` how these should be distributed across the content clusters. This application uses three document types that are stored in the same content cluster: item, review and query. 
Search is done on items, but reviews refer to a single parent item and are rendered on the item page. The query document type is used to power auto-suggest functionality. + +* [Custom document processor](/en/applications/document-processors) + +In Vespa, you can set up custom document processors to perform any type of extra processing during document feeding. One example is to enrich the document with extra information, and another is to precalculate values of fields to avoid unnecessary computation during ranking. This application uses a document processor to intercept reviews and update the parent item's review rating. + +* [Custom searcher processor](/en/applications/searchers) + +In Vespa, you can set up custom searchers to perform any type of extra processing during querying. In the sample app there is a single custom searcher which builds the query for auto-suggestions, using a combination of [fuzzy matching](/en/reference/querying/yql#fuzzy) and [prefix search](/en/querying/text-matching#prefix-match). + +* [Custom handlers](/en/applications/request-handlers) + +With Vespa, you can set up general request handlers to handle any type of request. This example site is implemented with a single such request handler, [SiteHandler](https://github.com/vespa-engine/sample-apps/blob/master/use-case-shopping/src/main/java/ai/vespa/example/shopping/site/SiteHandler.java) which is set up in [services.xml](https://github.com/vespa-engine/sample-apps/blob/master/use-case-shopping/src/main/application/services.xml) to be bound to `/site`. Note that this handler is for example purposes and is designed to be independent of Vespa. Most applications would serve this through a dedicated setup. + +* [Custom configuration](/en/applications/configuring-components) + +When creating custom components in Vespa, for instance document processors, searchers or handlers, one can use custom configuration to inject config parameters into the components. 
This involves defining a config definition (a `.def` file), which creates a config class. You can instantiate this class with data in `services.xml` and the resulting object is dependency-injected into the component during construction. This application uses custom config to set up the Vespa host details for the handler. + +* [Partial update](/en/reference/schemas/document-json-format#update) + +With Vespa, you can make changes to an existing document without submitting the full document. Examples are setting the value of a single field, adding elements to an array, or incrementing the value of a field without knowing the field value beforehand. This application contains an example of a partial update, in the voting of whether a review is helpful or not. The `SiteHandler` receives the request and the `ReviewVote` class sends a partial update to increment the `up`- or `downvotes` field. + +* [Search using YQL](/en/querying/query-language) + +In Vespa, you search for documents using YQL. In this application, the classes responsible for retrieving data from Vespa (in the `data` package beneath the `SiteHandler`) set up the YQL queries which are used to query Vespa over HTTP. + +* [Grouping](/en/querying/grouping) + +Grouping is used to group various fields of query results together. For this application, many of the queries to Vespa include grouping requests. The home page uses grouping to dynamically extract the first 3 levels of categories from the stored items. The search page groups results matching the query into categories, brands, item rating and price ranges. The order in which the groups are rendered is determined by both counting and the relevance of the hits. This enables query-contextualized navigation. + +* [Rank profiles](/en/basics/ranking) + +Rank profiles are profiles containing instructions on how to score documents for a given query. The most important parts of rank profiles are the ranking expressions. 
The schemas for the item and review document types contain different rank profiles to sort or score the data. The item ranking uses a hybrid combination of keyword and vector matching. + +* [Native embedders](/en/rag/embedding) + +Native embedders are used to map the textual query and document representations into dense high dimensional vectors which are used for semantic search. The application uses an open-source embedding model and inference is performed using [stateless model evaluation](/en/ranking/stateless-model-evaluation), both during document and query processing. + +* [Vector search](/en/querying/nearest-neighbor-search) + +The default retrieval uses approximate nearest neighbor search in combination with traditional lexical matching. Both the keyword and vector matching are constrained by filters such as brand, price or category. + +* [Ranking functions](/en/reference/schemas/schemas#function-rank) + +Ranking functions are contained in rank profiles and can be referenced as part of any ranking expression from either first-phase, second-phase, global-phase or other functions. diff --git a/mintlify-docs/en/learn/tutorials/http-api.mdx b/mintlify-docs/en/learn/tutorials/http-api.mdx new file mode 100644 index 0000000000..0cfe5890ed --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/http-api.mdx @@ -0,0 +1,91 @@ +--- +title: "Building an HTTP API using request handlers and processors" +--- + +This tutorial builds a simple application consisting of these pieces: + +- A custom REST API - implemented in a _request handler_. +- Two pieces of request/response processing logic - implemented as two chained _processors_. +- A _component_ shared by the above processors. +- A custom output format - a _renderer_. + +The end result is to process incoming requests of the form: + +```bash +http://hostname:port/demo?terms=something%20completely%20different +``` + +into a nested structure response produced by the processors and serialized by the renderer. 
Use the sample application found at [http-api-using-request-handlers-and-processors](https://github.com/vespa-engine/sample-apps/tree/master/examples/http-api-using-request-handlers-and-processors). + +## Request handler + +The custom request handler is required to implement a custom API. In many cases it is not necessary to add a custom handler as the Processors can access the request data directly. However, it is needed if e.g. your application wants more control over exactly which parameters are used to route to a particular processing chain. + +In this case, the request handler will simply add the request URI as a property and then forward to the built-in processing handler for processing. + +Review the code in [DemoHandler.java](https://github.com/vespa-engine/sample-apps/blob/master/examples/http-api-using-request-handlers-and-processors/src/main/java/ai/vespa/examples/DemoHandler.java) + +## Processors + +This application contains two processors, one for annotating the incoming request (using default values from config) and checking the result, and one for creating the result using the shared component. + +### AnnotatingProcessor + +Review the code in [AnnotatingProcessor.java](https://github.com/vespa-engine/sample-apps/blob/master/examples/http-api-using-request-handlers-and-processors/src/main/java/ai/vespa/examples/AnnotatingProcessor.java) + +### DataProcessor + +The other processor creates some structured Response Data from data handed to it in the request. This is done in cases where the web service is a processing service. In cases where the service is implementing some middleware on top of other services, similar processors will instead make outgoing requests to downstream web services to produce Response Data. 
+ +Review the code in [DataProcessor.java](https://github.com/vespa-engine/sample-apps/blob/master/examples/http-api-using-request-handlers-and-processors/src/main/java/ai/vespa/examples/DataProcessor.java) + +Notice how the task of the server is decomposed into separate Processing steps which can be composed by chaining at configuration time and which communicate through the Request and Response only. This structure enhances sharing, reuse and modularity and makes it easy to create variations where some logic encapsulated in a Processor is added, removed or modified. + +The order of the processors is decided by the @Before and @After annotations - refer to [chained components](../../applications/chaining.html). + +### Custom configuration + +The default terms used by the AnnotatingProcessor are placed in user configuration, where the definition is in [demo.def](https://github.com/vespa-engine/sample-apps/blob/master/examples/http-api-using-request-handlers-and-processors/src/main/resources/configdefinitions/demo.def): + +```bash +package=com.mydomain.demo + +demo[].term string +``` + +In other words, a configuration class containing a single array named _demo_, containing a class Demo which only contains a single string named _term_. + +## Renderer + +The responsibility of the renderer is to serialize the structured result into bytes for transport back to the client. + +Rendering works by first creating a single instance of the renderer, invoking the constructor, then cloning a new renderer for each result set to be rendered. `init()` will be invoked once on each new clone before `render()` is invoked. + +Review the code in [DemoRenderer.java](https://github.com/vespa-engine/sample-apps/blob/master/examples/http-api-using-request-handlers-and-processors/src/main/java/ai/vespa/examples/DemoRenderer.java) + +## Shared component + +The responsibility of this custom component is to decouple some parts of the application from the Searcher. 
This makes it possible to reconfigure the Searcher without rebuilding the potentially costly custom component. + +In this case, what the component does is more than a little silly. More typical use would be an [FSA](/en/reference/operations/tools#vespa-makefsa) or complex, shared helper functionality. + +Review the code in [DemoComponent.java](https://github.com/vespa-engine/sample-apps/blob/master/examples/http-api-using-request-handlers-and-processors/src/main/java/ai/vespa/examples/DemoComponent.java) + +## Application + +Review the application's configuration in [services.xml](https://github.com/vespa-engine/sample-apps/blob/master/examples/http-api-using-request-handlers-and-processors/src/main/application/services.xml) + +## Try it! + +Build the project, then [run a test](../../applications/developer-guide.html), querying [http://localhost:8080/demo?terms=1%202%203%204](http://localhost:8080/demo?terms=1%202%203%204) gives: + +```bash +OK +Renderer initialized: 1369733374898 +http://localhost:8080/demo?terms=1%202%203%204 +1 + 2 + 3 + 4 +Rendering finished work: 1369733374902 +``` \ No newline at end of file diff --git a/mintlify-docs/en/learn/tutorials/hybrid-search.mdx b/mintlify-docs/en/learn/tutorials/hybrid-search.mdx new file mode 100644 index 0000000000..53f5a18899 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/hybrid-search.mdx @@ -0,0 +1,1066 @@ +--- +title: "Hybrid Text Search Tutorial" +--- + + +Hybrid search combines different retrieval methods to improve search quality. This tutorial distinguishes between two core components of search: + +- **Retrieval**: Identifying a subset of potentially relevant documents from a large corpus. Traditional lexical methods like [BM25](/en/ranking/bm25) excel at this, as do modern, embedding-based [vector search](/en/querying/vector-search-intro) approaches. +- **Ranking**: Ordering retrieved documents by relevance to refine the results. 
Vespa's flexible [ranking framework](/en/basics/ranking) enables complex scoring mechanisms. + +This tutorial demonstrates building a hybrid search application with Vespa that leverages the strengths of both lexical and embedding-based approaches. We'll use the [NFCorpus](https://ir-datasets.com/nfcorpus.html) dataset from the [BEIR](https://github.com/beir-cellar/beir) benchmark and explore various hybrid search techniques using Vespa's query language and ranking features. + +The main goal is to set up a text search app that combines simple text scoring features such as [BM25](/en/ranking/bm25) [^1] with vector search in combination with text-embedding models. We demonstrate how to obtain text embeddings within Vespa using Vespa's [embedder](/en/rag/embedding#huggingface-embedder) functionality. In this guide, we use [snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs) as the text embedding model. It is a small model that is fast to run and has a small memory footprint. + + + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` + - See [Docker Containers](/en/operations/self-managed/docker-containers) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 4 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). 
+- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [Github releases](https://github.com/vespa-engine/vespa/releases). +- Python3 +- `curl` + + + +## Installing vespa-cli and ir_datasets + +This tutorial uses [Vespa-CLI](/en/clients/vespa-cli) to deploy, feed, and query Vespa. We also use [ir-datasets](https://ir-datasets.com/) to obtain the NFCorpus relevance dataset. +```bash +$ pip3 install --ignore-installed vespacli ir_datasets ir_measures requests +``` + + +We can quickly look at a document from [nfcorpus](https://ir-datasets.com/beir.html#beir/nfcorpus): + + +```bash +$ ir_datasets export beir/nfcorpus docs --format jsonl | head -1 +``` + + +Which outputs: + + +```json expandable +{"doc_id": "MED-10", "text": "Recent studies have suggested that statins, an established drug group in the prevention of cardiovascular mortality, could delay or prevent breast cancer recurrence but the effect on disease-specific mortality remains unclear. We evaluated risk of breast cancer death among statin users in a population-based cohort of breast cancer patients. The study cohort included all newly diagnosed breast cancer patients in Finland during 1995\u20132003 (31,236 cases), identified from the Finnish Cancer Registry. Information on statin use before and after the diagnosis was obtained from a national prescription database. We used the Cox proportional hazards regression method to estimate mortality among statin users with statin use as time-dependent variable. A total of 4,151 participants had used statins. During the median follow-up of 3.25 years after the diagnosis (range 0.08\u20139.0 years) 6,011 participants died, of which 3,619 (60.2%) was due to breast cancer. 
After adjustment for age, tumor characteristics, and treatment selection, both post-diagnostic and pre-diagnostic statin use were associated with lowered risk of breast cancer death (HR 0.46, 95% CI 0.38\u20130.55 and HR 0.54, 95% CI 0.44\u20130.67, respectively). The risk decrease by post-diagnostic statin use was likely affected by healthy adherer bias; that is, the greater likelihood of dying cancer patients to discontinue statin use as the association was not clearly dose-dependent and observed already at low-dose/short-term use. The dose- and time-dependence of the survival benefit among pre-diagnostic statin users suggests a possible causal effect that should be evaluated further in a clinical trial testing statins\u2019 effect on survival in breast cancer patients.", "title": "Statin Use and Breast Cancer Survival: A Nationwide Cohort Study from Finland", "url": "http://www.ncbi.nlm.nih.gov/pubmed/25329299"} +``` + + +The NFCorpus documents have four fields: + +- The `doc_id` and `url` +- The `text` and the `title` + +We are interested in the title and the text, and we want to be able to search across these two fields. We also need to store the `doc_id` to evaluate [ranking](/en/basics/ranking) accuracy. We will create a small script that converts the above output to [Vespa JSON document](/en/reference/schemas/document-json-format) format. Create a `convert.py` file: + + +```python +import sys +import json + +for line in sys.stdin: + doc = json.loads(line) + del doc['url'] + vespa_doc = { + "put": "id:hybrid-search:doc::%s" % doc['doc_id'], + "fields": { + **doc + } + } + print(json.dumps(vespa_doc)) +``` + +```bash +Paste the above into file convert.py +``` + +With this script, we convert the document dump to Vespa JSON format. 
Use the following command to convert the entire dataset to Vespa JSON format: + + +```bash +$ ir_datasets export beir/nfcorpus docs --format jsonl | python3 convert.py > vespa-docs.jsonl +``` + + +Now, we will create the Vespa application package and schema to index the documents. + +## Create a Vespa Application Package + +A [Vespa application package](/en/basics/applications) is a set of configuration files and optional Java components that together define the behavior of a Vespa system. Let us define the minimum set of required files to create our hybrid text search application: `doc.sd` and `services.xml`. + + +```bash +$ mkdir -p app/schemas +``` + + + +### Schema +A [schema](/en/basics/schemas) is a document-type configuration; a single Vespa application can have multiple schemas with document types. For this application, we define a schema `doc`, which must be saved in a file named `schemas/doc.sd` in the application package directory. + +Write the following to `app/schemas/doc.sd`: + + +```js expandable +schema doc { + document doc { + field language type string { + indexing: "en" | set_language + } + field doc_id type string { + indexing: attribute | summary + match: word + } + field title type string { + indexing: index | summary + match: text + index: enable-bm25 + } + field text type string { + indexing: index | summary + match: text + index: enable-bm25 + } + } + fieldset default { + fields: title, text + } + + field embedding type tensor(v[384]) { + indexing: input title." ".input text | embed | attribute + attribute { + distance-metric: angular + } + } + + rank-profile bm25 { + first-phase { + expression: bm25(title) + bm25(text) + } + } + + rank-profile semantic { + inputs { + query(e) tensor(v[384]) + } + first-phase { + expression: closeness(field, embedding) + } + } +} +``` + +```bash +Paste the above into file app/schemas/doc.sd +``` + + A lot is happening here; let us go through it in detail. 
+ +#### Document type and fields +The `document` section contains the fields of the document, their types, and how Vespa should index and [match](/en/reference/schemas/schemas#match) them. + +The field property `indexing` configures the _indexing pipeline_ for a field. For more information, see [schemas - indexing](/en/basics/schemas#document-fields). The [string](/en/reference/schemas/schemas#string) data type represents both unstructured and structured texts, and there are significant differences between [index and attribute](/en/querying/text-matching#index-and-attribute). The above schema includes the default `match` modes for the `attribute` and `index` properties for visibility. + +Note that we are enabling [BM25](/en/ranking/bm25) for `title` and `text` by including `index: enable-bm25`. The language field is the only field that is not in the NFCorpus dataset. We hardcode its value to "en" since the dataset is English. Using `set_language` avoids automatic language detection and uses the value when processing the other text fields. Read more in [linguistics](/en/linguistics/linguistics). + +#### Fieldset for matching across multiple fields + +[Fieldset](/en/reference/schemas/schemas#fieldset) allows searching across multiple fields. Defining `fieldset` does not add indexing/storage overhead. String fields grouped using fieldsets must share the same [match](/en/reference/schemas/schemas#match) and [linguistic processing](/en/linguistics/linguistics) settings because the query processing that searches a field or fieldset uses *one* type of transformation. + +#### Embedding inference +Our `embedding` vector field is of [tensor](/en/ranking/tensor-user-guide) type with a single named dimension (`v`) of 384 values. + +```js +field embedding type tensor(v[384]) { + indexing: input title." 
".input text | embed arctic | attribute + attribute { + distance-metric: angular + } +} +``` +The `indexing` expression creates the input to the `embed` inference call (in our example the concatenation of the title and the text field). Since the dataset is small, we do not specify `index` which would build [HNSW](/en/querying/approximate-nn-hnsw) data structures for faster (but approximate) vector search. This guide uses [snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs) as the text embedding model. The model is trained with cosine similarity, which maps to Vespa's `angular` [distance-metric](/en/reference/schemas/schemas#distance-metric) for nearestNeighbor search. + +#### Ranking to determine matched documents ordering +You can define many [rank profiles](/en/basics/ranking), named collections of score calculations, and ranking phases. + +In this starting point, we have two simple rank-profile's: +- a `bm25` rank-profile that uses [BM25](/en/ranking/bm25). We sum the two field-level BM25 scores using a Vespa [ranking expression](/en/ranking/ranking-expressions-features). +- a `semantic` rank-profile which is used in combination Vespa's nearestNeighbor query operator (vector search). + +Both profiles specify a single [ranking phase](/en/ranking/phased-ranking). + +### Services Specification + +The [services.xml](/en/reference/applications/services/services.html) defines the services that make up the Vespa application — which services to run and how many nodes per service. Write the following to `app/services.xml`: + + +```xml expandable + + + + + + + + + + cls + + Represent this sentence for searching relevant passages: + + + + + + 1 + + + + + +``` +```xml +Paste the above into file app/services.xml +``` + +Some notes about the elements above: + +- `` defines the [container cluster](/en/applications/containers) for document, query and result processing. +- `` sets up the [query endpoint](/en/querying/query-api). 
The default port is 8080. +- `<document-api>` sets up the [document endpoint](/en/reference/api/document-v1) for feeding. +- `<component>` with type `hugging-face-embedder` configures the embedder in the application package. This includes where to fetch the model files from, the prepend instructions, and the pooling strategy. See [huggingface-embedder](/en/rag/embedding#huggingface-embedder) for details and other embedders supported. +- `<content>` defines how documents are stored and searched. +- `<min-redundancy>` denotes how many copies to keep of each document. +- `<documents>` assigns the document types in the _schema_ to content clusters. + + +## Deploy the application package + +Once we have finished writing our application package, we can deploy it. We use settings similar to those in the [Vespa quick start guide](/en/basics/deploy-an-application-local). + +Start the Vespa container: + + +```bash +$ docker run --detach --name vespa-hybrid --hostname vespa-container --publish 8080:8080 --publish 19071:19071 vespaengine/vespa +``` + + +Notice that we publish two ports: 8080 is the data-plane where we write and query documents, and 19071 is the control-plane where we can deploy the application. Note that the data-plane port is inactive before deploying the application. + +Configure the Vespa CLI to use the local container: +```bash +$ vespa config set target local +``` + + +Starting the container can take a short while. Make sure that the configuration service is running by using `vespa status`. + + +```bash +$ vespa status deploy --wait 300 +``` + + +Now, deploy the Vespa application from the `app` directory: + + +```bash +$ vespa deploy --wait 300 app +``` + + + +## Feed the data + +The data fed to Vespa must match the document type in the schema. This step performs embed inference inside Vespa using the snowflake arctic embedding model. Remember the `component` definition in `services.xml` and the `embed` call in the schema. 
+ + +```bash +$ vespa feed -t http://localhost:8080 vespa-docs.jsonl +``` + + +The output should look like this (rates may vary depending on your machine HW): + + +```json expandable +{ + "feeder.operation.count": 3633, + "feeder.seconds": 148.515, + "feeder.ok.count": 3633, + "feeder.ok.rate": 24.462, + "feeder.error.count": 0, + "feeder.inflight.count": 0, + "http.request.count": 3633, + "http.request.bytes": 2985517, + "http.request.MBps": 0.020, + "http.exception.count": 0, + "http.response.count": 3633, + "http.response.bytes": 348320, + "http.response.MBps": 0.002, + "http.response.error.count": 0, + "http.response.latency.millis.min": 316, + "http.response.latency.millis.avg": 787, + "http.response.latency.millis.max": 1704, + "http.response.code.counts": { + "200": 3633 + } +} +``` + + +Notice: + +- `feeder.ok.rate` which is the throughput (Note that this step includes embedding inference). See [embedder-performance](/en/rag/embedding#embedder-performance) for details on embedding inference performance. In this case, embedding inference is the bottleneck for overall indexing throughput. +- `http.response.code.counts` matches with `feeder.ok.count`. The dataset has 3633 documents. Note that if you observe any `429` responses, these are harmless. Vespa asks the client to slow down the feed speed because of resource contention. + + +## Sample queries +We can now run a few sample queries to demonstrate various ways to perform searches over this data using the [Vespa query language](/en/querying/query-language). + + +```bash +$ ir_datasets export beir/nfcorpus/test queries --fields query_id text | head -1 +``` + +```bash +PLAIN-2 Do Cholesterol Statin Drugs Cause Breast Cancer? +``` + + +If you see a pipe related error from the above command, you can safely ignore it. + +Here, `PLAIN-2` is the query id of the first test query. We'll use this test query to demonstrate querying Vespa. 
+ +### Lexical search with BM25 scoring + +The following query uses [weakAnd](/en/ranking/wand) and where `totalTargetHits` is a hint of how many documents we want to expose to configurable [ranking phases](/en/ranking/phased-ranking). Refer to [text search tutorial](/en/learn/tutorials/text-search#querying-the-data) for more on querying with `text`. + + +```bash +vespa query \ + 'yql=select * from doc where default contains ({targetHits:10}text(@user-query))' \ + 'user-query=Do Cholesterol Statin Drugs Cause Breast Cancer?' \ + 'hits=1' \ + 'language=en' \ + 'ranking=bm25' +``` + + +Notice that we choose `ranking` to specify which rank profile to rank the documents retrieved by the query. This query returns the following [JSON result response](/en/reference/querying/default-result-format): + + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 46 + }, + "coverage": { + "coverage": 100, + "documents": 3633, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "id:doc:doc::MED-10", + "relevance": 25.521817426330887, + "source": "content", + "fields": { + "sddocname": "doc", + "documentid": "id:doc:doc::MED-10", + "doc_id": "MED-10", + "title": "Statin Use and Breast Cancer Survival: A Nationwide Cohort Study from Finland", + "text": "Recent studies have suggested that statins, an established drug group in the prevention of cardiovascular mortality, could delay or prevent breast cancer recurrence but the effect on disease-specific mortality remains unclear. We evaluated risk of breast cancer death among statin users in a population-based cohort of breast cancer patients. The study cohort included all newly diagnosed breast cancer patients in Finland during 1995–2003 (31,236 cases), identified from the Finnish Cancer Registry. Information on statin use before and after the diagnosis was obtained from a national prescription database. 
We used the Cox proportional hazards regression method to estimate mortality among statin users with statin use as time-dependent variable. A total of 4,151 participants had used statins. During the median follow-up of 3.25 years after the diagnosis (range 0.08–9.0 years) 6,011 participants died, of which 3,619 (60.2%) was due to breast cancer. After adjustment for age, tumor characteristics, and treatment selection, both post-diagnostic and pre-diagnostic statin use were associated with lowered risk of breast cancer death (HR 0.46, 95% CI 0.38–0.55 and HR 0.54, 95% CI 0.44–0.67, respectively). The risk decrease by post-diagnostic statin use was likely affected by healthy adherer bias; that is, the greater likelihood of dying cancer patients to discontinue statin use as the association was not clearly dose-dependent and observed already at low-dose/short-term use. The dose- and time-dependence of the survival benefit among pre-diagnostic statin users suggests a possible causal effect that should be evaluated further in a clinical trial testing statins’ effect on survival in breast cancer patients." + } + } + ] + } +} +``` + + +The query retrieves and ranks `MED-10` as the most relevant document—notice the `totalCount` which is the number of documents that were retrieved for ranking phases. In this case, we exposed about 50 documents to first-phase ranking, it is higher than our target, but also fewer than the total number of documents that match any query terms. + +In the example below, we change the grammar from the default `weakAnd` to `any`, and the query matches 1780, or almost 50% of the indexed documents. + + +```bash +vespa query \ + 'yql=select * from doc where default contains ({targetHits:100, grammar:"any"}text(@user-query))' \ + 'user-query=Do Cholesterol Statin Drugs Cause Breast Cancer?' 
\ + 'hits=1' \ + 'language=en' \ + 'ranking=bm25' +``` + + +The bm25 rank profile calculates the relevance score (\~25.521), which is configured in the schema as: + + +```txt +rank-profile bm25 { + first-phase { + expression: bm25(title) + bm25(text) + } +} +``` + + +So, in this case, `relevance` is the sum of the two BM25 scores. The retrieved document looks relevant; we can look at the graded judgment for this query `PLAIN-2`. The following exports the query relevance judgments (we grep for the query id that we are interested in): + + +```bash +$ ir_datasets export beir/nfcorpus/test qrels | grep "PLAIN-2 " +``` + + +The following is the output from the above command. Notice line two, the `MED-10` document retrieved above, is judged as very relevant with the grade 2 (perfect) for the query_id PLAIN-2. This dataset has graded relevance judgments where a grade of 1 is less relevant than 2. Documents retrieved by the system without a relevance judgment are assumed to be irrelevant (grade 0). + + +```bash expandable +PLAIN-2 0 MED-2427 2 +PLAIN-2 0 MED-10 2 +PLAIN-2 0 MED-2429 2 +PLAIN-2 0 MED-2430 2 +PLAIN-2 0 MED-2431 2 +PLAIN-2 0 MED-14 2 +PLAIN-2 0 MED-2432 2 +PLAIN-2 0 MED-2428 1 +PLAIN-2 0 MED-2440 1 +PLAIN-2 0 MED-2434 1 +PLAIN-2 0 MED-2435 1 +PLAIN-2 0 MED-2436 1 +PLAIN-2 0 MED-2437 1 +PLAIN-2 0 MED-2438 1 +PLAIN-2 0 MED-2439 1 +PLAIN-2 0 MED-3597 1 +PLAIN-2 0 MED-3598 1 +PLAIN-2 0 MED-3599 1 +PLAIN-2 0 MED-4556 1 +PLAIN-2 0 MED-4559 1 +PLAIN-2 0 MED-4560 1 +PLAIN-2 0 MED-4828 1 +PLAIN-2 0 MED-4829 1 +PLAIN-2 0 MED-4830 1 +``` + + +### Dense search using text embedding + +Now, we turn to embedding-based retrieval, where we embed the query text using the configured text-embedding model and perform an exact `nearestNeighbor` search. We use [embed query](/en/rag/embedding#embedding-a-query-text) to produce the input tensor `query(e)`, defined in the `semantic` rank-profile in the schema. 
+ + +```bash +vespa query \ + 'yql=select * from doc where {targetHits:10}nearestNeighbor(embedding,e)' \ + 'user-query=Do Cholesterol Statin Drugs Cause Breast Cancer?' \ + 'input.query(e)=embed(@user-query)' \ + 'hits=1' \ + 'ranking=semantic' +``` + + +This query returns the following [JSON result response](/en/reference/querying/default-result-format): + + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 64 + }, + "coverage": { + "coverage": 100, + "documents": 3633, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "id:doc:doc::MED-2429", + "relevance": 0.6061378635706601, + "source": "content", + "fields": { + "sddocname": "doc", + "documentid": "id:doc:doc::MED-2429", + "doc_id": "MED-2429", + "title": "Statin use and risk of breast cancer: a meta-analysis of observational studies.", + "text": "Emerging evidence suggests that statins' may decrease the risk of cancers. However, available evidence on breast cancer is conflicting. We, therefore, examined the association between statin use and risk of breast cancer by conducting a detailed meta-analysis of all observational studies published regarding this subject. PubMed database and bibliographies of retrieved articles were searched for epidemiological studies published up to January 2012, investigating the relationship between statin use and breast cancer. Before meta-analysis, the studies were evaluated for publication bias and heterogeneity. Combined relative risk (RR) and 95 % confidence interval (CI) were calculated using a random-effects model (DerSimonian and Laird method). Subgroup analyses, sensitivity analysis, and cumulative meta-analysis were also performed. A total of 24 (13 cohort and 11 case-control) studies involving more than 2.4 million participants, including 76,759 breast cancer cases contributed to this analysis. 
We found no evidence of publication bias and evidence of heterogeneity among the studies. Statin use and long-term statin use did not significantly affect breast cancer risk (RR = 0.99, 95 % CI = 0.94, 1.04 and RR = 1.03, 95 % CI = 0.96, 1.11, respectively). When the analysis was stratified into subgroups, there was no evidence that study design substantially influenced the effect estimate. Sensitivity analysis confirmed the stability of our results. Cumulative meta-analysis showed a change in trend of reporting risk of breast cancer from positive to negative in statin users between 1993 and 2011. Our meta-analysis findings do not support the hypothesis that statins' have a protective effect against breast cancer. More randomized clinical trials and observational studies are needed to confirm this association with underlying biological mechanisms in the future." + } + } + ] + } +} +``` + + +The result of this vector-based search differed from the previous sparse keyword search, with a different relevant document at position 1. In this case, the relevance score is 0.606 and calculated by the `closeness` function in the `semantic` rank-profile. Note that more documents were retrieved than the `targetHits`. + +```bash +rank-profile semantic { + inputs { + query(e) tensor(v[384]) + } + first-phase { + expression: closeness(field, embedding) + } + } +``` + +Where [closeness(field, embedding)](/en/reference/ranking/rank-features#attribute-match-features-normalized) is a ranking feature that calculates the cosine similarity between the query and the document embedding. This returns the inverted of the distance between the two vectors. Small distance = higher closeness. This because Vespa sorts results in descending order of relevance. Descending order means the largest will appear at the top of the ranked list. + +Note that similarity scores of embedding vectors are often optimized via contrastive or ranking losses, which make them difficult to interpret. 
+ +## Evaluate ranking accuracy + +The previous section demonstrated how to combine the Vespa query language with rank profiles to implement two different retrieval and ranking strategies. + +In the following section we evaluate all 323 test queries with both models to compare their overall effectiveness, measured using [nDCG@10](https://en.wikipedia.org/wiki/Discounted_cumulative_gain). `nDCG@10` is the official evaluation metric of the BEIR benchmark and is an appropriate metric for test sets with graded relevance judgments. + +For this evaluation task, we need to write a small script. The following script iterates over the queries in the test set, executes the query against the Vespa instance, and reads the response from Vespa. It then evaluates and prints the metric. The overall effectiveness is measured using the average of each query `nDCG@10` metric. + + +```python expandable +import requests +import ir_datasets +from ir_measures import calc_aggregate, nDCG, ScoredDoc +from enum import Enum +from typing import List + +class RModel(Enum): + SPARSE = 1 + DENSE = 2 + HYBRID = 3 + +def parse_vespa_response(response:dict, qid:str) -> List[ScoredDoc]: + result = [] + hits = response['root'].get('children',[]) + for hit in hits: + doc_id = hit['fields']['doc_id'] + relevance = hit['relevance'] + result.append(ScoredDoc(qid, doc_id, relevance)) + return result + +def search(query:str, qid:str, ranking:str, + hits=10, language="en", mode=RModel.SPARSE) -> List[ScoredDoc]: + yql = "select doc_id from doc where default contains ({targetHits:100}text(@user-query))" + if mode == RModel.DENSE: + yql = "select doc_id from doc where ({targetHits:10}nearestNeighbor(embedding, e))" + elif mode == RModel.HYBRID: + yql = "select doc_id from doc where default contains ({targetHits:100}text(@user-query)) OR ({targetHits:10}nearestNeighbor(embedding, e))" + query_request = { + 'yql': yql, + 'user-query': query, + 'ranking.profile': ranking, + 'hits' : hits, + 'language': language 
+ } + if mode == RModel.DENSE or mode == RModel.HYBRID: + query_request['input.query(e)'] = "embed(@user-query)" + + response = requests.post("http://localhost:8080/search/", json=query_request) + if response.ok: + return parse_vespa_response(response.json(), qid) + else: + print("Search request failed with response " + str(response.json())) + return [] + +def main(): + import argparse + parser = argparse.ArgumentParser(description='Evaluate ranking models') + parser.add_argument('--ranking', type=str, required=True, help='Vespa ranking profile') + parser.add_argument('--mode', type=str, default="sparse", help='retrieval mode, valid values are sparse, dense, hybrid') + args = parser.parse_args() + mode = RModel.HYBRID + if args.mode == "sparse": + mode = RModel.SPARSE + elif args.mode == "dense": + mode = RModel.DENSE + + + dataset = ir_datasets.load("beir/nfcorpus/test") + results = [] + metrics = [nDCG@10] + for query in dataset.queries_iter(): + qid = query.query_id + query_text = query.text + results.extend(search(query_text, qid, args.ranking, mode=mode)) + + metrics = calc_aggregate(metrics, dataset.qrels, results) + print("Ranking metric NDCG@10 for rank profile {}: {:.4f}".format(args.ranking, metrics[nDCG@10])) + +if __name__ == "__main__": + main() +``` + +```bash +Paste the above into file evaluate_ranking.py +``` + + +Then execute the script: +```bash +$ python3 evaluate_ranking.py --ranking bm25 --mode sparse +``` + + +The script will produce the following output: + + +```txt +Ranking metric NDCG@10 for rank profile bm25: 0.3210 +``` + + +Now, we can evaluate the dense model using the same script: + + +```bash +$ python3 evaluate_ranking.py --ranking semantic --mode dense +``` + +```txt +Ranking metric NDCG@10 for rank profile semantic: 0.3077 +``` + + +Note that the _average_ `nDCG@10` score is computed across all the 327 test queries. 
You can also experiment beyond a single metric and modify the script to calculate more [measures](https://ir-measur.es/en/latest/measures.html), for example, including precision with a relevance label cutoff of 2: + + +```txt +metrics = [nDCG@10, P(rel=2)@10] +``` + + +Also note that the exact nDCG@10 values may vary slightly between runs. + +## Hybrid Search & Ranking + +We demonstrated and evaluated two independent retrieval and ranking strategies in the previous sections. Now, we want to explore hybrid search techniques where we combine: + +- traditional lexical keyword matching with a text scoring method (BM25) +- embedding-based search using a text embedding model + +With Vespa, there is a distinction between retrieval (matching) and configurable [ranking](/en/basics/ranking). + +In the Vespa ranking phases, we can express arbitrary scoring complexity with the full power of the Vespa [ranking](/en/basics/ranking) framework. Meanwhile, top-k retrieval relies on simple built-in functions associated with Vespa's top-k query operators. These top-k operators aim to avoid scoring all documents in the collection for a query by using a simplistic scoring function to identify the top-k documents. + +These top-k query operators use `index` structures to accelerate the query evaluation, avoiding scoring all documents using heuristics. In the context of hybrid text search, the following Vespa top-k query operators are relevant: + +- YQL `{targetHits:k}nearestNeighbor()` for dense representations (text embeddings) using a configured [distance-metric](/en/reference/schemas/schemas#distance-metric) as the scoring function. +- YQL `myField contains ({targetHits:k}text(@user-query))` which by default uses [weakAnd](/en/ranking/wand) for sparse representations. + + +We can combine these operators using boolean query operators like AND/OR/RANK to express a hybrid search query. 
Then, there is a wild number of ways that we can combine various signals in [ranking](/en/basics/ranking). + + +### Define our first simple hybrid rank profile + +First, we can add our first simple hybrid rank profile that combines the dense and sparse components using multiplication to combine them into a single score. + + +```txt +closeness(field, embedding) * (1 + bm25(title) + bm25(text)) +``` + + +- the [closeness(field, embedding)](/en/reference/ranking/rank-features#attribute-match-features-normalized) rank-feature returns a normalized score in the range 0 to 1 inclusive +- Any of the per-field BM25 scores are in the range of 0 to infinity + +We add a bias constant (1) to avoid the overall score becoming 0 if the document does not match any query terms, as the BM25 scores would be 0. We also add `match-features` to be able to debug each of the scores. + + + +```js expandable +schema doc { + document doc { + field language type string { + indexing: "en" | set_language + } + field doc_id type string { + indexing: attribute | summary + match: word + } + field title type string { + indexing: index | summary + match: text + index: enable-bm25 + } + field text type string { + indexing: index | summary + match: text + index: enable-bm25 + } + } + fieldset default { + fields: title, text + } + + field embedding type tensor(v[384]) { + indexing: input title." ".input text | embed | attribute + attribute { + distance-metric: angular + } + } + + rank-profile hybrid { + inputs { + query(e) tensor(v[384]) + } + first-phase { + expression: closeness(field, embedding) * (1 + (bm25(title) + bm25(text))) + } + match-features: bm25(title) bm25(text) closeness(field, embedding) + } +} +``` + + +Now, re-deploy the Vespa application from the `app` directory: + + +```bash +$ vespa deploy --wait 300 app +``` + + +After that, we can start experimenting with how to express hybrid queries using the Vespa query language. 
+ +### Hybrid query examples + +The following demonstrates combining the two top-k query operators using the Vespa query language. In a later section, we will show how to combine the two retrieval strategies using the Vespa ranking framework. This section focuses on the top-k retrieval part that exposes matched documents to the Vespa [ranking](/en/basics/ranking) phase(s). + +#### Hybrid query using the OR operator +The following query exposes documents to ranking that match the query using *either (OR)* the sparse or dense representation. + + +```bash +vespa query \ + 'yql=select * from doc where default contains ({targetHits:10}text(@user-query)) or ({targetHits:10}nearestNeighbor(embedding,e))' \ + 'user-query=Do Cholesterol Statin Drugs Cause Breast Cancer?' \ + 'input.query(e)=embed(@user-query)' \ + 'hits=1' \ + 'language=en' \ + 'ranking=hybrid' +``` + The documents retrieved into ranking is scored by the `hybrid` rank-profile. Note that both top-k query operators might expose more than the the `targetHits` setting. 
+ +The above query returns the following [JSON result response](/en/reference/querying/default-result-format): + + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 87 + }, + "coverage": { + "coverage": 100, + "documents": 3633, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "id:doc:doc::MED-10", + "relevance": 15.898915593367988, + "source": "content", + "fields": { + "matchfeatures": { + "bm25(text)": 17.35556767018612, + "bm25(title)": 8.166249756144769, + "closeness(field,embedding)": 0.5994655395517325 + }, + "sddocname": "doc", + "documentid": "id:doc:doc::MED-10", + "doc_id": "MED-10", + "title": "Statin Use and Breast Cancer Survival: A Nationwide Cohort Study from Finland", + "text": "Recent studies have suggested that statins, an established drug group in the prevention of cardiovascular mortality, could delay or prevent breast cancer recurrence but the effect on disease-specific mortality remains unclear. We evaluated risk of breast cancer death among statin users in a population-based cohort of breast cancer patients. The study cohort included all newly diagnosed breast cancer patients in Finland during 1995–2003 (31,236 cases), identified from the Finnish Cancer Registry. Information on statin use before and after the diagnosis was obtained from a national prescription database. We used the Cox proportional hazards regression method to estimate mortality among statin users with statin use as time-dependent variable. A total of 4,151 participants had used statins. During the median follow-up of 3.25 years after the diagnosis (range 0.08–9.0 years) 6,011 participants died, of which 3,619 (60.2%) was due to breast cancer. 
After adjustment for age, tumor characteristics, and treatment selection, both post-diagnostic and pre-diagnostic statin use were associated with lowered risk of breast cancer death (HR 0.46, 95% CI 0.38–0.55 and HR 0.54, 95% CI 0.44–0.67, respectively). The risk decrease by post-diagnostic statin use was likely affected by healthy adherer bias; that is, the greater likelihood of dying cancer patients to discontinue statin use as the association was not clearly dose-dependent and observed already at low-dose/short-term use. The dose- and time-dependence of the survival benefit among pre-diagnostic statin users suggests a possible causal effect that should be evaluated further in a clinical trial testing statins’ effect on survival in breast cancer patients." + } + } + ] + } +} +``` + + +What is going on here is that we are combining the two top-k query operators using a boolean OR (disjunction). The `totalCount` is the number of documents retrieved into ranking (About 90, which is higher than 10 + 10). The `relevance` is the score assigned by `hybrid` rank-profile. Notice that the `matchfeatures` field shows all the feature scores. This is useful for debugging and understanding the ranking behavior, also for feature logging. + +#### Hybrid query with AND operator +The following combines the two top-k operators using AND, meaning that the retrieved documents must match both the sparse and dense top-k operators: + + +```bash +vespa query \ + 'yql=select * from doc where default contains ({targetHits:10}text(@user-query)) and ({targetHits:10}nearestNeighbor(embedding,e))' \ + 'user-query=Do Cholesterol Statin Drugs Cause Breast Cancer?' \ + 'input.query(e)=embed(@user-query)' \ + 'hits=1' \ + 'language=en' \ + 'ranking=hybrid' +``` + For the sparse keyword query matching, the `weakAnd` operator is used by default and it requires that at least one term in the query matches the document (fieldset searched). 
+ +#### Hybrid query with rank query operator +The following combines the two top-k operators using the [rank](/en/reference/querying/yql#rank) query operator, which allows us to retrieve using only the first operand of the rank operator, but where the remaining operands allow computing (match) features that can be used in ranking phases. + +This query is meaningful because we can use the computed features in the ranking expressions but retrieve only by the dense representation. This is usually the most resource-effective way to combine the two representations. + + +```bash +vespa query \ + 'yql=select * from doc where rank(({targetHits:10}nearestNeighbor(embedding,e)), default contains ({targetHits:10}text(@user-query)))' \ + 'user-query=Do Cholesterol Statin Drugs Cause Breast Cancer?' \ + 'input.query(e)=embed(@user-query)' \ + 'hits=1' \ + 'language=en' \ + 'ranking=hybrid' +``` + We can also invert the order of the operands to the `rank` query operator that retrieves by the sparse representation but uses the dense representation to compute features for ranking. This is very useful in cases where we do not want to build HNSW indexes (adds memory and slows down indexing), but still be able to use semantic signals in ranking phases. + + +```bash +vespa query \ + 'yql=select * from doc where rank(default contains ({targetHits:10}text(@user-query)), ({targetHits:10}nearestNeighbor(embedding,e)))' \ + 'user-query=Do Cholesterol Statin Drugs Cause Breast Cancer?' \ + 'input.query(e)=embed(@user-query)' \ + 'hits=1' \ + 'language=en' \ + 'ranking=hybrid' +``` + + +This way of performing hybrid retrieval allows retrieving only by the sparse representation and uses the dense vector representation to compute features for ranking. + +## Hybrid ranking + +In the previous section, we demonstrated combining the two top-k query operators using boolean query operators. + +This section will show combining the two retrieval strategies using the Vespa ranking framework. 
We can first start evaluating the effectiveness of the hybrid rank profile that combines the two retrieval strategies. + + + +```bash +$ python3 evaluate_ranking.py --ranking hybrid --mode hybrid +``` + +Which outputs + + +```txt +Ranking metric NDCG@10 for rank profile hybrid: 0.3330 +``` + + +The `nDCG@10` score is slightly higher than the profiles that only use one of the ranking strategies. + +Now, we can experiment with more complex ranking expressions that combine the two retrieval strategies. We add a few more rank profiles to the schema that combine the two retrieval strategies in different ways. + + +```js expandable +schema doc { + document doc { + field language type string { + indexing: "en" | set_language + } + field doc_id type string { + indexing: attribute | summary + match: word + } + field title type string { + indexing: index | summary + match: text + index: enable-bm25 + } + field text type string { + indexing: index | summary + match: text + index: enable-bm25 + } + } + fieldset default { + fields: title, text + } + + field embedding type tensor(v[384]) { + indexing: input title." 
".input text | embed | attribute + attribute { + distance-metric: angular + } + } + + rank-profile hybrid { + inputs { + query(e) tensor(v[384]) + } + first-phase { + expression: closeness(field, embedding) * (1 + (bm25(title) + bm25(text))) + } + match-features: bm25(title) bm25(text) closeness(field, embedding) + } + + rank-profile hybrid-sum inherits hybrid { + first-phase { + expression: closeness(field, embedding) + ((bm25(title) + bm25(text))) + } + } + + rank-profile hybrid-normalize-bm25-with-atan inherits hybrid { + + function scale(val) { + expression: 2*atan(val/8)/(3.14159) + } + function normalized_bm25() { + expression: scale(bm25(title) + bm25(text)) + } + function cosine() { + expression: cos(distance(field, embedding)) + } + first-phase { + expression: normalized_bm25 + cosine + } + match-features { + normalized_bm25 + cosine + bm25(title) + bm25(text) + } + } + + rank-profile hybrid-rrf inherits hybrid-normalize-bm25-with-atan{ + + function bm25_score() { + expression: bm25(title) + bm25(text) + } + global-phase { + rerank-count: 100 + expression: reciprocal_rank(bm25_score) + reciprocal_rank(cosine) + } + match-features: bm25(title) bm25(text) bm25_score cosine + } + + rank-profile hybrid-linear-normalize inherits hybrid-normalize-bm25-with-atan{ + + function bm25_score() { + expression: bm25(title) + bm25(text) + } + global-phase { + rerank-count: 100 + expression: normalize_linear(bm25_score) + normalize_linear(cosine) + } + match-features: bm25(title) bm25(text) bm25_score cosine + } +} +``` + +```bash +Paste the above into file app/schemas/doc.sd +``` + +Now, re-deploy the Vespa application from the `app` directory: + + +```bash +vespa deploy --wait 300 app +``` + +Let us break down the new rank profiles: + +- `hybrid-sum` combines the two retrieval strategies using addition. This is a simple way to combine the two strategies. 
But since the BM25 scores are not normalized (unbounded) and the closeness score is normalized (0-1), the BM25 scores will dominate the closeness score.
Once we have deployed the application, we can evaluate the new hybrid profiles using the script: + + +```bash +$ python3 evaluate_ranking.py --ranking hybrid-sum --mode hybrid +``` + +```txt +Ranking metric NDCG@10 for rank profile hybrid-sum: 0.3244 +``` + +```bash +$ python3 evaluate_ranking.py --ranking hybrid-normalize-bm25-with-atan --mode hybrid +``` + +```txt +Ranking metric NDCG@10 for rank profile hybrid-normalize-bm25-with-atan: 0.3410 +``` + + + +```bash +$ python3 evaluate_ranking.py --ranking hybrid-rrf --mode hybrid +``` + + + +```txt +Ranking metric NDCG@10 for rank profile hybrid-rrf: 0.3233 +``` + + + +```bash +$ python3 evaluate_ranking.py --ranking hybrid-linear-normalize --mode hybrid +``` + + + +```txt +Ranking metric NDCG@10 for rank profile hybrid-linear-normalize: 0.3423 +``` + + +On this particular dataset, the `hybrid-normalize-bm25-with-atan` rank profile performs the best, but the difference is small. This also demonstrates that hybrid search and ranking is a complex problem and that the effectiveness of the hybrid model depends on the dataset and the retrieval strategies. + +These results (which is the best) might not transfer to your specific retrieval use case and dataset, so it is important to evaluate the effectiveness of a hybrid model on your specific dataset. + +See [Improving retrieval with LLM-as-a-judge](https://blog.vespa.ai/improving-retrieval-with-llm-as-a-judge/) for more information on how to collect relevance judgments for your dataset. + +### Summary + +We showed how to express hybrid queries using the Vespa query language and how to combine the two retrieval strategies using the Vespa ranking framework. We also showed how to evaluate the effectiveness of the hybrid ranking model using one of the datasets that are a part of the BEIR benchmark. 
We hope this tutorial has given you a good understanding of how to combine different retrieval strategies using Vespa, and that there is not a single silver bullet for all retrieval problems. + +## Cleanup + + +```bash +$ docker rm -f vespa-hybrid +``` + + +1. Robertson, Stephen and Zaragoza, Hugo and others, 2009. The probabilistic relevance framework: BM25 and beyond. Foundations and Trends in Information Retrieval. diff --git a/mintlify-docs/en/learn/tutorials/news-1-deploy-an-application.mdx b/mintlify-docs/en/learn/tutorials/news-1-deploy-an-application.mdx new file mode 100644 index 0000000000..d114168a53 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/news-1-deploy-an-application.mdx @@ -0,0 +1,222 @@ +--- +title: "News search and recommendation tutorial - getting started on Docker" +--- + +Our goal with this series is to set up a Vespa application for personalized news recommendations. We will do this in stages, starting with a simple news search system and gradually adding functionality as we go through the tutorial parts. + +The parts are: + +1. [Getting started](/en/learn/tutorials/news-1-deploy-an-application) - this part +2. [A basic news search application](/en/learn/tutorials/news-2-basic-feeding-and-query) - application packages, feeding, query +3. [News search](/en/learn/tutorials/news-3-searching) - sorting, grouping, and ranking +4. [Generating embeddings for users and news articles](/en/learn/tutorials/news-4-embeddings) +5. [News recommendation](/en/learn/tutorials/news-5-recommendation) - partial updates (news embeddings), ANNs, filtering +6. [News recommendation with searchers](/en/learn/tutorials/news-6-recommendation-with-searchers) - custom searchers, doc processors +7. [News recommendation with parent-child](/en/learn/tutorials/news-7-recommendation-with-parent-child) - parent-child, tensor ranking + +There are different entry points to this tutorial. This one is describing how to get started using Docker on your local machine. 
You can also deploy the application we are creating on [Vespa Cloud](https://cloud.vespa.ai). + +In this part, we will start with a minimal Vespa application to get used to some basic operations for running the application on Docker. In the next part of the tutorial, we'll start developing our application. + + + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` + - See [Docker Containers](/en/operations/self-managed/docker-containers) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 4 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). +- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [Github releases](https://github.com/vespa-engine/vespa/releases). +- Python3 for converting the dataset to Vespa JSON. +- `curl` to download the dataset and run the Vespa health-checks. +- [Java 17](https://openjdk.org/projects/jdk/17/) in part 6. +- [Apache Maven](https://maven.apache.org/install.html) in part 6. + + + + +**Note:** + +4 GB Docker memory is sufficient for the demo dataset in part 2. The full MIND dataset requires more, use 10 GB. + + + +In upcoming parts of this series, we will have some additional Python dependencies - we use [PyTorch](https://pytorch.org/) to train vector representations for news and users and train machine learning models for use in ranking. 
+ + +## Installing vespa-cli + +This tutorial uses [Vespa-CLI](/en/clients/vespa-cli), Vespa CLI is the official command-line client for Vespa.ai. It is a single binary without any runtime dependencies and is available for Linux, macOS, and Windows. + +```bash +$ brew install vespa-cli +``` + +For the rest of this tutorial, you will be using localhost, so you need to configure your Vespa CLI to connect to localhost. Run the following to use endpoints on localhost: + +```bash +$ vespa config set target local +``` + + +## A minimal Vespa application + +This tutorial has a [companion sample application](https://github.com/vespa-engine/sample-apps/tree/master/news). Throughout the tutorial, we will be using support code from this application. Also, the final state of each tutorial can be found in the various `app-...` subdirectories. + +Let's start by cloning the sample application: + +```bash +$ vespa clone -f news news && cd news +``` + +The above downloads the `news` directory from the Vespa [sample apps repository](https://github.com/vespa-engine/sample-apps/) and places the contents in a folder called `news`. Use `--help` to see documentation for the vespa-cli utility: + +```bash +$ vespa clone --help +``` + +In the `news` directory, several pre-configured application packages are available. The `app-1-getting-started` directory contains a minimal Vespa application. There are two files there: + +- `services.xml` - defines the services that the application consists of +- `schemas/news.sd` - defines the schema for searchable content. + +We will revisit these files in the next part of the tutorial. 
+ + +## Starting Vespa + +This application doesn't contain much at the moment, let's start up the application anyway by starting a Docker container to run it: + +```bash +$ docker pull vespaengine/vespa +$ docker run --detach --name vespa --hostname vespa-tutorial \ + --publish 8080:8080 --publish 19071:19071 --publish 19092:19092 \ + vespaengine/vespa +``` + +First, we pull the latest [vespa-image](https://hub.docker.com/r/vespaengine/vespa/) from the Docker hub, then we start it with the name `vespa`. This starts the Docker container and the initial Vespa services to be able to deploy an application. + +Starting the container can take a short while. Before continuing, make sure that the configuration service is running by using `vespa status`. + +```bash +$ vespa status deploy --wait 300 +``` + +With the config server up and running, deploy the application using vespa-cli: + +```bash +$ vespa deploy --wait 300 app-1-getting-started +``` + +The command uploads the application and verifies the content. If anything is wrong with the application, this step will fail with a failure description; Otherwise, this switches the application to a live status. + +Whenever you have a new version of your application, run the same command to deploy the application. In most cases, there is no need to restart services. Vespa takes care of reconfiguring the system. If a restart of services is required in some rare case, however, the output will notify which services need restart to make the change effective. + +In the upcoming parts of the tutorials, we'll frequently deploy the application changes in this manner. + + +## Feeding to Vespa + +We must index data before we can search for it. This is called "feeding", and we'll get back to that in more detail in the next part of the tutorial. 
For now, to test that everything is up and running, we'll feed in a single test document: + +```bash +$ vespa feed -t http://localhost:8080 doc.json +``` + +The `-v` option will make vespa-cli print the http request: + +```bash +$ vespa document -v doc.json +``` + +We can also feed using [Vespa document api](/en/writing/document-v1-api-guide) directly. + +Once the feed operation is acknowledged by Vespa, the operation is visible in search. + + +## Querying Vespa + +We can query the endpoint using the vespa-cli's support for performing queries. It uses the [Vespa query api](/en/querying/query-api) to query vespa, including `-v` in the command, we can see the exact endpoint and url request parameters used. + +```bash +$ vespa query -v 'yql=select * from news where true' +``` + +This example uses [YQL (Vespa Query Language)](/en/querying/query-language) to search for all documents of type `news`. This query request will return `1` result, which is the document we fed above. + +```bash +$ vespa query \ + 'yql=select * from news where userQuery()' \ + 'query=hello world' \ + 'default-index=title' +``` + +Another query language example that searches for hello or world in the title. + +```bash +$ vespa query \ + 'yql=select * from news where title contains phrase("hello","world")' +``` + +Another query language example that searches for the phrase "hello world" in the title. In the [next part of the tutorial](/en/learn/tutorials/news-2-basic-feeding-and-query) we'll demonstrate more query examples, and also ranking and grouping of results. + + +## Remove documents + +Run the following to remove the document from the index: + +```bash +$ vespa document -v remove id:news:news::1 +``` + +Well done! + + +## Stopping and starting Vespa + +Keep Vespa running to continue with the next steps in this tutorial set (skip the below). 
+ +To stop Vespa, we can run the following commands: + +```bash +$ docker exec vespa vespa-stop-services +$ docker exec vespa vespa-stop-configserver +``` + +Likewise, to start the Vespa services: + +```bash +$ docker exec vespa vespa-start-configserver +$ docker exec vespa vespa-start-services +``` + +If a [restart is required](/en/reference/schemas/schemas#changes-that-require-restart-but-not-re-feed) due to changes in the application package, these two steps are what you need to do. + +To wipe the index and restart: + +```bash +$ docker exec vespa sh -c ' \ + vespa-stop-services && \ + vespa-remove-index -force && \ + vespa-start-services' +``` + +You can stop and kill the Vespa container application like this: + +```bash +$ docker stop vespa; docker rm -f vespa +``` + +This will delete the Vespa application, including all data and configuration. See [container tuning for production](/en/operations/self-managed/docker-containers). + + +## Conclusion + +Our simple application should now be up and running. In the [next part of the tutorial](/en/learn/tutorials/news-2-basic-feeding-and-query), we'll start building from this foundation. diff --git a/mintlify-docs/en/learn/tutorials/news-2-basic-feeding-and-query.mdx b/mintlify-docs/en/learn/tutorials/news-2-basic-feeding-and-query.mdx new file mode 100644 index 0000000000..32a2b70b19 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/news-2-basic-feeding-and-query.mdx @@ -0,0 +1,322 @@ +--- +title: "News search and recommendation tutorial - applications, feeding and querying" +--- + +This is the second part of the tutorial series for setting up a Vespa application for personalized news recommendations. The parts are: + +1. [Getting started](/en/learn/tutorials/news-1-deploy-an-application) +2. [A basic news search application](/en/learn/tutorials/news-2-basic-feeding-and-query) - application packages, feeding, query +3. [News search](/en/learn/tutorials/news-3-searching) - sorting, grouping, and ranking +4. 
[Generating embeddings for users and news articles](/en/learn/tutorials/news-4-embeddings) +5. [News recommendation](/en/learn/tutorials/news-5-recommendation) - partial updates (news embeddings), ANNs, filtering +6. [News recommendation with searchers](/en/learn/tutorials/news-6-recommendation-with-searchers) - custom searchers, doc processors +7. [News recommendation with parent-child](/en/learn/tutorials/news-7-recommendation-with-parent-child) - parent-child, tensor ranking +8. Advanced news recommendation - intermission - training a ranking model +9. Advanced news recommendation - ML models + +In this part, we will build upon the minimal Vespa application in the previous part. First, we'll take a look at the [Microsoft News Dataset](https://msnews.github.io/) (MIND), which we'll be using throughout the tutorial. We'll use this to set up the search schema, deploy the application and feed some data. We'll round off with some basic querying before moving on to the next part of the tutorial: searching for content. + +For reference, the final state of this tutorial can be found in the [app-2-feed-and-query](https://github.com/vespa-engine/sample-apps/tree/master/news/app-2-feed-and-query) subdirectory of the `news` sample application. + + +## The Microsoft News Dataset + +During these tutorials, we will use the [Microsoft News Dataset](https://msnews.github.io/) (MIND). This is a large-scale dataset for news recommendation research. It contains over 160.000 articles, 15 million impressions logs, and 1 million users. We will not use the full dataset in this tutorial. To make the tutorial easier to follow along, we will use the much smaller DEMO part containing only 5000 users. However, readers are free to use the entire dataset at their own discretion. + +The [MIND dataset description](https://github.com/msnews/msnews.github.io/blob/master/assets/doc/introduction.md) contains an introduction to the contents of this dataset. 
For this tutorial, there are particularly two pieces of data that we will use: + +- News article content which contains data such as title, abstract, news category, and entities extracted from the title and abstract. +- Impressions which contain a list of news articles that were shown to a user, labeled with whether the user clicked on them or not. + +We'll start with developing a search application, so we'll focus on the news content at first. We'll use the impression data as we begin building the recommendation system later in this series. + +Let's start by downloading the data. The `news` sample app directory will be our starting point. We've included a script to download the data for us: + +```bash +$ ./bin/download-mind.sh small +``` + +The argument defines which dataset to download. Here, we download the `small` dataset, but `small` and `large` are valid options. Both the training and validation parts are downloaded to a directory called `mind`. Both `train` and `dev` datasets will be downloaded. + +Taking a look at the data, in `mind/train/news.tsv`, we see tab-separated lines like the following: + +``` +N16680 travel traveltripideas The Most Beautiful Natural Wonder in Every State While humans have built some impressive, gravity-defying, and awe-inspiring marvels here are the most photographed structures in the world the natural world may have us beat. https://www.msn.com/en-us/travel/traveltripideas/the-most-beautiful-natural-wonder-in-every-state/ss-AAF8Brj?ocid=chopendata [] [] +``` + +Here we see the news article id, a category, a subcategory, the title, an abstract, and a URL to the article's content. The last two fields contain the identified entities in the title and abstract. This particular news item has no such entities. + +Note that the body content of the news article is retrievable by the URL. The dataset repository contains tools to download this. 
For the purposes of this tutorial, we won't be using this data, but feel free to download yourself. + +Let's start building a Vespa application to make this data searchable. We'll create the directory `my-app` under the `news` sample app directory to contain your Vespa application: + +```bash +$ mkdir -p my-app/schemas +``` + + +## Application Packages + +![Vespa Overview](/assets/img/vespa-overview.svg) + +A Vespa [application package](/en/basics/applications) is the set of configuration files and Java plugins that together define the behavior of a Vespa system: what functionality to use, the available document types, how ranking will be done and how data will be processed during feeding and indexing. The schema, e.g., `news.sd`, is a required part of an application package — the other file needed is `services.xml`. + +For self-hosted multi-node deployments, a `hosts.xml` file is also needed. For multi-node self-hosted deployments using `hosts.xml`, see the [multinode high availability](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) sample application. + +We mentioned these files in the previous part but didn't really explain them at the time. We'll go through them here, starting with the specification of services. + + +### Services Specification + +The [services.xml](/en/reference/applications/services/services) file defines the services that make up the Vespa application — which services to run and how many nodes per service. Write the following to `news/my-app/services.xml`: + +```xml expandable + + + + + + + + + + + + + 1 + + + + + + + + + +``` + +Quite a lot is set up here: + +- `` defines the stateless [container cluster](/en/applications/containers) for document, query and result processing +- `` sets up the [query endpoint](/en/querying/query-api). The default port is 8080. +- `` sets up the [document endpoint](/en/reference/api/document-v1) for feeding and visiting. +- `` defines the nodes required per service. 
(See the [reference](/en/reference/applications/services/container) for more on container cluster setup). +- `` The stateful content cluster +- `` denotes how many copies to store of each document. +- `` assigns the document types in the *schema* — the content cluster capacity can be increased by adding node elements — see [elasticity](/en/content/elasticity). (See also the [reference](/en/reference/applications/services/content) for more on content cluster setup.) + + +### Schema + +In terms of data, Vespa operates with the notion of [documents](/en/schemas/documents). A document represents a single, searchable item in your system, e.g., a news article, a photo, or a user. Each document type must be defined in the Vespa configuration through a [schema](/en/basics/schemas). Think of the document type in a schema as similar to a table definition in a relational database - it consists of a set of fields, each with a given name, a specific type, and some optional properties. The data fed into Vespa must match the structure of the schema, and the results returned when searching will be in this format as well. There is no dynamic field creation support in Vespa, one can say Vespa document schemas are strongly typed. + +The `news` document type mentioned in the `services.xml` file above is defined in a schema. Schemas are found under the `schemas` directory in the application package, and **must** have the same name as the document type mentioned in `services.xml`. + +Given the MIND dataset described above, we'll set up the schema as follows. 
Write the following to `news/my-app/schemas/news.sd`: + +```sd expandable +schema news { + document news { + field news_id type string { + indexing: summary | attribute + attribute: fast-search + } + field category type string { + indexing: summary | attribute + } + field subcategory type string { + indexing: summary | attribute + } + field title type string { + indexing: index | summary + index: enable-bm25 + } + field abstract type string { + indexing: index | summary + index: enable-bm25 + } + field body type string { + indexing: index | summary + index: enable-bm25 + } + field url type string { + indexing: index | summary + } + field date type int { + indexing: summary | attribute + attribute: fast-search + } + field clicks type int { + indexing: summary | attribute + } + field impressions type int { + indexing: summary | attribute + } + } + + fieldset default { + fields: title, abstract, body + } + +} +``` + +The `document` is wrapped inside another element called `schema`. The name following these elements, here `news`, must be exactly the same for both. + +This document contains several fields. Each field has a [type](/en/reference/schemas/schemas#field), such as `string`, `int`, or `tensor`. Fields also have properties. For instance, property `indexing` configures the *indexing pipeline* for a field, which defines how Vespa will treat input during indexing — see [indexing language](/en/reference/writing/indexing-language). Each part of the indexing pipeline is separated by the pipe character '|': + +- `index:` Create a search index for this field. +- `attribute:` Store this field in memory as an [attribute](/en/content/attributes) — for [sorting](/en/reference/querying/sorting-language), [querying](/en/querying/query-api), [ranking](/en/basics/ranking) and [grouping](/en/querying/grouping). +- `summary:` Lets this field be part of the [document summary](/en/querying/document-summaries) in the result set. 
+ +Here, we also use the [index](/en/reference/schemas/schemas#index) property, which sets up parameters for how Vespa should index the field. For the `title`, `abstract`, and `body` fields, we configure Vespa to set up an index compatible with [bm25 ranking](/en/reference/ranking/rank-features#bm25) for text search. + + +## Deploy the Application Package + +With the two necessary files above, we are ready to deploy the application package. Make sure it looks like this (use `ls` if `tree` is not installed): + +``` +my-app/ +├── schemas +│ └── news.sd +└── services.xml +``` + +```bash +$ vespa deploy --wait 300 my-app +``` + + +## Feeding data + +The data fed to Vespa must match the schema for the document type. The downloaded MIND data must be converted to a valid Vespa JSON [document format](/en/reference/schemas/document-json-format) before it can be fed to Vespa: + +```bash +$ python3 src/python/convert_to_vespa_format.py mind +``` + +The argument is where to find the downloaded data above, which was in the `mind` directory. This script creates a new file in that directory called `vespa.json`. This contains all 28603 news articles in the data set. This file can now be fed to Vespa. Use the method described in the previous part: + +```bash +$ vespa feed mind/vespa.json --target http://localhost:8080 +``` + +`vespa feed` reads a JSON array of document operations, or JSONL with one Vespa document JSON formatted operation per line. Once the feed job finishes, all our 65 238 documents are searchable, let us do a quick query to verify: + +```bash +$ vespa query -v 'yql=select * from news where true' 'hits=0' +``` + +You can verify that specific documents are indexed by fetching documents by document ID using the [Document V1 API](/en/writing/document-v1-api-guide): + +```bash +$ vespa document -v get id:news:news::N10864 +``` + + +## The first query + +Searching with Vespa is done using HTTP(S) GET or HTTP(S) POST requests, like: + +``` +/search?yql=select..&hits=1... 
+``` + +or with a JSON POST: + +```json +{ + "yql" : "select ..", + "hits" : 2 +} +``` + +The only mandatory parameter is the query, using either `yql=` or `query=`. More details in the [Query API](/en/querying/query-api). + +Consider the query: `select * from news where default contains "music"` + +Given the above schema, where the fields `title`, `abstract` and `body` are part of the `fieldset default`, any document containing the word "music" in one or more of these fields matches that query. Let's try that with either a GET query: + +```bash +$ vespa query -v 'yql=select * from news where default contains "music"' +``` + +or a POST JSON query (Notice the *Content-Type* header specification): + +```bash +$ curl -s -H "Content-Type: application/json" \ + --data '{"yql" : "select * from sources * where default contains \"music\""}' \ + http://localhost:8080/search/ | python3 -m json.tool +``` + + +Try the [Query Builder](https://github.com/vespa-engine/vespa/tree/master/client/js/app#query-builder) application! + + +Looking at the output, please note: + +- The field `documentid` in the output and how it matches the value we assigned to each put operation when feeding data to Vespa. +- Each hit has a property named relevance, which indicates how well the given document matches our query, using a pre-defined default ranking function. You have full control over ranking — more about ranking and ordering later. The hits are sorted by this value (descending). +- When multiple hits have the same relevance score, their internal ordering is undefined. However, their internal ordering will not change unless the documents are re-indexed. +- You can add `&trace.level=3` to dump query parsing details and execution plan, see [query tracing](/en/querying/query-api#query-tracing). +- The `totalCount` field at the top level contains the number of documents that *matched* the query. +- Also note the `coverage` element, this tells us how many documents and nodes we searched over. 
Coverage might be degraded, see [graceful degradation](/en/performance/graceful-degradation). + +Prefer HTTP POST over GET in production due to limitations on URI length (64 Kb). + + +### Query examples + +```bash +$ vespa query -v 'yql=select title from news where title contains "music"' +``` + +Again, this is a search for the single term "music", but this time explicitly in the `title` field. This means that we only want to match documents that contain the word "music" in the field `title`. As expected, you will see fewer hits for this query than for the previous one searching the `fieldset default`. Also note that we scope the select to only return the title. + +```bash +$ vespa query -v 'yql=select title from news where default contains "music" and default contains "festival"' +``` + +This is a query for the two terms "music" and "festival", combined with an `AND` operation; it finds documents that match both terms, not just one of them. + +```bash +$ vespa query -v \ + 'yql=select title from news where userQuery()' \ + 'query=music festival' \ + 'type=all' +``` + +This combines YQL [userQuery()](/en/reference/querying/yql#userquery) with Vespa's [simple query language](/en/reference/querying/simple-query-language). In this case, documents needs to match both "music" and "festival". + +```bash +$ vespa query -v \ + 'yql=select title from news where userQuery()' \ + 'query=music festival -beer' \ + 'type=any' +``` + +Above changes the query type from all to any, so that all documents that match either (or both) of the terms are returned, excluding documents with the term "beer". Note that number of hits which are matched and ranked increases the computational complexity of the query execution. See [using WAND with Vespa](/en/ranking/wand) for a way to speed up evaluation of type any/or-like queries. 
+ +```bash +$ vespa query -v \ + 'yql=select title from news where userQuery()' \ + 'query=music festival' \ + 'type=phrase' \ + 'default-index=title' +``` + +Above searches using `type=phrase` which requires the exact phrase "music festival" to match in the title. + +```bash +$ vespa query -v \ + 'yql=select title from news where rank(userQuery(), title contains "festival")' \ + 'query=music' +``` + +Search for "music" in the default fieldset, boost documents with festival in the title. The [rank()](/en/reference/querying/yql#rank) query operator allows us to retrieve on the first operand, and have match ranking features calculated for the second operand argument. The second and further operands does not impact recall (which documents match the query), but can be used to tune precision (ordering of the results). More on ranking in the next part of the tutorial. + + +## Conclusion + +We now have a Vespa application running with searchable data. In the [next part of the tutorial](/en/learn/tutorials/news-3-searching), we'll explore searching with sorting, grouping, and ranking results. diff --git a/mintlify-docs/en/learn/tutorials/news-3-searching.mdx b/mintlify-docs/en/learn/tutorials/news-3-searching.mdx new file mode 100644 index 0000000000..b87b204802 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/news-3-searching.mdx @@ -0,0 +1,340 @@ +--- +# Copyright Vespa.ai. All rights reserved. +title: "News search and recommendation tutorial - searching" +--- + + +This is the third part of the tutorial series for setting up a Vespa application for personalized news recommendations. The parts are: + +1. [Getting started](/en/learn/tutorials/news-1-deploy-an-application) +2. [A basic news search application](/en/learn/tutorials/news-2-basic-feeding-and-query) - application packages, feeding, query +3. [News search](/en/learn/tutorials/news-3-searching) - sorting, grouping, and ranking +4. 
[Generating embeddings for users and news articles](/en/learn/tutorials/news-4-embeddings) +5. [News recommendation](/en/learn/tutorials/news-5-recommendation) - partial updates (news embeddings), ANNs, filtering +6. [News recommendation with searchers](/en/learn/tutorials/news-6-recommendation-with-searchers) - custom searchers, doc processors +7. [News recommendation with parent-child](/en/learn/tutorials/news-7-recommendation-with-parent-child) - parent-child, tensor ranking +8. Advanced news recommendation - intermission - training a ranking model +9. Advanced news recommendation - ML models + +In the previous part, we converted the [Microsoft News Dataset](https://msnews.github.io/) (MIND) to Vespa, and fed it to our application. In this part, we'll issue searches in this content and look at sorting, grouping, and ranking the results. + +For reference, the final state of this tutorial can be found in the [app-3-searching](https://github.com/vespa-engine/sample-apps/tree/master/news/app-3-searching) sub-directory of the `news` sample application. + +Conceptually, Vespa has two stages when determining the exact result to return. This first is "matching", where all the documents that match the query are found. This is a binary decision; either the document matches or it doesn't. For instance, when searching for a word, all documents that contain it are selected as candidates in this stage. + +The next stage determines the ordering of the results. We can think of the results being ordered either by: + +- a fixed value, or attribute, in the document +- a function calculating a score + +Ordering by an attribute is called [sorting](/en/reference/querying/sorting-language). For instance, we can sort by decreasing `date`. [Grouping](/en/reference/querying/grouping-language) also works on attributes. An example is to group the results by a `category` attribute. + +Calculating a score to order by is generally called "ranking". 
As these scores are usually dependent upon both query and document, they can also be called *relevance*. Such expressions can be arbitrarily complex, but in general, require some form of computation to find this score. Ranking can be divided into [multiple rank phases](/en/ranking/phased-ranking) as well. + +We'll start by looking at attribute-based sorting and grouping before moving on to ranking. + + +## What is an attribute? + +We saw multiple examples of attributes in the `news.sd` schema, for instance: + +`field date type int { indexing: summary | attribute attribute: fast-search }` + +Note that this `date` field has been defined as an `int` here, and when feeding document, we convert the date to the format `YYYYMMDD`. + +An [attribute](/en/content/attributes) is an in-memory field - this is different from _index_ fields, which may be moved to a disk-based index as more documents are added and the index grows. Since attributes are kept in memory, they are excellent for fields that require fast access for many documents, e.g. fields used for sorting, ranking or grouping query results. The downside is higher memory usage. + +In the above field definition we have included an additional property `attribute: fast-search` which will inform Vespa that we want to build inverted index structures (dictionary and posting lists) for *fast* *matching* in the field. See more about [when to use fast-search](/en/performance/feature-tuning#when-to-use-fast-search-for-attribute-fields) in the performance feature tuning section. + + +### Example queries using attribute field + +```bash +$ vespa query -v 'yql=select * from news where default contains "20191110"' +``` + + +This is a single-term query for the term `20191110` in the `default` fieldset. In the schema, the field `date` is not included in the `default` fieldset, so no results are found. 
Instead, we search using `=` which can be used for numeric and bool fields: + + +```bash +$ vespa query -v 'yql=select * from news where date=20191110' +``` + + +To get documents that were created 10 November 2019, and whose `date` field is `20191110`, replace `default` with `date` in the YQL query string. + + +```bash +$ vespa query -v 'yql=select * from news where date=20191110 and default contains "weather"' +``` + + +This is a query with two terms; a search in the `default` field set for the term "weather" combined with a search in the `date` field for the value `20191110`. + + +### Range searches + +The examples above searched over `date` just as any other field, and requested documents where the value was exactly `20191110`. Since the field is of type _int_, however, we can use it for _range searches_ as well, using the "less than" and "greater than" operators (`<` and `>`). The query: + + +```bash +$ vespa query -v 'yql=select * from news where date < 20191110' +``` + + +finds all documents where the value of `date` is less than `20191110`, i.e. all documents from before 10 November 2019, while + + +```bash +$ vespa query -v 'yql=select * from news where date = 20191108' +``` + + +finds all news articles from 8 November 2019 to 10 November 2019, inclusive. + + +### Sorting on attribute fields + +The first feature we will look at is how an attribute can be used to change the hit order. By now, you have probably noticed that hits are returned in order of descending relevance, i.e. how well the document matches the query — if not, take a moment to verify this. You might ask how Vespa does this since we haven't even touched upon ranking yet. The answer is that Vespa uses its [nativeRank](/en/ranking/nativerank) score unless anything else is defined in the schema. We'll get back to defining custom ranking later on. 
+ +Now send the following query to Vespa, and look at the order of the hits: + + +```bash +$ vespa query -v 'yql=select date from news where default contains phrase("music","festival") order by date' +``` + + +By default, sorting is done in ascending order. This can also be specified by appending `asc` after the sort attribute name. Use `desc` to sort the results in descending order: + + +```bash +$ vespa query -v 'yql=select date from news where default contains phrase("music","festival") order by date desc' +``` + + +Attempting to sort on a field which is not defined as attribute in the schema will create an error. + + +### Query time result grouping + +[Grouping](/en/querying/grouping) is the concept of looking through all matching documents at query-time and then performing operations with specified fields across all the documents — some common use cases include: + +- Find all the unique values for a given field, make **one group per unique value**, and return the count of documents per group. +- **Group documents by time and date** in fixed-width or custom-width buckets. An example of fixed-width buckets could be to group all documents by year, while an example of custom buckets could be to sort bug tickets by date of creation into the buckets _Today_, _Past Week_, _Past Month_, _Past Year_, and _Everything else_. +- Calculate the **minimum/maximum/average value** for a given field. +- [Result diversification](https://blog.vespa.ai/result-diversification-with-vespa/), e.g. to only display 3 best ranking results per category for up to 5 categories. + +Displaying such groups and their sizes (in terms of matching documents per group) on a search result page, with a link to each such group, is a common way to let users refine searches. 
For now, we will only do a simple grouping query to get a list of unique values for `category`, ordered by the number of documents they occur in and top 3 is shown: + + +```bash +$ vespa query -v 'yql=select * from news where true limit 0 | all(group(category) max(3) order(-count())each(output(count())))' +``` + + +Note that expression after the pipe (`|`): this is the grouping expression that determines how grouping will be performed. You can read more about the grouping syntax in the [grouping reference documentation](/en/reference/querying/grouping-language). `limit 0` is an alternative syntax for the native `hits` parameter, in this case we are only interested in the group counts, so we set limit to 0. + +For this query, you will get something like the following: +```json expandable +{ + "root": { + "children": [ + { + "children": [ + { + "children": [ + { + "fields": { + "count()": 9115 + }, + "id": "group:string:news", + "relevance": 1.0, + "value": "news" + }, + { + "fields": { + "count()": 6765 + }, + "id": "group:string:sports", + "relevance": 0.6666666666666666, + "value": "sports" + }, + { + "fields": { + "count()": 1886 + }, + "id": "group:string:finance", + "relevance": 0.3333333333333333, + "value": "finance" + } + ], + "continuation": { + "next": "BGAAABEBGBC" + }, + "id": "grouplist:category", + "label": "category", + "relevance": 1.0 + } + ], + "continuation": { + "this": "" + }, + "id": "group:root:0", + "relevance": 1.0 + } + ], + "coverage": { + "coverage": 100, + "documents": 28603, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "fields": { + "totalCount": 28603 + }, + "id": "toplevel", + "relevance": 1.0 + } +} +``` + +So, the three most common unique values of `category` among the indexed documents (for the demo data set) are: + +- `news` with 9115 articles +- `sports` with 6765 articles +- `finance` with 1886 articles + +Try to change the filter part of the YQL+ expression — the `where` clause — to a text match of 
"weather", or restrict `date` to be less than 20191110, and see how the list of unique values changes as the set of matching documents for your query changes. If you try to search for a single term that is *not* present in the document set, you will see that the list of groups is empty as no documents have been matched. Vespa grouping is only applied over the documents which matched the query. + +In the following example we use the [select](/en/reference/api/query#select) parameter to pass the grouping specification: + + +```bash +$ vespa query -v 'yql=select * from news where userQuery() limit 0' \ 'select=all(group(category) max(2) each(max(2)each(output(summary()))))' \ 'query=drinks' +``` + + +This request searches for drinks, groups by category and for each unique category output the 2 top ranking hits (according to the rank profile used). Groups are sorted by default by maximum relevance in the group. Notice that we also set an upper limit on the number of unique groups my the outermost max. This is important in cases with many unique values. See also [Result diversification using Vespa result grouping](https://blog.vespa.ai/result-diversification-with-vespa/). + +Please refer to the [grouping guide](/en/querying/grouping) for more information and examples using Vespa grouping. Similar to with sorting, attempting to group on a field which is not defined as attribute in the schema will create an error. + + +### Matching - index versus attribute + +Before we move on to ranking, it's important to know some of the differences between `index` and `attribute`. + +#### Matching + +Consider the `title` field from our schema, and the document for the article with title "A little snow causes a big mess, more than 100 crashes on Minnesota roads". In the original input, the value for `title` is a string built of up the 14 words, with a single white space character between them. How should we be able to search this field? 
+ +For string fields with `index` which defaults to `match:text`, Vespa performs linguistic processing of the string. This includes [tokenization](/en/linguistics/linguistics-opennlp#tokenization), [normalization](/en/linguistics/linguistics-opennlp#normalization) and language dependent [stemming](/en/linguistics/linguistics-opennlp#stemming) of the string. + +In our example, this means that the string above is split into 14 tokens, enabling Vespa to match this document for: + +- single-term queries such as "Minnesota", "snow" and "roads", +- the exact phrase query "A little snow causes a big mess, more than 100 crashes on Minnesota roads", +- a query with two or more tokens in either order (e.g. "minnesota crashes"). + +This is how we all have come to expect normal free text search to work. + +However, string fields with `indexing: attribute` do not support `match:text`, only *exact matching* or *prefix matching*. Exact matching is the default, and, as the name implies, it requires you to search for the exact contents of the field in order to get a match. See supported [match](/en/reference/schemas/schemas#match) modes and the differences in support between `attribute` and `index`. + +#### Memory usage + +Attributes are stored in memory, as opposed to fields with `index`, where the data is mostly kept on disk but paged in on-demand and cached by the OS buffer cache. Even with large flavor types, one will notice that it is not practical to define all the document type fields as attributes, as it will heavily restrict the number of documents per search node. Some Vespa applications have more than 1 billion documents per node — having megabytes of text per document in memory might not be cost-effective. + +#### When to use attributes + +There are both advantages and drawbacks of using attributes — it enables sorting, ranking and grouping, but requires more memory and does not support `match:text` capabilities. 
Attribute fields do support at least one order of magnitude higher update throughput than regular `index` fields, see [partial updates with Vespa](/en/writing/partial-updates). + +When to use attributes depends on the application; in general, use attributes for: + +- fields used for sorting, e.g. a last-update timestamp, +- fields used for grouping, e.g. category, and +- fields accessed in ranking expressions + +Finally, all numeric and [tensor](/en/ranking/tensor-user-guide) fields used in ranking must be defined with attribute. + +#### Combining index and attribute + +`field category type string { indexing: summary | attribute | index }` + +Combining both index and attribute for the same field is supported. In this case, we can sort and group on the category, while search or matching will be using index matching with `match:text`, which will tokenize and stem the contents of the field. + + +## Relevance and Ranking + +[Ranking](/en/basics/ranking) and relevance were briefly mentioned above; what is really the relevance of a hit? How can one change the relevance calculations? It is time to introduce _rank profiles_ and _ranking expressions_ — simple, yet powerful methods for tuning the relevance. + +Relevance is a measure of how well a given document matches a query. The default relevance is calculated by a formula that takes several *matching* factors into consideration. It computes, in essence, how well the document matches the terms in the query. The default Vespa ranking function and its limitations are described in [ranking with nativeRank](/en/ranking/nativerank). + +Ranking signals that might be useful, like freshness (the age of the document compared to the time of the query) or any other document or query features, are not a part of the nativeRank calculation. These need to be added to the ranking function depending on application specifics. 
+ +Some use cases for tweaking the relevance calculations: + +- Personalize search results based on some property; age, nationality, language, friends and friends of friends. +- Rank fresh (age) documents higher, while still considering other relevance measures. +- Rank documents by geographical location, searching for relevant resources nearby. +- Rank documents by machine learned ranking functions - Learning to Rank (LTR). +- Rank documents by business constraints - For example by product availability. + +Vespa allows creating any number of _rank profiles_: named collections of ranking and relevance calculations that one can choose from at query time. A number of built-in functions and expressions are available to create highly specialized ranking expressions and users can define their own functions in the schema. + + +### News article popularity signal + +During the conversion of the news dataset, the conversion script counted both the number of times a news article was shown (impressions) and how many clicks it received. A high number of clicks relative to impressions indicates that the news article was generally popular. We can use this signal in our ranking. Since both clicks and impressions are attribute fields, these fields can be [updated](/en/writing/partial-updates) at scale with very high throughput. + +We can use this signal in our ranking, by including a `popularity` rank profile, as defined below at the bottom of `schemas/news.sd`. 
Note that rank profiles are defined outside the `document` block: + + +```txt +schema news { document news { field news_id type string { indexing: summary | attribute attribute: fast-search } field category type string { indexing: summary | attribute } field subcategory type string { indexing: summary | attribute } field title type string { indexing: index | summary index: enable-bm25 } field abstract type string { indexing: index | summary index: enable-bm25 } field body type string { indexing: index | summary index: enable-bm25 } field url type string { indexing: index | summary } field date type int { indexing: summary | attribute attribute: fast-search } field clicks type int { indexing: summary | attribute } field impressions type int { indexing: summary | attribute } } + +fieldset default { fields: title, abstract, body } + +rank-profile popularity inherits default { function popularity() { expression: if (attribute(impressions) > 0, attribute(clicks) / attribute(impressions), 0) } first-phase { expression: nativeRank(title, abstract) + 10 * popularity } } } +``` + + +- `rank-profile popularity inherits default` + +This configures Vespa to create a new rank profile named `popularity`, which inherits all the properties of the default rank-profile; only properties that are explicitly defined, or overridden, will differ from those of the default rank-profile. + +- `first-phase` + +Relevance calculations in Vespa are two-phased. The calculations done in the first phase are performed on every single document matching your query, while the second phase calculations are only done on the top `n` documents as determined by the calculations done in the first phase. See [phased ranking](/en/ranking/phased-ranking). + +- `function popularity()` + +This sets up a function that can be called from other expressions. This function calculates the number of clicks divided by impressions for indicating popularity. 
However, this isn't really the best way of calculating this as an article with a low number of impressions can score high on such a value, even though uncertainty is high. + +- `expression: nativeRank(title, abstract) + 10 * popularity` + +This expression is used to rank documents. Here, `nativeRank` — the default ranking function, here restricted to the `title` and `abstract` fields — is included to make the query relevant, while the second term calls the `popularity` function. The weighted sum of these two terms is the final relevance for each document. Note that the weight here, `10`, is set by observation. A better approach would be to learn such values using machine learning. + +More information can be found in the [schema reference](/en/reference/schemas/schemas#rank-profile). + +Deploy the _popularity_ rank profile: + + +```bash +$ vespa deploy --wait 300 my-app +``` + +Run a query: + +```bash +$ vespa query -v \ 'yql=select * from news where default contains "music"' \ 'ranking=popularity' +``` + +and find documents with high `popularity` values at the top. Note that we must specify the rank profile to use with the run time `ranking` parameter. + +## Conclusion + +After completing this part of the tutorial, you should now have a basic understanding of how you can load data into Vespa and effectively search for content. In the [next part of the tutorial](/en/learn/tutorials/news-4-embeddings), we'll start with the basics for transforming this search app into a recommendation system. + diff --git a/mintlify-docs/en/learn/tutorials/news-4-embeddings.mdx b/mintlify-docs/en/learn/tutorials/news-4-embeddings.mdx new file mode 100644 index 0000000000..bb3d1e9309 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/news-4-embeddings.mdx @@ -0,0 +1,291 @@ +--- +title: "News search and recommendation tutorial - embeddings" +--- + +This is the fourth part of the tutorial series for setting up a Vespa application for personalized news recommendations. The parts are: + +1. 
[Getting started](/en/learn/tutorials/news-1-deploy-an-application) +2. [A basic news search application](/en/learn/tutorials/news-2-basic-feeding-and-query) - application packages, feeding, query +3. [News search](/en/learn/tutorials/news-3-searching) - sorting, grouping, and ranking +4. [Generating embeddings for users and news articles](/en/learn/tutorials/news-4-embeddings) +5. [News recommendation](/en/learn/tutorials/news-5-recommendation) - partial updates (news embeddings), ANNs, filtering +6. [News recommendation with searchers](/en/learn/tutorials/news-6-recommendation-with-searchers) - custom searchers, doc processors +7. [News recommendation with parent-child](/en/learn/tutorials/news-7-recommendation-with-parent-child) - parent-child, tensor ranking +8. Advanced news recommendation - intermission - training a ranking model +9. Advanced news recommendation - ML models + +In this part, we'll start transforming our application from news search to recommendation. We won't be using Vespa at all in this part. Our focus is to generate news and user embeddings. We'll start using these embeddings in the next part - you can skip this part if you wish. + +The primary function of a recommendation system is to provide items of interest to any given user. The more we know about the user, the better recommendations we can provide. We can view recommendation as search where the query is the user profile. So, in this tutorial we will build upon the previous news search tutorial by creating user profiles and use them to search for relevant news articles. + +We start by generating embeddings using a collaborative filtering method. We'll then improve upon that using a content-based approach, which generates embedding based on BERT models. Since we'll use this in a nearest neighbors algorithm, we'll touch upon how the maximum inner product search is transformed to a distance search form. + +Let's start with taking a look again at what data the MIND dataset provides for us. 
+ + +### Requirements + +We start using some machine learning tools in this tutorial. Specifically, we need Numpy, Scikit-learn, PyTorch, and the HuggingFace Transformers library. Make sure you have all the necessary dependencies by running the following in the sample application directory: + +```bash +$ python3 -m pip install --ignore-installed -r requirements.txt +``` + + +## The MIND dataset + +The MIND dataset, for our purposes in this series of tutorials, consists of two main files: `news.tsv` and `behaviors.tsv`. We used the former in the previous tutorial, as that contains all news article content. + +The `behaviors.tsv` file contains a set of impressions. An impression is an ordered list of news articles that was generated for a user. It includes which of those articles the user clicked, and conversely, which ones were not. We designate articles not clicked as "skips". Also included in the impression is a list of articles the user has previously clicked. An example is: + +``` +3 U11552 11/11/2019 1:03:52 PM N2139 N18390-0 N10537-0 N23967-1 +``` + +Here, user `U11552` was shown three articles: `N18390`, `N10537`, and `N23967`, of which the user skipped two and clicked the last one. At that time, the user had previously clicked on article `N2139`. We can cross-reference with the `news.tsv` and extract the content of these articles. + +We interpret a click as a positive signal for interest and a skip as possibly a negative signal for interest. This is called implicit feedback, as the users haven't explicitly expressed their interests. However, using clicks and skips, we can still start to infer the users' interests. + + +## Collaborative filtering in recommendation systems + +A simple approach to provide recommendations to the above would be to extract the categories, subcategories, and/or entities the users have implicitly interacted with, and store these for each user. 
We can call this a sparse user profile because we store the exact terms of entities or categories. We could then use traditional information retrieval techniques to search for more articles with similar content. + +However, by doing this we miss out on a lot of information. For instance, some categories or entities are similar, which could be of interest to the user. Also, users with similar interests tend to click on similar articles. If some type of content was interesting to one user, it would likely be interesting to similar users. + +Exploiting this information is called collaborative filtering and the classical approach to this is matrix factorization. In this approach, we create a large matrix with users along one axis and news articles along the other. We'll call this the interaction matrix. Then we factorize this matrix into two smaller matrices, where the product of these two smaller approximates the original. + + +![Matrix factorization](/assets/img/tutorials/mf.png) + + +In the image above, you can see a user matrix with as many rows as there are users and a news matrix with as many columns as there are news articles. Each user row, or news column, has the same length, signified by the `k` dimension. The intuition is that the dot product of the `k` length vector for a user and news pair approximates the user's interest in the news article. Since the information is compressed into the `k` length vector, this works across users as well. Thus, the "collaborative" filtering. + +These `k` length vectors can be extracted from the matrices and associated with the user or news article. So, when we want to recommend news articles to a user, we simply find the user's vector and find the articles with the highest dot products. In the following, we will use this approach to generate such embeddings for users and news articles. + +Please note, however, this approach would not work well in practice for news recommendation. 
The reason is that a large part of news recommendation is to recommend **new** news articles, which might not have received any implicit feedback yet. This is called the "cold start" problem. For such problems, we need to use additional content (often called "side information") of news articles to provide recommendations. We'll tackle this "cold start" a bit later. + + +## Generating embeddings + +A standard method for factorizing the interaction matrix is to use Alternating Least Squares. The idea is to randomly fill the user and news matrices and freeze one of the matrices' parameters while solving for the other. By alternating between which matrix is fixed, this can be solved with a traditional least-squares problem. We can iterate the process until convergence. + +This tutorial aims to generate embeddings so that the dot product between a user and news vector signifies the probability of a click. Using this signal we can rank news articles by click probability. To train the embedding vectors, we will use a stochastic gradient descent approach to modify the embeddings so that their dot product followed by the logistic function predicts a user click. We use a binary cross-entropy as loss function. + +We'll use PyTorch for this. The main PyTorch model class is as follows: + +```python +class MF(torch.nn.Module): + def __init__(self, num_users, num_items, embedding_size): + super(MF, self).__init__() + self.user_embeddings = torch.nn.Embedding(num_embeddings=num_users, + embedding_dim=embedding_size) + self.news_embeddings = torch.nn.Embedding(num_embeddings=num_items, + embedding_dim=embedding_size) + + def forward(self, users, items): + user_embeddings = self.user_embeddings(users) + news_embeddings = self.news_embeddings(items) + dot_prod = torch.sum(torch.mul(user_embeddings, news_embeddings), 1) + return torch.sigmoid(dot_prod) +``` + +We use the PyTorch's `Embedding` class to hold the user and news embeddings. 
The forward function is the forward pass of the gradient descent. First, the users and items selected for a mini-batch update are extracted from their embedding tables. Then we take the dot product, apply the logistic function, and return the value. This prediction for user and news pairs is then evaluated against the click or skip labels: + +```python +# forward + backward + optimize + user_ids, news_ids, labels = batch + prediction = model(user_ids, news_ids) + loss = loss_function(prediction.view(-1), labels) + loss.backward() + optimizer.step() +``` + +This is done across several epochs. The `batch` here contains a batch of `user_id`s, `news_id`s, and `label`s used for training a mini-batch. For instance, from the example impression above, a training example would be `U11552, N23967, 1`. The code responsible for generating the training data samples 4 negative examples (skips) for each positive example (click). + +The full code can be seen in the sample application, in [train_mf.py](https://github.com/vespa-engine/sample-apps/blob/master/news/src/python/train_mf.py). Let's go ahead and generate the embeddings: + +```bash +$ ./src/python/train_mf.py mind 10 +``` + +This runs the training code for 10 epochs, and deposits the resulting user and news vectors in the `mind` directory, where the rest of the data is: + +``` +Total loss after epoch 1: 573.5299682617188 (3.49713397026062 avg) +Total loss after epoch 2: 551.6585083007812 (3.363771438598633 avg) +... +{'auc': 0.5776, 'mrr': 0.248, 'ndcg@5': 0.2573, 'ndcg@10': 0.317} +{'auc': 0.4988, 'mrr': 0.2154, 'ndcg@5': 0.2182, 'ndcg@10': 0.2824} +``` + +We can see the loss reduces over the number of epochs. The two final lines here are ranking metrics run on the training set and validation set. Here, the `AUC` metric - Area Under the (ROC) Curve - is at `0.5776` for the training set and `0.4988` for the validation set. 
If you run for a greater number of epochs, you would see the `AUC` for the training set become much larger than the validation set, around `0.974` and `0.51` respectively if run for 100 epochs. + +In this case, the `AUC` metric measures the probability of ranking relevant news higher than non-relevant news. A score of around `0.5` means that it is totally random. Thus, we haven't learned anything of use for the validation set. + +This is not overfitting but rather an instance of the cold start problem mentioned earlier. The validation set contains news articles shown to users a time period after the data in the training set. Thus, most news articles are new, and their embedding vectors are effectively random. + +We'll address this next. + + +## Addressing the cold start problem + +The approach above based itself on news articles that users interacted with in the training set period. Only the user ids and news article ids were used. To overcome the problem that new articles haven't been seen in the training set, we need to use the article's content features. So, the predictions will be based on similarity to content a user has previously interacted with, rather than the actual news article id. + +This is, naturally enough, called content-based recommendation. + +The general approach we'll take here is to still rely on a dot product between a user embedding and news embedding; however, the news embedding will be constructed from various content features. + +The MIND dataset has a few such features we can use. Each news article has a `category`, a `subcategory` and zero or more `entities` extracted from the text. These features are categorical, meaning that they have a finite set of values they can take. To handle these, we'll generate an embedding for each possible value, similar to how we generated embeddings for the user ids and news ids above. These ids are also categorical, after all. 
+ + +### Creating BERT embeddings + +However, there are other content features as well such as the `title` and `abstract`. To create embeddings from these, we'll employ a [BERT-based sentence classifier](https://huggingface.co/docs/transformers/model_doc/bert#transformers.BertForSequenceClassification) from the [HuggingFace transformers](https://huggingface.co/docs/transformers/index) library: + +```python +from transformers import BertTokenizer, BertModel +tokenizer = BertTokenizer.from_pretrained('bert-base-uncased') +model = BertModel.from_pretrained('google/bert_uncased_L-8_H-512_A-8') +tokens = tokenizer(title, abstract, return_tensors="pt") +outputs = model(**tokens) +embedding = outputs[0][0][0] +``` + +Here, we use a medium-sized BERT model with 8 layers and a hidden dimension size of 512. This means that the embedding will be a vector of size 512. We use the vector from the first `CLS` token to represent the combined title and abstract. + +To generate these embeddings for all news content, run one of the following: + +1. Generate embeddings. This might take a while, around an hour for all news articles in the `train` and `dev` demo dataset. + +```bash +$ python3 src/python/create_bert_embeddings.py mind +``` + +2. Download pre-processed embeddings: + +```bash +$ curl -L -o mind/train/news_embeddings.tsv \ + https://data.vespa-cloud.com/sample-apps-data/mind_news_embedding.tsv +$ curl -L -o mind/dev/news_embeddings.tsv \ + https://data.vespa-cloud.com/sample-apps-data/mind_news_embedding_dev.tsv +``` + +This creates a `news_embeddings.tsv` file under the `mind/train` and `mind/dev` subdirectories. + + +## Training the model + +Now that we have content-based embeddings for each news article, we can train the model to use them. 
The following figure illustrates the model we are training: + + +![Model training example](/assets/img/tutorials/embeddings.png) + + +So, we'll pass the 512-dimensional embeddings from the BERT model through a typical neural network layer to reduce dimensions to 50. We then concatenate this representation with the 50 dimensional embeddings for `category`, `subcategory` and `entity`. We only use one entity for now. This representation is then sent through another neural network layer to form the final representation for a news article. Finally, the dot product is taken with the user embedding. + +In PyTorch code, this looks like: + +```python expandable +class ContentBasedModel(torch.nn.Module): + def __init__(self, + num_users, + num_news, + num_categories, + num_subcategories, + num_entities, + embedding_size, + bert_embeddings): + super(ContentBasedModel, self).__init__() + + self.user_embeddings = torch.nn.Embedding(num_embeddings=num_users, embedding_dim=embedding_size) + self.news_embeddings = torch.nn.Embedding(num_embeddings=num_news, embedding_dim=embedding_size) + self.cat_embeddings = torch.nn.Embedding(num_embeddings=num_categories, embedding_dim=embedding_size) + self.sub_cat_embeddings = torch.nn.Embedding(num_embeddings=num_subcategories, embedding_dim=embedding_size) + self.entity_embeddings = torch.nn.Embedding(num_embeddings=num_entities, embedding_dim=embedding_size) + + self.news_bert_embeddings = torch.nn.Embedding.from_pretrained(bert_embeddings, freeze=True) + self.news_bert_transform = torch.nn.Linear(bert_embeddings.shape[1], embedding_size) + self.news_content_transform = torch.nn.Linear(in_features=embedding_size*5, out_features=embedding_size) + + def get_user_embeddings(self, users): + return self.user_embeddings(users) + + def get_news_embeddings(self, items, categories, subcategories, entities): + bert_embeddings = self.news_bert_embeddings(items) + bert_embeddings = self.news_bert_transform(bert_embeddings) + bert_embeddings = 
torch.sigmoid(bert_embeddings) + + cat_embeddings = self.cat_embeddings(categories) + news_embeddings = self.news_embeddings(items) + sub_cat_embeddings = self.sub_cat_embeddings(subcategories) + entity_embeddings_1 = self.entity_embeddings(entities[:,0]) + news_embedding = torch.cat((news_embeddings, bert_embeddings, cat_embeddings, + sub_cat_embeddings, entity_embeddings_1), 1) + news_embedding = self.news_content_transform(news_embedding) + news_embedding = torch.sigmoid(news_embedding) + + return news_embedding + + def forward(self, users, items, categories, subcategories, entities): + user_embeddings = self.get_user_embeddings(users) + news_embeddings = self.get_news_embeddings(items, categories, subcategories, entities) + dot_prod = torch.sum(torch.mul(user_embeddings, news_embeddings), 1) + return torch.sigmoid(dot_prod) +``` + +The forward pass function is pretty much the same as before. You can see the entire training script in [train_cold_start.py](https://github.com/vespa-engine/sample-apps/blob/master/news/src/python/train_cold_start.py). Running this results in: + +```bash +$ python3 src/python/train_cold_start.py mind 5 +``` + +``` +Total loss after epoch 1: 920.5855102539062 (0.703811526298523 avg) +{'auc': 0.5391, 'mrr': 0.2367, 'ndcg@5': 0.2464, 'ndcg@10': 0.3059} +... +Total loss after epoch 10: 517.16748046875 (0.3953879773616791 avg) +{'auc': 0.8758, 'mrr': 0.5074, 'ndcg@5': 0.5818, 'ndcg@10': 0.6316} +{'auc': 0.6249, 'mrr': 0.2842, 'ndcg@5': 0.3114, 'ndcg@10': 0.3733} +``` + +This is much better. The `AUC` score at epoch 9 is a respectable `0.6266`. Note that as we train further, the `AUC` for the dev set starts dropping. This is a sign of overfitting, so we should stop training. + +For reference, the baseline model for the MIND competition, [Neural News Recommendation with Multi-Head Self-Attention](https://aclanthology.org/D19-1671/), results in `0.6362`. 
This model additionally uses the user history in each impression to create a better model for the user embedding. For the moment, however, we are satisfied with these, and we'll use them going forward. Feel free to experiment and see if you can achieve better results! + + +These numbers are for the demo dataset, which is much smaller than the full dataset. For reference, in [the MIND paper](https://msnews.github.io/assets/doc/ACL2020_MIND.pdf) the baseline here achieves `0.6776` on the full dataset. + + +The training script writes these embeddings to the files `mind/user_embeddings.tsv` and `mind/news_embeddings.tsv`. + + +## Mapping from inner-product search to Euclidean search + +These vectors have been trained to maximize the inner product. Finding the best news articles given a user vector is called Maximum Inner Product Search - or MIPS. This form isn't really suitable for efficient retrieval as-is, but it can be mapped to a nearest neighbor search problem, so we can use an efficient approximate nearest neighbors index. + +When specifying `distance-metric: dotproduct`, Vespa uses the technique discussed in [Speeding Up the Xbox Recommender System Using a Euclidean Transformation for Inner-Product Spaces](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/XboxInnerProduct.pdf) to solve the MIPS case. See [blog post announcing MIPS support in Vespa](https://blog.vespa.ai/announcing-maximum-inner-product-search/). + +```sd +field embedding type tensor(d0[50]) { + indexing: attribute | index + attribute { + distance-metric: dotproduct + } +} +``` + +See [Nearest Neighbor Search](/en/querying/nearest-neighbor-search) for more information on nearest neighbor search and supported distance metrics in Vespa. + +We've included a script to create a feed suitable for Vespa: + +```bash +$ python3 src/python/convert_embeddings_to_vespa_format.py mind +``` + +We are now ready to feed these embedding vectors to Vespa. 
+ + +## Conclusion + +Now that we've generated user and document embeddings, we can start using these to recommend news items to users. We'll start feeding these in the [next part of the tutorial](/en/learn/tutorials/news-5-recommendation). diff --git a/mintlify-docs/en/learn/tutorials/news-5-recommendation.mdx b/mintlify-docs/en/learn/tutorials/news-5-recommendation.mdx new file mode 100644 index 0000000000..3d790db95e --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/news-5-recommendation.mdx @@ -0,0 +1,467 @@ +--- +title: "News search and recommendation tutorial - recommendation" +--- + +This is the fifth part of the tutorial series for setting up a Vespa application for personalized news recommendations. The parts are: + +1. [Getting started](/en/learn/tutorials/news-1-deploy-an-application) +2. [A basic news search application](/en/learn/tutorials/news-2-basic-feeding-and-query) - application packages, feeding, query +3. [News search](/en/learn/tutorials/news-3-searching) - sorting, grouping, and ranking +4. [Generating embeddings for users and news articles](/en/learn/tutorials/news-4-embeddings) +5. [News recommendation](/en/learn/tutorials/news-5-recommendation) - partial updates (news embeddings), ANNs, filtering +6. [News recommendation with searchers](/en/learn/tutorials/news-6-recommendation-with-searchers) - custom searchers, doc processors +7. [News recommendation with parent-child](/en/learn/tutorials/news-7-recommendation-with-parent-child) - parent-child, tensor ranking +8. Advanced news recommendation - intermission - training a ranking model +9. Advanced news recommendation - ML models + +In this part, we'll start transforming our application from news search to recommendation using the embeddings we created in the previous part. So, we'll start by modifying our application, so we can feed the embeddings and start using them for searching. 
+ +For reference, the final state of this tutorial can be found in the [app-5-recommendation](https://github.com/vespa-engine/sample-apps/tree/master/news/app-5-recommendation) sub-directory of the `news` sample application. + + +## Indexing embeddings + +First, we need to modify the `news.sd` search definition to include a field to hold the embedding and a recommendation rank profile: + +```sd expandable +schema news { + document news { + field news_id type string { + indexing: summary | attribute + attribute: fast-search + } + field category type string { + indexing: summary | attribute + } + field subcategory type string { + indexing: summary | attribute + } + field title type string { + indexing: index | summary + index: enable-bm25 + } + field abstract type string { + indexing: index | summary + index: enable-bm25 + } + field body type string { + indexing: index | summary + index: enable-bm25 + } + field url type string { + indexing: index | summary + } + field date type int { + indexing: summary | attribute + } + field clicks type int { + indexing: summary | attribute + } + field impressions type int { + indexing: summary | attribute + } + field embedding type tensor(d0[50]) { + indexing: attribute + attribute { + distance-metric: dotproduct + } + } + } + + fieldset default { + fields: title, abstract, body + } + + rank-profile popularity inherits default { + function popularity() { + expression: if (attribute(impressions) > 0, attribute(clicks) / attribute(impressions), 0) + } + first-phase { + expression: nativeRank(title, abstract) + 10 * popularity + } + } + + rank-profile recommendation inherits default { + first-phase { + expression: closeness(field, embedding) + } + } +} +``` + +The `embedding` field is a tensor field. Tensors in Vespa are flexible multi-dimensional data structures, and, as first-class citizens, can be used in queries, document fields, and constants in ranking. 
Tensors can be either dense or sparse or both, and can contain any number of dimensions. See [the tensor user guide](/en/ranking/tensor-user-guide) for more information. + +Here we have defined a dense tensor with a single dimension (`d0` - dimension 0), which represents a vector. The distance metric is "dotproduct" as we would like to use this field for nearest-neighbor search where we search for the maximal dotproduct. + +This is seen in the `recommendation` rank profile. Here, we've added a ranking expression using the [closeness](/en/reference/ranking/rank-features#closeness(dimension,name)) ranking feature, which calculates the dot product and uses that to rank the news articles. This depends on using the `nearestNeighbor` search operator, which we'll get back to below when searching. But for now, this expects a tensor in the query to be used as the initial search point. + +If you take a look at the file generated for the news embeddings, `mind/vespa_news_embeddings.json`, you'll see several lines with something like this: + +```json +{ + "update": "id:news:news::N13390", + "fields": { + "embedding": { + "assign": { + "values": [9.871717,-0.403103,...] + } + } + } +} +``` + +This is a [partial update](/en/writing/partial-updates). So, assuming you already have a system up and running from the previous search tutorial, you don't need to feed the entire corpus. With a partial update, you only need to update the necessary fields. So, after training another set of embeddings you can partially feed them again. Please refer to [Vespa reads and writes](/en/writing/reads-and-writes) for more information on feeding formats. + +We need to add another document type to represent a user. 
Add this schema in `schemas/user.sd`: + +```sd +schema user { + document user { + field user_id type string { + indexing: summary | attribute + attribute: fast-search + } + field embedding type tensor(d0[50]) { + indexing: summary | attribute + } + } +} +``` + +This schema is set up so that we can search for a `user_id` and retrieve the user's embedding vector. + +We also need to let Vespa know we want to use this document type, so we modify `services.xml` and add it under `documents` in the `content` section: + +```xml + + + + +``` + +```bash +$ vespa deploy --wait 300 my-app +``` + +```bash +$ sleep 20 +``` + +After redeploying with the updated schemas and `services.xml`, feed `mind/vespa_user_embeddings.json` and `mind/vespa_news_embeddings.json`: + +```bash +$ vespa feed mind/vespa_user_embeddings.json --target http://localhost:8080 +$ vespa feed mind/vespa_news_embeddings.json --target http://localhost:8080 +``` + +Once the feeding jobs finish, the index is ready to be used. We can verify that we have 65238 news documents and 94057 user documents: + +```bash +$ sleep 20 +``` + +```bash +$ vespa query -v \ + 'yql=select * from news where true' \ + 'hits=0' +``` + +```bash +$ vespa query -v \ + 'yql=select * from user where true' \ + 'hits=0' +``` + + +## Query profiles and query profile types + +Before we can test the application, we need to add a query profile type. The `recommendation` rank profile above requires a tensor to be sent along with the query. For Vespa to bind the correct types, it needs to know the expected type of this query parameter. That is called a query profile type. + +[Query profiles](/en/reference/querying/query-profiles) are named sets of search request parameters that can be set as default, so you don't have to pass them along with the query. We don't use this in this sample application. Still, we need to set up a default query profile to set up the types of query parameters we expect to pass. 
+ +So, write the following to `news/my-app/search/query-profiles/default.xml`: + +```xml + +``` + +To set up the query profile types, write them to the file `search/query-profiles/types/root.xml`: + +```xml + + + +``` + +This configures Vespa to expect a float tensor with dimension `d0[50]` when the query parameter `ranking.features.query(user_embedding)` is passed. We'll see how this works together with the `nearestNeighbor` search operator below. + + +Setting up this query profile type is required when sending a tensor as a query parameter. A common pitfall is to forget the default query profile, but that is required to successfully set up the query profile type. + + +Deploy the updates to query profiles: + +```bash +$ vespa deploy --wait 300 my-app +``` + + +## Testing the application + +We can now query Vespa using embeddings. First, let's find the user `U33527`: + +```bash +$ vespa query -v \ + 'yql=select user_id, embedding from user where user_id contains "U33527"' \ + 'hits=1' +``` + +This returns the document containing the user's embedding: + +```json expandable +{ + "root": { + ... + "children": [ + { + "id": "index:mind/0/ce7cc40b398f32626fcff97a", + "relevance": 0.0017429193899782135, + "source": "mind", + "fields": { + "user_id": "U33527", + "embedding": { + "type": "tensor(d0[50])", + "values": [ + 0.0, + 0.06090399995446205, + 0.15839800238609314, + ... + ] + } + } + } + ] + } +} +``` + +Now we can use this vector to query the news articles. You can either write this query by hand, but we've added a convenience script [user_search.py](https://github.com/vespa-engine/sample-apps/blob/master/news/src/python/user_search.py) which queries Vespa: + +```bash +$ ./src/python/user_search.py U33527 10 +``` + +This script first retrieves the user embedding using an HTTP `GET` query to Vespa. It then parses the tensor containing the embedding vector. Finally, it issues a `nearestNeighbor` search using a `POST` (however a `GET` would work just as well). 
Please see the [nearest-neighbor operator](/en/reference/querying/yql#nearestneighbor) for more on the syntax for nearest-neighbor searches. The `nearestNeighbor` search looks like: + +```json +{ + "hits": 10, + "yql": "select * from sources news where (nearestNeighbor(embedding, user_embedding))", + "ranking.features.query(user_embedding)": "{ ... }", + "ranking.profile": "recommendation" +} +``` + +Here, you can see the `nearestNeighbor` search operator being set up so that the query parameter `user_embedding` will be searched against the `embedding` document field. The tensor for the `user_embedding` is in the `ranking.features.query(user_embedding)` parameter. Recall from above that we set a query profile type for this exact query parameter, so Vespa knows what to expect here. + +When Vespa receives this query, it scans linearly through all documents, and scores them using the `recommendation` rank profile we set up above. Recall that we ask Vespa to convert the problem from maximum inner product to a nearest distance problem by using the `dotproduct` distance metric; in this case, the `distance` ranking feature just outputs the negative dotproduct. + +With a distance search, we want to find the smallest distances. However, Vespa sorts the final results by decreasing rank score. To get the expected rank order, Vespa provides the `closeness` feature, which in this case is just the dotproduct directly. + +Let's test that this works as intended, using [evaluate.py](https://github.com/vespa-engine/sample-apps/blob/master/news/src/python/evaluate.py): + +```bash +$ ./src/python/evaluate.py mind 1000 +``` + +This reads both the training and validation set impressions, queries Vespa for 1000 randomly drawn impressions, and calculates the same metrics we saw during training. 
The result is something like: + +``` +Train: {'auc': 0.8774, 'mrr': 0.5115, 'ndcg@5': 0.5842, 'ndcg@10': 0.6345} +Valid: {'auc': 0.6308, 'mrr': 0.2935, 'ndcg@5': 0.3203, 'ndcg@10': 0.3789} +``` + +This is in line with the results from the training. So, the conversion from inner product space to Euclidean space works as intended. The resulting rank scores are different, but the transformation evidently retains the same ordering. + + +## Approximate Nearest Neighbor Search + +So far, we've been using exact nearest-neighbor search. This is a linear scan through all documents. For the MIND demo dataset we've been using, this isn't a problem as it only contains roughly 28000 documents, and Vespa only uses a few milliseconds to scan through these. However, as the index grows, the time (and computational cost) becomes significant. + +There are no exact methods for finding the nearest-neighbors efficiently. So we trade accuracy for efficiency in what is called approximate nearest-neighbors (ANN). Vespa provides a unique implementation of ANNs that uses the HNSW (hierarchical navigable small world) algorithm, while still being compatible with other facets of the search such as filtering. We'll get back to this in the next section. + +If you recall, Vespa returned something like the following when searching for a single user above (with `targetHits` equal to 10): + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 95 + }, + "coverage": { + "coverage": 100, + "documents": 65238, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + } + } +} +``` + +Here, `coverage` shows that Vespa did scan through all 65238 documents. The interesting piece here is the `totalCount`. This number is the number of times a document has been put in the top 10 results during this linear scan. 
+ +Let's switch to using approximate nearest-neighbors by adding `index` to the embedding field in `news.sd`: + +```sd expandable +schema news { + document news { + field news_id type string { + indexing: summary | attribute + attribute: fast-search + } + field category type string { + indexing: summary | attribute + } + field subcategory type string { + indexing: summary | attribute + } + field title type string { + indexing: index | summary + index: enable-bm25 + } + field abstract type string { + indexing: index | summary + index: enable-bm25 + } + field body type string { + indexing: index | summary + index: enable-bm25 + } + field url type string { + indexing: index | summary + } + field date type int { + indexing: summary | attribute + attribute: fast-search + } + field clicks type int { + indexing: summary | attribute + } + field impressions type int { + indexing: summary | attribute + } + field embedding type tensor(d0[50]) { + indexing: attribute | index + attribute { + distance-metric: dotproduct + } + } + } + + fieldset default { + fields: title, abstract, body + } + + rank-profile popularity inherits default { + function popularity() { + expression: if (attribute(impressions) > 0, attribute(clicks) / attribute(impressions), 0) + } + first-phase { + expression: nativeRank(title, abstract) + 10 * popularity + } + } + + rank-profile recommendation inherits default { + first-phase { + expression: closeness(field, embedding) + } + } +} +``` + +If you make this change and deploy it, you will get prompted by Vespa that a restart is required so that the index can be built: + +```bash +$ vespa deploy --wait 300 my-app +``` + +Introducing the HNSW `index` requires a content node restart, in this case we restart all services: + +```bash +$ docker exec vespa /usr/bin/sh -c \ + '/opt/vespa/bin/vespa-stop-services && /opt/vespa/bin/vespa-start-services' +``` + +```bash +$ vespa status --wait 300 +``` + +After doing this and waiting a bit for Vespa to start, we can 
query Vespa again: + +```bash +$ ./src/python/user_search.py U33527 10 +``` + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 10 + }, + "coverage": { + "coverage": 100, + "documents": 65238, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + } + } +} +``` + +Here, `coverage` is still 100%, but the `totalCount` has been reduced to 10 - the same number of hits we requested. By adding the index to this field, Vespa built an HNSW graph structure for the values in this field. When used in an approximate nearest-neighbor search, this graph is queried and only the closest points as determined by this graph are added to the list. + +The particularly observant might have noticed that the result set has changed. Indeed, the third result when using exact nearest neighbor search was news article `N438`. This was omitted from the approximate search. As mentioned, we trade accuracy for efficiency when using approximate nearest-neighbor search. + +It should also be mentioned that searching through this graph comes with a cost. In our case, since we only have a relatively small number of documents, there isn't that much gain in efficiency. However, as the number of documents grows, this starts to pay off. See [Approximate nearest neighbor search in Vespa](https://blog.vespa.ai/approximate-nearest-neighbor-search-in-vespa-part-1/) for more of a discussion around this. See also [Billion-scale vector search with Vespa - part one](https://blog.vespa.ai/billion-scale-knn/) and [Billion-scale vector search with Vespa - part two](https://blog.vespa.ai/billion-scale-knn-part-two/) which cover the many trade-offs related to approximate nearest neighbor search. + +The implementation of ANN using HNSW in Vespa has some nice features. Notice that we did not have to re-feed the corpus to enable ANN. Many other approaches for ANNs require building an index offline in a batch job. 
HNSW allows for incrementally building this index, which is fully exploited in Vespa. + +A unique feature of Vespa is that the implementation allows for filtering during graph traversal, which we'll look at next. + + +## Filtering + +A common case when using approximate nearest-neighbors is to combine it with some additional query filters. For instance, for retail search, one can imagine finding relevant products for a user. In this case, we should not recommend products that are out of stock. So an additional query filter would be to ensure that `in_stock` is true. + +Now, most implementations of ANNs come in the form of a library, so they are not integrated with the search at large. The natural approach is to first perform the ANN, then *post-filter* the results. Unfortunately, this often leads to sub-optimal results as relevant documents might not have been recalled. See [Using approximate nearest-neighbor search in real world applications](https://blog.vespa.ai/using-approximate-nearest-neighbor-search-in-real-world-applications/) for more of a discussion around this, and [Query Time Constrained Approximate Nearest Neighbor Search](https://blog.vespa.ai/constrained-approximate-nearest-neighbor-search/) for a better understanding of pre- and post-filtering tradeoffs. + +In our case, let's assume we want to retrieve 10 `sports` articles for a user. It turns out we need to retrieve at least 278 news articles from the search to get to 10 `sports` articles for this user: + +```bash +$ ./src/python/user_search.py U63195 10 | grep "category\": \"sports\"" | wc -l +$ ./src/python/user_search.py U63195 278 | grep "category\": \"sports\"" | wc -l +``` + +On the other hand, if we add a filter specifically: + +```bash +$ ./src/python/user_search.py U63195 10 "AND category contains 'sports'" | \ + grep "category\": \"sports" | wc -l +``` + +Here, we only specify 10 hits and exactly 10 hits of the `sports` category are returned. 
Vespa still searches through the graph starting from the query point; however, the search does not stop when we have 10 hits. In effect, the graph search widens until 10 results fulfilling the filters are found. + +As a note, strict filters that filter away a large part of the corpus would entail that many candidates in the graph are skipped while searching for the results that fulfill the filters. This can take an exponential amount of time. For this case, Vespa falls back to a linear, brute-force search over the few documents that match the filter for efficiency. + + +## Conclusion + +We now have a basic recommendation system up and running. We can query for a user, retrieve the embedding vector and use that for querying the news articles. Right now, this means two calls to Vespa. In the [next part of the tutorial](/en/learn/tutorials/news-6-recommendation-with-searchers), we will introduce `searchers`, which allow for custom logic during query processing inside the Vespa cluster, requiring only one pass from the client to Vespa. diff --git a/mintlify-docs/en/learn/tutorials/news-6-recommendation-with-searchers.mdx b/mintlify-docs/en/learn/tutorials/news-6-recommendation-with-searchers.mdx new file mode 100644 index 0000000000..92d87c8fd9 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/news-6-recommendation-with-searchers.mdx @@ -0,0 +1,278 @@ +--- +title: "News search and recommendation tutorial - Searchers" +--- + +This is the sixth part of the tutorial series for setting up a Vespa application for personalized news recommendations. The parts are: + +1. [Getting started](/en/learn/tutorials/news-1-deploy-an-application) +2. [A basic news search application](/en/learn/tutorials/news-2-basic-feeding-and-query) - application packages, feeding, query +3. [News search](/en/learn/tutorials/news-3-searching) - sorting, grouping, and ranking +4. [Generating embeddings for users and news articles](/en/learn/tutorials/news-4-embeddings) +5. 
[News recommendation](/en/learn/tutorials/news-5-recommendation) - partial updates (news embeddings), ANNs, filtering +6. [News recommendation with searchers](/en/learn/tutorials/news-6-recommendation-with-searchers) - custom searchers, doc processors +7. [News recommendation with parent-child](/en/learn/tutorials/news-7-recommendation-with-parent-child) - parent-child, tensor ranking +8. Advanced news recommendation - intermission - training a ranking model +9. Advanced news recommendation - ML models + +In the previous part of this series, we set up a recommendation system that, given a user id, needed two requests to generate a recommendation. The first to retrieve the user embedding, and a second for finding the nearest neighbor news articles. In this part, we'll introduce `Searchers`, which are processors that can modify queries before passing them along to search. These allow us to pull the logic from the Python scripts into Vespa. + +For reference, the final state of this tutorial can be found in the [app-6-recommendation-with-searchers](https://github.com/vespa-engine/sample-apps/tree/master/news/app-6-recommendation-with-searchers) directory of the `news` sample application. + + +## Searchers and document processors + +First, let's revisit Vespa's overall architecture: + + +![Vespa Overview](/assets/img/vespa-overview.svg) + + +Recall that the application package contains everything necessary to run the application. When this is deployed, the config cluster takes care of distributing the services to the various nodes. In particular, the two main types of nodes are the stateless `container` nodes and the stateful `content` nodes. + +All requests pass through the `container` cluster before passing along to `content` cluster where the actual retrieval and ranking occurs. The queries actually pass through a chain of Searchers; each one possibly doing a small amount of processing. 
This can be seen by adding a `&trace.level=5` to a query: + +```json expandable +{ "message": "Invoke searcher 'com.yahoo.search.querytransform.WeakAndReplacementSearcher in vespa'" }, +{ "message": "Invoke searcher 'com.yahoo.prelude.statistics.StatisticsSearcher in native'" }, +{ "message": "Invoke searcher 'com.yahoo.prelude.querytransform.PhrasingSearcher in vespa'" }, +{ "message": "Invoke searcher 'com.yahoo.prelude.searcher.FieldCollapsingSearcher in vespa'" }, +{ "message": "Invoke searcher 'com.yahoo.search.yql.MinimalQueryInserter in vespa'" }, + +{ "message": "Federating to [mind]" }, + +{ "message": "Got 10 hits from source:mind" }, +{ "message": "Return searcher 'federation in native'" }, + +{ "message": "Return searcher 'com.yahoo.search.yql.MinimalQueryInserter in vespa'" }, +{ "message": "Return searcher 'com.yahoo.prelude.searcher.FieldCollapsingSearcher in vespa'" }, +{ "message": "Return searcher 'com.yahoo.prelude.querytransform.PhrasingSearcher in vespa'" }, +{ "message": "Return searcher 'com.yahoo.prelude.statistics.StatisticsSearcher in native'" }, +{ "message": "Return searcher 'com.yahoo.search.querytransform.WeakAndReplacementSearcher in vespa'" }, +``` + +This shows a small sample of the additional output when using `trace.level`. Note the invocations of the Searchers. Each Searcher gets invoked along a chain, and the last Searcher in the chain sends the post-processed query to the search backend. When the results come back, the processing passes back up the chain. The Searchers can then process the results before passing them to the previous Searcher, and ultimately back as a response to the query. + + +Adding a [trace.level](/en/reference/api/query#trace.level) is generally helpful when debugging vespa queries. 
+ + +So, [Searchers](/en/applications/searchers) are Java components that do some kind of processing along the query chain; either modifying the query before the actual search, modifying the results after the search, or some combination of both. + +Developers can provide their own Searchers and inject them into the query chain. We'll capitalize on this and create a Searcher that performs essentially the same task that [user_search.py](https://github.com/vespa-engine/sample-apps/blob/master/news/src/python/user_search.py) does: retrieve a user embedding and do a news article search based on that. In the process, we'll only pass a `user_id` to Vespa instead of a full YQL query: `/search/?user_id=U33527&searchchain=user` + +The Searcher will take care of creating the actual query for us - let's get started. + + +## Adding a user profile Searcher + +While the `content` layer in Vespa is written in C++ for maximum performance, the `container` layer is in Java for flexibility. So, all Searchers and thus custom Searchers are written in Java. Refer to [the guide on Searcher development](/en/applications/searchers) for more information. + +We want to create a Searcher that takes a `user_id`, issues a query to find the corresponding embedding, then issues a second query to retrieve the news articles. + +To do this, we create a `UserProfileSearcher` that extends the base Searcher class `com.yahoo.search.Searcher`. This Searcher must implement a single method: `search`, and has the responsibility of passing the query to the next Searcher on the list. A minimal example: + +```java +public class UserProfileSearcher extends Searcher { + public Result search(Query query, Execution execution) { + // ... process query + Result results = execution.search(query); + // ... process results + return results; + } +} +``` + +So, what we do before we pass the query along (in `execution.search(query)`) and before we return the results is completely up to us. 
So, we implement our `UserProfileSearcher` like this: + +```java expandable +public class UserProfileSearcher extends Searcher { + + public Result search(Query query, Execution execution) { + + // Get tensor and read items from user profile + Object userIdProperty = query.properties().get("user_id"); + if (userIdProperty != null) { + + // Retrieve user embedding by doing a search for the user_id and extract the tensor + Tensor userEmbedding = retrieveUserEmbedding(userIdProperty.toString(), execution); + + // Create a new search using the user's embedding tensor + NearestNeighborItem nn = new NearestNeighborItem("embedding", "user_embedding"); + nn.setTargetNumHits(query.getHits()); + nn.setAllowApproximate(true); + + query.getModel().getQueryTree().setRoot(nn); + query.getRanking().getFeatures().put("query(user_embedding)", userEmbedding); + query.getModel().setRestrict("news"); + + // Override default rank profile + if (query.getRanking().getProfile().equals("default")) { + query.getRanking().setProfile("recommendation"); + } + } + + return execution.search(query); + } + + private Tensor retrieveUserEmbedding(String userId, Execution execution) { + Query query = new Query(); + query.getModel().setRestrict("user"); + query.getModel().getQueryTree().setRoot(new WordItem(userId, "user_id")); + query.setHits(1); + + Result result = execution.search(query); + execution.fill(result); // This is needed to get the actual summary data + + if (result.getTotalHitCount() == 0) + throw new RuntimeException("User id " + userId + " not found..."); + return (Tensor) result.hits().get(0).getField("embedding"); + } + +} +``` + +First, we retrieve the `user_id` from the query. If this is given in the query, we first call the `retrieveUserEmbedding` method, which creates a new `Query` to find the user's embedding. This is a straight-forward search which is restricted to the `user` document type. Since the `user_id` is unique, we only expect a single hit. 
We then extract the `embedding` tensor from the user document. + + +We explicitly call a *fill* on the results before returning. A query is usually passed to the search backend at least twice: one to retrieve the top ranked results, another to retrieve the summary data of the final result set. This is to avoid sending excess data between services. For instance, if searching for the top 10 results with two search backends, each backend will retrieve the top 10 results from the local content on that node. A Searcher will determine the global top ten ranked results (potentially including diversification) and only issue a *fill* to retrieve the summary features for those top 10. + + +Now that we've retrieved the user embedding, we programmatically set up a nearest-neighbor search, and add the user embedding to the query as the ranking feature `query(user_embedding)`. The search is then passed along to the next Searcher in the chain. We do not need to explicitly fill the result here, as that is guaranteed to happen before ultimately rendering the results. + +Again, note that all this is pretty much the same as what we did in [user_search.py](https://github.com/vespa-engine/sample-apps/blob/master/news/src/python/user_search.py) - just in Java. + + +## Adding a search chain + +To add this Searcher to Vespa, we need to modify `services.xml`: + +```xml + + + + + + + ... + +``` + +Here, we instruct Vespa to add a new search chain called `user` (which inherits the default `vespa` search chain), and includes our `UserProfileSearcher`. Note that Vespa expects this Searcher to be in a bundle called `news-recommendation`, so we need to compile and package this code. In Vespa, we use [Apache Maven](https://maven.apache.org/) for this, which requires a project object model, or `pom.xml`, to specify how to build this artifact. 
+ +We won't go through that here; please refer to [app-6-recommendation-with-searchers](https://github.com/vespa-engine/sample-apps/tree/master/news/app-6-recommendation-with-searchers) in the `news` sample application for details. Note that this application's directory structure has changed compared to the previous parts in the tutorial. The structure is now: + +``` +. +├── pom.xml +└── src + └── main + ├── application + │ ├── schemas + │ │ ├── news.sd + │ │ └── user.sd + │ ├── search + │ │ └── query-profiles + │ │ ├── default.xml + │ │ └── types + │ │ └── root.xml + │ └── services.xml + └── java + └── ai + └── vespa + └── example + └── UserProfileSearcher.java +``` + +The Vespa application now lies under `src/main/application`, and all custom Java components are under `src/main/java` as is standard in a Java project. We can now compile and package this application: + +```bash +$ (cd app-6-recommendation-with-searchers && mvn package) +``` + +[pom.xml](https://github.com/vespa-engine/sample-apps/blob/master/news/app-6-recommendation-with-searchers/pom.xml) is set up to create an artifact called `news-recommendation-searcher`, which is referred to in `services.xml`. When the command finishes, we can see this artifact in `target/application.zip`. This contains the full Vespa application, with Java components - deploy it: + +```bash +$ vespa deploy --wait 300 app-6-recommendation-with-searchers +``` + +After the application has been deployed, we are ready to test. Refer to [the Searcher development guide](/en/applications/searchers) for much more on custom Searchers and the Java API. + + +## Testing + +Now we can search for a user's recommended news articles directly from the `user_id`: + +```bash +$ vespa query -v \ + 'user_id=U33527' \ + 'searchChain=user' +``` + +This should now return the top 10 recommended news articles for this user. 
Indeed, if we now add `trace.level=5`, we see the Searcher being invoked: + +```bash +$ vespa query -v \ + 'user_id=U33527' \ + 'searchChain=user' \ + 'trace.level=5' +``` + +```json +{ "message": "Invoke searcher 'ai.vespa.example.UserProfileSearcher in user'" }, +{ "message": "Invoke searcher 'com.yahoo.search.querytransform.WeakAndReplacementSearcher in vespa'" }, +{ "message": "Invoke searcher 'com.yahoo.prelude.statistics.StatisticsSearcher in native'" }, + +{ "message": "Return searcher 'com.yahoo.prelude.statistics.StatisticsSearcher in native'" }, +{ "message": "Return searcher 'com.yahoo.search.querytransform.WeakAndReplacementSearcher in vespa'" }, +{ "message": "Return searcher 'ai.vespa.example.UserProfileSearcher in user'" }, +``` + +Note that the `searchChain` query parameter can be set as default, so this does not have to be passed with the query request. This is done by adding it to the default query profile in [src/main/application/search/query-profiles/default.xml](https://github.com/vespa-engine/sample-apps/blob/master/news/app-6-recommendation-with-searchers/src/main/application/search/query-profiles/default.xml): + +```xml + + user + +``` + + +[src/python/evaluate.py](https://github.com/vespa-engine/sample-apps/blob/master/news/src/python/evaluate.py) can now be modified to use this Searcher. However, to properly calculate the metrics, the Searcher needs to be modified to accept a list of news article IDs and only recall those. We'll leave this as an exercise for the reader. + + + +## Document processors + +As can be seen in the architecture overview above, there are other component types as well. One is Document Processors, which are conceptually similar to Searchers. When a document is fed to Vespa, it goes through a chain of Document Processors before being passed to the content node for storage and indexing. 
+ +Vespa also supports custom Document Processors; refer to [the guide for document processing](/en/applications/document-processors) for more information. + + +## Improving recommendation diversity + +If we take a closer look at the query above, and search for the top 100 hits: + +```bash +$ vespa query \ + 'user_id=U33527' \ + 'searchChain=user' \ + 'hits=100' | \ + grep "category\": \"sports" | wc -l +``` + +We see that all the hits are of category `sports` for this user. Actually, they are all from the `football_nfl` sub-category. Indeed, from inspection of the impressions file, this user has only clicked on `sports` articles. So, while this can seem a success, we generally would like to give users some form of diversity to keep them interested. This is also to combat the negative effects of filter bubbles. + +One way to do this is to create Searchers that perform multiple queries to the backend with various rank profiles. In the above, we were only retrieving results from the `recommendation` rank profile. Still, we can have any number of rank profiles. By searching in multiple rank profiles, we can blend the results from these sources before returning to the user, and thus introduce diversity. + +This is often called federation. Vespa supports federation both from internal and external sources; see [the guide on federation](/en/querying/federation) for more information. + + +If the same document can be returned from multiple sources, it's important to perform some form of de-duplication before returning the final results! + + +A common way of performing blending from multiple sources is to implement a specialized blending Searcher. This Searcher can, for instance, use an approach such as [reciprocal rank fusion](https://research.google/pubs/pub36196/), which gives decent results. However, when it comes to diversity, there are usually some goals or restrictions that need to be controlled. 
In this case the business rules can be hand-written in the blending Searcher. Searchers are flexible enough to perform any type of processing. + + +## Conclusion + +We now have a Vespa application up and running that takes a single `user_id` and returns recommendations for that user. In the [next part of the tutorial](/en/learn/tutorials/news-7-recommendation-with-parent-child), we'll address what to do when new users without any history visit our recommendation system. diff --git a/mintlify-docs/en/learn/tutorials/news-7-recommendation-with-parent-child.mdx b/mintlify-docs/en/learn/tutorials/news-7-recommendation-with-parent-child.mdx new file mode 100644 index 0000000000..ef188a0c59 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/news-7-recommendation-with-parent-child.mdx @@ -0,0 +1,250 @@ +--- +title: "News Recommendation Tutorial - parent child and tensor ranking" +--- + +This is the seventh part of the tutorial series for setting up a Vespa application for personalized news recommendations. The parts are: + +1. [Getting started](/en/learn/tutorials/news-1-deploy-an-application) +2. [A basic news search application](/en/learn/tutorials/news-2-basic-feeding-and-query) - application packages, feeding, query +3. [News search](/en/learn/tutorials/news-3-searching) - sorting, grouping, and ranking +4. [Generating embeddings for users and news articles](/en/learn/tutorials/news-4-embeddings) +5. [News recommendation](/en/learn/tutorials/news-5-recommendation) - partial updates (news embeddings), ANNs, filtering +6. [News recommendation with searchers](/en/learn/tutorials/news-6-recommendation-with-searchers) - custom searchers, doc processors +7. [News recommendation with parent-child](/en/learn/tutorials/news-7-recommendation-with-parent-child) - parent-child, tensor ranking +8. Advanced news recommendation - intermission - training a ranking model +9. 
Advanced news recommendation - ML models + +In this part of the series, we'll introduce a new ranking signal: category click-through rate (CTR). The idea is that we can recommend popular content for users that don't have a click history yet. Rather than just recommending based on articles, we recommend based on categories. However, these global CTR values can often change continuously, so we need an efficient way to update this value for all documents. We'll do that by introducing parent-child relationships between documents in Vespa. We will also use sparse tensors directly in ranking. + +For reference, the final state of this tutorial can be found in the [app-7-parent-child](https://github.com/vespa-engine/sample-apps/tree/master/news/app-7-parent-child) directory of the `news` sample application. + + +## Parent-child relationships in Vespa + +Recall that most features come from either attributes in the document or parameters passed with the query when ranking a document. Parent-child relationships introduce the option of using attributes found in other documents. Parent-child relationships work as a form of scalable document joins. + +For instance, assume we have a global CTR value for the sports category of `0.2`. If we want to use this value during ranking, we could have a field in each news article holding this value. However, when we need to update this value, we need to issue a partial update to all documents, which seems wasteful. + +Another way would be to take inspiration from the `UserProfileSearcher`, where we retrieved the tensor embedding for a user in a search before passing that with the news article query. We could have a single document holding all global values and retrieve that with each query. However, that isn't particularly efficient. + +For these cases, Vespa introduced the [parent-child relationship](/en/schemas/parent-child). Parents are global documents, which are automatically distributed to all content nodes. 
Other documents can reference these parents and "import" values for use in ranking. The benefit is that the global category CTR values only need to be written to one place: the global document. + +Please see the [guide on parent-child relationships](/en/schemas/parent-child) for more information and examples. + + +## Setting up a global category CTR document + +So, let's set this up for our application. First we need to add a new document type to hold the CTR values. We introduce the `category_ctr` document type, which we add in `schemas/category_ctr.sd`: + +```sd +schema category_ctr { + document category_ctr { + field ctrs type tensor(category{}) { + indexing: attribute + attribute: fast-search + } + } +} +``` + +This document holds a single field: a [tensor](/en/ranking/tensor-user-guide) of type `tensor(category{})`. This is a tensor with a single sparse dimension, which is slightly different from the tensors we have seen so far. Sparse tensors have strings as dimension addresses rather than a numeric index. More concretely, an example of such a tensor is (using the [tensor literal form](/en/reference/ranking/tensor#tensor-literal-form)): + +``` +{ + {category: entertainment}: 0.2, + {category: news}: 0.3, + {category: sports}: 0.5, + {category: travel}: 0.4, + {category: finance}: 0.1, + ... +} +``` + +This tensor holds all the CTR scores for all the categories. When updating this tensor, we can update individual cells if we don't need to update the whole tensor. This is called [tensor modify](/en/reference/schemas/document-json-format#tensor-modify) and can be helpful when you have large tensors. + +To use this document, add it to `services.xml`: + +```xml + + + + + + + +``` + +Notice that we've set `global="true"`, configuring Vespa to keep a copy of these documents on all content nodes. This is required for using it in a parent-child relationship. 
This also puts limits on how many parent documents a system can have, as all nodes need to index all parent documents. + + +## Importing parent values in child documents + +To use the `category_ctr` tensor when ranking `news` documents, we need to "import" the tensor into the child document type. There are two things to set up: + +1. The reference to the parent document. +2. Which fields to import. + +Modify `schemas/news.sd`: + +```sd +schema news { + document news { + ... + field category_ctr_ref type reference { + indexing: attribute + } + ... + } + import field category_ctr_ref.ctrs as global_category_ctrs {} +} +``` + +The field `category_ctr_ref` is a field of type `reference` of a `category_ctr` document type. When feeding this field, Vespa expects the fully qualified document ID. For instance, if our global CTR document has the id `id:category_ctr:category_ctr::global`, that is what this field must be set to. Usually, there are many parent documents that children can reference, but our application will only hold one. + +You can think of the reference field as holding a foreign key to the parent document, and the import as performing a real-time join between the child and parent document using this foreign key. The imported values are usable as if they were stored with the child. + +The `import` statement defines that we should import the `ctrs` field from the document referenced in the `category_ctr_ref` field. We name this as `global_category_ctrs`, and we can reference this as `attribute(global_category_ctrs)` during ranking. + + +## Tensor expressions in ranking + +Up until this point, we've only used tensors as storage. We used tensors to hold news and user embeddings, and Vespa used these tensors to calculate the dot product in nearest-neighbor searches. + +However, Vespa has a [rich language](/en/ranking/tensor-user-guide#ranking-with-tensors) to perform calculations with tensors. 
We'll exploit that by looking up the `news` article's category in the global CTR tensor and using that as a feature in ranking. + +Our `news` document currently has a field that holds the `category` as a string. Unfortunately, tensor expressions only work on tensors, so we need to add a new field to hold the category tensor: + +```sd +field category_tensor type tensor(category{}) { + indexing: attribute +} +``` + +Using a tensor in this way also enables a document to have multiple categories, but our dataset only has a single category per article. For instance, we can represent the `finance` category of a `news` article like: + +``` +{category: finance}: 1.0 +``` + +Since this is a sparse tensor, we don't need to mention the other categories. Now, we can use this tensor to calculate the global CTR score for an article's category: + +``` +attribute(category_tensor) * attribute(global_category_ctrs) +``` + +Given the global category CTR example above, this would result in the value `0.1`. How did we arrive at this? Recall that the value for the cell `finance` in the `category` dimension of the example above had a value of `0.1`. The multiplication of these two tensors is conceptually an "inner join", so you can take the matching cells and multiply them together. Due to the sparseness of the tensor, only the `finance` cell matches, and that value is multiplied by the `1.0` in this document. So in this case, this would effectively work as a lookup. + + +Much more complex operations are available; refer to the [tensor user guide](/en/ranking/tensor-user-guide#ranking-with-tensors) for more information. 
+ + +Let's add a new rank profile to do this calculation: + +```sd expandable +rank-profile recommendation_with_global_category_ctr inherits recommendation { + function category_ctr() { + expression: sum(attribute(category_tensor) * attribute(global_category_ctrs)) + } + function nearest_neighbor() { + expression: closeness(field, embedding) + } + first-phase { + expression: nearest_neighbor * category_ctr + } + summary-features { + attribute(category_tensor) + attribute(global_category_ctrs) + category_ctr + nearest_neighbor + } +} +``` + +Here, we've added a first phase ranking expression that multiplies the nearest-neighbor score with the category CTR score, implemented with the functions `nearest_neighbor` and `category_ctr`, respectively. + +We've added a `sum` function around the `category_ctr` expression - this is simply to unbox the single-value tensor to a double value suitable for use in the first phase expression. + +Note that, as a first attempt, we just multiply the nearest-neighbor with the category CTR score. This is not necessarily the correct way to combine these values, but we'll get back to that in a bit. + +We've added a section for [summary features](/en/reference/schemas/schemas#summary-features). This is simply a list of features that will be returned with the hit when using this rank profile. Recall that we can specify which features should be returned in the summary with the `indexing: summary` statement with each field. The `summary-features` can also include the result of functions as well. This is a helpful debugging tool, and we'll see how this looks after feeding some data. + + +## Feeding parent and child updates + +Deploy the application: + +```bash +$ (cd app-7-parent-child && mvn package) +``` + +```bash +$ vespa deploy --wait 300 app-7-parent-child +``` + +After deploying the application, we are ready to feed a global CTR document. 
For convenience, we've created [create_category_ctrs.py](https://github.com/vespa-engine/sample-apps/blob/master/news/src/python/create_category_ctrs.py) that reads the MIND content and impression data to calculate CTR scores for each category. This produces two files in the `mind` directory: + +1. `mind/global_category_ctr.json` - a feed file for the global CTR document containing the CTR score for each category. +2. `mind/news_category_ctr_update.json` - a feed file for partially updating the `news` articles with the reference to the global CTR document as well as the category tensor. + +These files can now be fed to Vespa, but note that `mind/global_category_ctr.json` needs to be fed first because the global document needs to exist before the child documents can reference it. + +Create feed files: + +```bash +$ ./src/python/create_category_ctrs.py mind +``` + +Feed the created feed files: + +```bash +$ vespa feed mind/global_category_ctr.json --target http://localhost:8080 +$ vespa feed mind/news_category_ctr_update.json --target http://localhost:8080 +``` + + +## Testing the application + +After feeding the above files, we can now test the application with a query: + +```bash +$ vespa query \ + 'user_id=U33527' \ + 'ranking.profile=recommendation_with_global_category_ctr' \ + 'hits=10' +``` + +Note that we specify the rank profile to use. The first result of this query is something like the following: + +```json expandable +"fields": { + "title": "Matthew Stafford's status vs. Bears uncertain, Sam Martin will play", + "abstract": "Stafford's start streak could be in jeopardy, according to Ian Rapoport.", + "category": "sports", + ... + "summary-features": { + "attribute(category_tensor)": {"cells": [{"address": { "category": "sports"}, "value":1.0 }]}, + "attribute(global_category_ctrs)": {"cells": [ + ... + { "address": { "category": "sports" }, "value": 0.05611187964677811 }, + ... 
+ ]}, + "rankingExpression(category_ctr)": 0.05611187964677811, + "rankingExpression(nearest_neighbor)": 0.14914761220236453, + } + ... + "relevance": 0.008368952865503413, +} +``` + +This is clearly a sports article. The global CTR document is also listed here, and the CTR score for the `sports` category is `0.0561`. Thus, the result of the `category_ctr` function is `0.0561` as intended. The `nearest_neighbor` score is `0.149`, and the resulting relevance score is `0.00836`. So, this worked as expected. + +If we were to feed another value to the global CTR document, this updated value is immediately available. As such, the system responds quickly to changes in the global parameters. + +Now, a simple multiplication between these features might not give us what we want. For instance, these features have different average values and different standard deviations. Particularly, if we add multiple additional features, just multiplying them together will probably not give a great user experience. Instead of a hand-tuned final relevancy calculation as demonstrated above, we could use a machine learned function with these as feature inputs. + +Ultimately, these features are computed in real-time for every news article during ranking. These features can then be added to any machine-learned ranking model. Vespa supports gradient-boosted trees from [XGBoost](/en/ranking/xgboost) and [LightGBM](/en/ranking/lightgbm), and also neural networks in [ONNX](/en/ranking/onnx) format, exported from popular ML frameworks like [PyTorch](https://pytorch.org/) and [Tensorflow](https://www.tensorflow.org/). + + +## Conclusion + +This tutorial introduced parent-child relationships and demonstrated it through a global CTR feature we used in ranking. As this feature was based on tensors, we also introduced ranking with tensor expressions. 
For a real-world use case using parent-child tensors, see this [blog post](https://blog.vespa.ai/parent-child-joins-tensors-content-recommendation/). diff --git a/mintlify-docs/en/learn/tutorials/rag-blueprint.mdx b/mintlify-docs/en/learn/tutorials/rag-blueprint.mdx new file mode 100644 index 0000000000..618df21bcf --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/rag-blueprint.mdx @@ -0,0 +1,1381 @@ +--- +title: "RAG Blueprint" +description: "Many of our users use Vespa to power large scale RAG Applications." +--- + +This blueprint aims to exemplify many of the best practices we have learned while supporting these users. + +While many RAG tutorials exist, this blueprint provides a customizable template that: + +- Can [(auto)scale](/en/operations/autoscaling) with your data size and/or query load. +- Is fast and [production grade](/en/operations/production-deployment). +- Enables you to build RAG applications with state-of-the-art quality. + +This tutorial will show how we can develop a _high-quality_ RAG application with an evaluation-driven mindset, while being a resource you can revisit for making informed choices for your own use case. + +We will guide you through the following steps: + +1. [Our use case](#our-use-case) +2. [Data modeling](#data-modeling) +3. [Structuring your Vespa application](#structuring-your-vespa-application) +4. [Configuring match-phase (retrieval)](#configuring-match-phase-retrieval) +5. [First-phase ranking](#first-phase-ranking) +6. [Second-phase ranking](#second-phase-ranking) +7. [(Optional) Global-phase ranking](#optional-global-phase-ranking) + +All the accompanying code can be found in our [sample app](https://github.com/vespa-engine/sample-apps/tree/master/rag-blueprint) repo. + +Each step will contain reasoning behind the choices and design of the blueprint, as well as pointers for customizing your own application. 
+ +Below, you can see a diagram of the indexing (document side), retrieval and ranking of the sample application, which will be explained in more detail in the following sections. + + +**Note:** + +The elements in the diagram are clickable, and will lead you to the relevant sections, either of this tutorial or in the Vespa documentation. + + + +![RAG Blueprint Overview](/assets/img/tutorials/rag-blueprint-overview.svg) + + +[Click to open diagram in full size](/assets/img/tutorials/rag-blueprint-overview.svg) + + +**Note:** + +This is not a **'Deploy RAG in 5 minutes'** tutorial (although you _can_ technically do that by following the README in our [sample app](https://github.com/vespa-engine/sample-apps/tree/master/rag-blueprint)). The focus here is on giving you the insights and tools to apply the blueprint to your own use case. We therefore suggest taking your time to look at the code in the sample app and to run the described steps. + + + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` + - See [Docker Containers](/en/operations/self-managed/docker-containers) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 4 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). +- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [GitHub releases](https://github.com/vespa-engine/vespa/releases). 
+- [uv](https://docs.astral.sh/uv/) For Python dependency handling + + + +## Our use case + +The sample use case is a document search application, for a user who wants to get answers and insights quickly from a document collection containing company documents, notes, learning material, training logs. To make the blueprint more realistic, we required a dataset with more structured fields than are commonly found in public datasets. Therefore, we used a Large Language Model (LLM) to generate a custom one. + +It is a toy example, with only 100 documents, but we think it will illustrate the necessary concepts. You can also feel confident that the blueprint will provide a starting point that can scale as you want, with minimal changes. + +Below you can see a sample document from the dataset: + +```json +{ + "put": "id:doc:doc::78", + "fields": { + "created_timestamp": 1717750000, + "modified_timestamp": 1717750000, + "text": "# Feature Brainstorm: SynapseFlow Model Monitoring Dashboard v1\n\n**Goal:** Provide users with basic insights into their deployed model's performance and health.\n\n**Key Metrics to Display:**\n- **Inference Latency:** Avg, p95, p99 (Histogram).\n- **Request Rate / Throughput:** Requests per second/minute.\n- **Error Rate:** Percentage of 5xx errors.\n- **CPU/Memory Usage:** Per deployment/instance.\n- **GPU Usage / Temp (if applicable).**\n\n**Visualizations:**\n- Time series graphs for all key metrics.\n- Ability to select time range (last hour, day, week).\n- Filter by deployment ID.\n\n**Data Sources:**\n- Prometheus metrics from model server (see `code_review_pr123_metrics.md`).\n- Kubernetes metrics (via Kube State Metrics or cAdvisor).\n\n**Future Ideas (v2+):**\n- Data drift detection.\n- Concept drift detection.\n- Alerting on anomalies or threshold breaches.\n- Custom metric ingestion.\n\n## (UI mock-up sketches, specific Prometheus queries)", + "favorite": true, + "last_opened_timestamp": 1717750000, + "open_count": 3, + "title": 
"feature_brainstorm_monitoring_dashboard.md", + "id": "78" + } +} +``` + +In order to evaluate the quality of the RAG application, we also need a set of representative queries, with annotated relevant documents. Crucially, you need a set of representative queries that thoroughly cover your expected use case. More is better, but _some_ eval is always better than none. + +We used `gemini-2.5-pro` to create our queries and relevant document labels. Please check out our [blog post](https://blog.vespa.ai/improving-retrieval-with-llm-as-a-judge/) to learn more about using LLM-as-a-judge. + +We decided to generate some queries that need several documents to provide a good answer, and some that only need one document. If these queries are representative of the use case, we will show that they can be a great starting point for creating an (initial) ranking expression that can be used for retrieving and ranking candidate documents. But, it can (and should) also be improved, for example by collecting user interaction data, human labeling and/ or using an LLM to generate relevance feedback following the initial ranking expression. + + +## Data modeling + +Here is the schema that we will use for our sample application. + +```txt expandable +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. 
+schema doc { + + document doc { + + field id type string { + indexing: summary | attribute + } + + field title type string { + indexing: index | summary + index: enable-bm25 + } + + field text type string { + + } + + field created_timestamp type long { + indexing: attribute | summary + } + field modified_timestamp type long { + indexing: attribute | summary + } + + field last_opened_timestamp type long { + indexing: attribute | summary + } + field open_count type int { + indexing: attribute | summary + } + field favorite type bool { + indexing: attribute | summary + } + + } + + field title_embedding type tensor(x[96]) { + indexing: input title | embed | pack_bits | attribute | index + attribute { + distance-metric: hamming + } + } + + field chunks type array { + indexing: input text | chunk fixed-length 1024 | summary | index + index: enable-bm25 + } + + field chunk_embeddings type tensor(chunk{}, x[96]) { + indexing: input text | chunk fixed-length 1024 | embed | pack_bits | attribute | index + attribute { + distance-metric: hamming + } + } + + fieldset default { + fields: title, chunks + } + + document-summary no-chunks { + summary id {} + summary title {} + summary created_timestamp {} + summary modified_timestamp {} + summary last_opened_timestamp {} + summary open_count {} + summary favorite {} + summary chunks {} + } + + document-summary top_3_chunks { + from-disk + summary chunks_top3 { + source: chunks + select-elements-by: top_3_chunk_sim_scores #this needs to be added a summary-feature to the rank-profile + } + } +} +``` + +Keep reading for an explanation and reasoning behind the choices in the schema. + + +### Picking your searchable unit + +When building a RAG application, your first key decision is choosing the "searchable unit." This is the basic block of information your system will search through and return as context to the LLM. For instance, if you have millions of documents, some hundreds of pages long, what should be your searchable unit? 
+ +Consider these points when selecting your searchable unit: + +- **Too fine-grained (e.g., individual sentences or very small paragraphs):** +- Leads to duplication of context and metadata across many small units. +- May result in units lacking sufficient context for the LLM to make good selections or generate relevant responses. +- Increases overhead for managing many small document units. +- **Too coarse-grained (e.g., very long chapters or entire large documents):** +- Can cause performance issues due to the size of the units being processed. +- May lead to some large documents appearing relevant to too many queries, reducing precision. +- If you embed the whole document, a too large context will lead to reduced retrieval quality. + +We recommend erring on the side of using slightly larger units. + +- LLMs are increasingly capable of handling larger contexts. +- In Vespa, you can index larger units, while avoiding data duplication and performance issues, by returning only the most relevant parts. + +With Vespa, it is now possible to return only the top k most relevant chunks of a document, and include and combine both document-level and chunk-level features in ranking. + + +### Chunk selection + +Assume you have chosen a document as your searchable unit. Your documents may then contain text index fields of highly variable lengths. Consider for example a corpus of web pages. Some might be very long, while the average is well within the recommended size. See [scaling retrieval size](/en/performance/sizing-search#scaling-retrieval-size) for more details. + +While we recommend implementing guards against too long documents in your feeding pipeline, you still probably do not want to return every chunk of the top k documents to an LLM for RAG. + +In Vespa, we now have a solution for this problem. 
Check out our [blog post on layered ranking](https://blog.vespa.ai/introducing-layered-ranking-for-rag-applications/) for an overview of the new features that allow you to do this. + +Below, we show how you can score both documents and individual chunks, and use that score to select the best chunks to be returned in a summary, instead of returning all chunks belonging to the top k ranked documents. + +Compute closeness per chunk in a ranking function; use `elementwise(bm25(chunks), i, double)` for a per-chunk text signal. See the [rank feature reference](/en/reference/ranking/rank-features#elementwise-bm25). Elementwise rank functions and filtering on the content nodes are now available. + +This allows you to pick a large document as the searchable unit, while still addressing the potential drawbacks many encounter as follows: + +- Pick your (larger) document as your searchable unit. +- Chunk the text-fields automatically on indexing. +- Embed each chunk (enabled through Vespa's multivector support). +- Calculate chunk-level features (e.g. bm25 and embedding similarity) and document-level features. Combine as you want. +- Limit the actual chunks that are returned to the ones that are actually relevant context for the LLM. + +This allows you to index larger units, while avoiding data duplication and performance issues, by returning only the most relevant parts. + +Vespa also supports automatic [chunking](/en/reference/writing/indexing-language#converters) in the [indexing language](/en/writing/indexing). 
+ +Here are the parts of the schema, which defines the searchable unit as a document with a text field, and automatically chunks it into smaller parts of 1024 characters, which each are embedded and indexed separately: + +```txt +field chunks type array { + indexing: input text | chunk fixed-length 1024 | summary | index + index: enable-bm25 +} + +field chunk_embeddings type tensor(chunk{}, x[96]) { + indexing: input text | chunk fixed-length 1024 | embed | pack_bits | attribute | index + attribute { + distance-metric: hamming + } +} +``` + +In Vespa, we can specify which chunks to be returned with a summary feature, see [docs](/en/reference/schemas/schemas#select-elements-by) for details. For this blueprint, we will return the top 3 chunks based on the similarity score of the chunk embeddings, which is calculated in the ranking phase. Note that this feature could be any chunk-level summary feature defined in your rank-profile. + +Here is how the summary feature is calculated in the rank-profile: + +```txt expandable +# This function unpacks the bits of each dimension of the mapped chunk_embeddings attribute tensor +function chunk_emb_vecs() { + expression: unpack_bits(attribute(chunk_embeddings)) +} + +# This function calculates the dot product between the query embedding vector and the chunk embeddings (both are now float) over the x dimension +function chunk_dot_prod() { + expression: reduce(query(float_embedding) * chunk_emb_vecs(), sum, x) +} + +# This function calculates the L2 normalized length of an input tensor +function vector_norms(t) { + expression: sqrt(sum(pow(t, 2), x)) +} + +# Here we calculate cosine similarity by dividing the dot product by the product of the L2 normalized query embedding and document embeddings +function chunk_sim_scores() { + expression: chunk_dot_prod() / (vector_norms(chunk_emb_vecs()) * vector_norms(query(float_embedding))) +} + +function top_3_chunk_text_scores() { + expression: top(3, chunk_text_scores()) +} + +function 
top_3_chunk_sim_scores() { + expression: top(3, chunk_sim_scores()) + } + +summary-features { + top_3_chunk_sim_scores + } +``` + + +**Note:** + +The ranking expression may seem a bit complex, as we chose to embed each chunk independently, store the embeddings in a binarized format, and then unpack them to calculate similarity based on their float representations. For single dimension dense vector similarity between same-precision embeddings, this can be simplified significantly using the [closeness](/en/reference/ranking/rank-features#closeness(name)) convenience function. + + +Note that we want to use the float-representation of the query-embedding, and thus also need to convert the binary embedding of the chunks to float. After that, we can calculate the similarity score between the query embedding and the chunk embeddings using cosine similarity (the dot product, and then normalize it by the norms of the embeddings). + +See [ranking expressions](/en/reference/ranking/ranking-expressions#non-primitive-functions) for more details on the `top`-function, and other functions available for ranking expressions. + +Now, we can use this summary feature in our document summary to return the top 3 chunks of the document, which will be used as context for the LLM. Note that we can also define a document summary that returns all chunks, which might be useful for another use case, such as deep research. + +```txt +document-summary top_3_chunks { + from-disk + summary chunks_top3 { + source: chunks + select-elements-by: top_3_chunk_sim_scores #this needs to be added a summary-feature to the rank-profile + } + } +``` + + +### Use multiple text fields, consider multiple embeddings + +We recommend indexing different textual content as separate indexes. These can be searched together, using [field-sets](/en/reference/schemas/schemas#fieldset) + +In our schema, this is exemplified by the sections below, which define the `title` and `chunks` fields as separate indexed text fields. 
+ +```txt +... +field title type title { + indexing: index | summary + index: enable-bm25 +} +field chunks type array { + indexing: input text | chunk fixed-length 1024 | summary | index + index: enable-bm25 +} +``` + +Whether you should have separate embedding fields, depends on whether the added memory usage is justified by the quality improvement you could get from the additional embedding field. + +We choose to index both a `title_embedding` and a `chunk_embeddings` field for this blueprint, as we aim to minimize cost by embedding the binary vectors. + +```txt +field title_embedding type tensor(title{}, x[96]) { + indexing: input text | embed | pack_bits | attribute | index + attribute { + distance-metric: hamming + } +} +field chunk_embeddings type tensor(chunk{}, x[96]) { + indexing: input text | chunk fixed-length 1024 | embed | pack_bits | attribute | index + attribute { + distance-metric: hamming + } +} +``` + +Indexing several embedding fields may not be worth the cost for you. Evaluate whether the cost-quality trade-off is worth it for your application. + +If you have different vector space representations of your document (e.g images), indexing them separately is likely worth it, as they are likely to provide signals that are complementary to the text-based embeddings. + + +### Model Metadata and Signals Using Structured Fields + +We recommend modeling metadata and signals as structured fields in your schema. Below are some general recommendations, as well as the implementation in our blueprint schema. 
+ +**Metadata** — knowledge about your data: + +- Authors, publish time, source, links, category, price, … +- Usage: filters, ranking, grouping/aggregation +- Index only metadata that are strong filters + +In our blueprint schema, we include these metadata fields to demonstrate these concepts: + +- `id` - document identifier +- `title` - document name/filename for display and text matching +- `created_timestamp`, `modified_timestamp` - temporal metadata for filtering and ranking by recency + +**Signals** — observations about your data: + +- Popularity, quality, spam probability, click_probability, … +- Usage: ranking +- Often updated separately via partial updates +- Multiple teams can add their own signals independently + +In our blueprint schema, we include several of these signals: + +- `last_opened_timestamp` - user engagement signal for personalization +- `open_count` - popularity signal indicating document importance +- `favorite` - explicit user preference signal, can be used for boosting relevant content + +These fields are configured as `attribute | summary` to enable efficient filtering, sorting, and grouping operations while being returned in search results. The timestamp fields allow for temporal filtering (e.g., "recent documents") and recency-based ranking, while usage signals like `open_count` and `favorite` can boost frequently accessed or explicitly marked important documents. + +Consider [parent-child](/en/schemas/parent-child) relationships for low-cardinality metadata. Most large scale RAG application schemas contain at least a hundred structured fields. + +## LLM-generation with OpenAI-client + +Vespa supports both Local LLMs, and any OpenAI-compatible API for LLM generation. For details, see [LLMs in Vespa](/en/rag/llms-in-vespa) + +The recommended way to provide an API key is by using the [secret store](/en/security/secret-store) in Vespa Cloud. 
+ +To enable this, you need to create a vault (if you don't have one already) and a secret through the [Vespa Cloud console](/). If your vault is named `sample-apps` and contains a secret with the name `openai-api-key`, you would use the following configuration in your `services.xml` to set up the OpenAI client to use that secret: + +```xml + + + + + + + openai-api-key + + +``` + +Alternatively, for local deployments, you can set the `X-LLM-API-KEY` header in your query to use the OpenAI client for generation. + +To test generation using the OpenAI client, post a query that runs the `openai` search chain, with `format=sse`. (Use `format=json` for a streaming json response including both the search hits and the LLM-generated tokens.) + +```bash +$ vespa query \ + --timeout 60 \ + --header="X-LLM-API-KEY:" \ + yql='select * + from doc + where default contains text(@query) or + ({label:"title_label", targetHits:100}nearestNeighbor(title_embedding, embedding)) or + ({label:"chunks_label", targetHits:100}nearestNeighbor(chunk_embeddings, embedding))' \ + query="Summarize the key architectural decisions documented for SynapseFlow's v0.2 release." \ + searchChain=openai \ + format=sse \ + hits=5 +``` + + +## Structuring your vespa application + +This section provides recommendations for structuring your Vespa application package. See also the [application package docs](/en/basics/applications) for more details on the application package structure. Note that this is not mandatory, and it might be simpler to start without query profiles and rank profiles, but as you scale out your application, it will be beneficial to have a well-structured application package. 
+ +Consider the following structure for our application package: + +```txt expandable +app +├── models +│ └── lightgbm_model.json +├── schemas +│ └── doc +│ │ ├-- base-features.profile +│ │ ├── collect-second-phase.profile +│ │ ├── collect-training-data.profile +│ │ ├── learned-linear.profile +│ │ ├── match-only.profile +│ │ └── second-with-gbdt.profile +│ └── doc.sd +├── search +│ └── query-profiles +│ ├── deepresearch-with-gbdt.xml +│ ├── deepresearch.xml +│ ├── hybrid-with-gbdt.xml +│ ├── hybrid.xml +│ ├── rag-with-gbdt.xml +│ └── rag.xml +├── security +│ └── clients.pem +└── services.xml +``` + +You can see that we have separated the [query profiles](/en/reference/querying/query-profiles), and [rank profiles](/en/basics/ranking#rank-profiles) into their own directories. + + +### Manage queries in query profiles + +Query profiles let you maintain collections of query parameters in one file. Clients choose a query profile → the profile sets everything else. This lets us change behavior for a use case without involving clients. + +Let us take a closer look at 3 of the query profiles in our sample application. + +1. `hybrid` +2. `rag` +3. `deepresearch` + + +### **_hybrid_** query profile + +This query profile will be the one used by clients for traditional search, where the user is presented a limited number of hits. Our other query profiles will inherit this one (but may override some fields). + +```xml + + doc + embed(@query) + embed(@query) + + select * + from %{schema} + where default contains text(@query) or + ({label:"title_label", targetHits:100}nearestNeighbor(title_embedding, embedding)) or + ({label:"chunks_label", targetHits:100}nearestNeighbor(chunk_embeddings, embedding)) + + 10 + learned-linear + top_3_chunks + +``` + + +### **_rag_** query profile + +This will be the query profile where the `openai` searchChain will be added, to generate a response based on the retrieved context. Here, we set some configuration that are specific to this use case. 
+ +```xml + + 50 + openai + sse + +``` + + +### **_deepresearch_** query profile + +Again, we will inherit from the `hybrid` query profile, but override with a `targetHits` value of 10 000 (original was 100) that prioritizes recall over latency. We will also increase number of hits to be returned, and increase the timeout to 5 seconds. + +```xml + + + select * + from %{schema} + where default contains text(@query) or + ({label:"title_label", targetHits:10000}nearestNeighbor(title_embedding, embedding)) or + ({label:"chunks_label", targetHits:10000}nearestNeighbor(chunk_embeddings, embedding)) + + 100 + 5s + +``` + +We will leave out the LLM-generation for this one, and let an LLM agent on the client side be responsible for using this API call as a tool, and to determine whether enough relevant context to answer has been retrieved. Note that the `targetHits` parameter set here does not really make sense until your dataset reach a certain scale. + +As we add more rank-profiles, we can also inherit the existing query profiles, only to override the `ranking.profile` field to use a different rank profile. This is what we have done for the `rag-with-gbdt` and `deepresearch-with-gbdt` query profiles, which will use the `second-with-gbdt` rank profile instead of the `learned-linear` rank profile. + +```xml + + 50 + openai + sse + +``` + + +### Separating out rank profiles + +To build a great RAG application, assume you'll need many ranking models. This will allow you to bucket-test alternatives continuously and to serve different use cases, including data collection for different phases, and the rank profiles to be used in production. + +Separate common functions/setup into parent rank profiles and use `.profile` files. + + +## Phased ranking in Vespa + +Before we move on, it might be useful to recap Vespa´s [phased ranking](/en/ranking/phased-ranking) approach. + +Below is a schematic overview of how to think about retrieval and ranking for this RAG blueprint. 
Since we are developing this as a tutorial using a small toy dataset, the application can be deployed on a single machine, using a single Docker container, where only one container node and one content node will run. This is obviously not the case for most real-world RAG applications, so this is crucial to keep in mind as you scale your application. + + +![phased ranking overview](/assets/img/phased-ranking-rag.png) + + +The stateless container nodes can be [scaled independently](/en/performance/sizing-search) to handle increased query load. + + +## Configuring match-phase (retrieval) + +This section will contain important considerations for the retrieval-phase of a RAG application in Vespa. + +The goal of the retrieval phase is to retrieve candidate documents efficiently, and maximize recall, without exposing too many documents to ranking. + + +### Choosing a Retrieval Strategy: Vector, Text, or Hybrid? + +As you could see from the schema, we create and index both a text representation and a vector representation for each chunk of the document. This will allow us to use both text-based features and semantic features for both recall and ranking. 
+ +The text and vector representations complement each other well: + +- **Text-only** → misses recall of semantically similar content +- **Vector-only** → misses recall of specific content not well understood by the embedding models + +Our recommendation is to default to hybrid retrieval: + +```sql +select * + from doc + where default contains text(@query) or + ({label:"title_label", targetHits:1000}nearestNeighbor(title_embedding, embedding)) or + ({label:"chunks_label", targetHits:1000}nearestNeighbor(chunk_embeddings, embedding)) +``` + +In generic domains, or if you have fine-tuned an embedding model for your specific data, you might consider a vector-only approach: + +```sql +select * + from doc + where rank({targetHits:10000}nearestNeighbor(embeddings_field, query_embedding), default contains text(@query)) +``` + +Notice that only the first argument of the [rank](/en/reference/querying/yql#rank)-operator will be used to determine if a document is a match, while all arguments are used for calculating rank features. This means we can use vector-only matching, but still use text-based features such as `bm25` and `nativeRank` for ranking. Note that if you do this, it makes sense to increase the number of `targetHits` for the `nearestNeighbor`-operator. + +For our sample application, we add three different retrieval operators (that are combined with `OR`), one with `weakAnd` for text matching, and two `nearestNeighbor` operators for vector matching, one for the title and one for the chunks. This will allow us to retrieve both relevant documents based on text and vector similarity, while also allowing us to return the most relevant chunks of the documents. 
+ +```sql +select * + from doc + where default contains text(@query) or + ({targetHits:100}nearestNeighbor(title_embedding, embedding)) or + ({targetHits:100}nearestNeighbor(chunk_embeddings, embedding)) +``` + + +### Choosing your embedding model (and strategy) + +Choice of embedding model will be a trade-off between inference time (both indexing and query time), memory usage (embedding dimensions) and quality. There are many good open-source models available, and we recommend checking out the [MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard), and look at the `Retrieval`-column to gauge performance, while also considering the memory usage, vector dimensions, and context length of the model. + +See [model hub](/en/rag/model-hub) for a list of provided models ready to use with Vespa. See also [Huggingface Embedder](/en/rag/embedding#huggingface-embedder) for details on using other models (exported as ONNX) with Vespa. + +In addition to dense vector representation, Vespa supports sparse embeddings (token weights) and multi-vector (ColBERT-style) embeddings. See our [example notebook](https://vespa-engine.github.io/pyvespa//examples/mother-of-all-embedding-models-cloud#bge-m3-the-mother-of-all-embedding-models) of using the bge-m3 model, which supports both, with Vespa. + +Vespa also supports [Matryoshka embeddings](https://blog.vespa.ai/combining-matryoshka-with-binary-quantization-using-embedder/), which can be a great way of reducing inference cost for retrieval phases, by using a subset of the embedding dimensions, while using more dimensions for increased precision in the later ranking phases. + +For domain-specific applications or less popular languages, you may want to consider finetuning a model on your own data. + + +### Consider binary vectors for recall + +Another decision to make is which precision you will use for your embeddings. See [binarization docs](/en/rag/binarizing-vectors) for an introduction to binarization in Vespa. 
+ +For most cases, binary vectors (in Vespa, packed into `int8`-representation) will provide an attractive tradeoff, especially for recall during match-phase. Consider these factors to determine whether this holds true for your application: + +- Reduces memory-vector cost by 5 – 30 × +- Reduces query and indexing cost by 30 × +- Often reduces quality by only a few percentage points + +```txt +field binary_chunk_embeddings type tensor(chunk{}, x) { + indexing: input text | chunk fixed-length 1024 | embed | pack_bits | attribute | index + attribute { distance-metric: hamming } +} +``` + +If you need higher precision vector similarity, you should use bfloat16 precision, and consider paging these vectors to disk to avoid large memory cost. Note that this means that when accessing this field in ranking, they will also need to be read from disk, so you need to restrict the number of hits that accesses this field to avoid performance issues. + +```txt +field chunk_embeddings type tensor(chunk{}, x) { + indexing: input text | chunk fixed-length 1024 | embed | attribute + attribute: paged +} +``` + +For example, if you want to calculate `closeness` for a paged embedding vector in first-phase, consider configuring your retrieval operators (typically `weakAnd` and/or `nearestNeighbor`, optionally combined with filters) so that not too many hits are matched. Another option is to enable match-phase limiting, see [match-phase docs](/en/reference/schemas/schemas#match-phase). In essence, you restrict the number of matches by specifying an attribute field. + + +### Consider float-binary for ranking + +In our blueprint, we choose to index binary vectors of the documents. This does not prevent us from using the float-representation of the query embedding though. 
+ +By unpacking the binary document chunk embeddings to their float representations (using [`unpack_bits`](/en/reference/ranking/ranking-expressions#unpack-bits)), we can calculate the similarity between query and document with slightly higher precision using a `float-binary` dot product, instead of hamming distance (`binary-binary`) + +Below, you can see how we can do this: + +```txt expandable +rank-profile base-features { + + inputs { + query(embedding) tensor(x[96]) + query(float_embedding) tensor(x[768]) + } + + function chunk_emb_vecs() { + expression: unpack_bits(attribute(chunk_embeddings)) + } + + function chunk_dot_prod() { + expression: reduce(query(float_embedding) * chunk_emb_vecs(), sum, x) + } + + function vector_norms(t) { + expression: sqrt(sum(pow(t, 2), x)) + } + function chunk_sim_scores() { + expression: chunk_dot_prod() / (vector_norms(chunk_emb_vecs()) * vector_norms(query(float_embedding))) + } + + function top_3_chunk_text_scores() { + expression: top(3, chunk_text_scores()) + } + + function top_3_chunk_sim_scores() { + expression: top(3, chunk_sim_scores()) + } +} +``` + + +### Use complex linguistics/recall only for precision + +Vespa gives you extensive control over [linguistics](/en/linguistics/linguistics). You can decide [match mode](/en/reference/schemas/schemas#match), stemming, normalization, or control derived tokens. + +It is also possible to use more specific operators than [weakAnd](/en/reference/querying/yql#weakand) to match only close occurrences ([near](/en/reference/querying/yql#near)/ [onear](/en/reference/querying/yql#near)), multiple alternatives ([equiv](/en/linguistics/query-rewriting#equiv)), weight items, set connectivity, and apply [query-rewrite](/en/linguistics/query-rewriting) rules. + +**Don't use this to increase recall — improve your embedding model instead.** + +Consider using it to improve precision when needed. 
+ + +### Evaluating recall of the retrieval phase + +To know whether your retrieval phase is working well, you need to measure recall, number of total matches and the reported time spent. + +We can use [`VespaMatchEvaluator`](https://vespa-engine.github.io/pyvespa/api/vespa/evaluation.html#vespa.evaluation.VespaMatchEvaluator) from the pyvespa client library to do this. + +For this sample application, we set up an evaluation script that compares three different retrieval strategies, let us call them "retrieval arms": + +1. **Semantic-only**: Uses only vector similarity through `nearestNeighbor` operators. +2. **WeakAnd-only**: Uses only text-based matching with `userQuery()`. +3. **Hybrid**: Combines both approaches with OR logic. + + +**Note:** + +Note that this is only a generic suggestion, and that you are of course free to include [filter clauses](/en/reference/querying/yql#where), [grouping](/en/querying/grouping), [predicates](/en/schemas/predicate-fields), [geosearch](/en/querying/geo-search), etc. to support your specific use cases. + + +It is recommended to use a ranking profile that does not use any first-phase ranking, to run the match-phase evaluation faster. + +The evaluation will output metrics like: + +- Recall (percentage of relevant documents matched) +- Total number of matches per query +- Query latency statistics +- Per-query detailed results (when `write_verbose=True`) to identify "offending" queries in regard to recall or performance. + +This will be valuable input for tuning each of the retrieval arms. + +Run the complete evaluation script from the `eval/` directory to see detailed comparisons between all three retrieval strategies on your dataset. 
+ + +#### Semantic Query Evaluation + +```sql +select * from doc where +({targetHits:100}nearestNeighbor(title_embedding, embedding)) or +({targetHits:100}nearestNeighbor(chunk_embeddings, embedding)) +``` + +| Metric | Value | +| :--- | :--- | +| Match Recall | 1.0000 | +| Average Recall per Query | 1.0000 | +| Total Relevant Documents | 51 | +| Total Matched Relevant | 51 | +| Average Matched per Query | 100.0000 | +| Total Queries | 20 | +| Search Time Average (s) | 0.0090 | +| Search Time Q50 (s) | 0.0060 | +| Search Time Q90 (s) | 0.0193 | +| Search Time Q95 (s) | 0.0220 | + + +#### WeakAnd Query Evaluation + +The `userQuery` is just a convenience wrapper for `weakAnd`, see [reference/query-language-reference.html](/en/reference/querying/yql). The default `targetHits` for `weakAnd` is 100, but it is [overridable](/en/reference/querying/yql#targethits). + +```sql +select * from doc where userQuery() +``` + +| Metric | Value | +| :--- | :--- | +| Match Recall | 1.0000 | +| Average Recall per Query | 1.0000 | +| Total Relevant Documents | 51 | +| Total Matched Relevant | 51 | +| Average Matched per Query | 88.7000 | +| Total Queries | 20 | +| Search Time Average (s) | 0.0071 | +| Search Time Q50 (s) | 0.0060 | +| Search Time Q90 (s) | 0.0132 | +| Search Time Q95 (s) | 0.0171 | + + +#### Hybrid Query Evaluation + +```sql +select * from doc where +({targetHits:100}nearestNeighbor(title_embedding, embedding)) or +({targetHits:100}nearestNeighbor(chunk_embeddings, embedding)) or +userQuery() +``` + +| Metric | Value | +| :--- | :--- | +| Match Recall | 1.0000 | +| Average Recall per Query | 1.0000 | +| Total Relevant Documents | 51 | +| Total Matched Relevant | 51 | +| Average Matched per Query | 100.0000 | +| Total Queries | 20 | +| Search Time Average (s) | 0.0076 | +| Search Time Q50 (s) | 0.0055 | +| Search Time Q90 (s) | 0.0150 | +| Search Time Q95 (s) | 0.0201 | + + +### Tuning the retrieval phase + +We can see that all queries match all relevant documents, 
which is expected, since we use `targetHits:100` in the `nearestNeighbor` operator, and this is also the default for `weakAnd` (and `userQuery`). By setting `targetHits` lower, we can see that recall will drop. + +In general, you have these options if you want to increase recall: + +1. Increase `targetHits` in your retrieval operators (e.g., `nearestNeighbor`, `weakAnd`). +2. Improve your embedding model (use a better model or finetune it on your data). +3. You can also consider tuning HNSW parameters, see [docs on HNSW](/en/querying/approximate-nn-hnsw#using-vespas-approximate-nearest-neighbor-search). + +Conversely, if you want to reduce the latency of one of your retrieval 'arms' at the cost of a small trade-off in recall, you can: + +1. Tune `weakAnd` parameters. This has the potential to triple the performance of the `weakAnd` part of your query, see [blog post](https://blog.vespa.ai/tripling-the-query-performance-of-lexical-search/). + +Below are some empirically found default parameters that work well for most use cases: + +```txt +rank-profile optimized inherits baseline { + filter-threshold: 0.05 + weakand { + stopword-limit: 0.6 + adjust-target: 0.01 + } + } +``` + +See the [reference](/en/reference/schemas/schemas#weakand) for more details on the `weakAnd` parameters. These can also be set as query parameters. + +2. As already [mentioned](#consider-binary-vectors-for-recall), consider binary vectors for your embeddings. +3. Consider using an embedding model with fewer dimensions, or using only a subset of the dimensions (e.g., using [Matryoshka embeddings](https://blog.vespa.ai/combining-matryoshka-with-binary-quantization-using-embedder/)). + + +## First-phase ranking + +For the first-phase ranking, we must use a computationally cheap function, as it is applied to all documents matched in the retrieval phase. For many applications, this can amount to millions of candidate documents. 
+ +Common options include a (learned) linear combination of features including text similarity features, vector closeness, and metadata. It could also be a heuristic handwritten function. + +Text features should include [nativeRank](/en/reference/ranking/nativerank#nativeRank) or [bm25](/en/ranking/bm25#ranking-function) — not [fieldMatch](/en/reference/ranking/rank-features#field-match-features-normalized) (it is too expensive). + +Considerations for deciding whether to choose `bm25` or `nativeRank`: + +- **bm25**: cheapest, strong significance, no proximity, not normalized. +- **nativeRank**: 2 – 3 × costlier, truncated significance, includes proximity, normalized. + +For this blueprint, we opted for using `bm25` for first phase, but you could evaluate and compare to see whether the additional cost of using `nativeRank` is justified by increased quality. + + +### Collecting training data for first-phase ranking + +The features we will use for first-phase ranking are not normalized (i.e. they have values in different ranges). This means we can't just weight them equally and expect that to be a good proxy for relevance. + +Below we will show how we can find (learn) optimal weights (coefficients) for each feature, so that we can combine them into a ranking-expression of the form: + +```python +a * bm25(title) + b * bm25(chunks) + c * max_chunk_sim_scores() + d * max_chunk_text_scores() + e * avg_top_3_chunk_sim_scores() + f * avg_top_3_chunk_text_scores() +``` + +The first thing we need to do is to collect training data. We do this using the [VespaFeatureCollector](https://vespa-engine.github.io/pyvespa/api/vespa/evaluation.html#vespa.evaluation.VespaFeatureCollector) from the pyvespa library. 
+ +These are the features we will include: (Below, ) + +```txt expandable +rank-profile base-features { + inputs { + query(embedding) tensor(x[96]) + query(float_embedding) tensor(x[768]) + } + + rank chunks { + element-gap: 0 # Fixed length chunking should not cause any positional gap between elements + } + function chunk_text_scores() { + expression: elementwise(bm25(chunks),chunk,float) + } + + function chunk_emb_vecs() { + expression: unpack_bits(attribute(chunk_embeddings)) + } + + function chunk_dot_prod() { + expression: reduce(query(float_embedding) * chunk_emb_vecs(), sum, x) + } + + function vector_norms(t) { + expression: sqrt(sum(pow(t, 2), x)) + } + function chunk_sim_scores() { + expression: chunk_dot_prod() / (vector_norms(chunk_emb_vecs()) * vector_norms(query(float_embedding))) + } + + function top_3_chunk_text_scores() { + expression: top(3, chunk_text_scores()) + } + + function top_3_chunk_sim_scores() { + expression: top(3, chunk_sim_scores()) + } + + function avg_top_3_chunk_text_scores() { + expression: reduce(top_3_chunk_text_scores(), avg, chunk) + } + function avg_top_3_chunk_sim_scores() { + expression: reduce(top_3_chunk_sim_scores(), avg, chunk) + } + + function max_chunk_text_scores() { + expression: reduce(chunk_text_scores(), max, chunk) + } + + function max_chunk_sim_scores() { + expression: reduce(chunk_sim_scores(), max, chunk) + } +} + +rank-profile collect-training-data inherits base-features { + match-features { + bm25(title) + bm25(chunks) + max_chunk_sim_scores + max_chunk_text_scores + avg_top_3_chunk_sim_scores + avg_top_3_chunk_text_scores + + } + + + first-phase { + expression { + # Not used in this profile + bm25(title) + + bm25(chunks) + + max_chunk_sim_scores() + + max_chunk_text_scores() + } + } + + second-phase { + expression: random + } + } +``` + +As you can see, we have defined a `collect-training-data` rank profile that inherits from the `base-features` rank profile. 
This rank profile will collect the match features we defined in the `match-features` section. + +The `random` expression in the `second-phase` allows us to collect random hits, and is necessary for our data collection script. See the [docstring of pyvespa class VespaFeatureCollector](https://vespa-engine.github.io/pyvespa/api/vespa/evaluation.html#vespa.evaluation.VespaFeatureCollector) that is used in the script for details. + +As you can see, we rely on the `bm25` and different vector similarity features (both document-level and chunk-level) for the first-phase ranking. These are relatively cheap to calculate, and will likely provide good enough ranking signals for the first-phase ranking. + +Running the command below will save a .csv-file with the collected features, which can be used to train a ranking model for the first-phase ranking. + +```bash +$ python eval/collect_pyvespa.py --collect_matchfeatures +``` + +Our output file looks like this: + +| query_id | doc_id | relevance_label | relevance_score | match_avg_top_3_chunk_sim_scores | match_avg_top_3_chunk_text_scores | match_bm25(chunks) | match_bm25(title) | match_max_chunk_sim_scores | match_max_chunk_text_scores | +| :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | +| alex_q_01 | 50 | 1 | 0.660597 | 0.248329 | 8.444725 | 7.717984 | 0. | 0.268457 | 8.444725 | +| alex_q_01 | 82 | 1 | 0.649638 | 0.225300 | 12.327676 | 18.611592 | 2.453409 | 0.258905 | 15.644889 | +| alex_q_01 | 1 | 1 | 0.245849 | 0.358027 | 15.100841 | 23.010389 | 4.333828 | 0.391143 | 20.582403 | +| alex_q_01 | 28 | 0 | 0.988250 | 0.278074 | 0.179929 | 0.197420 | 0. | 0.278074 | 0.179929 | +| alex_q_01 | 23 | 0 | 0.968268 | 0.203709 | 0.182603 | 0.196956 | 0. | 0.203709 | 0.182603 | + +Note that the `relevance_score` in this table is just the random expression we used in the `second-phase` of the `collect-training-data` rank profile, and will be dropped before training the model. 
+ + +### Training a first-phase ranking model + +As you recall, a first-phase ranking expression must be cheap to evaluate. This most often means a heuristic handwritten combination of match features, or a linear model trained on match features. + +We will demonstrate how to train a simple Logistic Regression model to predict relevance based on the collected match features. The full training script can be found in the [sample-apps repository](https://github.com/vespa-engine/sample-apps/blob/master/rag-blueprint/eval/train_logistic_regression.py). + +Some "gotchas" to be aware of: + +- We sample an equal number of relevant and random documents for each query, to avoid class imbalance. +- We make sure that we drop `query_id` and `doc_id` columns before training. +- We apply standard scaling to the features before training the model. We apply the inverse transform to the model coefficients after training, so that we can use them in Vespa. +- We do 5-fold stratified cross-validation to evaluate the model performance, ensuring that each fold has a balanced number of relevant and random documents. +- We also make sure to have an unseen set of test queries to evaluate the model on, to avoid overfitting. 
+ +Run the training [script](https://github.com/vespa-engine/sample-apps/blob/master/rag-blueprint/eval/train_logistic_regression.py) + +```bash +$ python eval/train_logistic_regression.py +``` + +Expect output like this: + +```txt expandable +------------------------------------------------------------ + Cross-Validation Results (5-Fold, Standardized) +------------------------------------------------------------ +Metric | Mean | Std Dev +------------------------------------------------------------ +Accuracy | 0.9024 | 0.0294 +Precision | 0.9236 | 0.0384 +Recall | 0.8818 | 0.0984 +F1-Score | 0.8970 | 0.0415 +Log Loss | 0.2074 | 0.0353 +ROC AUC | 0.9749 | 0.0103 +Avg Precision | 0.9764 | 0.0117 +------------------------------------------------------------ +Transformed Coefficients (for original unscaled features): +-------------------------------------------------- +avg_top_3_chunk_sim_scores : 13.383840 +avg_top_3_chunk_text_scores : 0.203145 +bm25(chunks) : 0.159914 +bm25(title) : 0.191867 +max_chunk_sim_scores : 10.067169 +max_chunk_text_scores : 0.153392 +Intercept : -7.798639 +-------------------------------------------------- +``` + +Which seems quite good. With such a small dataset however, it is easy to overfit. Let us evaluate on the unseen test queries to see how well the model generalizes. + +First, we need to add the learned coefficients as inputs to a new rank profile in our schema, so that we can use them in Vespa. 
+ +```txt expandable +rank-profile learned-linear inherits collect-training-data { + match-features: + inputs { + query(embedding) tensor(x[96]) + query(float_embedding) tensor(x[768]) + query(intercept) double + query(avg_top_3_chunk_sim_scores_param) double + query(avg_top_3_chunk_text_scores_param) double + query(bm25_chunks_param) double + query(bm25_title_param) double + query(max_chunk_sim_scores_param) double + query(max_chunk_text_scores_param) double + } + first-phase { + expression { + query(intercept) + + query(avg_top_3_chunk_sim_scores_param) * avg_top_3_chunk_sim_scores() + + query(avg_top_3_chunk_text_scores_param) * avg_top_3_chunk_text_scores() + + query(bm25_title_param) * bm25(title) + + query(bm25_chunks_param) * bm25(chunks) + + query(max_chunk_sim_scores_param) * max_chunk_sim_scores() + + query(max_chunk_text_scores_param) * max_chunk_text_scores() + } + } + summary-features { + top_3_chunk_sim_scores + } + + } +``` + +To allow for changing the parameters without redeploying the application, we will also add the values of the coefficients as query parameters to a new query profile. + +```xml expandable + + doc + embed(@query) + embed(@query) + -7.798639 + 13.383840 + 0.203145 + 0.159914 + 0.191867 + 10.067169 + 0.153392 + + select * + from %{schema} + where default contains text(@query) or + ({label:"title_label", targetHits:100}nearestNeighbor(title_embedding, embedding)) or + ({label:"chunks_label", targetHits:100}nearestNeighbor(chunk_embeddings, embedding)) + + 10 + learned-linear + top_3_chunks + +``` + + +### Evaluating first-phase ranking + +Now we are ready to evaluate our first-phase ranking function. We can use the [VespaEvaluator](https://vespa-engine.github.io/pyvespa/evaluating-vespa-application-cloud.html#vespaevaluator) from the [pyvespa](https://vespa-engine.github.io/pyvespa/) library to evaluate the first-phase ranking function. 
+ +Run the following command to run the [evaluation script](https://github.com/vespa-engine/sample-apps/blob/master/rag-blueprint/eval/evaluate_ranking.py) + +```bash +$ python eval/evaluate_ranking.py +``` + +We run the evaluation script on a set of unseen test queries, and get the following output: + +```json expandable +{ + "accuracy@1": 1.0, + "accuracy@3": 1.0, + "accuracy@5": 1.0, + "accuracy@10": 1.0, + "precision@10": 0.235, + "recall@10": 0.9405, + "precision@20": 0.13, + "recall@20": 0.9955, + "mrr@10": 1.0, + "ndcg@10": 0.8902, + "map@100": 0.8197, + "searchtime_avg": 0.017, + "searchtime_q50": 0.0165, + "searchtime_q90": 0.0251, + "searchtime_q95": 0.0267 +} +``` + +For the first phase ranking, we care most about recall, as we just want to make sure that the candidate documents are ranked high enough to be included in the second-phase ranking. The number of documents to be reranked in second-phase in total over all content nodes is controlled by the `total-rerank-count` parameter. + +We can see that our results are already very good. This is of course due to the fact that we have a small, synthetic dataset. In reality, you should align the metric expectations with your dataset and test queries. + +We can also see that our search time is quite fast, with an average of 17ms. You should consider whether this is well within your latency budget, as you want some headroom for second-phase ranking. + + +## Second-phase ranking + +For the second-phase ranking, we can afford to use a more expensive ranking expression, since we will only run it on the top-k documents from the first-phase ranking (decided by the `total-rerank-count` parameter). + +This is where we can significantly improve ranking quality by using more sophisticated models and features that would be too expensive to compute for all matched documents.
+ + +### Collecting features for second-phase ranking + +For second-phase ranking, we request Vespa's default set of rank features, which includes a comprehensive set of text features. See the [rank features documentation](/en/reference/ranking/rank-features) for complete details. + +We can collect both match features and rank features by running the same [script](https://github.com/vespa-engine/sample-apps/blob/master/rag-blueprint/eval/collect_pyvespa.py) as we did for first-phase ranking, with some additional parameters to collect rank features as well: + +```bash +$ python eval/collect_pyvespa.py --collect_rankfeatures --collect_matchfeatures --collector_name rankfeatures-secondphase +``` + +This collects approximately 194 features, providing a rich feature set for training more sophisticated ranking models. + + +### Training a GBDT model for second-phase ranking + +With the expanded feature set, we can train a Gradient Boosted Decision Tree (GBDT) model to predict document relevance. We use [LightGBM](/en/ranking/lightgbm) for this purpose. + +Vespa also supports [XGBoost](/en/ranking/xgboost) and [ONNX](/en/ranking/onnx) models. 
+ +To train the model, run the following command ([link to training script](https://github.com/vespa-engine/sample-apps/blob/master/rag-blueprint/eval/train_lightgbm.py)): + +```bash +$ python eval/train_lightgbm.py --input_file eval/output/Vespa-training-data_match_rank_second_phase_20250623_135819.csv +``` + +The training process includes several important considerations: + +- **Cross-validation**: We use 5-fold stratified cross-validation to evaluate model performance and prevent overfitting +- **Hyperparameter tuning**: We set conservative hyperparameters to prevent growing overly large and deep trees, especially important for smaller datasets +- **Feature selection**: Features with zero importance during cross-validation are excluded from the final model +- **Early stopping**: Training stops when validation scores don't improve for 50 rounds + +Example training output: + +```txt +------------------------------------------------------------ + Cross-Validation Results (5-Fold) +------------------------------------------------------------ +Metric | Mean | Std Dev +------------------------------------------------------------ +Accuracy | 0.9214 | 0.0664 +ROC AUC | 0.9863 | 0.0197 +------------------------------------------------------------ +Overall CV AUC: 0.9249 • ACC: 0.9216 +------------------------------------------------------------ +``` + + +### Feature importance analysis + +The trained model reveals which features are most important for ranking quality. 
For our sample application, the top features include: + +| Feature | Importance | +| :--- | :--- | +| nativeProximity | 168.8498 | +| firstPhase | 151.7382 | +| max_chunk_sim_scores | 69.4377 | +| avg_top_3_chunk_text_scores | 56.5079 | +| avg_top_3_chunk_sim_scores | 31.8700 | +| nativeRank | 20.0716 | +| nativeFieldMatch | 15.9914 | +| elementSimilarity(chunks) | 9.7003 | + +Key observations: + +- **Text proximity features** ([nativeProximity](/en/reference/ranking/nativerank#nativeProximity)) are highly valuable for understanding query-document relevance +- **First-phase score** (`firstPhase`) being important validates that our first-phase ranking provides a good foundation +- **Chunk-level features** (both text and semantic) contribute significantly to ranking quality +- **Traditional text features** like [nativeRank](/en/reference/ranking/nativerank#nativeRank) and [bm25](/en/ranking/bm25#ranking-function) remain important + + +### Integrating the GBDT model into Vespa + +The trained LightGBM model is exported and added to your Vespa application package: + +```txt +app/ +├── models/ +│ └── lightgbm_model.json +``` + +Create a new rank profile that uses this model: + +```txt +rank-profile second-with-gbdt inherits collect-training-data { + ... + + second-phase { + expression: lightgbm("lightgbm_model.json") + } + + ... 
+} +``` + +And redeploy your application: + +```bash +$ vespa deploy +``` + + +### Evaluating second-phase ranking performance + +Run the [evaluate_ranking.py](https://github.com/vespa-engine/sample-apps/blob/master/rag-blueprint/eval/evaluate_ranking.py) script to evaluate the GBDT-powered second-phase ranking on unseen test queries: + +```bash +$ python evaluate_ranking.py --second_phase +``` + +Expected results should show something like this: + +```json expandable +{ + "accuracy@1": 0.9, + "accuracy@3": 1.0, + "accuracy@5": 1.0, + "accuracy@10": 1.0, + "precision@10": 0.235, + "recall@10": 0.9402, + "precision@20": 0.13, + "recall@20": 0.9955, + "mrr@10": 0.95, + "ndcg@10": 0.8782, + "map@100": 0.8091, + "searchtime_avg": 0.0204, + "searchtime_q50": 0.018, + "searchtime_q90": 0.0333, + "searchtime_q95": 0.0362 +} +``` + +For a larger dataset, we would expect to see significant improvement over first-phase ranking. Since our first-phase ranking is already quite good, we cannot see this here, but we will leave the comparison code for you to run on a real-world dataset. + +We also observe a slight increase in search time (from about 17ms to 20ms on average), which is expected due to the additional complexity of the GBDT model. + + +### Query profiles with GBDT ranking + +Create new query profiles that leverage the improved ranking: + +```xml + + second-with-gbdt + 20 + + + + 50 + openai + sse + +``` + +Test the improved ranking: + +```bash +$ vespa query query="what are key points learned for finetuning llms?" queryProfile=hybrid-with-gbdt +``` + +For RAG applications with LLM generation: + +```bash +$ vespa query \ + --timeout 60 \ + query="what are key points learned for finetuning llms?"
\ + queryProfile=rag-with-gbdt +``` + + +### Best practices for second-phase ranking + +**Model complexity considerations:** + +- Use more sophisticated models (GBDT, neural networks) that would be too expensive for first-phase +- Take advantage of the reduced candidate set (typically 100-10,000 documents) +- Include expensive text features like `nativeProximity` and `fieldMatch` + +**Feature engineering:** + +- Combine first-phase scores with additional text and semantic features +- Use chunk-level aggregations (max, average, top-k) to capture document structure +- Include metadata signals + +**Training data quality:** + +- Use the first-phase ranking to generate better training data +- Consider having LLMs generate relevance judgments for top-k results +- Iteratively improve with user interaction data when available + +**Performance monitoring:** + +- Monitor latency impact of second-phase ranking +- Adjust `total-rerank-count` based on quality vs. performance trade-offs +- Consider using different models for different query types or use cases + +The second-phase ranking represents a crucial step in building high-quality RAG applications, providing the precision needed for effective LLM context while maintaining reasonable query latencies. + + +## (Optional) Global-phase ranking + +We also have the option of configuring [global-phase](/en/reference/schemas/schemas#globalphase-rank) ranking, which can rerank the top k (as set by the `total-rerank-count` parameter) documents from the second-phase ranking. + +Common options for global-phase are [cross-encoders](/en/ranking/cross-encoders) or another GBDT model, trained for better separating top ranked documents on objectives such as [LambdaMART](https://xgboost.readthedocs.io/en/latest/tutorials/learning_to_rank.html). For RAG applications, we consider this less important than for search applications where the results are mainly consumed by a human, as LLMs don't care that much about the ordering of the results.
+ + +## Further improvements + +Finally, we will sketch out some opportunities for further improvements. As you have seen, we started out with only binary relevance labels for a few queries, and trained a model based on the relevant docs and a set of random documents. + +As you may have noted, we have not discussed what most people think about when discussing RAG evals, evaluating the "Generation"-step. There are several tools available to do this, for example [ragas](https://docs.ragas.io/en/stable/) and [ARES](https://github.com/stanford-futuredata/ARES). We refer to other sources for details on this, as this tutorial is probably enough to digest as it is. + +Training on the relevant documents plus a set of random documents was useful initially, as we had no better way to retrieve the candidate documents. Now that we have a reasonably good second-phase ranking, we could potentially generate a new set of relevance labels for queries that we did not have labels for by having an LLM do relevance judgments of the top k returned hits. This training dataset would likely be even better in separating the top documents. + + +## Summary + +In this tutorial, we have built a complete RAG application using Vespa, providing our recommendations for how to approach each stage: a retrieval phase with binary vectors and text matching, first-phase ranking with a linear combination of relatively cheap features, and a more sophisticated second-phase ranking with more expensive features and a GBDT model. + +We hope that this tutorial, along with the provided code in our [sample-apps repository](https://github.com/vespa-engine/sample-apps/tree/master/rag-blueprint), will serve as a useful reference for building your own RAG applications, with an evaluation-driven approach. + +By using the principles demonstrated in this tutorial, you are empowered to build high-quality RAG applications that can scale to any dataset size, and any query load. + + +## FAQ + + + +We love ColBERT, and it has shown great performance.
We do support ColBERT-style models in Vespa. The challenge is the added cost in memory storage, especially for large-scale applications. If you use it, we recommend binarizing the vectors to reduce memory usage 32x compared to float. If you want to improve the ranking quality and accept the additional cost, we encourage you to evaluate and try. Here are some resources if you want to learn more about using ColBERT with Vespa: + +- [Announcing ColBERT embedder](https://blog.vespa.ai/announcing-colbert-embedder-in-vespa/#what-is-colbert?) +- [Long context ColBERT](https://blog.vespa.ai/announcing-long-context-colbert-in-vespa/) +- [Long context ColBERT sample app](https://github.com/vespa-engine/sample-apps/tree/master/colbert-long/#vespa-sample-applications---long-context-colbert) +- [ColBERT sample app](https://github.com/vespa-engine/sample-apps/tree/master/colbert) +- [ColBERT embedder reference](/en/rag/embedding#colbert-embedder) +- [ColBERT standalone python example notebook](https://vespa-engine.github.io/pyvespa/examples/colbert_standalone_Vespa-cloud.html) +- [ColBERT standalone long context example notebook](https://vespa-engine.github.io/pyvespa/examples/colbert_standalone_long_context_Vespa-cloud.html) + + + +Vespa supports a variety of embedding models. For a list of Vespa-provided models on Vespa Cloud, see [Model hub](/en/rag/model-hub). See also [embedding reference](/en/rag/embedding#provided-embedders) for how to use embedders. You can also use private models (gated by authentication with Bearer token from Vespa Cloud secret store). + + + +No, you are free to use Vespa as a search engine. We provide the option of calling out to LLMs from within a Vespa application for reduced latency compared to sending large search result sets several times over the network, as well as the option to deploy local LLMs, optionally in your own infrastructure if you prefer.
See [Vespa Cloud Enclave](/en/operations/enclave/enclave). + + + +Binary vectors take up a lot less memory and are faster to compute distances on, with only a slight reduction in quality. See the blog [post](https://blog.vespa.ai/combining-matryoshka-with-binary-quantization-using-embedder/) for details. + + + +Vespa can scale both the stateless container nodes and content nodes of your application. See [overview](../overview) and [elasticity](/en/content/elasticity) for details. + + diff --git a/mintlify-docs/en/learn/tutorials/text-search-ml.mdx b/mintlify-docs/en/learn/tutorials/text-search-ml.mdx new file mode 100644 index 0000000000..fdb451b603 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/text-search-ml.mdx @@ -0,0 +1,333 @@ +--- +title: "Improving Text Search through ML" +--- + +At this point, we assume you have read our [Text Search Tutorial](/en/learn/tutorials/text-search) and accomplished the following steps. + +- Created and deployed a basic text search app in Vespa. +- Fed the app with the MS MARCO full document dataset. +- Compared and evaluated two different ranking functions. + +We are now going to show you how to create a dataset that joins relevance information from the MS MARCO dataset with ranking features from Vespa to enable you to train ML models to improve your application. More specifically, you will accomplish the following steps in this tutorial. + +- Learn how to collect rank feature data from Vespa associated with a specific query. +- Create a dataset that can be used to improve your app's ranking function. +- Propose sanity-checks to help you detect bugs in your data collection logic and ensure you have a properly built dataset at the end of the process. +- Illustrate the importance of going beyond pointwise loss functions when dealing with Learning To Rank (LTR) tasks.
+ +[Vespa Product Ranking](https://github.com/vespa-engine/sample-apps/tree/master/commerce-product-ranking) is a good resource for Learning To Rank using XGBoost and LightGBM, with linked blog posts. + + +## Collect rank feature data from Vespa + +Vespa's [rank feature set](/en/reference/ranking/rank-features) contains a large set of low and high level features. Those features are useful to understand the behavior of your app and to improve your ranking function. + + +### Default rank features + +To access the default set of ranking features, set the query parameter [`ranking.listFeatures`](/en/reference/api/query#ranking.listfeatures) to `true`. For example, below is the body of a post request that in a [query](/en/querying/query-language), selects the `bm25` rank-profile developed in the previous tutorial and returns the rank features associated with each of the results returned. + +```bash +$ vespa query \ + 'yql=select id,rankfeatures from msmarco where userQuery()' \ + 'query=what is dad bod' \ + 'ranking=bm25' \ + 'type=weakAnd' \ + 'ranking.listFeatures=true' +``` + +The list of rank features that are returned by default can change in the future - the current list can be checked in the [system test](https://github.com/vespa-engine/system-test/blob/master/tests/search/rankfeatures/dump.txt). For the request specified by the body above we get the following (edited) json back. Each result will contain a field called `rankfeatures` containing the set of default ranking features: + +```json expandable +{ + "root": { + "children": [ + ... + { + "fields": { + "rankfeatures": { + ... 
+ "attributeMatch(id).totalWeight": 0.0, + "attributeMatch(id).weight": 0.0, + "elementCompleteness(body).completeness": 0.5051413881748072, + "elementCompleteness(body).elementWeight": 1.0, + "elementCompleteness(body).fieldCompleteness": 0.010282776349614395, + "elementCompleteness(body).queryCompleteness": 1.0, + "elementCompleteness(title).completeness": 0.75, + "elementCompleteness(title).elementWeight": 1.0, + "elementCompleteness(title).fieldCompleteness": 1.0, + "elementCompleteness(title).queryCompleteness": 0.5, + "fieldMatch(body)": 0.7529285549778888, + "fieldMatch(body).absoluteOccurrence": 0.065, + ... + } + }, + "id": "index:msmarco/0/811ccbaf9796f92bfa343045", + "relevance": 37.7705101001455, + "source": "msmarco" + }, + ], + ... + } +} +``` + + +### Choose and process specific rank features + +If instead of returning the complete set of rank features you want to select [specific ones](/en/reference/ranking/rank-features), you can add a new rank-profile (let's call it `collect_rank_features`) to our *msmarco.sd* schema definition and disable the default ranking features by adding `ignore-default-rank-features` to the new rank-profile. In addition, we can specify the desired features within the `rank-features` element. In the example below we explicitly configured Vespa to only return `bm25(title)`, `bm25(body)`, `nativeRank(title)` and `nativeRank(body)`. + +Note that using *all* available rank features comes with computational cost, as Vespa needs to calculate all these features. Using many features is usually only advisable using second phase ranking, see [phased ranking with Vespa](/en/ranking/phased-ranking).
+ +```sd expandable +schema msmarco { + document msmarco { + field id type string { + indexing: attribute | summary + } + field title type string { + indexing: index | summary + index: enable-bm25 + } + field url type string { + indexing: index | summary + } + field body type string { + indexing: index + index: enable-bm25 + } + } + + document-summary minimal { + summary id { } + } + + fieldset default { + fields: title, body, url + } + + rank-profile default { + first-phase { + expression: nativeRank(title, body, url) + } + } + + rank-profile bm25 inherits default { + first-phase { + expression: bm25(title) + bm25(body) + bm25(url) + } + } + + rank-profile collect_rank_features inherits default { + first-phase { + expression: bm25(title) + bm25(body) + bm25(url) + } + second-phase { + expression: random + } + + match-features { + bm25(title) + bm25(body) + bm25(url) + nativeRank(title) + nativeRank(body) + nativeRank(url) + } + } +} +``` + + +``` +Paste the above into file text-search/app/schemas/msmarco.sd +``` + +The [random](/en/reference/ranking/rank-features#random) global feature will be useful in the next section when we describe our data collection process. + +After adding the `collect_rank_features` rank-profile to *msmarco.sd*, redeploy the app: + +```bash +$ vespa deploy --wait 300 app +``` + + +## Create a training dataset + +The [MS MARCO](https://microsoft.github.io/msmarco/) dataset described in [the previous tutorial](/en/learn/tutorials/text-search) provides us with more than 300 000 training queries, each of which is associated with a specific document ID that is relevant to the query. In this section we want to combine the information contained in the pairs `(query, relevant_id)` with the information available in the Vespa ranking features to create a dataset that can be used to train ML models to improve the ranking function of our msmarco text app. 
+ +Before we move on to describe the collection process in detail, we want to point out that the whole process can be replicated by the following call to the data collection script `collect_training_data.py` available in [this tutorial repository](https://github.com/vespa-engine/sample-apps/tree/master/text-search): + +The following routine requires that you have downloaded the full dataset. + +```bash +$ ./src/python/collect_training_data.py msmarco collect_rank_features 99 +``` + +The command above uses data contained in the query (msmarco-doctrain-queries.tsv.gz) and in the relevance (msmarco-doctrain-qrels.tsv.gz) files that are part of the MSMARCO dataset, and sends queries to Vespa using the `collect_rank_features` rank-profile defined in the previous section in order to request `99` randomly selected documents for each query in addition to the relevant document associated with the query. All the data from the request are then parsed and stored in the output folder, which is chosen to be `data` in this case. + + +### Data collection logic + +Since we want to improve the first-phase ranking function of our application, our goal here is to create a dataset that will be used to train models that will generalize well when used in the first-phase ranking of an actual Vespa instance running against possibly unseen queries and documents. This might be obvious at first but turns out to be easy to neglect when making some data collection decisions. + +The logic behind the `collect_training_data.py` can be summarized by the pseudo-code below: + +```python +hits = get_relevant_hit(query, rank_profile, relevant_id) +if hits: + hits.extend(get_random_hits(query, rank_profile, number_random_sample)) + data = annotate_data(hits, query_id, relevant_id) + append_data(file, data) +``` + +For each query, we first send a request to Vespa to get the relevant document associated with the query.
If the relevant document is matched by the query, Vespa will return it, and we will expand the number of documents associated with the query by sending a second request to Vespa. The second request asks Vespa to return a number of random documents sampled from the set of documents that were matched by the query. We then parse the hits returned by Vespa and organize the data into a tabular form containing the rank features and the binary variable indicating if the query-document pair is relevant or not. + +We are only interested in collecting documents that are matched by the query because those are the documents that would be presented to the first-phase model in a production environment. This means that we will likely leave some queries that contain information about relevant documents out of the collected dataset, but it will create a dataset that is closer to our stated goal. In other words, the dataset we collect is conditional on our match criteria. + + +### Get relevant hit + +The first Vespa request is contained in the function call `get_relevant_hit(query, rank_profile, relevant_id)` where the `query` parameter contains the desired query string, `rank_profile` is set to the `collect_rank_features` defined earlier and `relevant_id` is the document ID that is said to be relevant to that specific query. + +The body of the request is given by: + +```python +body = { + "yql": "select id, rankfeatures from sources * where userQuery()", + "query": query, + "hits": 1, + "recall": "+id:" + str(relevant_id), + "ranking": {"profile": rank_profile, "listFeatures": "true"}, +} +``` + +where the `yql` and `userQuery` parameters instruct Vespa to return the *id* of the documents along with the selected rank-features defined in the `collect_rank_features` rank-profile. The `hits` parameter is set to 1 because we know there is only one relevant id for each query, so we set Vespa to return only one document in the result set.
The `recall` parameter allows us to specify the exact document *id* we want to retrieve. + +Note that the parameter `recall` only works if the document is matched by the query, which is exactly the behavior we want in this case. + +The `recall` syntax to retrieve one document with id equal to 1 is given by `"recall": "+id:1"` and the syntax to retrieve more than one document, say documents with ids 1 and 2, is given by `"recall": "+(id:1 id:2)"`. + +If we wanted to retrieve the document even if it did not match the query specification we could alter the query to use the following query specification: + +```python +body = { + "yql": "select id, rankfeatures from sources * where true or userQuery()", + "query": query, + "hits": 1, + "recall": "+id:" + str(relevant_id), + "ranking": {"profile": rank_profile, "listFeatures": "true"}, +} +``` + + +### Get random hits + +The second Vespa request happens when we want to extend the dataset by adding randomly selected documents from the matched set. The request is contained in the function call `get_random_hits(query, rank_profile, number_random_sample)` where the only new parameter is `number_random_sample`, which specifies how many documents we should sample from the matched set. + +The body of the request is given by: + +```python +body = { + "yql": "select id, rankfeatures from sources * where default contains text(@userQuery)", + "userQuery": query, + "hits": number_random_sample, + "ranking": {"profile": rank_profile, "listFeatures": "true"}, +} +``` + +where the only changes with respect to the `get_relevant_hit` are that we no longer need to use the `recall` parameter and that we set the number of hits returned by Vespa to be equal to `number_random_sample`.
+ +Remember we had configured the second phase to use random scoring: + +```sd +second-phase { + expression: random +} +``` + +Using `random` as our second-phase ranking function ensures that the top documents returned by Vespa are randomly selected from the set of documents that were matched by the query. + + +### Annotated data + +Once we have both the relevant and the random documents associated with a given query, we parse the Vespa result and store it in a file with the following format: + +| bm25(body) | bm25(title) | nativeRank(body) | nativeRank(title) | docid | qid | relevant | +| --- | --- | --- | --- | --- | --- | --- | +| 25.792076 | 12.117309 | 0.322567 | 0.084239 | D312959 | 3 | 1 | +| 22.191228 | 0.043899 | 0.247145 | 0.017715 | D3162299 | 3 | 0 | +| 13.880625 | 0.098052 | 0.219413 | 0.036826 | D2823827 | 3 | 0 | + +where the values in the `relevant` column are equal to 1 if document `docid` is relevant to the query `qid` and zero otherwise. + + +## Data collection sanity check + +In the process of writing this tutorial and creating the data collection logic described above, we found it useful to develop a data collection sanity-check to help us catch bugs in our process. There is no unique right answer here, but our proposal is to use the dataset to train a model using the same features and functional form used by the baseline you want to improve upon. If the dataset is well-built and contains useful information about the task you are interested in, you should be able to get results at least as good as the one obtained by your baseline on a separate test set. + +In our case, the baseline is the ranking function used in [our previous tutorial](/en/learn/tutorials/text-search): + +```sd +rank-profile bm25 inherits default { + first-phase { + expression: bm25(title) + bm25(body) + } +} +``` + +Therefore, our sanity-check model will be a linear model containing only the two features above, i.e. 
`a + b * bm25(title) + c * bm25(body)`, where `a`, `b` and `c` should be learned by using our collected dataset. + +We split our dataset into training and validation sets, train the linear model and evaluate it on the validation dataset. We then expect the difference observed in the collected validation set between the model and the baseline to be similar to the difference observed on a running instance of Vespa when applied to an independent test set. In addition, we expect the trained model to do at least as well as the baseline on a test set, given that the baseline model is contained in the set of possible trained models and is recovered when `a=0`, `b=1` and `c=1`. + +This is a simple procedure, but it did catch some bugs while we were writing this tutorial. For example, at one point we forgot to include + +```sd +second-phase { + expression: random +} +``` + +in the `collect_rank_features` rank-profile leading to a biased dataset where the negative examples were actually quite relevant to the query. The trained model did well on the validation set, but failed miserably on the test set when deployed to Vespa. This showed us that our dataset probably had a different distribution than what was observed on a running Vespa instance and led us to investigate and catch the bug. + + +## Beyond pointwise loss functions + +The most straightforward way to train the linear model mentioned in the previous section would be to use a vanilla logistic regression, since our target variable `relevant` is binary. The most commonly used loss function in this case (binary cross-entropy) is referred to as a pointwise loss function in the LTR literature, as it does not take the relative order of documents into account. However, as we described in [the previous tutorial](/en/learn/tutorials/text-search), the metric that we want to optimize in this case is the Mean Reciprocal Rank (MRR).
The MRR is affected by the relative order of the relevance we assign to the list of documents generated by a query and not by their absolute magnitudes. This disconnect between the characteristics of the loss function and the metric of interest might lead to suboptimal results. + +For ranking search results, it is preferable to use a listwise loss function when training our linear model, which takes the entire ranked list into consideration when updating the model parameters. To illustrate this, we trained linear models using the [TF-Ranking framework](https://github.com/tensorflow/ranking). The framework is built on top of TensorFlow and allows us to specify pointwise, pairwise and listwise loss functions, among other things. + +The two *rank-profile*s below are obtained by training the linear model with a pointwise (sigmoid cross-entropy) and a listwise (softmax cross-entropy) loss function, respectively: + +```sd +rank-profile pointwise_linear_bm25 inherits default { + first-phase { + expression: 0.22499913 * bm25(title) + 0.07596389 * bm25(body) + } +} + +rank-profile listwise_linear_bm25 inherits default { + first-phase { + expression: 0.13446581 * bm25(title) + 0.5716889 * bm25(body) + } +} +``` + +It is interesting to see that a pointwise loss function puts more weight on the title relative to the body while the opposite happens when using the listwise loss function. + +The figure below shows how frequently (over more than 5,000 test queries) those two ranking functions allocate the relevant document between the 1st and 10th position of the list of documents returned by Vespa.
Although there is not a huge difference between those models on average, we can clearly see in the figure below that a model based on a listwise loss function allocates more documents in the first two positions of the ranked list when compared to the pointwise model: + + +![Plot of pointwise and listwise BM25](/assets/img/tutorials/text_search_baseline_pointwise_listwise_rr.png) + + +Overall, on average, there is not much difference between those models (with respect to MRR), which was expected given the simplicity of the models described here. The goal was simply to point out the importance of choosing better loss functions when dealing with LTR tasks and to give a quick start for those who want to give it a shot in their own applications. We expect the difference in MRR between pointwise and listwise loss functions to increase as we move on to more complex models. + + +## Next steps + +In this tutorial we have looked at using a simple *linear* ranking function. Vespa integrates with several popular machine learning libraries which can be used for Machine Learned Ranking: + +- [Ranking with XGBoost Models](/en/ranking/xgboost) +- [Ranking with LightGBM Models](/en/ranking/lightgbm) +- [Ranking with ONNX Models](/en/ranking/onnx) diff --git a/mintlify-docs/en/learn/tutorials/text-search.mdx b/mintlify-docs/en/learn/tutorials/text-search.mdx new file mode 100644 index 0000000000..7d71b2a5b6 --- /dev/null +++ b/mintlify-docs/en/learn/tutorials/text-search.mdx @@ -0,0 +1,692 @@ +--- +title: "Text Search Tutorial" +--- + +This tutorial will guide you through setting up a simple text search application. At the end, you can index text documents in Vespa and search them via text queries. The application built here will be the foundation for other tutorials, such as creating ranking functions based on Machine Learning (ML) models.
+ +The main goal is to set up a text search app based on simple text scoring features such as [BM25](/en/ranking/bm25) [^1] and [nativeRank](/en/reference/ranking/nativerank). + + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` + +- See [Docker Containers](/en/operations/self-managed/docker-containers) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 4 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). +- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [Github releases](https://github.com/vespa-engine/vespa/releases). +- Python3 +- `curl` + + + +## Installing vespa-cli + +This tutorial uses [Vespa-CLI](/en/clients/vespa-cli) to deploy, feed and query Vespa. Below, we use [HomeBrew](https://brew.sh/) to download and install `vespa-cli`, you can also download a binary from [GitHub](https://github.com/vespa-engine/vespa/releases) for your OS/CPU architecture. + +```bash +$ brew install vespa-cli +``` + +We acquire the scripts to follow this tutorial from the [sample-apps repository](https://github.com/vespa-engine/sample-apps/tree/master/text-search) via `vespa clone`. + +```bash +$ vespa clone text-search text-search && cd text-search +``` + +The repository contains a fully-fledged Vespa application, but below, we will build it all from scratch for educational purposes. 
+ + +## Dataset + +We use a dataset called [MS MARCO](https://microsoft.github.io/msmarco/) throughout this tutorial. MS MARCO is a collection of large-scale datasets released by Microsoft to help advance deep learning research related to search. Many tasks are associated with MS MARCO datasets, but we want to build an end-to-end search application that returns relevant documents to a text query. We have included a small dataset sample for this tutorial under the `ext/sample` directory, which contains around 1000 documents. + +The sample data must be converted to Vespa [JSON feed format](/en/reference/schemas/document-json-format). The following step includes extracting documents, queries and relevance judgments from the sample files: + +```bash +$ ./scripts/convert-msmarco.sh +``` + +After running the script, we end up with a file `dataset/documents.jsonl` containing lines such as the one below: + +```json +{ + "put": "id:msmarco:msmarco::D1555982", + "fields": { + "id": "D1555982", + "url": "https://answers.yahoo.com/question/index?qid=20071007114826AAwCFvR", + "title": "The hot glowing surfaces of stars emit energy in the form of electromagnetic radiation", + "body": "Science Mathematics Physics The hot glowing surfaces of stars emit energy in the form of electromagnetic radiation ... " + } +} +``` + +In addition to `dataset/documents.jsonl` we also have a `test-queries.tsv` file containing a list of the sampled queries along with the document ID relevant to each particular query. + + +## Create a Vespa Application Package + +A [Vespa application package](/en/basics/applications) is a set of configuration files and optional Java components that together define the behavior of a Vespa system. Let us define the minimum set of required files to create our basic text search application: `msmarco.sd` and `services.xml`. 
+ +For this tutorial, we will create a new Vespa application rather than using the one in the repository, so we will create a directory for this application: + +```bash +$ mkdir -p app/schemas +``` + + +### Schema + +A [schema](/en/basics/schemas) is a document-type configuration; a single vespa application can have multiple schemas with document types. For this application, we define a schema `msmarco` which must be saved in a file named `schemas/msmarco.sd`. Write the following to `text-search/app/schemas/msmarco.sd`: + +```sd expandable +schema msmarco { + document msmarco { + field language type string { + indexing: "en" | set_language + } + field id type string { + indexing: attribute | summary + match: word + } + field title type string { + indexing: index | summary + match: text + index: enable-bm25 + } + field body type string { + indexing: index | summary + match: text + index: enable-bm25 + } + field url type string { + indexing: index | summary + index: enable-bm25 + } + } + fieldset default { + fields: title, body, url + } + document-summary minimal { + summary id { } + } + document-summary debug-tokens { + summary url {} + summary url-tokens { + source: url + tokens + } + from-disk + } + rank-profile default { + first-phase { + expression: nativeRank(title, body, url) + } + } + rank-profile bm25 inherits default { + first-phase { + expression: bm25(title) + bm25(body) + bm25(url) + } + } +} +``` + +A lot is going on here; let us go through it in detail. + + +#### Document type and fields + +The `document` section contains the fields of the document, their types, and how Vespa should index and [match](/en/reference/schemas/schemas#match) them. + +The field property `indexing` configures the *indexing pipeline* for a field. For more information, see [schemas - indexing](/en/basics/schemas#document-fields). 
The [string](/en/reference/schemas/schemas#string) data type is used to represent both unstructured and structured texts, and there are significant differences between [index and attribute](/en/querying/text-matching#index-and-attribute). The above schema spells out the default `match` modes for `attribute` and `index` fields explicitly, for visibility. + +Note that we are enabling the usage of [BM25](/en/ranking/bm25) for `title`, `body` and `url` by including `index: enable-bm25`. The language field is the only field not in the msmarco dataset. We hardcode its value to "en" since the dataset is English. Using `set_language` avoids automatic language detection and uses the value when processing the other text fields. Read more in [linguistics](/en/linguistics/linguistics). + + +#### Fieldset for matching across multiple fields + +[Fieldset](/en/reference/schemas/schemas#fieldset) allows searching across multiple fields. Defining `fieldset` does not add indexing/storage overhead. String fields grouped using fieldsets must share the same [match](/en/reference/schemas/schemas#match) and [linguistic processing](/en/linguistics/linguistics) settings because the query processing that searches a field or fieldset uses *one* type of transformation. + + +#### Document summaries to control search response contents + +Next, we define two [document summaries](/en/querying/document-summaries). Document summaries control what fields are available in the [response](/en/reference/querying/default-result-format); we include the `debug-tokens` document-summary to demonstrate later how we can get visibility into how text is converted into searchable tokens. + + +#### Ranking to determine matched documents ordering + +You can define many [rank profiles](/en/basics/ranking), which are named collections of score calculations and ranking phases. + +In this tutorial, we define our `default` rank profile to use [nativeRank](/en/reference/ranking/nativerank). 
In addition, we have a `bm25` rank-profile that uses [bm25](/en/ranking/bm25). Both are examples of text-scoring [rank-features](/en/reference/ranking/rank-features) in Vespa. + + +### Services Specification + +The [services.xml](/en/reference/applications/services/services) defines the services that make up the Vespa application — which services to run and how many nodes per service. Write the following to `text-search/app/services.xml`: + +```xml expandable + + + + + + + + + + + 1 + + + + + + + + + +``` + +``` +Paste the above into file text-search/app/services.xml +``` + +Some notes about the elements above: + +- `<container>` defines the [container cluster](/en/applications/containers) for document, query and result processing. +- `<search>` sets up the [query endpoint](/en/querying/query-api). The default port is 8080. +- `<document-api>` sets up the [document endpoint](/en/reference/api/document-v1) for feeding. +- `<content>` defines how documents are stored and searched. +- `<min-redundancy>` denotes how many copies to keep of each document. +- `<documents>` assigns the document types in the *schema* to content clusters — the content cluster capacity can be increased by adding node elements — see [elasticity](/en/content/elasticity). (See also the [reference](/en/reference/applications/services/content) for more on content cluster setup.) +- `<nodes>` defines the hosts for the content cluster. + + +## Deploy the application package + +Once we have finished writing our application package, we can deploy it. We use settings similar to those in the [Vespa quick start guide](/en/basics/deploy-an-application-local). + +Start the Vespa container: + +```bash +$ docker run --detach --name vespa-msmarco --hostname vespa-msmarco \ + --publish 8080:8080 --publish 19071:19071 \ + vespaengine/vespa +``` + +Notice that we publish two ports: 8080 is the data-plane port where we write and query documents, and 19071 is the control-plane port where we deploy the application. 
+ +Configure the Vespa CLI to use the local container: + +```bash +$ vespa config set target local +``` + +Starting the container can take a short while. Make sure that the configuration service is running by using `vespa status`. + +```bash +$ vespa status deploy --wait 300 +``` + +Now, deploy the Vespa application from the `app` directory: + +```bash +$ vespa deploy --wait 300 app +``` + + +## Feed the data + +The data fed to Vespa must match the document type in the schema. The file `dataset/documents.jsonl` generated by the `convert-msmarco.sh` script described in the [dataset section](#dataset) already has data in the appropriate format expected by Vespa: + +```bash +$ vespa feed -t http://localhost:8080 dataset/documents.jsonl +``` + + +## Querying the data + +This section demonstrates various ways to search the data using the [Vespa query language](/en/querying/query-language). All the examples use the `vespa-cli` client. The tool uses the HTTP API, and if you pass `-v`, you will see the equivalent `curl` API request. + +```bash +$ vespa query \ + 'yql=select * from msmarco where default contains text(@user-query)' \ + 'user-query=what is dad bod' \ + 'hits=3' \ + 'language=en' +``` + +This query uses the YQL [text()](/en/reference/querying/yql#text) operator, a robust way to combine free text from users or models with application logic. Similar to `set_language` in indexing, we specify the language of the query using the [language](/en/linguistics/linguistics#querying-with-language) API parameter. This ensures symmetric linguistic processing of both the query and the document text. Automatic language detection is inaccurate for short query strings and might lead to asymmetric processing of queries and document texts. 
+ +Following is a partial output of the query above when using the small dataset sample: + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1, + "fields": { + "totalCount": 562 + }, + "children": [ + { + "id": "id:msmarco:msmarco::D2977840", + "relevance": 0.20676669550322158, + "source": "msmarco", + "fields": { + "sddocname": "msmarco", + "body": "After The Cut released a piece explaining what the dad bod is last week the internet pretty much exploded into debate over the trend ", + "documentid": "id:msmarco:msmarco::D2977840", + "id": "D2977840", + "title": "What Is A Dad Bod An Insight Into The Latest Male Body Craze To Sweep The Internet", + "url": "http://www.huffingtonpost.co.uk/2015/05/05/what-is-a-dadbod-male-body_n_7212072.html" + } + } + ] + } +} +``` + +As shown, 562 documents matched the query out of 996 in the corpus. The `first-phase` ranking expression scores all the matching documents. + +A few important observations: + +- We did not specify which fields to search in the query. Vespa will, by default, use a field set or field named `default` when the query terms do not specify a field. In our case: + +```sd +fieldset default { + fields: title, body, url +} +``` + +- Our query for `what is dad bod` searches across all those three fields. +- If we did not specify a `default` fieldset in the schema, the above query would return zero hits as the query did not specify a field. +- The hit `relevance` holds the score computed by the rank profile. Vespa uses `default` by default. 
In our case: + +```sd +rank-profile default { + first-phase { + expression: nativeRank(title, body, url) + } +} +``` + +We can use query operator annotations for the [text](/en/reference/querying/yql#text) operator to control various matching aspects, for example to set the number of hits to produce in the text operator: + +```bash +$ vespa query \ + 'yql=select * from msmarco where title contains ({targetHits:100}text(@user-query))' \ + 'user-query=what is dad bod' \ + 'hits=3' \ + 'language=en' +``` + +Notice how the query above matches fewer documents `totalCount:116` because we limited the free text query to the title field. We can change the [grammar](/en/reference/querying/yql#grammar) to specify how the user query text is parsed into a query execution plan. In the following example, we use `grammar:"all"` to specify that we only want to retrieve documents where *all* the query terms match the title field. + +```bash +$ vespa query \ + 'yql=select * from msmarco where title contains ({grammar:"all"}text(@user-query))' \ + 'user-query=what is dad bod' \ + 'hits=3' \ + 'language=en' +``` + +This query, using `all`, matches only one document. Notice how the relevance of the hit is the same as in the above example. The difference between the two types of queries is in the matching specification. + +We can use `text` to build a query that searches multiple fields (or fieldsets): + +```bash +$ vespa query \ + 'yql=select * from msmarco where title contains ({grammar:"all"}text(@user-query)) or url contains ({grammar:"all"}text(@user-query))' \ + 'user-query=what is dad bod' \ + 'hits=3' \ + 'language=en' +``` + + +### Boosting by query terms + +Sometimes, we want to add a query time boost if some field matches a query term; the following uses the [rank](/en/reference/querying/yql#rank) query operator. The rank query operator allows us to retrieve using the first operand, and the remaining operands can only impact ranking. 
+ +It is important to note that the following approach for query-time term boosting is in the context of using the `nativeRank` text scoring feature. + +```bash +$ vespa query \ + 'yql=select * from msmarco where rank(default contains text(@user-query), url contains ({weight:1000, significance:1.0}"www.answers.com"))' \ + 'user-query=what is dad bod' \ + 'hits=3' \ + 'language=en' +``` + +The above will match the user query against the default fieldset and produce match features for the second operand. It does not change the *retrieval* or *matching* as the number of documents exposed to ranking is the same as before. The `rank` operator can be used to implement a variety of use cases around boosting. + + +#### Combine free text with filters + +We can combine the `text` operator with application logic. We add an application-specific query filter on the `url` field to demonstrate how to combine `text` with other query-time constraints. We add `ranked:false` to tell Vespa that this specific term should not contribute to the relevance calculation and `filter:true` to ensure that the term is not used for [bolding/highlighting or dynamic snippeting](/en/querying/document-summaries#dynamic-snippets). + +```bash +$ vespa query \ + 'yql=select * from msmarco where default contains text(@user-query) and url contains ({filter:true,ranked:false}"huffingtonpost.co.uk")' \ + 'user-query=what is dad bod' \ + 'hits=3' \ + 'language=en' +``` + +Notice that the `relevance` stays the same since we used `ranked:false` for the filter. 
Let us see what is going on by adding [query tracing](/en/querying/query-api#query-tracing): + +```bash +$ vespa query \ + 'yql=select * from msmarco where default contains text(@user-query) and url contains ({filter:true,ranked:false}"huffingtonpost.co.uk")' \ + 'user-query=what is dad bod' \ + 'trace.level=3' \ + 'language=en' +``` + +We can notice the following in the trace output: + +``` +query=[AND (WEAKAND(100) default:what default:is default:dad default:bod) |url:'huffingtonpost co uk'] +``` + +Notice that the `text` part is converted to a [weakAnd](/en/ranking/wand) query operator and that this operator is AND'ed with a phrase search ('huffingtonpost co uk') in the `url` field. Notice also the field scoping where the query terms are prefixed with `default`. Notice also that punctuation characters (.) are removed as part of the tokenization. If filtering on specific strings like this is a common pattern, we should create a separate field to avoid phrase matching, since phrase matching is more expensive than a single-token search. + + +### Supporting end user query syntax + +In some applications, you'd like end users or models to be able to search specific fields, match phrases etc. To do that, you can use the `userInput` YQL operator instead of `text`: + +```bash +$ vespa query \ + 'yql=select * from msmarco where userInput(@user-query)' \ + 'user-query=title:"dad bod"' \ + 'hits=3' \ + 'language=en' +``` + +Notice that since the string given to userInput may specify the fields to search, there is no "field contains" part that specifies the field on the YQL side. See the [userInput() documentation](/en/reference/querying/yql#userinput) on the various end user query languages supported and other parameters that can be set. + + +### Debugging token string matching + +Query tracing, combined with a summary using [tokens](/en/reference/schemas/schemas#tokens), can help debug matching. 
+ +```bash +$ vespa query \ + 'yql=select * from msmarco where url contains ({filter:true,ranked:false}"huffingtonpost.co.uk")' \ + 'trace.level=0' \ + 'language=en' \ + 'summary=debug-tokens' +``` + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1, + "fields": { + "totalCount": 562 + }, + "children": [ + { + "id": "index:msmarco/0/59444ddd06537a24953b73e6", + "relevance": 0.0, + "source": "msmarco", + "fields": { + "sddocname": "msmarco", + "url": "http://www.huffingtonpost.co.uk/2015/05/05/what-is-a-dadbod-male-body_n_7212072.html", + "url-tokens": [ + "http", + "www", + "huffingtonpost", + "co", + "uk", + "2015", + "05", + "05", + "what", + "is", + "a", + "dadbod", + "male", + "body", + "n", + "7212072", + "html" + ] + } + } + ] + } +} +``` + +This gives us insight into how the input `url` field was tokenized and indexed. Those are the tokens that the query can match. Notice how punctuation characters like `:`, `,`, `.`, `/`, `_` and `-` are removed as part of the text tokenization. + +Observations: + +- Relevance is 0.0, because the term uses `ranked:false`. +- We cannot match "://" because those are not searchable characters with `match:text`. +- `dadbod` is a token in the url; it cannot be matched by `dad` or `bod`, as it is represented as a single token `dadbod`. + +Let us do a similar example to demonstrate the impact of linguistic stemming: + +```bash +$ vespa query \ + 'yql=select * from msmarco where url contains ({filter:true,ranked:false}"https")' \ + 'summary=debug-tokens' \ + 'language=en' +``` + +```json +{ + "url": "http://www.ourbabynamer.com/meaning-of-Anika.html", + "url-tokens": [ + "http", + "www", + "ourbabynamer", + "com", + "meaning", + "of", + "anika", + "html" + ] +} +``` + +Notice that a query for `https` matches `http`, because `https` in the query is stemmed to `http`. If we turn off stemming on the query side, searching for `https` directly, we end up with 0 results. 
+ +```bash +$ vespa query \ + 'yql=select * from msmarco where url contains ({filter:true,ranked:false,stem:false}"https")' \ + 'summary=debug-tokens' \ + 'language=en' +``` + +Similarly, if we pass a different language tag, which will not stem https to http, we also get 0 results: + +```bash +$ vespa query \ + 'yql=select * from msmarco where url contains ({filter:true,ranked:false}"https")' \ + 'summary=debug-tokens' \ + 'language=de' +``` + + +## Ranking + +The previous section covered free-text search matching, linguistics, and how to combine business logic with free-text user queries. All the examples used a `default` rank-profile using Vespa's [nativeRank](/en/ranking/nativerank) text scoring feature. + +With free-text search, we can use other text scoring functions, like [BM25](/en/ranking/bm25). All the matching capabilities (or limitations) still apply, we can use fieldsets or fields; the difference is in the text scoring function where BM25 is different from nativeRank. + +```bash +$ vespa query \ + 'yql=select * from msmarco where default contains text(@user-query)' \ + 'user-query=what is dad bod' \ + 'hits=3' \ + 'language=en' \ + 'ranking=bm25' +``` + +While the `nativeRank` text score is normalized to the range 0 to 1, BM25 is unbounded, as demonstrated above. When querying (matching), we can ask Vespa to compute both features in the same query. 
+ +Modify the schema and add a new rank-profile `combined`: + +```sd expandable +schema msmarco { + document msmarco { + field language type string { + indexing: "en" | set_language + } + field id type string { + indexing: attribute | summary + match: word + } + field title type string { + indexing: index | summary + match: text + index: enable-bm25 + } + field body type string { + indexing: index | summary + match: text + index: enable-bm25 + } + field url type string { + indexing: index | summary + index: enable-bm25 + } + } + fieldset default { + fields: title, body, url + } + document-summary minimal { + summary id { } + } + document-summary debug-tokens { + summary url {} + summary url-tokens { + source: url + tokens + } + from-disk + } + rank-profile default { + first-phase { + expression: nativeRank(title, body, url) + } + } + rank-profile bm25 inherits default { + first-phase { + expression: bm25(title) + bm25(body) + bm25(url) + } + } + + rank-profile combined inherits default { + first-phase { + expression: bm25(title) + bm25(body) + bm25(url) + nativeRank(title) + nativeRank(body) + nativeRank(url) + } + match-features { + bm25(title) + bm25(body) + bm25(url) + nativeRank(title) + nativeRank(body) + nativeRank(url) + } + } +} +``` + +Then, re-deploy the Vespa application from the `app` directory: + +```bash +$ vespa deploy --wait 300 app +``` + +Adding or removing rank profiles is a live-change as it only impacts how we score documents, not how we index or match them. 
+ +Run a query with the new rank-profile: + +```bash +$ vespa query \ + 'yql=select * from msmarco where default contains text(@user-query)' \ + 'user-query=what is dad bod' \ + 'hits=3' \ + 'language=en' \ + 'ranking=combined' +``` + +This will produce a result like this: + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1, + "fields": { + "totalCount": 562 + }, + "children": [ + { + "id": "id:msmarco:msmarco::D2977840", + "relevance": 25.482783473796484, + "source": "msmarco", + "fields": { + "matchfeatures": { + "bm25(body)": 19.51565699523739, + "bm25(title)": 4.978933753876959, + "bm25(url)": 0.3678926381724701, + "nativeRank(body)": 0.3010929113058281, + "nativeRank(title)": 0.24814575272673867, + "nativeRank(url)": 0.07106142247709807 + }, + "sddocname": "msmarco", + "documentid": "id:msmarco:msmarco::D2977840", + "id": "D2977840", + "title": "What Is A Dad Bod An Insight Into The Latest Male Body Craze To Sweep The Internet", + "url": "http://www.huffingtonpost.co.uk/2015/05/05/what-is-a-dadbod-male-body_n_7212072.html" + } + } + ] + } +} +``` + +Notice the `matchfeatures` field that is added to the hit when using `match-features` in the rank-profile. Here, we have all the computed features from the matched document, and the final `relevance` score is the sum of these features (in this case). This query and ranking example demonstrates that for a single query searching a set of fields via fieldset, we can compute different types of text scoring features and combine them. + +Now consider the following where we limit matching to the title field: + +```bash +$ vespa query \ + 'yql=select * from msmarco where title contains text(@user-query)' \ + 'user-query=what is dad bod' \ + 'hits=3' \ + 'language=en' \ + 'ranking=combined' +``` + +Now, we do not get features for `body` or `url`, because they were not matched by the query. + + +## Next steps + +Check out the [Improving Text Search through ML](/en/learn/tutorials/text-search-ml) tutorial. 
+ + +## Cleanup + +If you do not want to proceed with the [Improving Text Search through ML](/en/learn/tutorials/text-search-ml) guide, you can stop and remove the container (and data): + +```bash +$ docker rm -f vespa-msmarco +``` + +[^1]: Robertson, Stephen and Zaragoza, Hugo and others, 2009. The probabilistic relevance framework: BM25 and beyond. Foundations and Trends in Information Retrieval. diff --git a/mintlify-docs/en/linguistics/linguistics-custom.mdx b/mintlify-docs/en/linguistics/linguistics-custom.mdx new file mode 100644 index 0000000000..e8354751c9 --- /dev/null +++ b/mintlify-docs/en/linguistics/linguistics-custom.mdx @@ -0,0 +1,37 @@ +--- +title: "Custom Linguistics" +--- + +A linguistics component is an implementation of [com.yahoo.language.Linguistics](https://github.com/vespa-engine/vespa/blob/master/linguistics/src/main/java/com/yahoo/language/Linguistics.java). Refer to the [com.yahoo.language.simple.SimpleLinguistics](https://github.com/vespa-engine/vespa/blob/master/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java) implementation (which can be subclassed for convenience). + +SimpleLinguistics provides support for english stemming only. Try loading the `com.yahoo.language.simple.SimpleLinguistics` module, or providing another linguistics module. + +The linguistics implementation must be configured as a component in container clusters doing linguistics processing, see [injecting components](/en/applications/dependency-injection). + +As document processing for indexing is by default done by an autogenerated container cluster which cannot be configured, specify a container cluster for indexing explicitly. 
+ +This example shows how to configure SimpleLinguistics for linguistics using the same cluster for both query and indexing processing (if using different clusters, add the same linguistics component to all of them): + +```xml highlight= {4,14} + + + + + + + + + + + 1 + + + + + + + + +``` + +If changing the linguistics component of a live system, recall can be reduced until all documents are re-written. This is because documents will still be stored with tokens generated by the previous linguistics module. \ No newline at end of file diff --git a/mintlify-docs/en/linguistics/linguistics-opennlp.mdx b/mintlify-docs/en/linguistics/linguistics-opennlp.mdx new file mode 100644 index 0000000000..f6eff7aa19 --- /dev/null +++ b/mintlify-docs/en/linguistics/linguistics-opennlp.mdx @@ -0,0 +1,123 @@ +--- +title: "OpenNLP Linguistics" +sidebarTitle: "Default (OpenNLP) linguistics" +--- + +The default Vespa linguistics implementation uses [OpenNLP](https://opennlp.apache.org/). The Apache OpenNLP language detection is also used, by default, even if you're using a different implementation. See [Language handling](/en/linguistics/linguistics#language-handling) for more information. OpenNLP has support for 103 languages. + +## OpenNLP language detection + +The OpenNLP language detector gives a prediction with a confidence, with confidence typically increasing with more input. The threshold for using the prediction can be configured with a number typically from 1.0 (wild guess) to 6.0 (confident guess), with 2.0 as the default: + +```xml + + ... 
+ + 4.2 + +``` + +## Default languages + +OpenNLP tokenization and stemming support these languages: + +- Arabic (ar) +- Catalan (ca) +- Danish (da) +- Dutch (nl) +- English (en) +- Finnish (fi) +- French (fr) +- German (de) +- Greek (el) +- Hungarian (hu) +- Indonesian (id) +- Irish (ga) +- Italian (it) +- Norwegian (no) +- Portuguese (pt) +- Romanian (ro) +- Russian (ru) +- Spanish (es) +- Swedish (sv) +- Turkish (tr) + +Other languages fall back to English (_en_). + +English uses a simpler stemmer (kStem) by default, which produces fewer stems and therefore lower recall. To use OpenNLP stemming (Snowball) also for English, add this config to your `<container>` element(s): + +```xml + + ... + + true + +``` + +See _Tokens_ [OpenNLP models](https://opennlp.apache.org/models.html) and [text matching](/en/querying/text-matching) for examples and how to experiment with linguistics. + +If you need support for more languages, you can consider replacing the default OpenNLP-based linguistic integration with the [Lucene Linguistics](/en/linguistics/lucene-linguistics) implementation which supports more languages. + +### Chinese + +The default linguistics implementation does not segment Chinese into tokens, but this can be turned on by config: + +```xml + + ... + + true + true + +``` + +The `createCjkGrams` setting adds substrings of segments longer than 2 characters, which may increase recall. + +## Tokenization + +Tokenization removes any non-word characters, and splits the string into _tokens_ on each word boundary. In addition, CJK tokens are split using a _segmentation_ algorithm. The resulting tokens are then searchable in the index. + +Also see [N-gram matching](/en/reference/schemas/schemas#gram). + +## Normalization + +An example normalization is à ⇒ a. Normalizing will cause accents and similar decorations which are often misspelled to be normalized the same way both in documents and queries. 
+ +Vespa uses [java.text.Normalizer](https://docs.oracle.com/javase/7/docs/api/java/text/Normalizer.html) to normalize text, see [SimpleTransformer.java](https://github.com/vespa-engine/vespa/blob/master/linguistics/src/main/java/com/yahoo/language/simple/SimpleTransformer.java). Normalization preserves case. + +Refer to the [nfkc](/en/reference/querying/yql#nfkc) query term annotation. Also see the YQL [accentDrop](/en/reference/querying/yql#accentdrop) annotation. + +## Stemming + +Stemming means _translate a word to its base form_ (singular forms for nouns, infinitive for verbs), using a [stemmer](https://en.wikipedia.org/wiki/Stemming). Use of stemming increases search recall, because the searcher is usually interested in documents containing query words regardless of the word form used. Stemming in Vespa is symmetric, i.e. words are converted to stems both when indexing and searching. + +For example, when text is indexed, the stemmer will convert the noun _reports_ (plural) to _report_, and the latter will be stored in the index. Likewise, before searching, _reports_ will be stemmed to _report_. Another example is that _am_, _are_ and _was_ will be stemmed to _be_ both in queries and indexes. + +When [bolding](/en/reference/schemas/schemas#bolding) is enabled, all forms of the query term will be bolded. I.e. when searching for _reports_, _report_, _reported_ and _reports_ will all be bolded. + +See the [stem](/en/reference/querying/yql#stem) query term annotation. + +### Theory + +From a matching point of view, stemming takes all possible token strings and maps them into equivalence classes. So in the example above, the tokens \{ _report_, _reports_, _reported_ \} form an equivalence class. To represent the class, the linguistics library should pick the best element in the class. 
At query time, the text typed by a user will be tokenized, and then each token should be mapped to the most likely equivalence class, again represented by the shortest element that belongs to the class. + +While the theory sounds pretty simple, in practice it is not always possible to figure out which equivalence class a token should belong to. A typical example is the string _number_. In most cases we would guess this to mean a numerical entity of some kind, and the equivalence class would be \{ _number_, _numbers_ } - but it could also be a verb, with a different equivalence class \{ _number_, _numbered_, _numbering_ \}. These are of course closely related, and in practice they will be merged, so we'll have a slightly larger equivalence class \{ _number_, _numbers_, _numbered_, _numbering_ \} and be happy with that. However, in a sentence such as _my legs keep getting number every day_, the _number_ token clearly does not have the semantics of a numerical entity, but should be in the equivalence class \{ _numb_, _number_, _numbest_, _numbness_ \} instead. But blindly assigning _number_ to the equivalence class _numb_ is clearly not right, since the _more numb_ meaning is much less likely than the _numerical entity_ meaning. + +The approach currently taken by the low-level linguistics library will often lead to problems in the _number_-like cases as described above. To give better recall, Vespa has implemented a _multiple_ stemming option. + +### Configuration + +By default, all words are stemmed to their _best_ form. Refer to the [stemming reference](/en/reference/schemas/schemas#stemming) for other stemming types. To change type, add: + +```yaml +stemming: [stemming-type] +``` + +Stemming can be set either for a field, a fieldset or as a default for all fields. 
Example: Disable stemming for the field _title_: + +```yaml +field title type string { + indexing: summary | index + stemming: none +} +``` \ No newline at end of file diff --git a/mintlify-docs/en/linguistics/linguistics.mdx b/mintlify-docs/en/linguistics/linguistics.mdx new file mode 100644 index 0000000000..1765aa1c8b --- /dev/null +++ b/mintlify-docs/en/linguistics/linguistics.mdx @@ -0,0 +1,313 @@ +--- +title: "Linguistics in Vespa" +sidebarTitle: "Linguistics Overview" +--- + +Vespa uses a _linguistics_ module to process text in queries and documents during indexing and searching. The goal of linguistic processing is to increase _recall_ (how many documents are matched) without hurting _precision_ (the relevance of the documents matched) too much. It consists of such operations as: + +- tokenizing text into chunks of known types such as words and punctuation. +- normalizing accents. +- finding the base form of words (stemming or lemmatization). + +Linguistic processing is run when writing documents, and when querying: + + + ![Overview: linguistic processing in Vespa](/assets/img/vespa-overview-linguistics.svg) + + +The processing is run on [string](/en/reference/schemas/schemas#string) fields with `index` indexing mode. Overview: + +1. When writing documents, string fields with `indexing: index` are by default processed. A field's language will configure this processing. A document/fields can have the language set explicitly, if not, it is [detected](/en/linguistics/linguistics#field-language-detection). +2. The field's content is processed (e.g., tokenized, normalized, stemmed, etc.), and the resulting terms are added to the index. + + **Note:** The language for the field is not persisted on the content node, just the processed terms themselves + +3. A query is also processed in a similar fashion. Typically through the same [linguistics profile](/en/reference/schemas/schemas#linguistics) as the field content, producing the same terms from the same text. 
The language of query strings is [detected](/en/linguistics/linguistics#query-language-detection) unless specified using [model.locale](/en/reference/api/query#model.locale) or [annotations](/en/reference/querying/yql#annotations) like `language`. + + **Note:** This is a very common query problem - it is hard to detect language precisely from short strings. + +4. The processed query is evaluated on the content nodes, and will only work as expected if both documents and queries produce the same terms. + +These operations can be turned on or off per field in the [schema](/en/basics/schemas). See [implicitTransforms](/en/reference/querying/yql#implicittransforms) for how to enable/disable transforms per query term. + +## Linguistics implementations + +Vespa comes with two linguistics variants out of the box: [OpenNLP](/en/linguistics/linguistics-opennlp) and [Lucene](/en/linguistics/lucene-linguistics). Check out the respective pages for more information on how to configure them. + +You can also implement a custom [Linguistics](/en/linguistics/linguistics-custom) component. + +The default linguistics variant is [OpenNLP](/en/linguistics/linguistics-opennlp), but for the rest of this page we'll go through common options, such as language handling, inherited by all implementations. + + + **Note:** Linguistics implementations only control how text is tokenized, including positional information. These tokens are stored in the same way in the underlying index. For example, if you use Lucene linguistics, Vespa does not store information such as positions in Lucene segment files. Storage is the same as with OpenNLP, only resulting tokens might differ. + + +## Language handling + +Vespa does _not_ know the language of a document - this applies: + +1. The indexing processor is instructed on a per-field level what language to use when calling the underlying linguistics library +2. 
The query processor is instructed on a per-query level what language to use + +If no language is explicitly set in a document or a query, Vespa will run its configured language detector (by default, [OpenNLP language detection](/en/linguistics/linguistics-opennlp#language-detection)) on the available text (the full content of a document field, or the full `query=` parameter value). + +A document that contains the exact same word as a query might not be recall-able if the language of the document field is detected differently from the query. Unless the query has explicitly declared a [language](/en/reference/api/query#model.language), this can occur. + +### Indexing with language + +The indexing process run by Vespa is a sequential execution of the indexing scripts of each field in the schema, in the declared order. At any point, the script may set the language that will be used for indexing statements for subsequent fields, using [set\_language](/en/reference/writing/indexing-language#set_language). Example: + +```yaml +schema doc { + document doc { + field language type string { + indexing: set_language + } + field title type string { + indexing: index + } + } +} +``` + +If a language has not been set when tokenization of a field is run, the language is determined by [language detection](/en/linguistics/linguistics#field-language-detection). + +If all documents have the same language, the language can be hardcoded in the schema in this way: + +```yaml +schema doc { + + field language type string { + indexing: "en" | set_language + } + + document doc { + ...
+``` + +If the same document contains fields in multiple languages, set\_language can be invoked multiple times, e.g.: + +```yaml +schema doc { + document doc { + field language_title1 type string { + indexing: set_language + } + field title1 type string { + indexing: index + } + field language_title2 type string { + indexing: set_language + } + field title2 type string { + indexing: index + } + } +} +``` + +Or, if fixed per field, use multiple indexing statements in each field: + +```yaml +schema doc { + document doc { + field my_english_field type string { + indexing { + "en" | set_language; + index; + } + } + field my_spanish_field type string { + indexing { + "es" | set_language; + index; + } + } + } +} +``` + +### Field language detection + +When indexing a document, if a field has unknown language (i.e. not set using `set_language`), language detection is run on the field's content. This means, language detection is per field, not per document. + +See [query language detection](/en/linguistics/linguistics#query-language-detection) for detection confidence, fields with little text will default to English. + +### Querying with language + +The content of an indexed string field is language-agnostic. One must therefore apply a compatible tokenization on the query terms (e.g., stemming for the same language) in order to match the content of that field. + +The query parser subscribes to configuration that tells it what fields are indexed strings, and every query term that targets such a field are run through appropriate tokenization. The [language](/en/reference/api/query#model.language) query parameter controls the language state of these calls. + +Because an index may simultaneously contain terms in any number of languages, one can have stemmed variants of one language match the stemmed variants of another. To work around this, store the language of a document in a separate attribute, and apply a filter against that attribute at query-time. 
+ +By default, there is no knowledge anywhere that captures what languages are used to generate the content of an index. The language parameter only affects the transformation of query terms that hit tokenized indexes. + +### Query language detection + +If no [language](/en/reference/api/query#model.language) parameter is used, or the query terms are [annotated](/en/reference/querying/yql#annotations), the language detector is called to process the query string. + +Queries are normally short, as a consequence, the detection confidence is low. Example: + +```bash +$ vespa query "select * from music where default contains text(@text)" \ + tracelevel=3 text='Eine kleine Nachtmusik' | grep 'Stemming with language' + "message": "Stemming with language=ENGLISH" + +$ vespa query "select * from music where default contains text(@text)" \ + tracelevel=3 text='Eine kleine Nachtmusik schnell' | grep 'Stemming with language' + "message": "Stemming with language=GERMAN" +``` + +See [#24265](https://github.com/vespa-engine/vespa/issues/24265) for details - in short, with the current 0.02 confidence cutoff, queries with 3 terms or fewer will default to English. + +### Multiple languages + +Vespa supports having documents in multiple languages in the same schema, but does not out-of-the-box support cross-lingual retrieval (e.g., search using English and retrieve relevant documents written in German). This is because the language of a query is determined by the language of the query string and only one transformation can take place. + +Approaches to overcome this limitation include: + +1. Use semantic retrieval using a multilingual text embedding model (see [blog post](https://blog.vespa.ai/simplify-search-with-multilingual-embeddings/)) which has been trained on multilingual corpus and can be used to retrieve documents in multiple languages. +2. 
Stem and tokenize the query using the relevant languages, build a query tree using [weakAnd](/en/reference/querying/yql#weakand) / [or](/en/reference/querying/yql#or) and using [equiv](/en/reference/querying/yql#equiv) per stem variant. This is easiest done in a custom [Searcher](/en/applications/searchers) as mentioned in [#12154](https://github.com/vespa-engine/vespa/issues/12154). + +Example: + +**language=fr:** machine learning =\> machin learn + +**language=en:** machine learning =\> machine learn + +Using _weakAnd_ here as example as that technique is already mentioned in #12154: + +```sql +select * from sources * where rank( + default contains "machine", + default contains "learning", + weakAnd( + default contains equiv("machin", "machine"), + default contains "learn" + ) +) +``` + +We now retrieve using all possible stems/base forms with _weakAnd_, and use the [rank](/en/reference/querying/yql#rank) operator to pass in the original query form, so that ranking can rank literal matches (original) higher. Benefit of _equiv_ is that it allows multiple term variants to share the same position, so that proximity ranking does not become broken by this approach. + +## Linguistics profiles + +Linguistics profiles are used to configure linguistics processing for a field in the schema. They are typically used with the [Lucene linguistics implementation](/en/linguistics/lucene-linguistics), but can be used in e.g., [custom linguistics implementations](/en/linguistics/linguistics-custom) as well. + +### Symmetrical processing + +For example, a definition like this: + +```yaml +field title type string { + indexing: summary | index + linguistics { + profile: whitespaceLowercase + } +} +``` + +Will look for a profile named `whitespaceLowercase`, which could be defined like this in `services.xml`: + +```xml + + + whitespace + + + + lowercase + + + +``` + +Note `language=en` there. It is optional: if it's not set, the profile will be used for all languages. 
But you can have different definitions for different languages on the same profile (e.g., different stemming). + +### Different processing for query strings + +For some use cases, you may want to process the query string differently than the document content. Synonyms are a good example. If you expand `dog` to `dog,puppy` at query time, it will match either term in the document anyway - no need to expand it at write-time. + +To do this, you'd define a different profile for the query string. Like: + +```xml + + + whitespace + + + + lowercase + + + synonymGraph + + + en/synonyms.txt + + + + +``` + +Then, in the schema, expand `profile` to `profile.index` and `profile.search`: + +```yaml +field title type string { + indexing: summary | index + linguistics { + profile { + index: whitespaceLowercase + search: whitespaceLowercaseSynonyms + } + } +} +``` + +At this point, `where synonyms_test contains 'dog'` will match a document containing `puppy`. + +### Overriding profile for query strings + +At query time, you can tell Vespa to use a specific profile to process the query string via [grammar.profile](/en/reference/querying/yql#grammar). This works with the [userInput()](/en/reference/querying/yql#userinput) and [text()](/en/reference/querying/yql#text) operators. For example, to use the `whitespaceLowercase` profile for the query string: + +```sql +where title contains ({grammar.profile: 'whitespaceLowercase'}text('dog')) +``` +Equivalent expression via `userInput()`: +```sql +where {defaultIndex:'title', grammar.profile: 'whitespaceLowercase', grammar: 'linguistics'}userInput('dog') +``` + + + **Note:** You should use grammar=linguistics (like in the example above) with grammar.profile to ensure that there is no additional processing (e.g., tokenization) besides what is already defined in the profile. + + +## Troubleshooting linguistics processing + +If your documents don't match as expected, there are two ways to get more information. 
First, you can get the tokenized text for a field by using [tokens](/en/reference/schemas/schemas#tokens) in the [document summary](/en/querying/document-summaries). For example, to get the original text and tokens for the `title` field: + +```yaml +document-summary debug-text-tokens { + summary title {} + summary title_tokens { + source: title + tokens + } + from-disk +} +``` + +Then, at query time, you can also get the tokens of the query string by increasing the [trace level](/en/reference/api/query#trace.level): + +```json +{ + "yql": "select * from sources * where title contains \"dog\"", + "presentation.summary": "debug-text-tokens", + "model.locale": "en", + "trace.level": 2 +} +``` diff --git a/mintlify-docs/en/linguistics/lucene-linguistics.mdx b/mintlify-docs/en/linguistics/lucene-linguistics.mdx new file mode 100644 index 0000000000..22969ccab1 --- /dev/null +++ b/mintlify-docs/en/linguistics/lucene-linguistics.mdx @@ -0,0 +1,214 @@ +--- +title: "Lucene Linguistics" +--- + +Lucene Linguistics is a custom [linguistics](/en/linguistics/linguistics) implementation of the [Apache Lucene](https://lucene.apache.org) library. It provides a Lucene analyzer to handle text processing for a language with an optional variation per [stemming mode](https://github.com/vespa-engine/vespa/blob/master/linguistics/src/main/java/com/yahoo/language/process/StemMode.java). + +Check [sample apps](https://github.com/vespa-engine/sample-apps/tree/master/examples/lucene-linguistics) to get started. + +## Crash course to Lucene text analysis + +Lucene [text analysis](https://lucene.apache.org/core/9_8_0/core/org/apache/lucene/analysis/package-summary.html) is a process of converting text into searchable tokens. 
This text analysis consists of a series of components applied to the text in order: + +- [CharFilters](https://lucene.apache.org/core/9_8_0/core/org/apache/lucene/analysis/CharFilter.html): transform the text before it is tokenized, while providing corrected character offsets to account for these modifications. +- [Tokenizers](https://lucene.apache.org/core/9_8_0/core/org/apache/lucene/analysis/Tokenizer.html): responsible for breaking up incoming text into tokens. +- [TokenFilters](https://lucene.apache.org/core/9_8_0/core/org/apache/lucene/analysis/TokenFilter.html): responsible for modifying tokens that have been created by the Tokenizer. + +A specific configuration of the above components is wrapped into an [Analyzer](https://lucene.apache.org/core/9_8_0/core/org/apache/lucene/analysis/Analyzer.html) object. + +The text analysis works as follows: +1. All char filters are applied in the specified order on the entire text string +2. Token filters in the specified order are applied on each token. + +## Defaults language analysis + +Lucene Linguistics out-of-the-box exposes the analysis components provided by the [lucene-core](https://lucene.apache.org/core/9_8_0/core/index.html) and the [lucene-analysis-common](https://lucene.apache.org/core/9_8_0/analysis/common/index.html) libraries. Other libraries with Lucene text analysis components (e.g. [analysis-kuromoji](https://lucene.apache.org/core/9_8_0/analysis/kuromoji/index.html)) can be added to the application package as a Maven dependency. 
+ +Lucene Linguistics out-of-the-box provides analyzers for 40 languages: + +- Arabic +- Armenian +- Basque +- Bengali +- Bulgarian +- Catalan +- Chinese +- Czech +- Danish +- Dutch +- English +- Estonian +- Finnish +- French +- Galician +- German +- Greek +- Hindi +- Hungarian +- Indonesian +- Irish +- Italian +- Japanese +- Korean +- Kurdish +- Latvian +- Lithuanian +- Nepali +- Norwegian +- Persian +- Portuguese +- Romanian +- Russian +- Serbian +- Spanish +- Swedish +- Tamil +- Telugu +- Thai +- Turkish + +The Lucene [StandardAnalyzer](https://lucene.apache.org/core/9_8_0/core/org/apache/lucene/analysis/standard/StandardAnalyzer.html) is used for languages that have neither a custom nor a default analyzer. + +## Linguistics key + +Linguistics keys identify a configuration of text analysis. A key can be made of two parts, separated by a semicolon, though you can omit one or the other. The two parts are: + +- A [linguistics profile](/en/linguistics/linguistics#linguistics-profiles). +- A language key. + +The language key, in turn, has 2 parts: a mandatory [language code](https://github.com/vespa-engine/vespa/blob/master/linguistics/src/main/java/com/yahoo/language/Language.java) and an optional stemming mode. The format is `LANGUAGE_CODE[/STEM_MODE]`. There are 5 stemming modes: `NONE, DEFAULT, ALL, SHORTEST, BEST` (they can be specified in the [field schema](/en/reference/schemas/schemas#stemming)). + +Examples of linguistics keys: + +- `profile=whitespaceLowercase`: a profile that applies to all languages. You can bind it to different fields by specifying their [linguistics profiles](/en/linguistics/linguistics#linguistics-profiles) in the schema. +- `profile=whitespaceLowercase;language=en`: a profile that applies to the English language. You'd still bind it to fields via their [linguistics profiles](/en/linguistics/linguistics#linguistics-profiles) in the schema, but it will only be applied to the English texts (either at indexing or query time).
+- `en`: English language: applies to all English texts where no profile is specified (in the schema or [in the query](/en/linguistics/linguistics#overriding-profile-for-query-strings)). +- `en/BEST`: English language with the `BEST` stemming mode. Like the previous example, but only applies when [stemming](/en/reference/schemas/schemas#stemming) is set to `BEST`. + + + **Note:** You can use different profiles for document fields and query strings. See [Different processing for query strings](/en/linguistics/linguistics#different-processing-for-query-strings) and the [multiple-profiles sample app](https://github.com/vespa-engine/sample-apps/tree/master/examples/lucene-linguistics/multiple-profiles) for more information. + + +## Customizing text analysis + +Lucene linguistics provides multiple ways to customize text analysis per language: + +- `LuceneLinguistics` component configuration in the `services.xml` +- `ComponentsRegistry` + +### LuceneLinguistics component configuration + +In `services.xml` it is possible to construct an analyzer by providing [configuration](https://github.com/vespa-engine/vespa/blob/master/lucene-linguistics/src/main/resources/configdefinitions/lucene-analysis.def) for the `LuceneLinguistics` component (from all text analysis components that are available on the classpath). Example for the English language: + +```xml + + + lucene-linguistics + + + + standard + + + + stop + + en/stopwords.txt + true + + + + englishMinimalStem + + + + + + +``` + +Notes: + +- `item key="profile=standardStopStem;language=en"` value is a [linguistics key](#linguistics-key). +- `name` values are the [SPI names](https://docs.oracle.com/en/java/javase/17/docs/api/java.naming/javax/naming/spi/package-summary.html) of the text analysis components. You'll typically find them in the [Lucene analysis JavaDocs](https://lucene.apache.org/core/9_11_1/analysis/common/allclasses-index.html).
For example, the name `stop` along with other options can be found in the [StopFilterFactory JavaDoc](https://lucene.apache.org/core/9_11_1/analysis/common/org/apache/lucene/analysis/core/StopFilterFactory.html). +- The `en/stopwords.txt` file must be placed in your application package under the `lucene-linguistics` directory, which is referenced by the `configDir` option. +- If `configDir` is not provided the files must be on the classpath. + +### Components registry + +The [ComponentsRegistry](/en/applications/dependency-injection#depending-on-all-components-of-a-specific-type) mechanism can be used to set a Lucene Analyzer for a language. + +```xml + +``` + +Where: + +- `id` must be a [linguistics key](#linguistics-key); +- `class` is the implementation class that extends the `Analyzer` class; +- `bundle` is a name of the application package as specified in the `pom.xml` (or can be any bundle added to your `components` dir that contains the class). + +For this to work, the class must provide **only** a constructor without arguments. + +In case your analyzer class needs some initialization you must wrap the analyzer into a class that implements the `Provider` class. + +### Custom text analysis components + +The text analysis components are loaded via Java Service provider interface ([SPI](https://www.baeldung.com/java-spi)). + +To use an external library that is properly prepared it is enough to add the library to the application package as a Maven dependency. + +In case you need to create a custom component the steps are: + +1. Implement a component in a Java class +2. Register the component class in the matching service file on the classpath (e.g. `META-INF/services/org.apache.lucene.analysis.TokenFilterFactory` for a custom token filter). + +## Language Detection + +Lucene Linguistics doesn't provide language detection. This means that for both feeding and searching you should provide a [language parameter](/en/reference/api/query#model.language).
+ +## Indexing all stems + +Some analyzers expand the input text into multiple tokens on the same position. For example, those based on the [NGramTokenFilter](https://lucene.apache.org/core/9_11_1/analysis/common/org/apache/lucene/analysis/ngram/NGramTokenFilter.html). Here's a sample analyzer configuration: + +```xml + + + whitespace + + + + nGram + + 2 + 2 + + + + +``` + +This will take a text like `dog` and produce `do` and `og` as tokens, plus (by default) the original `dog`. However, Vespa only takes the first token (`do`) and writes it to the index, ignoring the other "stems". As a result, a search for `og` will not match documents that contain `dog`, which is the whole point of using letter n-grams. + +To index all stems, you can use the [stemming](/en/reference/schemas/schemas#stemming) parameter in the schema definition of your field: + +```yaml +field title_grams type string { + indexing: summary | index + linguistics { + profile: ngram + } + stemming: multiple +} +``` + +Now, Vespa will index all stems, and a search for `og` will match documents that contain `dog`. + + + **Note:** Queries look for all stems by default (regardless of the schema configuration). For example, a search for `dog` would expand to `do` and `og` as well, looking for all three terms. + \ No newline at end of file diff --git a/mintlify-docs/en/linguistics/query-rewriting.mdx b/mintlify-docs/en/linguistics/query-rewriting.mdx new file mode 100644 index 0000000000..0b76fb900d --- /dev/null +++ b/mintlify-docs/en/linguistics/query-rewriting.mdx @@ -0,0 +1,204 @@ +--- +title: "Query Rewriting" +--- + + +A search application can improve the quality by interpreting the intended meaning of the user queries. Once the meaning is guessed, the query can be rewritten to one that will satisfy the user better than the raw query. Vespa includes a query rewriting language which makes it easy to use query rewriting to understand and act upon the query semantics. 
+ +These query rewriting techniques can be combined to improve the search experience: + +- Query focusing: Decide a field to search for a term +- Query enhancing: Add additional terms which improve the query +- Stopwords: Remove terms which hurt recall or precision - [example](https://github.com/vespa-cloud/cord-19-search/blob/main/src/main/java/ai/vespa/example/cord19/searcher/BoldingSearcher.java) +- Synonyms: Replace terms or phrases by others + +Query rewriting is done by _semantic rules_ or _searchers_. Semantic rules are written in a simple production rule language that operates on queries. For more complex query rewriting logic which could not be handled by simple rules, one could create a rewriting searcher making use of the query rewriting framework. + +## EQUIV + +EQUIV is a query operator that can be used to add synonyms for words where the various synonyms should be equivalent - example: + +- The user query is `(used AND automobile)` +- _automobile_ is a synonym for _car_ (from a dictionary) +- Rewrite the query to `(used AND (automobile EQUIV car))` +- _automobile_ or _car_ are here equivalent - the query shall behave as if all occurrences of _car_ in the document corpus had been replaced by _automobile_ + +See the [reference](/en/reference/querying/yql#equiv) for differences between OR and EQUIV. In many cases it might be better to use OR instead of EQUIV. Example _Snoop_ Dogg: + +```sql +"Snoop" EQUIV "Snoop Doggy Dogg" EQUIV "Snoop Lion" EQUIV "Calvin Broadus" EQUIV "Calvin Cordozar Broadus Junior" +``` + +However, _Snoop_ is used by other people - so matching that alone is not a sure hit for the correct entity, and finding more than one of the synonyms in the same text gives better confidence. This is exactly what OR does: + +```sql +"Snoop"!20 OR "Snoop Doggy Dogg"!90 OR "Snoop Lion"!75 OR "Calvin Broadus"!60 OR "Calvin Cordozar Broadus Junior"!100 +``` + +Use lower weights on the alternatives with less confidence.
If it looks like the many words and phrases inside the OR overwhelms other words in the query, giving even lower weights may be useful, for example making the sum of weights 100 - the default weight for just one alternative. + +The decision to use EQUIV must be taken by application-specific dictionary or linguistics use. This can be done using [YQL](/en/reference/querying/yql#equiv) or from a container plugin (example [EquivSearcher.java](https://github.com/vespa-engine/sample-apps/blob/master/album-recommendation-java/app/src/main/java/ai/vespa/example/album/EquivSearcher.java)) where the query object is manipulated as follows: + +1. Find a word item in the query +2. Check that an EQUIV can be used in that place (see [limitations](/en/reference/querying/yql#equiv)) +3. Find the synonyms in the dictionary +4. Make an `EquivItem` with the synonyms (and the original word) as children +5. Replace the original `WordItem` with the new `EquivItem` + +## Rules + +A simple semantic rule looks like: + +```sql +lotr -> lord of the rings; +``` + +This means that whenever the term _lotr_ is encountered in a query, replace it by the terms _lord of the rings_. Rules can also refer to conditions, and the produced terms can be a modified version of whatever is matched instead of a concrete term: + +```sql +[brand] -> company:[brand]; +[brand] :- sony, dell, ibm, hp; +``` + +This rule says that, whenever the condition named _brand_ is matched, replace the matched term(s) by _the same term(s)_ searching the _company_ field. In addition, the _brand_ condition is defined to match any of a list of brands. Note how `->` means a replacing production rule, `:-` means a condition and `,` separates alternatives. + +It is also possible to do grouping using parentheses, list multiple terms which must be matched in sequence, and to write _adding_ production rules using `+>` instead of `->`. 
Terms are by default added using the query default (as if they were written in the search box), but it is also possible to force them to be AND, OR, NOT or RANK using respectively `+`, `?`, `-` and `$`. Here is a more complex rule illustrating this: + +```sql +[destination] (in, by, at, on) [place] +> $name:[destination] +``` + +This rule boosts matches which have a destination which matches the _name_ field followed by a preposition and a place (the definitions of the _destination_ and _place_ conditions are not shown). This is achieved by adding a RANK term - a term which does not impact whether a document is matched or not, but which adds a relevancy boost if it is. + +The complete syntax of this language is found in the [semantic rules reference](/en/reference/querying/semantic-rules). + +## Rule bases + +A collection of rules used together is collected in a _rule base_ - a text file containing rules and conditions, with file suffix `.sr` (for semantic rules). Example: + +```sql +# Replacements +lotr -> lord of the rings; +colour -> color; +audi -> skoda; + +# Stopwords +[stopword] -> ; # (Replace them by nothing) +[stopword] :- and, or, the, be; + +# Focus brands to the brand field. If we think the brand field has high quality data, we can replace. We use the same name +# for the condition and the field, but this is not necessary. +[brand] -> brand:[brand]; +[brand] :- sony, dell, ibm, hp; + +# Boost recognized categories +[category] +> $category:[category]; +[category] :- laptop, digital camera, camera; +``` + +The rules in a rule base are evaluated in order from the top down. A rule will be matched as many times as is possible before evaluation moves on to the next rule. So the query _colour colour_ will be rewritten to _color color_ before moving on to the next rule.
+ +## Configuration + +A rule base file is placed in the `rules/` directory under the [application package](/en/reference/applications/application-packages), and will be named as the file, excluding the `.sr` suffix. E.g. if the rules above are saved to `[my-application]/rules/example.sr`, the rule base is available under the name `example`. + +To make a rule base be used by default in queries, add `@default` on a separate line to the rule base. To deactivate the default rules, add [rules.off](/en/reference/api/query#rules.off) to the query. + +The rules can safely be updated at any time by running `vespa prepare` again. If there are errors in the rule bases, they will not be updated, and the errors will be reported on the command line. + +To trace what the rules are doing, add [tracelevel.rules=[number]](/en/reference/api/query#tracelevel.rules) to the query. + +## Using multiple rule bases + +It is possible to place multiple rule bases in the `[my-application]/rules/` directory and choose between them in the query. Rule bases may also include each other. This is useful to organize larger sets of rules, to experiment with variants of the rule set in new bases which include the standard base, or to use different sets of rules for different use cases. + +To include one rule base in another, add `@include(rulebasename)` on a separate line, where _rulebasename_ is the file name (with or without the _.sr_ suffix). The result will be the same as if the included rule base were copied into the location of the `include` line. If a condition is defined in both bases, the one from the _including_ base will be used. It is also possible to refer to the same-named condition in an included rule base using the `@super` directive as a condition.
For example, this rule base adds some more categories to the _category_ definition in the `example.sr` above: + +```sql +@include(example) + +# Category becomes laptop, digital camera, camera, palmtop, phone +[category] :- @super, palmtop, phone; +``` + +Multiple rule bases can be included, and included rule bases can themselves have included rule bases. All the rule bases included in the application package will be available when making queries. One of the rule bases can be made the default by adding `@default` on a separate line in the rule base. To use another rule base, add [rules.rulebase=[rulebasename]](/en/reference/api/query#rules.rulebase) to the query. + +## Using a finite state automaton + +_Finite state automata_ (FSA) are efficient in storing and making lookups in large string lists. A rule base can be compiled into an FSA to increase performance. An automaton is created from a text file which lists the condition terms to match and the condition names separated by a tab (by default). The name of the condition can be followed by a semicolon and additional data which will be ignored. + +This automaton source file defines the same as the _stopword_ and _brand_ conditions in the example rule base: + +```txt +and stopword +or stopword +be stopword +the stopword +sony brand +dell brand +ibm brand; This text is ignored +hp brand +``` + +Use [vespa-makefsa](/en/reference/operations/tools#vespa-makefsa) to compile the automaton file: + +```bash +$ vespa-makefsa -t sourcefile.txt targetfile.fsa +``` + +The target file is used from a rule base by adding _@automata(automatonfile)_ on a separate line in the rule base file (the file path is relative to _$VESPA\_HOME_). Automata-files must be stored on all container nodes. + +Note that automata are not included in others, so a rule base including another which uses an automaton must also declare to use the same automaton (or an automaton containing any changes from the automaton of the included base). 
+ +## Query phrasing + +Users search for phrases like _New York_, _Rolling Stones_, _The Who_, or _daily horoscopes_. Considering the latter, most of the time the query string will look like this: + +```bash +/search/?query=daily horoscopes&… +``` + +This is actually a search for documents where both _daily_ and _horoscopes_ match, but not necessarily documents with the exact phrase _"daily horoscopes"_. PhrasingSearcher is a Searcher that compares queries with a list of common phrases, and replaces two search terms with a phrase. If _"daily horoscopes"_ is a common phrase, the above query becomes: + +```bash +/search/?query="daily horoscopes"&… +``` + +The PhrasingSearcher must be configured with a list of common phrases, compiled into a _finite state automaton_ (FSA). The phrase list must be: + +- all lowercase +- sorted alphabetically + +Example: + +```bash +$ perl -ne 'print lc' listofphrasestextfile.unsorted.mixedcase | sort > listofphrasestextfile +``` + +Note that the Perl command to convert the text file to lowercase does not handle non-ASCII characters well (this is just an example). If the list of phrases is e.g. UTF-8 encoded and/or contains non-English characters, double-check that the resulting file is correct. + +Use [vespa-makefsa](/en/reference/operations/tools#vespa-makefsa) to compile the list into an FSA file: + +```bash +$ vespa-makefsa listofphrasestextfile phrasefsa +``` + +Put the file on all container nodes, configure the location and [deploy](/en/basics/applications): + +```xml + + + + + + + + /path/phrasefsa + + + + + + +``` + diff --git a/mintlify-docs/en/linguistics/troubleshooting-encoding.mdx b/mintlify-docs/en/linguistics/troubleshooting-encoding.mdx new file mode 100644 index 0000000000..ab15b9df7d --- /dev/null +++ b/mintlify-docs/en/linguistics/troubleshooting-encoding.mdx @@ -0,0 +1,49 @@ +--- +title: "Troubleshooting character encoding" +--- + +This document helps recognize the most common problems related to Unicode and I18N. 
+ +UTF-8 is a Unicode specific encoding where each letter (code point) is encoded as one to four 8 bit bytes. The UTF-8 scheme can technically use more bytes, but Unicode is defined as having approximately 1 million code points (partly because of limitations of UTF-16), and more than four bytes are then never necessary. + +A string in Java is stored as UTF-16, a series of 16-bit char(acter)s. All code points in the Unicode base plane, the first 64k code points, are represented as a single char, while higher code points are represented using surrogate pairs. A surrogate pair is a pair of chars from a reserved range. + +Accessing a code point in a Java string is done using e.g. String.codePointAt(), which then returns a 32-bit integer representing the code point (basically UCS-4). When traversing a string in Java, use codePointAt + offsetByCodePoints or String.codePoints() or similar methods. If your application conceptually handles letters, using String.charAt() will most of the time be wrong. To calculate buffer sizes for UTF-8 buffers with UTF-16 inputs without doing speculative encoding, Vespa has a toolbox, [com.yahoo.text.Utf8](https://github.com/vespa-engine/vespa/blob/master/vespajlib/src/main/java/com/yahoo/text/Utf8.java), with static helper methods. + +If you are using Python, use the following to remove control characters (it requires `import unicodedata`): + +```python +def remove_control_characters(s): + return "".join(ch for ch in s if unicodedata.category(ch)[0]!="C") +``` + +## Visual pattern matching of encoding bugs + +| Transformation | Result | +| :--- | :--- | +| Input | hôtel | +| Correctly URL quoted (Vespa always uses UTF-8 there) | h%C3%B4tel | +| Encoded as ISO-8859-1 (ISO Latin-1), then URL quoted | h%F4tel | +| Encoded as UTF-16 (as in Java strings), then URL quoted | %00h%00%F4%00t%00e%00l | +| For completeness, little endian UTF-16, including byte order marker | %FF%FEh%00%F4%00t%00e%00l%00 | + +What we are looking for is single bytes outside ASCII, i.e. ordinal above 127. 
Given UTF-8, there should always be sequences of two or more of these when a code point is outside ASCII. The first byte for each code point will have the two most significant bits set, in other words hex C to hex F. The rest of the bytes for that code point will have the most significant bit set, and the second most unset, in other words hex 8 to hex B. + +From here, we move on to the two most common de-/encoding errors: + +| Error | Hex dump of code points | Rendered | +| :--- | :--- | :--- | +| UTF-8 input decoded as if it were ISO-8859-1 | h\xc3\xb4tel | hÃ´tel | +| UTF-8 input re-encoded as UTF-8, then decoded as UTF-8 again | h\xc3\xb4tel | hÃ´tel | + +Note how these two bugs create exactly the same byte sequences. This is because the first 256 code points of Unicode are identical to ISO-8859-1. What we are looking for is line noise in-between normal ASCII, as both ISO-8859-1 and Unicode are ASCII compatible. + +Trying to decode valid ISO-8859-1 input with a UTF-8 decoder will usually make the decoder report the input as invalid if there are code points outside ASCII. Valid ISO-8859-1 rarely ends up conforming to the required bit patterns of valid UTF-8, though it sometimes happens. + +_Never_ try to debug encoding problems with a web browser. Always use a hexdump tool. `xxd` is a nice utility which is included with vim, which avoids several of the endianness headaches associated with some UNIX alternatives. + +Also, remember Windows 1252 is _not_ the same as ISO-8859-1. + +## JSON + +Use proper JSON - a common error is not stripping ASCII control characters from feed data. See [stripInvalidCharacters](https://github.com/vespa-engine/vespa/blob/master/vespajlib/src/main/java/com/yahoo/text/Text.java) for a utility function. 
diff --git a/mintlify-docs/en/modules/e-commerce/multi-currency-filtering.mdx b/mintlify-docs/en/modules/e-commerce/multi-currency-filtering.mdx new file mode 100644 index 0000000000..f3ec481bd0 --- /dev/null +++ b/mintlify-docs/en/modules/e-commerce/multi-currency-filtering.mdx @@ -0,0 +1,500 @@ +--- +title: Multi-Currency Pricing +--- + +Vespa for e-commerce includes multi-currency pricing support for e-commerce applications with global product catalogs where products are priced in different currencies and sold across multiple markets. Multi-currency pricing refers to presenting and working with prices in multiple currencies, enabling applications to query, filter, and rank products using prices expressed in the buyer's preferred currency. This enables filtering by price range in any currency and using converted prices in ranking, with automatic currency conversion when market-specific pricing is not available. + +## Overview + +The multi-currency pricing feature supports: + +- **Per-market pricing** - Define different prices for different markets on each product. +- **Keeping track of exchange rates** - An N×N tensor mapping of currency-to-currency exchange rates is stored in a "forex" document, and can be updated at any time. +- **Automatic currency conversion** - Fallback to a default market when no other market-specific price exists for the buyer's market. +- **Query-time filtering** - Filter products by price range in any currency. +- **Ranking integration** - Optional exposure of currency rates for use in ranking expressions (ranking on the computed price). + +The implementation consists of two key components: + +- **MultiCurrencyFilterSearcher** - A custom searcher that intercepts queries and dynamically filters products based on effective prices. +- **CachedForexRateService** - A background service that stores exchange rates from the forex document in-memory for faster look-ups. 
+ +## Quick Start +This quick start walks through an end-to-end example of enabling multi-currency pricing in a Vespa application. +### Define Schemas + +Create two schemas: one to store the forex rates, and one for products. If you already have an existing product schema, you can reuse it as long as it contains the required fields described below. + +#### Forex Schema + +The forex schema stores currency exchange rates as a tensor. Add a `forex.sd` schema to your application defined as: + +```json +schema forex { + document forex { + field timestamp type long { + indexing: attribute | summary + } + + field rates type tensor(from{}, to{}) { + indexing: attribute | summary + } + } +} +``` + +#### Product Schema + +The product schema stores products with their seller currency and per-market prices. The `per_market_price` array contains price overrides for specific markets, with a `DEFAULT` market used as fallback. Every product must include a `DEFAULT` entry, and all `per_market_price.price` values are expressed in the document's `seller_currency`. + + + **Note:** Every per-market override is stored in the seller's native currency, so the searcher can convert buyer price windows instead of rewriting stored prices. + + + + **Important:** `fast-search` on the struct-fields is recommended. Without it, price filtering becomes significantly slower as the number of supported currencies grows. `rank: filter` can be added for further optimization, but this depends on the specific ranking setup — see [rank: filter](/en/reference/schemas/schemas#filter) for details. 
+ + +```js expandable +schema product { + document product { + + # Your existing fields above + + field seller_currency type string { + indexing: summary | attribute + } + + struct market_price { + field market type string {} + field price type double {} + } + + field per_market_price type array { + indexing: summary + summary: matched-elements-only + struct-field market { + indexing: attribute + attribute: fast-search + } + struct-field price { + indexing: attribute + attribute: fast-search + } + } + + # Your existing fields below + + } +} +``` + +### Configure Services + +Vespa only applies multi-currency filtering when the searcher and forex cache are wired into the container cluster. Queries must pass through a chain that includes `MultiCurrencyFilterSearcher`, and the `ForexRateRetriever` must read the global forex document via its own search chain. Add both chains and the two components to your container definition in `services.xml`: + +Inside your existing `` block, add the multi-currency chains and components: + +```xml + + + + + + + + + + +``` + +In the `` cluster ensure both document types are declared: + +``` + + + + +``` + +The retriever issues background queries through the `forex-cache` chain. If that chain is missing or restricts the wrong document type, the cache never reaches `READY` and queries fail with "ensure exactly one forex document exists". + +Putting it all together, a minimal `services.xml` might look like this: + +```xml + + + + + + + + + + + + + + + + + + + + + +``` + +### Feed Data + +#### Feed Forex Rates + +Feed a single forex document with ID `id:forex:forex::forex` containing all currency-to-currency exchange rates. Include identity rates (e.g., USD→USD = 1.0) to avoid missing-cell lookups. The `timestamp` field is required and must be updated with each rate change to ensure the cache picks up new rates. + + **Warning:** Exactly one global forex document must exist. 
If multiple documents are present, the retriever reports `INVALID_FOREX_DOCUMENTS` and the searcher returns error hits instructing you to keep a single forex document. + +```json +{ + "put": "id:forex:forex::forex", + "fields": { + "timestamp": 1757385600, + "rates": { + "cells": [ + {"address": {"from": "USD", "to": "USD"}, "value": 1.0}, + {"address": {"from": "USD", "to": "EUR"}, "value": 0.92}, + {"address": {"from": "USD", "to": "GBP"}, "value": 0.78}, + {"address": {"from": "USD", "to": "NOK"}, "value": 10.50}, + {"address": {"from": "EUR", "to": "USD"}, "value": 1.09}, + {"address": {"from": "EUR", "to": "EUR"}, "value": 1.0}, + {"address": {"from": "EUR", "to": "GBP"}, "value": 0.85} + ] + } + } +} +``` + +#### Feed Products + +Feed products with their seller currency and per-market prices. Always include a `DEFAULT` market entry as fallback. + +```json +{ + "put": "id:product:product::sku-100", + "fields": { + "seller_currency": "USD", + "per_market_price": [ + {"market": "DEFAULT", "price": 199.0}, + {"market": "EU", "price": 189.0}, + {"market": "UK", "price": 209.0}, + {"market": "NO", "price": 300.0} + ] + } +} +``` + + + **Note:** If your product schema already includes identifiers or descriptive fields (such as `product_id` or `product_name`), include them in the feed as usual. The example keeps only the required currency fields so it works with the minimal schema shown above. 
+ + +### Query with Price Filtering + +Use the following query parameters to filter products by price range in a specific market and currency: + +| Parameter | Description | Example | +| :--- | :--- | :--- | +| `ecommerce.multicurrency.market` | Target market code | `NO`, `US`, `EU`, `NO-49`, `27` | +| `ecommerce.multicurrency.currency` | Target currency code | `NOK`, `USD`, `EUR` | +| `ecommerce.multicurrency.price-min` | Minimum price in target currency | `1000` | +| `ecommerce.multicurrency.price-max` | Maximum price in target currency | `1500` | +| `ecommerce.multicurrency.enrich` | Optional: expose forex rates as query tensor for ranking. Defaults to false | `true` or `false` | + +#### Example Query + +```sql +$ vespa query \ + 'yql=select * from product where true' \ + 'searchChain=multi-currency-filter' \ + 'ecommerce.multicurrency.market=NO' \ + 'ecommerce.multicurrency.currency=NOK' \ + 'ecommerce.multicurrency.price-min=1000' \ + 'ecommerce.multicurrency.price-max=1500' +``` + +This query returns all products whose effective price in NOK (Norwegian Krone) for the Norwegian market is between 1000 and 1500 NOK. The searcher will: + +1. Check if the product has a market-specific price for `NO` +2. If yes, use that price directly +3. If no, convert the product's `DEFAULT` market price from the seller currency to NOK using forex rates +4. 
Keep only products within the specified price range + +### Validation Rules + +The multi-currency searcher validates query parameters and returns an error if validation fails: + +- **Currency codes** must be exactly 3 letters (ISO-4217 format, e.g., `USD`, `EUR`, `NOK`) +- **Market codes** must be alphanumeric (e.g., `US`, `NO`, `EU`, `NO-47`, `13`) +- **Price values** must be valid numbers and non-negative +- **Price range**: `price-max` must be greater than or equal to `price-min` +- **Currency availability**: The requested currency must exist in the forex document + +If any parameter is missing or invalid, the searcher will either skip filtering (for format issues) or return an error result (for logical issues like invalid price ranges or unknown currencies). + + + **Note:** When filtering is skipped due to malformed inputs, the searcher acts as a no-op and the trace log records the reason (for example, "currency failed ISO-4217 validation; skipping filter"). Use [query tracing](/en/reference/api/query#tracing) to confirm whether the multi-currency filter actually ran. + + +### Updating Forex Rates + +Forex rates can be updated at any time by feeding a new version of the forex document with an updated `timestamp` field. The cache will automatically pick up the new rates on its next refresh cycle (typically within seconds). + +```bash +$ vespa feed <(echo '{ + "update": "id:forex:forex::forex", + "fields": { + "timestamp": {"assign": 1757472000}, + "rates": { + "assign": { + "cells": [...] + } + } + } +}') +``` + +## How It Works + +### Price Resolution Logic + +For each product, the effective price in the target currency is determined as follows: + +1. **Market-specific price:** If the product has a price entry for the requested market, use that price directly +2. **Currency conversion:** Otherwise, use the `DEFAULT` market price and convert it from the seller currency to the target currency using forex rates +3. 
**Price range filter:** Keep only products whose effective price falls within the specified min/max range + +### Forex Cache + +The `CachedForexRateService` component maintains an in-memory cache of exchange rates and refreshes them periodically from the forex document (`id:forex:forex::forex`). This ensures low-latency access to forex rates during query processing. + +#### Automatic Refresh + +The `ForexRateRetriever` component automatically refreshes forex rates every 10 seconds using a fixed schedule. This cadence (10s interval, 5s retry window, 1s per attempt) is hard-coded in the provided component and cannot be tuned at deployment time. Each refresh cycle: + +- Queries the forex document using the `forex-cache` search chain +- Validates the document has both `rates` (tensor) and `timestamp` (long) fields +- Only applies updates if the timestamp is newer than the cached version +- Retries within a 5-second budget if the first attempt fails + +#### Health States + +The forex service tracks its operational status with the following health states: + +| State | Description | Query Behavior | +| :--- | :--- | :--- | +| `READY` | Forex rates loaded and service is operational | Queries with multi-currency filtering work normally | +| `UNINITIALIZED` | No forex document has been loaded yet | Queries return error: "forex rate service not initialized" | +| `OUTAGE` | Refresh failed but stale data exists (cache stays ready for re-use once the retriever succeeds again) | Queries return error: "forex rate service temporarily unavailable (last refresh failed)" | +| `INVALID_FOREX_DOCUMENTS` | Multiple forex documents detected (expected exactly one) | Queries return error: "ensure exactly one forex document exists" | + +#### Error Handling + +When the service is not in `READY` state, queries with multi-currency filtering will: + +- Return an empty result with an appropriate error message +- Log detailed diagnostic information at appropriate trace levels +- Continue retrying 
background refresh attempts until successful + +### Performance + +Multi-currency price filtering is implemented as efficient query-time filter construction, not result-time evaluation. This means Vespa can use its indexes to find matching products without iterating through all documents. + +#### How Filtering Works + +When a query with multi-currency parameters is received, the searcher: + +1. **Pre-computes price ranges:** Converts the buyer's price range (e.g., 1000-1500 NOK) into equivalent ranges for every seller currency using cached forex rates. For example, if the forex cache has USD, EUR, and GBP, it computes what 1000-1500 NOK equals in each currency. +2. **Builds structured query filters:** Creates a query tree using Vespa's efficient query primitives: + - `SameElementItem` - Matches documents where market and price appear in the same array element + - `RangeItem` - Efficiently filters on numeric price ranges using indexes + - `WordItem` - Matches exact seller currency and market values + +3. **Injects filter into query tree:** Combines the price filter with the user's query, allowing Vespa's query execution engine to evaluate it efficiently using indexes. + +This approach has several performance benefits: + +- **No document iteration:** Vespa uses attribute indexes to quickly identify matching documents without fetching and evaluating all products +- **One-time conversion:** Currency conversion happens once during query construction, not for every product in the result set +- **Index-backed filtering:** Price range and market matching leverage Vespa's fast attribute lookups +- **Query optimization:** Vespa's query optimizer can reorder and optimize the combined query tree for efficient execution + +## Advanced Usage + +### Custom Field Configuration + +By default, the multi-currency components expect specific field names in your product schema. You can customize these field names using the `ecommerce-schema-wiring` configuration. 
See [Configuration Reference](#configuration) for all available parameters and their defaults. + +```html + + + + + + + seller_currency + per_market_price + market + price + + + DEFAULT + + + forexRates + + + + + + +``` + +### Using Forex Rates in Ranking + +When `ecommerce.multicurrency.enrich=true` is set, the searcher exposes the forex rates as a query tensor `query(forexRates)` that can be used in ranking expressions. The ranking profile should implement the same fallback logic as the searcher: check for market-specific prices first, then fall back to the `DEFAULT` market price, and convert to the buyer's currency. + +```js expandable +rank-profile price_ranking { + inputs { + query(forexRates) tensor(from{}, to{}) + query(buyer_currency) tensor(to{}) + query(buyer_market) tensor(market{}) + } + + function from_selector() { + expression: tensorFromLabels(attribute(seller_currency), from) + } + + function buyer_rate() { + expression: sum(query(forexRates) * from_selector() * query(buyer_currency), from, to) + } + + function price_tensor() { + expression: tensorFromStructs(attribute(per_market_price), market, price, double) + } + + function market_specific_price() { + expression: sum(price_tensor() * query(buyer_market), market) + } + + function default_price() { + expression: price_tensor(){market:'DEFAULT'} + } + + function effective_price_in_seller_currency() { + expression: if(market_specific_price() > 0, market_specific_price(), default_price()) + } + + function effective_price_in_buyer_currency() { + expression: effective_price_in_seller_currency() * buyer_rate() + } + + first-phase { + expression: -effective_price_in_buyer_currency() + } +} +``` + +#### Filter Parameters vs Ranking Inputs + +There are two distinct types of query parameters used together, and they must not be confused: + +| Type | Prefix | Format | Purpose | +| :--- | :--- | :--- | :--- | +| Filter parameters | `ecommerce.multicurrency.*` | Plain string or number | Tells the searcher which 
market, currency, and price range to filter on. Consumed server-side — never reaches the rank profile. | +| Ranking inputs | `ranking.features.query(...)` | One-hot tensor | Passed directly to the rank profile to drive scoring expressions. The searcher does not read or modify these. | + + + **Note:** The only exception is `enrich=true`, which causes the searcher to inject `query(forexRates)` from its in-memory cache — because the client cannot know the current rates. `buyer_currency` and `buyer_market` are already known to the client so they are passed directly as ranking inputs, not via the searcher. + + +#### One-Hot Tensor Format + +Ranking inputs use one-hot encoded tensors. The format is `{{dimension:value}:1}` where `dimension` is the named dimension defined in the rank profile `inputs` block, and `value` is the label to select: + +- `query(buyer_currency)` has dimension `to` — pass `{{to:NOK}:1}` +- `query(buyer_market)` has dimension `market` — pass `{{market:NO}:1}` + +Both tensors are required by the example rank profile above. Omitting either causes its ranking expressions to produce zero for all documents. 
+ +#### Complete Query with Filtering and Ranking + +The following example uses the `price_ranking` profile defined above with price filtering and both ranking input tensors: + +```bash +$ vespa query \ + 'yql=select * from product where true' \ + 'searchChain=multi-currency-filter' \ + 'ecommerce.multicurrency.market=NO' \ + 'ecommerce.multicurrency.currency=NOK' \ + 'ecommerce.multicurrency.price-min=1000' \ + 'ecommerce.multicurrency.price-max=1500' \ + 'ecommerce.multicurrency.enrich=true' \ + 'ranking.profile=price_ranking' \ + 'ranking.features.query(buyer_currency)={{to:NOK}:1}' \ + 'ranking.features.query(buyer_market)={{market:NO}:1}' +``` + +Key functions: + +- `tensorFromStructs` - Converts the `per_market_price` array to a tensor at ranking time +- `market_specific_price()` - Extracts price for the requested market if it exists +- `default_price()` - Gets the `DEFAULT` market price as fallback +- `effective_price_in_seller_currency()` - Selects market-specific price or falls back to `DEFAULT` +- `effective_price_in_buyer_currency()` - Converts the effective price using forex rates + +## Configuration Reference + +This section describes the [configuration parameters](/en/applications/configuring-components) used by the multi-currency components. All parameters are part of the `ecommerce-schema-wiring` config (`ai.vespa.ecommerce.common.ecommerce-schema-wiring`). + +| Parameter | Description | Type | Default | +| :--- | :--- | :--- | :--- | +| `productFields.sellerCurrency` | Field name for the product's seller currency. | `string` | `seller_currency` | +| `productFields.perMarketPriceArrayStruct` | Array field name containing per-market prices. | `string` | `per_market_price` | +| `productFields.marketStructField` | Struct field name for market code. | `string` | `market` | +| `productFields.priceStructField` | Struct field name for price value. 
| `string` | `price` | +| `defaults.market` | Default market identifier used as fallback when no market-specific price exists. | `string` | `DEFAULT` | +| `rankProfileInputs.forexRates` | Query tensor name for forex rates in ranking. Used when `enrich=true` to inject the forex tensor into the query. | `string` | `forexRates` | + +## Requirements + +- **Single global forex document:** Maintain one document with ID `id:forex:forex::forex` and mark it `global="true"`. Additional documents trigger the `INVALID_FOREX_DOCUMENTS` health state and queries fail. +- **Forex payload completeness:** Every feed/update must include the `rates` tensor for all buyer/seller pairs you filter on, identity rates (USD→USD, etc.), and a monotonically increasing `timestamp` (epoch seconds). +- **Product schema layout:** Products expose `seller_currency`, encode all `per_market_price.price` values in that seller currency, and include a `DEFAULT` market entry. +- **Container wiring:** Deploy the `multi-currency-filter` search chain and the `forex-cache` chain in your container cluster, along with the `ForexRateService` and `ForexRateRetriever` components. +- **Query parameters:** Multi-currency filtering only runs when the query supplies `market`, `currency`, `price-min`, and `price-max`. Missing or malformed parameters cause the searcher to skip filtering. + +## Recommended Practices + +- **Structure product ids/names as needed:** Keep your existing product fields (IDs, names, facets) and add the required currency fields alongside them. +- **Model asymmetric rates:** Store both A→B and B→A conversions explicitly so buyer→seller lookups stay accurate even when FX rates are not perfect inverses. +- **Plan update cadence:** Choose how often you feed forex data based on market volatility. The retriever polls every 10 seconds, so frequent feeds are reflected quickly. 
+- **Default chain selection:** Either set `searchChain=multi-currency-filter` on relevant queries or make it the default chain so multi-currency filtering is always applied when parameters are present. + +## See Also + +- [Saved Search Notifications](/en/modules/e-commerce/saved-search) +- [Using Features Together](/en/modules/e-commerce/using-features-together) +- [E-commerce tutorial](/en/learn/tutorials/e-commerce) +- [Searcher Development](/en/applications/searchers) +- [Tensor Guide](/en/ranking/tensor-user-guide) +- [tensorFromStructs - Convert struct arrays to tensors](/en/reference/ranking/rank-features#tensorFromStructs(attribute,key,value,type)) +- [Struct Fields in Schemas](/en/reference/schemas/schemas#struct-field) +- [Search Chains](/en/applications/searchers#search-chains) \ No newline at end of file diff --git a/mintlify-docs/en/modules/e-commerce/saved-search.mdx b/mintlify-docs/en/modules/e-commerce/saved-search.mdx new file mode 100644 index 0000000000..4de0f9e70c --- /dev/null +++ b/mintlify-docs/en/modules/e-commerce/saved-search.mdx @@ -0,0 +1,350 @@ +--- +title: Saved Search Notifications +--- + + + +**Important:** **Experimental feature under active development.** Saved Search Notifications is in early access and is being actively developed. Schemas, configuration, the webhook payload format, and other public APIs may change in backwards-incompatible ways without notice. Do not rely on this feature for production-critical workloads, and expect to migrate as the feature evolves. Feedback is welcome - please reach out to [Vespa Support](https://vespa.ai/support/). + + +Vespa for e-commerce includes a module for storing queries in Vespa ("searches") and issuing notifications when a new or updated document matches any saved searches. A typical use case in e-commerce is letting users store queries on products using filters on keywords, price, location etc. and sending them a notification when a new product matches their query. 
+ +## Overview + +The saved search notifications feature supports: + +- **Storing predicate queries** - Saved searches contain arbitrary boolean expressions over a set of string attributes and numerical ranges. See [Predicate Fields](/en/schemas/predicate-fields). +- **Schema wiring configuration** - Wirings between the saved search attributes and the document fields can be configured by the application. +- **Webhook notifications** - A match between a new or updated document and a set of saved searches can be sent to an HTTP endpoint in JSON format. +- **Separate document processing** - Separate routing to the saved search component allows processing of saved searches without disrupting normal feed operations. + +## Quick Start + +A minimal setup for demonstrating saved search notification capabilities is given in this section. We will develop a small shopping use-case example with a few saved search attributes. + +### Define Schemas + +Create two schemas: one for products and one for storing the saved searches. + +#### Product Schema + +We create a minimal document type representing a product for sale. Each of the three fields will correspond to a searchable attribute in the saved searches. + +```js expandable +schema product { + document product { + + # Other fields + + field price type int { + indexing: attribute + } + + field category type string { + indexing: attribute + } + + field condition type string { + indexing: attribute + } + } + + # rank-profiles etc. +} +``` + +#### Saved Search Schema + +The predicate field will contain the entire search expression used to match products. + +```js expandable +schema saved_search { + document saved_search { + field filters type predicate { + indexing: attribute + index { + arity: 2 # Mandatory + # Range of values the expressions are expected to operate on. 
+ # Better performance if these are smaller + lower-bound: 3 + upper-bound: 500 + + dense-posting-list-threshold: 0.25 + } + } + } +} +``` + +### Configure Services + +A minimal `services.xml` configuring the saved search component can look like this: + +```xml expandable + + + + + + + + + + + WEBHOOK + + http://localhost:8000/notification + + + + product + saved_search + filters + 100 + + + + category + category + true + + + condition + condition + false + + + + + price + price + true + + + + + + + + + 2 + + + + + + + + + + + + + + + + + + +``` + +### Feed Data + +#### Feed saved searches + +To test the functionality, feed two saved search documents: + +```json +[ + {"put": "id:saved_search:saved_search::search1", "fields": { + "filters": "price in [20..100] and category in [Sports, Books]" + }}, + {"put": "id:saved_search:saved_search::search2", "fields": { + "filters": "price in [200..487] and category in [Electronics]" + }} +] +``` + +#### Feed a product to the notification route + +Assume a new product is available, with the following schema: + +```json +{"put": "id:saved_search:product::product1", "fields":{"category": "Sports", "price": 50}} +``` + +To enable notifications when feeding this product, feed it to the `notification-route`: + +```bash +$ vespa feed --route notification-route product.jsonl +``` + +Assuming everything is set up correctly, it should match `id:saved_search:saved_search::search1` but not `search2`. If a server is receiving requests at the endpoint specified in the `URL` parameter of the `webhook` configuration, you should see a request with a JSON body representing the matched pair. + + + **Warning:** The `SavedSearchDocumentProcessor` acts as a **sink** for incoming documents. That is, `Put` and `Update` operations sent to that document processor will not propagate down to the content nodes, effectively discarding the operations. 
This is why a `routingtable` is specified in the example - documents going to the route `notification-route` will "fork", with one path going to the content cluster and one path to the saved search component. + + +## Notification kinds + +### Webhook + +The webhook notification kind will send a request to a specified URL for each document that matches a set of saved searches. It requires an external application to provide the handling of such requests. + +If the configuration parameter `notification.kind == WEBHOOK`, all configuration parameters prefixed with `notification.webhook` will take effect. The requests from the saved search application will be `POST`-requests with a JSON body: + +```json highlight= {7} +{ + "id": "", + "timestamp": "", + "matched_documents": [ + "", + "", + ... + ] +} +``` + +#### Security + +In most cases, the Webhook endpoint handling the notifications is (and should be) protected in some way. The currently supported way to send authorized requests to the webhook endpoint is by combining the [Vespa secret store](/en/security/secret-store) with the `notification.webhook.headers[].secret` config parameter. Assume we want to send notifications to `https://my.webhook.com/notification`, and that the api requires the following header: + +```text +Authorization: Bearer TOKEN +``` + +to be present in all requests. To enable our application to use this, first create the secret in Vespa Cloud and let it contain the **full** value of the header: `Bearer TOKEN`, replacing `TOKEN` with the actual token. + +Next, add the secret to the application in `services.xml`: + +```xml expandable + + ... + + + +``` + +Finally, configure the `SavedSearchDocumentProcessor` to add a header with this secret value to all notification requests: + +```xml expandable +... + + + + WEBHOOK + + + + Authorization + + myApiToken + + + + + + +``` + +Webhook notifications are sent once without automatic retry. Delivery failures are recorded in the Vespa log. 
+ +### Vespa Schema + +For a simpler way to test saved search notification, a method for storing the notifications within the Vespa application is provided. This method represents each notification between a pair of a product and a saved search using a dedicated Vespa document type. It is recommended for testing purposes only. + +If the configuration parameter `notification.kind == VESPA_SCHEMA`, all configuration parameters prefixed with `notification.vespaSchema` will take effect. A minimal working example of this notification kind is given below. + +#### Notification example + +Define a document type for storing the notifications, for example `notification.sd`: + +```js +schema notification { + document notification { + field product_id type string { + indexing: attribute | summary + } + + field saved_search_id type string { + indexing: attribute | summary + } + + field timestamp type long { + indexing: attribute | summary + } + } +} +``` + +Add the document type to the application: + +```xml + + + ... + + + +``` + +Configure the schema wirings of the `SavedSearchDocumentProcessor`: + +```xml expandable + + + + + + VESPA_SCHEMA + + notification + saved_search + product_id + saved_search_id + timestamp + + + + +``` +Now notifications can be inspected by using `vespa visit` or `vespa query` with the appropriate parameters. +## Configuration reference + +This section describes the possible [configuration parameters](/en/applications/configuring-components) used by the document processor. + +| Parameter | Description | Type | Default value | +| :--- | :--- | :--- | :--- | +| `notification.kind` | Method to use for sending notifications. | `enum {WEBHOOK, DUMMY, VESPA_SCHEMA}` | `DUMMY` | +| `notification.webhook.URL` | URL to send notification requests. | `string` | | +| `notification.webhook.connectionPoolSize` | Number of HTTP client threads to use in the container cluster. 
| `int` | `20` | +| `notification.webhook.headers[].key` | Key of a header to add to all webhook requests. | `string` | | +| `notification.webhook.headers[].value` | Value of a header to add to all webhook requests. | `string` | | +| `notification.webhook.headers[].secret` | Use a secret from Vespa secret store instead of the value provided in `.value`. The value provided here should match the name of a secret specified with a `secrets` tag in `services.xml`. | `string` | | +| `notification.vespaSchema.documentType` | Name of the Vespa document type to use for storing notifications. This document type has to be defined in the application. | `string` | saved\_search\_notification | +| `notification.vespaSchema.namespace` | Namespace to use for creating document IDs for the notification documents. | `string` | saved\_search | +| `notification.vespaSchema.fieldPathProductId` | Fieldpath for storing the product id in the notification documents. | `string` | product\_id | +| `notification.vespaSchema.fieldPathSavedSearchId` | Fieldpath for storing saved search id in the notification documents. | `string` | saved\_search\_id | +| `notification.vespaSchema.fieldPathTimestamp` | Fieldpath for storing timestamps in the notification documents. | `string` | timestamp | +| `productDocumentType` | The name of the document type that can trigger notifications, e.g. `product`. | `string` | product | +| `savedSearchDocumentType` | The name of the document type storing saved searches, e.g. `saved_search`. | `string` | saved\_search | +| `predicateFieldName` | The name of the field in `savedSearchDocumentType` storing the predicate query. | `string` | filters | +| `savedSearchNumHits` | Maximum number of saved searches that can match per product update. Matches beyond this limit are silently dropped. Higher values increase work per update. | `int` | 100 | +| `regularAttributes[].predicateName` | The name of a regular (string) attribute to be used in the saved search predicate field. 
| `string` | | +| `regularAttributes[].fieldPath` | The field in the `productDocumentType` to be matched with this attribute. This field should be of type `string`. | `string` | | +| `regularAttributes[].required` | Whether documents are required to specify this attribute. | `bool` | `false` | +| `rangeAttributes[].predicateName` | The name of a numerical range attribute to be used in the saved search predicate field. | `string` | | +| `rangeAttributes[].fieldPath` | The field in the `productDocumentType` to be matched with this attribute. This field should be of a numeric type, e.g. `int`. | `string` | | +| `rangeAttributes[].required` | Whether documents are required to specify this attribute. | `bool` | `false` | + +## See Also + +- [Predicate Fields](/en/schemas/predicate-fields) +- [Document Processors](/en/applications/document-processors) +- [Configuring Components](/en/applications/configuring-components) +- [Secret Store](/en/security/secret-store) +- [Multi-Currency Pricing](/en/modules/e-commerce/multi-currency-filtering) +- [Using Features Together](/en/modules/e-commerce/using-features-together) \ No newline at end of file diff --git a/mintlify-docs/en/modules/e-commerce/using-features-together.mdx b/mintlify-docs/en/modules/e-commerce/using-features-together.mdx new file mode 100644 index 0000000000..b8fe35b7b0 --- /dev/null +++ b/mintlify-docs/en/modules/e-commerce/using-features-together.mdx @@ -0,0 +1,234 @@ +--- +title: Using Features Together +--- + + +**Important:** Some features described on this page (notably [Saved Search Notifications](/en/modules/e-commerce/saved-search)) are experimental and under active development. APIs and configuration may change in backwards-incompatible ways - see the individual feature pages for details. + + +The e-commerce features are designed as standalone components that can be composed together. This page covers the configuration needed when features interact at feed time or query time. 
Read the individual feature pages before this one: + +- [Multi-Currency Pricing](/en/modules/e-commerce/multi-currency-filtering) +- [Saved Search Notifications](/en/modules/e-commerce/saved-search) + +The schemas, services configuration, and other examples on this page are illustrative. Adapt field names, document types, and configuration values to match your application. + +## Saved Search with Multi-Currency + +When [saved search notifications](/en/modules/e-commerce/saved-search) and [multi-currency pricing](/en/modules/e-commerce/multi-currency-filtering) are used together, the saved search document processor generates per-currency price features at feed time. This enables saved searches with price filters to match products regardless of the seller's currency. + +### How It Works + +Without multi-currency, a saved search like `price in [100..200]` matches against the product's single `price` field. With multi-currency enabled, the document processor: + +1. Reads the product's `per_market_price` entries and `seller_currency` +2. Converts each price to every known currency using the forex rates +3. Scales the converted prices to integers by multiplying with `priceScaleFactor` (e.g., a factor of 100 preserves two decimal places). Predicate ranges require integer values, so this step is needed to retain precision. +4. Generates predicate range features named `{featurePrefix}_{currency}_{market}` (e.g., `price_NOK_DEFAULT`, `price_EUR_NO`) +5. Feeds these features alongside the regular attributes into the predicate query + +A saved search filtering on NOK prices can then use `price_NOK_DEFAULT in [1000..1500]` as its predicate expression, and it will match products originally priced in any currency. + +### Predicate Upper Bound + +The predicate field's `upper-bound` in the saved search schema must be large enough to cover the highest possible scaled converted price. 
Because prices are both currency-converted and scaled by `priceScaleFactor`, the resulting values can be significantly larger than the original prices. + + + **Warning:** If any single range feature in a predicate query exceeds the `upper-bound`, Vespa rejects the **entire** predicate query for that document, not just the out-of-range feature. This means the product will not match any saved searches at all. + + +For example, with a scale factor of 100 and an EUR-to-SEK rate of \~11.7647: a product priced at 200 EUR produces `price_SEK_DEFAULT = 200 × 11.7647 × 100 ≈ 235,294`. The `upper-bound` must be at least 235,294 for this to work. + +Choose an upper bound that covers your highest-priced products converted to the weakest target currency, multiplied by the scale factor. A generous margin is recommended to accommodate price and exchange rate fluctuations. + +### Schema Setup + +The following are minimal examples showing the fields required from each feature. Your schemas will likely have additional fields and configuration. + +#### Product Schema + +The product schema must include fields from both features: the saved search attributes (`price`, `category`, etc.) and the multi-currency fields (`seller_currency`, `per_market_price`). + +```js expandable +schema product { + document product { + + # Saved search attributes + field price type int { + indexing: attribute + } + + field category type string { + indexing: attribute + } + + # Multi-currency fields + field seller_currency type string { + indexing: summary | attribute + } + + struct market_price { + field market type string {} + field price type double {} + } + + field per_market_price type array<market_price> { + indexing: summary + summary: matched-elements-only + struct-field market { + indexing: attribute + } + struct-field price { + indexing: attribute + } + } + } +} +``` + +#### Saved Search Schema + +Set the `upper-bound` high enough to cover scaled converted prices. 
See [Predicate Upper Bound](#predicate-upper-bound) above. + +```js expandable +schema saved_search { + document saved_search { + field filters type predicate { + indexing: attribute + index { + arity: 2 + lower-bound: 3 + upper-bound: 250000 + + dense-posting-list-threshold: 0.25 + } + } + } +} +``` + +### Services Configuration + +Both features are configured in the same container cluster. The example below shows a combined setup - the key addition is the `multicurrency` block inside the `ecommerce-schema-wiring` config of the document processor: + +```xml expandable + + + + + + + + + + + + + + + + + + + + + WEBHOOK + + http://my-webhook-endpoint:8000/notification + + + + + seller_currency + per_market_price + market + price + + + product + saved_search + filters + + + + category + category + false + + + + + price + price + false + + + + + + true + price + 100 + + + + + + + + + + + + + + + + + + + + + + + + +``` + +### Multi-currency Configuration Reference + +These parameters are part of the `ecommerce-schema-wiring` config and only apply when the saved search document processor is used together with multi-currency: + +| Parameter | Description | Type | Default | +| :--- | :--- | :--- | :--- | +| `multicurrency.enabled` | Enable generation of per-currency price features at feed time. | `bool` | `false` | +| `multicurrency.featurePrefix` | Prefix for the generated predicate range features. A feature is named `{prefix}_{currency}_{market}`. | `string` | `price` | +| `multicurrency.priceScaleFactor` | Integer multiplier applied to converted prices before feeding as predicate range features. Predicate ranges require integer values, so this preserves decimal precision. A factor of 100 preserves two decimal places. | `int` | `100` | + + + **Note:** The `productFields` config block is shared between both features. When combining, use the same field names in the searcher and document processor configurations. 
+ + +### Feeding Workflow + +When both features are active, the feeding workflow is: + +1. Feed the forex document (`id:forex:forex::forex`) and wait for the `CachedForexRateService` to reach `READY` state +2. Feed saved search documents to the content cluster +3. Feed products to the `notification-route` - the document processor will generate multi-currency predicate features and match against saved searches + + + **Warning:** The forex rates must be loaded before feeding products to the notification route. If the `ForexRateService` is not ready, the document processor cannot generate currency-converted price features. + + +## See Also + +- [Saved Search Notifications](/en/modules/e-commerce/saved-search) +- [Multi-Currency Pricing](/en/modules/e-commerce/multi-currency-filtering) +- [Predicate Fields](/en/schemas/predicate-fields) +- [Configuring Components](/en/applications/configuring-components) diff --git a/mintlify-docs/en/operations/access-logging.mdx b/mintlify-docs/en/operations/access-logging.mdx new file mode 100644 index 0000000000..123ef3b5b1 --- /dev/null +++ b/mintlify-docs/en/operations/access-logging.mdx @@ -0,0 +1,265 @@ +--- +title: "Access Logging" +--- + +The Vespa access log format allows the logs to be processed by a number of available tools handling JSON based (log) files. With the ability to add custom key/value pairs to the log from any Searcher, you can easily track the decisions done by container components for given requests. + +## Vespa Access Log Format + +In the Vespa access log, each log event is logged as a JSON object on a single line. The log format defines a list of fields that can be logged with every request. In addition to these fields, [custom key/value pairs](#logging-key-value-pairs-to-the-json-access-log-from-searchers) can be logged via Searcher code. 
Pre-defined fields: + +| Name | Type | Description | Always present | +| --- | --- | --- | --- | +| ip | string | The IP address request came from | yes | +| time | number | UNIX timestamp with millisecond decimal precision (e.g. 1477828938.123) when request is received | yes | +| duration | number | The duration of the request in seconds with millisecond decimal precision (e.g. 0.123) | yes | +| responsesize | number | The size of the response in bytes | yes | +| code | number | The HTTP status code returned | yes | +| method | string | The HTTP method used (e.g. 'GET') | yes | +| uri | string | The request URI from path and beyond (e.g. '/search?query=test') | yes | +| version | string | The HTTP version (e.g. 'HTTP/1.1') | yes | +| agent | string | The user agent specified in the request | yes | +| host | string | The host header provided in the request | yes | +| scheme | string | The scheme of the request | yes | +| port | number | The IP port number of the interface on which the request was received | yes | +| remoteaddr | string | The IP address of the [remote client](#logging-remote-address-port) if specified in HTTP header | no | +| remoteport | string | The port used from the [remote client](#logging-remote-address-port) if specified in HTTP header | no | +| peeraddr | string | Address of immediate client making request if different from *remoteaddr* | no | +| peerport | string | Port used by immediate client making request if different from *remoteport* | no | +| user-principal | string | The name of the authenticated user (java.security.Principal.getName()) if principal is set | no | +| ssl-principal | string | The name of the x500 principal if client is authenticated through SSL/TLS | no | +| search | object | Object holding search specific fields | no | +| search.totalhits | number | The total number of hits for the query | no | +| search.hits | number | The hits returned in this specific response | no | +| search.coverage | object | Object holding 
[query coverage information](/en/performance/graceful-degradation) similar to that returned in result set. | no | +| connection | string | Reference to the connection log entry. See [Connection log](#connection-log) | no | +| attributes | object | Object holding [custom key/value pairs](#logging-key-value-pairs-to-the-json-access-log-from-searchers) logged in searcher. | no | + + +**Note:** + +IP addresses can be either IPv4 addresses in standard dotted format (e.g. 127.0.0.1) or IPv6 addresses in standard form with leading zeros omitted (e.g. 2222:1111:123:1234:0:0:0:4321). + + +An example log line will look like this (here, pretty-printed): + +```json +{ + "ip": "152.200.54.243", + "time": 920880005.023, + "duration": 0.122, + "responsesize": 9875, + "code": 200, + "method": "GET", + "uri": "/search?query=test&param=value", + "version": "HTTP/1.1", + "agent": "Mozilla/4.05 [en] (Win95; I)", + "host": "localhost", + "search": { + "totalhits": 1234, + "hits": 0, + "coverage": { + "coverage": 98, + "documents": 100, + "degraded": { + "non-ideal-state": true + } + } + } +} +``` + + +**Note:** + +The log format is extendable by design such that the order of the fields can be changed and new fields can be added between minor versions. Make sure any programmatic log handling is using a proper JSON processor. + + +Example: Decompress, pretty-print, with human-readable timestamps: + +```bash +$ jq '. + {iso8601date:(.time | todateiso8601)}' \ + <(unzstd -c /opt/vespa/logs/vespa/access/JsonAccessLog.default.20210601010000.zst) +``` + +### Logging Remote Address/Port + +In some cases when a request passes through an intermediate service, this service may add HTTP headers indicating the IP address and port of the real origin client. These values are logged as *remoteaddr* and *remoteport* respectively. Vespa will log the contents in any of the following HTTP request headers as *remoteaddr*: *X-Forwarded-For*, *Y-RA*, *YahooRemoteIP* or *Client-IP*. 
If more than one of these headers is present, the precedence is in the order listed here, i.e. *X-Forwarded-For* takes precedence over *Y-RA*. The contents of the *Y-RP* HTTP request header will be logged as *remoteport*. + +If the remote address or port differs from those initiating the HTTP request, the address and port for the immediate client making the request are logged as *peeraddr* and *peerport* respectively. + +## Configuring Logging + +For details on the access logging configuration see [accesslog in the container](/en/reference/applications/services/container#accesslog) element in *services.xml*. + +Key configuration options include: + +- **fileNamePattern**: Pattern for log file names with time variable support +- **rotationInterval**: Time-based rotation schedule (minutes since midnight) +- **rotationSize**: Size-based rotation threshold in bytes (0 = disabled) +- **rotationScheme**: Either 'sequence' or 'date' +- **compressionFormat**: GZIP or ZSTD compression for rotated files + +### Logging Request Content + +Vespa supports logging of request content for specific URI paths. This is useful for inspecting query content of search POST requests or document operations of Document v1 POST/PUT requests. The request content is logged as a base64-encoded string in the JSON access log. + +To configure request content logging, use the [request-content](/en/reference/applications/services/container#request-content) element in the accesslog configuration in *services.xml*. + +Here is an example of how the request content appears in the JSON access log: + +```json +{ + ... + "method": "POST", + "uri": "/search", + ..., + "request-content": { + "type": "application/json; charset=utf-8", + "length": 12345, + "body": "<base64-encoded request content>" + } +} +``` + +### File name pattern + +The file name pattern is expanded using the time when the file is created. 
The following parts in the file name are expanded: + +| Field | Format | Meaning | Example | +| --- | --- | --- | --- | +| `%Y` | YYYY | Year | 2003 | +| `%m` | MM | Month, numeric | 08 | +| `%x` | MMM | Month, textual | Aug | +| `%d` | dd | Date | 25 | +| `%H` | HH | Hour | 14 | +| `%M` | mm | Minute | 30 | +| `%S` | ss | Seconds | 35 | +| `%s` | SSS | Milliseconds | 123 | +| `%Z` | Z | Time zone | \-0400 | +| `%T` | Long | System.currentTimeMillis | 1349333576093 | +| `%%` | `%` | Escape percentage | % | + +## Log rotation + +Apache httpd style log *rotation* can be configured by setting the *rotationScheme*. There are two alternatives for the rotationScheme: sequence and date. Rotation can be triggered by time intervals using *rotationInterval* and/or by file size using *rotationSize*. + +### Sequence rotation scheme + +The *fileNamePattern* is used for the active log file name (which in this case will often be a constant string). At rotation, this file is given the name fileNamePattern.N where N is 1 + the largest integer found by extracting the integers from all files ending in `.<integer>` in the same directory. + +```xml + +``` + +### Date rotation scheme + +The *fileNamePattern* is used for the active log file name here too, but the log files are not renamed at rotation. Instead, you must specify a time-dependent fileNamePattern so that each time a new log file is created, the name is unique. In addition, a symlink is created pointing to the active log file. The name of the symlink is specified using *symlinkName*. + +```xml + +``` + +### Rotation interval + +The time of rotation is controlled by setting *rotationInterval*: + +```xml + +``` + +The rotationInterval is a list of numbers specifying when to do rotation. Each element represents the number of minutes since midnight. Ending the list with '...' means continuing the [arithmetic progression](https://en.wikipedia.org/wiki/Arithmetic_progression) defined by the two last numbers for the rest of the day. 
E.g. "0 100 240 480 ..." is expanded to "0 100 240 480 720 960 1200" + +### Log retention + +Access logs are rotated, but not deleted by Vespa processes. It is up to the application owner to take care of archiving of access logs. + +## Logging Key/Value pairs to the JSON Access Log from Searchers + +To add a key/value pair to the access log from a searcher, use + +```bash +query/result.getContext(true).logValue(key,value) +``` + +Such key/value pairs may be added from any thread participating in handling the query without incurring synchronization overhead. + +If the same key is logged multiple times, the values written will be included in the log as an array of strings rather than a single string value. + +The key/value pairs are added to the *attributes* object in the log. + +An example log line will then look something like this: + +```json +{"ip":"152.200.54.243","time":920880005.023,"duration":0.122,"responsesize":9875,"code":200,"method":"GET","uri":"/search?query=test&param=value","version":"HTTP/1.1","agent":"Mozilla/4.05 [en] (Win95; I)","host":"localhost","search":{"totalhits":1234,"hits":0},"attributes":{"singlevalue":"value1","multivalue":["value2","value3"]}} +``` + +A pretty print version of the same example: + +```json +{ + "ip": "152.200.54.243", + "time": 920880005.023, + "duration": 0.122, + "responsesize": 9875, + "code": 200, + "method": "GET", + "uri": "/search?query=test&param=value", + "version": "HTTP/1.1", + "agent": "Mozilla/4.05 [en] (Win95; I)", + "host": "localhost", + "search": { + "totalhits": 1234, + "hits": 0 + }, + "attributes": { + "singlevalue": "value1", + "multivalue": [ + "value2", + "value3" + ] + } +} +``` + +## Connection log + +In addition to the access log, one entry per connection is written to the connection log. This entry is written on connection close. Available fields: + +| Name | Type | Description | Always present | +| :--- | :--- | :--- | :--- | +| id | string | Unique ID of the connection, referenced from access log. 
| yes | +| timestamp | number | Timestamp (ISO8601 format) when the connection was opened | yes | +| duration | number | The duration of the request in seconds with millisecond decimal precision (e.g. 0.123) | yes | +| peerAddress | string | IP address used by immediate client making request | yes | +| peerPort | number | Port used by immediate client making request | yes | +| localAddress | string | The local IP address the request was received on | yes | +| localPort | number | The local port the request was received on | yes | +| remoteAddress | string | Original client ip, if proxy protocol enabled | no | +| remotePort | number | Original client port, if proxy protocol enabled | no | +| httpBytesReceived | number | Number of HTTP bytes received over the connection | no | +| httpBytesSent | number | Number of HTTP bytes sent over the connection | no | +| requests | number | Number of requests sent by the client | no | +| responses | number | Number of responses sent to the client | no | +| ssl | object | Detailed information on ssl connection | no | + +## SSL information + +| Name | Type | Description | Always present | +| :--- | :--- | :--- | :--- | +| clientSubject | string | Client certificate subject | no | +| clientNotBefore | string | Client certificate valid from | no | +| clientNotAfter | string | Client certificate valid to | no | +| sessionId | string | SSL session id | no | +| protocol | string | SSL protocol | no | +| cipherSuite | string | Name of session cipher suite | no | +| sniServerName | string | SNI server name | no | \ No newline at end of file diff --git a/mintlify-docs/en/operations/archive/archive-guide-aws.mdx b/mintlify-docs/en/operations/archive/archive-guide-aws.mdx new file mode 100644 index 0000000000..11ab754b4f --- /dev/null +++ b/mintlify-docs/en/operations/archive/archive-guide-aws.mdx @@ -0,0 +1,118 @@ +--- +title: "AWS Archive guide" +--- + + +**Note:** + +This guide is for tenants using Vespa Cloud. 
If your tenant uses **Enclave**, the archive buckets are in your own cloud account and you can access them directly — see the [Enclave archive guide](/en/operations/enclave/archive) instead. + + +Vespa Cloud exports log data, heap dumps, and Java Flight Recorder sessions to buckets in AWS S3. This guide explains how to access this data. Access to the data must happen through an AWS account controlled by the tenant. Data traffic to access this data is charged to this AWS account. + +These resources are needed to get started: + +- An AWS account +- An IAM Role in that AWS account +- The [AWS command line client](https://aws.amazon.com/cli/) + +Access is configured through the Vespa Cloud Console in the tenant account screen. Choose the "archive" tab, then expand the **AWS** section. + +## Register IAM Role + + +![AWS archive accordion](/assets/img/archive-aws-expanded-dropdown.png) + + +Click **Configure access to your cloud archive** to open the configuration dialog. + +## Configure access + + +![AWS configure access](/assets/img/archive-aws-configure-access.png) + + +In **Step 1**, enter the ARN of the IAM Role that should have access to the S3 buckets (e.g. `arn:aws:iam::123456789012:role/my-iam-role`) and click **Save**. Vespa Cloud will then grant access to that role on the S3 buckets. + +In **Step 2**, a policy is generated that must be attached to your IAM Role. Copy the policy and attach it to the IAM Role in your AWS account. AWS requires permissions to be registered in both Vespa Cloud's AWS account (step 1) and the tenant's AWS account (step 2). Make your own equivalent policy should you have other requirements. For more information, see the [AWS documentation](https://docs.aws.amazon.com/IAM/latest/UserGuide/access_policies_manage-attach-detach.html). + +## Access files using AWS CLI + + +![AWS download logs](/assets/img/archive-aws-access-logs.png) + + +Once permissions have been granted, the IAM Role can access the contents of the archive buckets. 
Any AWS S3 client will work, but the AWS command line client is an easy tool to use. The archive page will list all buckets where data is stored, typically one bucket per zone the tenant has applications. + +The `--request-payer=requester` parameter is mandatory to make sure network traffic is charged to the correct AWS account. + +Refer to [access-log-lambda](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/access-log-lambda/README.md) for how to install and use `aws cli`, which can be used to download logs as in the illustration, or e.g. list objects: + +```bash +$ aws s3 ls --profile=archive --request-payer=requester \ + s3://vespa-cloud-data-prod.aws-us-east-1c-9eb633/vespa-team/ + + PRE album-rec-searcher/ + PRE cord-19/ + PRE vespacloud-docsearch/ +``` + +In the example above, the S3 bucket name is *vespa-cloud-data-prod.aws-us-east-1c-9eb633* and the tenant name is *vespa-team* (for that particular prod zone). Archiving is per tenant, and a log file is normally stored with a key like: + +```bash +/vespa-team/vespacloud-docsearch/default/h2946a/logs/access/JsonAccessLog.default.20210629100001.zst +``` + +The URI to this object is hence: + +```bash +s3://vespa-cloud-data-prod.aws-us-east-1c-9eb633/vespa-team/vespacloud-docsearch/default/h2946a/logs/access/JsonAccessLog.default.20210629100001.zst +``` + +Objects are exported once generated - access log files are compressed and exported at least once per hour. + +If you are having problems accessing the files, please run + +```bash +aws sts get-caller-identity +``` + +to verify that you are correctly assuming the role which has been granted access. 
+ +## Lambda processing + +When processing logs using a lambda function, write a minimal function to list objects, to sort out access / keys / roles: + +```js expandable +const aws = require("aws-sdk"); +const s3 = new aws.S3({ apiVersion: "2006-03-01" }); + +const findRelevantKeys = ({ Bucket, Prefix }) => { + console.log(`Finding relevant keys in bucket ${Bucket}`); + return s3 + .listObjectsV2({ Bucket: Bucket, Prefix: Prefix, RequestPayer: "requester" }) + .promise() + .then((res) => + res.Contents.map((content) => ({ Bucket, Key: content.Key })) + ) + .catch((err) => Error(err)); +}; + +exports.handler = async (event, context) => { + const options = { Bucket: "vespa-cloud-data-prod.aws-us-east-1c-9eb633", Prefix: "MY-TENANT-NAME/" }; + return findRelevantKeys(options) + .then((res) => { + console.log("response: ", res); + return { statusCode: 200 }; + }) + .catch((err) => ({ statusCode: 500, message: err })); +}; +``` + + +**Note:** + +Always set `RequestPayer: "requester"` to access the objects - transfer cost is assigned to the requester. + + +Once the above lists the log files from S3, review [access-log-lambda](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/access-log-lambda/README.md) for how to write a function to decompress and handle the log data. \ No newline at end of file diff --git a/mintlify-docs/en/operations/archive/archive-guide-gcp.mdx b/mintlify-docs/en/operations/archive/archive-guide-gcp.mdx new file mode 100644 index 0000000000..253966a5d1 --- /dev/null +++ b/mintlify-docs/en/operations/archive/archive-guide-gcp.mdx @@ -0,0 +1,81 @@ +--- +title: "GCP Archive guide" +sidebarTitle: "Archive Guide GCP" +--- + + +**Note:** + +This guide is for tenants using Vespa Cloud. If your tenant uses **Enclave**, the archive buckets are in your own cloud account and you can access them directly — see the [Enclave archive guide](/en/operations/enclave/archive) instead. 
+ + +Vespa Cloud exports log data, heap dumps, and Java Flight Recorder sessions to buckets in Google Cloud Storage. This guide explains how to access this data. Access to the data is through a GCP project controlled by the tenant. Data traffic to access this data is charged to this GCP project. + +These resources are needed to get started: + +- A GCP project +- A Google user account +- The [gcloud command line interface](https://cloud.google.com/sdk/docs/install) + +Access is configured through the Vespa Cloud Console in the tenant account screen. Choose the "archive" tab, then expand the **GCP** section. + +## Register IAM principal + + +![GCP archive accordion](/assets/img/archive-gcp-expanded-dropdown.png) + + +Click **Configure access to your cloud archive** to open the configuration dialog. + +## Grant access to Vespa Cloud resources + + +![GCP configure access](/assets/img/archive-gcp-configure-access.png) + + +Enter a [principal](https://cloud.google.com/iam/docs/overview) with a supported prefix and click **Save**. Vespa Cloud will then grant access to that principal on the Cloud Storage buckets. + +Supported principal prefixes: + +- `user:` — Google Account, e.g. `user:email@example.com` +- `serviceAccount:` — Service account, e.g. `serviceAccount:my-app@project.iam.gserviceaccount.com` +- `group:` — Google group, e.g. `group:admins@example.com` +- `domain:` — Google Workspace or Cloud Identity domain, e.g. `domain:example.com` + +## Access files using Gcloud CLI + + +![GCP download logs](/assets/img/archive-gcp-access-logs.png) + + +Once permissions have been granted, the GCP member can access the contents of the archive buckets. Any Cloud Storage client will work, but the `gsutil` command line client is an easy tool to use. The archive page will list all buckets where data is stored, typically one bucket per zone the tenant has applications. 
+ +The `-u user-project` parameter is mandatory to make sure network traffic is charged to the correct GCP project. + +```bash +$ gsutil -u my-project ls \ +    gs://vespa-cloud-data-prod.gcp-us-central1-f-73770f/vespa-team/ +  gs://vespa-cloud-data-prod.gcp-us-central1-f-73770f/vespa-team/album-rec-searcher/ +  gs://vespa-cloud-data-prod.gcp-us-central1-f-73770f/vespa-team/cord-19/ +  gs://vespa-cloud-data-prod.gcp-us-central1-f-73770f/vespa-team/vespacloud-docsearch/ +``` + +In the example above, the bucket name is *vespa-cloud-data-prod.gcp-us-central1-f-73770f* and the tenant name is *vespa-team* (for that particular prod zone). Archiving is per tenant, and a log file is normally stored with a key like: + +```bash +/vespa-team/vespacloud-docsearch/default/h7644a/logs/access/JsonAccessLog.20221011080000.zst +``` + +The URI to this object is hence: + +```bash +gs://vespa-cloud-data-prod.gcp-us-central1-f-73770f/vespa-team/vespacloud-docsearch/default/h7644a/logs/access/JsonAccessLog.20221011080000.zst +``` + +Objects are exported once generated - access log files are compressed and exported at least once per hour. + + +**Note:** + +Always set a user project to access the objects - transfer cost is assigned to the requester. + \ No newline at end of file diff --git a/mintlify-docs/en/operations/archive/archive-guide.mdx b/mintlify-docs/en/operations/archive/archive-guide.mdx new file mode 100644 index 0000000000..bd28724008 --- /dev/null +++ b/mintlify-docs/en/operations/archive/archive-guide.mdx @@ -0,0 +1,87 @@ +--- +title: "Archive guide" +--- + +Vespa Cloud exports log data, heap dumps, and Java Flight Recorder sessions to storage buckets. The bucket system used will depend on which cloud provider is backing the zone your application is running in. AWS S3 will be used in the AWS zones, and Cloud Storage will be used in the GCP zones. 
+ +How to access and use the storage buckets is found in the documentation for the respective cloud providers: + + + + + + +## Examples + +These examples use GCP as source, replace with AWS commands as needed. Here, *resonant-triode-123456* is the Google project ID that owns the target bucket *my\_access\_logs* for data copy (and will get the data download cost, if any). + +Use the CLUSTERS view in the Vespa Cloud Console to find hostname(s) for the nodes to export logs from - then list contents: + +```bash +$ gsutil -u resonant-triode-123456 ls \ + gs://vespa-cloud-data-prod-gcp-us-central1-f-73770f/mytenant/myapp/ + +$ gsutil -u resonant-triode-123456 ls \ + gs://vespa-cloud-data-prod-gcp-us-central1-f-73770f/mytenant/myapp/myinstance + +$ gsutil -u resonant-triode-123456 ls \ + gs://vespa-cloud-data-prod-gcp-us-central1-f-73770f/mytenant/myapp/myinstance/h404a/logs/access +``` + +Copy files for a host to the *my\_access\_logs* bucket: + +```bash +$ gsutil -u resonant-triode-123456 \ + -m -o "GSUtil:parallel_process_count=1" \ + cp -r \ + gs://vespa-cloud-data-prod-gcp-us-central1-f-73770f/vespa-team/vespacloud-docsearch/default/h404a \ + gs://my_access_logs/vespa-files +``` + +`rsync` can be used to reduce number of files copied, using `-x` to exclude paths: + +```bash +$ gsutil -u resonant-triode-123456 \ + -m -o "GSUtil:parallel_process_count=1" \ + rsync -r \ + -x '.*/connection/.*|.*/vespa/.*|.*/zookeeper/.*' \ + gs://vespa-cloud-data-prod-gcp-us-central1-f-73770f/vespa-team/vespacloud-docsearch/default/h404a \ + gs://my_access_logs/vespa-files +``` + +Refer to [cloud-functions](https://github.com/vespa-engine/sample-apps/tree/master/examples/google-cloud/cloud-functions) and [lambda](https://github.com/vespa-engine/sample-apps/tree/master/examples/aws/lambda) for how to write and deploy simple functions to process files in Google Cloud and AWS. 
+ +For local processing, copy files for a host to local file system (or use `rsync`): + +```bash +$ gsutil -u resonant-triode-123456 \ + -m -o "GSUtil:parallel_process_count=1" \ + cp -r \ + gs://vespa-cloud-data-prod-gcp-us-central1-f-73770f/vespa-team/vespacloud-docsearch/default/h404a \ + . +``` + +Use [zstd](https://facebook.github.io/zstd/) to decompress files: + +```bash +$ zstd -d * +``` + +Example: Filter out healthchecks using [jq](https://stedolan.github.io/jq/): + +```bash +$ cat JsonAccessLog.20230117* | jq '. | + select (.uri != "/status.html") | + select (.uri != "/state/v1/metrics") | + select (.uri != "/state/v1/health")' +``` + +Add a human-readable date field per access log entry: + +```bash +$ cat JsonAccessLog.20230117* | jq '. | + select (.uri != "/status.html") | + select (.uri != "/state/v1/metrics") | + select (.uri != "/state/v1/health") | + . +{iso8601date:(.time|todateiso8601)}' +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/automated-deployments.mdx b/mintlify-docs/en/operations/automated-deployments.mdx new file mode 100644 index 0000000000..30a2e65970 --- /dev/null +++ b/mintlify-docs/en/operations/automated-deployments.mdx @@ -0,0 +1,346 @@ +--- +title: "Automated Deployments" +--- + + +![Picture of an automated deployment](/assets/img/automated-deployments-overview.png) + + +See [pipeline graph](#pipeline-graph) for details on the visual elements. + +Vespa Cloud provides: + +- A [CD test framework](#cd-tests) for safe deployments to production zones. +- [Multi-zone deployments](#deployment-orchestration) with orchestration and test steps. + +This guide goes through details of an orchestrated deployment. Read / try [production deployment](/en/reference/applications/deployment) first to have a baseline understanding. The [developer guide](/en/applications/developer-guide) is useful for writing tests. Use [example GitHub Actions](#automating-with-github-actions) for automation. 
+ +## CD tests + +Before deployment in production zones, [system tests](#system-tests) and [staging tests](#staging-tests) are run. Tests are run in a dedicated and [downsized](/en/operations/environments) environment. These tests are optional, see details in the sections below. Status and logs of ongoing tests can be found in the *Deployment* view in the [Vespa Cloud Console](https://console.vespa-cloud.com/): + + +![Minimal deployment pipeline](/assets/img/deployment-with-system-test.png) + + +These tests are also run during [Vespa Cloud upgrades](#vespa-cloud-upgrades). + +Find deployable example applications in [CI-CD](https://github.com/vespa-cloud/examples/tree/main/CI-CD). + +### System tests + +When a system test is run, the application is deployed in the [test environment](/en/operations/environments#test). The system test suite is then run against the endpoints of the test deployment. The test deployment is empty when the test execution begins. The application package and Vespa platform version is the same as that to be deployed to production. + +A test suite includes at least one [system test](/en/applications/testing#system-tests). An application can be deployed to a production zone without system tests - this step will then only test that the application starts successfully. See [production deployment](/en/reference/applications/deployment) for an example without tests. + +If the production zones span multiple cloud providers (e.g., both AWS and GCP), system tests are run separately for each cloud provider, using test nodes from that provider. This ensures the application starts and works correctly on each provider's infrastructure before production deployment. + +Read more about [system tests](/en/applications/testing#system-tests). + +### Staging tests + +A staging test verifies the transition of a deployment of a new application package - i.e., from application package `Appold` to `Appnew`. 
A test suite includes at least one [staging setup](/en/applications/testing#staging-tests), and [staging test](/en/applications/testing#staging-tests). + + + +All production zone deployments are polled for the current versions. As there can be multiple versions already being deployed (i.e. multiple `Appold`), there can be a series of staging test runs. + + +The application at revision `Appold` is deployed in the [staging environment](/en/operations/environments#staging). + + +The staging setup test code is run, typically making the cluster reasonably similar to a production cluster. + + +The test deployment is then upgraded to application revision `Appnew`. + + +Finally, the staging test code is run, to verify the deployment works as expected after the upgrade. + + + +An application can be deployed to a production zone without staging tests - this step will then only test that the application starts successfully before and after the change. See [production deployment](/en/reference/applications/deployment) for an example without tests. + +Like system tests, staging tests are run separately for each cloud provider when the production zones span multiple providers. + +Read more about [staging tests](/en/applications/testing#staging-tests). + +### Disabling tests + +To deploy without testing, remove the test files from the application package. Tests are always run, regardless of *deployment.xml*. + +To temporarily deploy without testing, run `deploy` and hit the "Abort" button (see illustration above, hover over the test step in the Console) - this skips the test step and makes the orchestration progress to the next step. + +### Running tests only + +To run a system test, without deploying to any nodes after, add a new test instance. In *deployment.xml*, add the instance without `dev` or`prod` elements, like: + +```xml + + + + + ... 
+ +``` + +Note that this will leave an empty instance in the console, as the deployment is for testing only, so no resources deployed to after test. + +Make sure to run `vespa prod deploy` to invoke the pipeline for testing, and use a separate application for this test. + +## Deployment orchestration + +The *deployment orchestration* is flexible. One can configure dependencies between deployments to production zones, production verification tests, and configured delays; by ordering these in parallel and serial blocks of steps: + + +![Picture of a complex automated deployment](/assets/img/automated-deployments-complex.png) + + +### Pipeline graph + +The deployment pipeline is visualized as a graph in the [Vespa Cloud Console](https://console.vespa-cloud.com/). Each node represents a step in the pipeline, and edges show dependencies between steps. Hover over any node to see details and available actions. + +#### Node shapes + +| Shape | Step type | Description | +| --- | --- | --- | +| | Instance | The application instance. Hover to see target versions, cancel/deploy/pin controls, and block windows. | +| | Test | System test, staging test, or production test. Hover to see run status, versions, and abort/restart actions. | +| | Production deployment | A deployment to a production zone. Hover to see run status, versions, and abort/restart/defer actions. | +| | Delay | A configured delay between steps. | + +#### Visual indicators + +| Indicator | Meaning | Description | +| --- | --- | --- | +| | Completed | The step has completed successfully on the current version. The color corresponds to the deployed version. | +| | Running | A deployment or test is currently in progress. Shown as an animated gradient between the source and target version colors. | +| | Failed | The last run of this step failed. | +| | Unknown / initial | No version has been deployed to this step yet. | +| | Pending change | A newer version is queued and waiting to be deployed to this step. 
| +| | Paused / deferred | Deployments to this step are temporarily postponed. | +|
| Application blocked | Application changes are blocked by a [block window](#block-windows). Shown as vertical bars. | +|

| Platform blocked | Platform upgrades are blocked by a [block window](#block-windows). Shown as horizontal bars. | + +Each version deployed through the pipeline is assigned a distinct color. This makes it easy to see at a glance which zones are on the same version and where a rollout is in progress. A thumbtack icon on a node indicates that the version is [pinned](#pinning-versions). + +On a higher level, instances can also depend on each other in the same way. This makes it easy to configure a deployment process which gradually rolls out changes to increasingly larger subsets of production nodes, as confidence grows with successful production verification tests. Refer to [deployment.xml](/en/reference/applications/deployment) for details. + +Deployments run sequentially by default, but can be configured to [run in parallel](/en/reference/applications/deployment). Inside each zone, Vespa Cloud orchestrates the deployment, such that the change is applied without disruption to read or write traffic against the application. A production deployment in a zone is complete when the new configuration is active on all nodes. + +Most changes are instant, making this a quick process. If node restarts are needed, e.g., during platform upgrades, these will happen automatically and safely as part of the deployment. When this is necessary, deployments will take longer to complete. + +System and staging tests, if present, must always be successfully run before the application package is deployed to production zones. + +### Version progression + +The deployment pipeline deploys one revision at a time through the production zones. When a revision is being deployed, it must complete deployment to *all* declared production zones before the next revision begins its production rollout. System and staging tests for newer revisions may run in parallel, but production deployment is serialized. 
+ +For example, if build 90 is being deployed to the second of two production zones, build 91 will not start deploying to the first zone until build 90 has completed in all zones — even if build 91 has already passed system and staging tests. + +#### Superseding a version + +To override the currently deploying revision and force a newer build through the pipeline, hover over the instance node in the pipeline graph and use the *TARGET VERSIONS* controls. Select the desired build number from the revision dropdown and click **deploy**. This updates the instance's deployment target. Any running production job for the old revision will be aborted, and the pipeline will start deploying the new revision from the first production zone. + + +![Picture of instance hover card with build selector and deploy button](/assets/img/automated-deployment-supersede.png) + + +To cancel the currently deploying revision without selecting a new one, click **cancel**. This lets the pipeline pick the next revision automatically. + +#### Pinning versions + +Pinning locks the pipeline to a specific platform version or application revision, preventing automatic upgrades. This is useful for forcing a downgrade, holding a known-good revision during an incident, or preventing the system from picking up a new platform version. + +To pin a version, hover over the instance node in the pipeline graph. Under *TARGET VERSIONS*, select the desired version from the dropdown and click **pin**. A reason is required — enter a description and click **submit pin**. Platform and revision can be pinned independently. + + +![Picture of instance hover card showing pin dialog](/assets/img/automated-deployment-pin.png) + + +While pinned, no newer platform versions or revisions will be deployed for the pinned dimension. The dropdown and deploy button are disabled to prevent accidental changes. To unpin, hover over the instance node and click **unpin**, which allows newer versions to move through the pipeline again. 
+ +For example, to roll back to a previous revision: + + + +Select the older build number from the revision dropdown. + + +Click **pin** and provide a reason (e.g., "rollback due to regression in build 91"). + + +The pipeline will deploy the pinned build to all production zones. + + +Once the issue is resolved, click **unpin** to resume normal deployments. + + + +#### Cooldown after failures + +When a production deployment fails repeatedly, an exponential cooldown is applied before the job is automatically retried. The cooldown period grows with the time between the first failure and the last completed run. This prevents the system from continuously retrying a failing deployment. + +The cooldown applies only when the target versions match those of the failing runs. If the target changes (e.g., a new revision is set as the deployment target), the cooldown resets and the new revision can be deployed immediately. + +To manually re-trigger a failed deployment and bypass the cooldown, hover over the failed zone node in the pipeline graph and click **restart**. + + +![Picture of zone hover card showing failed status with restart button](/assets/img/automated-deployment-restart.png) + + +#### Pausing deployments to a zone + +To temporarily hold off deployments to a specific production zone, hover over the zone node in the pipeline graph and click **defer**. This postpones deployments for 72 hours. Click **enable** to resume scheduling before the deferral period expires. + +### Source code repository integration + +Each new *submission* is assigned an increasing build number, which can be used to track the roll-out of the new package to the instances and their zones. 
With the submission, add a source code repository reference for easy integration - this makes it easy to track changes: + + +![Build numbers and source code repository reference](/assets/img/CI-integration.png) + + +Add the source diff link to the pull request - see example [GitHub Action](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/.github/workflows/deploy-vespa-documentation-search.yaml): + +```bash +$ vespa prod deploy \ + --source-url "$(git config --get remote.origin.url | sed 's+git@\(.*\):\(.*\)\.git+https://\1/\2+')/commit/$(git rev-parse HEAD)" +``` + +### Block-windows + +Use block-windows to block deployments during certain windows throughout the week, e.g., avoid rolling out changes during peak hours / during vacations. Hover over the instance (here "default") to find block status - see [block-change](/en/reference/applications/deployment#block-change): + + +![Application block window](/assets/img/block-window.png) + + +### Validation overrides + +Some configuration changes are potentially destructive / change the application behavior - examples are removing fields and changing linguistic processing. These changes are disallowed by default, the deploy-command will fail. To override and force a deploy, use a [validation override](/en/reference/applications/validation-overrides): + +```xml + + tensor-type-change + +``` + +### Production tests + +Production tests are optional and configured in [deployment.xml](/en/reference/applications/deployment). A production test is placed after a deployment zone in the pipeline and acts as a gate: if it fails, the rollout stops and subsequent zones will not receive the new version. This is useful in multi-zone deployments where the first zone serves as a canary. Production tests run against the endpoints of the preceding production region in the pipeline. 
+ + +![Picture of production test hover card with version or build tested](/assets/img/automated-deployment-production-test.png) + + +### Deploying Components + +Vespa is [backwards compatible](/en/learn/releases#versions) within major versions, and major versions rarely change. This means that [Components](/en/applications/components) compiled against an older version of Vespa APIs can always be run on the same major version. However, if the application package is compiled against a newer API version, and then deployed to an older runtime version in production, it might fail. See [vespa:compileVersion](/en/reference/applications/deployment#production-deployment-with-components) for how to solve this. + +## Automating with GitHub Actions + +Auto-deploy production applications using GitHub Actions - examples: + +- [deploy-vector-search.yaml](https://github.com/vespa-cloud/vector-search/blob/main/.github/workflows/deploy-vector-search.yaml) deploys an application to a production environment - a good example to start from! +- [deploy.yaml](https://github.com/vespa-cloud/examples/blob/main/.github/workflows/deploy.yaml) deploys an application with basic HTTP tests. +- [deploy-vespa-documentation-search.yaml](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/.github/workflows/deploy-vespa-documentation-search.yaml) deploys an application with Java-tests. 
+ +The automation scripts use an API-KEY to deploy: + +```bash +$ vespa auth api-key +``` + +This creates a key, or outputs: + +```bash +Error: refusing to overwrite /Users/me/.vespa/mytenant.api-key.pem +Hint: Use -f to overwrite it + +This is your public key: +-----BEGIN PUBLIC KEY----- +ABCDEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEB2UFsh8ZjoWNtkrDhyuMyaZQe1ze +qLB9qquTKUDQTuM2LOr2dawUs02nfSc3UTfC08Lgr/dvnTnHpc0/fY+3Aw== +-----END PUBLIC KEY----- + +Its fingerprint is: +12:34:56:78:65:30:77:90:30:ab:83:ee:a9:67:68:2c + +To use this key in Vespa Cloud click 'Add custom key' at +https://console.vespa-cloud.com/tenant/mytenant/account/keys +and paste the entire public key including the BEGIN and END lines. +``` + +This means that an existing key is never overwritten, so the command is safe to run. Make sure to add the deploy-key to the tenant using the Vespa Cloud Console. + +After the deploy-key is added, everything is ready for deployment. + +You can upload or create new Application keys in the console, and store them as a secret in the repository like the GitHub actions example above. + +Some services like [Travis CI](https://travis-ci.com) do not accept multi-line values for Environment Variables in Settings. A workaround is to use the output of + +```bash +$ openssl base64 -A -a < mykey.pem && echo +``` + +in a variable, say `VESPA_MYAPP_API_KEY`, in Travis Settings. `VESPA_MYAPP_API_KEY` is exported in the Travis environment, example output: + +```bash +Setting environment variables from repository settings +$ export VESPA_MYAPP_API_KEY=[secure] +``` + +Then, before deploying to Vespa Cloud, regenerate the key value: + +```bash +$ MY_API_KEY=`echo $VESPA_MYAPP_API_KEY | openssl base64 -A -a -d` +``` + +and use `${MY_API_KEY}` in the deploy command. + +## Vespa Cloud upgrades + +Vespa upgrades follow the same pattern as for new application revisions in [CD tests](#cd-tests), and can be tracked via its version number in the Vespa Cloud Console. 
+ +System tests are run the same way as for deploying a new application package. + +A staging test verifies the upgrade from application package `Appold` to `Appnew`, and from Vespa platform version `Vold` to `Vnew`. The staging test then consists of the following steps: + + + +All production zone deployments are polled for the current `Vold` / `Appold` versions. As there can be multiple versions already being deployed (i.e. multiple `Vold` / `Appold`), there can be a series of staging test runs. + + +The application at revision `Appold` is deployed on platform version `Vold`, to a zone in the [staging environment](/en/operations/environments#staging). + + +The *staging setup* test code is run, typically making the cluster reasonably similar to a production cluster. + + +The test deployment is then upgraded to application revision `Appnew` and platform version `Vnew`. + + +Finally, the *staging test* test code is run, to verify the deployment works as expected after the upgrade. + + + +Note that one or both of the application revision and platform may be upgraded during the staging test, depending on what upgrade scenario the test is run to verify. + +### Concurrent platform and revision changes + +When both a platform upgrade and a revision change are pending, the `rollout` setting in [deployment.xml](/en/reference/applications/deployment) controls how they interact in production zones: + +- `simultaneous` (default): Revision changes deploy independently of platform upgrades. A revision can catch up to and pass an ongoing platform upgrade. +- `leading`: When a revision catches up to a platform upgrade, the two changes fuse and roll out together. +- `separate`: The revision waits for the platform upgrade to complete, unless the upgrade is failing. + +With the default `simultaneous` strategy, a new revision will not be held back by an ongoing platform upgrade. 
+ +## Next steps + +- Read more about [feature switches and bucket tests](/en/applications/testing#feature-switches-and-bucket-tests). +- A challenge with continuous deployment can be integration testing across multiple services: Another service depends on this Vespa application for its own integration testing. Use a separate [application instance](/en/reference/applications/deployment#instance) for such integration testing. +- Set up a deployment badge - available from the console's deployment view - example: ![vespa-team.vespacloud-docsearch.default overview](https://api-ctl.vespa-cloud.com/badge/v1/vespa-team/vespacloud-docsearch/default) +- Set up a [global query endpoint](/en/reference/applications/deployment#endpoints-global). \ No newline at end of file diff --git a/mintlify-docs/en/operations/autoscaling.mdx b/mintlify-docs/en/operations/autoscaling.mdx new file mode 100644 index 0000000000..e06533e2ac --- /dev/null +++ b/mintlify-docs/en/operations/autoscaling.mdx @@ -0,0 +1,96 @@ +--- +title: "Autoscaling" +--- + +Autoscaling lets you adjust the hardware resources allocated to application clusters automatically depending on actual usage. It will attempt to keep utilization of all allocated resources close to ideal, and will automatically reconfigure to the cheapest option allowed by the ranges when necessary. + +You can turn it on by specifying *ranges* in square brackets for the [nodes](/en/reference/applications/services/services#nodes) and/or [node resource](/en/reference/applications/services/services#resources) values in *services.xml*. Vespa Cloud will monitor the resource utilization of your clusters and automatically choose the cheapest resource allocation within ranges that produces close to optimal utilization. + +You can see the status and recent actions of the autoscaler in the *Resources* view under a deployment in the console. + +Autoscaling is not considering latency differences achieved by different configurations. 
If your application has certain configurations that produce good throughput but too high latency, you should not include these configurations in your autoscaling ranges. + +Adjusting the allocation of a cluster may happen quickly for stateless container clusters, and much more slowly for content clusters with a lot of data. Autoscaling will adjust each cluster on the timescale it typically takes to rescale it (including any data redistribution). + +The ideal utilization takes into account that a node may be down or failing, that another region may be down causing doubling of traffic, and that we need headroom for maintenance operations and handling requests with low latency. It acts on what it has observed on your system in the recent past. If you need much more capacity in the near future than you do currently, you may want to set the lower limit to take this into account. Upper limits should be set to the maximum size that makes business sense. + +## When to use autoscaling + +Autoscaling is useful in a number of scenarios. Some typical ones are: + +- You have a new application which you can't benchmark with realistic data and usage, making you unsure what resources to allocate: Set wide ranges for all resource parameters and let the system choose a configuration. Once you gain experience you can consider tightening the configuration space. +- You have load that varies quickly during the day, or that may suddenly increase quickly due to some event, and want container cluster resources to quickly adjust to the load: Set a range for the number of nodes and/or vcpu on containers. +- You expect your data volume to grow over time, but you don't want to allocate resources prematurely, nor constantly worry about whether it is time to increase: Configure ranges for content nodes and/or node resources such that the size of the system grows with the data. 
+ +## Resource tradeoffs + +Some other considerations when deciding resources: + +- Making changes to resources/nodes is easy and safe, and one of Vespa Cloud's strengths. We advise you make controlled changes and observe effect on latencies, data migration and cost. Everything is automated, just deploy a new application package. This is useful learning when later needed during load peaks and capacity requirement changes. +- Node resources cannot be chosen freely in all zones, CPU/Memory often comes in increments of x 2. Try to make sure that the resource configuration is a good fit. +- CPU is the most expensive component, optimize for this for most applications. +- Having few nodes means more overcapacity as Vespa requires that the system will handle one node being down (or one group, in content clusters having multiple groups). 4-5 nodes minimum is a good rule of thumb. Whether 4-5 or 9-10 nodes of half the size is better depends on quicker upgrade cycles vs. smoother resource auto-scale curves. Latencies can be better or worse, depending on static vs dynamic query cost. +- Changing a node resource may mean allocating a new node, so it may be faster to scale container nodes by changing the number of nodes. +- As a consequence, during resource shortage (say almost full disk), add nodes and keep the rest unchanged. +- It is easiest to reason about capacity when changing one thing at a time. + +It is often safe to follow the *suggested resources* advice when shown in the console and feel free to contact us if you have questions. + +## Mixed load + +A Vespa application must handle a combination of reads and writes, from multiple sources. User load often resembles a sine-like curve. Machine-generated load, like a batch job, can be spiky and abrupt. + +In the default Vespa configuration, all kinds of load use *one* default container cluster.
Example: An application where daily batch jobs update the corpus at high rate: + + +![nodes and resources](/assets/img/load.png) + + +Autoscaling scales *up* much quicker than *down*, as the probability of a new spike is higher after one has been observed. In this example, see the rapid cluster growth for the daily load spike - followed by a slow decay. + +The best solution for this case is to slow down the batch job, as it is of short duration. It is not always doable to slow down jobs - in these cases, setting up multiple [container clusters](/en/applications/containers) can be a smart thing - optimize each cluster for its load characteristics. This could be a combination of clusters using autoscale and clusters with a fixed size. Autoscaling often works best for the user-generated load, whereas the machine-generated load could either be tuned or routed to a different cluster in the same Vespa application. + +## Examples + +Below is an example of node resources with autoscaling that would work well for a container cluster: + +```xml + + + +``` + +The above would in general **not be recommended for a content cluster.** Changing cpu, memory or disk usually leads to allocating new nodes to fulfil the new node resources spec. When that happens there will be redistribution of documents between the old and new nodes and this might impact service quality to some degree. For a content cluster it would usually be better to try to stick to the same node resources and add or remove nodes, e.g something like: + +```xml + + + +``` + +If a content cluster is configured to autoscale based on node resources (not just number of nodes or groups) this will work fine, but note that using paged attributes or HNSW indexes will make it more expensive and time-consuming to redistribute documents when scaling up or down. 
When doing the initial feeding of a cluster it will be best to avoid auto-scaling, as changing the topology will require redistribution of documents, possibly several times. + +When using groups in a content cluster it's possible to scale the number of groups instead of the number of nodes, e.g. with a fixed group size and a range for the number of groups: + +```xml + + + +``` + +Note that at the moment it is not possible to autoscale GPU resources per node, but you can scale the number of nodes with GPUs: + +```xml + + + + + +``` + +## Related reading + + + + + \ No newline at end of file diff --git a/mintlify-docs/en/operations/cloning.mdx b/mintlify-docs/en/operations/cloning.mdx new file mode 100644 index 0000000000..ab0cfd9a9d --- /dev/null +++ b/mintlify-docs/en/operations/cloning.mdx @@ -0,0 +1,280 @@ +--- +title: "Cloning applications and data" +--- + +This is a guide on how to replicate a Vespa application in different environments, with or without data. Use cases for cloning include: + +- Get a copy of the application and (some) data on a laptop to work offline, or attach a debugger. +- Deploy local experiments to the `dev` environment to easily cooperate and share. +- Set up a copy of the application and (some) data to test a new major version of Vespa. +- Replicate a bug report in a non-production environment. +- Set up a copy of the application and (some) data in a `prod` environment to experiment with a CI/CD pipeline, without touching the current production serving. +- Onboard a new team member by setting up a copy of the application and test data in a `dev` environment. +- Clone to a `dev` environment for load testing. + +This guide uses *applications*. One can also use *instances*, but that will not work across Vespa major versions on Vespa Cloud - refer to [tenant, applications, instances](/en/learn/tenant-apps-instances) for details. 
+ +Vespa Cloud has different environments `dev` and `prod`, with different characteristics - [details](/en/operations/environments). Clone to `dev` for short-lived experiments/development/benchmarking, use `prod` for serving applications with a [CI/CD pipeline](/en/operations/automated-deployments). + +As some steps are similar, it is a good idea to read through all, as details are added only the first time for brevity. Examples are based on the [album-recommendation](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation) sample application. + + +**Note:** + +When done, it is easy to tear down resources in Vespa Cloud. E.g., *https://console.vespa-cloud.com/tenant/mytenant/application/myapp/prod/deploy* or *https://console.vespa-cloud.com/tenant/mytenant/application/myapp/dev/instance/default* to find a delete-link. Instances in `dev` environments are auto-expired ([details](/en/operations/environments)), so application cloning is a safe way to work with Vespa. Find more information in [deleting applications](/en/operations/deleting-applications). + + +## Cloning - self-hosted to Vespa Cloud + +**Source setup:** + +```bash +$ docker run --detach --name vespa1 --hostname vespa-container1 \ + --publish 8080:8080 --publish 19071:19071 \ + vespaengine/vespa + +$ vespa deploy -t http://localhost:19071 +``` + +**Target setup:** + +[Create a tenant](/en/basics/deploy-an-application) in the Vespa Cloud console, in this guide using "mytenant". + +**Export source application package:** + +This gets the application package and copies it out of the container to local file system: + +```bash +$ vespa fetch -t http://localhost:19071 && \ + unzip application.zip -x application.zip +``` + +**Deploy target application package** + +The procedure differs a little whether deploying to dev or prod [environment](/en/operations/environments). The `mvn -U clean package` step is only needed for applications with custom code. 
Configure application name and create data plane credentials: + +```bash +$ vespa config set target cloud && \ + vespa config set application mytenant.myapp + +$ vespa auth login + +$ vespa auth cert -f + +$ mvn -U clean package +``` + + +**Note:** + +When deploying to a new app, one will often want to generate a new data plane cert/key pair. To do this, use `vespa auth cert -f`. If reusing a cert/key pair, drop `-f` and make sure to put the pair in *.vespa*, to avoid errors like `Error: open /Users/me/.vespa/mytenant.myapp.default/data-plane-public-cert.pem: no such file or directory` in the subsequent deploy step. + + +Then deploy the application. Depending on the use case, deploy to `dev` or `prod`: + +- `dev`: + + ```bash + $ vespa deploy + ``` + Expect something like: + + ```bash + Uploading application package ... done + + Success: Triggered deployment of . with run ID 1 + + Use vespa status for deployment status, or follow this deployment at + https://console.vespa-cloud.com/tenant/mytenant/application/myapp/dev/instance/default/job/dev-aws-us-east-1c/run/1 + ``` +- Deployments to the `prod` environment requires [deployment.xml](/en/reference/applications/deployment) - select which [zone](/en/operations/zones) to deploy to: + + ```bash + $ cat < deployment.xml + + + aws-us-east-1c + + + EOF + ``` + + `prod` deployments also require `resources` specifications in [services.xml](/en/reference/applications/services/services) - use [vespa-documentation-search](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/src/main/application/services.xml) as an example and add/replace `nodes` elements for `container` and `content` clusters. If in doubt, just add a small config to start with, and change later: + + ```bash + + + + ``` + Deploy the application package: + + ```bash + $ vespa prod deploy + ``` + + Expect something like: + + `Hint: See` [`production deployment`](/en/reference/applications/deployment)
+ `Success: Deployed` .
+ `See https://console.vespa-cloud.com/tenant/mytenant/application/myapp/prod/deployment for deployment progress` + + A proper deployment to a `prod` zone should have automated tests, read more in [automated deployments](/en/operations/automated-deployments) + +**Data copy** + +Export documents from the local instance and feed to the Vespa Cloud instance: + +```bash +$ vespa visit -t http://localhost:8080 | vespa feed - +``` + +Add more parameters as needed to `vespa feed` for other endpoints. + +**Get access log from source:** + +```bash +$ docker exec vespa1 cat /opt/vespa/logs/vespa/access/JsonAccessLog.default +``` + +## Cloning - Vespa Cloud to self-hosted + +**Download application from Vespa Cloud** + +Validate the endpoint, and fetch the application package: + +```bash +$ vespa config get application +application = mytenant.myapp.default + +$ vespa fetch +Downloading application package... done +Success: Application package written to application.zip +``` + +The application package can also be downloaded from the Vespa Cloud Console: + +- dev: Navigate to *https://console.vespa-cloud.com/tenant/mytenant/application/myapp/dev/instance/default*, click *Application* to download: + ![Application package download from dev environment](/assets/img/app-download-dev.png) +- prod: Navigate to *https://console.vespa-cloud.com/tenant/mytenant1/application/myapp/prod/deployment?tab=builds* and select the version of the application to download: + ![Application package download from prod environment](/assets/img/app-download-prod.png) + +**Target setup:** + +Note the name of the application package .zip-file just downloaded. 
If changes are needed, unzip it and use `vespa deploy -t http://localhost:19071 ` to deploy from current directory: + +```bash +$ docker run --detach --name vespa1 --hostname vespa-container1 \ + --publish 8080:8080 --publish 19071:19071 \ + vespaengine/vespa + +$ vespa config set target local + +$ vespa deploy -t http://localhost:19071 mytenant.myapp.default.dev.aws-us-east-1c.zip +``` + +**Data copy** + +Set config target cloud for `vespa visit` and pipe the jsonl output into `vespa feed` to the local instance: + +```bash +$ vespa config set target cloud + +$ vespa visit | vespa feed - -t http://localhost:8080 +``` + +**Data copy - minimal** + +For use cases requiring a few documents, visit just a few documents: + +```bash +$ vespa visit --chunk-count 10 +``` + +**Get access log from source:** + +Use the Vespa Cloud Console to get access logs. + +## Cloning - Vespa Cloud to Vespa Cloud + +This is a combination of the procedures above. Download the application package from dev or prod, make note of the source name, like mytenant.myapp.default. Then use `vespa deploy` or `vespa prod deploy` as above to deploy to dev or prod. + +If cloning from `dev` to `prod`, pay attention to changes in *deployment.xml* and *services.xml* as in [cloning to Vespa Cloud](#cloning---self-hosted-to-vespa-cloud). + +**Data copy** + +Set the feed endpoint name / paths, e.g. mytenant.myapp-new.default: + +```bash +$ vespa config set target cloud + +$ vespa visit | vespa feed - -t https://default.myapp-new.mytenant.aws-us-east-1c.dev.z.vespa-app.cloud +``` + +**Data copy 5%** Set the `--selection` argument to `vespa visit` to select a subset of the documents. + +## Cloning - self-hosted to self-hosted + +Creating a copy from one self-hosted application to another. Self-hosted means running [Vespa](https://vespa.ai/) on a laptop or a [multinode system](/en/operations/self-managed/multinode-systems).
+ +This example sets up a source app and deploys the [application package](/en/basics/applications) - use [album-recommendation](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation) as an example. The application package is then exported from the source and deployed to a new target app. Steps: + +**Source setup:** + +```bash +$ vespa config set target local + +$ docker run --detach --name vespa1 --hostname vespa-container1 \ + --publish 8080:8080 --publish 19071:19071 \ + vespaengine/vespa + +$ vespa deploy -t http://localhost:19071 +``` + +**Target setup:** + +```bash +$ docker run --detach --name vespa2 --hostname vespa-container2 \ + --publish 8081:8080 --publish 19072:19071 \ + vespaengine/vespa +``` + +**Export source application package** + +Export files: + +```bash +$ vespa fetch -t http://localhost:19071 +``` + +**Deploy application package to target** + +Before deploying, one can make changes to the application package files as needed. Deploy to target: + +```bash +$ vespa deploy -t http://localhost:19072 application.zip +``` + +**Data copy from source to target** + +This pipes the source data directly into `vespa feed` - another option is to save the data to files temporarily and feed these individually: + +```bash +$ vespa visit -t http://localhost:8080 | vespa feed - -t http://localhost:8081 +``` + +**Data copy 5%** + +This is an example on how to use a [selection](/en/reference/writing/document-selector-language) to specify a subset of the documents - here a "random" 5% selection: + +```bash +$ vespa visit -t http://localhost:8080 --selection 'id.hash().abs() % 20 = 0' | \ + vespa feed - -t http://localhost:8081 +``` + +**Get access log from source** + +Get the current query access log from the source application (there might be more files there): + +```bash +$ docker exec vespa1 cat /opt/vespa/logs/vespa/access/JsonAccessLog.default +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/data-management.mdx 
b/mintlify-docs/en/operations/data-management.mdx new file mode 100644 index 0000000000..5fd23b3bdf --- /dev/null +++ b/mintlify-docs/en/operations/data-management.mdx @@ -0,0 +1,233 @@ +--- +title: "Data management and backup" +--- + +This guide covers data management operations for Vespa Cloud applications, including automated backups, document export, feed, and bulk updates and removals. + +## Automated Backups + +Depending on [plan](https://vespa.ai/pricing/), content clusters are automatically backed up when a [``](/en/reference/applications/deployment#backup) element is specified in *deployment.xml*. Vespa Cloud manages the backup schedule, storage, and lifecycle with no external tooling required. Backups will run at the configured frequency while also respecting any [block windows](/en/reference/applications/deployment#block-change) defined for the instance. + +```xml + + + + aws-us-east-1c + + +``` + +Backups are retained for three backup intervals (e.g. 21 days for a 7-day frequency). The most recent fully completed backup is always retained regardless of age. See [Restore from Backup](#restore) for how to request a restore. + +If you prefer to manage backups yourself, documents can be exported manually using `vespa visit` as shown in the [Google Cloud Function example](https://github.com/vespa-engine/sample-apps/tree/master/examples/google-cloud/cloud-functions#backup---experimental). + +## Restore from Backup + +Restoring from a backup is handled by Vespa Cloud. To initiate a restore, contact [Vespa Support](https://vespa.ai/support/). Response time and priority handling are governed by your [support plan](https://vespa.ai/pricing/). + +Restore requires a deployed target cluster with: + +- The same number of content nodes as the backup. +- At least equivalent disk capacity per node as at the time of the backup. + +Note that content redistribution is usually required after restoration. 
See [backup reference](/en/reference/applications/deployment#backup) for details. + +## Export documents + + +**Note:** + +The examples below use the [Vespa CLI](/en/clients/vespa-cli). Ensure you have the latest version installed. + + +To export documents, configure the application to export from, then select zone, container cluster and schema - example: + +```bash +$ vespa config set application vespa-team.vespacloud-docsearch.default + +$ vespa visit --zone prod.aws-us-east-1c --cluster default --selection doc | head +``` + +Some of the parameters above are redundant if unambiguous. Here, the application is set up using a template found in [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) with multiple container clusters. This example [visits](/en/writing/visiting) documents from the `doc` schema. + +Use a [fieldset](/en/schemas/documents#fieldsets) to export document IDs only: + +```bash +$ vespa visit --zone prod.aws-us-east-1c --cluster default --selection doc --field-set '[id]' | head +``` + + +**Note:** + +Configuring the [`documentid`](/en/reference/schemas/schemas#documentid) field to be an attribute in the schema means that exporting document IDs does not require disk access, which speeds up the exporting process. + + +As the name implies, fieldsets are useful to select a subset of fields to export. Note that, if disk access is required to fetch a field from the fieldset, selecting fewer fields does not speed up the exporting process as the same amount of data is read from the index. The data transfer out of the Vespa application is smaller with fewer fields. + +For copying documents between applications, see [cloning applications and data](/en/operations/cloning).
+ +## Feed + +If a document feed is generated with `vespa visit` (above), it is already in [JSON Lines](https://jsonlines.org/) feed-ready format by default: + +```bash +$ vespa visit | vespa feed - -t $ENDPOINT +``` + +Find more examples in [cloning applications and data](/en/operations/cloning). + +A document export generated using [/document/v1](/en/writing/document-v1-api-guide) is slightly different from the .jsonl output from `vespa visit` (e.g., fields like a continuation token are added). Extract the `document` objects before feeding: + +```bash +$ gunzip -c docs.gz | jq '.documents[]' | \ + vespa feed - -t $ENDPOINT +``` + +## Delete + +To remove all documents in a Vespa deployment—or a selection of them—run a *deletion visit*. Use the `DELETE` HTTP method, and fetch only the continuation token from the response: + +```bash expandable +#!/bin/bash + +set -x + +# The ENDPOINT must be a regional endpoint, do not use '*.g.vespa-app.cloud/' +ENDPOINT="https://vespacloud-docsearch.vespa-team.aws-us-east-1c.z.vespa-app.cloud" +NAMESPACE=open +DOCTYPE=doc +CLUSTER=documentation + +# doc.path =~ "^/old/" -- all documents under the /old/ directory: +SELECTION='doc.path%3D~%22%5E%2Fold%2F%22' + +continuation="" + +while + token=$( curl -X DELETE -s \ + --cert data-plane-public-cert.pem \ + --key data-plane-private-key.pem \ + "${ENDPOINT}/document/v1/${NAMESPACE}/${DOCTYPE}/docid?selection=${SELECTION}&cluster=${CLUSTER}&${continuation}" \ + | tee >( jq . > /dev/tty ) | jq -re .continuation ) +do + continuation="continuation=${token}" +done +``` + +Each request will return a response after roughly one minute—change this by specifying *timeChunk* (default 60). 
+ +To purge all documents in a document export (above), generate a feed with `remove`\-entries for each document ID, like: + +```bash +$ gunzip -c docs.gz | jq '[ .documents[] | {remove: .id} ]' | head + +[ + { + "remove": "id:open:doc::open/documentation/schemas.html" + }, + { + "remove": "id:open:doc::open/documentation/securing-your-vespa-installation.html" + }, +``` + +Complete example for a single chunk: + +```bash +$ gunzip -c docs.gz | jq '[ .documents[] | {remove: .id} ]' | \ + vespa feed - -t $ENDPOINT +``` + +## Update + +To update all documents in a Vespa deployment—or a selection of them—run an *update visit*. Use the `PUT` HTTP method, and specify a partial update in the request body: + +```bash expandable +#!/bin/bash + +set -x + +# The ENDPOINT must be a regional endpoint, do not use '*.g.vespa-app.cloud/' +ENDPOINT="https://vespacloud-docsearch.vespa-team.aws-us-east-1c.z.vespa-app.cloud" +NAMESPACE=open +DOCTYPE=doc +CLUSTER=documentation + +# doc.inlinks == "some-url" -- the weightedset inlinks has the key "some-url" +SELECTION='doc.inlinks%3D%3D%22some-url%22' + +continuation="" + +while + token=$( curl -X PUT -s \ + --cert data-plane-public-cert.pem \ + --key data-plane-private-key.pem \ + --data '{ "fields": { "inlinks": { "remove": { "some-url": 0 } } } }' \ + "${ENDPOINT}/document/v1/${NAMESPACE}/${DOCTYPE}/docid?selection=${SELECTION}&cluster=${CLUSTER}&${continuation}" \ + | tee >( jq . > /dev/tty ) | jq -re .continuation ) +do + continuation="continuation=${token}" +done +``` + +Each request will return a response after roughly one minute—change this by specifying *timeChunk* (default 60). + +## Using /document/v1/ api + +To get started with a document export, find the *namespace* and *document type* by listing a few IDs. Hit the [/document/v1/](/en/reference/api/document-v1) ENDPOINT. 
Restrict to one CLUSTER, see [content clusters](/en/reference/applications/services/content): + +```bash +$ curl \ + --cert data-plane-public-cert.pem \ + --key data-plane-private-key.pem \ + "$ENDPOINT/document/v1/?cluster=$CLUSTER" +``` + +For ID export only, use a [fieldset](/en/schemas/documents#fieldsets): + +```bash +$ curl \ + --cert data-plane-public-cert.pem \ + --key data-plane-private-key.pem \ + "$ENDPOINT/document/v1/?cluster=$CLUSTER&fieldSet=%5Bid%5D" +``` + +From an ID, like *id:open:doc::open/documentation/schemas.html*, extract + +- NAMESPACE: open +- DOCTYPE: doc + +Example script: + +```bash expandable +#!/bin/bash + +set -x + +# The ENDPOINT must be a regional endpoint, do not use '*.g.vespa-app.cloud/' +ENDPOINT="https://vespacloud-docsearch.vespa-team.aws-us-east-1c.z.vespa-app.cloud" +NAMESPACE=open +DOCTYPE=doc +CLUSTER=documentation + +continuation="" +idx=0 + +while + ((idx+=1)) + echo "$continuation" + printf -v out "%05g" $idx + filename=${NAMESPACE}-${DOCTYPE}-${out}.data.gz + echo "Fetching data..." + token=$( curl -s \ + --cert data-plane-public-cert.pem \ + --key data-plane-private-key.pem \ + "${ENDPOINT}/document/v1/${NAMESPACE}/${DOCTYPE}/docid?wantedDocumentCount=1000&concurrency=4&cluster=${CLUSTER}&${continuation}" \ + | tee >( gzip > ${filename} ) | jq -re .continuation ) +do + continuation="continuation=${token}" +done +``` + +If only a few documents are returned per response, *wantedDocumentCount* (default 1, max 1024) can be specified for a lower bound on the number of documents per response, if that many documents still remain. + +Specifying *concurrency* (default 1, max 100) increases throughput, at the cost of resource usage. This also increases the number of documents per response, and *could* lead to excessive memory usage in the HTTP container when many large documents are buffered to be returned in the same response. 
\ No newline at end of file diff --git a/mintlify-docs/en/operations/deleting-applications.mdx b/mintlify-docs/en/operations/deleting-applications.mdx new file mode 100644 index 0000000000..eaa5c13397 --- /dev/null +++ b/mintlify-docs/en/operations/deleting-applications.mdx @@ -0,0 +1,57 @@ +--- +title: "Deleting Applications" +sidebarTitle: "Deleting Applications" +--- + + +**Warning:** + +Following these steps will remove production instances or regions and all data within them. Data will be unrecoverable. + + + +## Deleting an application + +To delete an application, use the console: + +- navigate to the *application* view at https://console.vespa-cloud.com/tenant/tenant-name/application where you can find the trash can icon to the far right, as an `ACTION`. +- navigate to the *deploy* view at *https://console.vespa-cloud.com/tenant/tenant-name/application/app-name/prod/deploy*. + + +![delete production deployment](/assets/img/console/delete-production-deployment.png) + + +When the application deployments are deleted, delete the application in the [console](https://console.vespa-cloud.com). Remove the CI job that builds and deploys application packages, if any. + +## Deleting an instance / region + +To remove an instance or a deployment to a region from an application: + + + +Remove the `region` from `prod`, or the `instance` from `deployment` in [deployment.xml](/en/reference/applications/deployment#instance): +```xml + + + aws-us-east-1c + + + + +``` + + +Add or modify [validation-overrides.xml](/en/reference/applications/validation-overrides), allowing Vespa Cloud to remove production instances: + +```xml + + deployment-removal + + global-endpoint-change + +``` + + +Build and deploy the application package. 
+ + \ No newline at end of file diff --git a/mintlify-docs/en/operations/deployment-patterns.mdx b/mintlify-docs/en/operations/deployment-patterns.mdx new file mode 100644 index 0000000000..08a6aa4595 --- /dev/null +++ b/mintlify-docs/en/operations/deployment-patterns.mdx @@ -0,0 +1,179 @@ +--- +title: "Deployment patterns" +--- + +Vespa Cloud's [automated deployments](/en/operations/automated-deployments) lets you design CD pipelines for staged rollouts and multi-zone deployments. This guide documents some of these patterns. + +## Two regions, two AZs each, sequenced deployment + +This is the simplest pattern, deploy to a set of zones/regions, in a sequence: + + +![Two regions, two AZs each, sequenced deployment](/assets/img/pipeline-1.png) + + +```xml + + + aws-us-east-1c + aws-use1-az4 + aws-use2-az1 + aws-use2-az3 + + +``` + +## Two regions, two AZs each, parallel deployment + +Same as above, but deploying all zones in parallel: + + +![Two regions, two AZs each, parallel deployment](/assets/img/pipeline-2.png) + + +```xml + + + + aws-us-east-1c + aws-use1-az4 + aws-use2-az1 + aws-use2-az3 + + + +``` + +## Two regions, two AZs each, parallel deployment inside region + +Deploy to the use1 region first, both AZs in parallel, then the use2 region, both AZs in parallel: + + +![Two regions, two AZs each, parallel deployment inside region](/assets/img/pipeline-3.png) + + +```xml + + + + aws-us-east-1c + aws-use1-az4 + + + aws-use2-az1 + aws-use2-az3 + + + +``` + +## Deploy to a test instance first + +Deploy to a (downscaled) instance first, and add a delay before propagating to later instances and zones. + + +![With a canary instance](/assets/img/canary-instance-one-app.png) + + +```xml + + + + aws-use2-az1 + + + + + + aws-use2-az1 + + + +``` + +### Deployment variants + +[Deployment variants](/en/operations/deployment-variants) are useful to set up a downscaled instance. 
In [services.xml](/en/reference/applications/services/services), override settings per instance: + +```xml + + + + + + +``` + +## Test and prod instances as separate applications + +In the section before, we modeled the test and prod app as one pipeline. This lets users halt the pipeline (using the delay) before prod propagation. + +In some cases, this is better modeled as different applications: + +- The CI pipeline is multistep, with approvals and use of different branches + +The below uses different *applications* to model the flow, these are completely separate application instances. The application owner will model the flow in their own tool, and orchestrate deployments to Vespa Cloud as they see fit: + + +![canary app](/assets/img/canaryapp.png) + + +![prod app](/assets/img/prodapp.png) + + +The important point is, these are two *separate* deploy commands to Vespa Cloud: + +```bash +$ vespa config set application tenant1.canaryapp +$ vespa prod deploy app +``` + +```xml + + + + aws-use2-az1 + + + +``` + +```bash +$ vespa config set application tenant1.prodapp +$ vespa prod deploy app +``` + +```xml + + + + aws-use2-az1 + + + +``` + +## services.xml structure + +It is possible to split *services.xml* into multiple files using includes: + +```xml + + + + +``` + + +**Note:** + +The include-feature cannot be used in combination with [deployment variants](#deployment-variants).
+ + +## Next reads + + + + + + \ No newline at end of file diff --git a/mintlify-docs/en/operations/deployment-variants.mdx b/mintlify-docs/en/operations/deployment-variants.mdx new file mode 100644 index 0000000000..23f3c55bf8 --- /dev/null +++ b/mintlify-docs/en/operations/deployment-variants.mdx @@ -0,0 +1,150 @@ +--- +title: "Application, instance, region, cloud and environment variants" +sidebarTitle: "Deployment variants" +--- + +Sometimes it is useful to create configuration that varies depending on properties of the deployment, for example to set region specific endpoints of services used by [Searchers](/en/applications/searchers), use smaller clusters for a "beta" instance, or vary configuration when the same application package is shared across multiple applications. + +This is supported both for [services.xml](#services.xml-variants) and [query profiles](#query-profile-variants). + +## services.xml variants + +[services.xml](/en/reference/applications/services/services) files support different configuration settings for different *tags*, *applications*, *instances*, *environments*, *clouds* and *regions*. To use this, import the *deploy* namespace: + +```xml + +``` + +Deploy directives are used to specify with which tags, and in which application, instance, environment, cloud and/or [region](/en/operations/zones) an XML element should be included: + +```xml expandable + + 2 + + + + + + + + + + + + + + + + + + + +``` + +The example above configures different node counts/configurations depending on the deployment target. Deploying the application in the *dev* environment gives: + +```xml + + 2 + + + + + +``` + +Whereas in `aws-us-west-2a` it is: + +```xml + + 2 + + + + + + + +``` + +This can be used to modify any config by deployment target. + +The `deploy` directives have a set of override rules: + +- A directive specifying more conditions will override one specifying fewer. +- Directives are inherited in child elements. 
+- When multiple XML elements with the same name are specified (e.g. when specifying search or docproc chains), the *id* attribute or the *idref* attribute of the element is used together with the element name when applying directives. + +Some overrides are applied by default in some environments, see [environments](/en/operations/environments). Any override made explicitly for an environment will override the defaults for it. + +### Specifying multiple targets + +More than one tag, application, instance, region or environment can be specified in the attribute, separated by spaces. + +Note that `tags` by default only apply in production instances, and are matched whenever the tags of the element and the tags of the instance intersect. To match tags in other environments, an explicit `deploy:environment` directive for that environment must also match. Use tags if you have a complex instance structure which you want config to vary by. + +The namespace can be applied to any element. Example: + +```xml + + + + + + Hello from application config + Hello from east colo! + + + + + +``` + +Above, the `container` element is configured for the 3 environments only (it will not apply to `dev`) - and in region `aws-us-east-1c`, the config is different. + +## Query profile variants + +[Query profiles](/en/querying/query-profiles) support different configuration settings for different *applications*, *instances*, *environments* and *regions* through [query profile variants](/en/querying/query-profiles#query-profile-variants). This allows you to set different query parameters for a query type depending on these deployment attributes. + +To use this feature, create a regular query profile variant with any of `application`, `instance`, `environment` and `region` as dimension names and let your query profile vary by that.
For example: + +```xml expandable + + + application, instance, environment, region + + My default value + + + + My beta value + + + + + My dev value + + + + + My main instance prod value + + + +``` + +You can pick and combine these dimensions in any way you want with other dimensions sent as query parameters, e.g: + +```xml +device, application, instance, usecase +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/enclave/archive.mdx b/mintlify-docs/en/operations/enclave/archive.mdx new file mode 100644 index 0000000000..70e23c008c --- /dev/null +++ b/mintlify-docs/en/operations/enclave/archive.mdx @@ -0,0 +1,45 @@ +--- +title: "Log archive in Vespa Cloud Enclave" +sidebarTitle: "Log archive" +--- + + +**Warning:** + +The structure of log archive buckets may change without notice + + +After Vespa Cloud Enclave is established in your cloud provider account using Terraform, the module will have created a storage bucket per Vespa Cloud zone you configured in your enclave. These storage buckets are used to archive logs from the machines that run Vespa inside your account. + +Since the buckets are in your own cloud account, you do not need to register an IAM role or configure access through the Vespa Cloud Console — you can access the archive buckets directly using your existing cloud credentials. + + +![Enclave archive view](/assets/img/archive-aws-enclave.png) + + +There will be one storage bucket per Vespa Cloud Zone that is configured in the enclave. The name of the bucket will depend on the cloud provider you are setting up the enclave in. + +Files are synchronized to the archive bucket when the file is rotated by the logging system, or when a virtual machine is deprovisioned from the application. The consequence of this is that frequency of uploads will depend on the activity of the Vespa application. + +## Directory structure + +The directory structure in the bucket is as follows: + +```bash +////logs// +``` + +- `tenant` is the tenant ID. 
+- `application` is the application ID that generated the log. +- `instance` is the instance ID of the generated log, e.g. `default`. +- `host` is the name prefix of the host that generated the log, e.g. `e103a`. +- `logtype` is the type of log in the directory (see below). +- `logfile` is the specific file of the log. + +## Log types + +There are three log types that are synced to this bucket. + +- `vespa`: [Vespa logs](/en/reference/operations/log-files) +- `access`: [Access logs](/en/operations/access-logging) +- `connection`: [Connection logs](/en/operations/access-logging#connection-log) \ No newline at end of file diff --git a/mintlify-docs/en/operations/enclave/aws-architecture.mdx b/mintlify-docs/en/operations/enclave/aws-architecture.mdx new file mode 100644 index 0000000000..c373006425 --- /dev/null +++ b/mintlify-docs/en/operations/enclave/aws-architecture.mdx @@ -0,0 +1,38 @@ +--- +title: "Vespa Cloud Enclave AWS Architecture" +sidebarTitle: "AWS architecture" +--- + +Each Vespa Cloud Enclave in the tenant AWS account corresponds to a Vespa Cloud [zone](/en/operations/zones). Inside the tenant AWS account one enclave is contained within one single [VPC](https://docs.aws.amazon.com/vpc/latest/userguide/what-is-amazon-vpc.html). + + +![Enclave architecture](/assets/img/vespa-cloud-enclave-aws.png) + + +#### EC2 Instances, Load Balancers, and S3 buckets + +Configuration Servers inside the Vespa Cloud zone make the decision to create or destroy EC2 instances ("Vespa Hosts" in diagram) based on the Vespa applications that are deployed. The Configuration Servers also set up the Network Load Balancers needed to communicate with the deployed Vespa application. + +Each Vespa Host will periodically sync its logs to an S3 bucket ("Log Archive"). This bucket is "local" to the enclave and provisioned by the Terraform module inside the tenant's AWS account. + +#### Networking + +The enclave VPC is very network restricted. 
Vespa Hosts do not have public IPv4 addresses and there is no [NAT gateway](https://docs.aws.amazon.com/vpc/latest/userguide/vpc-nat-gateway.html) available in the VPC. Vespa Hosts have public IPv6 addresses and are able to make outbound connections. Inbound connections are not allowed. Outbound IPv6 connections are used to bootstrap communication with the Configuration Servers, and to report operational metrics back to Vespa Cloud. + +When a Vespa Host is booted it will set up an encrypted tunnel back to the Configuration Servers. All communication between Configuration Servers and the Vespa Hosts will be run over this tunnel after it is set up. + +### Security + +The Vespa Cloud operations team does *not* have any direct access to the resources that are part of the customer account. The only possible access is through the management APIs needed to run Vespa itself. In case it is needed for, e.g. incident debugging, direct access can only be granted to the Vespa team by the tenant itself. For further details, see the documentation for the [`ssh`\-submodule](https://registry.terraform.io/modules/vespa-cloud/enclave/aws/latest/submodules/ssh). + +All communication between the enclave and the Vespa Cloud configuration servers is encrypted, authenticated and authorized using [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication#mTLS) with identities embedded in the certificate. mTLS communication is facilitated with the [Athenz](https://www.athenz.io/) service. + +All data stored is encrypted at rest using [KMS](https://docs.aws.amazon.com/kms/latest/developerguide/overview.html). All keys are managed by the tenant in the tenant's AWS account. + +The resources provisioned in the tenant AWS account are either provisioned by the Terraform module executed by the tenant, or by the orchestration services inside a Vespa Cloud Zone. 
+ +Resources are provisioned by the Vespa Cloud configuration servers, using the [`provision_policy`](https://github.com/vespa-cloud/terraform-aws-enclave/blob/main/modules/provision/main.tf) AWS IAM policy document defined in the Terraform module. + +The tenant that registered the AWS account is the only tenant that can deploy applications targeting the enclave. + +For more general information about security in Vespa Cloud, see the [whitepaper](/en/security/whitepaper). \ No newline at end of file diff --git a/mintlify-docs/en/operations/enclave/aws-getting-started.mdx b/mintlify-docs/en/operations/enclave/aws-getting-started.mdx new file mode 100644 index 0000000000..fa0a55778a --- /dev/null +++ b/mintlify-docs/en/operations/enclave/aws-getting-started.mdx @@ -0,0 +1,85 @@ +--- +title: "Getting started with Vespa Cloud Enclave in AWS" +sidebarTitle: "AWS getting started" +--- + +Setting up Vespa Cloud Enclave requires: + + + +Registration at [Vespa Cloud](https://console.vespa-cloud.com), or use a pre-existing tenant. + + +Running a [Terraform](https://www.terraform.io/) configuration to provision AWS resources in the account. Go through the [AWS tutorial](https://developer.hashicorp.com/terraform/tutorials/aws-get-started) as needed. + + +Registration of the AWS account ID in Vespa Cloud + + +Deployment of a Vespa application. + + + +### 1. Vespa Cloud Tenant setup + +Register at [Vespa Cloud](https://console.vespa-cloud.com) or use an existing tenant. Note that the tenant must be on a [paid plan](https://vespa.ai/pricing/). + +### 2. Configure AWS Account + + +**Note:** + +We recommend using a *dedicated* account for your Vespa Cloud Enclave. Vespa Cloud will manage resources in the Enclave VPCs created in the AWS resource provisioning step. Primarily EC2 instances, load balancers and service endpoints. + + +One account can host all your Vespa applications, there is no need for multiple tenants or accounts. 
+ +The AWS account you intend to use for Vespa Cloud Enclave must be prepared for deploying Vespa applications using either *Terraform* or *Cloudformation*. + +#### Terraform + +Use [Terraform](https://www.terraform.io/) to set up the necessary resources using the [modules](https://registry.terraform.io/modules/vespa-cloud/enclave/aws/latest) published by the Vespa team. + +Modify the [multi-region Terraform files](https://github.com/vespa-cloud/terraform-aws-enclave/blob/main/examples/multi-region/main.tf) for your deployment. + +If you are unfamiliar with Terraform: It is a tool to manage resources and their configuration in various cloud providers, like AWS and GCP. Terraform has published an [AWS](https://developer.hashicorp.com/terraform/tutorials/aws-get-started) tutorial, and we strongly encourage enclave users to read and follow the Terraform recommendations for [CI/CD](https://developer.hashicorp.com/terraform/tutorials/automation/automate-terraform). + +The Terraform module we provide is regularly updated to add new required resources or extra permissions for Vespa Cloud to automate the operations of your applications. In order for your enclave applications to use the new features you must re-apply your terraform templates with the latest release. The [notification system](/en/operations/notifications) will let you know when a new release is available. + +### 3. Onboarding + +Once the AWS account is configured, contact [support@vespa.ai](mailto:support@vespa.ai) stating which tenant should be on-boarded to use Vespa Cloud Enclave. Also include the [AWS account ID](https://docs.aws.amazon.com/accounts/latest/reference/manage-acct-identifiers.html#FindAccountId) to associate with the tenant. + + +**Note:** + +Wait for confirmation from the Vespa team that onboarding is complete before deploying an application in the next step. + + + +### 4. Deploy a Vespa application + +By default, all applications are deployed on resources in Vespa Cloud accounts. 
To deploy in your enclave account, update [deployment.xml](/en/reference/applications/deployment) to reference the AWS account you onboarded: + +```xml + + + +``` + +Useful resources are [getting started](/en/basics/deploy-an-application-java) and [migrating to Vespa Cloud](/en/learn/migrating-to-cloud) - put *deployment.xml* next to *services.xml*. + +## Next steps + +After a successful deployment to the [dev](/en/operations/environments#dev) environment, iterate on the configuration to implement your application on Vespa. The *dev* environment is ideal for this, with rapid deployment cycles. + +For production serving, deploy to the [prod](/en/operations/environments#prod) environment - follow the steps in [production deployment](/en/reference/applications/deployment). + +## Enclave teardown + +To tear down a Vespa Cloud Enclave system, do the steps above in reverse order: + +1. [Undeploy the application(s)](/en/operations/deleting-applications) +2. Undeploy the Terraform changes + +It is important to undeploy the Vespa application(s) first. After running the Terraform, Vespa Cloud cannot manage the resources allocated, so you must clean up these yourself. \ No newline at end of file diff --git a/mintlify-docs/en/operations/enclave/azure-architecture.mdx b/mintlify-docs/en/operations/enclave/azure-architecture.mdx new file mode 100644 index 0000000000..9e24f07649 --- /dev/null +++ b/mintlify-docs/en/operations/enclave/azure-architecture.mdx @@ -0,0 +1,42 @@ +--- +title: "Architecture for Vespa Cloud Enclave in Azure" +sidebarTitle: "Azure architecture" +--- + +### Architecture + +With Vespa Cloud Enclave, all Azure resources associated with your Vespa Cloud applications are in your enclave Azure subscription, as opposed to a shared Vespa Cloud subscription. + +Each Vespa Cloud [zone](/en/operations/zones) has an associated zone resource group (RG) in the enclave subscription, that contains all the resources for that zone. 
For instance, it has one Virtual Network (VNet aka [VPC](https://cloud.google.com/vpc/)). + + +![Enclave architecture](/assets/img/vespa-cloud-enclave-azure.png) + + +#### Virtual Machines, Load Balancers, and Blob Storage + +Configuration Servers inside the Vespa Cloud subscription make the decision to create or destroy virtual machines ("Vespa Hosts" in diagram) based on the Vespa applications that are deployed. The Configuration Servers also set up the Container Load Balancers needed to communicate with the deployed Vespa application. + +Each Vespa Host will periodically sync its logs to a Blob Storage container ("Log Archive") in a Storage Account in the zone RG. This storage account is "local" to the enclave and provisioned by the Terraform module inside your Azure subscription. + +#### Networking + +The Zone Virtual Network (VNet aka VPC) is very network restricted. The Vespa Hosts do not have a public IPv4 address. But your application can connect to external IPv4 services using a [NAT gateway](https://learn.microsoft.com/en-us/azure/nat-gateway/nat-overview). Vespa Hosts have public IPv6 addresses and are able to make outbound connections. Inbound connections are not allowed. Outbound IPv6 connections are used to bootstrap communication with the Configuration Servers, and to report operational metrics back to Vespa Cloud. + +When a Vespa Host is booted, it will set up an encrypted tunnel back to the Configuration Servers. All communication between Configuration Servers and the Vespa Hosts will be run over this tunnel after it is set up. + +### Security + +The Vespa Cloud operations team does *not* have any direct access to the resources in your subscription. The only possible access is through the management APIs needed to run Vespa itself. In case it is needed for, e.g. incident debugging, direct access can only be granted to the Vespa team by you. Enable direct access by setting the `enable_ssh` input to true in the enclave module. 
For further details, see the documentation for the [enclave module inputs](https://registry.terraform.io/modules/vespa-cloud/enclave/azure/latest/?tab=inputs). + +All communication between the enclave and the Vespa Cloud Configuration servers is encrypted, authenticated, and authorized using [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication#mTLS) with identities embedded in the certificate. mTLS communication is facilitated with the [Athenz](https://www.athenz.io/) service. + +All data stored is encrypted at rest using [Encryption At Host](https://learn.microsoft.com/en-us/azure/virtual-machines/disk-encryption-overview). All keys are managed automatically by the Azure platform. + +The resources provisioned in your Azure subscription are either provisioned by the Vespa Cloud Enclave Terraform module you apply, or by the orchestration services inside a Vespa Cloud zone. + +Resources are provisioned by the Vespa Cloud Configuration servers, using the [`id-provisioner`](https://github.com/vespa-cloud/terraform-azure-enclave/blob/main/provisioner.tf) user-assigned managed identity defined in the Terraform module. + +Only your Vespa tenant (that registered this Azure subscription) can deploy applications targeting your enclave. + +For more general information about security in Vespa Cloud, see the [whitepaper](/en/security/whitepaper). \ No newline at end of file diff --git a/mintlify-docs/en/operations/enclave/azure-getting-started.mdx b/mintlify-docs/en/operations/enclave/azure-getting-started.mdx new file mode 100644 index 0000000000..751856b38b --- /dev/null +++ b/mintlify-docs/en/operations/enclave/azure-getting-started.mdx @@ -0,0 +1,76 @@ +--- +title: "Getting started with Vespa Cloud Enclave in Azure" +sidebarTitle: "Azure getting started" +--- + +Setting up Vespa Cloud Enclave requires: + + + +Registration at [Vespa Cloud](https://console.vespa-cloud.com), or use a pre-existing Vespa tenant. 
+ + +Running a [Terraform](https://www.terraform.io/) configuration to provision necessary Azure resources in the subscription. + + +Registration of the Azure subscription in Vespa Cloud. + + +Deployment of a Vespa application. + + + +### 1. Vespa Cloud Tenant setup + +Register at [Vespa Cloud](https://console.vespa-cloud.com) or use an existing Vespa tenant. Note that the tenant must be on a [paid plan](https://vespa.ai/pricing/). + +### 2. Configure Azure subscription + +Choose an Azure subscription to use for Vespa Cloud Enclave. + + +**Note:** + +We recommend using a *dedicated* subscription for your Vespa Cloud Enclave. Resources in this subscription will be fully managed by Vespa Cloud. + + +One subscription can host all your Vespa applications, there is no need for multiple Vespa tenants or Azure subscriptions. + +The subscription must be prepared for deploying Vespa applications. Use [Terraform](https://www.terraform.io/) to set up the necessary resources using the [modules](https://registry.terraform.io/modules/vespa-cloud/enclave/azure/latest) published by the Vespa team. + +Feel free to use the [example](https://github.com/vespa-cloud/terraform-azure-enclave/blob/main/examples/basic/main.tf) to get started. + +If you are unfamiliar with Terraform: It is a tool to manage resources and their configuration in various cloud providers, like AWS, Azure, and GCP. Terraform has published a [Get Started - Azure](https://developer.hashicorp.com/terraform/tutorials/azure-get-started) tutorial, and we strongly encourage enclave users to read and follow the Terraform recommendations for [CI/CD](https://developer.hashicorp.com/terraform/tutorials/automation/automate-terraform). + +The Terraform module we provide is regularly updated to add new required resources or extra permissions for Vespa Cloud to automate the operations of your applications. 
In order for your enclave applications to use the new features you must re-apply your terraform templates with the latest release. The [notification system](/en/operations/notifications) will let you know when a new release is available. + +### 3. Onboarding + +Contact [support@vespa.ai](mailto:support@vespa.ai) and provide the `enclave_config` output after applying the Terraform, see [Outputs](https://github.com/vespa-cloud/terraform-azure-enclave?tab=readme-ov-file#outputs). The `enclave_config` includes which Vespa tenant should be on-boarded to use Vespa Cloud Enclave, as well as the Azure tenant ID, the subscription ID, and a client ID of an Athenz identity the Terraform created. + +### 4. Deploy a Vespa application + +By default, all applications are deployed on resources in Vespa Cloud accounts. To deploy in your Azure enclave subscription instead, update [deployment.xml](/en/reference/applications/deployment) to reference the subscription ID from step 2: + +```xml + + + +``` + +Useful resources are [getting started](/en/basics/deploy-an-application) and [migrating to Vespa Cloud](/en/learn/migrating-to-cloud) - put *deployment.xml* next to *services.xml*. + +## Next steps + +After a successful deployment to the [dev](/en/operations/environments#dev) environment, iterate on the configuration to implement your application on Vespa. The *dev* environment is ideal for this, with rapid deployment cycles. + +For production serving, deploy to the [prod](/en/operations/environments#prod) environment - follow the steps in [production deployment](/en/reference/applications/deployment). + +## Enclave teardown + +To tear down a Vespa Cloud Enclave system, do the steps above in reverse order: + +1. [Undeploy the application(s)](/en/operations/deleting-applications) +2. Undeploy the Terraform changes + +It is important to undeploy the Vespa application(s) first. After running the Terraform, Vespa Cloud cannot manage the resources allocated, so you must clean up these yourself. 
\ No newline at end of file diff --git a/mintlify-docs/en/operations/enclave/enclave.mdx b/mintlify-docs/en/operations/enclave/enclave.mdx new file mode 100644 index 0000000000..063e0e65fe --- /dev/null +++ b/mintlify-docs/en/operations/enclave/enclave.mdx @@ -0,0 +1,84 @@ +--- +title: "Vespa Cloud Enclave" +sidebarTitle: "Enclave" +--- + + +![enclave architecture](/assets/img/enclave-architecture.png) + + +Vespa Cloud Enclave allows Vespa Cloud applications to run inside the tenant's own cloud accounts while everything is still fully managed by Vespa Cloud's automation, giving the tenant full access to Vespa Cloud features inside their own cloud account. This allows tenant data to always remain within the bounds of services controlled by the tenant, and also to build closer integrations with Vespa applications inside the cloud services. + +Vespa Cloud Enclave is available in AWS, Azure, and GCP. + + +**Note:** + +As the Vespa Cloud Enclave resources run in *your* account, this incurs resource costs from your cloud provider in *addition* to the Vespa Cloud costs. + + + +## AWS + + + + + + +## Azure + + + + + + +## GCP + + + + + + +## Guides + + + + + + +## FAQ + + + + +The permissions required are coded into the Terraform modules found at: + + + + + + +Navigate to the *modules* directory for details. + + + +Use terraform to grant Vespa hosts access to necessary secrets, and create an RPM that retrieves them and configures your application. See [enclave-examples](https://github.com/vespa-cloud/enclave-examples/tree/main/systemd-secrets) for a complete example. 
+ + + +This happens if you deploy to new zones *before* running the Terraform/CloudFormation templates: + +```bash +Deployment failed: Invalid application: In container cluster 'mycluster': Could not provision load balancer mytenant:myapp:myinstance:mycluster: Expected to find exactly 1 resource, but got 0 for subnet with service 'tenantelb' +``` + + + +Vespa Cloud will take proactive actions on maintenance operations and replace instances that are scheduled for maintenance tasks ahead of time to reduce any impact the maintenance may incur. + +All EC2 instance failures are detected by our control plane, and the problematic instances are automatically replaced. The system will, as part of the replacement process, also ensure that the document distribution is kept in line with your application configuration. + + +VPC peering is not supported; [AWS PrivateLink](/en/operations/private-endpoints#aws-private-link) and [Google Private Service Connect](/en/operations/private-endpoints#gcp-private-service-connect) are good alternatives, so you can access the endpoints without going over public internet. [Read more](/en/reference/applications/deployment#accessing-a-public-cloud-application-from-another-vpc-on-another-account). + + \ No newline at end of file diff --git a/mintlify-docs/en/operations/enclave/gcp-architecture.mdx b/mintlify-docs/en/operations/enclave/gcp-architecture.mdx new file mode 100644 index 0000000000..b059028a19 --- /dev/null +++ b/mintlify-docs/en/operations/enclave/gcp-architecture.mdx @@ -0,0 +1,40 @@ +--- +title: "Architecture for Vespa Cloud Enclave in GCP" +sidebarTitle: "GCP architecture" +--- + +### Architecture + +Each Vespa Cloud Enclave in the tenant GCP project corresponds to a Vespa Cloud [zone](/en/operations/zones). Inside the tenant GCP project one enclave is contained within one single [VPC](https://cloud.google.com/vpc/). 
+ + +![Enclave architecture](/assets/img/vespa-cloud-enclave-gcp.png) + + +#### Compute Instances, Load Balancers, and Cloud Storage buckets + +Configuration Servers inside the Vespa Cloud zone make the decision to create or destroy compute instances ("Vespa Hosts" in diagram) based on the Vespa applications that are deployed. The Configuration Servers also set up the Network Load Balancers needed to communicate with the deployed Vespa application. + +Each Vespa Host will periodically sync its logs to a Cloud Storage bucket ("Log Archive"). This bucket is "local" to the enclave and provisioned by the Terraform module inside the tenant's GCP project. + +#### Networking + +The enclave VPC is very network restricted. Vespa Hosts do not have public IPv4 addresses and there is no [NAT gateway](https://cloud.google.com/nat/docs/overview) available in the VPC. Vespa Hosts have public IPv6 addresses and are able to make outbound connections. Inbound connections are not allowed. Outbound IPv6 connections are used to bootstrap communication with the Configuration Servers, and to report operational metrics back to Vespa Cloud. + +When a Vespa Host is booted it will set up an encrypted tunnel back to the Configuration Servers. All communication between Configuration Servers and the Vespa Hosts will be run over this tunnel after it is set up. + +### Security + +The Vespa Cloud operations team does *not* have any direct access to the resources that are part of the customer account. The only possible access is through the management APIs needed to run Vespa itself. In case it is needed for, e.g. incident debugging, direct access can only be granted to the Vespa team by the tenant itself. Enabling direct access is done by setting the `enable_ssh` input to true in the enclave module. For further details, see the documentation for the [enclave module inputs](https://registry.terraform.io/modules/vespa-cloud/enclave/google/latest/?tab=inputs). 
+ +All communication between the enclave and the Vespa Cloud configuration servers is encrypted, authenticated and authorized using [mTLS](https://en.wikipedia.org/wiki/Mutual_authentication#mTLS) with identities embedded in the certificate. mTLS communication is facilitated with the [Athenz](https://www.athenz.io/) service. + +All data stored is encrypted at rest using [Cloud Key Management](https://cloud.google.com/security-key-management). All keys are managed by the tenant in the tenant's GCP project. + +The resources provisioned in the tenant GCP project are either provisioned by the Terraform module executed by the tenant, or by the orchestration services inside a Vespa Cloud zone. + +Resources are provisioned by the Vespa Cloud configuration servers, using the [`vespa_cloud_provisioner_role`](https://github.com/vespa-cloud/terraform-google-enclave/blob/main/main.tf) IAM role defined in the Terraform module. + +The tenant that registered the GCP project is the only tenant that can deploy applications targeting the enclave. + +For more general information about security in Vespa Cloud, see the [whitepaper](/en/security/whitepaper). \ No newline at end of file diff --git a/mintlify-docs/en/operations/enclave/gcp-getting-started.mdx b/mintlify-docs/en/operations/enclave/gcp-getting-started.mdx new file mode 100644 index 0000000000..5de7521cb5 --- /dev/null +++ b/mintlify-docs/en/operations/enclave/gcp-getting-started.mdx @@ -0,0 +1,84 @@ +--- +title: "Getting started with Vespa Cloud Enclave in GCP" +sidebarTitle: "GCP getting started" +--- + +Setting up Vespa Cloud Enclave requires: + + + +Registration at [Vespa Cloud](https://console.vespa-cloud.com), or use a pre-existing tenant. + + +Running a [Terraform](https://www.terraform.io/) configuration to provision necessary GCP resources in the project. + + +Registration of the GCP project in Vespa Cloud. + + +Deployment of a Vespa application. + + + +### 1. 
Vespa Cloud Tenant setup + +Register at [Vespa Cloud](https://console.vespa-cloud.com) or use an existing tenant. Note that the tenant must be on a [paid plan](https://vespa.ai/pricing/). + +### 2. Configure GCP Project + + +**Note:** + +We recommend using a *dedicated* project for your Vespa Cloud Enclave. Resources in this project will be fully managed by Vespa Cloud. + + +One project can host all your Vespa applications, there is no need for multiple tenants or projects. + +The project you intend to use for Vespa Cloud Enclave must be prepared for deploying Vespa applications. Use [Terraform](https://www.terraform.io/) to set up the necessary resources using the [modules](https://registry.terraform.io/modules/vespa-cloud/enclave/google/latest) published by the Vespa team. + +Modify the [multi-region example](https://github.com/vespa-cloud/terraform-google-enclave/blob/main/examples/multi-region/main.tf) for your deployment. + +If you are unfamiliar with Terraform: It is a tool to manage resources and their configuration in various cloud providers, like AWS and GCP. Terraform has published a [GCP](https://developer.hashicorp.com/terraform/tutorials/gcp-get-started) tutorial, and we strongly encourage enclave users to read and follow the Terraform recommendations for [CI/CD](https://developer.hashicorp.com/terraform/tutorials/automation/automate-terraform). + +The Terraform module we provide is regularly updated to add new required resources or extra permissions for Vespa Cloud to automate the operations of your applications. In order for your enclave applications to use the new features you must re-apply your terraform templates with the latest release. The [notification system](/en/operations/notifications) will let you know when a new release is available. + +### 3. Onboarding + +Once the GCP project is configured, contact [support@vespa.ai](mailto:support@vespa.ai) stating which tenant should be on-boarded to use Vespa Cloud Enclave. 
Also include the [GCP Project ID](https://cloud.google.com/resource-manager/docs/creating-managing-projects#identifying_projects) to associate with the tenant. + + +**Note:** + +Wait for confirmation from the Vespa team that onboarding is complete before deploying an application in the next step. + + +### 4. Deploy a Vespa application + +By default, all applications are deployed on resources in Vespa Cloud accounts. To deploy in your enclave account, update [deployment.xml](/en/reference/applications/deployment) to reference the GCP project you onboarded: + +```xml + + + +``` + +Useful resources are [getting started](/en/basics/deploy-an-application) and [migrating to Vespa Cloud](/en/learn/migrating-to-cloud) - put *deployment.xml* next to *services.xml*. + +## Next steps + +After a successful deployment to the [dev](/en/operations/environments#dev) environment, iterate on the configuration to implement your application on Vespa. The *dev* environment is ideal for this, with rapid deployment cycles. + +For production serving, deploy to the [prod](/en/operations/environments#prod) environment - follow the steps in [production deployment](/en/reference/applications/deployment). + +## Enclave teardown + +To tear down a Vespa Cloud Enclave system, do the steps above in reverse order: + +1. [Undeploy the application(s)](/en/operations/deleting-applications) +2. Undeploy the Terraform changes + +It is important to undeploy the Vespa application(s) first. After running the Terraform, Vespa Cloud cannot manage the resources allocated, so you must clean up these yourself. + +## Troubleshooting + +**Identities restricted by domain**: If your GCP organization is using [domain restriction for identities](https://cloud.google.com/resource-manager/docs/organization-policy/restricting-domains) you will need to permit Vespa.ai GCP identities to be added to your project. 
For Vespa Cloud the organization ID to allow identities from is: *1056130768533*, and the Google Customer ID is *C00u32w3e*. \ No newline at end of file diff --git a/mintlify-docs/en/operations/enclave/operations.mdx b/mintlify-docs/en/operations/enclave/operations.mdx new file mode 100644 index 0000000000..2c7202b03c --- /dev/null +++ b/mintlify-docs/en/operations/enclave/operations.mdx @@ -0,0 +1,21 @@ +--- +title: "Operations and Support for Vespa Cloud Enclave" +sidebarTitle: "Operations" +--- +Vespa Cloud Enclave requires that resources provisioned within the VPC are wholly managed by the Vespa Cloud orchestration services, and must not be manually managed by tenant operations. Changing or removing the resources created by the Configuration Servers will negatively impact your Vespa application and may prevent Vespa Cloud from properly managing the applications as well as Vespa engineers from supporting them. + +The Terraform modules might see occasional backwards compatible updates. It is recommended that the tenant applies updates to their system on a regular basis. For more information, see the Terraform documentation on [using Terraform in automation](https://developer.hashicorp.com/terraform/tutorials/automation/automate-terraform). + +The network access granted to Vespa Hosts must be in place for the Vespa application to operate properly. If network access is restricted the Vespa application might stop working. + +## Custom resource tags + +Custom tags can be applied to the cloud resources (virtual machines and disks) that Vespa Cloud provisions inside the tenant's cloud account. Tags are declared in *deployment.xml* via the [``](/en/reference/applications/deployment#resource-tags) element and are commonly used for cost tracking and resource management. This is supported on AWS, Azure, and GCP. + +## Quota + +Make sure your organization's AWS or GCP quotas are set high enough to support common Vespa Cloud use cases. 
A common use case is migrating to new instance types, and this causes temporarily doubled (or more) resource usage in the data migration transition period. Other use cases with temporarily increased resource usage are node replacements. + +Best practice is to ensure the quota is 3x the current resource usage, to also cover for capacity expansion. + +This is not to be confused with the [Vespa Cloud quota](/en/cloud/quota). \ No newline at end of file diff --git a/mintlify-docs/en/operations/endpoint-routing.mdx b/mintlify-docs/en/operations/endpoint-routing.mdx new file mode 100644 index 0000000000..b3e3fbea2a --- /dev/null +++ b/mintlify-docs/en/operations/endpoint-routing.mdx @@ -0,0 +1,64 @@ +--- +title: "Routing and endpoints" +sidebarTitle: "Endpoint routing" +--- +Vespa Cloud supports multiple methods of routing requests to an application. This guide describes how these routing methods work, failover, and how to configure them. + +By default, each deployment of a Vespa Cloud application will have a zone endpoint. In addition to the default zone endpoint, one can configure global endpoints. + +All endpoints for an application are available under the *endpoints* tab of each deployment in the console. + +## Endpoint format + +Vespa Cloud endpoints have the format: `{random}.{random}.{scope}.vespa-app.cloud`. + +## Endpoint scopes + +### Zone endpoint + +This is the default endpoint for a deployment. Requests through a zone endpoint are sent directly to the zone. + +Zone endpoints are created implicitly, one per container cluster declared in [services.xml](/en/reference/applications/services/container). Zone endpoints are not configurable. + +Zone endpoints have the suffix `z.vespa-app.cloud`. + +### Global endpoint + +A global endpoint is an endpoint that can route requests to multiple zones. It can be configured in [deployment.xml](/en/reference/applications/deployment#endpoints-global).
Similar to how a [CDN](https://en.wikipedia.org/wiki/Content_delivery_network) works, requests through this endpoint will be routed to the nearest zone based on geo proximity, i.e. the zone that is nearest to the client. + +Global endpoints have the suffix `g.vespa-app.cloud`. + + +**Important:** + +Global endpoints do not support feeding. Feeding must be done through zone endpoints. + + + +## Routing control + +Vespa Cloud has two mechanisms for manually controlling routing of requests to a zone: + +- Removing the `<region>` element from the relevant `<endpoint>` elements in [deployment.xml](/en/reference/applications/deployment) and deploying a new version of your application. +- Changing the status through the console. + +This section describes the latter mechanism. Navigate to the relevant deployment of your application in the console. Hovering over the *GLOBAL ROUTING* badge will display the current status and when it was last changed. + +### Change status + +In case of a production emergency, a zone can be manually set out to prevent it from receiving requests: + +1. Hover over the *GLOBAL ROUTING* badge for the problematic deployment and click *Deactivate*. +2. Inspection of the status will now show the status set to *OUT*. To set the zone back in and have it continue receiving requests: Hover over the *GLOBAL ROUTING* badge again and click *Activate*. + +### Behaviour + +Changing the routing status is independent of the endpoint scope used. You're technically overriding the routing status the deployment reports to the Vespa Cloud routing infrastructure. This means that a change to routing status affects both *zonal endpoints* and *global endpoints*. + +Deactivating a deployment disables routing of requests to that deployment through global endpoints until the deployment is activated again. As routing through these endpoints is DNS-based, it may take between 5 and 15 minutes for all traffic to shift to other deployments.
+ +If all deployments of an endpoint are deactivated, requests are distributed as if all deployments were active. This is because attempting to route traffic according to the original configuration is preferable to discarding all requests. + +## AWS clients + +While Vespa Cloud is hosted in AWS, clients that talk to Vespa Cloud from AWS nodes will be treated as any other client from the Internet. This means clients in AWS will generate regular Internet egress traffic even though they are talking to a service in AWS in the same zone. \ No newline at end of file diff --git a/mintlify-docs/en/operations/environments.mdx b/mintlify-docs/en/operations/environments.mdx new file mode 100644 index 0000000000..aedc8e9423 --- /dev/null +++ b/mintlify-docs/en/operations/environments.mdx @@ -0,0 +1,85 @@ +--- +title: "Environments" +--- + +Vespa Cloud has two kinds of environments: + +- Manual environment for rapid development and test: `dev` +- Automated environment with integrated CD pipeline: `prod` + +An application is deployed to one or more *zones* (see [zone list](/en/operations/zones)), which is a combination of an *environment* and a *region*, like `vespa deploy -z dev.aws-us-east-1c`. + +## Dev + +The dev environment is built for rapid development cycles, with auto-downscaling and auto-expiry for ease of use and cost control. The dev environment is the default; to deploy to it, use `vespa deploy`. + +### Auto downscaling + +One use case for the dev environment is to take an application package from a prod environment and deploy to the dev environment to debug. To minimize cost and make this speedy, Vespa Cloud will by default ignore [nodes](/en/reference/applications/services/services#nodes) and [resources](/en/reference/applications/services/services#resources) settings. + +With this, you can safely download an application package from prod (where node and resource settings are normally large) and deploy to dev, with no changes.
+ +To override this behavior and control the resources, specify them explicitly for the dev environment as described in [deployment variants](/en/operations/deployment-variants#services.xml-variants). Example: + +```xml + + + + + +> +``` + + +**Important:** + +The `dev` environment has redundancy 1 by default, and there are no availability or data persistence guarantees. Do not use applications deployed to these zones for production serving use cases. + + +### Auto expiry + +Deployments to `dev` expire after 14 days of inactivity, that is, 14 days after the last [deployment](/en/basics/applications#deploying-applications). **This applies to all plans**. To add 7 more days to the expiry period, redeploy the application or use the Vespa Cloud Console. + +### Vespa version + +The latest active Vespa version is used when deploying to the dev environment. The deployment is upgraded at a time which is most likely at night for the developer in order to minimize downtime (based on the time when last deployments were made). An upgrade will be skipped if metrics indicate ongoing feed or query load, but will still be done if current version is more than a week old. + +## Prod + +Applications are deployed to the `prod` environment for production serving. Deployments are passed through an integrated CD pipeline for system tests and staging tests. Read more in [automated deployments](/en/operations/automated-deployments). + +## Test + +The `test` environment is used by the integrated CD pipeline for prod deployments, to run [system tests](/en/operations/automated-deployments#system-tests). The test capacity is ephemeral and only used during test. Nodes in test and staging environments do not have access to data in prod environments. + +Note that one cannot deploy directly to test and staging environments. For long-lived test applications (e.g., a QA system that is integrated with other services) use the prod environment. 
+ +System tests are always invoked, even if there are no tests defined. In this case, an instance is just started and then stopped. This has value in itself, as it ensures that the application is able to start. + +Test runs can be [aborted](/en/operations/automated-deployments#disabling-tests). + +## Staging + +See system tests above, this applies to the staging, too. [Staging tests](/en/operations/automated-deployments#staging-tests) use a fraction of the configured prod capacity, this can be overridden to using 1 node regardless of prod cluster size: + +```xml + + + + + +``` + +## Reference + +Environment settings: + +| Name | Description | Expiry | Cluster sizes | +| :--- | :--- | :--- | :--- | +| `dev` | Used for manual development testing. | 14 days | `1` | +| `test` | Used for [automated system tests](/en/applications/testing#system-tests). | \- | `1` | +| `staging` | Used for [automated staging tests](/en/applications/testing#staging-tests). | \- | `min(max(2, 0.05 * spec), spec)` | +| `prod` | Hosts all production deployments. | No expiry | `max(2, spec)` | \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/architecture.mdx b/mintlify-docs/en/operations/kubernetes/architecture.mdx new file mode 100644 index 0000000000..dedd147d08 --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/architecture.mdx @@ -0,0 +1,11 @@ +--- +title: "Architecture" +--- + + +![Vespa Operator Architecture](/assets/img/vespa-operator-architecture.png) + + +The Vespa Operator is an implementation of the [Operator Pattern](https://kubernetes.io/docs/concepts/extend-kubernetes/operator/) that extends Kubernetes with custom orchestration capabilities for Vespa. It relies on a [Custom Resource Definition](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) called a `VespaSet`, which represents a quorum of [ConfigServers](/en/operations/self-managed/configuration-server) in a Kubernetes namespace. 
The Vespa Operator is responsible for the deployment and lifecycle of the `VespaSet` resource and its ConfigServers, which collectively entails the infrastructure for Vespa on Kubernetes. + +[Application Packages](/en/basics/applications) are deployed to the [ConfigServers](/en/operations/self-managed/configuration-server) to create Vespa applications. The ConfigServers will dynamically instantiate the services as individual Pods based on the settings provided in the Application Package. After an Application Package is deployed, the ConfigServers will remain responsible for the management and lifecycle of the Vespa application. \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/configuration/configure-local-storage-type.mdx b/mintlify-docs/en/operations/kubernetes/configuration/configure-local-storage-type.mdx new file mode 100644 index 0000000000..9370780ba9 --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/configuration/configure-local-storage-type.mdx @@ -0,0 +1,183 @@ +--- +title: "Configure Local Storage Type" +--- +We recommend configuring node-local storage for the [content cluster](/en/content/proton) (i.e. the search core) to maximize performance by avoiding network I/O on the data path. In a standard Vespa deployment, this is controlled through the `storage-type` attribute under the [resources](/en/reference/applications/services/services#resources) tag in the [application package](/en/basics/applications). However, that attribute has no effect when running Vespa on Kubernetes. Instead, local storage should be configured through the `spec.application.storageClass` field in the `VespaSet`. Vespa on Kubernetes abstracts away the concept of storage and will consume whatever is provided by the referenced storage class. 
+ +For ConfigServer pods, storage performance is less critical; therefore, selecting a more cost-efficient network-attached storage class, such as `gp3` EBS volumes on Amazon EKS, is generally an appropriate tradeoff. + +To provision node-local storage, we recommend using Kubernetes [Local Persistent Volumes](https://kubernetes.io/blog/2019/04/04/kubernetes-1.14-local-persistent-volumes-ga/). These volumes expose `NodeAffinity` constraints to the Kubernetes scheduler, ensuring that Pods consuming them are scheduled onto nodes where the underlying storage is available. This avoids the need to manually manage `NodeAffinity` rules on a per-Pod basis. + +In addition, the Kubernetes Special Interest Groups (SIGs) provide an external [Local Persistent Volume](https://kubernetes.io/blog/2019/04/04/kubernetes-1.14-local-persistent-volumes-ga/) static provisioner. This provisioner automatically discovers local disks mounted on each node and creates corresponding `PersistentVolumes`, while managing their lifecycle, including cleanup and reuse as Pods are deleted. We recommend using this component in production deployments. + +This guide walks through setting up local NVMe instance storage on EKS nodes using the [Kubernetes Local Volume Static Provisioner](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner). This exposes the physical NVMe disks available on instances as a `local-nvme` StorageClass that Application Pods can claim. While this guide specifically targets an Amazon EKS setup, the concept is similar across different environments - refer to the [project](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner/tree/master/helm/examples) for several other examples. + +## Setup Local Storage on Amazon EKS + +This guide assumes that your EKS cluster has a Node Group configured with an instance type that supports local NVMe instance storage, such as `m7gd.xlarge`.
These instance types typically contain the `d` suffix to designate themselves as specialized for workloads that require local instance storage. Refer to the [AWS EKS Node Groups](https://docs.aws.amazon.com/eks/latest/userguide/managed-node-groups.html) documentation for further information on configuring Node Groups. + +This guide specifically targets Bottlerocket-based EKS Nodes. These Nodes do not execute the standard EKS bootstrap script responsible for preparing NVMe instance storage. Disk formatting and mounting is therefore handled by an init container, after which the static provisioner scans for available volumes and registers them as `PersistentVolumes`. + +Add the Helm repository for the Local Volume Static Provisioner. + +```text +$ helm repo add sig-storage-local-static-provisioner https://kubernetes-sigs.github.io/sig-storage-local-static-provisioner +$ helm repo update +``` + +Create an EKS NVMe instance storage configuration. The example below will run an [initContainer](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) that will scan for NVMe instance store disks and format them as `ext4` under `/mnt/disks`, which the static provisioner will detect. + +```bash expandable +cat <<'EOF' > local-nvme-values.yaml +# EKS Bottlerocket NVMe instance storage configuration. 
+classes: + - name: local-nvme + hostDir: /mnt/disks + mountDir: /mnt/disks + volumeMode: Filesystem + fsType: ext4 + accessMode: ReadWriteOnce + storageClass: + reclaimPolicy: Delete + isDefaultClass: false + +nodeSelector: + eks.amazonaws.com/nodegroup: test-node-group + +priorityClassName: system-node-critical +mountDevVolume: true + +initContainers: + - name: nvme-disk-setup + image: registry.k8s.io/sig-storage/local-volume-provisioner:v2.8.0 + securityContext: + privileged: true + command: + - sh + - -c + - | + set -eu + + DISKS_PATH=/mnt/disks + + disks=$(ls /dev/nvme*n1 2>/dev/null | grep -v '/dev/nvme0n1' || true) + + if [ -z "${disks}" ]; then + echo "No NVMe instance-store disks found, nothing to do" + exit 0 + fi + + for disk in ${disks}; do + echo "Processing ${disk}..." + + model=$(cat /sys/block/$(basename ${disk})/device/model 2>/dev/null || true) + if ! echo "${model}" | grep -q "Amazon EC2 NVMe Instance Storage"; then + echo "${disk} is not an instance store disk (model: ${model}), skipping" + continue + fi + + if grep -q "^${disk} " /proc/mounts; then + echo "${disk} is already mounted, skipping" + continue + fi + + if ! blkid "${disk}" >/dev/null 2>&1; then + echo "No filesystem on ${disk}, formatting as ext4..." + mkfs.ext4 -F "${disk}" + fi + + uuid=$(blkid -s UUID -o value "${disk}") + if [ -z "${uuid}" ]; then + echo "Could not determine UUID for ${disk}, skipping" + continue + fi + + mount_point="${DISKS_PATH}/${uuid}" + mkdir -p "${mount_point}" + echo "Mounting ${disk} (UUID=${uuid}) at ${mount_point}" + mount "${disk}" "${mount_point}" + done + + echo "Setup complete. 
Disks mounted under ${DISKS_PATH}:" + grep "${DISKS_PATH}" /proc/mounts || echo " (none found)" + volumeMounts: + - name: provisioner-dev + mountPath: /dev + - name: local-nvme + mountPath: /mnt/disks + mountPropagation: Bidirectional + +resources: + requests: + cpu: 10m + memory: 32Mi + limits: + cpu: 100m + memory: 128Mi +EOF + +$ helm install local-volume-provisioner \ + sig-storage-local-static-provisioner/local-static-provisioner \ + --namespace kube-system \ + --values local-nvme-values.yaml +``` + +`mountPropagation: Bidirectional` will ensure that the volume mount is propagated back to the host, and `priorityClassName: system-node-critical` ensures the provisioner Pod will not be evicted in the case of Node pressure. + +After installing the static provisioner, a `StorageClass` type of `local-nvme` will be created. This should be used in the `spec.application.storageClass` attribute of the `VespaSet`. + +```text +$ kubectl get storageclasses +NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE +local-nvme kubernetes.io/no-provisioner Delete WaitForFirstConsumer false 12h +``` + +Ensure that the `VolumeBindingMode` is `WaitForFirstConsumer` to delay `PersistentVolume` binding until a Pod is scheduled, allowing the scheduler to place the Pod on a Node where the storage physically resides. + +After the `initContainer` has completed, the static provisioner will provision `PersistentVolumes`. 
+ +```text expandable +$ kubectl get persistentvolumes +NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS VOLUMEATTRIBUTESCLASS REASON AGE +local-pv-201c66f3 216Gi RWO Delete Available local-nvme 12h +local-pv-2942e993 216Gi RWO Delete Available local-nvme 12h +local-pv-2fea7934 216Gi RWO Delete Available local-nvme 12h +local-pv-335a2831 216Gi RWO Delete Available local-nvme 12h +local-pv-3499cebf 216Gi RWO Delete Available local-nvme 12h +local-pv-36dc72b5 216Gi RWO Delete Available local-nvme 12h +local-pv-37928b3d 216Gi RWO Delete Available local-nvme 12h +local-pv-5e09d438 216Gi RWO Delete Available local-nvme 12h +local-pv-6e9849a9 216Gi RWO Delete Available local-nvme 12h +``` + +Configure the `VespaSet` to use the newly created `StorageClass`. For example: + +```bash expandable +# vespaset sample for EKS with local storage configured +$ cat > vespaset.yaml <• Add env vars/mounts to main container. | • Cannot change main container image, command, or args.
• Cannot override main container CPU/Memory resources (these are locked to `services.xml`). | +| **Volumes** | • Add new Volumes (ConfigMap, Secret, EmptyDir). | • Cannot modify operator-reserved volumes (e.g., `/data`). | +| **Metadata** | • Add new Labels and Annotations. | • Cannot overwrite operator-created labels and annotations | + +## Examples + +### Example 1: Injecting a Logging Sidecar + +This example adds a Fluent Bit sidecar to ship logs to a central system. It defines the sidecar container and mounts a shared volume that the Vespa container also writes to. + +```bash +apiVersion: k8s.ai.vespa/v1 +kind: VespaSet +metadata: + name: my-vespa-cluster +spec: + application: + image: vespaengine/vespa:8.200.15 + # Define the Custom Overlay + podTemplate: + spec: + containers: + # 1. Define the Sidecar + - name: fluent-bit + image: fluent/fluent-bit:1.9 + volumeMounts: + - name: vespa-logs + mountPath: /opt/vespa/logs/vespa + # 2. Define the Shared Volume + volumes: + - name: vespa-logs + emptyDir: {} +``` + +### Example 2: Pinning Pods to Specific Nodes + +This example uses a nodeSelector to ensure Vespa pods only run on nodes labeled with workload=high-performance. + +```bash +apiVersion: k8s.ai.vespa/v1 +kind: VespaSet +metadata: + name: prod-vespa +spec: + application: + podTemplate: + spec: + # Schedule only on nodes with label 'workload: high-performance' + nodeSelector: + workload: high-performance + # Tolerate the 'dedicated' taint if those nodes are tainted + tolerations: + - key: "dedicated" + operator: "Equal" + value: "search-team" + effect: "NoSchedule" +``` + +### Example 3: Adding Cost Allocation Labels + +This example adds custom labels that will appear on every tenant pod, enabling cost tracking by team. 
+ +```bash +apiVersion: k8s.ai.vespa/v1 +kind: VespaSet +metadata: + name: shared-vespa +spec: + application: + podTemplate: + metadata: + labels: + cost-center: "engineering-search" + owner: "team-alpha" + annotations: + # Example annotation for an external monitoring system + monitoring.datadoghq.com/enabled: "true" +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/deployment/dev-mode.mdx b/mintlify-docs/en/operations/kubernetes/deployment/dev-mode.mdx new file mode 100644 index 0000000000..51258904fa --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/deployment/dev-mode.mdx @@ -0,0 +1,84 @@ +--- +title: "Setup Dev Environment" +--- + +The steps to enable the `dev` environment for Vespa on Kubernetes are described in this guide. This is a one-time irreversible operation. Once a `VespaSet` has been deployed in the `dev` environment configuration, it cannot be reversed. + + +**Important:** + +The `dev` environment is intended for local development, integration testing, and experimentation — not for production serving. + + +## Dev Environment + +Contrary to Vespa Cloud, the `dev` environment must additionally be configured at the `VespaSet` resource level. Once this is enabled, any Vespa Cluster that is reconciled through this `VespaSet` will have a `min-availability` in their `content` cluster and `node` count of 1 for all cluster types. + +As such, HA (high-availability) of Vespa Pods is not guaranteed, and availability will be reduced during upgrades. The only exception is the ConfigServer Pods, which must always maintain a replica count of 3 to ensure a quorum. + +For more information on Environments, refer to the [Vespa Cloud](/en/operations/environments#dev) documentation.
+ +## Enable Dev Environment + +The `dev` environment is activated by adding the following annotation to the `VespaSet` resource: + +| Annotation | Value | Effect | +| --- | --- | --- | +| `internal.vespa.ai/environment` | `dev` | Signals to the ConfigServer that this is a `dev` environment. | + +```bash +$ cat > vespaset-dev.yaml < + + + + + + + + + + + + + 1 + + + + + + + + + +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/deployment/ecr-pull-through-cache.mdx b/mintlify-docs/en/operations/kubernetes/deployment/ecr-pull-through-cache.mdx new file mode 100644 index 0000000000..3078ce8253 --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/deployment/ecr-pull-through-cache.mdx @@ -0,0 +1,91 @@ +--- +title: "Setup Amazon ECR Pull-Through Cache" +sidebarTitle: "Setup ECR Pull-through Cache" +--- + +For production, we recommend mirroring the upstream artifacts into your own registry. This section shows how to create an [Amazon ECR pull-through cache](https://docs.aws.amazon.com/AmazonECR/latest/userguide/pull-through-cache.html) for the images referenced in the [Installation](/en/operations/kubernetes/deployment/installation) guide. + +## AWS Console Steps + + + +Open AWS Console -> **Amazon ECR** -> **Private registry** -> **Pull through cache rules**. + + +Choose **Create rule**. + + +Set **ECR repository prefix** to `vespa-cache`. + + +Set **Upstream registry URL** to `images.ves.pa`. + + +Create or select a Secrets Manager credential with your support-provided upstream username/token. + + +Create the rule, then optionally pull one tag of each artifact to warm the cache. + + + +## AWS CLI Steps + +Set the AWS account, region, and ECR registry variables, along with the upstream credentials provided by Vespa support. 
+ +```js +export AWS_ACCOUNT_ID=123456789012 +export AWS_REGION=us-east-1 +export ECR_REGISTRY=${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com +export ECR_CACHE_PREFIX=vespa-cache + +export VESPAAI_REGISTRY_USER= +export VESPAAI_REGISTRY_TOKEN= +``` + +Create a Secrets Manager secret to store the upstream registry credentials. + +```bash +aws secretsmanager create-secret \ + --name vespa-registry-creds \ + --secret-string "{\"username\":\"${VESPAAI_REGISTRY_USER}\",\"password\":\"${VESPAAI_REGISTRY_TOKEN}\"}" \ + --region ${AWS_REGION} || \ +aws secretsmanager put-secret-value \ + --secret-id vespa-registry-creds \ + --secret-string "{\"username\":\"${VESPAAI_REGISTRY_USER}\",\"password\":\"${VESPAAI_REGISTRY_TOKEN}\"}" \ + --region ${AWS_REGION} +``` + +Create the pull-through cache rule. A single rule covers all repositories under the `images.ves.pa` host. + +```bash +aws ecr create-pull-through-cache-rule \ + --ecr-repository-prefix ${ECR_CACHE_PREFIX} \ + --upstream-registry-url images.ves.pa \ + --credential-arn arn:aws:secretsmanager:${AWS_REGION}:${AWS_ACCOUNT_ID}:secret:vespa-registry-creds \ + --region ${AWS_REGION} +``` + +Authenticate your local tooling to the ECR registry. + +```bash +aws ecr get-login-password --region ${AWS_REGION} | \ + docker login --username AWS --password-stdin ${ECR_REGISTRY} +aws ecr get-login-password --region ${AWS_REGION} | \ + helm registry login --username AWS --password-stdin ${ECR_REGISTRY} +``` + +Warm the cache by pulling the Vespa images and the Helm chart artifact. + +```bash +podman pull ${ECR_REGISTRY}/${ECR_CACHE_PREFIX}/kubernetes/vespa:${VESPA_VERSION} +podman pull ${ECR_REGISTRY}/${ECR_CACHE_PREFIX}/kubernetes/operator:${VESPA_VERSION} +helm pull oci://${ECR_REGISTRY}/${ECR_CACHE_PREFIX}/helm/vespa-operator --version ${VESPA_VERSION} +``` + +Point the installation variables to ECR. 
+ +```bash +export VESPA_IMAGE=${ECR_REGISTRY}/${ECR_CACHE_PREFIX}/kubernetes/vespa +export VESPA_OPERATOR_IMAGE=${ECR_REGISTRY}/${ECR_CACHE_PREFIX}/kubernetes/operator +export HELM_CHART_REF=oci://${ECR_REGISTRY}/${ECR_CACHE_PREFIX}/helm/vespa-operator +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/deployment/installation.mdx b/mintlify-docs/en/operations/kubernetes/deployment/installation.mdx new file mode 100644 index 0000000000..7551a4d5c3 --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/deployment/installation.mdx @@ -0,0 +1,238 @@ +--- +title: "Install Vespa on Kubernetes" +sidebarTitle: "Installation" +description: "These steps walk through deploying Vespa using the official Helm chart." +--- + +## Requirements + +The following tools are required for a smooth deployment. + + + + + + + +These instructions assume that your `kubeconfig` is pointing to an active Kubernetes cluster. Refer to the [Getting Started](https://kubernetes.io/docs/setup/) guide to create a Kubernetes cluster. For instructions on deploying Vespa locally on MiniKube, refer to the [Deploy Vespa Locally](/en/operations/kubernetes/deployment/local-deployment) guide. + +Vespa on Kubernetes uses a [Custom Resource Definition](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) (CRD) called a `VespaSet`. Users intending to manage the CRD definition by themselves should apply it to the cluster before installation. + +The permissions that are needed to run Vespa are listed on the [Permissions](/en/operations/kubernetes/deployment/permissions) page. The Helm Chart will automatically apply a default set of RBAC API resources onto the cluster. + +## Setup Registry Access + + +**Note:** + +Vespa on Kubernetes is an enterprise feature. You will need access to the images below. Contact us through our support portal to receive an authentication ID and token.
For production use, we recommend mirroring these images into your own registry or a well-known internal repository appropriate to your infrastructure. + + +- `VESPA_IMAGE=images.ves.pa/kubernetes/vespa` +- `VESPA_OPERATOR_IMAGE=images.ves.pa/kubernetes/operator` +- `HELM_CHART_REF=oci://images.ves.pa/helm/vespa-operator` + +We will use this naming convention throughout this guide. The tags for all three images conform to the [Vespa Version](/en/learn/releases) release semantics. We recommend using the latest Vespa release as the default. We will refer to it as `VESPA_VERSION`. + +The Vespa Operator and all Vespa components are local to a namespace. We will refer to the namespace as `NAMESPACE` in this guide. + +## Deploy the Vespa Operator + +Authenticate to the Helm Chart OCI registry. The credentials will be provided by our support team. + +```bash +$ helm registry login images.ves.pa -u $USER -p $TOKEN +``` + +Install the Helm Chart onto the namespace. This will deploy the Vespa Operator and apply the `VespaSet` resource definition. Set `image.repository` to `VESPA_OPERATOR_IMAGE` as provided by our support team. The `image.tag` refers to the `VESPA_VERSION`. + +```bash +$ helm install vespa-operator $HELM_CHART_REF --namespace $NAMESPACE --create-namespace --set image.repository=$VESPA_OPERATOR_IMAGE --set image.tag=$VESPA_VERSION +``` + +The lifecycle of the CRD definition can be managed separately. However, the CRD specification must be manually applied to the Kubernetes cluster before installing the Helm Chart. Our support team can provide this specification if necessary. To do this, use the `--skip-crds` option in Helm. 
+ +```bash +$ kubectl apply -f vespasets.k8s.ai.vespa-v1.yaml +$ helm install vespa-operator $HELM_CHART_REF --namespace $NAMESPACE --create-namespace --skip-crds --set image.repository=$VESPA_OPERATOR_IMAGE --set image.tag=$VESPA_VERSION +``` + +Ensure that the `Deployment` resource was successfully created, and that the `Vespa Operator` Pod is running. + +## Deploy a VespaSet + +To set up a `dev` environment in Vespa on Kubernetes, refer to the example on the [Setup Dev Environment](/en/operations/kubernetes/deployment/dev-mode) page. + +A `VespaSet` is a quorum of [ConfigServer](/en/operations/self-managed/configuration-server) Pods that manage the lifecycle of Vespa applications. Several examples of `VespaSet` resources are provided in the Helm Chart `samples` directory. An example of a `VespaSet` for an archetypical [Amazon Elastic Kubernetes Service](https://aws.amazon.com/eks/) (EKS) setup is shown below. + +```bash +# vespaset sample for EKS +$ cat > vespaset.yaml < vespaset.yaml < +``` + +## Deploy a Vespa Application + +A Vespa application can be deployed through the ConfigServers' ingress endpoint once a quorum has been met. Refer to the [Vespa Sample Applications](https://github.com/vespa-engine/sample-apps) to get started. In the following example, we will use the [Album Recommendation](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation) sample application. + +Set up the Vespa CLI to download the Album Recommendation sample to a directory. + +```bash +$ vespa clone album-recommendation myapp && cd myapp +``` + +The `Node` resources must be specified for any application package that is deployed on Vespa on Kubernetes. These will directly translate to Kubernetes container [resource requests and limits](https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/).
In a default deployment without any [PodTemplate](/en/operations/kubernetes/custom-overrides-podtemplate) overrides, the requests will equal the limits for a container. + +Modify the container and content cluster specifications in the application package, as shown below: + +```xml + + + + + + + + + + + + + + 2 + + + + + + + + + +``` + +Enable port-forwarding from the ConfigServer's ingress port `19071` to your local port `19071`. Any ConfigServer Pod can be used. + +```bash +$ vespa config set target local +$ kubectl -n $NAMESPACE port-forward pod/cfg-1 19071:19071 +``` + +Deploy and activate the application. + +```bash +$ vespa prepare --target local +$ while ! vespa --target local activate; do sleep 1; done +``` + +The ConfigServers will create the Container, Content, and Cluster-Controller Pods as specified in the application package. The deployment is considered complete once all Pods show the phase `RUNNING` in the `VespaSet` status. + +Port-forwarding provides a simple way to ingress to the ConfigServer locally. For other ingress options, refer to the [Configuring the External Access Layer](/en/operations/kubernetes/ingress) page. + +## Feed and Query Documents + +Feed and query documents by port-forwarding the ConfigServer ingress port and the Dataplane ingress port, then using the Vespa CLI. + +```bash +$ kubectl -n $NAMESPACE port-forward pod/cfg-1 19071:19071 +$ kubectl -n $NAMESPACE port-forward pod/default-100 8080:8080 +$ vespa feed dataset/A-Head-Full-of-Dreams.json +$ vespa query 'yql=select * from music where true limit 1' +``` + +Refer to the [Vespa CLI documentation](/en/reference/clients/vespa-cli/vespa) for the full list of available commands. 
\ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/deployment/local-deployment.mdx b/mintlify-docs/en/operations/kubernetes/deployment/local-deployment.mdx new file mode 100644 index 0000000000..74b0af925d --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/deployment/local-deployment.mdx @@ -0,0 +1,80 @@ +--- +title: "Deploy Vespa Locally" +description: "Vespa on Kubernetes can be deployed locally using MiniKube for development and experimental use-cases." +sidebarTitle: "Minikube Setup" +--- + + +**Note:** + +This setup is not recommended for production. + + +Initialize a Minikube cluster with 8 nodes, each with 4GiB of memory and 2 CPUs. Enable Minikube's image registry add-on to allow the Minikube nodes to access the Vespa images. In this example, we use `podman` as the driver. + +```bash +minikube start --nodes 8 --cpus 2 --memory 4GiB --driver=podman --insecure-registry="192.168.49.0/24" +minikube addons enable registry +``` + +Cache the images provided by our support team into the MiniKube registry. + +```bash +echo $VESPAAI_REGISTRY_TOKEN | podman login images.ves.pa \ + -u "$VESPAAI_REGISTRY_USER" \ + --password-stdin + +podman pull images.ves.pa/kubernetes/vespa:$VESPA_VERSION +podman pull images.ves.pa/kubernetes/operator:$VESPA_VERSION +``` + +Then, push the images to the MiniKube registry. The images will then be accessible from `$(minikube ip):5000`. 
+ +```bash +export MINIKUBE_REGISTRY=$(minikube ip) + +podman tag kubernetes/vespa:$VESPA_VERSION $MINIKUBE_REGISTRY:5000/localhost/kubernetes/vespa:$VESPA_VERSION +podman push --tls-verify=false $MINIKUBE_REGISTRY:5000/localhost/kubernetes/vespa:$VESPA_VERSION + +podman tag kubernetes/operator:$VESPA_VERSION $MINIKUBE_REGISTRY:5000/localhost/kubernetes/operator:$VESPA_VERSION +podman push --tls-verify=false $MINIKUBE_REGISTRY:5000/localhost/kubernetes/operator:$VESPA_VERSION +``` + +We will now use the following environment variables for the rest of the guide to refer to the images. + +```bash +export VESPA_IMAGE=$MINIKUBE_REGISTRY:5000/localhost/kubernetes/vespa +export VESPA_OPERATOR_IMAGE=$MINIKUBE_REGISTRY:5000/localhost/kubernetes/operator +``` + +Then, install the [Local Persistent Volume](https://github.com/kubernetes-sigs/sig-storage-local-static-provisioner) Helm Chart. This will allow provisioning Persistent Volumes locally, which is required to run Vespa on Kubernetes. Helm will automatically create a StorageClass called `local-storage`, which should be used as the `StorageClass` for subsequent steps. + +```bash +$ git clone git@github.com:kubernetes-sigs/sig-storage-local-static-provisioner.git + +# Install the Helm Chart onto the cluster globally +$ cd sig-storage-local-static-provisioner +$ helm install -f helm/examples/baremetal-default-storage.yaml local-volume-provisioner --namespace kube-system ./helm/provisioner +``` + +Create several usable volumes on each MiniKube Node. We recommend at least 4 per node for a smooth deployment. + +```bash +# Create several volumes on each Minikube node. +$ for n in minikube minikube-m02 minikube-m03 minikube-m04 minikube-m05 minikube-m06 minikube-m07 minikube-m08; do + echo "==> $n" + minikube ssh -n "$n" -- ' + set -e + for i in 1 2 3 4; do + sudo mkdir -p /mnt/disks/vol$i + if ! 
mountpoint -q /mnt/disks/vol$i; then + sudo mount --bind /mnt/disks/vol$i /mnt/disks/vol$i + fi + done + echo "Mounted:" + mount | grep -E "/mnt/disks/vol[1-4]" || true + ' +done +``` + +Once the images are available in the MiniKube registry, proceed to the [Installation](/en/operations/kubernetes/deployment/installation) guide, using `local-storage` as the `storageClass` and `NONE` as the `endpointType`. \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/deployment/permissions.mdx b/mintlify-docs/en/operations/kubernetes/deployment/permissions.mdx new file mode 100644 index 0000000000..9ef3c15183 --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/deployment/permissions.mdx @@ -0,0 +1,20 @@ +--- +title: "Permissions" +--- + +The Vespa Operator requires the following permissions within the namespace. These permissions are listed by Kubernetes [API verbs](https://kubernetes.io/docs/reference/access-authn-authz/rbac/) per resource. + +| Kubernetes Resource | Required Permissions | +| --- | --- | +| CustomResourceDefinitions | create, get, list, watch | +| VespaSet | get, list, watch, create, update, patch, delete | +| VespaSet Subresources | `vespasets/status`: update, patch `vespasets/finalizers`: update | +| ConfigMaps | get, list, watch, create, update, patch, delete | +| Services | get, list, watch, create, update, patch, delete | +| Pods | get, list, watch, create, update, patch, delete | +| Pod Execution | get, create | +| Events | create, patch | +| PersistentVolumeClaims | get, list, watch, create, update, patch, delete | +| ServiceAccounts | get, list, watch, create, update, patch, delete | +| Roles | get, list, watch, create, update, patch, delete | +| RoleBindings | get, list, watch, create, update, patch, delete | \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/ingress.mdx b/mintlify-docs/en/operations/kubernetes/ingress.mdx new file mode 100644 index 0000000000..50b58e5cc6 --- /dev/null 
+++ b/mintlify-docs/en/operations/kubernetes/ingress.mdx @@ -0,0 +1,136 @@ +--- +title: "Configure External Access Layer" +--- + +The Vespa Operator automatically provisions Kubernetes `Service` resources to enable external access for feeding and querying data. This behavior is controlled by the `VespaSet` Custom Resource configuration. + +Load balancers are provisioned exclusively for Container clusters. Content clusters communicate internally and do not require external load balancing services. The type of service provisioned is determined by the `spec.ingress.endpointType` field in the `VespaSet`. + +## Supported Endpoint Types + +The operator supports four endpoint types to cover different infrastructure requirements. + +| Endpoint Type | Kubernetes Service Type | Use Case | +| --- | --- | --- | +| `LOAD_BALANCER` | `LoadBalancer` | Provision the cloud-native (AWS, GCP, Azure) load-balancer. | +| `NODE_PORT` | `NodePort` | Expose a static port across every worker node, allowing external traffic to access the cluster from any node's IP. | +| `CLUSTER_IP` | `ClusterIP` | Each Container Pod will expose an internal IP address. Should not be used for production use-cases. | +| `NONE` | N/A | An external access layer will not be provisioned. Use this for custom networking setups (Istio, Ingress Controllers) where no automatic service is desired. | + +## LOAD_BALANCER + +This is the recommended configuration for production deployments on cloud providers (EKS, GKE, AKS). The operator creates a standard Kubernetes `LoadBalancer` service, triggering the cloud provider to provision a managed load balancer (e.g., AWS NLB). + +**Configuration:** + +```bash +ingress: + endpointType: LOAD_BALANCER +``` + +On AWS, the ConfigServer automatically applies the annotation `service.beta.kubernetes.io/aws-load-balancer-internal: "true"` to all Container pods. This provisions an **internal** Network Load Balancer (NLB) accessible only within the VPC where the EKS cluster nodes reside. 
+ +## NODE_PORT + +The `NODE_PORT` type exposes the Vespa container cluster on a specific port (range 30000-32767) across all Kubernetes worker nodes. + +**Configuration:** + +```bash +ingress: + endpointType: NODE_PORT +``` + +When this option is set, Kubernetes opens a static port on every worker node. External traffic can reach the application via `<NodeIP>:<NodePort>`. Note that unlike `LOAD_BALANCER`, this does not provide health checks at the entry point level. If a worker node with a connection crashes, the connection will simply time out or fail. This additionally requires all worker nodes to expose an External IP. + +To use the `NODE_PORT` service, find the assigned port. + +```bash +$ kubectl get service lb-default -n $NAMESPACE + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +lb-default NodePort 10.100.150.25 <none> 80:31942/TCP 5m +``` + +Get the list of nodes and look for their External IP addresses. + +```bash +$ kubectl get nodes -o wide + +NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION +ip-192-168-3-50.us-east-2.compute.internal Ready <none> 10d v1.27.3-eks-a5565ad 192.168.3.50 18.221.100.45 Amazon Linux 2 5.10.184-175.731.amzn2.x86_64 +ip-192-168-3-51.us-east-2.compute.internal Ready <none> 10d v1.27.3-eks-a5565ad 192.168.3.51 3.142.200.10 Amazon Linux 2 5.10.184-175.731.amzn2.x86_64 +``` + +Choose any External IP and combine the IP and port to access the service. 
+ +```js expandable +$ curl http://18.221.100.45:31942/state/v1/health + +{ + "time" : 1769981985754, + "status" : { + "code" : "up" + }, + "metrics" : { + "snapshot" : { + "from" : 1.769981924895E9, + "to" : 1.769981984895E9 + }, + "values" : [ { + "name" : "requestsPerSecond", + "values" : { + "count" : 19, + "rate" : 0.31666666666666665 + } + }, { + "name" : "latencySeconds", + "values" : { + "average" : 0.009578947368421053, + "sum" : 0.182, + "count" : 19, + "last" : 0.003, + "max" : 0.057, + "min" : 0.0, + "rate" : 0.31666666666666665 + } + } ] + } +} +``` + +## CLUSTER\_IP + +This type restricts access to within the Kubernetes cluster. It provides a stable internal IP and DNS name (e.g., `lb-default.vespa.svc.cluster.local`) but assigns no external IP. + +**Configuration:** + +```bash +ingress: + endpointType: CLUSTER_IP +``` + +The `CLUSTER_IP` service is ideal for architectures where the clients (e.g., front-end applications or ingestion services) run inside the same Kubernetes cluster as Vespa. + +## NONE + +This option disables automatic Service provisioning. Use this if you intend to manually define `Ingress` resources, use a Service Mesh (like Istio or Linkerd), or have complex networking requirements not covered by the standard types. + +**Configuration:** + +```bash +ingress: + endpointType: NONE +``` + +## Traffic Routing & Labeling + +To ensure zero-downtime deployments, the ConfigServer manages traffic routing dynamically using Kubernetes labels. The created Services use the selector `vespa.ai/tenant-lb: backend`. When the Pod is provisioned, these labels are automatically attached. + +During a rolling upgrade, the label is removed from the terminating Pod(s) before they are shut down. This provides a window for the remaining traffic to drain before the Pod is upgraded. + + +**Note**: + +The Service exposes port **80** (plaintext) and **443** (TLS) externally, mapping them to the container's port 4443. 
+ \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/logging.mdx b/mintlify-docs/en/operations/kubernetes/logging.mdx new file mode 100644 index 0000000000..ebcdf5b5d2 --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/logging.mdx @@ -0,0 +1,152 @@ +--- +title: "Configure Log Collection" +description: "Use the Fluent Bit Operator to collect logs and forward them to Grafana Cloud Loki." +--- + +## 1. Install Fluent Bit Operator + +Install the Fluent Bit Operator in a dedicated `logging` namespace. + +```bash +$ helm repo add fluent https://fluent.github.io/helm-charts +$ helm repo update +$ kubectl create namespace logging +$ helm install fluent-operator fluent/fluent-operator --namespace logging --set operator.logLevel=debug +``` + +Verify the installation by ensuring the operator Pod is running: + +```bash +$ kubectl get pods -n logging +``` + +## 2. Configure Loki Credentials + +To forward logs to Grafana Cloud Loki, you must create a Kubernetes Secret containing your credentials. Obtain your User ID and API Token from the Grafana Cloud Portal under *Connections → Loki*. + +```bash +$ kubectl create secret generic grafana-cloud-loki -n logging --from-literal=username=$USER_ID --from-literal=password=$API_TOKEN +``` + +## 3. Deploy Fluent Bit Configuration + +The Fluent Operator uses Custom Resources to define the logging pipeline. The following configuration sets up a **ClusterInput** to tail container logs, a **ClusterFilter** to add Kubernetes metadata, and a **ClusterOutput** to ship logs to Loki. + +Save the following as `fluentbit-logging.yaml`. **Note:** Replace `logs-prod-006.grafana.net` with your specific Loki endpoint. 
+ +```yaml expandable +apiVersion: fluentbit.fluent.io/v1alpha2 +kind: ClusterFluentBitConfig +metadata: + name: fluent-bit-config +spec: + service: + httpServer: true + parsersFile: parsers.conf + inputSelector: + matchLabels: + fluentbit.fluent.io/enabled: "true" + filterSelector: + matchLabels: + fluentbit.fluent.io/enabled: "true" + outputSelector: + matchLabels: + fluentbit.fluent.io/enabled: "true" +--- +apiVersion: fluentbit.fluent.io/v1alpha2 +kind: ClusterInput +metadata: + name: kube-containers + labels: + fluentbit.fluent.io/enabled: "true" +spec: + tail: + tag: kube.* + path: /var/log/containers/*.log + parser: cri + readFromHead: false + memBufLimit: 5MB +--- +apiVersion: fluentbit.fluent.io/v1alpha2 +kind: ClusterFilter +metadata: + name: k8s-meta + labels: + fluentbit.fluent.io/enabled: "true" +spec: + match: "kube.*" + filters: + - kubernetes: + mergeLog: true + keepLog: false + labels: true + annotations: true +--- +apiVersion: fluentbit.fluent.io/v1alpha2 +kind: ClusterOutput +metadata: + name: loki + labels: + fluentbit.fluent.io/enabled: "true" +spec: + match: "kube.*" + loki: + host: logs-prod-006.grafana.net + port: 443 + tls: + verify: false + httpUser: + valueFrom: + secretKeyRef: + name: grafana-cloud-loki + key: username + httpPassword: + valueFrom: + secretKeyRef: + name: grafana-cloud-loki + key: password + labels: + - job=fluentbit + - cluster=minikube + autoKubernetesLabels: "off" +--- +apiVersion: fluentbit.fluent.io/v1alpha2 +kind: FluentBit +metadata: + name: fluent-bit + namespace: logging +spec: + fluentBitConfigName: fluent-bit-config + image: ghcr.io/fluent/fluent-operator/fluent-bit:3.1.4 + tolerations: + - operator: Exists +``` + +Apply the configuration to your cluster: + +```bash +$ kubectl apply -f fluentbit-logging.yaml +``` + +## 4. Query Logs + +Once deployed, Fluent Bit will run as a `DaemonSet` on every node. You can query the logs using LogQL in Grafana Explore. 
+ +Check the Fluent Bit logs to ensure there are no authentication errors (HTTP 401): + +```bash +$ kubectl logs -n logging -l app.kubernetes.io/name=fluent-bit --tail=50 +``` + +Use these LogQL queries to inspect Vespa components specifically: + +```bash +# Filter logs for the Config Server +{cluster="minikube", namespace_name="default", pod_name=~"cfg-.*"} + +# Filter logs for specific containers (e.g., query container) +{cluster="minikube", container_name="vespa"} + +# Search for errors across all Vespa pods +{cluster="minikube"} |= "error" +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/monitoring.mdx b/mintlify-docs/en/operations/kubernetes/monitoring.mdx new file mode 100644 index 0000000000..26c1f7b079 --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/monitoring.mdx @@ -0,0 +1,194 @@ +--- +title: "Monitor a Vespa on Kubernetes Deployment" +--- + +Use the Prometheus Operator to collect metrics from a Vespa on Kubernetes deployment. This guide covers the installation of the monitoring stack, configuration of `PodMonitor` resources for Vespa components, and forwarding metrics to Grafana Cloud. + +## Prerequisites + +- A Kubernetes cluster (EKS, GKE, AKS, or Minikube). +- [Helm CLI](https://helm.sh/docs/intro/install/) +- Kubernetes Command Line Tool ([kubectl](https://kubernetes.io/docs/reference/kubectl/)) +- A Grafana Cloud account + +## 1. Install Prometheus Operator + +The recommended way to install Prometheus on Kubernetes is via the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) Helm chart. Add the repository and create a monitoring namespace. 
+ +```bash +$ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +$ helm repo update +$ kubectl create namespace monitoring +``` + +### Configure Grafana Cloud Credentials + +If you intend to forward metrics to Grafana Cloud, create a Kubernetes Secret with your credentials. Retrieve your **Instance ID** (User) and **API Token** (Password) from the Grafana Cloud Portal under *Configure Prometheus*. + +```bash +$ kubectl create secret generic grafana-cloud-prometheus -n monitoring --from-literal=username=$INSTANCE_ID --from-literal=password=$API_TOKEN +``` + +### Configure Helm Values + +Create a `prometheus-values.yaml` file. This configuration enables remote writing to Grafana Cloud, configures the Prometheus Operator to select all `PodMonitors`, and disables the local Grafana instance. + +```yaml expandable +prometheus: + prometheusSpec: + # Allow Prometheus to discover PodMonitors in other namespaces + podMonitorSelectorNilUsesHelmValues: false + serviceMonitorSelectorNilUsesHelmValues: false + + # Remote write configuration for Grafana Cloud + remoteWrite: + - url: https://prometheus-prod-XX-prod-XX.grafana.net/api/prom/push + basicAuth: + username: + name: grafana-cloud-prometheus + key: username + password: + name: grafana-cloud-prometheus + key: password + writeRelabelConfigs: + - sourceLabels: [__address__] + targetLabel: cluster + replacement: my-cluster-name + +# Disable local Grafana +grafana: + enabled: false + +# Enable Alertmanager and Kube State Metrics +alertmanager: + enabled: true +kubeStateMetrics: + enabled: true +``` + +Install the stack using Helm: + +```bash +$ helm install prometheus prometheus-community/kube-prometheus-stack --namespace monitoring --values prometheus-values.yaml +``` + +## 2. Configure PodMonitors + +Vespa exposes metrics on specific ports that differ from standard web traffic ports. 
We use the `PodMonitor` Custom Resource to define how Prometheus should scrape these endpoints. + +### Monitor ConfigServer Pods + +ConfigServers expose metrics on port **19071** at the path `/configserver-metrics`. Apply the following configuration to scrape these metrics. + +```yaml expandable +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: vespa-configserver + namespace: $NAMESPACE + labels: + release: prometheus # Required to be picked up by the operator +spec: + selector: + matchLabels: + app: vespa-configserver + podMetricsEndpoints: + - targetPort: 19071 + path: /configserver-metrics + interval: 30s + scheme: http + params: + format: ['prometheus'] + relabelings: + # Map Kubernetes pod name to the 'pod' label + - sourceLabels: [__meta_kubernetes_pod_name] + targetLabel: pod + - targetLabel: vespa_role + replacement: configserver +``` + +### Monitor Application Pods + +Container and Content Pods expose metrics on the state API port **19092** at `/prometheus/v1/values`. The following example defines a PodMonitor for Vespa application pods. + +```yaml expandable +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: vespa-application + namespace: default + labels: + release: prometheus +spec: + selector: + matchExpressions: + # Selects pods that are part of a Vespa application (feed, query, content) + - key: vespa.ai/cluster-name + operator: Exists + podMetricsEndpoints: + - targetPort: 19092 + path: /prometheus/v1/values + interval: 30s + scheme: http + relabelings: + - sourceLabels: [__meta_kubernetes_pod_name] + targetLabel: pod + - sourceLabels: [__meta_kubernetes_namespace] + targetLabel: namespace + # Extract the role from the pod name or labels if needed + - targetLabel: vespa_role + replacement: node +``` + +## 3. Verify Metrics + +Once the `PodMonitors` are applied, verify that Prometheus is successfully scraping the targets. 
+ +### Check Targets Locally + +Port-forward the Prometheus UI to your local machine: + +```bash +$ kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 +``` + +Navigate to [http://localhost:9090/targets](http://localhost:9090/targets). You should see targets named `default/vespa-configserver` and `default/vespa-application` in the **UP** state. + +### Query Metrics + +You can verify the data using PromQL queries in the Prometheus UI or Grafana Explore: + +```js +# Check availability of Config Servers +up{vespa_role="configserver"} + +# Retrieve average maintenance duration +vespa_maintenance_duration_average + +# List all metrics coming from Vespa +{job=~"default/vespa-.*"} +``` + +## Troubleshooting + +**Targets show `No active targets`**: + +This indicates the `PodMonitor` selector does not match any Pods. Verify the labels on your Vespa pods: + +```bash +$ kubectl get pods -n $NAMESPACE --show-labels +``` + +Ensure the `selector.matchLabels` in your `PodMonitor` YAML matches the labels shown in the output above. + +**Targets are in `DOWN` state**: + +This usually means Prometheus cannot reach the metric endpoint. Verify that the metrics are exposed on the expected port by running a curl command from within the cluster: + +```bash +$ kubectl run curl-test -n $NAMESPACE --image=curlimages/curl -it --rm -- curl http://cfg-0.$NAMESPACE.svc.cluster.local:19071/configserver-metrics?format=prometheus +``` + +**Network Policies**: + +If you use `NetworkPolicy` to restrict traffic, ensure you have a policy allowing ingress traffic from the `monitoring` namespace to the `$NAMESPACE` namespace on ports 19071 and 19092. 
\ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/operations/delete-vespaset.mdx b/mintlify-docs/en/operations/kubernetes/operations/delete-vespaset.mdx new file mode 100644 index 0000000000..29ff4d17fb --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/operations/delete-vespaset.mdx @@ -0,0 +1,92 @@ +--- +title: "Delete a VespaSet" +sidebarTitle: "Delete a VespaSet" +--- + +This page provides instructions for deleting a VespaSet. + +The ConfigServer and Application Pods use [Kubernetes PreStop Hooks](https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/) to prevent their immediate removal when evicted voluntarily or deleted involuntarily. In production cases, these finalizers are paramount for ensuring proper data redistribution between the Content Pods. However, they also have the adverse side effect of making Vespa difficult to fully uninstall. + +Follow the steps below to fully uninstall your setup. This example assumes the Pods were created for the [Album Recommendation](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation) sample application, the Pods are scheduled in the `$NAMESPACE` namespace, and a `VespaSet` called `vespaset-test` was deployed. + + + **Important:** These instructions should not be run on production serving environments. + + +## Steps + +### Delete Pods + +Run `vespa-stop-configserver` on all ConfigServer Pods. This will ensure that any finalizers will exit immediately, since all finalizers ultimately route to a ConfigServer. + +```bash +$ kubectl exec -n $NAMESPACE cfg-1 -- vespa-stop-configserver +$ kubectl exec -n $NAMESPACE cfg-2 -- vespa-stop-configserver +$ kubectl exec -n $NAMESPACE cfg-3 -- vespa-stop-configserver +``` + +Run `vespa-stop-services` on all Application Pods. 
+ +```bash +$ kubectl exec -n $NAMESPACE default-100 -- vespa-stop-services +$ kubectl exec -n $NAMESPACE default-101 -- vespa-stop-services +$ kubectl exec -n $NAMESPACE music-102 -- vespa-stop-services +$ kubectl exec -n $NAMESPACE music-103 -- vespa-stop-services +$ kubectl exec -n $NAMESPACE cluster-controllers-104 -- vespa-stop-services +$ kubectl exec -n $NAMESPACE cluster-controllers-105 -- vespa-stop-services +$ kubectl exec -n $NAMESPACE cluster-controllers-106 -- vespa-stop-services +``` + +Delete all the ConfigServer and Application Pods. The finalizers will exit immediately. + +```bash +$ kubectl delete pod -n $NAMESPACE cfg-1 --grace-period=0 --force --ignore-not-found +$ kubectl delete pod -n $NAMESPACE cfg-2 --grace-period=0 --force --ignore-not-found +$ kubectl delete pod -n $NAMESPACE cfg-3 --grace-period=0 --force --ignore-not-found +$ kubectl delete pod -n $NAMESPACE default-100 --grace-period=0 --force --ignore-not-found +$ kubectl delete pod -n $NAMESPACE default-101 --grace-period=0 --force --ignore-not-found +$ kubectl delete pod -n $NAMESPACE music-102 --grace-period=0 --force --ignore-not-found +$ kubectl delete pod -n $NAMESPACE music-103 --grace-period=0 --force --ignore-not-found +$ kubectl delete pod -n $NAMESPACE cluster-controllers-104 --grace-period=0 --force --ignore-not-found +$ kubectl delete pod -n $NAMESPACE cluster-controllers-105 --grace-period=0 --force --ignore-not-found +$ kubectl delete pod -n $NAMESPACE cluster-controllers-106 --grace-period=0 --force --ignore-not-found +``` + +### Delete Persistent Volume Claims + +Delete all PersistentVolumeClaims (PVCs) from the namespace. This should be performed after all Pods have been deleted, to ensure PVC deletion does not hang on a Pod binding. 
+ +```bash +$ kubectl delete pvc -n $NAMESPACE cfg-1-data --ignore-not-found +$ kubectl delete pvc -n $NAMESPACE cfg-2-data --ignore-not-found +$ kubectl delete pvc -n $NAMESPACE cfg-3-data --ignore-not-found +$ kubectl delete pvc -n $NAMESPACE default-100-data --ignore-not-found +$ kubectl delete pvc -n $NAMESPACE default-101-data --ignore-not-found +$ kubectl delete pvc -n $NAMESPACE music-102-data --ignore-not-found +$ kubectl delete pvc -n $NAMESPACE music-103-data --ignore-not-found +``` + +### Delete ConfigMaps + +Delete all remaining ConfigMaps in the namespace. + +```bash +$ kubectl delete configmap -n $NAMESPACE vespa-config --ignore-not-found +``` + +### Delete Services + +Delete any Services and other networking components that may have been setup by the operator. + +```bash +$ kubectl delete svc -n $NAMESPACE x --ignore-not-found +$ kubectl delete svc -n $NAMESPACE cfg-internal --ignore-not-found +``` + +### Delete VespaSet + +Delete the `VespaSet` resource. With all Pods and services already removed, the operator's finalizer will exit immediately. + +```bash +$ kubectl delete vespaset -n $NAMESPACE vespaset-test --ignore-not-found +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/operations/monitoring.mdx b/mintlify-docs/en/operations/kubernetes/operations/monitoring.mdx new file mode 100644 index 0000000000..6a9df01a7b --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/operations/monitoring.mdx @@ -0,0 +1,195 @@ +--- +title: "Monitor a Vespa on Kubernetes Deployment" +sidebarTitle: "Monitor a Vespa on Kubernetes Deployment" +--- + +Use the Prometheus Operator to collect metrics from a Vespa on Kubernetes deployment. This guide covers the installation of the monitoring stack, configuration of `PodMonitor` resources for Vespa components, and forwarding metrics to Grafana Cloud. + +## Prerequisites + +- A Kubernetes cluster (EKS, GKE, AKS, or Minikube). 
+- [Helm CLI](https://helm.sh/docs/intro/install/) +- Kubernetes Command Line Tool ([kubectl](https://kubernetes.io/docs/reference/kubectl/)) +- A Grafana Cloud account + +## 1. Install Prometheus Operator + +The recommended way to install Prometheus on Kubernetes is via the [kube-prometheus-stack](https://github.com/prometheus-community/helm-charts/tree/main/charts/kube-prometheus-stack) Helm chart. Add the repository and create a monitoring namespace. + +```bash +$ helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +$ helm repo update +$ kubectl create namespace monitoring +``` + +### Configure Grafana Cloud Credentials + +If you intend to forward metrics to Grafana Cloud, create a Kubernetes Secret with your credentials. Retrieve your **Instance ID** (User) and **API Token** (Password) from the Grafana Cloud Portal under _Configure Prometheus_. + +```bash +$ kubectl create secret generic grafana-cloud-prometheus -n monitoring --from-literal=username=$INSTANCE_ID --from-literal=password=$API_TOKEN +``` + +### Configure Helm Values + +Create a `prometheus-values.yaml` file. This configuration enables remote writing to Grafana Cloud, configures the Prometheus Operator to select all `PodMonitors`, and disables the local Grafana instance. 
+ +```yaml expandable +prometheus: + prometheusSpec: + # Allow Prometheus to discover PodMonitors in other namespaces + podMonitorSelectorNilUsesHelmValues: false + serviceMonitorSelectorNilUsesHelmValues: false + + # Remote write configuration for Grafana Cloud + remoteWrite: + - url: https://prometheus-prod-XX-prod-XX.grafana.net/api/prom/push + basicAuth: + username: + name: grafana-cloud-prometheus + key: username + password: + name: grafana-cloud-prometheus + key: password + writeRelabelConfigs: + - sourceLabels: [__address__] + targetLabel: cluster + replacement: my-cluster-name + +# Disable local Grafana +grafana: + enabled: false + +# Enable Alertmanager and Kube State Metrics +alertmanager: + enabled: true +kubeStateMetrics: + enabled: true +``` + +Install the stack using Helm: + +```bash +$ helm install prometheus prometheus-community/kube-prometheus-stack --namespace monitoring --values prometheus-values.yaml +``` + +## 2. Configure PodMonitors + +Vespa exposes metrics on specific ports that differ from standard web traffic ports. We use the `PodMonitor` Custom Resource to define how Prometheus should scrape these endpoints. + +### Monitor ConfigServer Pods + +ConfigServers expose metrics on port **19071** at the path `/configserver-metrics`. Apply the following configuration to scrape these metrics. 
+ +```yaml expandable +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: vespa-configserver + namespace: $NAMESPACE + labels: + release: prometheus # Required to be picked up by the operator +spec: + selector: + matchLabels: + app: vespa-configserver + podMetricsEndpoints: + - targetPort: 19071 + path: /configserver-metrics + interval: 30s + scheme: http + params: + format: ['prometheus'] + relabelings: + # Map Kubernetes pod name to the 'pod' label + - sourceLabels: [__meta_kubernetes_pod_name] + targetLabel: pod + - targetLabel: vespa_role + replacement: configserver +``` + +### Monitor Application Pods + +Container and Content Pods expose metrics on the state API port **19092** at `/prometheus/v1/values`. The following example defines a PodMonitor for Vespa application pods. + +```yaml expandable +apiVersion: monitoring.coreos.com/v1 +kind: PodMonitor +metadata: + name: vespa-application + namespace: default + labels: + release: prometheus +spec: + selector: + matchExpressions: + # Selects pods that are part of a Vespa application (feed, query, content) + - key: vespa.ai/cluster-name + operator: Exists + podMetricsEndpoints: + - targetPort: 19092 + path: /prometheus/v1/values + interval: 30s + scheme: http + relabelings: + - sourceLabels: [__meta_kubernetes_pod_name] + targetLabel: pod + - sourceLabels: [__meta_kubernetes_namespace] + targetLabel: namespace + # Extract the role from the pod name or labels if needed + - targetLabel: vespa_role + replacement: node +``` + +## 3. Verify Metrics + +Once the `PodMonitors` are applied, verify that Prometheus is successfully scraping the targets. + +### Check Targets Locally + +Port-forward the Prometheus UI to your local machine: + +```bash +$ kubectl port-forward -n monitoring svc/prometheus-kube-prometheus-prometheus 9090:9090 +``` + +Navigate to [http://localhost:9090/targets](http://localhost:9090/targets). 
You should see targets named `default/vespa-configserver` and `default/vespa-application` in the **UP** state. + +### Query Metrics + +You can verify the data using PromQL queries in the Prometheus UI or Grafana Explore: + +```js +# Check availability of Config Servers +up{vespa_role="configserver"} + +# Retrieve average maintenance duration +vespa_maintenance_duration_average + +# List all metrics coming from Vespa +{job=~"default/vespa-.*"} +``` + +## Troubleshooting + +**Targets show `No active targets`**: + +This indicates the `PodMonitor` selector does not match any Pods. Verify the labels on your Vespa pods: + +```bash +$ kubectl get pods -n $NAMESPACE --show-labels +``` + +Ensure the `selector.matchLabels` in your `PodMonitor` YAML matches the labels shown in the output above. + +**Targets are in `DOWN` state**: + +This usually means Prometheus cannot reach the metric endpoint. Verify that the metrics are exposed on the expected port by running a curl command from within the cluster: + +```bash +$ kubectl run curl-test -n $NAMESPACE --image=curlimages/curl -it --rm -- curl http://cfg-0.$NAMESPACE.svc.cluster.local:19071/configserver-metrics?format=prometheus +``` + +**Network Policies**: + +If you use `NetworkPolicy` to restrict traffic, ensure you have a policy allowing ingress traffic from the `monitoring` namespace to the `$NAMESPACE` namespace on ports 19071 and 19092. 
\ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/operations/operations.mdx b/mintlify-docs/en/operations/kubernetes/operations/operations.mdx new file mode 100644 index 0000000000..11605c42ec --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/operations/operations.mdx @@ -0,0 +1,62 @@ +--- +title: "Lifecycle Operations for Vespa on Kubernetes" +sidebarTitle: "Operations" +--- +The ConfigServer and Vespa Application Pods have built-in resilience and recovery capabilities; they are automatically recovered during failures and gracefully shut down during maintenance or scaling operations to preserve data integrity. + +### Automatic Recovery + +Vespa relies on standard Kubernetes controllers to detect and restart crashed Pods. If a container exits unexpectedly (e.g., OOMKilled or application crash), the kubelet will automatically restart it. + +However, the ConfigServers track the health history of every Pod. To prevent a "crash loop" from causing cascading failures or constantly churning resources, the system implements a strict throttling mechanism. The ConfigServers allow a maximum of 2 involuntary Pod disruptions per 24-hour period for a given Vespa Application. If this limit is exceeded, the ConfigServer stops automatically failing these Pods and will require human intervention to investigate the root cause. + +### Graceful Shutdown + +To prevent query failures or data loss during termination, a [PreStop Hook](https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/) is placed on every ConfigServer and Vespa Application Pod. During a voluntary disruption, this hook ensures that existing traffic is drained and that data is flushed before killing the Pod. + +Two types of disruptions exist in Kubernetes: + +| Type | Scenario | Behavior | +| --- | --- | --- | +| **Voluntary Disruption** | Scaling down, rolling upgrades, or node maintenance. 
| The preStop hook detects a voluntary disruption, stops the Vespa Container cluster from accepting new traffic, flushes in-memory data to disk for Content clusters, and ensures a clean exit before the Pod is deleted. | +| **Involuntary Disruption** | Node hardware failure, kernel panic, or eviction. | Kubernetes initiates the termination. The preStop hook attempts to run to flush data and close connections. However, if the Pod is lost abruptly, the hook cannot run, and recovery relies on Vespa's data replication. | + +### Pod Disruption Budget + +Defining a `PodDisruptionBudget` (PDB) is not supported for Vespa on Kubernetes. The ConfigServers will override any PDB with their own orchestration policy. + +### Application Pod Resources + +For Vespa Application Pods, the resources for each Pod, the number of Pods in a Vespa cluster, and the group configuration can be updated through the `` element in the application package. Refer to the [specification](/en/reference/applications/services/services) for more details. + +### ConfigServer Pod Resources + +ConfigServer Pod resources can be configured by overriding the `vespa` container's resource specification via the PodTemplate in the `VespaSet`. The Config Server deduces its heap size from the Pod cgroup limits, which are derived from the `requests` and `limits` set on the Pod. Setting requests and limits to the same value is recommended to ensure the heap size is deduced correctly. + +Horizontally scaling the replica count for ConfigServer Pods is not supported. + +```yaml +apiVersion: k8s.ai.vespa/v1 +kind: VespaSet +metadata: + name: sample-vespaset +spec: + configServer: + image: "$VESPA_IMAGE" + storageClass: "gp3" + podTemplate: + spec: + containers: + - name: vespa + resources: + requests: + cpu: "4" + memory: "8Gi" + limits: + cpu: "4" + memory: "8Gi" +``` + +### Autoscaling + +Vespa on Kubernetes provides autoscaling through ranges specified in the `resource` elements in the application package. 
Refer to the [Autoscaling](/en/operations/autoscaling) guide for more details. \ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/operations/upgrades.mdx b/mintlify-docs/en/operations/kubernetes/operations/upgrades.mdx new file mode 100644 index 0000000000..c417904ea8 --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/operations/upgrades.mdx @@ -0,0 +1,196 @@ +--- +title: "Upgrades" +sidebarTitle: "Upgrade Vespa on Kubernetes" +--- + +Vespa on Kubernetes supports zero-downtime rolling upgrades. An upgrade involves upgrading the `vespa-operator` via the Helm chart and the ConfigServer and Application (Container and Content) Pods through the `VespaSet` resource. + +We do not support version drift between the `vespa-operator` and the `VespaSet`. Accordingly, upgrades should be planned so that all components are updated together. To ensure availability, they should be performed in the order shown in this guide. + +## Update the CRD + +Some upgrades may introduce changes to the `VespaSet` CRD definition. These changes should be applied to the cluster before performing the upgrade. As a rule of thumb, we recommend executing this before every upgrade procedure. + +Helm does not manage the lifecycle of the CRD after it is installed (see [the official documentation](https://helm.sh/docs/chart_best_practices/custom_resource_definitions/)). As a result, CRD updates must be handled manually. Given the official Helm Chart for Vespa on Kubernetes, this can be performed by extracting the CRD definition from the OCI package and applying it directly using `kubectl`. + +```text +$ helm show crds $HELM_CHART_REF --version $VESPA_VERSION > vespaset-crd.yaml +$ kubectl apply -f vespaset-crd.yaml +``` + +## Upgrade the Vespa Operator + +The operator can be upgraded through Helm by running `helm upgrade` with the new `VESPA_VERSION`. Replace `$NAMESPACE` with the namespace where Vespa is installed. 
Refer to [Factory](https://factory.vespa.ai/) for the latest `VESPA_VERSION`. Note that upgrading the operator does not affect the ConfigServer and Application Pods. Their upgrade will be performed in a subsequent step. + +```bash +$ helm upgrade vespa-operator vespa/vespa-operator \ + --version $OPERATOR_VERSION \ + --namespace $NAMESPACE \ + --reuse-values +``` + +Wait for the operator to finish rolling out before proceeding. + +```js +$ kubectl rollout status deployment/vespa-operator -n $NAMESPACE +``` + +## Upgrade the VespaSet + +To upgrade the ConfigServer and application Pods, patch the `spec.version` field in the `VespaSet` resource. Ensure that the target image is available and accessible on the Kubernetes Node at `VESPA_OPERATOR_IMAGE:VESPA_VERSION` and `VESPA_IMAGE:VESPA_VERSION` before proceeding. For example: + +```bash +$ cat > vespaset.yaml < +Annotations: +API Version: k8s.ai.vespa/v1 +Kind: VespaSet +Metadata: + Creation Timestamp: 2026-01-29T21:32:27Z + Finalizers: + vespasets.k8s.ai.vespa/finalizer + Generation: 1 + Resource Version: 121822902 + UID: a70f56e9-6625-4011-acd7-9f7cad29dbc2 +Spec: + Application: + Image: $VESPA_IMAGE + Storage Class: gp3 + Config Server: + Generate Rbac: false + Image: $VESPA_IMAGE + Storage Class: gp3 + Ingress: + Endpoint Type: LOAD_BALANCER + Version: 8.577 +Status: + Bootstrap Status: + Pods: + cfg-1: + Last Updated: 2026-01-29T21:38:45Z + Message: Pod is running + Phase: RUNNING + Converged Version: 8.577 + cfg-2: + Last Updated: 2026-01-29T21:38:09Z + Message: Pod is running + Phase: RUNNING + Converged Version: 8.577 + cfg-3: + Last Updated: 2026-01-29T21:36:32Z + Message: Pod is running + Phase: RUNNING + Converged Version: 8.577 + default-100: + Last Updated: 2026-01-29T21:38:45Z + Message: Pod is upgrading + Phase: UPGRADING + Converged Version: 8.576 + default-101: + Last Updated: 2026-01-29T21:38:09Z + Message: Pod is running + Phase: RUNNING + Converged Version: 8.576 + documentation-102: + Last 
Updated: 2026-01-29T21:36:32Z + Message: Pod is running + Phase: RUNNING + Converged Version: 8.576 + documentation-103: + Last Updated: 2026-01-29T21:36:32Z + Message: Pod is running + Phase: RUNNING + Converged Version: 8.576 + cluster-controller-104: + Last Updated: 2026-01-29T21:36:32Z + Message: Pod is running + Phase: RUNNING + Converged Version: 8.576 + cluster-controller-105: + Last Updated: 2026-01-29T21:36:32Z + Message: Pod is running + Phase: RUNNING + Converged Version: 8.576 + cluster-controller-106: + Last Updated: 2026-01-29T21:36:32Z + Message: Pod is running + Phase: RUNNING + Converged Version: 8.576 + Last Transition Time: 2026-01-29T21:33:55Z + Message: All configservers running + Phase: RUNNING +Events: +``` + +The upgrade is complete when every Pod's `Converged Version` matches the new version and all phases report `RUNNING`. + +## Debugging Upgrade Failures + +If a Pod fails to converge to the target version — for example, due to an image pull failure, a crash loop, or a failed health check — the ConfigServer will continuously retry the upgrade for that Pod until it either succeeds or an administrator intervenes. + +In this scenario, the administrator can diagnose the issue by inspecting the ConfigServer logs or the events of the failing Pod in the current upgrade phase. Once the issue is resolved, the ConfigServer will automatically retry the upgrade for that Pod and proceed with the remaining nodes. + +For example, suppose the Pod `search-106` is failing to upgrade. + +```bash +$ kubectl logs cfg-1 -n $NAMESPACE +$ kubectl logs cfg-2 -n $NAMESPACE +$ kubectl logs cfg-3 -n $NAMESPACE +$ kubectl describe pod search-106 -n $NAMESPACE +``` + +This design prevents a bad upgrade from cascading to the rest of the Pods. Since the ConfigServer refuses to advance past a Pod that has not converged, the remaining Pods stay on the previous known-good version while the administrator investigates. 
\ No newline at end of file diff --git a/mintlify-docs/en/operations/kubernetes/tls.mdx b/mintlify-docs/en/operations/kubernetes/tls.mdx new file mode 100644 index 0000000000..0549691e43 --- /dev/null +++ b/mintlify-docs/en/operations/kubernetes/tls.mdx @@ -0,0 +1,402 @@ +--- +title: "Enable TLS Encryption for Vespa on Kubernetes" +sidebarTitle: "Enable TLS Encryption" +--- + +TLS encryption for Vespa on Kubernetes can be configured for internal pod-to-pod communication using mutual TLS (mTLS) and for external ingress traffic. This guide demonstrates using [cert-manager](https://cert-manager.io/), a Kubernetes-native certificate management solution that automates certificate issuance and renewal, to set up TLS for the Vespa on Kubernetes deployment. + +`cert-manager` integrates with multiple certificate authorities including self-signed CAs, Let's Encrypt, HashiCorp Vault, and commercial providers. It handles the certificate lifecycle by automatically issuing certificates and renewing them before expiration. The [cert-manager CSI driver](https://cert-manager.io/docs/usage/csi-driver/) provides secure certificate delivery to pods through runtime injection via a DaemonSet, ensuring certificates are available before containers start. + +## Prerequisites + +- Kubernetes cluster with Vespa Operator installed (see [Installation](/en/operations/kubernetes/deployment/installation)) +- [cert-manager](https://cert-manager.io/docs/installation/) v1.13 or later +- [cert-manager CSI driver](https://cert-manager.io/docs/usage/csi-driver/) installed +- Kubernetes Command Line Tool ([kubectl](https://kubernetes.io/docs/reference/kubectl/)) +- OpenSSL (for CA generation) + +## Enable mTLS for Internal Communication + +Mutual TLS (mTLS) secures communication between Vespa services within the Kubernetes cluster. Each pod authenticates using client certificates issued by a namespace-local root Certificate Authority. 
It is also possible to configure the Certificate Authority to be cluster-global. + +This method is ideal for those who prefer TLS to terminate at the service, or those who have already integrated with mTLS from Vespa Cloud. For more details on Vespa's mTLS implementation, see the [Vespa mTLS documentation](/en/security/mtls). + +### Step 1: Create Certificate Authority + +Create a self-signed issuer to bootstrap the certificate chain, then use it to create a namespace-local root CA certificate that acts as the trust anchor for all internal mTLS certificates. + +```yaml expandable +$ cat < + + + + + + + + \ No newline at end of file diff --git a/mintlify-docs/en/operations/login.mdx b/mintlify-docs/en/operations/login.mdx new file mode 100644 index 0000000000..64b4bf5d1f --- /dev/null +++ b/mintlify-docs/en/operations/login.mdx @@ -0,0 +1,9 @@ +--- +title: "Login" +--- + +## MFA reset + +If you have lost your MFA information, you can request a reset from Vespa.ai Support. + +To reset MFA for a user, contact [support@vespa.ai](mailto:support@vespa.ai) with the user's email address. Support will send a verification code to that email address as part of the reset process. Once received, the code must be returned to support — along with explicit confirmation that the MFA reset is authorized — before the reset can be completed. diff --git a/mintlify-docs/en/operations/metrics.mdx b/mintlify-docs/en/operations/metrics.mdx new file mode 100644 index 0000000000..d2a8b749fb --- /dev/null +++ b/mintlify-docs/en/operations/metrics.mdx @@ -0,0 +1,200 @@ +--- +title: "Metrics" +--- + +Metrics for all nodes are aggregated using *[/metrics/v2/values](/en/reference/api/metrics-v2#metrics-v2-values)* or *[/prometheus/v1/values](/en/reference/api/prometheus-v1#prometheus-v1-values)*. Values from these endpoints reflect the 1 minute activity immediately before the request. 
+ +Example of getting a metric value using the Prometheus endpoint: + +```bash +$ curl -s http://ENDPOINT/prometheus/v1/values/?consumer=vespa | \ + grep "vds.idealstate.merge_bucket.pending.average" | egrep -v 'HELP|TYPE' +``` + + +**Important:** + +Make sure to use [consumer=vespa](/en/reference/api/metrics-v1#consumer) to list all metrics. + + +Example of getting a metric value using */metrics/v2/values*: + +```bash +$ curl ENDPOINT/metrics/v2/values | \ + jq -r -c ' + .nodes[] | + .hostname as $h | + .services[].metrics[] | + select(.values."content.proton.documentdb.documents.total.last") | + [$h, .dimensions.documenttype, .values."content.proton.documentdb.documents.total.last"] | + @tsv' + +node9.vespanet music 0 +node8.vespanet music 0 +``` + +## Aggregating metrics + +Metrics in Vespa are generated from services running on the individual nodes, and in many cases have many recordings per metric, from within each node, with unique tag / dimension combinations. These recordings need to be put together to contribute to the overall picture of how the system is behaving. If this is done the right way, you will be able to “zoom out” to get the bigger picture, or to “zoom in” to see how things behave in more detail. This is very useful when looking into possible production issues. Unfortunately it is easy to combine metrics the wrong way, resulting in potentially significantly distorted graphs. + +For each of the values (suffixes) available for the different metrics, here is how we recommend that you aggregate them to get the best use of them. The guidelines should be used both for aggregations over time (multiple snapshot intervals) and over tag combinations. + +| Suffix Name | Aggregation | +| --- | --- | +| `max` | Use the highest value available `MAX(max)`. | +| `min` | Use the lowest value available `MIN(min)`. | +| `sum` | Use the sum of all values `SUM(sum)`. | +| `count` | Use the sum of all values `SUM(count)`. 
| +| `average` | To generate an average value you want to do `SUM(sum) / SUM(count)` where you generate the graph. Don’t use the `average` suffix itself if you have the `sum` and `count` suffixes available. Using this will easily lead to computing averages of averages, which will easily become very distorted and noisy. | +| `last` | Avoid this except for metrics you expect to be stable, such as amount of memory available on a node, etc. This value is the last from a metrics snapshot period, hence basically a single value picked from all values during the snapshot period. Typically, very noisy for volatile metrics. It does not make sense to aggregate on this value at all, but if you must, then choose a value with the same combination of tags over time. | +| `95percentile` | This value cannot be aggregated in a way that gives a mathematically correct value. If you have to aggregate it, either compute the average for the most realistic value, `AVERAGE(95percentile)`, or use the max if the goal is to better identify outliers, `MAX(95percentile)`. Regardless, this value is best used when considered at the most granular level, with all tag values specified. | +| `99percentile` | Same as for the `95percentile` suffix. 
| + +## Metric-sets + +Node metrics in */metrics/v1/values* are listed per service, with a set of system metrics - example: + +```json expandable +{ + "services": [ + { + "name": "vespa.container", + "timestamp": 1662120754, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 3683172352, + "memory_rss": 1425416192, + "cpu": 2.0234722784298, + "cpu_util": 0.202347227843 + }, + "dimensions": { + "metrictype": "system", + "instance": "container", + "clustername": "default", + "vespaVersion": "8.46.19" + } + }, + { + "values": {}, + "dimensions": { + "clustername": "default", + "instance": "container", + "vespaVersion": "8.46.19" + } + } + ] + }, +``` + +The `default` metric-set is added to the system metric-set, unless a [consumer](/en/reference/api/metrics-v1#consumer) request parameter specifies a different built-in or custom metric set - see [metric list](/en/reference/operations/metrics/default-metric-set#metric-sets). + +The `Vespa` metric-set has a richer set of metrics, see [metric list](/en/reference/operations/metrics/vespa-metric-set#metric-sets). + +The *consumer* request parameter can also be used in [/metrics/v2/values](/en/reference/api/metrics-v2#metrics-v2-values) and [/prometheus/v1/values](/en/reference/api/prometheus-v1#prometheus-v1-values). + +Example minimal metric-set; system metric-set + a specific metric: + +```xml + + + + + + + + +``` + +Example default metric-set and more; system metric-set + default metric-set + a built-in metric: + +```xml + + + + + + + + + +``` + +## Metrics names + +The names of metrics emitted by Vespa typically follow this naming scheme: `...`. The separator (`.` here) may differ for different metrics integrations. Similarly, the `` string may differ depending on your configuration. Further some metrics have several levels of `component` names. 
Each metric will have a number of values associated with them, one for each `suffix` provided by the metric. Typical suffixes include `sum`, `count` and `max`. + +## Container Metrics + +Metrics from the container with description and unit can be found in the [container metrics reference](/en/reference/operations/metrics/container#container-metrics). The most commonly used metrics are mentioned below. + +### Generic Container Metrics + +These metrics are output for the server as a whole, e.g. related to resources. Some metrics indicate memory usage, such as `mem.heap.*`, `mem.native.*`, `mem.direct.*`. Other metrics are related to the JVM garbage collection, `jdisc.gc.count` and `jdisc.gc.ms`. + +### Thread Pool Metrics + +Metrics for the container thread pools. The `jdisc.thread_pool.*` metrics have a dimension `threadpool` with thread pool name, e.g. *default-pool* for the container's default thread pool. See [Container Tuning](/en/performance/container-tuning#container-tuning) for details. + +### HTTP Specific Metrics + +These are metrics specific for HTTP. Those metrics that are specific to a connector will have a dimension containing the TCP listen port. + +Refer to [Container Metrics](/en/reference/operations/metrics/container) for metrics on HTTP status response codes, `http.status.*` or more detailed requests related to the handling of requests, `jdisc.http.*`. Other relevant metrics include `serverNumConnections`, `serverNumOpenConnections`, `serverBytesReceived` and `serverBytesSent`. + +### Query Specific Metrics + +For metrics related to queries please start with the `queries` and `query_latency`, the `handled.requests` and `handled.latency` or the `httpapi_*` metrics for more insights. + +### Feed Specific Metrics + +For metrics related to feeding into Vespa, we recommend using the `feed.operations` and `feed.latency` metrics. + +## Available metrics + +Each of the services running in a Vespa installation maintains and reports a number of metrics. 
+ +Metrics from the container services are the most commonly used, and are listed in [Container Metrics](/en/reference/operations/metrics/container). You will find the metrics available there, with description and unit. + +## Metrics from custom components + +Add custom metrics from components like [Searchers](/en/applications/searchers) and [Document processors](/en/applications/document-processors): + + + +Add a [MetricReceiver](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/metrics/simple/MetricReceiver.html) instance to the constructor of the component - it is [injected](/en/applications/dependency-injection) by the Container. + + +Declare [Gauge](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/metrics/simple/Gauge.html) and [Counter](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/metrics/simple/Counter.html) metrics using the *declare*\-methods on the *MetricReceiver*. Optionally set arbitrary metric dimensions to default values at declaration time - refer to the javadoc for details. + + +Each time there is some data to measure, invoke the [sample](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/metrics/simple/Gauge.html#sample\(double\)) method on gauges or the [add](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/metrics/simple/Counter.html#add\(\)) method on counters. The gauges and counters declared are inherently thread-safe. When sampling data, any dimensions can optionally be set. + + +Add a [consumer](/en/reference/applications/services/admin#consumer) in *services.xml* for the metrics to be emitted in the metric APIs, like in the previous section. + + +Find a full example in the [album-recommendation-java](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation-java) sample application. 
+ + +**Note:** + +Metrics with no value do not show in the metric APIs - in the example above, make at least one query to set the metric value. + + +### Example / QA + +I have two different libraries that are running as components with their own threads within the vespa container. We are injecting MetricReceiver to each library. After injecting the receiver we store the reference to this receiver in a container-wide object so that they can be used inside these libraries (the libraries each have several classes and such, so it is not possible to inject the receiver every time, and we need to use the stored reference). Questions: + + + +Yes, you get the same object. + + +It remains valid for the lifetime of the component to which it got injected. Therefore, if you share component references through some other means than direct or indirect injection you may end up with invalid references. A "container-wide object" sounds like trouble. You should have it injected into all the components that need it instead. Or, if you feel that will be too fine-grained, create one large object which gets these things injected, and then have that injected into all components that need the common stuff. + + \ No newline at end of file diff --git a/mintlify-docs/en/operations/monitoring.mdx b/mintlify-docs/en/operations/monitoring.mdx new file mode 100644 index 0000000000..1c4e08f6ab --- /dev/null +++ b/mintlify-docs/en/operations/monitoring.mdx @@ -0,0 +1,509 @@ +--- +title: "Monitoring" +--- + + +![Sample Vespa Console dashboard](/assets/img/grafana-metrics.png) + + +The Vespa Cloud Console has dashboards for insight into performance metrics; use the METRICS tab in the application zone view. + +These metrics can also be pulled into external monitoring tools using the Prometheus metrics API. 
+ +## The Vespa Cloud metrics dashboard + +The Vespa Cloud metrics dashboard (the METRICS tab in the application zone view) is organized around a *symptom → layer → resource* workflow, so an investigation that starts from "latency is up" can land on "this specific layer is the bottleneck" without scanning every chart. + +### Tabs and filters + + +![Dashboard tab bar](/assets/img/monitoring-dashboard-tabs.png) + + +The dashboard is organized into seven tabs: + +| Tab | What it shows | When to use it | +| --- | --- | --- | +| **Overview** | Health indicators, request rates, QoS, latency summary, HTTP status codes, resource utilization | Daily health check, first stop during incidents | +| **Query** | Container- and content-node query latency, per-rank-profile breakdown, match/docsum executors | Investigating read latency, query quality issues | +| **Feed** | Feed operation rates and latency at each layer, feed blocking | Investigating write latency or throughput issues | +| **Nearest Neighbor Search** | NNS distance computations, visit efficiency | Tuning HNSW parameters (hidden when not in use) | +| **Content Node** | Document counts, Proton resource usage, executor utilization, maintenance jobs | Deep investigation of search engine internals | +| **Resources** | CPU, memory, disk, GPU, JVM, thread pools | Sizing and scaling decisions | +| **Health** | Cluster state, data consistency, restarts, reindexing, resource limits | Stability monitoring, post-incident review | + +Filters at the top apply across all tabs: + +- **Cluster** — limit metrics to specific clusters +- **Per host metrics** — toggle between aggregated cluster view and per-node breakdown +- **Rank Profile** — filter per-rank-profile panels on the Query tab (defaults to "All") + +Query, Feed, Content Node, Resources, and Health tabs group metrics per cluster — you see all metrics for one cluster before scrolling to the next. 
Container metrics are grouped per container cluster, content metrics per content cluster. + +### Annotations + + +![Latency chart with Service restart and Core dump annotations](/assets/img/monitoring-annotations-example.png) + + +Annotations are vertical lines drawn on every chart that mark operational events. When a latency or throughput anomaly lines up with an annotation, you get the context for the change without having to infer it from the graph alone. + +| Annotation | Triggered by | Why it matters | +| :--- | :--- | :--- | +| **Feed blocked in cluster** | A content node crosses its disk/memory feed-block limit | Writes are paused cluster-wide until remediated | +| **Vespa upgrade** | A new Vespa version is rolled out | Brief rolling-restart latency spikes are expected around this marker | +| **Data migration** | Bucket merges pending exceed a threshold | Explains elevated CPU/IO and latency during redistribution | +| **Document re-indexing** | A reindexing job is running | Explains elevated CPU and search-side load | +| **Auto-scaling** | The autoscaler changed the cluster shape | Brief capacity drop during reshuffle | +| **Service restart** | `delta(sentinel_totalRestarts[10m]) > 0` — a Vespa service process restarted on one or more nodes | Unexpected restarts usually indicate a crash, OOM, or forced stop; outside of planned upgrades these are always worth investigating | +| **Core dump** | `delta(coredumps_processed[1h]) > 0` — a process core-dumped | Signals a crash; cross-reference with Service restart. Should be extremely rare | + +### Overview tab + +The Overview tab is the fastest place to answer "is anything obviously broken?" and provides everything needed for daily monitoring at a glance. + +#### Health Indicators + + +![Overview tab Health Indicators row](/assets/img/monitoring-health-indicators.png) + + +The Overview tab opens with a dedicated **Health Indicators** row — five stat panels designed to surface stability issues in a single glance. 
A row of green zeros is the signal to stop; a non-zero value tells you which tab to visit next. + +| Indicator | What it counts | Healthy value | +| :--- | :--- | :--- | +| **Core Dumps (1h)** | Core dumps processed across all clusters in the last hour | 0 — any non-zero value is a crash to investigate | +| **Restarts (1h)** | Vespa service restarts across all clusters in the last hour | 0 during steady state; brief spikes are normal during upgrades | +| **Feed Blocked** | Nodes currently above a feed-block resource limit | 0 — non-zero means writes are being rejected cluster-wide | +| **Content: Groups/Nodes Down** | Content groups with at least one node down | 0 during steady state. 1 group down is normal during rolling restarts or maintenance; 2 or more should be investigated | +| **Container: Services Down** | Active container nodes where some service isn't running | 0 during steady state; brief spikes during deployments are expected | + +#### QoS and latency overview + +**QoS (Quality of Service)** shows the percentage of successful requests. Read and write QoS are shown separately; a healthy application should be above 99.9%. If QoS drops, consult the HTTP Response Code Reference row (collapsed by default) for a table explaining every observed status code and its meaning in Vespa context. 4xx responses are client errors; 5xx responses are server errors and should be investigated immediately. + +**Latency summary** separates query and feed latency into read and write rows. Compare averages with p99 — a large gap indicates tail latency that won't show up in averages. As a rule of thumb, if p99 is more than 5× the average, investigate the tail. + +#### Resource utilization + +The bottom row gives a quick view of CPU, memory, and disk across all clusters. Any resource consistently above 80% warrants attention. + +### Query tab + +When query latency increases, the Query tab helps find the cause layer-by-layer. 
Metrics are grouped per container cluster (for container-level metrics) and per content cluster (for content-node metrics). + +A query flows through multiple layers, each with its own latency metric: + +```bash +Client + → HTTP Read Latency (end-to-end including network I/O) + → Query Container Latency (time in the container itself) + → Query Latency (container-observed total, excluding HTTP overhead) + → Search Protocol Latency (time on each content node) + → Rank Profile Latency (per rank-profile breakdown) +``` + +#### Container-level metrics + +Start with the *Query Rate & Latency* row: + +- Did QPS increase? More queries means more load. +- Which latency metric increased? + - **Query Latency** — container level, includes dispatch to content nodes + - **HTTP Read Latency** — includes HTTP I/O overhead + - **Search Protocol Latency** — content node execution only + +If HTTP latency is much higher than query latency, the bottleneck is network or payload size. If search protocol latency dominates, the bottleneck is on the content nodes. + +The *Query Quality* row shows: + +- **Failed queries** — actual errors. Should be near zero. +- **Degraded queries** — queries that were [soft-doomed](/en/performance/graceful-degradation) (ran out of time during matching). These return partial results. +- **Empty results** — queries returning zero hits. A sudden increase may indicate an indexing problem or a query change. 
+ +#### Rank profile metrics + + +![Query tab rank profile sub-rows](/assets/img/monitoring-rank-profile-rows.png) + + +The Query tab groups per-rank-profile metrics into four sub-rows, all filterable by the Rank Profile dropdown: + +- **Rank Profile — Latency & Volume** — query latency (avg and max), QPS per profile, and raw docs matched per profile +- **Rank Profile — Time Breakdown** — setup time, rerank time, and grouping time, each shown as avg plus peak so you can tell whether a profile has steady-state cost or occasional cost spikes +- **Rank Profile — Quality** — docs matched per query, soft-doom factor, and soft-doomed queries. These tell you when a profile is [overrunning its time budget](/en/performance/graceful-degradation). +- **Rank Profile — Query Distribution** — QPS split by content group, which helps spot uneven routing + +Things to look for: + +- Which rank profile has the highest latency? +- Are soft-doomed queries concentrated on a specific rank profile? +- Is the peak for rerank or grouping time much higher than the average? That often points to a specific second-phase or grouping expression that's expensive only on some queries. +- Did docs matched per query grow? More documents matched means more ranking work. + +See [Latency tracking](#latency-tracking) below for a worked example, and the [rank profiles](/en/basics/ranking#rank-profiles) documentation for background. + +#### Match and Docsum executor panels + +The Query tab also includes *Match Executor* and *Docsum Executor* sub-rows (queue size + accepted rate) so you can see whether the content-node thread pools feeding the query and summary paths are saturated. These are not attributable to a rank profile, but often explain tail-latency spikes that aren't visible in rank-profile metrics. + +### Feed tab + +When feed latency increases or throughput drops, the Feed tab shows where in the write path the slowdown occurs. 
A write operation flows through: + +```bash +Client + → HTTP Write Latency (end-to-end) + → Container Feed Latency (document processing chains, embedders) + → Distributor Latency (routing based on bucket distribution) + → Content: Storage Latency(persistence, per document replica) + → Commit Latency (transaction log) +``` + +Start from the top and find where latency increases. If container feed latency is normal but HTTP write latency is high, the bottleneck is network/payload. If distributor latency is high, check for node state issues in the Health tab. If storage latency is high, check disk I/O in the Resources tab. + +#### Typical healthy values + +- Feed latency: 1–50 ms for puts/updates is typical; spikes during maintenance are normal +- Distributor failures: zero — non-zero indicates node state issues +- HTTP API failures: near zero +- Feed blocked: always zero + +#### Feed blocked + +**Feed Blocked** is the most critical feed metric. When a content node exceeds its disk or memory [resource limit](/en/writing/feed-block), feeding is paused for the entire cluster. HTTP clients receive `507 Insufficient Storage`. + +If feed is being blocked: + + + +Check *Health > Feed Resource Limits* for which resource is near the limit. + + +Check the Resources tab for the specific nodes causing pressure. + + +Add nodes to the content cluster (always add, don't resize — data [auto-redistributes](/en/content/elasticity)). + + + +The Health tab includes a Resource Limits Reference panel explaining the default limits, the blocking mechanism, and how to remediate. + +### Nearest Neighbor Search tab + +This tab only appears when the application uses [approximate nearest neighbor search](/en/querying/approximate-nn-hnsw) — it is automatically hidden when no NNS distance computations are detected. + +Vespa supports two NNS modes: + +- **Approximate NNS** — uses an HNSW graph index to find neighbors efficiently without scanning every document. 
Fast, but may miss some true nearest neighbors. +- **Exact NNS** — brute-force scan computing distance to every document. Accurate but expensive. Vespa falls back to this when the filter hit ratio is below the `approximate-threshold` (default 0.02). + +Key metrics: + +- **Exact NNS Ratio** — fraction of queries using brute-force search. Should be below 0.05 (5%). High values mean many queries fall back to exact search, significantly increasing cost. +- **Approx NNS Visit Efficiency** — ratio of graph nodes visited to distances computed. Values of 1.0–3.0 are typical; much higher suggests the HNSW index could be tuned. +- **Distances Computed / Nodes Visited** — rate metrics showing the raw NNS workload. + +Tuning parameters (set per [rank profile](/en/basics/ranking#rank-profiles)): `approximate-threshold`, `filter-first-threshold`, `target-hits-max-adjustment-factor`, `exploration-slack`. If the exact NNS ratio is high, consider increasing `approximate-threshold` or restructuring filters to be less restrictive. + +### Content Node tab + +The Content Node tab shows internals of the [Proton](/en/content/proton) search engine running on each content node. All metrics are grouped per content cluster. + +#### Documents + +- **Total** — all documents in the database (including removed) +- **Ready** — documents available for search +- **Active** — primary copies that should be searchable on this node +- **Removed** — tombstones pending garbage collection + +#### Proton resource usage + +Disk and memory usage from Proton's internal accounting. This is distinct from node-level metrics in the Resources tab — these are the values Vespa uses for [feed-blocking](/en/writing/feed-block) decisions. + +#### Executor utilization + +Proton uses several thread pools (executors): + +- **Match** — executes queries. Directly impacts query latency. +- **Shared** — handles background tasks like flush and compaction. +- **Proton** — internal coordination tasks. 
+- **Field writer** — writes attribute and index data during feeding. Saturation directly impacts feed throughput. + +Typical healthy values: + +- Utilization below 0.8 (80%) — sustained values above this are a bottleneck +- Field writer saturation well below 1.0 +- Queue sizes near zero during steady state + +The dashboard renders avg as a solid green line and max as a dashed yellow line, making it easy to spot whether the maximum tracks the average or has concerning spikes. + +#### Maintenance jobs + +Proton runs background [maintenance jobs](/en/content/proton#proton-maintenance-jobs) that manage data structures. The dashboard includes a reference panel (collapsed) explaining each job and its resource impact: + +| Job | Resource impact | +| --- | --- | +| Attribute Flush | Low | +| Memory Index Flush | Moderate | +| Disk Index Fusion | High — temporary 2× disk usage | +| Document Store Compaction | High — holds file in memory | +| Bucket Move | High — competes with feeding | +| LID-Space Compaction | Moderate | + +Latency spikes that correlate with active maintenance are expected but may indicate the cluster needs more headroom. + +### Resources tab + +The Resources tab is the primary tool for sizing decisions. Node-level resources (CPU, memory, disk) are grouped per cluster. Container-specific metrics (JVM, thread pools, GPU, network) are grouped per container cluster. 
+ +#### Typical healthy values + +| Resource | Healthy | Concerning | Action needed | +| --- | --- | --- | --- | +| **CPU** | `< 70%` | `70-85%` | `> 85%` sustained | +| **CPU IOWait** | `< 5%` | `5-10%` | `> 10%` (I/O bottleneck) | +| **Memory** | `< 70%` | `70-80%` | Approaching feed-block limit | +| **Disk** | `< 70%` | `70-80%` | Approaching feed-block limit | +| **JVM GC Overhead** | `< 5%` | `5-15%` | `> 15%` (severe latency impact) | +| **Threadpool utilization** | `< 70%` | `70-90%` | Rejected tasks = requests dropped | + +Content nodes need extra headroom because [maintenance jobs](/en/content/proton#proton-maintenance-jobs) (especially disk index fusion) temporarily increase resource usage. + +#### Container thread pools + + +![Container thread pools row with per-pool avg/max panels](/assets/img/monitoring-container-thread-pools.png) + + +Which thread pools exist on a container depends on which elements are configured in `services.xml`: + +| Thread pool | Present when | +| --- | --- | +| `default-handler-common` | Always (handler executor used by anything without its own pool) | +| `search-handler` | `<search>` element is present | +| `feedapi-handler` | `<document-api>` element is present | + +To keep the dashboard free of empty panels, the Resources tab contains three threadpool rows — one per container configuration case — and each row repeats per container cluster that falls into that case: + +- **Container Thread Pools (search + document-api)** — clusters with both pools +- **Container Thread Pools (search only)** — clusters with `<search>` but no feed API +- **Container Thread Pools (document-api only)** — feed-only clusters + +Classification is automatic: hidden variables derive the cluster list per case, so only relevant rows render for a given deployment. Each pool gets three panels — **Utilization**, **Work Queue Size**, **Work Queue Utilization** — with avg as a solid green line and max as a dashed yellow line. 
+ +- **Utilization** — active threads as percentage of pool size +- **Work queue size** — tasks waiting for a thread. The default pool uses a synchronous queue (capacity 0), so there is no buffering — if no thread is available, the task is rejected. +- **Queue utilization** — percentage of configured queue capacity used (only meaningful for thread pools with bounded queues) + +#### JVM memory breakdown + + +![JVM memory breakdown: heap, direct, native, GC](/assets/img/monitoring-jvm-memory.png) + + +The Resources tab's JVM row separates the three layers of container memory: + +- **JVM Heap Usage** — Java objects (searchers, document processors, caches) +- **JVM Direct Memory** — NIO buffers, Netty pools +- **JVM Native Memory** — JNI allocations, including ONNX embedder working memory and — if configured — a local LLM's KV cache and compute buffers + +When overall node memory is high but heap and direct look normal, the native layer is usually the answer. This is common on container nodes running embedder or local-LLM components: model weights are memory-mapped and only partially resident, but KV cache and compute buffers are allocated upfront as native memory. + +### Health tab + +The Health tab tracks cluster stability and data consistency, grouped per content cluster. + +#### Cluster state + +Nodes are distributed across states: **up** (serving), **down** (unreachable), **initializing** (starting up), **maintenance** (temporarily out), **retired** (being removed). During normal operation: all up, zero down. See [content node states](/en/content/content-nodes). + +#### Data consistency + +- **Buckets Out of Sync** — percentage of [data buckets](/en/content/buckets) not yet replicated/consistent. Should be 0% during steady state; non-zero during scaling, restarts, or failures. +- **Merge Pending** — bucket merge operations queued. High during data redistribution. + +After scaling events, expect buckets out of sync and pending merges. 
These should converge back to zero. If they don't, investigate. + +#### Stability + +- **Service Restarts** — cumulative restarts per cluster. An increase indicates a process crash. +- **Core Dumps** — should always be zero. + +Both signals surface in three complementary ways: as per-cluster time series on this tab (for historical context), as at-a-glance counters in the [Health Indicators row](#health-indicators) on the Overview tab, and as *Service restart*/*Core dump* [annotations](#dashboard-annotations) drawn as vertical lines on every chart. + +#### Feed Resource Limits + +Shows memory and disk utilization vs. configured limits. When utilization exceeds the limit, [feeding is blocked](/en/writing/feed-block). The dashboard includes a Resource Limits Reference panel (collapsed) explaining the default limits (disk 80%, memory 80%), the blocking mechanism, and what to do about it. + +### Common workflows + +#### "Our query latency increased" + + + +Overview: confirm the latency increase, check if QPS also changed. + + +Query: which percentile increased most? (avg vs p95 vs p99) + + +Query → Rank Profile: is it one query type or all? + + +Resources: CPU, JVM GC overhead. + + +Content Node: match executor utilization, queue sizes growing? + + + +#### "Feed is slow / feed is blocked" + + + +Overview: check feed latency and feed operation rate. + + +Feed: which layer shows increased latency? + + +Health: is feed blocked? Check resource limits. + + +Resources: disk and memory utilization, CPU IOWait. + + +Content Node: field writer saturation, filestor queue, active maintenance jobs. + + + +#### "Should we scale up?" + + + +Resources: identify the bottleneck resource. + + +Content Node: check executor utilization (are we compute-bound?). + + +Per host view: is load evenly distributed? + + +Enable [autoscaling](/en/operations/autoscaling) or adjust resources in `services.xml`. 
+ + +See the [benchmarking guide](/en/performance/benchmarking) for systematic capacity testing. + + + +## Latency tracking + +When monitoring latency in clusters with mixed loads, it is useful to use [rank profiles](/en/basics/ranking#rank-profiles) to separate them. As an example, an application might have user queries mixed with agentic, batch-oriented queries. Tracking the Container-level query latencies might look like: + + +![total query latency chart](/assets/img/latency-total.png) + + +Using Content node level metrics, separated by ranking profile, we see: + + +![query latency per rank profile chart](/assets/img/latency-rank-profile.png) + + +From this, we see that query latency varies with the rank profile used. Relevant metrics to export to your monitoring system include: + +- [content.proton.documentdb.matching.rank\_profile.queries](/en/reference/operations/metrics/searchnode#content_proton_documentdb_matching_rank_profile_queries) +- [content.proton.documentdb.matching.rank\_profile.docs\_matched](/en/reference/operations/metrics/searchnode#content_proton_documentdb_matching_rank_profile_docs_matched) +- [content.proton.documentdb.matching.rank\_profile.query\_latency](/en/reference/operations/metrics/searchnode#content_proton_documentdb_matching_rank_profile_query_latency) +- [content.proton.documentdb.matching.rank\_profile.rerank\_time](/en/reference/operations/metrics/searchnode#content_proton_documentdb_matching_rank_profile_rerank_time) + +In short, when debugging latency, look for changes, per rank profile: + +- Did the query rate increase? +- Did the number of matched or ranked documents change? + +The above metrics are a subset of the available metrics. It is a good idea to set a [query profile](/en/reference/querying/query-profiles) per class of query, and in each query profile, select a distinct rank profile. 
With this, you can change the rank profile for a given query class by configuration only (no need to change the clients) - a good example is having a lightweight rank profile to use in overload situations. This makes it easier to track the individual query classes, per rank profile. + +## Prometheus metrics API + +Prometheus metrics are found at `$ENDPOINT/prometheus/v1/values`: + +```bash +$ curl -s --cert data-plane-public-cert.pem --key data-plane-private-key.pem \ + 'https://b6718765.b68a1234.z.vespa-app.cloud/prometheus/v1/values' +``` + +The metrics can be fed into e.g. your Grafana Cloud or self-hosted Grafana instance. See the [Vespa metrics documentation](/en/reference/operations/metrics/vespa-metric-set) for more information. + +## Using Grafana + +This section explains how to set up Grafana to consume Vespa metrics using the Prometheus API. + +### 1. Prometheus configuration + +Prometheus is configured using `prometheus.yml`; find a sample config in [prometheus](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/monitoring/album-recommendation-monitoring/prometheus). See `prometheus-cloud.yml`, which is designed to be easy to set up with any Vespa Cloud instance. Replace `<Endpoint>` and `<SERVICE_NAME>` with the endpoint for the application and the service name, respectively. In addition, the paths to the private key and public cert that are used for the data plane to the endpoint need to be provided - refer to [security](/en/security/guide). Then, configure the Prometheus instance to use this configuration file. The Prometheus instance will now start retrieving the metrics from Vespa Cloud. If the Prometheus instance is used for multiple services, append the target configuration for Vespa to scrape\_configs. + +### 2. Grafana configuration + +Use the [provisioning folder](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/monitoring/album-recommendation-monitoring/grafana/provisioning) as a baseline for further configuration. 
+ +In the provisioning folder there are a few different files that all help for configuring Grafana locally. These work as good examples of default configurations, but the most important is the file named `Vespa-Engine-Advanced-Metrics-External.json`. This is a default dashboard, based upon the metrics the Vespa team use to monitor performance. + +Click the + button on the side and go to import. Upload the file to the Grafana instance. This should automatically load in the dashboard for usage. For now, it will not display any data as no data sources are configured yet. + +### 3. Grafana Data Source + +The Prometheus data source has to be added to the Grafana instance for the visualisation. Click the cog on the left and then "Data Sources". Click "Add data source" and choose Prometheus from the list. Add the URL for the Prometheus instance with appropriate bindings for connecting. The configuration for the bindings will depend on how the Prometheus instance is hosted. Once the configuration details have been entered, click Save & Test at the bottom and ensure that Grafana says "Data source is working". + +To verify the data flow, navigate back to the Vespa Metrics dashboard by clicking the dashboard symbol on the left (4 blocks) and clicking manage and then click Vespa Metrics. Data should now appear in the Grafana dashboard. If no data shows up, edit one of the data sets and ensure that it has the right data source selected. The name of the data source the dashboard is expecting might be different from what your data source is named. If there is still no data appearing, it either means that the Vespa instance is not being used or that some part of the configuration is wrong. + +## Using AWS Cloudwatch + +To pull metrics from your Vespa application into AWS Cloudwatch, refer to the [metrics-emitter](https://github.com/vespa-engine/metrics-emitter/tree/master/cloudwatch) documentation for how to set up an AWS Lambda. 
+ +## Alerting + +The [Vespa Grafana Terraform template](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/monitoring/vespa-grafana-terraform) provides a set of dashboards and alerts. If you are using a different monitoring service and want to set up an equivalent alert set, you can follow this table: + +| Metric name | Threshold | Dimension aggregation | +| --- | --- | --- | +| [content_proton_resource_usage_disk_average](/en/reference/operations/metrics/searchnode#content_proton_resource_usage_disk) | `>` 0.9 | max by(applicationId, clusterId, zone) | +| [content_proton_resource_usage_memory_average](/en/reference/operations/metrics/searchnode#content_proton_resource_usage_memory) | `>` 0.8 | max by(applicationId, zone, clusterId) | +| cpu_util | `>` 90 | max by(applicationId, zone, clusterId) | +| [content_proton_resource_usage_feeding_blocked_last](/en/reference/operations/metrics/searchnode#content_proton_resource_usage_feeding_blocked) | `>=` 1 | N/A | + +All metrics are from the [default metric set](/en/reference/operations/metrics/default-metric-set#metric-sets). Metrics are using the naming scheme from the [Prometheus metrics](/en/reference/api/prometheus-v1#prometheus-v1-values) API. Dimension aggregation is optional, but reduces alerting noise - e.g. in the case where an entire cluster goes bad. It is recommended to filter all alerts on zones in the [prod environment](/en/operations/environments#prod). + +## Prometheus Metrics Sample + +Below is a sample request with sample response for prometheus metrics for a minimal application on Vespa Cloud: + + +```bash +$ curl -s --cert data-plane-public-cert.pem --key data-plane-private-key.pem \ + 'https://b6718765.b68a1234.z.vespa-app.cloud/prometheus/v1/values' + +... 
+jdisc_thread_pool_work_queue_size_min{threadpool="default-pool",zone="dev.aws-us-east-1c",applicationId="mytenant.myapp.default",serviceId="logserver-container",clusterId="admin/logserver",hostname="h97490a.dev.us-east-1c.aws.vespa-cloud.net",vespa_service="vespa_logserver_container",} 0.0 1733139324000 +jdisc_thread_pool_work_queue_size_min{threadpool="default-handler-common",zone="dev.aws-us-east-1c",applicationId="mytenant.myapp.default",serviceId="logserver-container",clusterId="admin/logserver",hostname="h97490a.dev.us-east-1c.aws.vespa-cloud.net",vespa_service="vespa_logserver_container",} 0.0 1733139324000 +# HELP content_proton_documentdb_matching_rank_profile_rerank_time_average +# TYPE content_proton_documentdb_matching_rank_profile_rerank_time_average untyped +content_proton_documentdb_matching_rank_profile_rerank_time_average{rankProfile="rank_albums",documenttype="music",zone="dev.aws-us-east-1c",applicationId="mytenant.myapp.default",serviceId="searchnode",clusterId="content/music",hostname="h104562a.dev.us-east-1c.aws.vespa-cloud.net",vespa_service="vespa_searchnode",} 0.0 1733139324000 +content_proton_documentdb_matching_rank_profile_rerank_time_average{rankProfile="unranked",documenttype="music",zone="dev.aws-us-east-1c",applicationId="mytenant.myapp.default",serviceId="searchnode",clusterId="content/music",hostname="h104562a.dev.us-east-1c.aws.vespa-cloud.net",vespa_service="vespa_searchnode",} 0.0 1733139324000 +content_proton_documentdb_matching_rank_profile_rerank_time_average{rankProfile="default",documenttype="music",zone="dev.aws-us-east-1c",applicationId="mytenant.myapp.default",serviceId="searchnode",clusterId="content/music",hostname="h104562a.dev.us-east-1c.aws.vespa-cloud.net",vespa_service="vespa_searchnode",} 0.0 1733139324000 +... +``` + +Relevant labels include: + +- `chain` This is the name on the search chain in the container that is used for a set of query requests. 
This is typically used to get separate metrics, such as latency and the number of queries for each chain over time. +- `documenttype` This is the name of the document type for which a set of queries are run in the content clusters. This is typically used to get separate content layer metrics, such as latency and the number of queries for each document type over time. +- `groupId` This is the id of the cluster group for which the metric measurement is done. This is typically used to get separate metric aggregates per group in a content cluster. The label is most relevant for metrics from the content clusters running multiple content groups, see [Content Cluster Elasticity](/en/content/elasticity). The value is in the format group 0, group 1, group 2, etc. +- `rankProfile` This label is present for a subset of metrics from the content clusters, with names starting with `content_proton_documentdb_matching_rank_profile_`. The label is typically used in cases where you use multiple rank profiles and want to analyse performance differences between the different rank profiles, or to better understand certain types of performance issues and need to narrow down the candidate set. +- `source` This is a label applied on container metrics for classifying query failures by the content cluster where the failure was triggered. + +How you will use labels to separate different kinds of queries depends on the observability backend you use, but you will typically compute weighted averages for query latency and query volume, and split graphs by the relevant labels to better understand system performance and bottlenecks. + +For the container level metrics you use the `chain` label to differentiate between different query streams, while you use the `rankProfile` label to do the same at the content level. 
\ No newline at end of file diff --git a/mintlify-docs/en/operations/notifications.mdx b/mintlify-docs/en/operations/notifications.mdx new file mode 100644 index 0000000000..6c71a9000d --- /dev/null +++ b/mintlify-docs/en/operations/notifications.mdx @@ -0,0 +1,36 @@ +--- +title: "Notifications" +--- + +Vespa Cloud supports two different categories of notifications. Notifications can be sent by email if this has been configured in the Console. + +- **Tenant notifications** are administrative notifications about the tenant. Information about users, plan, etc. are sent to all contacts configured to get tenant notifications. +- **Application notifications** are notifications about your running Vespa applications. If there are resource constraint issues, deployment errors, configuration errors or other issues with a Vespa application, they will be sent to all contacts configured to get application notifications. + +## Configuring Notifications + +Notifications are configured in the Console under [**Account > Notifications**](https://console.vespa-cloud.com/link/tenant/account/notifications). You can add contacts here that will start receiving emails for the categories enabled for that contact. + + +![Console Notifications](/assets/img/console-notifications.png) + + +To add a new address to get notifications: + + + +Click **+Add new contact**. + + +Enter the email address to receive notifications to. + + +Choose the types of notifications to receive. + + +Click **Save** + + +Go to your email inbox and click the verification link you have received there. 
+ + \ No newline at end of file diff --git a/mintlify-docs/en/operations/private-endpoints.mdx b/mintlify-docs/en/operations/private-endpoints.mdx new file mode 100644 index 0000000000..f80add455f --- /dev/null +++ b/mintlify-docs/en/operations/private-endpoints.mdx @@ -0,0 +1,263 @@ +--- +title: "Private endpoints" +--- + +Vespa Cloud lets you set up private endpoint services on your application clusters, for exclusive access from your own, co-located VPCs with the same cloud provider. This is supported for AWS deployments through AWS's [PrivateLink](https://docs.aws.amazon.com/vpc/latest/privatelink/what-is-privatelink.html), and for GCP deployments through GCP's [Private Service Connect](https://cloud.google.com/vpc/docs/private-service-connect). This guide takes you through the necessary configuration steps for either [AWS PrivateLink](#aws-private-link) or for [GCP Private Service Connect](#gcp-private-service-connect). + +Private endpoints are only supported in zones in the [prod environment](/en/operations/environments#prod). + + +**Note:** + +Private endpoints use mTLS authentication by default, and token-based authentication must be explicitly enabled. See [configuring private endpoint authentication method](#authentication-methods). + + +## AWS PrivateLinkRequired information: + +| Item | Description | +| --- | --- | +| **Your IAM account number** | The numeric identifier for your AWS account. | +| **VPC ID** | The identifier of your AWS VPC where you wish to connect to the service endpoints from. | +| **AWS region name** | The name of the AWS region to connect from. Note that you can only connect to a service in the same region, or, if public endpoints are disabled, in the same AWS availability zone. | + +Procedure: + + + +Add `` to [deployment.xml](/en/reference/applications/deployment#endpoint-private), allowing access to the container cluster using the designated ARN from your account. 
The example allows all roles and users under the `123123123123` account to connect to the endpoint service on the `my-container` cluster, in each region listed under the `` tag. + +See [endpoint service configuration](https://docs.aws.amazon.com/vpc/latest/privatelink/configure-endpoint-service.html) for details on valid ARNs, and more fine-grained access control. + +The example also shows how to disable the public zone endpoint by adding the [`"zone"` type endpoint](/en/reference/applications/deployment#endpoint-zone) declaration—this is an optional step, and not required to set up the private service: + ```xml + + + region-1 + region-2 + + + + + + + + + ``` + +Build and deploy the application package, and wait for it to deploy to the indicated regions. + +**Important:** + +In the above example, the public endpoint is set to disabled, and the private endpoint is added. Make this change in the *same* deployment. If you disable the public endpoint *after* the private endpoint is live, the private endpoint will be disabled and recreated as part of recreating the load balancer. If this happens, run through the procedure again for correct load balancer creation. + + + +Navigate to the endpoints tab for your application in the Console, and find the service ID for the deployment to which you wish to connect. While there, verify that access to connect to the endpoint was granted to the correct ARNs. + + +![Service ID for VPC endpoint](/assets/img/vpc-1.png) + + + +[Create a VPC endpoint](https://docs.aws.amazon.com/cli/latest/reference/ec2/create-vpc-endpoint.html) in your VPC. This is your entry point, which forwards connections to your Vespa application through the private network of AWS. 
For this example, assume your VPC has id `vpc-123` and resides in the AWS region `us-east-1`, and that the service ID of your endpoint service, found in the Console, is `com.amazonaws.vpce.us-east-1.vpce-svc-321`: + +```bash +$ aws ec2 create-vpc-endpoint \ + --region us-east-1 \ + --vpc-id vpc-123 \ + --service-name com.amazonaws.vpce.us-east-1.vpce-svc-321 \ + --vpc-endpoint-type Interface \ + --private-dns-enabled | jq . +``` + +Note the value of the `VpcEndpointId` field, for verification in the below item. This is also where you specify optional security group and subnet IDs; these are omitted here for brevity. If creating the VPC endpoint through the AWS console instead, be sure to check "Enable DNS names"! + + +Navigate back to the endpoints tab in the Console, and refresh the page. You should now see a new entry representing the connection between your newly created interface endpoint and the endpoint service on your container cluster. This is the "CONNECTED ENDPOINTS" in the image above. Verify the ID matches the value of the `VpcEndpointId` field above. The connection is ready when the state is `open`. + + +The zone endpoint of the designated container cluster should now resolve through private DNS, for any AWS resource that is allowed to connect to your VPC endpoint. 
The easiest way to verify this is to run the following Python 3.9 lambda, using your own zone endpoint, from within your VPC: + +```python +from socket import gethostbyname +from urllib.request import urlopen + +def lambda_handler(event, context): + return { + 'statusCode': 200, + 'body': urlopen('https://badc0ffee.deadbeef.z.vespa-app.cloud/status.html').read(), + 'ip': gethostbyname('badc0ffee.deadbeef.z.vespa-app.cloud') + } +``` + +Alternatively, run a couple of commands from a host inside the VPC: + +```bash +$ host my-container.my-app.my-tenant.region-1.z.vespa-app.cloud +$ curl https://my-container.my-app.my-tenant.region-1.z.vespa-app.cloud/status.html +``` + +In both cases, the IP should be in one of the private IP ranges, and the HTTP response from the Vespa container endpoint should be `OK`. + + + + +**Note:** + +Enclave users may set up high-availability PrivateLink endpoints connected across multiple AZs. Contact [Vespa support](https://vespa.ai/support/) for guidance. + + +## GCP Private Service ConnectPrerequisites: + +| Item | Description | +| --- | --- | +| **Enabled GCP APIs** | The *Compute Engine*, *Service Directory* and *Cloud DNS* APIs must all be enabled in your GCP account:

`$ gcloud services enable compute.googleapis.com`
`$ gcloud services enable dns.googleapis.com`
`$ gcloud services enable servicedirectory.googleapis.com` | +| **Your GCP project name** | The string identifier for your GCP account, like *resonant-diode-123456* | +| **VPC network and subnetwork names** | The name of the network and subnetwork to create your consumer endpoint in. | + +Procedure: + + + + +Add `` to [deployment.xml](/en/reference/applications/deployment#endpoint-private), allowing access to the container cluster from the GCP account with the designated project ID. The example below allows consumer endpoints created under the `private-test` account to connect to the endpoint service on the `my-container` cluster, in each region listed under the `` tag. + +The example also shows how to disable the public zone endpoint by adding the [`"zone"` type endpoint](/en/reference/applications/deployment#endpoint-zone) declaration—this is an optional step, and not required to set up the private service + ```xml + + + region-1 + region-2 + + + + + + + + + ``` + Build and deploy the application package, and wait for it to deploy to the indicated regions. + + +Navigate to the endpoints tab for your application in the Console, and find the service ID for the deployment to which you wish to connect. While there, verify that access to connect to the endpoint was granted to the correct projects. + + +![Service ID for VPC endpoint](/assets/img/vpc-2.png) + + + + + +[Create a service consumer endpoint](https://cloud.google.com/vpc/docs/configure-private-service-connect-services) in your VPC. This is your entry point, which forwards connections to your Vespa application through the private GCP network. 
In this example, the project is named `test-project`, has a VPC network named `test-network` that resides in the GCP region `us-central1`, with a subnet `test-subnet` to hold the endpoint, behind an address to be named `test-address`, and the service ID of the endpoint service (found in the Console) is `projects/vespa-external/regions/us-central1/serviceAttachments/scsa-xxxxxx`. Finally, the endpoint is named `badc0ffee`, and the service directory namespace is `my-tenant-my-app`. See the discussion on generated endpoint names in the last item in this guide. + +Create network (if it does not already exist): + +```bash +$ gcloud compute networks create test-network +``` + +Create subnet (if it does not already exist): + +```bash +$ gcloud compute networks subnets create test-subnet \ + --region=us-central1 \ + --network=test-network \ + --range=10.10.0.0/24 +``` + +Create the IP address which will be used for the endpoint, for clients inside your VPC: + +```bash +$ gcloud compute addresses create test-address \ + --region=us-central1 \ + --subnet=test-subnet +``` + +Create a forwarding rule for traffic to the above IP, to the service endpoint in Vespa Cloud: + +```bash +$ gcloud compute forwarding-rules create badc0ffee \ + --region=us-central1 \ + --network=test-network \ + --address=test-address \ + --target-service-attachment=projects/vespa-external/regions/us-central1/serviceAttachments/scsa-xxxxxx \ + --service-directory-registration=projects/test-project/locations/us-central1/namespaces/my-tenant-my-app +``` + +Note the ID of the created resource, for the verification step below. + + + +Navigate back to the endpoints tab in the Console, and refresh the page. You should now see a new entry representing the connection between your newly created interface endpoint, and the endpoint service on your container cluster. This is the "CONNECTED ENDPOINTS" in the image above. Verify the ID matches the resource ID of the forwarding rule created above. 
The connection is ready when the state is `open`. + + + +The generated endpoint name (see last items) of the designated container cluster should now resolve through private DNS inside your VPC. The easiest way to verify this is to launch an instance in your VPC, inside the designated subnet, and run a couple of commands from it: + +```bash +$ host badc0ffee.deadbeef.z.vespa-app.cloud +$ curl https://badc0ffee.deadbeef.z.vespa-app.cloud/status.html +``` + +The resolved IP address should be that of the address created earlier, and the `curl` command should simply output `OK`. + +If the endpoint fails to resolve, refer to [GCP's troubleshooting documentation](https://cloud.google.com/vpc/docs/configure-private-service-connect-services#troubleshooting). + + +When a consumer endpoint is created with a *Service Directory* namespace, GCP automatically creates a private DNS record for that endpoint, which must be used instead of the IP address (created above) of the endpoint, as Vespa application containers have web certificates matching specific domain names. Unfortunately, we are unable to set the final endpoint names for the consumer endpoint. For a private endpoint service, we can only set a domain name *suffix*, and GCP then generates private DNS records matching *your endpoint resource name prepended to this suffix*. The Service Directory namespace of these endpoints *must also be one-to-one* with their domain name suffixes, lest the automatic setup fail. + +The domain name suffix used by Vespa Cloud is `[.].z.vespa-app.cloud`. We therefore encourage using the `-` pair as the service directory namespace, as this ensures a one-to-one mapping between suffixes and namespaces, as required by GCP (see above). + +The Vespa Cloud web certificates (see above) match any direct descendant of the domain suffix we set for your services. Thus, any endpoint resource name yields a private DNS record that matches the web certificate, with Service Directory. 
Moreover, the zone endpoints generated by Vespa Cloud consist of a random, unique cluster-instance-region ID. Using this same ID as the GCP endpoint resource name (as in the example) results in identical domain names for the private DNS set up by GCP, and the endpoint names generated by Vespa Cloud, visible in our console. + + + +## Configuring Private Endpoint Authentication + +You can configure private endpoints to use either mTLS or token-based authentication with the optional `auth-method` attribute. If the attribute is not set, mTLS will be used by default. The attribute is only allowed with `private` type endpoints and must be either `mtls` or `token`. + + +**Note:** + +Only one authentication method can be enabled at the same time. Enabling token authentication will disable mTLS authentication for the private endpoint, and vice versa. + + +#### Example with token-based authentication + +```xml + + + region-1 + region-2 + + + + + + + +``` + +#### Changing authentication method for an existing deployment + +If you have an existing deployment with a private endpoint, you must remove any connections and redeploy with a [validation override](/en/reference/applications/validation-overrides) to modify the authentication method: + + + +Remove the VPC interface endpoint (AWS) or service consumer endpoint (GCP) configured above + + +Change the authentication method for the endpoint in `deployment.xml` + + +Deploy with the `zone-endpoint-change` validation override: + ```xml + + + zone-endpoint-change + + + ``` + + \ No newline at end of file diff --git a/mintlify-docs/en/operations/production-deployment.mdx b/mintlify-docs/en/operations/production-deployment.mdx new file mode 100644 index 0000000000..c8eefc12f3 --- /dev/null +++ b/mintlify-docs/en/operations/production-deployment.mdx @@ -0,0 +1,243 @@ +--- +title: "Production Deployment" +--- + +Production zones enable serving from various locations, with a [CI/CD pipeline](/en/operations/automated-deployments) for 
safe deployments. This guide goes through the minimal steps for a production deployment - in short: + +- Configure a production zone in [deployment.xml](/en/reference/applications/deployment). +- Configure resources for clusters in [services.xml](/en/reference/applications/services/services). +- Name the tenant, application, log in. +- Create or have access to the data-plane cert/key pair. +- Deploy the application to Vespa Cloud. + +The sample application used in [getting started](/en/basics/deploy-an-application) is a good basis for these steps, see [source files](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation). + +Read [migrating to Vespa Cloud](/en/learn/migrating-to-cloud) first, as a primer on deployment and endpoint usage. + +There are alternative ways of deploying at the end of this guide, too. + +## deployment.xml + +Add a `` element to *deployment.xml*: + +```xml + + + aws-us-east-1c + + +``` + +If *deployment.xml* does not exist, add it to the application package root (next to *services.xml*). + + +**Note:** + +If the application uses [private endpoints](/en/operations/private-endpoints), add this configuration here, too, and run the setup steps in the guide. + + +## services.xml + +Modify *services.xml* - minimal example: + +```xml expandable + + + + + + + + + + + + 2 + + + + + + + + + +``` + +For production deployments, at least 2 nodes are required for each cluster to ensure availability during maintenance tasks and upgrades. In some cases one might still want to use just 1 node per cluster, even though redundancy will be lost. This can be done by adding [a validation override](/en/reference/applications/validation-overrides) `minimum-node-count` (and additional validation override `redundancy-one` in case of a content cluster). 
The `nodes` section is also where you specify your required [resources](/en/reference/applications/services/services#resources): + +```xml + + + +``` + +Also note the minimum redundancy requirement of 2: + +```xml +2 +``` + +## Minimum resources + +To help ensure a reliable service, there is a minimum resource requirement for nodes in the production environment. The minimum is currently 0.5 VCPU, 8Gb of memory, and for disk, 2 x memory for stateless nodes, or 3 x memory for content nodes. As the disk resource is normally the least expensive, we recommend it should be allocated generously to ensure it does not limit the use of more expensive cpu and memory resources. + +## Application name + +Give the deployment a name and log in: + +```bash +vespa config set target cloud +vespa config set application mytenant.myapp +vespa auth login +``` + +The tenant name is found in the console, the application is something unique within your organization - see [tenants, applications and instances](/en/learn/tenant-apps-instances). + +## Add public certificate + +Just as in the [getting started](/en/basics/deploy-an-application) guide, the application package needs the public key in the *security* directory. You might already have a pair, if not generate it: + +```bash +$ vespa auth cert -f +Success: Certificate written to security/clients.pem +Success: Certificate written to /Users/me/.vespa/mytenant.myapp.default/data-plane-public-cert.pem +Success: Private key written to /Users/me/.vespa/mytenant.myapp.default/data-plane-private-key.pem +``` + +Observe that the files are put in *$HOME/.vespa*. The content from *data-plane-public-cert.pem* is copied to *security/clients.pem*. More details on [data-plane access control permissions](/en/security/guide#permissions). + +## Deploy the application + +Package the application and deploy it to a production zone: + +```bash +vespa prod deploy +``` + +Find alternative deployment procedures in the next sections. 
+ + +**Note:** + +The `vespa prod deploy` command for prod zones, which uses [deployment.xml](/en/reference/applications/deployment), differs from the `vespa deploy` command used for dev zones - see [environments](/en/operations/environments). + + +## Endpoints + +Find the 'zone' endpoint to use under Endpoints in the [console](https://console.vespa-cloud.com/). There is an mTLS endpoint for each zone by default. See [configuring mTLS](/en/security/guide#configuring-mtls) for how to use mTLS certificates. + +You can also add [access tokens](/en/security/guide#configuring-tokens) in the console as an alternative to mTLS, and specify [global](/en/reference/applications/deployment#endpoints-global) and [private](/en/reference/applications/deployment#endpoint-private) endpoints in *deployment.xml*. + +Write data efficiently using the [document/v1 API](/en/reference/api/document-v1) using HTTP/2, or with the [Vespa CLI](/en/clients/vespa-cli). There is also a [Java library](/en/clients/vespa-feed-client#java-library). + +To feed data from a self-hosted Vespa into a new cloud instance, see the [appendix](#feeding-data-from-an-existing-vespa-instance) or [cloning applications and data](/en/operations/cloning). + +Also see the [http best practices documentation](/en/clients/http-best-practices). + +## Automate deployments + +Use [deploy-vector-search.yaml](https://github.com/vespa-cloud/vector-search/blob/main/.github/workflows/deploy-vector-search.yaml) as a starting point, and see [Automating with GitHub Actions](/en/operations/automated-deployments#automating-with-github-actions) for more information. + +## Production deployment using console + +Instead of using the [Vespa CLI](/en/clients/vespa-cli), one can build an application package for production deployment using zip only: + +- Create [deployment.xml](#deployment-xml) and modify [services.xml](#services-xml) as above. +- Skip the [Application name](#application-name) step.
+- Add a public certificate to *security/clients.pem*. See [creating a self-signed certificate](/en/basics/deploy-an-application-shell#create-a-self-signed-certificate) for how to create the key/cert pair, then copy the cert file to *security/clients.pem*. At this point, the files are ready for deployment. +- Create a deployable zip-file: + + ```bash + zip -r application.zip . \ + -x application.zip "ext/*" README.md .gitignore ".idea/*" + ``` +- Click *Create Application* in the [console](https://console.vespa-cloud.com/). Select the *PROD* tab. Enter a name for the application and drop the *application.zip* file in the upload section. +- Click *Create and deploy* to deploy the application to the production environment. + +## Production deployment with components + +Deploying an application with [Components](/en/applications/components) is a little different from above: + +- The application package root is at *src/main/application*. +- Find the Vespa API version to compile the component. +- The application package is built into a zip artifact, before deploying it. + +See [Getting started java](/en/basics/deploy-an-application-java) for prerequisites. Procedure: + + + +Use the [album-recommendation-java](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation-java) sample application as a starting point. + + +Make the same changes to *src/main/application/deployment.xml* and *src/main/application/services.xml*. + + +Run the same steps for [Application name](#application-name) and [Add public certificate](#add-public-certificate). 
+ + +Find the lowest Vespa version of the current deployments (if any) - [details](/en/operations/automated-deployments#deploying-components): + +```bash +mvn vespa:compileVersion \ + -Dtenant=mytenant \ + -Dapplication=myapp +``` + + +Build *target/application.zip*: + +```bash +mvn -U package -Dvespa.compile.version="$(cat target/vespa.compile.version)" +``` + + +Run the [Deploy the application](#deploy-the-application) step. Here, the Vespa CLI command will deploy *target/application.zip* built in the step above. + + + +## Next steps + +- Vespa Cloud takes responsibility for rolling out application changes to all production zones as well as testing the changes first. You will usually want to set up a job which automatically builds your application package when changes to it are checked in, to get continuous deployment of your application. Read [automated deployments](/en/operations/automated-deployments) for automation, adding CD tests and multi-zone deployments. +- Once you have experience with load patterns, consider [autoscaling](/en/operations/autoscaling). +- Set up [monitoring](/en/operations/monitoring). + +## Feeding data from an existing Vespa instance + +To dump data from an existing Vespa instance, you can use this command with Vespa CLI: + +```bash +slices=10 +for slice in $(seq 0 $((slices-1))); do + vespa visit \ + --slices $slices --slice-id $slice \ + --target [existing Vespa instance endpoint] \ + | gzip > dump.$slice.gz & +done +``` + +This dumps all the content to files, but you can also pipe the content directly into 'vespa feed'. + +To feed the data: + +```bash +slices=10 +for slice in $(seq 0 $((slices-1))); do + zcat dump.$slice.gz | \ + vespa feed \ + --application .. \ + --target [zone endpoint from the Vespa Console] - +done +``` + +Note that the different slices in these commands can be done in parallel on different machines. 
+ +## Accessing a public cloud application from another VPC on another account + +A common challenge when deploying on the public cloud is network connectivity between workloads running in different accounts and VPCs. Within a team, this is often resolved by setting up VPC peering between VPCs, but this has its challenges when coordinating between many different teams and dynamic workloads. Vespa does not support direct VPC peering. + +There are three recommended options: + +1. **Use your public endpoints, but IPv6 if you can:** The default. There are many advantages to a Zero-Trust approach and accessing your application through the public endpoint. If you use IPv6, you will also avoid some of the network costs associated with IPv4 NATs, etc. For some applications, this option could be cost prohibitive, but one should not assume this is the case for all applications with a moderate amount of data being transferred over the endpoint. +2. **Use private endpoints via AWS PrivateLink or GCP Private Service Connect:** Vespa allows you to set up private endpoints for exclusive access from your own, co-located VPCs. This requires less administrative overhead than general VPC peering and is also more secure. Refer to [private endpoints](/en/operations/private-endpoints). +3. **Run Vespa workloads in your own account/project (Enclave):** The Vespa Cloud Enclave feature allows you to have all your Vespa workloads run in your own account. In this case, you can set up any required peering to open the connection into your application. While generally available, using Vespa Cloud Enclave requires significantly more effort from the application team in terms of operating the service, and is only recommended for larger applications that can justify the additional work from e.g., a security or interoperability perspective. Refer to [Vespa Cloud Enclave](/en/operations/enclave/enclave).
\ No newline at end of file diff --git a/mintlify-docs/en/operations/reindexing.mdx b/mintlify-docs/en/operations/reindexing.mdx new file mode 100644 index 0000000000..537bb18079 --- /dev/null +++ b/mintlify-docs/en/operations/reindexing.mdx @@ -0,0 +1,50 @@ +--- +title: "Reindexing" +--- + +When the indexing pipeline of a Vespa application changes, Vespa may automatically refeed stored data such that the index is updated according to the new specification. Changes in the indexing pipeline may be due to changes in external libraries, e.g. for linguistics, or due to changes in the configuration done by the user, such as the [indexing script](/en/reference/writing/indexing-language) in a document's schema, or the [indexing mode](/en/reference/applications/services/content#document.mode) of a document type in a content cluster. Reindexing can be done for an application's full corpus, for only certain content clusters, or for only certain document types in certain clusters, using the [reindex endpoint](/en/reference/api/deploy-v2#reindex), and inspected at the [reindexing endpoint](/en/reference/api/deploy-v2#reindexing), details are described below. + +## Start reindexing + +When a change in the indexing pipeline of an application is deployed, this is discovered by the config server (see the [prepare endpoint](/en/reference/api/deploy-v2#prepare-session) for details). If the change is to be deployed, a [validation override](/en/reference/applications/validation-overrides) might have to be added to the application package (e.g. if changing match settings for a field). Deployment output will then list the *reindex actions* required to make the index reflect the new indexing pipeline. Use the [reindex endpoint](/en/reference/api/deploy-v2#reindex) to mark reindexing as ready for affected document types, **but only after the new indexing pipeline is successfully deployed**, i.e. when the application has converged on the config generation that introduced the change. 
Reindexing then commences with the next deployment of the application. Summary of steps needed to enable and start reindexing: + + + +Change indexing pipeline in application package, adding validation overrides if needed + + +Wait until config has converged on new config generation + + +Mark reindexing change as ready by POSTing to reindex endpoint + + +Start reindexing job by deploying application package one more time + + + +## Reindexing progress + +Reindexing is done by a component in each content cluster that [visits](/en/writing/visiting) all documents of the indicated types, and re-feeds these through the [indexing chain](/en/writing/indexing) of the cluster. (Note that only the [document fields](/en/reference/schemas/schemas#document) are re-fed — all derived fields, produced by the indexing pipeline, are recomputed.) The reindexing process avoids write races with concurrent feed by locking [small subsets](/en/content/buckets) of the corpus when reindexing them; this may cause elevated write latencies for a fraction of concurrent write operations, but does not impact general throughput. Moreover, since reindexing can be both lengthy and resource consuming, depending on the corpus, the process is tuned to yield resources to other tasks, such as external feed and serving, and is generally safe to run in the background. + +Reindexing is done for one document type at a time, in parallel across content clusters. Detailed progress can be found at the [reindexing endpoint](/en/reference/api/deploy-v2#reindexing). If state is *failed*, reindexing attempts to resume from the position where it failed after a grace period of some minutes. State *pending* indicates reindexing will start, or resume, when the cluster is ready, while *running* means it's currently progressing. Finally, *successful* means all documents of that type were successfully reindexed. 
Additionally, if the *speed* of a reindexing is `0.0`—set by users—that reindexing is halted until the speed is either set to a positive value again, or it is replaced by a new reindexing of that document type. + +## Procedure + +Refer to [schema changes](/en/reference/schemas/schemas#modifying-schemas) for a procedure / way to test the reindexing feature, and tools to validate the data. + +## Use cases + +Below are sample changes to the schema for different use cases, or examples of operational steps for data manipulation. + +| Use case | Description | +| --- | --- | +| **clear field** | To clear a field, do a partial update of all documents with the value, say an empty string.

It is also possible to use reindexing, but there is a twist - intuitively, this would work:

`field artist type string {`
`indexing: "" \| summary \| index`
`}`

However, the reset only works for [synthetic fields](/en/reference/schemas/schemas#schema).

A solution is to deploy a [document processor](/en/applications/document-processors) that empties the field, to the default indexing chain - then trigger a reprocessing. | +| **change indexing settings** | As reindexing takes time, a field's data can be in one state or another, while the queries to it have the most current state. This is OK for many changes and applications.

If not, it is possible to reindex to a new field for a more atomic change. Add a *synthetic field* outside the *document definition* and pipe the content of the current field to it:

`search mydocs {`
`field title_non_stemmed type string {`
`indexing: input title \| index \| summary`
`stemming: none`
`}`
`document mydocs {`
`field title type string`
`{`
`indexing: index \| summary`
`}`

Once reindexing is completed, switch queries to use the new field. This solution naturally increases memory and disk requirements in the transition.

Going back to using the original field with the new settings can be done by changing the index settings for the original field, waiting for reindexing to finish, switching queries back to the original field, and then removing the temporary synthetic field. | + +Relevant pointers: + + + + + \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/admin-procedures.mdx b/mintlify-docs/en/operations/self-managed/admin-procedures.mdx new file mode 100644 index 0000000000..d41c9ec158 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/admin-procedures.mdx @@ -0,0 +1,249 @@ +--- +title: "Administrative Procedures" +sidebarTitle: "Admin procedures" +--- + +## Install + +Refer to the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) sample application for a primer on how to set up a cluster - use this as a starting point. Try the [Multinode testing and observability](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode) sample app to get familiar with interfaces and behavior. + +## Vespa start / stop / restart + +Start and stop all services on a node: + +`$ $VESPA_HOME/bin/`[`vespa-start-services`](/en/reference/operations/self-managed/tools#vespa-start-services) +`$ $VESPA_HOME/bin/`[`vespa-stop-services`](/en/reference/operations/self-managed/tools#vespa-stop-services) + +Likewise, for the config server: + +`$ $VESPA_HOME/bin/`[`vespa-start-configserver`](/en/reference/operations/self-managed/tools#vespa-start-configserver) +`$ $VESPA_HOME/bin/`[`vespa-stop-configserver`](/en/reference/operations/self-managed/tools#vespa-stop-configserver) + +There is no *restart* command, do a *stop* then *start* for a restart.
Learn more about which processes / services are started at [Vespa startup](/en/operations/self-managed/config-sentinel#start-sequence), read the [start sequence](/en/operations/self-managed/configuration-server#start-sequence) and find training videos in the vespaengine [YouTube channel](https://www.youtube.com/@vespaai). + +Use [vespa-sentinel-cmd](/en/reference/operations/self-managed/tools#vespa-sentinel-cmd) to stop/start individual services. + + +**Important:** + +Running *vespa-stop-services* on a content node will call [prepareRestart](/en/reference/operations/self-managed/tools#vespa-proton-cmd) to optimize restart time, and is the recommended way to stop Vespa on a node. + + +See [multinode](/en/operations/self-managed/multinode-systems#aws-ec2) for *systemd* /*systemctl* examples. [Docker containers](/en/operations/self-managed/docker-containers) has relevant start/stop information, too. + +### Content node maintenance mode + +When stopping a content node *temporarily* (e.g. for a software upgrade), consider manually setting the node into [maintenance mode](/en/reference/api/cluster-v2#maintenance) *before* stopping the node to prevent automatic redistribution of data while the node is down. Maintenance mode must be manually removed once the node has come back online. See also: [cluster state](#cluster-state). 
+ +Example of setting a node with [distribution key](/en/reference/applications/services/content#node) 42 into `maintenance` mode using [vespa-set-node-state](/en/reference/operations/self-managed/tools#vespa-set-node-state), additionally supplying a reason that will be recorded by the cluster controller: + +```bash + $ vespa-set-node-state --type storage --index 42 maintenance "rebooting for software upgrade" +``` + +After the node has come back online, clear maintenance mode by marking the node as `up`: + +```bash + $ vespa-set-node-state --type storage --index 42 up +``` + +Note that if the above commands are executed *locally* on the host running the services for node 42, `--index 42` can be omitted; `vespa-set-node-state` will use the distribution key of the local node if no `--index` has been explicitly specified. + +## System status + +- Use [vespa-config-status](/en/reference/operations/self-managed/tools#vespa-config-status) on a node in [hosts.xml](/en/reference/applications/hosts) to verify all services run with updated config +- Make sure [VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables) is set and identical on all nodes in hosts.xml +- Use the *cluster controller* status page (below) to track the status of search/storage nodes. +- Check [logs](/en/reference/operations/log-files) +- Use performance graphs, System Activity Report (*sar*) or [status pages](#status-pages) to track load +- Use [query tracing](/en/reference/api/query#trace.level) +- Disk and/or memory might be exhausted and block feeding - recover from [feed block](/en/writing/feed-block) + +## Status pages + +All Vespa services have status pages, for showing health, Vespa version, config, and metrics. Status pages are subject to change at any time - take care when automating. Procedure: + + + +**Find the port:** The status pages run on ports assigned by Vespa.
To find status page ports, use [vespa-model-inspect](/en/reference/operations/self-managed/tools#vespa-model-inspect) to list the services run in the application. + +```bash +$ vespa-model-inspect services +``` + +To find the status page port for a specific node for a specific service, pick the correct service and run: + +```bash +$ vespa-model-inspect service [Options] +``` + + +**Get the status and metrics:** *distributor*, *storagenode*, *searchnode* and *container-clustercontroller* are content services with status pages. These ports are tagged HTTP. The cluster controller has multiple ports tagged HTTP, where the port tagged STATE is the one with the status page. Try connecting to the root at the port, or /state/v1/metrics. The *distributor* and *storagenode* status pages are available at `/`: + +```bash +$ vespa-model-inspect service searchnode + + searchnode @ myhost.mydomain.com : search + search/search/cluster.search/0 + tcp/myhost.mydomain.com:19110 (STATUS ADMIN RTC RPC) + tcp/myhost.mydomain.com:19111 (FS4) + tcp/myhost.mydomain.com:19112 (TEST HACK SRMP) + tcp/myhost.mydomain.com:19113 (ENGINES-PROVIDER RPC) + tcp/myhost.mydomain.com:19114 (HEALTH JSON HTTP) + $ curl http://myhost.mydomain.com:19114/state/v1/metrics + ... + $ vespa-model-inspect service distributor + distributor @ myhost.mydomain.com : content + search/distributor/0 + tcp/myhost.mydomain.com:19116 (MESSAGING) + tcp/myhost.mydomain.com:19117 (STATUS RPC) + tcp/myhost.mydomain.com:19118 (STATE STATUS HTTP) + $ curl http://myhost.mydomain.com:19118/state/v1/metrics + ... + $ curl http://myhost.mydomain.com:19118/ + ... +``` + + +**Use the cluster controller status page**: A status page for the cluster controller is available at the status port at `http://hostname:port/clustercontroller-status/v1/**`. If *clustername* is not specified, the available clusters will be listed.
The cluster controller leader status page will show if any nodes are operating with differing cluster state versions. It will also show how many data buckets are pending merging (document set reconciliation) due to either missing or being out of sync. + +`$` [`vespa-model-inspect`](/en/reference/operations/self-managed/tools#vespa-model-inspect) `service container-clustercontroller | grep HTTP` + +With multiple cluster controllers, look at the one with a "/0" suffix in its config ID; it is the preferred leader. + +The cluster state version is listed under the *SSV* table column. Divergence here usually points to host or networking issues. + + + +## Cluster state + +Cluster and node state information is available through the [/cluster/v2 API](/en/reference/api/cluster-v2). This API can also be used to set a *user state* for a node - alternatively use: + + + + + + + +Also see the cluster controller [status page](#status-pages). + +State is persisted in a ZooKeeper cluster; restarting/changing a cluster controller preserves: + +- Last cluster state version number, for new cluster controller handover at restarts +- User states, set by operators - i.e. nodes manually set to down / maintenance + +If state data is lost, the cluster state is reset - see [cluster controller](/en/content/content-nodes#cluster-controller) for implications. + +## Cluster controller configuration + +It is recommended to run cluster controllers on the same hosts as [config servers](/en/operations/self-managed/configuration-server), as they share a ZooKeeper cluster for state and deploying three nodes is best practice for both. See the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) sample app for a working example.
+ +To configure the cluster controller, use [services.xml](/en/reference/applications/services/content#cluster-controller) and/or add [configuration](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/fleetcontroller.def) under the *services* element - example: + +```bash + + + 5000 + +``` + +A broken content node may end up with processes constantly restarting. It may die during initialization due to accessing corrupt files, or it may die when it starts receiving requests of a given type triggering a node local bug. This is bad for distributor nodes, as these restarts create constant ownership transfer between distributors, causing windows where buckets are unavailable. + +The cluster controller has functionality for detecting such nodes. If a node restarts more than [max\_premature\_crashes](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/fleetcontroller.def) times in a way that is not detected as a controlled shutdown, the cluster controller will set the wanted state of this node to be down. + +Detecting a controlled restart is currently a bit tricky. A controlled restart is typically initiated by sending a TERM signal to the process. Not having any other sign, the content layer has to assume that all TERM signals are the cause of controlled shutdowns. Thus, if the process keeps being killed by the kernel due to using too much memory, this will look like controlled shutdowns to the content layer. + +## Monitor distance to ideal state + +Refer to the [distribution algorithm](/en/content/idealstate). Use distributor [status pages](#status-pages) to inspect state metrics, see [metrics](/en/content/content-nodes#metrics). `idealstate.merge_bucket.pending` is the best metric to track, it is 0 when the cluster is balanced - a non-zero value indicates buckets out of sync. + +## Cluster configuration + +- Running `vespa prepare` will not change served configuration until `vespa activate` is run.
`vespa prepare` will warn about all config changes that require restart. +- Refer to [schemas](/en/basics/schemas) for how to add/change/remove these. +- Refer to [elasticity](/en/content/elasticity) for how to add/remove capacity from a Vespa cluster, procedure below. +- See [chained components](/en/applications/chaining) for how to add or remove searchers and document processors. +- Refer to the [sizing examples](/en/operations/self-managed/sizing-examples) for changing from a *flat* to *grouped* content cluster. + +## Add or remove a content node + + + +**Node setup:** Prepare the node by installing software, set up the file systems/directories and set [VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables). [Start](#vespa-start-stop-restart) the node. + + +**Modify configuration:** Add/remove a [node](/en/reference/applications/services/content#node)\-element in *services.xml* and [hosts.xml](/en/reference/applications/hosts). Refer to [multinode install](/en/operations/self-managed/multinode-systems). Make sure the *distribution-key* is unique. + + +**Deploy**: [Observe metrics](#monitor-distance-to-ideal-state) to track progress as the cluster redistributes documents. Use the [cluster controller](/en/content/content-nodes#cluster-controller) to monitor the state of the cluster. + + +**Tune performance (optional):** Use [maxpendingidealstateoperations](https://github.com/vespa-engine/vespa/blob/master/storage/src/vespa/storage/config/stor-distributormanager.def) to tune concurrency of bucket merge operations from distributor nodes. Likewise, tune [merges](/en/reference/applications/services/content#merges) - concurrent merge operations per content node. The tradeoff is speed of bucket replication vs use of resources, which impacts the applications' regular load. + + +**Finish:** The cluster is done redistributing when `idealstate.merge_bucket.pending` is zero on all distributors. 
+ + + +Do not remove more than *redundancy*\-1 nodes at a time, to avoid data loss. Observe `idealstate.merge_bucket.pending` to know bucket replica status, when zero on all distributor nodes, it is safe to remove more nodes. If [grouped distribution](/en/content/elasticity#grouped-distribution) is used to control bucket replicas, remove all nodes in a group if the redundancy settings ensure replicas in each group. + +To increase bucket redundancy level before taking nodes out, [retire](/en/content/content-nodes) nodes. Again, track `idealstate.merge_bucket.pending` to know when done. Use the [/cluster/v2 API](/en/reference/api/cluster-v2) or [vespa-set-node-state](/en/reference/operations/self-managed/tools#vespa-set-node-state) to set a node to the *retired* state. You can set any number of nodes retired at the same time. The [cluster controller's](/en/content/content-nodes#cluster-controller) status page lists node states. + +An alternative to increasing cluster size is building a new cluster, then migrate documents to it. This is supported using [visiting](/en/writing/visiting). + +To *merge* two content clusters, add nodes to the cluster like above, considering: +- [distribution-keys](/en/reference/applications/services/content#node) must be unique. Modify paths like *$VESPA\_HOME/var/db/vespa/search/mycluster/n3* before adding the node. +- Set [VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables), then start the node. + +## Topology change + +Read [changing topology first](/en/content/elasticity#changing-topology), and plan the sequence of steps. + +Make sure to not change the `distribution-key` for nodes in *services.xml*. + +It is not required to restart nodes as part of this process + +## Add or remove services on a node + +It is possible to run multiple Vespa services on the same host. If changing the services on a given host, stop Vespa on the given host before running `vespa activate`. 
This is because the services are dynamically allocated port numbers, depending on what is running on the host. Consider if some of the services changed are used by services on other hosts. In that case, restart services on those hosts too. Procedure: + + + +Edit *services.xml* and *hosts.xml* + + +Stop Vespa on the nodes that have changes + + +Run `vespa prepare` and `vespa activate` + + +Start Vespa on the nodes that have changes + + + +## Troubleshooting + +Also see the [FAQ](/en/learn/faq). + +||| +| --- | --- | +| **No endpoint** | Most problems with the quick start guides are due to Docker out of memory. Make sure at least 6G memory is allocated to Docker:

`$ docker info \| grep "Total Memory"`
`or`
`$ podman info \| grep "memTotal"`

OOM symptoms include INFO log messages like:

`Problem with Handshake localhost:8080 ssl=false: localhost:8080 failed to respond `

The container is named *vespa* in the guides, for a shell do:

`$ docker exec -it vespa bash` | +| **Log viewing** | Use [vespa-logfmt](/en/reference/operations/self-managed/tools#vespa-logfmt) to view the vespa log - example:

`$ /opt/vespa/bin/vespa-logfmt -l warning,error` | +| **Json** | For json pretty-print, append

`\| python -m json.tool`

to commands that output json - or use [jq](https://stedolan.github.io/jq/). | +| **Routing** | Vespa lets application set up custom document processing / indexing, with different feed endpoints. Refer to [indexing](/en/writing/indexing) for how to configure this in *services.xml*.

[#13193](https://github.com/vespa-engine/vespa/issues/13193) has a summary of problems and solutions. | +| **Tracing** | Use [tracelevel](/en/reference/api/document-v1#request-parameters) to dump the routes and hops for a write operation - example:

`$ curl -H Content-Type:application/json --data-binary @docs.json \`
`$ENDPOINT/document/v1/mynamespace/doc/docid/1?tracelevel=4 \| jq .`
`{`
`"pathId": "/document/v1/mynamespace/doc/docid/1",`
`"id": "id:mynamespace:doc::1",`
`"trace": [`
`{ "message": "[1623413878.905] Sending message (version 7.418.23) from client to ..." },`
`{ "message": "[1623413878.906] Message (type 100004) received at 'default/container.0' ..." },`
`{ "message": "[1623413878.907] Sending message (version 7.418.23) from 'default/container.0' ..." },`
`{ "message": "[1623413878.907] Message (type 100004) received at 'default/container.0' ..." },`
`{ "message": "[1623413878.909] Selecting route" },`
`{ "message": "[1623413878.909] No cluster state cached. Sending to random distributor." }` | + +## Clean start mode + +There have been rare occasions where Vespa stored data that was internally inconsistent. For those circumstances it is possible to start the node in a [validate\_and\_sanitize\_docstore](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/proton.def) mode. This will do its best to clean up inconsistent data. However, detecting that this is required is not easy, consult the Vespa Team first. In order for this approach to work, all nodes must be stopped before enabling this feature - this is to make sure the data is not redistributed. + +## Content cluster configuration + +||| +| --- | --- | +| **Availability vs resources** | Keeping index structures costs resources. Not all replicas of buckets are necessarily searchable, unless configured using [searchable-copies](/en/reference/applications/services/content#searchable-copies). As Vespa indexes buckets on-demand, the most cost-efficient setting is 1, if one can tolerate temporary coverage loss during node failures. | +| **Data retention vs size** | When a document is removed, the document data is not immediately purged. Instead, *remove-entries* (tombstones of removed documents) are kept for a configurable amount of time. The default is two weeks, refer to [removed-db prune age](/en/reference/applications/services/content#removed-db-prune-age). This ensures that removed documents stay removed in a distributed system where nodes change state. Entries are removed periodically after expiry. Hence, if a node comes back up after being down for more than two weeks, removed documents are available again, unless the data on the node is wiped first. A larger *prune age* will grow the storage size as this keeps documents and tombstones longer.

**Note:**

The backend does not store remove-entries for nonexistent documents. This is to prevent clients sending wrong document identifiers from filling a cluster with invalid remove-entries. A side effect is that if a problem has caused all replicas of a bucket to be unavailable, documents in this bucket cannot be marked removed until at least one replica is available again. Documents are written in new bucket replicas while the others are down - if these are removed, then older versions of these will not re-emerge, as the most recent change wins.
| +| **Transition time** | See [transition-time](/en/reference/applications/services/content#transition-time) for tradeoffs for how quickly nodes are set down vs. system stability. | +| **Removing unstable nodes** | One can configure how many times a node is allowed to crash before it will automatically be removed. The crash count is reset if the node has been up or down continuously for more than the [stable state period](/en/reference/applications/services/content#stable-state-period). If the crash count exceeds [max premature crashes](/en/reference/applications/services/content#max-premature-crashes), the node will be disabled. Refer to [troubleshooting](#troubleshooting). | +| **Minimal amount of nodes required to be available** | A cluster is typically sized to handle a given load. A given percentage of the cluster resources are required for normal operations, and the remainder is the available resources that can be used if some of the nodes are no longer usable. If the cluster loses enough nodes, it will be overloaded:

• Remaining nodes may create a disk full situation. This will likely fail a lot of write operations, and if the disk is shared with the OS, it may also stop the node from functioning.
• Partition queues will grow to maximum size. As queues are processed in FIFO order, operations are likely to get long latencies.
• Many operations may time out while being processed, causing the operation to be resent, adding more load to the cluster.
• When new nodes are added, they cannot serve requests before data is moved to the new nodes from the already overloaded nodes. Moving data puts even more load on the existing nodes, and as moving data is typically not high priority this may never actually happen.

To configure what the minimal cluster size is, use [min-distributor-up-ratio](/en/reference/applications/services/content#min-distributor-up-ratio) and [min-storage-up-ratio](/en/reference/applications/services/content#min-storage-up-ratio). | \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/build-install.mdx b/mintlify-docs/en/operations/self-managed/build-install.mdx new file mode 100644 index 0000000000..412246667e --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/build-install.mdx @@ -0,0 +1,82 @@ +--- +title: "Build / install Vespa" +sidebarTitle: "Build and install" +--- + +To develop with Vespa, follow the [guide](https://github.com/vespa-engine/vespa#building) to set up a development environment on AlmaLinux 8 using Docker. + +Build Vespa Java artifacts with Java >= 17 and Maven >= 3.6.3. Once built, Vespa Java artifacts are ready to be used and one can build a Vespa application using the [bundle plugin](/en/applications/bundles#maven-bundle-plugin). + +```bash +$ export MAVEN_OPTS="-Xms128m -Xmx1024m" +$ ./bootstrap.sh java && mvn install +``` + +See [vespa.ai releases](/en/learn/releases). + +## Container images + +| Image | Description | +| --- | --- | +| [docker.io/vespaengine/vespa](https://hub.docker.com/r/vespaengine/vespa) [ghcr.io/vespa-engine/vespa](https://github.com/orgs/vespa-engine/packages/container/package/vespa) | Container image for running Vespa. | +| [docker.io/vespaengine/vespa-build-almalinux-8](https://hub.docker.com/r/vespaengine/vespa-build-almalinux-8) | Container image for building Vespa on AlmaLinux 8. | +| [docker.io/vespaengine/vespa-dev-almalinux-8](https://hub.docker.com/r/vespaengine/vespa-dev-almalinux-8) | Container image for development of Vespa on AlmaLinux 8. Used for incremental building and system testing. 
| + +## RPMs + +Dependency graph: + + +![RPM overview](/assets/img/rpms.svg) + + +Installing Vespa on AlmaLinux 8: + +```bash +$ dnf config-manager \ + --add-repo https://raw.githubusercontent.com/vespa-engine/vespa/master/dist/vespa-engine.repo +$ dnf config-manager --enable powertools +$ dnf install -y epel-release +$ dnf install -y vespa +``` + +Package repository hosting is graciously provided by [Cloudsmith](https://cloudsmith.com) which is a fully hosted, cloud-native and universal package management solution: +[![OSS hosting by Cloudsmith](https://img.shields.io/badge/OSS%20hosting%20by-cloudsmith-blue?logo=cloudsmith&style=flat-square)](https://cloudsmith.com) + + +**Important:** + +Please note that the retention of released RPMs in the repository is limited to the latest 50 releases. Use the Docker images (above) for installations of specific versions older than this. Any problems with released rpm packages will be fixed in subsequent releases, please [report any issues](https://vespa.ai/support/) - troubleshoot using the [install example](/en/operations/self-managed/multinode-systems#aws-ec2-singlenode). + + +Refer to [vespa.spec](https://github.com/vespa-engine/vespa/blob/master/dist/vespa.spec). 
Build RPMs for a given Vespa version X.Y.Z: + +```bash +$ git clone https://github.com/vespa-engine/vespa +$ cd vespa +$ git checkout vX.Y.Z +$ docker run --rm -ti -v $(pwd):/wd:Z -w /wd \ + docker.io/vespaengine/vespa-build-almalinux-8:latest \ + make -f .copr/Makefile rpms outdir=/wd +$ ls *.rpm | grep -v debug +vespa-8.691.19-1.el8.src.rpm +vespa-8.691.19-1.el8.x86_64.rpm +vespa-ann-benchmark-8.691.19-1.el8.x86_64.rpm +vespa-base-8.691.19-1.el8.x86_64.rpm +vespa-base-libs-8.691.19-1.el8.x86_64.rpm +vespa-clients-8.691.19-1.el8.x86_64.rpm +vespa-config-model-fat-8.691.19-1.el8.x86_64.rpm +vespa-jars-8.691.19-1.el8.x86_64.rpm +vespa-libs-8.691.19.el8.x86_64.rpm +vespa-malloc-8.691.19-1.el8.x86_64.rpm +vespa-node-admin-8.691.19-1.el8.x86_64.rpm +vespa-tools-8.691.19-1.el8.x86_64.rpm +``` + +Find most utilities in the vespa-x.y.z\*.rpm - other RPMs: + +| RPM | Description | +| --- | --- | +| **vespa-tools** | Tools accessing Vespa endpoints for query or document operations:

• [vespa-destination](/en/reference/operations/self-managed/tools#vespa-destination)
• [vespa-fbench](/en/reference/operations/tools#vespa-fbench)
• [vespa-feeder](/en/reference/operations/self-managed/tools#vespa-feeder)
• [vespa-get](/en/reference/operations/self-managed/tools#vespa-get)
• [vespa-query-profile-dump-tool](/en/reference/operations/tools#vespa-query-profile-dump-tool)
• [vespa-stat](/en/reference/operations/self-managed/tools#vespa-stat)
• [vespa-summary-benchmark](/en/reference/operations/self-managed/tools#vespa-summary-benchmark)
• [vespa-visit](/en/reference/operations/self-managed/tools#vespa-visit)
• [vespa-visit-target](/en/reference/operations/self-managed/tools#vespa-visit-target) | +| **vespa-malloc** | Vespa has its own memory allocator, *vespa-malloc* - refer to */opt/vespa/etc/vespamalloc.conf* | +| **vespa-clients** | *vespa-feed-client.jar* - see [vespa-feed-client](/en/clients/vespa-feed-client) | \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/config-proxy.mdx b/mintlify-docs/en/operations/self-managed/config-proxy.mdx new file mode 100644 index 0000000000..6bc533c108 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/config-proxy.mdx @@ -0,0 +1,85 @@ +--- +title: "Configuration proxy" +sidebarTitle: "Config Proxy" +--- + +Read [application packages](/en/basics/applications) for an overview of the cloud config system. The *config proxy* runs on every Vespa node. It has a set of config sources, defined in [VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables). + +The config proxy will act as a proxy for config clients on the same machine, so that all clients can ask for config on *localhost:19090*. The *config source* that the config proxy uses is set in [VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables) and consists of one or more config sources (the addresses of [config servers](/en/operations/self-managed/configuration-server)). + +The proxy has a memory cache that is used to serve configs if it is possible. In default mode, the proxy will have an outstanding request to the config server that will return when the config has changed (a new generation of config). This means that every time config changes on the config server, the proxy will get a response, update its cache and respond to all its clients with the changed config. + +The config proxy has two modes: + +| Mode | Description | +| --- | --- | +| default | Gets config from server and stores in memory cache. 
The config proxy will always be started in *default* mode. Serves from cache if possible. Always uses a config source. If restarted, it will lose all configs that were cached in memory. | +| memorycache | Serves config from memory cache only. Never uses a config source. A restart will lose all cached configs. Setting the mode to *memorycache* will make all applications on the node work as before (given that they have previously been running and requested config), since the config proxy will serve config from cache and work without connection to any config server. Applications on this node will not work if the config proxy stops, is restarted or crashes. | + +Use [vespa-configproxy-cmd](/en/reference/operations/self-managed/tools#vespa-configproxy-cmd) to inspect cached configs, mode, config sources etc., there are also some commands to change some of the settings. Run the command as: + +```bash +$ vespa-configproxy-cmd -m +``` + +to see all possible commands. + +## Detaching from config servers + +```bash +$ vespa-configproxy-cmd -m setmode memorycache +``` + +## Inspecting config + +To inspect the configuration for a service, in this example a searchnode (proton) instance, do: + + + +Find the active config generation used by the service, using [/state/v1/config](/en/reference/api/state-v1#state-v1-config) - example for *http://localhost:19110/state/v1/config*, here the generation is 2: + +```json +{ + "config": { + "generation": 2, + "proton": { + "generation": 2 + }, + "proton.documentdb.music": { + "generation": 2 + } + } +} +``` + + +Find the relevant *config definition name*, *config id* and *config generation* using [vespa-configproxy-cmd](/en/reference/operations/self-managed/tools#vespa-configproxy-cmd) - e.g.:$ vespa-configproxy-cmd | grep proton + +```bash +$ vespa-configproxy-cmd | grep proton + +vespa.config.search.core.proton,music/search/cluster.music/0,2,MD5:40087d6195cedb1840721b55eb333735,XXHASH64:43829e79cea8e714 +``` + 
+`vespa.config.search.core.proton` is the *config definition name* for this particular config, `music/search/cluster.music/0` is the *config id* used by the proton service instance on this node and `2` is the active config generation. This means, the service is using the correct config generation as it is matching the /state/v1/config response (a restart can be required for some config changes). + + +Get the generated config using [vespa-get-config](/en/reference/operations/self-managed/tools#vespa-get-config) - e.g.:$ vespa-get-config -n vespa.config.search.core.proton -i music/search/cluster.music/0 + +```bash +$ vespa-get-config -n vespa.config.search.core.proton -i music/search/cluster.music/0 + +basedir "/opt/vespa/var/db/vespa/search/cluster.music/n0" +rpcport 19106 +httpport 19110 +... +``` + + +**Important:** + +Omitting `-i` will return the default configuration, meaning not generated for the active service instance. + + + + \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/config-sentinel.mdx b/mintlify-docs/en/operations/self-managed/config-sentinel.mdx new file mode 100644 index 0000000000..b6f8e3ce67 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/config-sentinel.mdx @@ -0,0 +1,145 @@ +--- +title: "Config sentinel" +--- + +The config sentinel starts and stops services - and restart failed services unless they are manually stopped. All nodes in a Vespa system have at least these running processes: + +| Process | Description | +| --- | --- | +| [config-proxy](/en/operations/self-managed/config-proxy) | Proxies config requests between Vespa applications and the configserver node. All configuration is cached locally so that this node can maintain its current configuration, even if the configserver shuts down. 
| +| **config-sentinel** | Registers itself with the *config-proxy* and subscribes to and enforces node configuration, meaning the configuration of what services should be run locally, and with what parameters. | +| [vespa-logd](/en/reference/operations/log-files#logd) | Monitors *$VESPA\_HOME/logs/vespa/vespa.log*, which is used by all other services, and relays everything to the [log-server](/en/reference/operations/log-files#log-server). | +| [metrics-proxy](/en/operations/self-managed/monitoring#metrics-proxy) | Provides APIs for metrics access to all nodes and services. | + + +![Vespa node configuration, startup and logs](/assets/img/config-sentinel.svg) + + +Start sequence: + + + +*config server(s)* are started and application config is deployed to them - see [config server operations](/en/operations/self-managed/configuration-server). + + +*config-proxy* is started. The environment variables [VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables) and [VESPA\_CONFIGSERVER\_RPC\_PORT](/en/operations/self-managed/files-processes-and-ports#environment-variables) are used to connect to the [config-server(s)](/en/operations/self-managed/configuration-server). It will retry all config servers in case some are down. + + +*config-sentinel* is started, and subscribes to node configuration (i.e. a service list) from *config-proxy* using its hostname as the [config id](/en/applications/configapi-dev#config-id). See [Node and network setup](/en/operations/self-managed/node-setup) for details about how the hostname is detected and how to override it. The config for the config-sentinel (the service list) lists the processes to be started, along with the *config id* to assign to each, typically the logical name of that service instance. 
+ + +*config-proxy* subscribes to node configuration from *config-server*, caches it, and returns the result to *config-sentinel* + + +*config-sentinel* starts the services given in the node configuration, with the config id as argument. See example output below, like *id="search/qrservers/qrserver.0"*. *logd* and *metrics-proxy* are always started, regardless of configuration. Each service: + + a. Subscribes to configuration from *config-proxy*. + + b. *config-proxy* subscribes to configuration from *config-server*, caches it and returns result to the service. + + c. The service runs according to its configuration, logging to *`$VESPA_HOME/logs/vespa/vespa.log`*. The processes instantiate internal components, each assigned the same or another config id, and instantiating further components. + +Also see [cluster startup](#cluster-startup) for a minimum nodes-up start setting. + + + +When new config is deployed to *config-servers* they propagate the changed configuration to nodes subscribing to it. In turn, these nodes reconfigure themselves accordingly. + +## User interface + +The config sentinel runs an RPC service which can be used to list, start and stop the services supposed to run on that node. This can be useful for testing and debugging. Use [vespa-sentinel-cmd](/en/reference/operations/self-managed/tools#vespa-sentinel-cmd) to trigger these actions. Example output from `vespa-sentinel-cmd list`: + +```bash +vespa-sentinel-cmd 'sentinel.ls' OK. 
+container state=RUNNING mode=AUTO pid=27993 exitstatus=0 id="default/container.0" +container-clustercontroller state=RUNNING mode=AUTO pid=27997 exitstatus=0 id="admin/cluster-controllers/0" +distributor state=RUNNING mode=AUTO pid=27996 exitstatus=0 id="search/distributor/0" +logd state=RUNNING mode=AUTO pid=5751 exitstatus=0 id="hosts/r6-3/logd" +logserver state=RUNNING mode=AUTO pid=27994 exitstatus=0 id="admin/logserver" +searchnode state=RUNNING mode=AUTO pid=27995 exitstatus=0 id="search/search/cluster.search/0" +slobrok state=RUNNING mode=AUTO pid=28000 exitstatus=0 id="admin/slobrok.0" +``` + +To learn more about the processes and services, see [files and processes](/en/operations/self-managed/files-processes-and-ports). Use [vespa-model-inspect host *hostname*](/en/reference/operations/self-managed/tools#vespa-model-inspect) to list services running on a node. + +## Cluster startup + +The config sentinel will not start services on a node unless it has connectivity to a minimum of other nodes, default 50%. Find an example of this feature in the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA#start-the-admin-server) example application. Example configuration: + +```xml + + + + 20 + 1 + + +``` + +Example: `minOkPercent 10` means that services will be started only if more than or equal to 10% of nodes are up. If there are 11 nodes in the application, the first node started will not start its services - when the second node is started, services will be started on both. + +`maxBadCount` is for connectivity checks where the other node is up, but we still do not have proper two-way connectivity. Normally, one-way connectivity means network configuration is broken and needs looking into, so this may be set low (1 or even 0 are the recommended values). 
If there are some temporary problems (in the example below non-responding DNS which leads to various issues at startup) the config sentinel will loop and retry, so the service startup will just be slightly delayed. + +Example log: + +```bash +[2021-06-15 14:33:25] EVENT : starting/1 name="sbin/vespa-config-sentinel -c hosts/le40808.ostk (pid 867)" +[2021-06-15 14:33:25] EVENT : started/1 name="config-sentinel" +[2021-06-15 14:33:25] CONFIG : Sentinel got 4 service elements [tenant(footest), application(bartest), instance(default)] for config generation 1001 +[2021-06-15 14:33:25] CONFIG : Booting sentinel 'hosts/le40808.ostk' with [stateserver port 19098] and [rpc port 19097] +[2021-06-15 14:33:25] CONFIG : listening on port 19097 +[2021-06-15 14:33:25] CONFIG : Sentinel got model info [version 7.420.21] for 35 hosts [config generation 1001] +[2021-06-15 14:33:25] CONFIG : connectivity.maxBadCount = 3 +[2021-06-15 14:33:25] CONFIG : connectivity.minOkPercent = 40 +[2021-06-15 14:33:28] INFO : Connectivity check details: 2086533.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le01287.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le23256.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le23267.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le23297.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le23312.ostk -> connect OK, but reverse check FAILED +[2021-06-15 14:33:28] INFO : Connectivity check details: le23317.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le23319.ostk -> connect OK, but reverse check FAILED +[2021-06-15 14:33:28] INFO : Connectivity check details: le30550.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : 
Connectivity check details: le30553.ostk -> connect OK, but reverse check FAILED +[2021-06-15 14:33:28] INFO : Connectivity check details: le30556.ostk -> unreachable from me, but up +[2021-06-15 14:33:28] INFO : Connectivity check details: le30560.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le30567.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40387.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40389.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40808.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40817.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40833.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40834.ostk -> unreachable from me, but up +[2021-06-15 14:33:28] INFO : Connectivity check details: le40841.ostk -> connect OK, but reverse check FAILED +[2021-06-15 14:33:28] INFO : Connectivity check details: le40858.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40860.ostk -> unreachable from me, but up +[2021-06-15 14:33:28] INFO : Connectivity check details: le40863.ostk -> connect OK, but reverse check FAILED +[2021-06-15 14:33:28] INFO : Connectivity check details: le40873.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40892.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40900.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40905.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: le40914.ostk 
-> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: sm02318.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: sm02324.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: sm02340.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: zt40672.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: zt40712.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: zt40728.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] INFO : Connectivity check details: zt41329.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:28] WARNING : 8 of 35 nodes up but with network connectivity problems (max is 3) +[2021-06-15 14:33:28] WARNING : Bad network connectivity (try 1) +[2021-06-15 14:33:30] WARNING : slow resolve time: 'le30556.ostk' -> '1234:5678:90:123::abcd' (5.00528 s) +[2021-06-15 14:33:30] WARNING : slow resolve time: 'le40834.ostk' -> '1234:5678:90:456::efab' (5.00527 s) +[2021-06-15 14:33:30] WARNING : slow resolve time: 'le40860.ostk' -> '1234:5678:90:789::cdef' (5.00459 s) +[2021-06-15 14:33:31] INFO : Connectivity check details: le23312.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:31] INFO : Connectivity check details: le23319.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:31] INFO : Connectivity check details: le30553.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:31] INFO : Connectivity check details: le30556.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:31] INFO : Connectivity check details: le40834.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:31] INFO : Connectivity check details: le40841.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:31] INFO : Connectivity 
check details: le40860.ostk -> connect OK, but reverse check FAILED +[2021-06-15 14:33:31] INFO : Connectivity check details: le40863.ostk -> OK: both ways connectivity verified +[2021-06-15 14:33:31] INFO : Enough connectivity checks OK, proceeding with service startup +[2021-06-15 14:33:31] EVENT : starting/1 name="searchnode" +... +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/configuration-server.mdx b/mintlify-docs/en/operations/self-managed/configuration-server.mdx new file mode 100644 index 0000000000..6ffd84f6fc --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/configuration-server.mdx @@ -0,0 +1,272 @@ +--- +title: "Configuration Servers" +--- + +Vespa Configuration Servers host the endpoint where application packages are deployed - and serves generated configuration to all services - see the [overview](/en/learn/overview) and [application packages](/en/basics/applications) for details. I.e., one cannot configure Vespa without config servers, and services cannot run without it. + +It is useful to understand the [Vespa start sequence](/en/operations/self-managed/config-sentinel). Refer to the sample applications [multinode](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode) and [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) for practical examples of multi-configserver configuration. + +Vespa configuration is set up using one or more configuration servers (config servers). A config server uses [Apache ZooKeeper](https://zookeeper.apache.org/) as a distributed data storage for the configuration system. In addition, each node runs a config proxy to cache configuration data - find an overview at [services start](/en/operations/self-managed/config-sentinel). 
+ +## Status and config generation + +Check the health of a running config server using (replace localhost with hostname): + +```bash +$ curl http://localhost:19071/state/v1/health +``` + +Note that the config server is a service itself, and runs with file-based configuration. The application packages deployed will not change the config server - the config server serves this configuration to all other Vespa nodes. This will hence always be config generation 0: + +```bash +$ curl http://localhost:19071/state/v1/config +``` + +Details in [start-configserver](https://github.com/vespa-engine/vespa/blob/master/configserver/src/main/sh/start-configserver). + +## Redundancy + +The config servers are defined in [VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables), [services.xml](/en/reference/applications/services/services) and [hosts.xml](/en/reference/applications/hosts): + + +```bash +$ VESPA_CONFIGSERVERS=myserver0.mydomain.com,myserver1.mydomain.com,myserver2.mydomain.com +``` + +```xml + + + + + + + + + +``` +```xml + + + admin0 + + + admin1 + + + admin2 + + +``` + +[VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables) must be set on all nodes. This is a comma- or whitespace-separated list with the hostname of all config servers, like *myhost1.mydomain.com,myhost2.mydomain.com,myhost3.mydomain.com*. + +When there are multiple config servers, the [config proxy](/en/operations/self-managed/config-proxy) will pick a config server randomly (to achieve load balancing between config servers). The config proxy is fault-tolerant and will switch to another config server (if there is more than one) if the one it is using becomes unavailable or there is an error in the configuration it receives. + +For the system to tolerate *n* failures, [ZooKeeper](#zookeeper) by design requires using *(2\*n)+1* nodes. 
Consequently, only an odd number of nodes is useful, so you need a minimum of 3 nodes to have a fault-tolerant config system. + +Even when using just one config server, the application will work if the server goes down (but deploying application changes will not work). Since the *config proxy* runs on every node and caches configs, it will continue to serve config to the services on that node. However, restarting a node when config servers are unavailable means that services on the node will be unable to start since the cache will be destroyed when restarting the config proxy. + +Refer to the [admin model reference](/en/reference/applications/services/admin#configservers) for more details on *services.xml*. + +## Start sequence + +To bootstrap a Vespa application instance, the high-level steps are: + +- Start config servers +- Deploy config +- Start Vespa nodes + +[multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) is a great guide on how to start a multinode Vespa application instance - try this first. Detailed steps for config server startup: + + + +Set [VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables) on all nodes, using fully qualified hostnames and the same value on all nodes, including the config servers. + + +Start the config server on the nodes configured in *services/hosts.xml*. 
Make sure the startup is successful by inspecting [/state/v1/health](/en/reference/api/state-v1#state-v1-health), default on port 19071: + +```bash +$ curl http://localhost:19071/state/v1/health +``` + +```json +{ + "time" : 1651147368066, + "status" : { + "code" : "up" + }, + "metrics" : { + "snapshot" : { + "from" : 1.651147308063E9, + "to" : 1.651147367996E9 + } + } +} +``` + +If there is no response on the health API, two things can have happened: + - The config server process did not start - inspect logs using `vespa-logfmt`, or check *`$VESPA_HOME/logs/vespa/vespa.log`*, normally */opt/vespa/logs/vespa/vespa.log*. + - The config server process started, and is waiting for [Zookeeper quorum](#zookeeper): + + ```bash + $ vespa-logfmt -S configserver + ``` + + ```bash + configserver Container.com.yahoo.vespa.zookeeper.ZooKeeperRunner Starting ZooKeeper server with /opt/vespa/var/zookeeper/conf/zookeeper.cfg. Trying to establish ZooKeeper quorum (members: [node0.vespanet, node1.vespanet, node2.vespanet], attempt 1) + configserver Container.com.yahoo.container.handler.threadpool.ContainerThreadpoolImpl Threadpool 'default-pool': min=12, max=600, queue=0 + configserver Container.com.yahoo.vespa.config.server.tenant.TenantRepository Adding tenant 'default', created 2022-04-28T13:02:24.182Z. 
Bootstrapping in PT0.175576S + configserver Container.com.yahoo.vespa.config.server.rpc.RpcServer Rpc server will listen on port 19070 + configserver Container.com.yahoo.container.jdisc.state.StateMonitor Changing health status code from 'initializing' to 'up' + configserver Container.com.yahoo.jdisc.http.server.jetty.Janitor Creating janitor executor with 2 threads + configserver Container.com.yahoo.jdisc.http.server.jetty.JettyHttpServer Threadpool size: min=22, max=22 + configserver Container.org.eclipse.jetty.server.Server jetty-9.4.46.v20220331; built: 2022-03-31T16:38:08.030Z; git: bc17a0369a11ecf40bb92c839b9ef0a8ac50ea18; jvm 11.0.14.1+1- + configserver Container.org.eclipse.jetty.server.handler.ContextHandler Started o.e.j.s.ServletContextHandler@341c0dfc{19071,/,null,AVAILABLE} + configserver Container.org.eclipse.jetty.server.AbstractConnector Started configserver@3cd6d147{HTTP/1.1, (http/1.1, h2c)}{0.0.0.0:19071} + configserver Container.org.eclipse.jetty.server.Server Started @21955ms + configserver Container.com.yahoo.container.jdisc.ConfiguredApplication Switching to the latest deployed set of configurations and components. Application config generation: 0 + ``` + +It will hang until quorum is reached, and the second highlighted log line is emitted. Root causes for missing quorum can be: + - No connectivity between the config servers. Zookeeper logs the members like `(members: [node0.vespanet, node1.vespanet, node2.vespanet], attempt 1)`. Verify that the nodes running config server can reach each other on port 2181. + - No connectivity can be wrong network config. [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) uses a docker network, make sure there are no underscores in the hostnames. + + +Once all config servers return `up` on *state/v1/health*, an application package can be deployed. 
This means, if deploy fails, it is always a good idea to verify the config server health first - if config servers are up, and deploy fails, it is most likely an issue with the application package - if so, refer to [application packages](/en/basics/applications). + + +A successful deployment logs the following, for the *prepare* and *activate* steps: + +```bash +Container.com.yahoo.vespa.config.server.ApplicationRepository Session 2 prepared successfully. +Container.com.yahoo.vespa.config.server.deploy.Deployment Session 2 activated successfully using no host provisioner. Config generation 2. File references: [file '9cfc8dc57f415c72'] +Container.com.yahoo.vespa.config.server.session.SessionRepository Session activated: 2 +``` + + +Start the Vespa nodes. Technically, they can be started at any time. When troubleshooting, it is easier to make sure the config servers are started successfully, and deployment was successful - before starting any other nodes. Refer to the [Vespa start sequence](/en/operations/self-managed/config-sentinel) and [Vespa start / stop / restart](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart). + + + +Make sure to look for logs on all config servers when debugging. + +## Scaling up + +Add a config server node for increased fault tolerance or when replacing a node. Read up on [ZooKeeper configuration](#zookeeper-configuration) before continuing. Although it is *possible* to add more than one config server at a time, doing it one by one is recommended, to keep the ZooKeeper quorum intact. + +Due to the ZooKeeper majority vote, use one or three config servers. + + + +Install *vespa* on new config server node. + + +Append the config server node's hostname to VESPA\_CONFIGSERVERS on all nodes, then (re)start all config servers in sequence to update the ZooKeeper config. By appending, the current config server nodes keep their current ZooKeeper index. Restart the existing config server(s) first. 
When starting up, the config server logs which servers are configured to the Vespa log. + + +Update *services.xml* and *hosts.xml* with the new set of config servers, then *vespa prepare* and *vespa activate*. + + +Restart other nodes one by one to start using the new config servers. This will let the vespa nodes use the updated set of config servers. + + + +The config servers will automatically redistribute the application data to new nodes. + +## Scaling down + +This is the inverse of scaling up, and the procedure is the same. Remove config servers from the end of *VESPA\_CONFIGSERVERS*, and here one can remove two nodes in one go, if going from three to one. + +## Replacing nodes + +- Make sure to replace only one node at a time. +- If you have only one config server you need to first scale up with a new node, then scale down by removing the old node. +- If you have 3 or more you can replace one of the old nodes in VESPA\_CONFIGSERVERS with the new one instead of adding one, otherwise follow the same procedure as in [Scaling up](#scaling-up). Repeat for each node you want to replace. + +## Tools + +Tools to access config: + + + + + + + +## ZooKeeper + +[ZooKeeper](https://zookeeper.apache.org/) handles data consistency across multiple config servers. The config server Java application runs a ZooKeeper server, embedded with an RPC frontend that the other nodes use. ZooKeeper stores data internally in *nodes* that can have *sub-nodes*, similar to a file system. + +At [vespa prepare](/en/reference/clients/vespa-cli#vespa-prepare), the application's files, along with global configurations, are stored in ZooKeeper. The application data is stored under */config/v2/tenants/default/sessions/\[sessionid\]/userapp*. At [vespa activate](/en/reference/clients/vespa-cli#vespa-activate), the newest application is activated *live* by writing the session id into */config/v2/tenants/default/applications/default:default:default*. It is at that point the other nodes get configured. 
+ +Use *vespa-zkcli* to inspect state, replacing *sessionid* with the actual session id: + +```bash +$ vespa-zkcli ls /config/v2/tenants/default/sessions/sessionid/userapp +$ vespa-zkcli get /config/v2/tenants/default/sessions/sessionid/userapp/services.xml +``` + +The ZooKeeper server logs to *`$VESPA_HOME/logs/vespa/zookeeper.configserver.0.log` (files are rotated with sequence number)* + +### ZooKeeper configuration + +The members of the ZooKeeper cluster are generated based on the contents of [VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables). *`$VESPA_HOME/var/zookeeper/conf/zookeeper.cfg`* is written when (re)starting the config server. Hence, config server(s) must all be restarted when `VESPA_CONFIGSERVERS` changes. + +The order of the nodes is used to create indexes in *zookeeper.cfg*, so do not change the node order. + +### ZooKeeper recovery + +If the config server(s) should experience data corruption, for instance a hardware failure, use the following recovery procedure. One example of such a scenario is if *`$VESPA_HOME/logs/vespa/zookeeper.configserver.0.log`* says *java.io.IOException: Negative seek offset at java.io.RandomAccessFile.seek(Native Method)*, which indicates ZooKeeper has not been able to recover after a full disk. There is no need to restart Vespa on other nodes during the procedure: + +1. [vespa-stop-configserver](/en/reference/operations/self-managed/tools#vespa-stop-configserver) +2. [vespa-configserver-remove-state](/en/reference/operations/self-managed/tools#vespa-configserver-remove-state) +3. [vespa-start-configserver](/en/reference/operations/self-managed/tools#vespa-start-configserver) +4. [vespa](/en/clients/vespa-cli#deployment) prepare `` +5. [vespa](/en/clients/vespa-cli#deployment) activate + +This procedure completely cleans out ZooKeeper's internal data snapshots and deploys from scratch. 
+ +Note that by default the [cluster controller](/en/content/content-nodes#cluster-controller) that maintains the state of the content cluster will use the same shared ZooKeeper instance, so the content cluster state is also reset when removing state. Manually set state will be lost (e.g. a node with user state *down*). It is possible to run cluster-controllers in standalone zookeeper mode - see [standalone-zookeeper](/en/reference/applications/services/admin#cluster-controllers). + +### ZooKeeper barrier timeout + +If the config servers are heavily loaded, or the applications being deployed are big, the internals of the server may time out when synchronizing with the other servers during deploy. To work around this, increase the timeout by setting [VESPA\_CONFIGSERVER\_ZOOKEEPER\_BARRIER\_TIMEOUT](/en/operations/self-managed/files-processes-and-ports#environment-variables) to 600 (seconds) or higher, and restart the config servers. + +## Configuration + +To access config from a node not running the config system (e.g. doing feeding via the Document API), use the environment variable [VESPA\_CONFIG\_SOURCES](/en/operations/self-managed/files-processes-and-ports#environment-variables): + +```bash +$ export VESPA_CONFIG_SOURCES="myadmin0.mydomain.com:19071,myadmin1.mydomain.com:19071" +``` + +Alternatively, for Java programs, use the system property *configsources* and set it programmatically or on the command line with the *\-D* option to Java. The syntax for the value is the same as for *VESPA\_CONFIG\_SOURCES*. + +### System requirements + +The minimum heap size for the JVM the config server runs under is 128 MB and the maximum heap size is 2 GB (which can be changed with a [setting](/en/performance/container-tuning#config-server-and-config-proxy)). The config server writes a transaction log that is regularly purged of old items, so little disk space is required. Note that running on a server that has a lot of disk I/O will adversely affect performance and is not recommended. 
+ +### Ports + +The config server RPC port can be changed by setting [VESPA\_CONFIGSERVER\_RPC\_PORT](/en/operations/self-managed/files-processes-and-ports#environment-variables) on all nodes in the system. + +Changing HTTP port requires changing the port in *`$VESPA_HOME/conf/configserver-app/services.xml`*: + +```xml + + + +``` + +When deploying, use the *\-p* option, if port is changed from the default. + +## Troubleshooting + +| Problem | Description | +| --- | --- | +| **Health checks** | Verify that a config server is up and running using [/state/v1/health](/en/reference/api/state-v1#state-v1-health), see [start sequence](#start-sequence). Status code is `up` if the server is up and has finished bootstrapping.

Alternatively, use [http://localhost:19071/status.html](http://localhost:19071/status.html) which will return response code 200 if server is up and has finished bootstrapping.

Metrics are found at [/state/v1/metrics](/en/reference/api/state-v1#state-v1-metrics). Use [vespa-model-inspect](/en/reference/operations/self-managed/tools#vespa-model-inspect) to find host and port number, port is 19071 by default. | +| **Consistency** | When having more than one config server, consistency between the servers is crucial. [http://localhost:19071/status](http://localhost:19071/status) can be used to check that settings for config servers are the same for all servers.

[vespa-config-status](/en/reference/operations/self-managed/tools#vespa-config-status) can be used to check config on nodes.

[http://localhost:19071/application/v2/tenant/default/application/default](http://localhost:19071/application/v2/tenant/default/application/default) displays active config generation and should be the same on all servers, and the same as in response from running [vespa deploy](/en/clients/vespa-cli#deployment) | +| **Bad Node** | If running with more than one config server and one of these goes down or has hardware failure, the cluster will still work and serve config as usual (clients will switch to use one of the good servers). It is not necessary to remove a bad server from the configuration.

Deploying applications will take longer, as [vespa deploy](/en/clients/vespa-cli#deployment) will not be able to complete a deployment on all servers when one of them is down. If this is troublesome, lower the [barrier timeout](#zookeeper-barrier-timeout) - (default value is 120 seconds).

Note also that if you have not configured [cluster controllers](/en/reference/applications/services/admin#cluster-controller) explicitly, these will run on the config server nodes and the operation of these might be affected. This is another reason for not trying to manually remove a bad node from the config server setup. | +| **Stuck filedistribution** | The config system distributes binary files (such as jar bundle files) using [file-distribution](/en/reference/applications/deployment#file-distribution) - use [vespa-status-filedistribution](/en/reference/operations/self-managed/tools#vespa-status-filedistribution) to see detailed status if it gets stuck. | +| **Memory** | Insufficient memory on the host / in the container running the config server will cause startup or deploy / configuration problems - see [Docker containers](/en/operations/self-managed/docker-containers). | +| **ZooKeeper** | The following can be caused by a full disk on the config server, or clocks out of sync:

`at com.yahoo.vespa.zookeeper.ZooKeeperRunner.startServer(ZooKeeperRunner.java:92)`
`Caused by: java.io.IOException: The accepted epoch, 10 is less than the current epoch, 48`

Users have reported that "Copying the currentEpoch to acceptedEpoch fixed the problem". | \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/container.mdx b/mintlify-docs/en/operations/self-managed/container.mdx new file mode 100644 index 0000000000..faf7d4aea3 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/container.mdx @@ -0,0 +1,95 @@ +--- +title: "Container" +description: "This is the Container service operational guide." +--- + + +![Vespa Overview](/assets/img/vespa-overview.svg) + + +Note that "container" is an overloaded concept in Vespa - in this guide it refers to service instance nodes in blue. + +Refer to [container metrics](/en/operations/metrics#container-metrics). + +## Endpoints + +Container service(s) hosts the query and feed endpoints - examples: + +- [album-recommendation](https://github.com/vespa-engine/sample-apps/blob/master/album-recommendation/app/services.xml) configures \_both\_ query and feed in the same container cluster (i.e. service): + ```xml + + + + + + + + ``` +- [multinode-HA](https://github.com/vespa-engine/sample-apps/blob/master/examples/operations/multinode-HA/services.xml) configures query and feed in separate container clusters (i.e. services): + ```xml + + + + + + + + + + + + + + + + ``` + +Observe that `` and `` are located in separate clusters in the second example, and endpoints are therefore different. + + +**Important:** + +The first thing to validate when troubleshooting query errors is to make sure that the endpoint is correct, i.e. that query requests hit the correct nodes. A query will be written to the [access log](/en/operations/access-logging) on one of the nodes in the container cluster + + +## Inspecting Vespa Java Services using JConsole + +Determine the state of each running Java Vespa service using JConsole. JConsole is distributed along with the Java developer kit. 
Start JConsole: + +```bash +$ jconsole <host>:<port> +``` + +where the host and port determine which service to attach to. For security purposes the JConsole tool cannot directly attach to Vespa services from external machines. + +### Connecting to a Vespa instance + +To attach a JConsole to a Vespa service running on another host, create a tunnel from the JConsole host to the Vespa service host. This can for example be done by setting up two SSH tunnels as follows: + +```bash +$ ssh -N -L<port1>:localhost:<port1> <host> & +$ ssh -N -L<port2>:localhost:<port2> <host> & +``` + +where port1 and port2 are determined by the type of service (see below). A JConsole can then be attached to the service as follows: + +```bash +$ jconsole localhost:<port1> +``` + +Port numbers: + +| Service | Port 1 | Port 2 | +| --- | --- | --- | +| QRS | 19015 | 19016 | +| Docproc | 19123 | 19124 | + +Updated port information can be found by running: + +`$` [`vespa-model-inspect`](/en/reference/operations/self-managed/tools#vespa-model-inspect) `service ` + +where the resulting RMIREGISTRY and JMX lines determine port1 and port2, respectively. + +### Examining thread states + +The state of each container is available in JConsole by pressing the Threads tab and selecting the thread of interest in the threads list. Threads of interest include *search*, *connector*, *closer*, *transport* and *acceptor* (the latter four are used for backend communications). \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/content-node-recovery.mdx b/mintlify-docs/en/operations/self-managed/content-node-recovery.mdx new file mode 100644 index 0000000000..cad08f9f1d --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/content-node-recovery.mdx @@ -0,0 +1,35 @@ +--- +title: "Content node recovery" +--- + +In exceptional cases, one or more content nodes may end up with corrupted data causing it to fail to restart. 
Possible reasons are + +- the application configuring a higher memory or disk limit such that the node is allowed to accept more data than it can manage, +- hardware failure, or +- a bug in Vespa. + +Normally a corrupted node can just be wiped of all data or removed from the cluster, but when this happens simultaneously to multiple nodes, or redundancy 1 is used, it may be necessary to recover the node(s) to avoid data loss. This document explains the procedure. + +## Recovery steps + +On each of the nodes needing recovery: + + + +[Stop services](/en/operations/self-managed/admin-procedures#vespa-start-%2F-stop-%2F-restart) on the node if running. + + +Repair the node: + - If the node cannot start due to needing more memory than available: Increase the memory available to the node, or if not possible stop all non-essential processes on the node using [`vespa-sentinel-cmd`](/en/reference/operations/self-managed/tools#vespa-sentinel-cmd) list and `vespa-sentinel-cmd stop [name]`, and (if necessary) start only the content node process using `vespa-sentinel-cmd start searchnode`. When the node is successfully started, issue delete operations or increase the cluster size to reduce the amount of data on the node if necessary. + - If the node cannot start due to needing more disk than available: Increase the disk available to the node, or if not possible delete non-essential data such as logs and cached packages. When the node is successfully started, issue delete operations or increase the cluster size to reduce the amount of data on the node if necessary. + - If the node cannot start for any other reason, repair the data manually as needed. This procedure will depend on the specific nature of the data corruption. + + +[Start services](/en/operations/self-managed/admin-procedures#vespa-start-%2F-stop-%2F-restart) on the node. 
+ + +Verify that the node is fully up before doing the next node - metrics/interfaces to be used to evaluate if the next node can be stopped: + - Check if a node is up using [/state/v1/health](/en/reference/api/state-v1#state-v1-health). + - Check the `vds.idealstate.merge_bucket.pending.average` metric on content nodes. When 0, all buckets are in sync - see [example](/en/operations/metrics). + + \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/cpu-support.mdx b/mintlify-docs/en/operations/self-managed/cpu-support.mdx new file mode 100644 index 0000000000..cf99ef3d9c --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/cpu-support.mdx @@ -0,0 +1,25 @@ +--- +title: "CPU Support" +--- + +For maximum performance, the current version of Vespa for x86\_64 is compiled only for [Haswell (2013)](https://en.wikipedia.org/wiki/Haswell_\(microarchitecture\)) or later CPUs. If trying to run on an older CPU, you will likely see error messages like the following: + +```bash +Problem running program /opt/vespa/bin/vespa-runserver => died with signal: illegal instruction (you probably have an older CPU than required) +``` + +or in older versions of Vespa, something like + +```bash +/usr/local/bin/start-container.sh: line 67: 10 Illegal instruction /opt/vespa/bin/vespa-start-configserver +``` + +If you would like to run Vespa on an older CPU, we provide a [generic x86 container image](https://hub.docker.com/r/vespaengine/vespa-generic-intel-x86_64/). This image is slower, receives less testing than the regular image, and is less frequently updated. 
+ +**To start a Vespa Docker container using this image:** + +```bash +$ docker run --detach --name vespa --hostname vespa-container \ + --publish 8080:8080 --publish 19071:19071 \ + vespaengine/vespa-generic-intel-x86_64 +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/docker-containers.mdx b/mintlify-docs/en/operations/self-managed/docker-containers.mdx new file mode 100644 index 0000000000..0cb54ba470 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/docker-containers.mdx @@ -0,0 +1,246 @@ +--- +title: "Docker containers" +--- + +This document describes tuning and adaptions for running Vespa Docker containers, for developer use on laptop, and in production. + +## Mounting persistent volumes + +The [quick start](/en/basics/deploy-an-application-local) and [AWS ECS multinode](/en/operations/self-managed/multinode-systems#aws-ecs) guides show how to run Vespa in Docker containers. In these examples, all the data is stored inside the container - the data is lost if the container is deleted. When running Vespa inside Docker containers in production, volume mappings to the parent host should be added to persist data and logs. + +- /opt/vespa/var +- /opt/vespa/logs + +```bash +$ mkdir -p /tmp/vespa/var; export VESPA_VAR_STORAGE=/tmp/vespa/var +$ mkdir -p /tmp/vespa/logs; export VESPA_LOG_STORAGE=/tmp/vespa/logs +$ docker run --detach --name vespa --hostname vespa-container \ + --volume $VESPA_VAR_STORAGE:/opt/vespa/var \ + --volume $VESPA_LOG_STORAGE:/opt/vespa/logs \ + --publish 8080:8080 \ + vespaengine/vespa +``` + +## Start Vespa container with Vespa user + +You can start the container directly as the *vespa* user. The *vespa* user and group within the container are configured with user id *1000* and group id *1000*. The vespa user and group must be the owner of the */opt/vespa/var* and */opt/vespa/logs* volumes that are mounted in the container for Vespa to start. 
This is required for Vespa to create the required directories and files within those directories. + +The start script will check that the correct owner uid and gid are set and fail if the wrong user or group is set as the owner. + +When using an isolated user namespace for the Vespa container, you must set the uid and gid of the directories on the host to the subordinate uid and gid, depending on your mapping. See the [Docker documentation](https://docs.docker.com/engine/security/userns-remap/) for more details. + +```bash +$ mkdir -p /tmp/vespa/var; export VESPA_VAR_STORAGE=/tmp/vespa/var +$ mkdir -p /tmp/vespa/logs; export VESPA_LOG_STORAGE=/tmp/vespa/logs +$ sudo chown -R 1000:1000 $VESPA_VAR_STORAGE $VESPA_LOG_STORAGE +$ docker run --detach --name vespa --user vespa:vespa --hostname vespa-container \ + --volume $VESPA_VAR_STORAGE:/opt/vespa/var \ + --volume $VESPA_LOG_STORAGE:/opt/vespa/logs \ + --publish 8080:8080 \ + vespaengine/vespa +``` + +## System limits + +When Vespa starts inside Docker containers, the startup scripts will set [system limits](/en/operations/self-managed/files-processes-and-ports#vespa-system-limits). Make sure that the environment starting the Docker engine is set up in such a way that these limits can be set inside the containers. + +For a CentOS/RHEL base host, Docker is usually started by [systemd](https://www.freedesktop.org/software/systemd/man/systemd.exec.html). In this case, `LimitNOFILE`, `LimitNPROC` and `LimitCORE` should be set to meet the minimum requirements in [system limits](/en/operations/self-managed/files-processes-and-ports#vespa-system-limits). + +In general, when using Docker or Podman to run Vespa, the `--ulimit` option should be used to set limits according to [system limits](/en/operations/self-managed/files-processes-and-ports#vespa-system-limits). The `--pids-limit` should be set to unlimited (`-1` for Docker and `0` for Podman). 
+ +## Transparent Huge Pages + +Vespa performance improves significantly by enabling [Transparent Huge Pages (THP)](https://www.kernel.org/doc/html/latest/admin-guide/mm/transhuge.html), especially for memory-intensive applications with large dense tensors with concurrent query and write workloads. + +One application improved query p99 latency from 950 ms to 150 ms during concurrent query and write by enabling THP. Using THP is even more important when running in virtualized environments like AWS and GCP due to nested page tables. + +When running Vespa using the container image, *THP* settings must be set on the base host OS (Linux). The recommended settings are: + +```bash +$ echo 1 > /sys/kernel/mm/transparent_hugepage/khugepaged/defrag +$ echo always > /sys/kernel/mm/transparent_hugepage/enabled +$ echo never > /sys/kernel/mm/transparent_hugepage/defrag +``` + +To verify that the setting is active, one should see that *AnonHugePages* is non-zero. In this case, 75 GB has been allocated using AnonHugePages. + +```bash +$ cat /proc/meminfo |grep AnonHuge + + AnonHugePages: 75986944 kB +``` + +Note that the Vespa container needs to be restarted after modifying the base host OS settings to make the changes effective. Vespa uses `MADV_HUGEPAGE` for memory allocations done by the [content node process (proton)](/en/content/proton). + +## Controlling which services to start + +The Docker image *vespaengine/vespa*'s [start script](https://github.com/vespa-engine/docker-image/blob/master/include/start-container.sh) takes a parameter that controls which services are started inside the container. 
+ +Starting a *configserver* container: + +```bash +$ docker run \ + --env VESPA_CONFIGSERVERS= \ + vespaengine/vespa configserver +``` + +Starting a *services* container (configserver will not be started): + +```bash +$ docker run \ + --env VESPA_CONFIGSERVERS= \ + vespaengine/vespa services +``` + +Starting a container with *both configserver and services*: + +```bash +$ docker run \ + --env VESPA_CONFIGSERVERS= \ + vespaengine/vespa configserver,services +``` + +This is required in the case where the configserver container should run other services like an adminserver or logserver (see [services.html](/en/reference/applications/services/services)) + +If the [VESPA\_CONFIGSERVERS](/en/operations/self-managed/files-processes-and-ports#environment-variables) environment variable is not specified, it will be set to the container hostname, also see [node setup](/en/operations/self-managed/node-setup#hostname). + +Use the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) sample application as a blueprint for how to set up config servers and services. + +## Graceful stop + +Stopping a running *vespaengine/vespa* container triggers a graceful shutdown, which saves time when starting the container again (i.e., data structures are flushed). If the container is shut down forcefully, the content nodes might need to restore the state from the transaction log, which might be time-consuming. There is no chance of data loss or data corruption as the data is always written and synced to persistent storage. + +The default timeout for the Docker daemon to wait for the shutdown might be too low for larger number of documents per node. 
Below stop will wait at least 120 seconds before terminating the running container forcefully, if the stop is successfully performed before the timeout has passed, the command takes less than the timeout: + +```bash +$ docker stop name -t 120 +``` + +It is also possible to configure the default Docker daemon timeout, see [\--shutdown-timeout](https://docs.docker.com/reference/cli/dockerd/). + +A clean content node shutdown looks like: + +```bash +[2025-05-02 10:07:52.052] EVENT searchnode proton.node.server stopping/1 name="storagenode" why="Stopped" +[2025-05-02 10:07:52.056] EVENT searchnode proton stopping/1 name="servicelayer" why="clean shutdown" +[2025-05-02 10:07:52.056] INFO searchnode proton.proton.server.rtchooks shutting down monitoring interface +[2025-05-02 10:07:52.058] INFO searchnode proton.searchlib.docstore.logdatastore Flushing. Disk bloat is now at 0 of 8832 at 0.00 percent +[2025-05-02 10:07:52.059] INFO searchnode proton.searchlib.docstore.logdatastore Flushing. Disk bloat is now at 0 of 8832 at 0.00 percent +[2025-05-02 10:07:52.060] INFO searchnode proton.searchlib.docstore.logdatastore Flushing. Disk bloat is now at 0 of 8840 at 0.00 percent +[2025-05-02 10:07:52.066] INFO searchnode proton.transactionlog.server Stopping TLS +[2025-05-02 10:07:52.066] INFO searchnode proton.transactionlog.server TLS Stopped +[2025-05-02 10:07:52.071] EVENT searchnode proton stopping/1 name="proton" why="clean shutdown" +[2025-05-02 10:07:52.078] EVENT config-sentinel sentinel.sentinel.service stopped/1 name="searchnode" pid=354 exitcode=0 +``` + +## Memory + +The [sample applications](https://github.com/vespa-engine/sample-apps) and [local application deployment guide](/en/basics/deploy-an-application-local) indicates the minimum memory requirements for the Docker containers. + + +**Note:** + +Too little memory is a very common problem when testing Vespa in Docker containers. 
Use the below to troubleshoot before making a support request, and also see the [FAQ](/en/learn/faq). + + +As a rule of thumb, a single-node Vespa application requires a minimum of 4 GB for the Docker container. Using `docker stats` can be useful to track memory usage: + +```bash +$ docker stats + +CONTAINER ID NAME CPU % MEM USAGE / LIMIT MEM % NET I/O BLOCK I/O PIDS +589bf5801b22 node0 213.25% 697.3MiB / 3.84GiB 17.73% 14.2kB / 11.5kB 617MB / 976MB 253 +e108dde84679 node1 213.52% 492.7MiB / 3.84GiB 12.53% 15.7kB / 12.7kB 74.3MB / 924MB 252 +be43aacd0bbb node2 191.22% 497.8MiB / 3.84GiB 12.66% 19.6kB / 21.6kB 64MB / 949MB 261 +``` + +It is not necessarily easy to verify that Vespa has started all services successfully. Symptoms of errors due to insufficient memory vary, depending on where it fails. Example: Inspect restart logs in a container named *vespa*, running the [quickstart](/en/basics/deploy-an-application-local) with only 2G: + +```bash +$ docker exec -it vespa sh -c "/opt/vespa/bin/vespa-logfmt -S config-sentinel -c sentinel.sentinel.service" + +INFO : config-sentinel sentinel.sentinel.service container: incremented restart penalty to 2.000 seconds +INFO : config-sentinel sentinel.sentinel.service container: incremented restart penalty to 6.000 seconds +INFO : config-sentinel sentinel.sentinel.service container: incremented restart penalty to 14.000 seconds +INFO : config-sentinel sentinel.sentinel.service container: incremented restart penalty to 30.000 seconds +INFO : config-sentinel sentinel.sentinel.service container: will delay start by 25.173 seconds +INFO : config-sentinel sentinel.sentinel.service container: incremented restart penalty to 62.000 seconds +INFO : config-sentinel sentinel.sentinel.service container: incremented restart penalty to 126.000 seconds +INFO : config-sentinel sentinel.sentinel.service container: will delay start by 119.515 seconds +INFO : config-sentinel sentinel.sentinel.service container: incremented restart penalty to 
254.000 seconds +INFO : config-sentinel sentinel.sentinel.service container: incremented restart penalty to 510.000 seconds +INFO : config-sentinel sentinel.sentinel.service container: will delay start by 501.026 seconds +INFO : config-sentinel sentinel.sentinel.service container: incremented restart penalty to 1022.000 seconds +INFO : config-sentinel sentinel.sentinel.service container: incremented restart penalty to 1800.000 seconds +INFO : config-sentinel sentinel.sentinel.service container: will delay start by 1793.142 seconds +``` + +Observe that the *container* service restarts in a loop, with increasing pause. + +A common problem is [config servers](/en/operations/self-managed/configuration-server) not starting or running properly due to a lack of memory. This manifests itself as nothing listening on 19071, or deployment failures. + +Some guides/sample applications have specific configurations to minimize resource usage. Example from [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA): + +```bash +$ docker run --detach --name node0 --hostname node0.vespanet \ + -e VESPA_CONFIGSERVERS=node0.vespanet,node1.vespanet,node2.vespanet \ + -e VESPA_CONFIGSERVER_JVMARGS="-Xms32M -Xmx128M" \ + -e VESPA_CONFIGPROXY_JVMARGS="-Xms32M -Xmx32M" \ + --network vespanet \ + --publish 19071:19071 --publish 19100:19100 --publish 19050:19050 --publish 20092:19092 \ + vespaengine/vespa +``` + +Here [VESPA\_CONFIGSERVER\_JVMARGS](/en/operations/self-managed/files-processes-and-ports#environment-variables) and [VESPA\_CONFIGPROXY\_JVMARGS](/en/operations/self-managed/files-processes-and-ports#environment-variables) are tweaked to the minimum for a functional test only. + + +**Important:** + +For production use, do not reduce memory settings in `VESPA_CONFIGSERVER_JVMARGS` and `VESPA_CONFIGPROXY_JVMARGS` unless you know what you are doing - the Vespa defaults are set for regular production use, and rarely need changing. 
+ + +Container memory settings are configured in *services.xml*, example from [multinode-HA](https://github.com/vespa-engine/sample-apps/blob/master/examples/operations/multinode-HA/services.xml): + +```xml + + + + + +``` + +Make sure that the settings match the Docker container Vespa is running in. + +Also see [node memory settings](/en/operations/self-managed/node-setup#memory-settings) for more settings. + +## Network + +Vespa processes communicate over both fixed and ephemeral ports - in general, all ports must be accessible. See [example ephemeral use](/en/writing/visiting#handshake-failed). + +Find an example application using a Docker network in [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA). + +## Resource usage + +Note that CPU usage will not be zero even if there are zero documents and zero queries. Starting the *vespaengine/vespa* container image means starting the [configuration server](/en/operations/self-managed/configuration-server) and the [configuration sentinel](/en/operations/self-managed/config-sentinel). When deploying an application, the sentinel starts the configured service processes, and they all listen for work to do, changes in the config, and so forth. + +Therefore, an "idle" container instance consumes CPU and memory. + +## Troubleshooting + +The Vespa documentation examples use `docker`. The Vespa Team also has good experience with using `podman` - in the examples, just change from `docker` to `podman`. We recommend using Podman v5, see the [release notes](https://github.com/containers/podman/blob/main/RELEASE_NOTES.md). [emulating-docker-cli-with-podman](https://podman-desktop.io/docs/migrating-from-docker/emulating-docker-cli-with-podman) is a useful resource. + +Many startup failures are caused by a failed Vespa Container start due to configuration or download errors.
Use `docker logs vespa` to show the log (this example assumes a Docker container named `vespa`, use `docker ps` to list containers). + +### Docker image + +Make sure to use a recent Vespa release (check [releases](https://factory.vespa.ai/releases)) and validate the downloaded image: + +```bash +$ docker images +REPOSITORY TAG IMAGE ID CREATED SIZE +docker.io/vespaengine/vespa latest 8cfb0da22c01 35 hours ago 1.2 GB +``` + +### Model download failures + +If the application package depends on downloaded models, look for `RuntimeException: Not able to create config builder for payload` - [details](/en/applications/components#component-load). \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/files-processes-and-ports.mdx b/mintlify-docs/en/operations/self-managed/files-processes-and-ports.mdx new file mode 100644 index 0000000000..80aa3f946d --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/files-processes-and-ports.mdx @@ -0,0 +1,87 @@ +--- +title: "Files, Processes, Ports, Environment" +--- + +This is a reference of directories used in a Vespa installation, processes that run on the Vespa nodes and ports / environment variables used. Also see [log files](/en/reference/operations/log-files). 
+ +## Directories + +| Directory | Description | +| --- | --- | +| `$VESPA_HOME/bin/` | Command line utilities and scripts | +| `$VESPA_HOME/libexec/vespa/` | Command line utilities and scripts | +| `$VESPA_HOME/sbin/` | Server programs, daemons, etc | +| `$VESPA_HOME/lib64/` | Dynamically linked libraries, typically third-party libraries | +| `$VESPA_HOME/lib/jars/` | Java archives | +| `$VESPA_HOME/logs/vespa/` | Log files | +| `$VESPA_HOME/var/db/vespa/config_server/serverdb/` | Config server database and user applications | +| `$VESPA_HOME/share/vespa/` | A directory with config definitions and XML schemas for application package validation | +| `$VESPA_HOME/conf/vespa` | Various config files used by Vespa or libraries Vespa depend on | + +## Processes and ports + +The following is an overview of which ports and port ranges are used by the different services in a Vespa system. Note that for services capable of running multiple instances on the same node, all instances will run within the listed port range. + +Processes are run as user `vespa`. + +Many services are allocated ports dynamically. So even though the allocation is deterministic, i.e. the same system will get the same ports on subsequent startups, a particular service instance may get different ports when the overall system setup is changed through [services.xml](/en/reference/applications/services/services). Use [vespa-model-inspect](/en/reference/operations/self-managed/tools#vespa-model-inspect) to see port allocations. + +- The number of ports used in a range depends on number of instances that are running +- Not all ports within a range are used, but they are assigned each service to support future extensions +- The range from 19100 is used for internal communication ports, i.e. 
ports that are not necessary to use from an external API +- See [Configuring Http Servers and Filters](/en/applications/http-servers-and-filters) for how to configure Container ports and [services.xml](/en/reference/applications/services/services) for how to configure other ports + +| Process | Host | Port/range | ps | Function | +| --- | --- | --- | --- | --- | +| [Config server](/en/operations/self-managed/configuration-server) | Config server nodes | 19070-19071 | `java (...) -jar $VESPA_HOME/lib/jars/standalone-container-jar-with-dependencies.jar` | Vespa Configuration server | +| | | 2181-2183 | | Embedded Zookeeper cluster ports, see [zookeeper-server.def](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/zookeeper-server.def) | +| [Config sentinel](/en/operations/self-managed/config-sentinel) | All nodes | 19098 | `$VESPA_HOME/sbin/vespa-config-sentinel` | Sentinel that starts and stops vespa services and makes sure they are running unless they are manually stopped | +| [Config proxy](/en/operations/self-managed/config-proxy) | All nodes | 19090 | `java (…) com.yahoo.vespa.config.proxy.ProxyServer` | Communication liaison between Vespa processes and config server. Caches config in memory | +| [Slobrok](/en/operations/self-managed/slobrok) | Admin nodes | 19099 for RPC port, HTTP port dynamically allocated in the 19100-19899 range | `$VESPA_HOME/sbin/vespa-slobrok` | Service location object broker | +| [logd](/en/reference/operations/log-files#logd) | All nodes | 19089 | `$VESPA_HOME/sbin/vespa-logd` | Reads local log files and sends them to log server | +| [Log server](/en/reference/operations/log-files#log-server) | Log server node | 19080 | `java (...) -jar lib/jars/logserver-jar-with-dependencies.jar` | Vespa Log server | +| [Metrics proxy](/en/operations/self-managed/monitoring#metrics-proxy) | All nodes | 19092-19095 | `java (...) 
-jar $VESPA_HOME/lib/jars/container-disc-with-dependencies.jar` | Provides a single access point for metrics from all services on a Vespa node | +| [Distributor](/en/content/content-nodes#distributor) | Content cluster | dynamically allocated in the 19100-19899 range | `$VESPA_HOME/sbin/vespa-distributord-bin` | Content layer distributor processes | +| [Cluster controller](/en/content/content-nodes#cluster-controller) | Content cluster | 19050, plus ports dynamically allocated in the 19100-19899 range | `java (...) -jar $VESPA_HOME/lib/jars/container-disc-jar-with-dependencies.jar` | Cluster controller processes, manages state for content nodes | +| [proton](/en/content/proton) | Content cluster | dynamically allocated in the 19100-19899 range | `$VESPA_HOME/sbin/vespa-proton-bin` | Searchnode process, receives queries from the container and returns results from the indexes. Also receives feed and indexes documents | +| [container](/en/applications/containers) | Container cluster | 8080 | `java (...) -jar $VESPA_HOME/lib/jars/container-disc-with-dependencies.jar` | Container running servers, handlers and processing components | + +## System limits + +The [startup scripts](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart) check that system limits are set, failing startup if not. Refer to [vespa-configserver.service](https://github.com/vespa-engine/vespa/blob/master/vespabase/src/vespa-configserver.service.in) and [vespa.service](https://github.com/vespa-engine/vespa/blob/master/vespabase/src/vespa.service.in) for minimum values. + +## Core dumps + +Example settings: + +```bash +$ mkdir -p /tmp/cores && chmod a+rwx /tmp/cores +$ echo "/tmp/cores/core.%e.%p.%h.%t" > /proc/sys/kernel/core_pattern +``` + +This will write files like */tmp/cores/core.vespa-proton-bi.1721.localhost.1580387387*. + +## Environment variables + +Vespa configuration is set in [application packages](/en/basics/applications).
Some configuration is used to bootstrap nodes - this is set in environment variables. Environment variables are only read at startup. + +*`$VESPA_HOME/conf/vespa/default-env.txt`* is read in Vespa start scripts - use this to modify variables ([example](/en/operations/self-managed/multinode-systems#aws-ec2)). Each line has the format `action variablename value` where the items are: + +| Item | Description | +| --- | --- | +| action | One of `fallback`, `override`, or `unset`. `fallback` sets the variable if it is unset (or empty). `override` sets the value regardless. `unset` unsets the variable. | +| variablename | The name of the variable, e.g. `VESPA_CONFIGSERVERS` | +| value | The rest of the line is the variable's value. | + +Refer to the [template](https://github.com/vespa-engine/vespa/blob/master/vespabase/conf/default-env.txt.in) for format. + +| Environment variable | Description | +| --- | --- | +| VESPA_CONFIGSERVERS | A comma-separated list of hosts to run configservers, use fully qualified hostnames. Should always be set to the same value on all hosts in a multi-host setup. If not set, `localhost` is assumed. Refer to [configuration server operations](/en/operations/self-managed/configuration-server). | +| VESPA_HOSTNAME | Vespa uses `hostname` for node identity. But sometimes this doesn't work properly, either because that name can't be used to find an IP address which works for connecting to services running on the node, or it's just that the name doesn't agree with what the config server thinks the node's host name is. In this case, override by setting the `VESPA_HOSTNAME`, to be used instead of running the `hostname` command.

Note that `VESPA_HOSTNAME` will be used *both* when a node identifies itself to the config server *and* when a service on that node registers a network connection point that other services can connect to.

An error message with "hostname detection failed" is emitted if the `VESPA_HOSTNAME` isn't set and the hostname isn't usable. If `VESPA_HOSTNAME` is set to something that cannot work, an error with "hostname validation failed" is emitted instead. | +| VESPA_CONFIG_SOURCES | Used by libraries like the [Document API](/en/writing/document-api-guide) to set config server endpoints. Refer to [configuration server operations](/en/operations/self-managed/configuration-server#configuration) for example use. | +| VESPA_WEB_SERVICE_PORT | The port number where REST apis will run, default `8080`. This isn't strictly needed, as the port number can be set for each HTTP server in `services.xml`, but with a big application it can be easier to set the default port number just once. Also note that this needs to be set when starting the *configserver*, since the REST api implementation gets its port number from there. | +| VESPA_TLS_CONFIG_FILE | Absolute path to [TLS configuration file](/en/security/mtls). | +| VESPA_CONFIGSERVER_JVMARGS | JVM arguments for the config server - see [tuning](/en/performance/container-tuning#config-server-and-config-proxy). | +| VESPA_CONFIGPROXY_JVMARGS | JVM arguments for the config proxy - see [tuning](/en/performance/container-tuning#config-server-and-config-proxy). | +| VESPA_LOG_LEVEL | Tuning of log output from tools, see [controlling log levels](/en/reference/operations/log-files#controlling-log-levels) | \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/live-upgrade.mdx b/mintlify-docs/en/operations/self-managed/live-upgrade.mdx new file mode 100644 index 0000000000..ddcd3f2de3 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/live-upgrade.mdx @@ -0,0 +1,30 @@ +--- +title: "Live-upgrading Vespa" +sidebarTitle: "Live Vespa upgrade procedure" +--- + +This document describes how to live upgrade a Vespa instance. Use this procedure to upgrade without disruption to read or write traffic. + +1. 
**Before upgrading** + - If upgrading to a **new major version**: Upgrade to the latest version on the current major first, then read the release notes for the new major before progressing. + - If upgrading to a new minor version, [you should upgrade to a version that is less than 2 months older than the one you are running](/en/learn/releases#versions). When upgrading a config server this is verified by checking that the minor version number bump is less than 30, otherwise the config server refuses to start. This behavior can be overridden at your own risk by setting environment variable *VESPA\_SKIP\_UPGRADE\_CHECK=true* on config servers before upgrading. + - Redundancy: For availability, there must be sufficient capacity to take one node per cluster out of service at a time. If the clusters have redundancy=1, or searchable-copies=1, some data will not be available during the upgrade (reduced coverage). + - To reduce node downtime, download the new Vespa version to all hosts in advance. +2. **Detach the application nodes** Not necessary in Vespa 8, for upgrading between Vespa 7 versions see [Vespa 8 release notes](/en/reference/release-notes/vespa8#upgrade-procedure). +3. **Upgrade config servers** + - Install the new Vespa version on the config servers and [restart](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart) them one by one. Wait until each config server is up again before restarting the next - look in vespa log for "Changing health status code from 'initializing' to 'up'" or use [health checks](/en/operations/self-managed/configuration-server#troubleshooting). + - Redeploy and activate the application: + + `$` [`vespa`](/en/clients/vespa-cli#deployment) `prepare && vespa activate` + - The other nodes in the system will not receive config until they are upgraded to the new version (there will be warnings in vespa log containing "Request callback failed: UNKNOWN\_VESPA\_VERSION" until the node is upgraded).
This is to make sure that no new, possibly incompatible, config is served. +4. **Upgrade all other nodes one by one** - for each of the other nodes in the system: + - [Stop services](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart) on the node. + - Install the new Vespa version. + - [Start services](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart) on the node. + - Wait until the node is fully up before doing the next node - metrics/interfaces to be used to evaluate if the next node can be stopped: + - Check if a node is up using [/state/v1/health](/en/reference/api/state-v1#state-v1-health). + - Check the `vds.idealstate.merge_bucket.pending.average` metric on content nodes. When 0, all buckets are in sync - see [example](/en/operations/metrics). + +### Troubleshooting + +See [config server troubleshooting](/en/operations/self-managed/configuration-server#troubleshooting). \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/monitoring.mdx b/mintlify-docs/en/operations/self-managed/monitoring.mdx new file mode 100644 index 0000000000..a29800c636 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/monitoring.mdx @@ -0,0 +1,392 @@ +--- +title: "Monitoring" +description: "Vespa provides metrics integration with CloudWatch, Datadog and Prometheus / Grafana, as well as a JSON HTTP API." +--- + +There are two main approaches to transfer metrics to an external system: + +- Have the external system *pull* metrics from Vespa +- Make Vespa *push* metrics to the external system + +Use the example overview of two nodes running Vespa for where the APIs are set up and how they interact: + + +![Metrics interfaces](/assets/img/metrics-api.svg) + + +- [/metrics/v1/values](#metrics-v1-values) is the node metrics api, and aggregates metrics for processes running on the node. 
+- [/state/v1/metrics](#state-v1-metrics) is the process metrics api, and exposes all metrics from an individual service - here each node runs a container and a content node. +- [/metrics/v2/values](#metrics-v2-values) is an aggregation of [/metrics/v1/values](#metrics-v1-values), for all nodes. Served on the metrics-proxy port. +- [/prometheus/v1/values](/en/reference/api/prometheus-v1#prometheus-v1-values) is the same as [/metrics/v1/values](#metrics-v1-values), in prometheus format. Served on the metrics-proxy port. +- [/prometheus/v1/values](/en/reference/api/prometheus-v1#prometheus-v1-values) and [/metrics/v2/values](#metrics-v2-values) are also replicated on the container port, default 8080. + + +**Note:** + +refer to the [multinode](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode) and [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) sample applications for a practical example of using the APIs. These apps also include examples for how to find ports used by using [vespa-model-inspect](/en/reference/operations/self-managed/tools#vespa-model-inspect). + + +See the [metrics guide](/en/operations/metrics) for how to get a metric using `/metrics/v1/values` and `/prometheus/v1/values`. This guide also documents use of custom metrics and histograms. + +## Metrics proxy + +Each Vespa node has a *metrics-proxy* process running for this API, default port 19092. It aggregates metrics from all processes on the node, and across nodes: + +The metrics-proxy normally listens on port 19092 - use [vespa-model-inspect](/en/reference/operations/self-managed/tools#vespa-model-inspect) to validate. + +See the [metrics guide](/en/operations/metrics) for the metrics interfaces hosted by the metrics proxy. + +Metric-proxies intercommunicate to build a metric cache served on the internal *applicationmetrics/v1/* API. 
This is replicated on the container on */metrics/v2/values* for easy access to all metrics for an application. + +The metrics-proxy is started by the [config-sentinel](/en/operations/self-managed/config-sentinel) and is not configurable. The metrics-proxy process looks like: + +```bash +$ ps ax | grep admin/metrics/vespa-container + + 703 ? Sl 0:10 /usr/bin/java + -Dconfig.id=admin/metrics/vespa-container + ... + -cp /opt/vespa/lib/jars/jdisc_core-jar-with-dependencies.jar + com.yahoo.jdisc.core.StandaloneMain + file:/opt/vespa/lib/jars/container-disc-jar-with-dependencies.jar +``` + +## /state/v1/health + +*Per-process* health status is found at `http://host:port/state/v1/health` + + +![Health API](/assets/img/health-api.svg) + + +`/state/v1/health` is most commonly used for heartbeating, see the [reference](/en/reference/api/state-v1#state-v1-health) for details. Example: + +```json +{ + "status": { + "code": "up", + "message": "Everything ok here" + } +} +``` + +## /state/v1/metrics + +*Per-process* metrics are found at `http://host:port/state/v1/metrics` + +Internally, Vespa aggregates metrics in the APIs above from the *per-process* metrics and health APIs. While most users would use the aggregated APIs, the per-process metric APIs could be used for specific cases. + +Metrics are reported in snapshots, where the snapshot specifies the time window the metrics are gathered from. Typically, the service will aggregate metrics as they are reported, and after each snapshot period, a snapshot is taken of the current values, and they are reset. Using this approach, min and max values are tracked, and enables values like 95% percentile for each complete snapshot period. + +Refer to the [reference](/en/reference/api/state-v1#state-v1-metrics) for details. + +Vespa supports [custom metrics](/en/operations/metrics#metrics-from-custom-components). 
+ +Example: + +```json expandable +{ + "status" : { + "code" : "up", + "message" : "Everything ok here" + }, + "metrics" : { + "snapshot" : { + "from" : 1334134640.089, + "to" : 1334134700.088 + }, + "values" : [ + { + "name" : "queries", + "description" : "Number of queries executed during snapshot interval", + "values" : { + "count" : 28, + "rate" : 0.4667 + }, + "dimensions" : { + "chain" : "vespa" + } + }, + { + "name" : "hits_per_query", + "description" : "Number of hits returned for queries during snapshot interval", + "values" : { + "count" : 28, + "rate" : 0.4667, + "average" : 128.3, + "min" : 0, + "max" : 1000, + "sum" : 3584, + "last" : 72, + "95percentile" : 849.1, + "99percentile": 672.0 + }, + "dimensions" : { + "chain" : "vespa" + } + } + ] + } +} +``` + +A flat list of metrics is returned. Each metric value reported by a component should be a separate metric. For related metrics, prefix metric names with common parts and dot separate the names - e.g. `memory.free` and `memory.virtual`. + +### /metrics/v1/values + +This API can be used for monitoring, using products like [Prometheus](#pulling-into-prometheus) and [DataDog](#pulling-into-datadog). The response contains a selected set of metrics from each service running on the node, see the [reference](/en/reference/api/metrics-v1#metrics-v1-values) for details.
Example: + +```bash +$ curl http://localhost:19092/metrics/v1/values +``` + +```json expandable +{ + "services": [ + { + "name": "vespa.container", + "timestamp": 1661945852, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 3693178880, + "memory_rss": 1331331072, + "cpu": 2.3794255627932, + "cpu_util": 0.2379425562793 + }, + "dimensions": { + "metrictype": "system", + "instance": "container", + "clustername": "default", + "vespaVersion": "8.43.64" + } + } + ] + } + ] +} +``` + +### /metrics/v2/values + +```bash +$ curl http://localhost:19092/metrics/v2/values +``` + +A container service on the same node as the metrics proxy might forward `/metrics/v2/values` on its own port, normally 8080. + +`/metrics/v2/values` exposes a selected set of metrics for every service on all nodes for the application. For example, it can be used to [pull Vespa metrics to Cloudwatch](https://github.com/vespa-engine/metrics-emitter/tree/master/cloudwatch) using an AWS lambda function. + +The [metrics API](#metrics-v2-values) exposes a [selected set of metrics](https://github.com/DataDog/integrations-extras/blob/master/vespa/metadata.csv) for the whole application, or for a single node, to allow integration with graphing and alerting services. + +The response is a `nodes` list with metrics (see example output below), see the [reference](/en/reference/api/metrics-v2#metrics-v2-values) for details. 
+ +```json expandable +{ + "nodes": [ + { + "hostname": "vespa-container", + "role": "hosts/vespa-container", + "services": [ + { + "name": "vespa.container", + "timestamp": 1634127924, + "status": { + "code": "up", + "description": "Data collected successfully" + }, + "metrics": [ + { + "values": { + "memory_virt": 3685253120, + "memory_rss": 1441259520, + "cpu": 29.1900152827305 + }, + "dimensions": { + "serviceId": "container" + } + }, + { + "values": { + "jdisc.gc.ms.average": 0 + }, + "dimensions": { + "gcName": "G1OldGeneration", + "serviceId": "container" + } + }, +``` + +### /prometheus/v1/values + +Vespa provides a *node metrics API* on each *node* at `http://host:port/prometheus/v1/values` + +Port and content is the same as */metrics/v1/values*. + +The prometheus API on each node exposes metrics in a text based [format](https://prometheus.io/docs/instrumenting/exposition_formats/) that can be scraped by [Prometheus](https://prometheus.io/docs/introduction/overview/). See below for a Prometheus / Grafana example. + +## Pulling metrics from Vespa + +All pull-based solutions use Vespa's [metrics API](#metrics-v2-values), which provides metrics in JSON format, either for the full system or for a single node. The polling frequency should be limited to max once every 30 seconds as more frequent polling would not give increased granularity but only lead to unnecessary load on your systems. + +| Service | Description | +| --- | --- | +| CloudWatch | Metrics can be pulled into CloudWatch from both [Vespa Cloud](/) and self-hosted Vespa. The recommended solution is to use an AWS lambda function, as described in [Pulling Vespa metrics to Cloudwatch](https://github.com/vespa-engine/metrics-emitter/tree/master/cloudwatch). | +| Datadog | The Vespa team has created a Datadog Agent integration to allow real-time monitoring of Vespa in Datadog. 
The [Datadog Vespa](https://docs.datadoghq.com/integrations/vespa/) integration is not packaged with the agent, but is included in Datadog's [integrations-extras](https://github.com/DataDog/integrations-extras) repository. Clone it and follow the steps in the [README](https://github.com/DataDog/integrations-extras/blob/master/vespa/README.md).

**Note:**

The Datadog Agent integration currently works for self-hosted Vespa only.
| +| Prometheus | Vespa exposes metrics in a text based [format](https://prometheus.io/docs/instrumenting/exposition_formats/) that can be scraped by [Prometheus](https://prometheus.io/docs/introduction/overview/). For [Vespa Cloud](/), append */prometheus/v1/values* to your endpoint URL. For self-hosted Vespa the URL is: `http://<host>:<port>/prometheus/v1/values`, where the *port* is the same as for searching, e.g. 8080. Metrics for each individual host can also be retrieved at `http://host:19092/prometheus/v1/values`.

See the below for a Prometheus / Grafana example. | + +## Pushing metrics to CloudWatch + + +**Note:** + +This method currently works for self-hosted Vespa only. + + +This is presumably the most convenient way to monitor Vespa in CloudWatch. Steps / requirements: + + + +An IAM user or IAM role that only has the *putMetricData* permission. + + +Store the credentials for the above user or role in a [shared credentials file](https://docs.aws.amazon.com/ses/latest/dg/create-shared-credentials-file.html) on each Vespa node. If a role is used, provide a mechanism to keep the credentials file updated when keys are rotated. + + +Configure Vespa to push metrics to CloudWatch - example configuration for the [admin](/en/reference/applications/services/admin) section in *services.xml*: + +```xml + + + + + + + + +``` + +This configuration sends the default set of Vespa metrics to the CloudWatch namespace `my-vespa-metrics` in the `us-east-1` region. Refer to the [metric list](https://github.com/DataDog/integrations-extras/blob/master/vespa/metadata.csv) for `default` metric set. + + + +## Monitoring with Grafana + +Follow these steps to set up monitoring with Grafana for a Vespa instance. This guide builds on the [quick start](/en/basics/deploy-an-application-local) by adding three more Docker containers and connecting these in the Docker *monitoring* network: + + +![Docker containers in a Docker network](/assets/img/monitoring-getting-started.svg) + + + + +Complete steps [1-7](/en/basics/deploy-an-application-local) (or 1-10), but skip the removal step. Clone repository: + +```bash +$ git clone --depth 1 https://github.com/vespa-engine/sample-apps.git && \ + cd sample-apps/examples/operations/monitoring/album-recommendation-monitoring +``` + + + +```bash +$ docker network create --driver bridge monitoring && \ + docker network connect monitoring vespa +``` + +This creates the *monitoring* network and attaches the vespa container to it. 
Find details in [docker-compose.yml](https://github.com/vespa-engine/sample-apps/blob/master/examples/operations/monitoring/album-recommendation-monitoring/docker-compose.yml). + + +```bash +$ docker run --detach --name sample-apps-prometheus --hostname prometheus \ + --network monitoring \ + --publish 9090:9090 \ + --volume `pwd`/prometheus/prometheus-selfhosted.yml:/etc/prometheus/prometheus.yml \ + prom/prometheus +``` + +[Prometheus](https://prometheus.io/) is a time-series database, which holds a series of values associated with a timestamp. Open Prometheus at [http://localhost:9090/](http://localhost:9090/). One can easily find what data Prometheus has, the input box auto-completes, e.g. enter *feed\_operations\_rate* and click *Execute*. Also explore the *Status* dropdown. + + +```bash +$ docker run --detach --name sample-apps-grafana \ + --network monitoring \ + --publish 3000:3000 \ + --volume `pwd`/grafana/provisioning:/etc/grafana/provisioning \ + grafana/grafana +``` + +This launches [Grafana](https://grafana.com/oss/grafana/). Grafana is a visualisation tool that can be used to easily make representations of important metrics surrounding Vespa. Open [http://localhost:3000/](http://localhost:3000/) and find the Grafana login screen - log in with admin/admin (skip changing password). From the list on the left, click *Browse* under *Dashboards* (the symbol with 4 blocks), then click the *Vespa Detailed Monitoring Dashboard*. The dashboard displays detailed Vespa metrics - empty for now. + + + +```bash +$ docker build album-recommendation-random-data --tag random-data-feeder +``` + +This builds the [Random Data Feeder](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/monitoring/album-recommendation-monitoring/album-recommendation-random-data) - it generates random sets of data and puts them into the Vespa instance. Also, it repeatedly runs queries, for Grafana visualisation. Compiling the Random Data Feeder takes a few minutes. 
+ + +```bash +$ docker run --detach --name sample-apps-random-data-feeder \ + --network monitoring \ + random-data-feeder +``` + + +Graphs will now show up in Grafana and Prometheus - it might take a minute or two. The Grafana dashboard is fully customisable. Change the default modes of Grafana and Prometheus by editing the configuration files in [album-recommendation-monitoring](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/monitoring/album-recommendation-monitoring). + + + +```bash +$ docker rm -f vespa \ + sample-apps-grafana \ + sample-apps-prometheus \ + sample-apps-random-data-feeder +``` + +```bash +$ docker network rm monitoring || true +``` + + + +## Histograms + +Metric histograms are supported for [Gauge](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/metrics/simple/Gauge.html) metrics. Create the metric like in [album-recommendation-java](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation-java), adding the histogram: + +```java +public HitCountSearcher(MetricReceiver receiver) { + this.hitCountMetric = receiver.declareGauge(EXAMPLE_METRIC_NAME, Optional.empty(), + new MetricSettings.Builder().histogram(true).build()); +} +``` + +The histograms for the last five minutes of logged data are available as CSV per dimension at [/state/v1/metrics/histograms](/en/reference/api/state-v1#state-v1-metrics-histograms). 
Example output: + +```bash +# start of metric hits_per_query, dimensions: { "chain": "metalchain" } +"Value","Percentile","TotalCount","1/(1-Percentile)" +1.00,0.000000000000,1,1.00 +1.00,1.000000000000,1,Infinity +# end of metric hits_per_query, dimensions: { "chain": "metalchain" } +# start of metric example_hitcounts, dimensions: { "query_language": "en" } +"Value","Percentile","TotalCount","1/(1-Percentile)" +1.00,0.000000000000,1,1.00 +1.00,1.000000000000,1,Infinity +# end of metric example_hitcounts, dimensions: { "query_language": "en" } +# start of metric query_latency, dimensions: { "chain": "metalchain" } +"Value","Percentile","TotalCount","1/(1-Percentile)" +5.69,0.000000000000,1,1.00 +5.69,1.000000000000,1,Infinity +# end of metric query_latency, dimensions: { "chain": "metalchain" } +# start of metric totalhits_per_query, dimensions: { "chain": "metalchain" } +"Value","Percentile","TotalCount","1/(1-Percentile)" +1.00,0.000000000000,1,1.00 +1.00,1.000000000000,1,Infinity +# end of metric totalhits_per_query, dimensions: { "chain": "metalchain" } +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/multinode-systems.mdx b/mintlify-docs/en/operations/self-managed/multinode-systems.mdx new file mode 100644 index 0000000000..2fb2d3e35c --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/multinode-systems.mdx @@ -0,0 +1,535 @@ +--- +title: "Multinode systems" +--- + +A Vespa *system* consists of one or more stateless and stateful clusters configured by an application package. A Vespa system is configured and managed through an admin cluster as shown below. + + +![Vespa Overview](/assets/img/vespa-overview.svg) + + +All nodes of a Vespa system have the same software installed. Which processes are started on each node and how they are configured is determined by the admin cluster from the specification given in [services.xml](/en/reference/applications/services/services) in the application package. 
+ +## Creating a multinode system from a sample application + +To create a fully functional production ready multinode system from a single-node sample application, follow these steps (also see [next steps](#next-steps)): + + + +Add an [admin cluster](/en/reference/applications/services/admin) in services.xml: + ```xml + + + + + + + + + + + + + + + + + + + + ``` + + +Install the Vespa packages or the *vespaengine/vespa* Docker image on all the nodes. + + +Run + +```bash +$ echo "override VESPA_CONFIGSERVERS [configserver-hostnames]" >> $VESPA_HOME/conf/vespa/default-env.txt +``` +where [`configserver-hostnames`] is replaced by the full hostname of the config server (or a comma-separated list if multiple). + + +Add these nodes to the container and content clusters by adding more `node` tags in *services.xml*. + + +Add the same nodes to *hosts.xml*. + + +Start Vespa on the nodes. + + + +See below for AWS examples. Refer to [configuration server operations](/en/operations/self-managed/configuration-server) for troubleshooting. + +## AWS EC2 + +The following is a procedure to set up a multinode application on *AWS EC2* instances. Please run the procedure in [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) first, to get familiar with the different Vespa concepts before running the AWS procedure below. This procedure will use the same number of hosts, 10, and set up the same application. + + +**Important:** + +Note the use of `sudo`. The Vespa start scripts will modify the environment (directories, system limits), requiring root access - refer to [vespa-start-configserver](/en/reference/operations/self-managed/tools#vespa-start-configserver) and [vespa-start-services](/en/reference/operations/self-managed/tools#vespa-start-services). After the environment setup, Vespa is run as the `vespa` user. + + + +**Note:** + +The procedure below is a bare minimum, for educational purposes. 
Make sure to use AWS instance types suitable for the application load, and implement security mechanisms of choice. + + +Can [AWS Auto Scaling](https://aws.amazon.com/autoscaling/) be used? Read the [autoscaling Q/A](#autoscaling). + +### Node setup + +- Provision nodes: + - Find AMI at [CentOS AWS AMI Cloud Images](https://centos.org/download/aws-images/) - this procedure is tested with *CentOS Stream 8 us-east-1 x86\_64 ami-0ee70e88eed976a1b* and vespa-8.30.50. + - Use minimum *t2.medium* instances. + - Let AWS create a security group for the nodes, or use an existing one. + - Make sure to check for SSH traffic, for host login. + - Launch 10 instances - the 3 first will be Vespa config server nodes, the 7 last Vespa nodes. Write down private / public hostnames. The private names are used in Vespa configuration, the public names for login to check status. To find a hostname, click the instance and copy hostname from *Private IP DNS name (IPv4 only)* and *Public IPv4 DNS*. Create a table like: + + | type | Private IP DNS name (IPv4 only) | Public IPv4 DNS | + | :--- | :--- | :--- | + | configserver | ip-10-0-1-234.ec2.internal | ec2-3-231-33-190.compute-1.amazonaws.com | + | configserver | ip-10-0-1-154.ec2.internal | ec2-3-216-28-201.compute-1.amazonaws.com | + | configserver | ip-10-0-0-88.ec2.internal | ec2-34-230-33-42.compute-1.amazonaws.com | + | services | ip-10-0-1-95.ec2.internal | ec2-44-192-98-165.compute-1.amazonaws.com | + | services | ip-10-0-0-219.ec2.internal | ec2-3-88-143-47.compute-1.amazonaws.com | + | services | ip-10-0-0-28.ec2.internal | ec2-107-23-52-245.compute-1.amazonaws.com | + | services | ip-10-0-0-67.ec2.internal | ec2-54-198-251-100.compute-1.amazonaws.com | + | services | ip-10-0-1-84.ec2.internal | ec2-44-193-84-85.compute-1.amazonaws.com | + | services | ip-10-0-0-167.ec2.internal | ec2-54-224-15-163.compute-1.amazonaws.com | + | services | ip-10-0-1-41.ec2.internal | ec2-44-200-227-127.compute-1.amazonaws.com | +- Security group 
setup: + - Click the Security Group for the nodes just provisioned (under the security tab), then *Edit inbound rules*. Add *All TCP* for port range 0-65535, specifying the name of the current Security Group as the Source. This lets the hosts communicate with each other. +- Host login example, without ssh-agent: + ```bash + $ SSH_AUTH_SOCK=/dev/null ssh -i mykeypair.pem centos@ec2-3-231-33-190.compute-1.amazonaws.com + ``` +- On each of the 10 hosts, install Vespa using the [installation procedure](/en/operations/self-managed/build-install#rpms): + ```bash + $ sudo dnf config-manager \ + --add-repo https://raw.githubusercontent.com/vespa-engine/vespa/master/dist/vespa-engine.repo + $ sudo dnf config-manager --enable powertools + $ sudo dnf install -y epel-release + $ sudo dnf install -y vespa + $ export VESPA_HOME=/opt/vespa + ``` +- On all the 10 hosts, set up the environment using the config server host list: + ```bash + $ echo "override VESPA_CONFIGSERVERS" \ + "ip-10-0-1-234.ec2.internal,ip-10-0-1-154.ec2.internal,ip-10-0-0-88.ec2.internal" \ + | sudo tee -a $VESPA_HOME/conf/vespa/default-env.txt + ``` + It is required that all nodes, both config server and Vespa nodes, have the same setting for `VESPA_CONFIGSERVERS`. 
+ +### Config server cluster setup + +- Start the 3-node config server cluster: + ```bash + $ sudo systemctl start vespa-configserver + ``` +- Verify the config cluster is running - on one of the config server nodes +```bash +$ for configserver in \ + ip-10-0-1-234.ec2.internal \ + ip-10-0-1-154.ec2.internal \ + ip-10-0-0-88.ec2.internal; \ + do curl -s http://$configserver:19071/state/v1/health | head -5; done + +{ + "time" : 1660034756595, + "status" : { + "code" : "up" + }, +{ + "time" : 1660034756607, + "status" : { + "code" : "up" + }, +{ + "time" : 1660034756786, + "status" : { + "code" : "up" + }, +``` + A successful config server start will log an entry like: + + ```bash + $ $VESPA_HOME/bin/vespa-logfmt | grep "Application config generation" + + [2022-08-09 08:29:38.684] INFO : configserver + Container.com.yahoo.container.jdisc.ConfiguredApplication + Switching to the latest deployed set of configurations and components. + Application config generation: 0 + ``` + Do not continue setup before the config server cluster is successfully started. See the video: [Troubleshooting startup - multinode](https://www.youtube.com/embed/BG7XZmXpIzo) and read [config server start sequence](/en/operations/self-managed/configuration-server#start-sequence). +- Start Vespa services on the 3 config server nodes - this starts basic Vespa services like log forwarding: + + ```bash + $ sudo systemctl start vespa + ``` + *`$VESPA_HOME/logs/vespa/vespa.log`* will now contain messages for `APPLICATION_NOT_LOADED`, this is normal until an application is deployed (next section). + +### Configure application + +- Configure the sample application - on one of the config server nodes: + ```bash + $ sudo dnf install -y git zip + $ git clone https://github.com/vespa-engine/sample-apps.git && \ + cd sample-apps/examples/operations/multinode-HA + ``` +- Edit *hosts.xml* - replace the *nodeX.vespanet* names. 
Let the first 3 hosts be the config server hosts above, and the remaining 7 the Vespa hosts - example: + ```xml expandable + + + + + node0 + + + node1 + + + node2 + + + + node3 + + + node4 + + + node5 + + + node6 + + + node7 + + + node8 + + + node9 + + + ``` +- Deploy the application: + + ```bash + $ zip -r - . -x "img/*" "scripts/*" "pki/*" "tls/*" README.md .gitignore | \ + curl --header Content-Type:application/zip --data-binary @- \ + http://localhost:19071/application/v2/tenant/default/prepareandactivate + ``` + Expected output: + + ```json + { + "log": [], + "tenant": "default", + "url": "http://localhost:19071/application/v2/tenant/default/application/default/environment/prod/region/default/instance/default", + "message": "Session 2 for tenant 'default' prepared and activated.", + "configChangeActions": { + "restart": [], + "refeed": [], + "reindex": [] + } + } + ``` + +### Vespa nodes setup + +- Start Vespa on the 7 hosts: + ```bash + $ sudo systemctl start vespa + ``` +- Validate the installation. Use the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) steps to check the health interfaces on all 10 nodes. Note that in this guide, the ports are not mapped through a Docker container, so the native Vespa ports should be used - e.g. for nodes 4 to 7 (see illustration below): + + ```bash + $ curl http://localhost:8080/state/v1/health + + { + "time" : 1660038306465, + "status" : { + "code" : "up" + }, + ``` + Refer to the sample application ports: + + ![Sample application ports](https://raw.githubusercontent.com/vespa-engine/sample-apps/master/examples/operations/multinode-HA/img/multinode-HA.svg) + + +### Terminate instances + +Remember to terminate the instances in the AWS console after use. + +### AWS EC2 singlenode + +This is a variant of the multinode install, using only one host, running both a config server and the other Vespa services on the same node. 
+ +- Provision a node, minimum a *t2.large*. Get its hostname for use in `VESPA_CONFIGSERVERS`: + ```bash + $ hostname + ``` +- Install Vespa: + ```bash + $ sudo dnf config-manager \ + --add-repo https://raw.githubusercontent.com/vespa-engine/vespa/master/dist/vespa-engine.repo + $ sudo dnf config-manager --enable powertools + $ sudo dnf install -y epel-release + $ sudo dnf install -y vespa + $ export VESPA_HOME=/opt/vespa + $ echo "override VESPA_CONFIGSERVERS ip-172-31-95-248.ec2.internal" | \ + sudo tee -a $VESPA_HOME/conf/vespa/default-env.txt + ``` +- Get a sample application: + ```bash + $ sudo dnf install -y git zip + $ git clone https://github.com/vespa-engine/sample-apps.git && cd sample-apps/album-recommendation + ``` +- Start the config server, check health port after a few seconds: + ```bash + $ sudo systemctl start vespa-configserver + $ curl http://localhost:19071/state/v1/health | head -5 + ``` +- Deploy the sample application: + ```bash + $ zip -r - . -x "img/*" "scripts/*" "pki/*" "tls/*" README.md .gitignore | \ + curl --header Content-Type:application/zip --data-binary @- \ + http://localhost:19071/application/v2/tenant/default/prepareandactivate + ``` +- Start Vespa, check container node health after some seconds: + ```bash + $ sudo systemctl start vespa + $ curl http://localhost:8080/state/v1/health | head -5 + ``` +- Remember to terminate the instances in the AWS console after use. + +## AWS ECS + +The following is a procedure to set up a multinode application on [AWS ECS](https://us-east-1.console.aws.amazon.com/ecs) instances. Please run the procedure in [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) first, to get familiar with the different Vespa concepts before running the AWS procedure below. This procedure will use the same number of hosts, 10, and set up the same application. Running the [EC2 procedure](#aws-ec2) above can also be helpful, as this procedure has a similar structure. 
+ +### Create a 10-node ECS cluster + +- Log in to AWS and the EC2 Container Service. Click *Clusters > Create Cluster > EC2 Linux + Networking > Next step*, using the defaults and: + + | Cluster name | vespa | + | --- | --- | + | EC2 instance type | t2.medium | + | Number of instances | 10 | + | Key pair | *Select or create your keypair* | + | Security group inbound rules - port range | 0 - 65535 | + +- Click *Create*, wait for the tasks to succeed, then *View Cluster* - it should say *Registered container instances: 10* in ACTIVE state. + +### Configure ECS instances + +- Click the *ECS Instances tab* - this should list 10 container instances. +- Select the first 3 Container Instance checkboxes, then *Actions `>` View/Edit attributes*. +- Click *Add attribute*. Set `Name=type` and `Value=configserver`, click the green checkbox on the right, then *Close*. +- Select the next 7 Container instance checkboxes, then *Actions `>` View/Edit attributes*. +- Click *Add attribute*. Set `Name=type` and `Value=services`, click the green checkbox on the right, then *Close*. +- Write down private / public hostnames and create a table like in the [EC2 procedure](#node-setup). The private names are used in Vespa configuration, the public names for login to check status. To find a hostname, click *ECS Instance > Instance ID* and copy hostname from *Private IP DNS name (IPv4 only)* and *Public IPv4 DNS*. + +### Start the config server task + +- Click *Task Definitions `>` Create new Task Definition `>` EC2 `>` Next step*. 
+- Click *Configure via JSON* and replace the content with (note the comma-separated hostnames of the config servers addresses): +```json expandable +{ + "networkMode": "host", + "containerDefinitions": [ + { + "name": "configserver", + "environment": [ + { + "name": "VESPA_CONFIGSERVERS", + "value": "ip-10-0-1-234.ec2.internal,ip-10-0-1-154.ec2.internal,ip-10-0-0-88.ec2.internal" + } + ], + "image": "vespaengine/vespa", + "privileged": true, + "memoryReservation": 1024 + } + ], + "placementConstraints": [ + { + "expression": "attribute:type == configserver", + "type": "memberOf" + } + ], + "family": "configserver" +} +``` +- Click *Save `>` Create*. +- Choose *Actions `->` Run task* and configure: + + | Launch type | EC2 | + | --- | --- | + | Cluster | vespa | + | Number of tasks | 3 | + | Placement templates | One Task Per Host | + +- Click *Run Task*. +- Validate that the config servers started successfully - use the same procedure as for [EC2 instances](#config-server-cluster-setup), checking */state/v1/health*. Do not continue before successfully validating this: + ```bash + $ ssh -i mykeypair.pem ec2-user@ec2-3-231-33-190.compute-1.amazonaws.com \ + curl -s http://localhost:19071/state/v1/health | head -5 + + { + "time" : 1660635645783, + "status" : { + "code" : "up" + }, + ``` + +### Configure application - ECS + +- Log into a config server: + ```bash + $ ssh -i mykeypair.pem ec2-user@ec2-3-231-33-190.compute-1.amazonaws.com + ``` +- Download the multinode-HA sample application: + ```bash + $ sudo yum -y install git zip + $ git clone https://github.com/vespa-engine/sample-apps.git && \ + cd sample-apps/examples/operations/multinode-HA + ``` +- Modify *hosts.xml* using the internal DNS hostnames - this step is the same as for [EC2 instances](#configure-application) +- Deploy the application: + ```bash + $ zip -r - . 
-x "img/*" "scripts/*" "pki/*" "tls/*" README.md .gitignore | \ + curl --header Content-Type:application/zip --data-binary @- \ + http://localhost:19071/application/v2/tenant/default/prepareandactivate + ``` + +### Start the services tasks + +- Click *Task Definitions `>` Create new Task Definition `>` EC2 `>` Next step*. +- Click *Configure via JSON* and replace the content with (using the same 3 config server internal DNS names): +```json expandable +{ + "networkMode": "host", + "containerDefinitions": [ + { + "name": "services", + "environment": [ + { + "name": "VESPA_CONFIGSERVERS", + "value": "ip-10-0-1-234.ec2.internal,ip-10-0-1-154.ec2.internal,ip-10-0-0-88.ec2.internal" + } + ], + "image": "vespaengine/vespa", + "command": [ + "services" + ], + "privileged": true, + "memoryReservation": 1024 + } + ], + "placementConstraints": [ + { + "expression": "attribute:type == services", + "type": "memberOf" + } + ], + "family": "services" +} +``` +- Click *Save `>` Create*. Note the `"command": [ "services" ]`. See [controlling which services to start](/en/operations/self-managed/docker-containers#controlling-which-services-to-start) for details, this starts *services* only - the start script starts both the *configserver* and *services* if given no arguments - this is used for the config server above. For these 7 nodes, `services` is given as an argument to the start script to only start Vespa services. +- Choose *Actions `>` Run task* and configure: + + | Launch type | EC2 | + | --- | --- | + | Cluster | vespa | + | Number of tasks | 7 | + | Placement templates | One Task Per Host | + +- Click *Run Task*. +- Validate startup. This step is the same as for [EC2 instances](#vespa-nodes-setup), e.g. 
for nodes running a Vespa container the port is 8080: + ```bash + $ ssh -i mykeypair.pem ec2-user@ec2-3-88-143-47.compute-1.amazonaws.com \ + curl -s http://localhost:8080/state/v1/health | head -5 + + { + "time" : 1660652648442, + "status" : { + "code" : "up" + }, + ``` + +### Terminate cluster + +- Remember to delete the cluster in the AWS console after use. + +## Log collection + +Logs are automatically collected from all nodes in real time to the admin node listed as `adminserver`. To view log messages from the system, run [vespa-logfmt](/en/reference/operations/self-managed/tools#vespa-logfmt) on this node. + +## Making changes to live systems + +To change the system, deploy the changed application to the admin cluster. The admin cluster will automatically change the participating nodes as necessary. It is safe to do this while serving live query and write traffic. In some cases the admin cluster will report that some processes must be restarted to make the change effective. To avoid query or write traffic disruption, such restarts must be done on one node at the time, waiting until the node is fully up before restarting the next one. + +## Multiple proton processes + +A content cluster can have multiple schemas. There is another way to distribute load over hosts, by mapping multiple content clusters to the same hosts: + +```xml + + + + + + + + + + + + + + + + + +``` + +Observe that both clusters use `node1`. This is a non-recommended configuration, as it runs multiple [proton](/en/content/proton) processes per node. To reduce interference between the processes in this case, virtualize the host into more nodes. 
One can use [containers or VMs](/en/operations/self-managed/docker-containers) to do this: + + +![Multiple proton processes per node](/assets/img/schemas-and-content-clusters-multiple-proton.svg) + + + +**Important:** + +Vespa's features for overload handling, like [feed-block](/en/writing/feed-block), require that only one proton process is running on the node. + + +## Autoscaling + +A common question is, *"Can [AWS Auto Scaling](https://aws.amazon.com/autoscaling/) be used?"* That is a difficult question to answer; here is a transcript from the [Vespa Slack](https://slack.vespa.ai): + +> *I have a question about deployment. I set up cluster on two AWS auto-scaling groups (config & services) based on [multinode-systems.html#aws-ec2](#aws-ec2). But if one of instances was replaced by auto-scaling group, I need manually update hosts.xml file, zip it and deploy new version of the app. I'm thinking about automation of this process by Cloudwatch & Lambda... I wonder if there is some node-discovery mechanism which can e.g. check instances tags and update hosts config based on it?* + +First, you see in [aws-ec2](#aws-ec2) that there are two types of hosts, `configserver` and `services`. configserver setup / operations is documented at [configuration server operations](/en/operations/self-managed/configuration-server). This must be set up first. This is backed by an [Apache ZooKeeper](https://zookeeper.apache.org/) cluster, so should be 1 or 3 nodes large. In our own clusters in Yahoo, we do not autoscale configserver clusters, there is no need - we use 3. If that is too many, use 1. So this question is easy - do not autoscale configservers. + +For the services nodes, observe that there are two kinds of nodes - stateless containers and stateful content nodes - see the [overview](/en/learn/overview). In any case, you will want to manage these differently - the stateless nodes are more easily replaced / increased / shrunk, by changing *services.xml* and *hosts.xml*. 
It is doable to build an autoscaling service for the stateless nodes, but you need to make sure to use the right metrics for your autoscaling code, and integrate the deploy-automation with the other deployments (say schema modifications). + +A much harder problem is autoscaling the stateful nodes - these are the nodes with the indexes and data. See [elasticity](/en/content/elasticity) - adding a node + data redistribution can take hours, and the node's load will increase during redistribution. Building autoscaling here is very difficult to do safely and efficiently. + +None of this is impossible, and it is actually implemented at [cloud.vespa.ai/autoscaling](/#autoscaling) - but it is a difficult feature to get right. + +So, my recommendation is starting with a static set of hosts, like in [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) - and in parallel try out [cloud.vespa.ai/en/free-trial](https://vespa.ai/free-trial/) with autoscaling experiments using your data and use cases. + +Autoscaling can save money, but before going there, it is wise to read about [performance](/en/performance/) and optimize resources using a static node set (or use the sizing suggestions from the Vespa Cloud Console). I.e., get the node resources right first, then consider if autoscaling node count for your load patterns makes sense. + +## Next steps + +- [Multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) is a high-availability multi-node template - use this as a basis for the final configuration. +- The [multinode](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode) sample application is useful for experimenting with node state transitions. 
\ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/node-setup.mdx b/mintlify-docs/en/operations/self-managed/node-setup.mdx new file mode 100644 index 0000000000..c2dce5ba20 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/node-setup.mdx @@ -0,0 +1,58 @@ +--- +title: "Node and network setup" +sidebarTitle: "Node Setup" +--- + +Vespa is composed of services that communicate and interact with each other. These services can be partitioned onto any amount of actual hardware for scaling, or they can all coexist on a single environment for development. To achieve this flexibility, some requirements must be met for the environment where the services will run. + +## Node + +A *node* in this context is the environment where some Vespa services are running. This can be an actual machine like a server in a datacenter, or a laptop for development and testing of Vespa configuration. It can also be a Virtual Machine or a Docker container, so one can run multiple nodes on a single piece of hardware. + +The different Vespa services that run on nodes will mostly communicate with each other via the network. This means that all nodes must have an IP address and have network connectivity to all other nodes. Both IPv4 and IPv6 protocols are supported. Note that the same framework is used even when running the entire Vespa stack on a single node. + +## Memory settings + +In the [getting started guides](/en/basics/deploy-an-application-local) and [sample applications](https://github.com/vespa-engine/sample-apps), memory settings are always the minimum to run the guides. This is to make it easy to set up and explore Vespa features. The [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) application has examples for even tighter settings, just to be able to test a larger application on a small host. + +It is important to note that these are not the recommended Vespa memory settings. 
Finding the optimal node configuration is covered in the [performance guides](/en/performance/index). There are many ways to configure, feed and use Vespa, it is not possible to have a general, recommended setting. + + +**Important:** + +Vespa is a multiprocess application, and can be configured to run multiple services per node - see [config sentinel](/en/operations/self-managed/config-sentinel). Out-of-memory can cause a range of problems hard to diagnose - the Vespa team recommends testing with larger nodes in these cases. + + +As a rule of thumb, start with an 8 GB node, just to make sure the application is functionally correct - then optimize later. + +Review system defaults. A common issue is too low default for `vm.max_map_count` which makes apps run into memory mapping assertions, especially if the [paged](/en/content/attributes#paged-attributes) option has been enabled for a lot of attribute fields - symptoms of this can be like: + +```bash +vespalib::alloc::MemoryAllocator::PtrAndSize vespalib::alloc::MmapFileAllocator::alloc(size_t) const: Assertion buf!=MAP_FAILED +``` + +```bash +'terminate called after throwing an instance of 'std::runtime_error' 'what(): mmap of file '/opt/vespa/var/db/vespa/search/cluster.abc/n12/documents/xyz/0.ready/index/index.fusion.43/field123/boolocc.bdat' with flags '1' failed with error: 'Cannot allocate memory' +``` + +## Hostname + +When Vespa services are started on a node, the node must identify itself to the configuration system to get configuration (including which services to run). This requires a unique identifier for the node in the config server. Since it is already a requirement that the node has a *hostname* that the config server knows, Vespa uses the same *hostname* when a node identifies itself to get its configuration. See [config sentinel](/en/operations/self-managed/config-sentinel) for details. 
+ +In order to find the IP address of a node and connect to it, the node must have a *hostname* that identifies it and which maps to its IP address. Actual machines on a network will usually have a *Fully Qualified Domain Name* (FQDN) in DNS, which should be used as the host name for this purpose. + +Note that it is a *requirement* that the host name, configured in [hosts.xml](/en/reference/applications/hosts), can be used to look up the IP address of the node (see workaround using `VESPA_HOSTNAME` below). The configuration server uses this host name to create URLs to be used to open network connections to Vespa services running on that node. If the nodes use IP addresses which don't have DNS names, one *must* have *all* those IP addresses with corresponding host names in `/etc/hosts` on *all* nodes in the Vespa installation. We recommend using names that can be used as FQDNs also in this case, in case of moving to using a DNS server instead of publishing `/etc/hosts`. + +This means that the node *must* know its own hostname (FQDN), and be in agreement with the config server about what exactly the host name is. Usually this is achieved by just running the `hostname` command. If `hostname` is set to the FQDN of the node, then everything should Just Work. + +As an alternative to modifying `/etc/hosts`, set [VESPA\_HOSTNAME](/en/operations/self-managed/files-processes-and-ports#environment-variables) on the hosts. [vespa-ip-vs-hostname](https://www.jocas.lt/journal/articles/vespa-ip-vs-hostname/) is a great post on how to do this. + +## Simple single-node development environment + +When testing a Vespa configuration on a single-node setup, one can usually avoid the setup hassle by overriding the hostname with the value "localhost". 
Try this command for that purpose: + +```bash +$ echo "override VESPA_HOSTNAME localhost" >> $VESPA_HOME/conf/vespa/default-env.txt +``` + +Running Java unit tests won't pick up settings in `default-env.txt` and will default to "localhost" if `VESPA_HOSTNAME` isn't set in the environment. \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/procedure-change-attribute-index.mdx b/mintlify-docs/en/operations/self-managed/procedure-change-attribute-index.mdx new file mode 100644 index 0000000000..66c16a92f3 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/procedure-change-attribute-index.mdx @@ -0,0 +1,253 @@ +--- +title: "Procedure: Change from attribute to index" +sidebarTitle: "Change from attribute to index procedure" +--- + +Changing between `index` and `attribute` is a common field change operation to optimize performance. Use the [reindexing](/en/operations/reindexing) feature to safely migrate data to/from index structures. + +Changing from attribute to index can be seen as "drop attribute" and "add index". When the attribute aspect of a field is removed, the field's data is not queryable after deployment. The reindexing process will populate the field's index structure, but this takes time, depending on corpus size. + +Another approach is to run with both attribute and index in the transition, keeping data available for queries. The gist of this procedure is to add `index`, run a reindex - then remove `attribute` aspect: + +```js +# field configuration at start +field artist type string { + indexing: summary | attribute +} + +-> + +# intermediate step to populate index structure, keeping the data in the attribute +field artist type string { + indexing: summary | attribute | index + match: word + stemming: none +} + +-> + +# final configuration, migrated to index +field artist type string { + indexing: summary | index + match: word + stemming: none +} +``` + + +**Note:** + +If the field is used as a filter only (i.e. 
no ranking), consider adding `rank: filter`, see example in [feature-tuning](/en/performance/feature-tuning). + + +## Procedure + + + +Test this using the [quick-start](/en/basics/deploy-an-application-local), changing the `artist` field to an attribute before running. Also add a [validation override](/en/reference/applications/validation-overrides) in `src/main/application/validation-overrides.xml`: + +```xml + + indexing-change + +``` + + +Run the quick start, stop after feeding documents. Run a query to validate data can be queried: + +```bash +$ curl "http://localhost:8080/search/?ranking=rank_albums&yql=select%20%2A%20from%20sources%20%2A%20where%20artist%20contains%20%22Coldplay%22" +``` + +One can also [dump current index structures](#appendix), see `artist` as an attribute. + + +Add index aspect and match/stemming settings to the field, deploy and observe output + +```js expandable +field artist type string { + indexing: summary | attribute | index + match : word + stemming : none +} + +$ (cd src/main/application && zip -r - .) 
| curl --header Content-Type:application/zip --data-binary @- \ + localhost:19071/application/v2/tenant/default/prepareandactivate + +{ + "log": [ + { + "time": 1628239290150, + "level": "WARNING", + "message": "Change(s) between active and new application that may require re-index:\nindexing-change: + Consider re-indexing document type 'music' in cluster 'music' because:\n + 1) Document type 'music': Field 'artist' changed: add index aspect, + indexing script: '{ input artist | summary artist | attribute artist; }' -> + '{ input artist | exact | summary artist | attribute artist | index artist; }'\n" + } + ], + "tenant": "default", + "url": "http://localhost:19071/application/v2/tenant/default/application/default/environment/prod/region/default/instance/default", + "message": "Session 3 for tenant 'default' prepared and activated.", + "configChangeActions": { + "restart": [], + "refeed": [], + "reindex": [ + { + "name": "indexing-change", + "documentType": "music", + "clusterName": "music", + "messages": [ + "Document type 'music': Field 'artist' changed: + add index aspect, indexing script: + '{ input artist | summary artist | attribute artist; }' + -> + '{ input artist | exact | summary artist | attribute artist | index artist; }'" + ], + "services": [ + { + "serviceName": "searchnode", + "serviceType": "searchnode", + "configId": "music/search/cluster.music/0", + "hostName": "vespa-container" + } + ] + } + ] + } +} +``` + + +Wait for the new configuration generation to be activated on the config server(s) - this is normally quite immediate. After that, allow up to 3 minutes for the config servers to set reindexing ready, track this using the `reindexing` endpoint: + +```json expandable +$ while true; do + curl http://localhost:19071/application/v2/tenant/default/application/default/environment/prod/region/default/instance/default/reindexing | jq . 
+ sleep 10 + done + +{ + "enabled": true, + "clusters": { + "music": { + "pending": { + "music": 3 + }, + "ready": { + "music": {} + } + } + } +} + +... + +{ + "enabled": true, + "clusters": { + "music": { + "pending": {}, + "ready": { + "music": { + "readyMillis": 1628665589516 + } + } + } + } +} +``` + + +When ready, deploy again to start reindexing, wait for it to complete (use the loop in previous step): + +```json expandable +$ (cd src/main/application && zip -r - .) | curl --header Content-Type:application/zip --data-binary @- \ + localhost:19071/application/v2/tenant/default/prepareandactivate + +... + +{ + "enabled": true, + "clusters": { + "music": { + "pending": {}, + "ready": { + "music": { + "readyMillis": 1628665589516 + } + } + } + } +} + +... + +{ + "enabled": true, + "clusters": { + "music": { + "pending": {}, + "ready": { + "music": { + "readyMillis": 1628665589516, + "startedMillis": 1628668739973, + "endedMillis": 1628668740536, + "state": "successful" + } + } + } + } +} +``` + + +Dumping the index structures now shows artist both in index and attribute, and there is an entry in vespa.log. Verify the query still works: + +```bash +$ docker exec vespa /usr/bin/sh -c 'vespa-logfmt | grep Reindexer' +[2021-08-11 07:59:00.535] INFO : container-clustercontroller Container.ai.vespa.reindexing.Reindexer + Completed reindexing of datatype music (code: 1412693671) after PT0.558683S + +$ curl "http://localhost:8080/search/?ranking=rank_albums&yql=select%20%2A%20from%20sources%20%2A%20where%20artist%20contains%20%22Coldplay%22" +``` + + + +As data is now reindexed into the index data structures, deploy without attribute. (Observe changes to index files, "artist" is now in index only). Test query after restart: + +```js +field artist type string { + indexing: summary | index + match : word + stemming : none +} + +$ (cd src/main/application && zip -r - .) 
| curl --header Content-Type:application/zip --data-binary @- \ + localhost:19071/application/v2/tenant/default/prepareandactivate + +$ curl "http://localhost:8080/search/?ranking=rank_albums&yql=select%20%2A%20from%20sources%20%2A%20where%20artist%20contains%20%22Coldplay%22" +``` + + + +Optional: restart Vespa - a restart will reclaim memory from the attribute: + +```bash +$ docker exec vespa sh -c 'vespa-stop-services && vespa-start-services' +``` + + + +Notes: + +- The match/stemming settings above are set to the same as the default attribute settings + +## Appendix + +To inspect attribute and index data (can be useful when troubleshooting), use [vespa-proton-cmd](/en/reference/operations/self-managed/tools#vespa-proton-cmd), then list files: + +```bash +$ docker exec vespa vespa-proton-cmd --local triggerFlush +$ docker exec vespa find /opt/vespa/var/db/vespa/search/cluster.music/n0/documents/music/0.ready +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/sizing-examples.mdx b/mintlify-docs/en/operations/self-managed/sizing-examples.mdx new file mode 100644 index 0000000000..d46448af68 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/sizing-examples.mdx @@ -0,0 +1,232 @@ +--- +title: "Vespa Scaling Configuration Examples" +sidebarTitle: "Sizing examples" +--- +This guide provides some example [services.xml](/en/reference/applications/services/content) files for content clusters using flat or grouped data distribution in self-managed clusters. + +Refer to the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) sample application for how to get started from a deployable multinode starting point. The examples below are trimmed down for readability in the `admin` and `container` sections. See the [appendix](#appendix-hosts-xml) for *hosts.xml* to use when testing deployments. 
+ +## Flat Distribution + +Flat (single group) distribution with [min-redundancy](/en/reference/applications/services/content#min-redundancy)=3. Data is distributed and partitioned over 9 nodes and there are 3 replicas of each document, stored on 3 different nodes. Queries are dispatched in parallel to all nodes. In case of a node failure, the remaining nodes will index (make ready) and activate the *not ready* (stored) copies to restore full search coverage. + +```xml expandable + + + + + + + + + + + + + + + + + + + + + + + + + + + 3 + + + + + + + + + + + + + +``` + +## Grouped Distribution + +See [sizing search](/en/performance/sizing-search) more on when to use grouped distribution. When using grouped distribution in an indexed content cluster, the following restrictions apply: + +- There can only be a single level of leaf groups under the top group +- The number of leaf groups must be a factor of the *redundancy* +- The [distribution partitions](/en/reference/applications/services/content#distribution) must be specified such that the redundancy per group is equal + +With a low number of nodes per group, it's important to remember that a node failure will cause the data to be re-distributed to the remaining nodes and their memory footprint and disk usage will grow when those nodes start activating the documents originally activated on the failed node. E.g. with 2 nodes per group, the remaining healthy node will start activating all the content, which will cause a 2x memory and disk footprint compared with the ideal state. + +The [min-node-ratio-per-group](/en/reference/applications/services/content#min-node-ratio-per-group) controls the data distribution behavior inside a group in cases of node failures. This sets a lower bound on the ratio of nodes within groups that must be online and accepting feed and query traffic, before the entire group is automatically taken out of service from both feed and search/serving. 
Once number of nodes in the group have been restored, and ideal state has been achieved, the group will be automatically set in service. + +## 9 nodes, 3 groups with 3 nodes per group + +This example has 3 groups and each group index all the documents over the 3 nodes in the group. With 3 groups there are 3 replicas in total of each document, and each replica is indexed and active. Losing a node does not reduce search coverage. + +```xml expandable + + + + + + + + + + + + + + + + + + + + + + + + + + + 3 + + + + + + + + + + + + + + + + + + + + +``` + +## 9 nodes, 9 groups with 1 node per group + +This example has 9 groups and each group index all the documents on a single node. With 9 groups there are 9 replicas in total of each document, and each replica is indexed and active. Losing a node does not reduce search coverage. With a single node, indexing throughput is limited by the single node performance, as all data needs to go all nodes. + +```xml expandable + + + + + + + + + + + + + + + + + + + + + + + + + + + 9 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +``` + +## Serving Availability Tuning + +When using flat distribution, *soft failing nodes* is a challenge for serving with high availability and low latency. Soft failing nodes are nodes which answers health checks from [cluster controllers](/en/content/content-nodes) and search container dispatch health checks, but still experiences issues which impacts serving latency (e.g. cpu frequency throttling due to thermal heating, memory corruptions and so forth). In a cluster with a flat distribution, the slowest node determines the latency, as the query request is dispatched to all content nodes in parallel. The probability of a soft failing node increases with the number of nodes used to distribute the data over. + +Use [adaptive coverage timeout](/en/reference/applications/services/content#coverage) to prevent slow soft failing nodes to impact availability. 
This allows the dispatcher to stop waiting for the slowest node(s). See also [graceful search degradation](/en/performance/graceful-degradation). + +Grouped distributions are less impacted by soft failing nodes in general, as queries are dispatched to one group at a time using a [dispatch policy](/en/reference/applications/services/content#dispatch-policy). The *adaptive* policy takes group latency into account when deciding which group the query request should be routed to. + +## Changing Group Configuration + +It is easy to change the group topology without service disruption, with a few caveats - read more in [elasticity](/en/content/elasticity#changing-topology). + +## Appendix: hosts.xml + +```xml expandable + + + node0 + + + node1 + + + node2 + + + node3 + + + node4 + + + node5 + + + node6 + + + node7 + + + node8 + + +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/slobrok.mdx b/mintlify-docs/en/operations/self-managed/slobrok.mdx new file mode 100644 index 0000000000..2c01b6d11f --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/slobrok.mdx @@ -0,0 +1,17 @@ +--- +title: "Service location broker - slobrok" +sidebarTitle: "Service Location Broker" +--- + +Slobrok is an acronym for *Service Location Broker*, and it is a name service used in Vespa. The service listens on a specific port - use [vespa-model-inspect](/en/reference/operations/self-managed/tools#vespa-model-inspect) to find the `Slobrok` service's port number. + +Slobrok is running by default on the administration node as well as one or two other random nodes for redundancy. Best practise for a multi-node, high-availability application is found in the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) sample application. In this application, slobrok instances are hosted on nodes running [config servers](/en/operations/self-managed/configuration-server). 
The motivation is, like the config servers, Vespa requires slobrok to be up for services to function. Operating slobrok is the same as config servers, too - three is enough for most applications. As slobrok requires minimal system resources, it does not impact other services running on the same node -> using config server nodes is ideal. + +Clients, like the [Document API](/en/reference/api/api), will do lookups on any of the service location broker nodes. Slobrok is not used in the query pipeline. The [cluster-controller](/en/content/content-nodes#cluster-controller) uses slobrok to evaluate service availability. + +The Slobrok process looks like: + +```sh +$ ps ax | grep vespa-slobrok +93906 ?? SJ 2:31.52 $VESPA_HOME/sbin/vespa-slobrok -p 19100 -c slobrok.0 +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/using-kubernetes-with-vespa.mdx b/mintlify-docs/en/operations/self-managed/using-kubernetes-with-vespa.mdx new file mode 100644 index 0000000000..11314c47ee --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/using-kubernetes-with-vespa.mdx @@ -0,0 +1,234 @@ +--- +title: "Using Kubernetes with Vespa" +sidebarTitle: "Using Kubernetes" +--- + + +**Note:** + +In this article, find a recipe for how to start self-managed Vespa in a Kubernetes cluster. For production serving, [Vespa on Kubernetes](/en/operations/kubernetes/vespa-on-kubernetes) is a good read; The Vespa Operator provides a more Kubernetes-native integration with a high degree of automation and value-adds. + + +This article outlines how to run Vespa using Kubernetes. Find a quickstart for running Vespa in a single pod in [singlenode quickstart with minikube](#singlenode-quickstart-with-minikube). + +Setting up a multi-pod Vespa cluster is a bit more complicated, and requires knowledge about how Vespa configures its services. 
Use the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA/gke) sample application as a basis for configuration. + + +![Vespa overview illustration](/assets/img/vespa-overview.svg) + + +- A Vespa cluster is made of one or more config servers in a config server cluster. This cluster keeps configuration for the services running in the service pods. The config server cluster pods should hence be started first. +- Config servers use Apache Zookeeper for shared state. The config servers will not set their */state/v1/health* to UP before Zookeeper quorum is reached. This means that all config server pods must be running before quorum is reached, and one cannot use a *readinessProbe* probe for the config servers for a staggered start. +- See a practical example at [config server cluster startup](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA/gke#config-server-cluster-startup) - once completed it should look like: + + ```bash + $ kubectl get pods + NAME READY STATUS RESTARTS AGE + vespa-configserver-0 1/1 Running 0 2m45s + vespa-configserver-1 1/1 Running 0 107s + vespa-configserver-2 1/1 Running 0 62s + ``` +- Once the config server cluster is started successfully, the [application package](/en/basics/applications) can be deployed, and the pods for the service nodes started. The application package maps services to pods (nodes), so this must be deployed successfully before the services in the pods can start. It does not matter whether one deploys the application package before or after starting the service pods, as the pods will idle, waiting for configuration. +- [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA/gke) starts the pods first, see [Vespa startup](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA/gke#vespa-startup). 
As the application package is not yet deployed, the service inside the pods is not started (as it is not configured). The Vespa infrastructure is started, however, see [config sentinel](/en/operations/self-managed/config-sentinel) - so the pod is started with the config-proxy waiting for services config at this point. +- The [cluster startup](/en/operations/self-managed/config-sentinel#cluster-startup) feature is good to know. This is a setting to not start a service before enough services can run - see the *Connectivity check* log messages. +- Deploy the application package. At this point, the pods will know which service to run, and start a container or content node service. Shortly after, the */state/v1/health* endpoint is enabled on the pods. +- Note that ports are allocated dynamically, but the defaults will get you started - see the illustration with [services and ports](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA#get-started) for */state/v1/health*: + - Config server: 19071 + - Container node: 8080 + - Content node: 19107 + +The list above is an overview of the config server -> application package -> service */state/v1/health* dependency chain. This sequence of steps must be considered when building the Kubernetes cluster configuration. + +A good next step is running the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA/gke) for Kubernetes - there you will also find useful [troubleshooting](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA/gke#misc--troubleshooting) tools. + +## Singlenode quickstart with minikube + +This section describes how to install and run Vespa on a single machine using Kubernetes (K8s). Also see [Vespa example on GKE](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/basic-search-on-gke). 
+ + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` + - See [Docker Containers](/en/operations/self-managed/docker-containers) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 5 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). +- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [Github releases](https://github.com/vespa-engine/vespa/releases). +- [Git](https://git-scm.com/downloads). +- [Minikube](https://kubernetes.io/docs/tasks/tools/). 
+ + + + +Refer to [Docker memory](/en/operations/self-managed/docker-containers#memory) for details and troubleshooting: + +```bash +docker info | grep "Total Memory" +or +podman info | grep "memTotal" +``` + + +```bash +minikube start --driver docker --memory 4096 +``` + + +**Clone the [Vespa sample apps](https://github.com/vespa-engine/sample-apps):** + +```bash +git clone --depth 1 https://github.com/vespa-engine/sample-apps.git +export VESPA_SAMPLE_APPS=`pwd`/sample-apps +``` + + +```yaml expandable +cat << EOF > service.yml +apiVersion: v1 +kind: Service +metadata: + name: vespa + labels: + app: vespa +spec: + selector: + app: vespa + type: NodePort + ports: + - name: container + port: 8080 + targetPort: 8080 + protocol: TCP + - name: config + port: 19071 + targetPort: 19071 + protocol: TCP +EOF +``` + +```yaml expandable +cat << EOF > statefulset.yml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: vespa + labels: + app: vespa +spec: + replicas: 1 + serviceName: vespa + selector: + matchLabels: + app: vespa + template: + metadata: + labels: + app: vespa + spec: + containers: + - name: vespa + image: vespaengine/vespa + imagePullPolicy: Always + env: + - name: VESPA_CONFIGSERVERS + value: vespa-0.vespa.default.svc.cluster.local + securityContext: + runAsUser: 1000 + ports: + - containerPort: 8080 + protocol: TCP + readinessProbe: + httpGet: + path: /state/v1/health + port: 19071 + scheme: HTTP +EOF +``` + + +```bash +kubectl apply -f service.yml -f statefulset.yml +``` + + +```bash +kubectl get pods --watch +``` + +Wait for STATUS Running: + +```bash +NAME READY STATUS RESTARTS AGE +vespa-0 0/1 ContainerCreating 0 8s +vespa-0 0/1 Running 0 2m4s +``` + + +```bash +kubectl port-forward vespa-0 19071 8080 & +``` + + +```bash +curl -s --head http://localhost:19071/state/v1/health +``` + + +```bash +vespa deploy ${VESPA_SAMPLE_APPS}/album-recommendation +``` + + +This normally takes a minute or so: + +```bash +$ curl -s --head 
http://localhost:8080/state/v1/health +``` + + +```bash +$ vespa feed sample-apps/album-recommendation/ext/documents.jsonl +``` + + +```bash +$ vespa query 'select * from music where true' +``` + + +```bash +$ vespa document get id:mynamespace:music::love-is-here-to-stay +``` + + +Stop the running container: + +```bash +$ kubectl delete service,statefulsets vespa +``` + +Stop port forwarding: + +```bash +$ killall kubectl +``` + +Stop minikube: + +```bash +$ minikube stop +``` + + + +At any point during the procedure, dump logs for troubleshooting: + +```bash +$ kubectl logs vespa-0 +``` \ No newline at end of file diff --git a/mintlify-docs/en/operations/self-managed/vespa-gpu-container.mdx b/mintlify-docs/en/operations/self-managed/vespa-gpu-container.mdx new file mode 100644 index 0000000000..1a1f43e473 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/vespa-gpu-container.mdx @@ -0,0 +1,122 @@ +--- +title: "Container GPU setup" +sidebarTitle: "Docker Containers GPU setup" +--- + +Vespa supports using GPUs to evaluate ONNX models, as part of its [stateless model evaluation feature](/en/ranking/stateless-model-evaluation). When running Vespa inside a container engine such as Docker or Podman, special configuration is required to make GPU(s) available inside the container. + +The following guide explains how to do this for Nvidia GPUs, using Podman on RHEL8. This should also work on plain Rocky Linux 8.8 and AlmaLinux 8.8 on x86\_64. For other platforms and container engines, see the [Nvidia container toolkit installation guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html). Commands below need to run as root (use `sudo bash` first). 
+ +## Run a script + +Fetch our script for RHEL8 / x86\_64 and run it as follows: + +```bash +sudo dnf -y install wget +wget https://raw.githubusercontent.com/vespa-engine/docker-image/master/experimental/gpu-setup-rhel8-x86.sh +sh gpu-setup-rhel8-x86.sh +``` + +The script performs the steps below and checks whether a sample application is able to utilise the GPU. See the configuration steps below for more details. + +## Configuration steps + + + +Check that SELinux is disabled with `getenforce`; edit `/etc/selinux/config` and reboot if necessary. To temporarily avoid SELinux interfering, it's possible to run `setenforce Permissive` instead. + + +Ensure that Nvidia drivers are installed on your **host** where you want to run the `vespaengine/vespa` container image. On RHEL 8 this can be done as follows: + +```bash +dnf config-manager \ + --add-repo=https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo +dnf module install -y --enablerepo cuda-rhel8-x86_64 nvidia-driver:530 +nvidia-modprobe +ls -ld /dev/nvidia* +``` + +You should have (at least) these devices listed after running the above commands: + +```bash +crw-rw-rw-. 1 root root 195, 0 Aug 16 11:24 /dev/nvidia0 +crw-rw-rw-. 1 root root 195, 255 Aug 16 11:24 /dev/nvidiactl +crw-rw-rw-. 1 root root 238, 0 Aug 16 11:24 /dev/nvidia-uvm +crw-rw-rw-. 1 root root 238, 1 Aug 16 11:24 /dev/nvidia-uvm-tools +``` + +See [Device Node Verification](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#device-node-verification) in the CUDA installation guide for more details. + + +Install `nvidia-container-toolkit`. This grants the container engine access to your GPU device(s). 
On RHEL 8 this can be done as follows: + +```bash +dnf config-manager \ + --add-repo=https://nvidia.github.io/libnvidia-container/rhel8.6/libnvidia-container.repo +dnf install -y --enablerepo libnvidia-container nvidia-container-toolkit +``` + + +Generate a "Container Device Interface" config: + +```bash +nvidia-ctk cdi generate --device-name-strategy=type-index --format=json --output /etc/cdi/nvidia.json +``` + + +Verify that the GPU device is exposed to the container: + +```bash +podman run --rm -it --device nvidia.com/gpu=all docker.io/nvidia/cuda:11.6.2-base-ubuntu20.04 nvidia-smi +``` + +This should print details about your GPU(s) if everything is configured correctly. + + +Start the Vespa container with the `--device` option: + +```bash +podman run --detach --name vespa --hostname vespa-container \ + --publish 8080:8080 --publish 19071:19071 \ + --device nvidia.com/gpu=all \ + vespaengine/vespa +``` + + +The `vespaengine/vespa` image does not currently include the necessary CUDA libraries by default, due to their large size. These libraries must be installed inside the container manually: + +```bash +podman exec -u 0 -it vespa /bin/bash +dnf -y install dnf-plugins-core +dnf config-manager \ + --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo +dnf -y install vespa-onnxruntime-cuda +``` + +Instead of the above installation of `vespa-onnxruntime-cuda` inside the running container, you might want to build your own container image using the following `Dockerfile` as it avoids having to run the container image with install privileges. 
+ +```bash +FROM vespaengine/vespa + +USER root + +RUN dnf -y install 'dnf-command(config-manager)' +RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/x86_64/cuda-rhel8.repo +RUN dnf -y install $(rpm -q --queryformat '%{NAME}-cuda-%{VERSION}' vespa-onnxruntime) + +USER vespa +``` + +Then instead run with your container image name: + +```bash +podman run --detach --name vespa --hostname vespa-container \ + --publish 8080:8080 --publish 19071:19071 \ + --device nvidia.com/gpu=all \ + your-container-image-name +``` + + +All Nvidia GPUs on the host should now be available inside the container, with devices exposed at `/dev/nvidiaN`. See [stateless model evaluation](/en/ranking/stateless-model-evaluation#onnx-inference-options) for how to configure the ONNX runtime to use a GPU for computation. Similar for embedding models using GPU, see [embedder onnx reference](/en/reference/rag/embedding#embedder-onnx-reference-config). + + diff --git a/mintlify-docs/en/operations/self-managed/vespa-support.mdx b/mintlify-docs/en/operations/self-managed/vespa-support.mdx new file mode 100644 index 0000000000..7af4bff150 --- /dev/null +++ b/mintlify-docs/en/operations/self-managed/vespa-support.mdx @@ -0,0 +1,118 @@ +--- +title: "Vespa Support CLI" +--- + +The Vespa Enterprise image includes a built-in command-line tool called `vespa-support`. It is designed to simplify and expedite the process of collecting diagnostic information from your system, making it easier to request and receive support from the Vespa team. + +You should run this tool whenever you are: + +- Experiencing unexpected behaviour or degraded performance in your Vespa deployment +- Preparing to open or respond to a Vespa support ticket +- Asked by Vespa support to provide diagnostic information + +## Accessing the Tool + +`vespa-support` is available in the shell of any running Vespa Enterprise container. 
To access it, open a shell session into the container using your platform's standard method, then verify the tool is available: + +```bash +$ vespa support --help +``` + +To confirm the tool version and the container version: + +```bash +$ vespa support version +``` + +## Commands + +### vespa support version + +Prints the version of the `vespa-support` tool and the Vespa Enterprise container version. + +### vespa support diagnostics sysinfo + +Collects host-level system information from the node where the command is run. `--dest-dir` is required. The command will fail if it is not provided. + +```bash +$ vespa support diagnostics sysinfo --dest-dir <directory> +``` + +### vespa support diagnostics application + +Collects the currently deployed Vespa application state. `--dest-dir` is required. The command will fail if it is not provided. + +```bash +$ vespa support diagnostics application --dest-dir <directory> +``` + +## Options + +The following options apply to all `diagnostics` subcommands. + +| Option | Required | Default | Description | +| :--- | :--- | :--- | :--- | +| `--dest-dir` | Yes | — | Directory where the diagnostic output files will be written. | +| `--config-server-host` | No | Auto-detected | Host address of the Vespa config server. If not set, the tool resolves it automatically from the environment. | +| `--config-server-port` | No | 19071 | Port of the Vespa config server. | +| `--timeout-secs` | No | 60 | Timeout in seconds for operations that contact the config server. | + +## What Is Collected + +### sysinfo + +The output is written as a ZIP file named `diag-sysinfo.zip` in the specified destination directory. 
It includes the following information about the host: + +- Vespa version +- Hostname, operating system, kernel version, and CPU architecture +- CPU model and core count +- Memory usage (total, free, and available) +- Disk usage per mounted filesystem (size, free space, usage percentage) +- Network interfaces, routing table, DNS servers, and firewall rules as seen from within the container +- Host level diagnostics metrics + +### application + +The output is written as a ZIP file named `diag-application.zip` in the specified destination directory. It includes the following information about the application: + +- The full running Vespa application state +- Application level diagnostics metrics + +## Creating a Support Bundle + +The following steps walk you through collecting a complete set of diagnostic files to share with the Vespa support team. + + + + +```bash +$ mkdir /tmp/vespa-diagnostics +``` + + +```bash +$ vespa support diagnostics sysinfo --dest-dir /tmp/vespa-diagnostics +``` + + +```bash +$ vespa support diagnostics application --dest-dir /tmp/vespa-diagnostics +``` + + + +```bash +$ ls /tmp/vespa-diagnostics +diag-sysinfo.zip +diag-application.zip +``` + +Once complete, copy the ZIP files from the container and attach them to your support ticket. + + + +## Sharing with Vespa Support + +Attach the ZIP files generated in the previous step to your Vespa support ticket. If you have not yet opened a ticket, include the files when submitting your request. + +If the files are too large to attach directly, the Vespa support team will provide an alternative upload method. 
\ No newline at end of file diff --git a/mintlify-docs/en/operations/zones.mdx b/mintlify-docs/en/operations/zones.mdx new file mode 100644 index 0000000000..714051e584 --- /dev/null +++ b/mintlify-docs/en/operations/zones.mdx @@ -0,0 +1,52 @@ +--- +title: "Zones" +--- + +An application is deployed to a *zone*, which is a combination of an [environment](/en/operations/environments) and a *region*, like `vespa deploy -z dev.aws-us-east-1c`. + +If an application requires zone-specific configuration (e.g., different capacity requirements per zone), use [environment and region variants](/en/operations/deployment-variants#services.xml-variants). Also see [deployment.xml](/en/reference/applications/deployment). + +`dev` zones for development and performance testing: + +| Environment | Default | Region | AWS Zone ID | +| --- | --- | --- | --- | +| [dev](/en/operations/environments#dev) | Yes | aws-us-east-1c | use1-az6 | +| [dev](/en/operations/environments#dev) | No | aws-euw1-az1 | euw1-az1 | +| [dev](/en/operations/environments#dev) | No | azure-eastus-az1 | | +| [dev](/en/operations/environments#dev) | No | gcp-us-central1-f | | + +`prod` zones for production serving, with a [CD pipeline](/en/operations/automated-deployments): + +| Environment | Region | AWS Zone ID | +| --- | --- | --- | +| [prod](/en/operations/environments#prod) | aws-us-east-1c | use1-az6 | +| [prod](/en/operations/environments#prod) | aws-use1-az4 | use1-az4 | +| [prod](/en/operations/environments#prod) | aws-use2-az1 | use2-az1 | +| [prod](/en/operations/environments#prod) | aws-use2-az3 | use2-az3 | +| [prod](/en/operations/environments#prod) | aws-us-west-2a | usw2-az1 | +| [prod](/en/operations/environments#prod) | aws-usw2-az3 | usw2-az3 | +| [prod](/en/operations/environments#prod) | aws-eu-west-1a | euw1-az2 | +| [prod](/en/operations/environments#prod) | aws-euw1-az1 | euw1-az1 | +| [prod](/en/operations/environments#prod) | aws-euc1-az1 | euc1-az1 | +| 
[prod](/en/operations/environments#prod) | aws-euc1-az3 | euc1-az3 | +| [prod](/en/operations/environments#prod) | aws-cac1-az1 | cac1-az1 | +| [prod](/en/operations/environments#prod) | aws-cac1-az2 | cac1-az2 | +| [prod](/en/operations/environments#prod) | aws-aps1-az1 | aps1-az1 | +| [prod](/en/operations/environments#prod) | aws-ap-northeast-1a | apne1-az4 | +| [prod](/en/operations/environments#prod) | aws-apne1-az1 | apne1-az1 | +| [prod](/en/operations/environments#prod) | gcp-europe-west3-b | | +| [prod](/en/operations/environments#prod) | gcp-us-central1-a | | +| [prod](/en/operations/environments#prod) | gcp-us-central1-b | | +| [prod](/en/operations/environments#prod) | gcp-us-central1-c | | +| [prod](/en/operations/environments#prod) | gcp-us-central1-f | | + +The `prod` zones use ephemeral instances for system tests and staging tests, running in [test](/en/operations/environments#test) and [staging](/en/operations/environments#staging) environments. These are internal zones, and never directly deployed to, included here for reference: + +| Environment | Region | AWS Zone ID | +| --- | --- | --- | +| [test](/en/operations/environments#test) | aws-us-east-1c | use1-az6 | +| [test](/en/operations/environments#test) | gcp-us-central1-f | | +| [staging](/en/operations/environments#staging) | aws-us-east-1c | use1-az6 | +| [staging](/en/operations/environments#staging) | gcp-us-central1-f | | + +Contact [Support](https://vespa.ai/support/) to request more zones. \ No newline at end of file diff --git a/mintlify-docs/en/performance.mdx b/mintlify-docs/en/performance.mdx new file mode 100644 index 0000000000..69b50ae604 --- /dev/null +++ b/mintlify-docs/en/performance.mdx @@ -0,0 +1,36 @@ +--- +title: "Performance" +sidebarTitle: "Performance overview" +--- +## Practical search performance guide + +See [practical search performance guide](/en/performance/practical-search-performance-guide). 
The guide walks through a music search use case and gives a practical introduction to Vespa search performance. + +## Sizing and capacity planning + +Sizing and capacity planning involves figuring out how many nodes are needed and what kind of hardware flavor best fits the use case: + +- [Sizing Vespa search](/en/performance/sizing-search): How to size a Vespa search cluster +- [Caching in Vespa](/en/performance/caches-in-vespa): How to enable caches in Vespa +- [Attributes and memory usage](/en/content/attributes): How attributes impact the memory footprint, find attribute memory usage +- [Proton maintenance jobs](/en/content/proton#proton-maintenance-jobs): Impact on resource usage +- [Coverage degradation](/en/performance/graceful-degradation): Timeout handling and Degraded Coverage + +## Benchmarking and tuning + +Benchmarking is important both during sizing and for testing new features. What tools to use for benchmarking and how to tune system aspects of Vespa: + +- [Benchmarking Vespa](/en/performance/benchmarking): Test Vespa performance +- [Search features and performance](/en/performance/feature-tuning): Test Vespa performance +- [Feed performance](/en/performance/sizing-feeding): Test Vespa performance +- [Container Http performance testing using Gatling](/en/performance/container-http): Test Vespa performance +- [Container tuning](/en/performance/container-tuning): JVM, container, docproc +- [vespa-fbench](/en/reference/operations/tools#vespa-fbench): Reference documentation +- [HTTP/2](/en/performance/http2): improve HTTP performance using HTTP/2 + +## Profiling + +Do a deep performance analysis - how to profile the application as well as Vespa: + +- [Profiling](/en/performance/profiling): Generic profiling tips. 
+- [Valgrind](/en/performance/valgrind): Run Vespa with Valgrind diff --git a/mintlify-docs/en/performance/benchmarking-cloud.mdx b/mintlify-docs/en/performance/benchmarking-cloud.mdx new file mode 100644 index 0000000000..aaf53dd7d1 --- /dev/null +++ b/mintlify-docs/en/performance/benchmarking-cloud.mdx @@ -0,0 +1,252 @@ +--- +title: "Benchmarking" +sidebarTitle: "Benchmarking using Vespa Cloud" +--- + +This is a step-by-step guide to get started with benchmarking on Vespa Cloud, based on the [Vespa benchmarking guide](/en/performance/benchmarking), using the [sample app](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation). Overview: + +![Vespa Cloud Benchmarking](/assets/img/cloud-benchmarks.svg) + +## Set up a performance test instance + +Use an instance in a [dev zone](/en/operations/environments#dev) for benchmarks. To deploy an instance there, use the [getting started](/en/basics/deploy-an-application) guide, and make sure to specify the resources using a `deploy:environment="dev"` attribute: + +```xml + + + +``` + +```bash +$ vespa deploy --wait 600 +``` + +Feed documents: + +```bash +$ vespa feed ext/documents.jsonl +``` + +Query documents to validate the feed: + +```bash +$ vespa query "select * from music where true" +``` + +Query documents using curl: + +```bash +$ curl \ + --cert ~/.vespa/mytenant.myapp.default/data-plane-public-cert.pem \ + --key ~/.vespa/mytenant.myapp.default/data-plane-private-key.pem \ + -H "Content-Type: application/json" \ + --data '{"yql" : "select * from music where true"}' \ + https://baaae1db.b68ddc0d.z.vespa-app.cloud/search/ +``` + +At this point, the instance is ready, with data, and can be queried using data-plane credentials. 
+ +## Test using vespa-fbench with a token endpoint + +To test using an API token endpoint instead of mTLS, use these options: + +```bash +$ vespa-fbench -q query.txt -s 5 -H "Authorization: Bearer [your token]" -T /etc/ssl/certs/ca-bundle.crt -o out.txt "[token-endpoint]" 443 +``` + +ca-bundle.crt is just the root certificates, nothing Vespa specific. + +## Test using vespa-fbench + +The rest of the guide assumes we're using an mTLS endpoint and that the data-plane credentials are in the working directory: + +```bash +$ ls -1 *.pem + data-plane-private-key.pem + data-plane-public-cert.pem +``` + +Prepare a query file: + +```bash +$ echo "/search/?yql=select+*+from+music+where+true" > query001.txt +``` + +Test using [vespa-fbench](/en/reference/operations/tools#vespa-fbench) running in a docker container: + +```bash +$ docker run -v $(pwd):/files -w /files \ + --entrypoint /opt/vespa/bin/vespa-fbench \ + vespaengine/vespa \ + -C data-plane-public-cert.pem \ + -K data-plane-private-key.pem \ + -T /etc/ssl/certs/ca-bundle.crt \ + -n 1 -q query001.txt -s 1 -c 0 \ + -o output.txt \ + baaae1db.b68ddc0d.z.vespa-app.cloud 443 +``` + +`-o output.txt` is useful when validating the test - remove this option when load testing. Make sure there are no `SSL_do_handshake` errors in the output. Expect HTTP status code 200: + +```bash expandable + Starting clients... + Stopping clients + Clients stopped. + . + Clients Joined. 
+ *** HTTP keep-alive statistics *** + connection reuse count -- 4 + ***************** Benchmark Summary ***************** + clients: 1 + ran for: 1 seconds + cycle time: 0 ms + lower response limit: 0 bytes + skipped requests: 0 + failed requests: 0 + successful requests: 5 + cycles not held: 5 + minimum response time: 128.17 ms + maximum response time: 515.35 ms + average response time: 206.38 ms + 25 percentile: 128.70 ms + 50 percentile: 129.60 ms + 75 percentile: 130.20 ms + 90 percentile: 361.32 ms + 95 percentile: 438.36 ms + 99 percentile: 499.99 ms + actual query rate: 4.80 Q/s + utilization: 99.03 % + zero hit queries: 5 + http request status breakdown: + 200 : 5 +``` + +At this point, running queries using *vespa-fbench* works well from local laptop. + +## Run queries inside data center + +Next step is to run this from the same location (data center) as the dev zone. In this example, an AWS [zone](/en/operations/zones). Deduce the AWS zone from Vespa Cloud zone name. Below is an example using a host with Amazon Linux 2023 AMI (HVM) image: + + + +Create the host - here assume key pair is named *key.pem*. No need to do anything other than default. 
+ + +Log in, update, install docker: + +```bash +$ ssh -i key.pem ec2-user@ec2-xx-xxx-xxx-xxx.compute-1.amazonaws.com +[ec2-user]$ sudo yum update -y +[ec2-user]$ sudo yum install -y docker +[ec2-user]$ sudo service docker start +[ec2-user]$ sudo usermod -a -G docker ec2-user +[ec2-user]$ exit +``` + + +Copy credentials for endpoint access, log in and validate docker setup: + +```bash +$ scp -i key.pem data-plane-private-key.pem ec2-user@ec2-xx-xxx-xxx-xxx.compute-1.amazonaws.com: +$ scp -i key.pem data-plane-public-cert.pem ec2-user@ec2-xx-xxx-xxx-xxx.compute-1.amazonaws.com: + +$ ssh -i key.pem ec2-user@ec2-xx-xxx-xxx-xxx.compute-1.amazonaws.com +[ec2-user]$ docker info +``` + + +Make a dummy query: + +```bash +[ec2-user]$ echo "/search/?yql=select+*+from+music+where+true" > query001.txt +``` + + +Run vespa-fbench and verify 200 response: + +```bash +[ec2-user]$ docker run -v $(pwd):/files -w /files \ + --entrypoint /opt/vespa/bin/vespa-fbench \ + vespaengine/vespa \ + -C data-plane-public-cert.pem \ + -K data-plane-private-key.pem \ + -T /etc/ssl/certs/ca-bundle.crt \ + -n 1 -q query001.txt -s 1 -c 0 \ + baaae1db.b68ddc0d.z.vespa-app.cloud 443 +``` + + + +At this point, you are able to benchmark using *vespa-fbench* in the same zone as the Vespa Cloud dev instance. + +## Run benchmark + +Use the [Vespa Benchmarking Guide](/en/performance/benchmarking) to plan and run benchmarks. Also see [sizing](#sizing) below. Make sure the client running the benchmark tool has sufficient resources. + +Export [metrics](/en/operations/metrics): + +```bash +$ curl \ + --cert data-plane-public-cert.pem \ + --key data-plane-private-key.pem \ + https://baaae1db.b68ddc0d.z.vespa-app.cloud/prometheus/v1/values +``` + +Notes: + +- Periodically dump all metrics using `consumer=Vespa`. +- Make sure you will not exhaust your serving threads on your container nodes while in production. 
This can be verified by making sure this expression stays well below 100% (typically below 50%) for the traffic you expect: `100 * (jdisc.thread_pool.active_threads.sum / jdisc.thread_pool.active_threads.count) / jdisc.thread_pool.size.max` for each `threadpool` value. You can increase the number of threads in the pools by using larger container nodes, more container nodes or by tuning the number of threads as described in [services-search](/en/reference/applications/services/search#threadpool). In the case you do exhaust a threadpool and its queue you will experience HTTP 503 responses for requests that are rejected by the container. + +## Making changes + +Whenever deploying changes to configuration, track progress in the Deployment dashboard. Some changes, like changing [requestthreads](/en/reference/applications/services/content#requestthreads) will restart content nodes, and this is done in sequence and takes time. Wait for successful completion in *Wait for services and endpoints to come online*. + +When changing node type/count, wait for auto data redistribution to complete, watching the `vds.idealstate.merge_bucket.pending.average` metric: + +```bash +$ while true; do curl -s \ + --cert data-plane-public-cert.pem \ + --key data-plane-private-key.pem \ + https://baaae1db.b68ddc0d.z.vespa-app.cloud/prometheus/v1/values?consumer=Vespa | \ + grep idealstate.merge_bucket.pending.average; \ + sleep 10; done +``` + +Notes: + +- Dump all metrics using `consumer=Vespa`. +- After changing the number of content nodes, this metric will jump, then decrease (not necessarily linearly) - speed depending on data volume. + +## Sizing + +Using Vespa Cloud enables the Vespa Team to assist you to optimise the application to reduce resource spend. Based on 150 applications running on Vespa Cloud today, savings are typically 50%. Cost optimization is hard to do without domain knowledge - but few teams are experts in both their application and its serving platform. 
Sizing means finding both the right node size and the right cluster topology: + +![Resize to fewer and smaller nodes](/assets/img/nodes.svg) + +Applications use Vespa for their primary business use cases. Availability and performance vs. cost are business decisions. The best sized application can handle all expected load situations, and is configured to degrade quality gracefully for the unexpected. + +Even though Vespa is cost-efficient out of the box, Vespa experts can usually spot over/under-allocations in CPU, memory and disk space/IO, and discuss trade-offs with the application team. + +Using [automated deployments](/en/operations/automated-deployments), applications go live with little risk. After launch, right-size the application based on true load after using Vespa's elasticity features with automated data migration. + +Use the [Vespa sizing guide](/en/performance/sizing-search) to size the application and find metrics used there. Pro-tips: + +- 60% is a good max memory allocation +- 50% is a good max CPU allocation, although application dependent. +- 70% is a good max disk allocation + +Rules of thumb: + +- Memory and disk scale approximately linearly for indexed fields' data - attributes have a fixed cost for empty fields. +- Data variance will impact memory usage. +- Undersized instances will [block writes](/en/writing/feed-block). +- It is often a good idea to use the `dev` zone to test memory impact of adding large fields, e.g. adding an embedding. + +## Notes + +- The user running benchmarks must have read access to the endpoint - if you already have it, you can skip this section. Refer to the [Vespa security guide](/en/security/guide). +- [Monitoring](/en/operations/monitoring) is useful to track metrics when benchmarking. 
diff --git a/mintlify-docs/en/performance/benchmarking.mdx b/mintlify-docs/en/performance/benchmarking.mdx new file mode 100644 index 0000000000..b79f2a82f5 --- /dev/null +++ b/mintlify-docs/en/performance/benchmarking.mdx @@ -0,0 +1,161 @@ +--- +title: "Vespa Benchmarking" +sidebarTitle: "Benchmarking" +--- +Benchmarking a Vespa application is essential to get an idea of how well the test configuration performs. Thus, benchmarking is an essential part of sizing a search cluster itself. Benchmarking a cluster can answer the following questions: + +- What throughput and latency to expect from a search node? +- Which resource is the bottleneck in the system? + +These in turn indirectly answer other questions, such as how many nodes are needed and whether it will help to upgrade disk or CPU. Thus, benchmarking will help in finding the optimal Vespa configuration, using all resources optimally, which in turn lowers costs. + +A good rule is to benchmark whenever the workload changes. Benchmarking should also be done when adding new features to queries. + +Having an understanding of the query mix and SLA will help to set the test parameters. Before benchmarking, consider: + +- What is the expected query mix? Having a representative query mix to test with is essential in order to get valid results. Splitting up in different types of queries is also a useful way to get an idea of which query classes are resource intensive. +- What is the expected SLA, both in terms of latency and query throughput? +- How important is real-time behavior? What is the rate of incoming documents, if any? +- Timeout: in a benchmarking scenario, is it OK for requests to time out? Default [timeout](/en/reference/querying/yql#timeout) is 500 ms, and [softtimeout](/en/reference/api/query#ranking.softtimeout.enable) is enabled. 
If the full cost of all queries is to be considered: + - Disable soft timeout with execution parameter + - by a [query profile](/en/reference/querying/query-profiles) + - by appending `&ranking.softtimeout.enable=false` to each query with the [vespa-fbench](#vespa-fbench) `-a` option + - Set timeout to e.g. 5 seconds + - Note that `timeout` in YQL takes precedence + - Replace timeout in YQL or use the execution parameter [timeout](/en/reference/api/query#timeout) as above. + +If benchmarking using Vespa Cloud, see [Vespa Cloud Benchmarking](/en/performance/benchmarking-cloud). + +## vespa-fbench + +Vespa provides a query load generator tool, [vespa-fbench](/en/reference/operations/tools#vespa-fbench), to run queries and generate statistics - much like a traditional web server load generator. It allows running any number of *clients* (i.e. the more clients, the higher load), for any length of time, and adjusting the client response time before issuing the next query. It outputs the throughput, max, min, and average latency, as well as the 25, 50, 75, 90, 95, 99 and 99.9 latency percentiles. This provides quite accurate information of how well the system manages the workload. + +**Disclaimer:** *vespa-fbench* is a tool to drive load for benchmarking and tuning. It is not a tool for finding the maximum load or latencies in a production setting. This is due to the way it is implemented: It is run with `-n` number of clients per run. It is good for testing, as proton can be observed at different levels of concurrency. In the real world, the number of clients and query arrival will follow a different distribution, and impact 95p / 99p latency percentiles. 
+ +### Prepare queries + +vespa-fbench uses *query files* for GET and POST queries - see the [reference](/en/reference/operations/tools#vespa-fbench) - examples: *HTTP GET* requests: + +```bash +/search/?yql=select%20%2A%20from%20sources%20%2A%20where%20true +``` + +*HTTP POST* requests format: + +```text +/search/ +{"yql" : "select * from sources * where true"} +``` + +### Run queries + +A typical vespa-fbench command looks like: + +```bash +$ vespa-fbench -n 8 -q queries.txt -s 300 -c 0 myhost.mydomain.com 8080 +``` + +This starts 8 clients, using requests read from `queries.txt`. The `-s` parameter indicates that the benchmark will run for 300 seconds. The `-c` parameter states that each client thread should wait for 0 milliseconds between each query. The last two parameters are container hostname and port. Multiple hosts and ports can be provided, and the clients will be uniformly distributed to query the containers round-robin. + +A more complex example, using docker, hitting a Vespa Cloud endpoint: + +```bash +$ docker run -v /Users/myself/tmp:/testfiles \ + -w /testfiles --entrypoint '' vespaengine/vespa \ + /opt/vespa/bin/vespa-fbench \ + -C data-plane-public-cert.pem -K data-plane-private-key.pem -T /etc/ssl/certs/ca-bundle.crt \ + -n 10 -q queries.txt -o result.txt -s 300 -c 0 \ + myapp.mytenant.aws-us-east-1c.z.vespa-app.cloud 443 +``` + +When using a query file with HTTP POST requests (`-P` option) one also needs to pass the *Content-Type* header using the `-H` header option. 
+ +```bash + $ docker run -v /Users/myself/tmp:/testfiles \ + -w /testfiles --entrypoint '' vespaengine/vespa \ + /opt/vespa/bin/vespa-fbench \ + -C data-plane-public-cert.pem -K data-plane-private-key.pem -T /etc/ssl/certs/ca-bundle.crt \ + -n 10 -P -H "Content-Type: application/json" -q queries_post.txt -o output.txt -s 300 -c 0 \ + myapp.mytenant.aws-us-east-1c.z.vespa-app.cloud 443 +``` + +### Post Processing + +After each run, a summary is written to stdout (and possibly an output file from each client) - example: + +```text +***************** Benchmark Summary ***************** +clients: 30 +ran for: 1800 seconds +cycle time: 0 ms +lower response limit: 0 bytes +skipped requests: 0 +failed requests: 0 +successful requests: 12169514 +cycles not held: 12169514 +minimum response time: 0.82 ms +maximum response time: 3010.53 ms +average response time: 4.44 ms +25 percentile: 3.00 ms +50 percentile: 4.00 ms +75 percentile: 6.00 ms +90 percentile: 7.00 ms +95 percentile: 8.00 ms +99 percentile: 11.00 ms +actual query rate: 6753.90 Q/s +utilization: 99.93 % +``` + +Take note of the number of *failed requests*, as a high number here can indicate that the system is overloaded, or that the queries are invalid. + +- In some modes of operation, vespa-fbench waits before sending the next query. "utilization" represents the time that vespa-fbench is sending queries and waiting for responses. For example, a 'system utilization' of 50% means that vespa-fbench is stress testing the system 50% of the time, and is doing nothing the remaining 50% of the time +- vespa-fbench latency results include network latency between the client and the Vespa instance. Measure and subtract network latency to obtain the true vespa query latency. + +## Benchmark + +Strategy: find optimal *requestthreads* number, then find capacity by increasing number of parallel test clients: + + + +Test with single client (n=1), single thread to find a *latency baseline*. 
For each test run, increase [threads](/en/reference/applications/services/content#requestthreads): + +```xml + + + + + + + 1 + +``` + +use 1, 2, 4, 8, ... threads and measure query latency (vespa-fbench output) and CPU utilization ([metric](#metrics) - below). Note: after deploying the thread config change, [proton](/en/content/proton) must be restarted for new thread setting to take effect (look for ONLINE): + +```bash +$ vespa-stop-services && vespa-start-services && sleep 60 && vespa-proton-cmd --local getProtonStatus + ... + "matchengine","OK","state=ONLINE","" + "documentdb:search","OK","state=ONLINE configstate=OK","" +``` + + +use #threads sweet spot, then increase number of clients, observe latency and CPU. + + + +### Metrics + +The *container* nodes expose the [/metrics/v2/values](/en/operations/metrics) interface - use this to dump metrics during benchmarks. Example - output all metrics from content node: + +```bash +$ curl http://localhost:8080/metrics/v2/values | \ + jq '.nodes[] | select(.role=="content/mysearchcluster/0/0") | .node.metrics[].values' +``` + +Output CPU util: + +```bash +$ curl http://localhost:8080/metrics/v2/values | \ + jq '.nodes[] | select(.role=="content/mysearchcluster/0/0") | .node.metrics[].values."cpu.util"' +``` diff --git a/mintlify-docs/en/performance/caches-in-vespa.mdx b/mintlify-docs/en/performance/caches-in-vespa.mdx new file mode 100644 index 0000000000..83eb8b2044 --- /dev/null +++ b/mintlify-docs/en/performance/caches-in-vespa.mdx @@ -0,0 +1,44 @@ +--- +title: "Caches" +--- +## Content node summary cache + +The summary cache caches summary requests and is enabled by [proton tuning configuration](/en/reference/applications/services/content#summary). When enabling a proton summary cache, one should also change the way proton reads summary data from mmap to directio as done below. The summary cache saves IO and cpu spent on decompressing of chunked blocks (default 64 KB) of summary data. 
+ +Note that the summary cache is shared across multiple document types. + +By default, the cache is enabled, using up to 5% of available memory - configuration example: + +```xml + + + + + + + + directio + + + + 5 + + + + + + + + + +``` + + +**Note:** + +If the requested document-summary only contains fields that are [attributes](/en/content/attributes), the summary store (and cache) is not used. + + +## Protocol phases caches + +*ranking.queryCache* and *groupingSessionCache* described in the [Query API reference](/en/reference/api/query) are only caching data in between phases for a given query, so other queries do not get any benefits, but these caches save container - content node(s) round-trips for a *given* query. \ No newline at end of file diff --git a/mintlify-docs/en/performance/container-http.mdx b/mintlify-docs/en/performance/container-http.mdx new file mode 100644 index 0000000000..00a57216b7 --- /dev/null +++ b/mintlify-docs/en/performance/container-http.mdx @@ -0,0 +1,191 @@ +--- +title: "HTTP Performance Testing of the Container using Gatling" +sidebarTitle: "HTTP performance testing" +--- + +For container testing, more flexibility and more detailed checking than straightforwardly saturating an interface with HTTP requests is often required. The stress test tool [Gatling](https://gatling.io/) provides such capabilities in a flexible manner with the possibility of writing arbitrary plug-ins and a DSL for the most common cases. This document shows how to get started using Gatling with Vespa. Experienced Gatling users should find there is nothing special with testing Vespa versus other HTTP services. + +## Install Gatling + +Refer to Gatling's [documentation for getting started](https://gatling.io/docs/gatling/reference/current/), or simply get the newest version from the [Gatling front page](https://gatling.io/), unpack the tarball and jump straight into it. The tool runs happily from the directory created when unpacking it. 
This tutorial is written with Gatling 2 in mind. + +## Configure the First Test with a Query Log + +Refer to the Gatling documentation on how to set up the recorder. This tool acts as a browser proxy, recording what you do in the browser, allowing you to replay that as a test scenario. + +After running *bin/recorder.sh* and setting package to *com.vespa.example* and class name to *VespaTutorial*, running a simple query against your node *mynode* (running e.g. [album-recommendation-java](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation-java)), should create a basic simulation looking something like the following in *user-files/simulations/com/vespa/example/VespaTutorial.scala*: + +```scala expandable +package com.vespa.example + +import io.gatling.core.Predef._ +import io.gatling.core.session.Expression +import io.gatling.http.Predef._ +import io.gatling.jdbc.Predef._ +import io.gatling.http.Headers.Names._ +import io.gatling.http.Headers.Values._ +import scala.concurrent.duration._ +import bootstrap._ +import assertions._ + +class VespaTutorial extends Simulation { + + val httpProtocol = http + .baseURL("http://mynode:8080") + .acceptHeader("text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + .acceptEncodingHeader("gzip, deflate") + .connection("keep-alive") + .userAgentHeader("Mozilla/5.0 (X11; Linux x86_64; rv:27.0) Gecko/20100101 Firefox/27.0") + + val headers_1 = Map("""Cache-Control""" -> """max-age=0""") + + val scn = scenario("Scenario Name") + .exec(http("request_1") + .get("""/search/?query=bad""") + .headers(headers_1)) + + setUp(scn.inject(atOnce(1 user))).protocols(httpProtocol) +} +``` + +Running a single query over and over again is not useful, so we have a tiny query log in a CSV file we want to run in our test, *user-files/data/userinput.csv*: + +```text +userinput +bad religion +bad +lucky oops +radiohead +bad jackson +``` + +As usual for CSV files, the first line names the parameters. 
A literal comma may be escaped with backslash as "\\,". Gatling takes care of URL quoting, so there is no need to e.g. encode space as "%20". + +Add a feeder: + +```scala expandable +package com.vespa.example + +import io.gatling.core.Predef._ +import io.gatling.core.session.Expression +import io.gatling.http.Predef._ +import io.gatling.jdbc.Predef._ +import io.gatling.http.Headers.Names._ +import io.gatling.http.Headers.Values._ +import scala.concurrent.duration._ +import bootstrap._ +import assertions._ + +class VespaTutorial extends Simulation { + + val httpProtocol = http + .baseURL("http://mynode:8080") + .acceptHeader("text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + .acceptEncodingHeader("gzip, deflate") + .connection("keep-alive") + .userAgentHeader("Mozilla/5.0 (X11; Linux x86_64; rv:27.0) Gecko/20100101 Firefox/27.0") + + val headers_1 = Map("""Cache-Control""" -> """max-age=0""") + + val scn = scenario("Scenario Name") + .feed(csv("userinput.csv").random) + .exec(http("request_1") + .get("/search/") + .queryParam("query", "${userinput}") + .headers(headers_1)) + + setUp(scn.inject(constantRate(100 usersPerSec) during (10 seconds))) + .protocols(httpProtocol) +} +``` + +Now, we have made a couple of changes to the original scenario. First, we have added the feeder. Since we do not have enough queries available for running long enough to get a scenario for some traffic, we chose the "random" strategy. This means a random user input string will be chosen for each invocation, and it might be reused. Also, we have changed how the test is run, from just a single query, into a constant rate of 100 users for 10 seconds. We should expect something as close as possible to 100 QPS in our test report. 
+ +## Running a Benchmark + +We now have something we can run both on a headless node and on a personal laptop, sample run output: + +```bash expandable +$ ./bin/gatling.sh +GATLING_HOME is set to ~/tmp/gatling-charts-highcharts-2.0.0-M3a +Choose a simulation number: + [0] advanced.AdvancedExampleSimulation + [1] basic.BasicExampleSimulation + [2] com.vespa.example.VespaTutorial +2 +Select simulation id (default is 'vespatutorial'). Accepted characters are a-z, A-Z, 0-9, - and _ + +Select run description (optional) + +Simulation com.vespa.example.VespaTutorial started... + +================================================================================ +2014-04-09 11:54:33 0s elapsed +---- Scenario Name ------------------------------------------------------------- +[- ] 0% + waiting: 998 / running: 2 / done:0 +---- Requests ------------------------------------------------------------------ +> Global (OK=0 KO=0 ) + +================================================================================ + + +================================================================================ +2014-04-09 11:54:38 5s elapsed +---- Scenario Name ------------------------------------------------------------- +[#################################### ] 49% + waiting: 505 / running: 0 / done:495 +---- Requests ------------------------------------------------------------------ +> Global (OK=495 KO=0 ) +> request_1 (OK=495 KO=0 ) +================================================================================ + + +================================================================================ +2014-04-09 11:54:43 10s elapsed +---- Scenario Name ------------------------------------------------------------- +[######################################################################### ] 99% + waiting: 8 / running: 0 / done:992 +---- Requests ------------------------------------------------------------------ +> Global (OK=992 KO=0 ) +> request_1 (OK=992 KO=0 ) 
+================================================================================ + + +================================================================================ +2014-04-09 11:54:43 10s elapsed +---- Scenario Name ------------------------------------------------------------- +[##########################################################################]100% + waiting: 0 / running: 0 / done:1000 +---- Requests ------------------------------------------------------------------ +> Global (OK=1000 KO=0 ) +> request_1 (OK=1000 KO=0 ) +================================================================================ + +Simulation finished. +Generating reports... +Parsing log file(s)... +Parsing log file(s) done + +================================================================================ +---- Global Information -------------------------------------------------------- +> numberOfRequests 1000 (OK=1000 KO=0 ) +> minResponseTime 10 (OK=10 KO=- ) +> maxResponseTime 30 (OK=30 KO=- ) +> meanResponseTime 10 (OK=10 KO=- ) +> stdDeviation 2 (OK=2 KO=- ) +> percentiles1 10 (OK=10 KO=- ) +> percentiles2 10 (OK=10 KO=- ) +> meanNumberOfRequestsPerSecond 99 (OK=99 KO=- ) +---- Response Time Distribution ------------------------------------------------ +> t < 800 ms 1000 (100%) +> 800 ms < t < 1200 ms 0 ( 0%) +> t > 1200 ms 0 ( 0%) +> failed 0 ( 0%) +================================================================================ + +Reports generated in 0s. +Please open the following file : ~/tmp/gatling-charts-highcharts-2.0.0-M3a/results/vespatutorial-20140409115432/index.html +``` + +The report gives graphs showing how the test progressed and summaries for failures and time spent. 
diff --git a/mintlify-docs/en/performance/container-tuning.mdx b/mintlify-docs/en/performance/container-tuning.mdx new file mode 100644 index 0000000000..2a2cba3070 --- /dev/null +++ b/mintlify-docs/en/performance/container-tuning.mdx @@ -0,0 +1,154 @@ +--- +title: "Container Tuning" +--- + +A collection of configuration parameters to tune the Container as used in Vespa. Some configuration parameters have native [services.xml](/en/basics/applications) support while others are configured through [generic config overrides](/en/reference/applications/config-files#generic-configuration-in-services-xml). + +## Container worker threads + +The container uses multiple thread pools for its operations. Most components including request handlers use the container's [default thread pool](/en/reference/applications/services/container#threadpool), which is controlled by a shared executor instance. Any component can utilize the default pool by injecting an `java.util.concurrent.Executor` instance. Some built-in components have dedicated thread pools - such as the Jetty server, the [search handler](/en/reference/applications/services/search#threadpool) and [document-processing](/en/reference/applications/services/docproc#threadpool) chains. These thread pools are injected through special wiring in the config model and are not easily accessible from other components. + +The thread pools are by default scaled on the system resources as reported by the JVM (`Runtime.getRuntime().availableProcessors()`). It's paramount that the `-XX:ActiveProcessorCount`/`jvm_availableProcessors` configuration is correct for the container to work optimally. The [default thread pool](/en/reference/applications/services/container#threadpool) configuration can be overridden through services.xml. We recommend you keep the default configuration as it's tuned to work across a variety of workloads. Note that the default configuration and pool usage may change between minor versions. 
+ +The container will pre-start the minimum number of worker threads, so even an idle container may report running several hundred threads. The [search handler](/en/reference/applications/services/search#threadpool) and [document processing handler](/en/reference/applications/services/docproc#threadpool) thread pools each pre-start the number of workers set in their configurations. Note that tuning the capacity upwards increases the risk of high GC pressure as concurrency becomes higher with more in-flight requests. The GC pressure is a function of number of in-flight requests, the time it takes to complete the request and the amount of garbage produced per request. Increasing the queue size will allow the application to handle shorter traffic bursts without rejecting requests, although increasing the average latency for those requests that are queued up. Large queues will also increase heap consumption in overload situations. For some thread pools, extra threads will be created once the queue is full (when [`max`](/en/reference/applications/services/search#threads.max) is specified), and are destroyed after an idle timeout. If all threads are occupied, requests are rejected with a 503 response. + +The effective thread pool configuration and utilization statistics can be observed through the [Container Metrics](/en/operations/metrics#container-metrics). See [Thread Pool Metrics](/en/operations/metrics#thread-pool-metrics) for a list of metrics exported. + + +**Note:** + +If the queue size is set to 0 the metric measuring the queue size - `jdisc.thread_pool.work_queue.size` - will instead switch to measure how many threads are active. + + +### Recommendation + +A fixed size pool is preferable for stable latency during peak load, at a cost of a higher static memory load and increased context-switching overhead if excessive number of threads are configured. 
Variable size pool is mostly beneficial to minimize memory consumption during low-traffic periods, and in general if the size of peak load is somewhat unknown. The downside is that once all core threads are active, latency will increase as additional tasks are queued and launching extra threads is relatively expensive as it involves system calls to the OS. + +### Example + +Consider a container host with 8 vCPU. Setting `4` on the [search handler threadpool](/en/reference/applications/services/search#threadpool) yields `4 * 8 = 32` worker threads, and adding `25` gives the pool a total queue capacity of `32 * 25 = 800` requests. The same thread calculation applies to the [document processing handler threadpool](/en/reference/applications/services/docproc#threadpool), which does not support queue configuration. The example below shows a consistent configuration where the default thread pool, the search handler threadpool, and the document processing handler threadpool are all kept fixed. + +```xml expandable + + + + + + + + 5 + + + 25 + + + + + + + + + 4 + + + 25 + + + + + + + + 2 + + + + +``` + +## Container memory usage + +> *Help, my container nodes are using more than 70% memory!* + +It's common to observe the container process utilizing its maximum configured heap size. This, by itself, is not necessarily an indication of a problem. The Java Virtual Machine (JVM) manages memory within the allocated heap, and it's designed to use as much of it as possible to reduce the frequency of garbage collection. + +To understand whether enough memory is allocated, look at the garbage collection activity. If GC is running frequently and using significant CPU or causing long pauses, it might indicate that the heap size is too small for the workload. In such cases, consider increasing the maximum heap size. 
However, if the garbage collector is running infrequently and efficiently, it's perfectly normal for the container to utilize most or all of its allocated heap, and even more (as some memory will also be allocated outside the heap; e.g. direct buffers for efficient data transfer). + +Vespa exports several metrics to allow you to monitor JVM GC performance, such as [jvm.gc.overhead](/en/reference/operations/metrics/container#jvm_gc_overhead) - if this exceeds 8-10% you should consider increasing heap memory and/or tuning GC settings. + +## JVM heap size + +Change the default JVM heap size settings used by Vespa to better suit the specific hardware settings or application requirements. + +By setting the relative size of the total JVM heap in [percentage of available memory](/en/reference/applications/services/container#nodes), one does not know exactly what the heap size will be, but the configuration will be adaptable and ensure that the container can start even in environments with less available memory. The example below allocates 50% of available memory on the machine to the JVM heap: + +```xml + + + + + + +``` + +## JVM Tuning + +Use *gc-options* for controlling GC related parameters and *options* for tuning other parameters. See [reference documentation](/en/reference/applications/services/container#nodes). Example: Running with 4 GB heap using G1 garbage collector and using NewRatio = 1 (equal size of old and new generation) and enabling verbose GC logging (logged to stdout to vespa.log file). + +```xml + + + + + + +``` + +The default heap size with docker image is 1.5g which can for high throughput applications be on the low side, causing frequent garbage collection. By default, the G1GC collector is used. + +### Config Server and Config Proxy + +The config server and proxy are not executed based on the model in *services.xml*. On the contrary, they are used to bootstrap the services in that model. 
Consequently, one must use configuration variables to set the JVM parameters for the config server and config proxy. They also need to be restarted (*services* in the config proxy's case) after a change, but one does *not* need to *vespa prepare* or *vespa activate* first. Example: + +```bash +VESPA_CONFIGSERVER_JVMARGS -Xlog:gc +VESPA_CONFIGPROXY_JVMARGS -Xlog:gc -Xmx256m +``` + +Refer to [Setting Vespa variables](/en/operations/self-managed/files-processes-and-ports#environment-variables). + +## Container warmup + +Some applications observe that the first queries made to a freshly started container take a long time to complete. This is typically due to some components performing lazy setup of data structures or connections. Lazy initialization should be avoided in favor of eager initialization in component constructor, but this is not always possible. + +A way to avoid problems with the first queries in such cases is to perform warmup queries at startup. This is done by issuing queries from the constructor of the Handler of regular queries. If using the default handler, [com.yahoo.search.handler.SearchHandler](https://github.com/vespa-engine/vespa/blob/master/container-search/src/main/java/com/yahoo/search/handler/SearchHandler.java), subclass this and configure your subclass as the handler of query requests in *services.xml*. + +Add a call to a warmupQueries() method as the last line of your handler constructor. 
The method can look something like this: + +```java +private void warmupQueries() { + String[] requestUris = new String[] {"warmupRequestUri1", "warmupRequestUri2"}; + int warmupIterations = 50; + + for (int i = 0; i < warmupIterations; i++) { + for (String requestUri : requestUris) { + handle(HttpRequest.createTestRequest(requestUri, com.yahoo.jdisc.http.HttpRequest.Method.GET)); + } + } +} +``` + +Since these queries will be executed before the container starts accepting external queries, they will cause the first external queries to observe a warmed up container instance. + +Use [metrics.ignore](/en/reference/api/query#metrics.ignore) in the warmup queries to eliminate them from being reported in metrics. + +### Disabling warmups + +Warmups can be disabled by adding the following container http config to the container section in services.xml: + +```xml + + false + +``` \ No newline at end of file diff --git a/mintlify-docs/en/performance/feature-tuning.mdx b/mintlify-docs/en/performance/feature-tuning.mdx new file mode 100644 index 0000000000..8233e45b69 --- /dev/null +++ b/mintlify-docs/en/performance/feature-tuning.mdx @@ -0,0 +1,482 @@ +--- +title: "Vespa Serving Tuning" +sidebarTitle: "Feature tuning" +--- + +This document describes how to tune certain features of an application for high query serving performance, where the main focus is on content cluster search features; see [Container tuning](/en/performance/container-tuning) for tuning of container clusters. The [search sizing guide](/en/performance/sizing-search) is about *scaling* an application deployment. + +## Attribute vs index + +The [attribute](/en/content/attributes) documentation summarizes when to use [attribute](/en/reference/schemas/schemas#attribute) in the [indexing](/en/reference/schemas/schemas#indexing) statement. Also see the [procedure](/en/reference/schemas/schemas#modifying-schemas) for changing from attribute to index and vice-versa. 
+ +```js +field timestamp type long { + indexing: summary | attribute +} +``` + +If both index and attribute are configured for string-type fields, Vespa will search and match against the index with default match `text`. All numeric type fields and tensor fields are attribute (in-memory) fields in Vespa. + +## When to use fast-search for attribute fields + +By default, Vespa does not build any posting list index structures over *attribute* fields. Adding *fast-search* to the attribute definition as shown below will add an in-memory B-tree posting list structure which enables faster search for some cases (but not all, see next paragraph): + +```js +field timestamp type long { + indexing: summary | attribute + attribute: fast-search + rank: filter +} +``` + +When Vespa runs a query with multiple query items, it builds a query execution plan. It tries to optimize the plan so that the temporary result set is as small as possible. To do this, restrictive query tree items (matching few documents) are evaluated early. The query execution plan looks at hit count estimates for each part of the query tree using the index and B-tree dictionaries, which track the number of documents in which a given term occurs. + +However, for attribute fields without [fast-search](/en/content/attributes#fast-search) there is no hit count estimate, so the estimate becomes the total number of documents (matches all) and the query tree item is moved to the end of the query evaluation. 
A query with only one query term searching an attribute field without `fast-search` would be a linear scan over all documents and thus expensive: + +```text +select * from sources * where range(timestamp, 0, 100) +``` + +But if this query term is *and*\-ed with another term that matches fewer documents, that term will determine the cost instead, and fast-search won't be necessary, e.g.: + +```text +select * from sources * where range(timestamp, 0, 100) and uuid contains "123e4567-e89b-12d3-a456-426655440000" +``` + +The general rules of thumb for when to use fast-search for an attribute field are: + +- Use *fast-search* if the attribute field is searched without any other query terms +- Use *fast-search* if the attribute field could limit the total number of hits efficiently + +Changing fast-search aspect of the attribute is a [live change](/en/reference/schemas/schemas#modifying-schemas) which does not require any re-feeding, so testing the performance with and without is low effort. Adding or removing *fast-search* requires restart. + +Note that *attribute* fields with *fast-search* that are not used in term based [ranking](/en/basics/ranking) should use *rank: filter* for optimal performance. See reference [rank: filter](/en/reference/schemas/schemas#rank). + +See optimization for sorting on a *single-value numeric attribute with fast-search* using [sorting.degrading](/en/reference/api/query#sorting.degrading). + +## Tuning query performance for lexical search + +Lexical search (or keyword-based search) is a method that matches query terms as they appear in indexed documents. It relies on the lexical representation of words rather than their meaning, and is one of the two retrieval methods used in [hybrid search](/en/learn/tutorials/hybrid-search). 
Lexical search in Vespa is done by querying string (text) [index](/en/basics/schemas#document-fields) fields, typically using the [weakAnd](/en/ranking/wand#weakand) query operator with [BM25](/en/ranking/bm25) ranking. + +The following schema represents a simple article document with *title* and *content* fields, that can represent Wikipedia articles as an example. A *default* fieldset is specified such that user queries are matched against both the *title* and *content* fields. BM25 ranking combines the scores of both fields in the *default* rank profile. In addition, the *optimized* rank profile specifies tuning parameters to improve query performance: + +```js expandable +schema article { + document article { + field title type string { + indexing: index | summary + index: enable-bm25 + } + field content type string { + indexing: index | summary + index: enable-bm25 + } + } + + fieldset default { + fields: title, content + } + + rank-profile default { + first-phase { + expression: bm25(title) + bm25(content) + } + } + + rank-profile optimized inherits default { + filter-threshold: 0.05 + weakand { + stopword-limit: 0.6 + adjust-target: 0.01 + } + } +} +``` + +The following shows an example question-answer query against a collection of articles, using the *weakAnd* query operator and the *optimized* rank profile. Question-answer queries are often written in full sentences, and as a consequence, they tend to contain many stopwords that are present in many documents and of less relevance when it comes to ranking. E.g., terms such as "the", "in", and "are" are typically present in more than 60% of the documents: + +```json +{ + "yql": "select * from article where userQuery()", + "ranking.profile": "optimized", + "query": "what are the three highest mountains in the world" +} +``` + +The cost of evaluating such a query is primarily linear with the number of matched documents. 
The *AND* operator is most effective, but often ends up being too restrictive by not returning enough matches. The *OR* operator is less restrictive, but has the problem of returning too many matches, which is very costly. The *weakAnd* operator is somewhere in between the two in cost. + +### Posting Lists + +To find matching documents, the query operator uses the *posting lists* associated with each query term. A posting list is part of the inverted index and contains all occurrences of a term within a collection of documents. It consists of document IDs for documents that contain the term, and additional information such as the positions of the term within those documents (used for ranking purposes). For common terms (e.g., stopwords), the posting lists are very large and can be expensive to use during evaluation and ranking. CPU work is required to iterate them, and I/O work is required to load portions of them from disk to memory with MMAP. The last part is especially problematic when all posting lists of a disk index cannot fit into physical memory, and the system must constantly swap parts of them in and out of memory, leading to high I/O wait times. + +To improve query performance, the following tuning parameters are available, as seen used in the *optimized* rank profile. These are used to make tradeoffs between performance and quality. + +- **Use more compact posting lists for common terms**: Setting [filter-threshold](/en/reference/schemas/schemas#filter-threshold) to 0.05 ensures that all terms that are estimated to occur in more than 5% of the documents are handled with [compact posting lists (bitvectors)](/en/content/proton#index) instead of the full posting lists. This makes matching faster at the cost of producing less information for BM25 ranking (only a boolean signal is available). 
+- **Avoid using large posting lists altogether**: Setting [stopword-limit](/en/reference/schemas/schemas#weakand-stopword-limit) to 0.6 ensures that all terms that are estimated to occur in more than 60% of the documents are considered stopwords and dropped entirely from the query and also from ranking. +- **Reduce the number of hits produced by *weakAnd***: Setting [adjust-target](/en/reference/schemas/schemas#weakand-adjust-target) ensures that documents that only match terms that occur very frequently in the documents are not considered hits. This also removes the need to calculate *first-phase* ranking for these documents, which is beneficial if *first-phase* ranking is more complex and expensive. + +### Performance + + +The tuning parameters used in the *optimized* rank profile have been shown to provide a good tradeoff between performance and quality in testing. A Wikipedia dataset with [SQuAD](https://nlp.stanford.edu/pubs/rajpurkar2016squad.pdf) (Stanford Question Answering Dataset) queries was used to analyze performance, and [trec-covid](https://ir.nist.gov/trec-covid/), [MS MARCO](https://microsoft.github.io/msmarco/) and [nfcorpus](https://huggingface.co/datasets/BeIR/nfcorpus) from the BEIR dataset to analyze quality implications. + +For instance, the query performance was tripled without any measurable drop in quality with the Wikipedia dataset, using the tuning parameters in the *optimized* rank profile. See the blog post [Tripling the query performance of lexical search](https://blog.vespa.ai/tripling-the-query-performance-of-lexical-search/) for more details. Note that testing should be conducted on your particular dataset to find the right tradeoff between performance and quality. + +## Hybrid TAAT and DAAT query evaluation + +Vespa supports **hybrid** query evaluation over inverted indexes, combining *TAAT* and *DAAT* evaluation to get the best of both query evaluation techniques. 
Hybrid is not enabled per default and is triggered by a run-time query parameter. + +- **TAAT:** *Term At A Time* scores documents one query term at a time. The entire posting iterator can be read per query term, and the score of a document is accumulated. It is CPU cache friendly as posting data is read sequentially without randomly seeking the posting list iterator. The downside is that *TAAT* limits the term-based ranking function to be a linear sum of term scores. This downside is one reason why most search engines use *DAAT*. +- **DAAT:** *Document At A Time* scores documents completely one at a time. This requires multiple seeks in the term posting lists, which is CPU cache unfriendly but allows non-linear ranking functions. + +Generally, Vespa does *DAAT* (document-at-a-time) query evaluation and not *TAAT* (term-at-a time) for the reason listed above. + +Ranking (score calculation) and matching (does the document match the query logic) is not fully two separate disjunct phases, where one first finds matches and calculates the ranking score in a later phase. Matching and *first-phase* score calculation is interleaved when using *DAAT*. + +The *first-phase* ranking score is assigned to the hit when it satisfies the query constraints. At that point, the term iterators are positioned at the document ID and one can unpack additional data from the term posting lists - e.g., for term proximity scoring used by the [nativeRank](/en/ranking/nativerank) ranking feature, which also requires unpacking of positions of the term within the document. + +The way hybrid query evaluation is done is that *TAAT* is used for sub-branches of the overall query tree, which is not used for term-based ranking. + +Using *TAAT* can speed up query matching significantly (up to 30-50%) in cases where the query tree is large and complex, and where only parts of the query tree are used for term-based ranking. 
Examples of query tree branches that would require *DAAT* is using text ranking features like [bm25 or nativeRank](/en/reference/ranking/rank-features). The list of ranking features which can handle *TAAT* is long, but using [attribute or tensor](/en/ranking/tensor-user-guide) features only can have the entire tree evaluated using *TAAT*. + +For example, for a query where there is a user text query from an end user, one can use *userQuery()* YQL syntax and combine it with application-level constraints. The application level filter constraints in the query could benefit from using *TAAT*. Given the following document schema: + +```js expandable +search news { + document news { + field title type string {} + field body type string{} + field popularity type float {} + field market type string { + rank:filter + indexing: attribute + attribute: fast-search + } + field language type string { + rank:filter + indexing: attribute + attribute: fast-search + } + } + fieldset default { + fields: title,body + } + rank-profile text-and-popularity { + first-phase { + expression: attribute(popularity) + log10(bm25(title)) + log10(bm25(body)) + } + } +} +``` + +In this case, the rank profile only uses two ranking features, the popularity attribute and the [bm25](/en/ranking/bm25) score of the userQuery(). These are used in the default fieldset containing the title and body. Notice how neither *market* nor *language* is used in the ranking expression. 
+ +In this query example, there is a language constraint and a market constraint, where both language and market are queried with a long list of valid values using OR, meaning that the document should match any of the market constraints and any of the language constraints: + +```json +{ + "hits": 10, + "ranking.profile": "text-and-popularity", + "yql": "select * from sources * where userQuery() and + (language contains \"en\" or language contains \"br\") and + (market contains \"us\" or market contains \"eu\" or market contains \"apac\" or market contains \"..\" )", + "query": "cat video", + "ranking.matching.termwiselimit": 0.1 +} +``` + +The language and the market constraints in the query tree are not used in the ranking score, and that part of the query tree could be evaluated using *TAAT*. See also [multi lookup set filter](#multi-lookup-set-filtering) for how to most efficiently search with large set filters. The subtree result is then passed as a bit vector into the *DAAT* query evaluation, which could significantly speed up the overall evaluation. + +Enabling hybrid *TAAT* is done by passing `ranking.matching.termwiselimit=0.1` as a request parameter. It's possible to evaluate the performance impact by changing this limit. Setting the limit to 0 will force termwise evaluation, which might hurt performance. + +One can evaluate if using the hybrid evaluation improves search performance by adding the above parameter. The limit is compared to the hit fraction estimate of the entire query tree. If the hit fraction estimate is higher than the limit, the termwise evaluation is used to evaluate the sub-branch of the query. + +## Indexing uuids + +When configuring [string](/en/reference/schemas/schemas#string) type fields with `index`, the default [match](/en/reference/schemas/schemas#match) mode is `text`. This means Vespa will [tokenize](/en/linguistics/linguistics-opennlp#tokenization) the content and index the tokens. 
+ +The string representation of an [Universally unique identifier](https://en.wikipedia.org/wiki/Universally_unique_identifier) (UUID) is 32 hexadecimal (base 16) digits, in five groups, separated by hyphens, in the form 8-4-4-4-12, for a total of 36 characters (32 alphanumeric characters and four hyphens). + +Example: Indexing `123e4567-e89b-12d3-a456-426655440000` with the above document definition, Vespa will tokenize this into 5 tokens: `[123e4567,e89b,12d3,a456,426655440000]`, each of which could be matched independently, leading to possible incorrect matches. + +To avoid this, change the mode to [match: word](/en/reference/schemas/schemas#word) to treat the entire uuid as *one* token/word: + +```js +field uuid type string { + indexing: summary | index + match: word + rank: filter +} +``` + +In addition, configure the `uuid` as a [rank: filter](/en/reference/schemas/schemas#rank) field - the field will then be represented as efficiently as possible during search and ranking. The `rank:filter` behavior can also be triggered at query time on a per-query item basis by the `com.yahoo.prelude.query.Item.setRanked()` in a [custom searcher](/en/applications/searchers). + +## Parent child and search performance + +When searching imported attribute fields (with `fast-search`) from parent document types, there is an additional indirection that can be reduced significantly if the imported field is defined with `rank:filter` and [visibility-delay](/en/reference/applications/services/content#visibility-delay) is configured to > 0. The [rank:filter](/en/reference/schemas/schemas#rank) setting impacts posting list granularity and `visibility-delay` enables a cache for the indirection between the child and parent document. + +## Ranking and ML Model inferences + +Vespa [scales](/en/performance/sizing-search) with the number of hits the query retrieves per node/search thread, and which needs to be evaluated by the first-phase ranking function. 
Read more on [phased ranking](/en/ranking/phased-ranking). Phased ranking enables using more resources during the second phase ranking step than in the first phase. The first phase should focus on getting decent recall (retrieving relevant documents in the top k), while the second phase should tune precision. + +For [text search](/en/ranking/nativerank) applications, consider using the [WAND](/en/ranking/wand) query operator - WAND can efficiently (sublinear) find the top-k documents using an inner scoring function. + +## Multi Lookup - Set filtering + +Several real-world search use cases are built around limiting or filtering based on a set filter. If the contents of a field in the document match any of the values in the query set, the document should be retrieved. E.g., searching data for a set of users: + +```sql +select * from sources * where user_id = 1 or user_id = 2 or user_id = 3 or user_id = 4 or user_id = 5 ... +``` + +For OR filters over the same field, it is strongly recommended to use the [in query operator](/en/reference/querying/yql#in) instead. It has considerably better performance than plain OR for set filtering: + +```sql +select * from sources * where user_id in (1, 2, 3, 4, 5) +``` + + +**Note:** + +Large sets can slow down YQL-parsing of the query - see [parameter substitution](/en/reference/querying/yql#parameter-substitution) for how to send the set in a compact, performance-effective way. + + +Attribute fields used like the above without other stronger query terms should have `fast-search` and `rank: filter`. 
If there is a large number of unique values in the field, it is also faster to use `hash` dictionary instead of `btree`, which is the default data structure for dictionaries for attribute fields with `fast-search`: + +```sql +field user_id type long { + indexing: summary | attribute + attribute: fast-search + dictionary: hash + rank: filter +} +``` + +For `string` fields, we also need to include [match](/en/reference/schemas/schemas#match) settings if using the `hash` dictionary: + +```sql +field user_id_str type string { + indexing: summary | attribute + attribute: fast-search + match: cased + rank: filter + dictionary { + hash + cased + } +} +``` + +If having 10M unique user\_ids in the dictionary and searching for 1000 users per query, the *btree* dictionary would be 1000 lookup times log(10M), while *hash* based would be 1000 lookups times O(1). Still, the *btree* dictionary offers more flexibility in terms of [match](/en/reference/schemas/schemas#match) settings. + +The `in` query set filtering approach can be used in combination with hybrid *TAAT* evaluation to further improve performance. See the [hybrid TAAT/DAAT](#hybrid-taat-daat) section. + +Also see the [dictionary schema reference](/en/reference/schemas/schemas#dictionary). + + +**Note:** + +For most use cases, the time spent on dictionary traversal is negligible compared to the time spent on query evaluation (matching and ranking). If the query is very selective, for example, using vespa as a key-value lookup store with ranking support, the dictionary traversal time can be significant. + + +## Document summaries - hits + +If queries request many (thousands) of hits from a content cluster with few content nodes, increasing the [summary cache](/en/performance/caches-in-vespa) might reduce latency and cost. 
+ +Using [explicit document summaries](/en/querying/document-summaries), Vespa can support memory-only summary fetching if **all** fields referenced in the document summary are defined with `attribute`. Dedicated in-memory summaries avoid (potential) disk read and summary chunk decompression. Vespa document summaries are stored using compressed [chunks](/en/reference/applications/services/content#summary-store-logstore-chunk). See also the [practical search performance guide on hits fetching](/en/performance/practical-search-performance-guide#hits-and-summaries). + +## Boolean, numeric, text attribute + +When using the attribute field type, considering performance, this is a rule of thumb: +1. Use boolean if a field is a boolean (max two values) +2. Use a string attribute if there is a set of values - only unique strings are stored +3. Use a numeric attribute for range searches +4. Use a numeric attribute if the data really is numeric; don't replace numeric with string numeric + + +Refer to [attributes](/en/content/attributes) for details. + +## Tensor ranking + +The ranking workload can be significant for large tensors - it is important to understand both the potential memory and computational cost for each query. + +### Memory + +Assume the dot product of two tensors with 1000 values of 8 bytes each, as in `tensor(x[1000])`. With one query tensor and one document tensor, the dot product is `sum(query(tensor1) * attribute(tensor2))`. Given a Haswell CPU architecture, where the theoretical upper memory bandwidth is 68 GB/sec, this gives 68 GB/sec / 8 KB = approximately 9M ranking evaluations/sec. In other words, for an index of 1M documents where every document is ranked, a node can serve about 9 queries per second before becoming memory bound. + +See below for using smaller [cell value types](#cell-value-types), and read more about [quantization](https://blog.vespa.ai/from-research-to-production-scaling-a-state-of-the-art-machine-learning-system/#model-quantization). 
+ +### Compute + +When using tensor types with at least one mapped dimension (sparse or mixed tensor), [attribute: fast-rank](/en/reference/schemas/schemas#attribute) can be used to optimize the tensor attribute for ranking expression evaluation at the cost of using more memory. This is a good tradeoff if benchmarking indicates significant latency improvements with `fast-rank`. + +When optimizing ranking functions with tensors, try to avoid temporary objects. Use the [Tensor Playground](https://docs.vespa.ai/playground/) to evaluate what the expressions map to, using the execution details to list the detailed steps - find examples below. + +### Multiphase ranking + +To save both memory and compute resources, use [multiphase ranking](/en/ranking/phased-ranking). In short, use less expensive ranking evaluations to find the most promising candidates, then a high-precision evaluation for the top-k candidates. + +The blog post series on [Building Billion-Scale Vector Search](https://blog.vespa.ai/building-billion-scale-vector-search/) is a good read. + +### Cell value types + +| Type | Description | +| :--- | :--- | +| double | The default tensor cell type is the 64-bit floating-point `double` format. It gives the best precision at the cost of high memory usage and somewhat slower calculations. Using a smaller value type increases performance, trading off precision, so consider changing to one of the cell types below before scaling the application. | +| float | The 32-bit floating-point format `float` should usually be used for all tensors when scaling for production. Note that some frameworks like TensorFlow prefer 32-bit floats. A vector with 1000 dimensions, `tensor(x[1000])` uses approximately 4K memory per tensor value. | +| bfloat16 | This type has the range as a normal 32-bit float but only 8 bits of precision and can be thought of as a "float with lossy compression" - see [Wikipedia](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format). 
If memory (or memory bandwidth) is a concern, change the most space-consuming tensors to use the `bfloat16` cell type. Some careful analysis of the data is required before using this type.

When doing calculations, `bfloat16` will act as if it were a 32-bit float, but the smaller size comes with a potential computational overhead. In most cases, the `bfloat16` needs conversion to a 32-bit float before the actual calculation can occur, adding an extra conversion step.

In some cases, having tensors with `bfloat16` cells might bypass some built-in optimizations (like matrix multiplication) that will be hardware-accelerated only if the cells are of the same type. To avoid this, use the [cell\_cast](/en/reference/ranking/ranking-expressions#cell_cast) tensor operation to make sure the cells are of the right type before doing the more expensive operations. | +| int8 | If using machine learning to generate a model with data quantization, one can target the `int8` cell value type, which is a signed integer with a range from -128 to +127 only. This is also treated like a "float with limited range and lossy compression" by the Vespa tensor framework, and gives results as if it were a 32-bit float when any calculation is done. This type is also suitable when representing boolean values (0 or 1).

**Note:**

If the input for an `int8` cell is not directly representable, the resulting cell value is undefined, so take care to only input numbers in the `[-128,127]` range.


It's also possible to use `int8` representing binary data for [hamming distance](/en/reference/schemas/schemas#distance-metric) Nearest-Neighbor search. Refer to [billion-scale-knn](https://blog.vespa.ai/billion-scale-knn/) for example use. | + +### Inner/outer products + +The following is a primer on inner/outer products and execution details: + +| tensor a | tensor b | product | sum | comment | +| :--- | :--- | :--- | :--- | :--- | +| tensor(x\[3\]):\[1.0, 2.0, 3.0\] | tensor(x\[3\]):\[4.0, 5.0, 6.0\] | tensor(x\[3\]):\[4.0, 10.0, 18.0\] | 32 | [Playground example](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEMSybIiFIAXA2gZywAnABQAPRAGYAugEo4iAIzEwAJmXTIrCAF9W20hmrlcDIgbaU0WugwBGLGpg5Qe-IWMmz5AFmUBWZQA2KU1HXQx9ViNME04zawp8aPJ6TlhzR3YGRgAqe2tw1EjDBNjCBwsk6whIVKhoAFdaAGMKzOdIPgaAW2Fc2xlQmkKdFCkQbSA). The dimension name and size are the same in both tensors - this is an inner product with a scalar result. | +| tensor(x\[3\]):\[1.0, 2.0, 3.0\] | tensor(y\[3\]):\[4.0, 5.0, 6.0\] | tensor(x\[3\],y\[3\]):\[ \[4.0, 5.0, 6.0\], \[8.0, 10.0, 12.0\], \[12.0, 15.0, 18.0\] \] | 90 | [Playground example](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEMSybIiFIAXA2gZywAnABQAPRAGYAugEo4iAIzEwAJmXTIrCAF9W20hmrlcDIgbaU0WugwBGLGpg5Qe-IWMmz5AFmUBWZQA2KU1HXQx9ViNME04zawp8aPJ6TlhzR3YGRgAqe2tw1EjDBNjCBwsk6whIVKhoAFdaAGMKzOdIPgaAW2Fc2xlQmkKdFCkQbSA). The dimension size is the same in both tensors, but dimensions have different names -> this is an outer product; the result is a two-dimensional tensor. | +| tensor(x\[3\]):\[1.0, 2.0, 3.0\] | tensor(x\[2\]):\[4.0, 5.0\] | undefined | | [Playground example](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEMSybIiFIAXA2gZywAnABQAPRAGYAugEo4iAIzEwAJmXTIrCAF9W20hmrlcDIgbaU0WugwBGLGpg5Qe-IWMQrZ8gCzKArFKajroY+qxGmCacZtYU+JHk9Jyw5o7sDIwAVPbWoajhhnHRhA4WCdYQkMlQ0ACutADGZenOkHx1ALbC2bYywTT5OihSINpAA). 
Two tensors in the same dimension but with different lengths -> undefined. | +| tensor(x\[3\]):\[1.0, 2.0, 3.0\] | tensor(y\[2\]):\[4.0, 5.0\] | tensor(x\[3\],y\[2\]):\[ \[4.0, 5.0\], \[8.0, 10.0\], \[12.0, 15.0\] \] | 54 | [Playground example](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEMSybIiFIAXA2gZywAnABQAPRAGYAugEo4iAIzEwAJmXTIrCAF9W20hmrlcDIgbaU0WugwBGLGpg5Qe-IcICeiFbPkAWZQBWKU1HXQx9ViNME04zawp8aPJ6TlhzR3YGRgAqe2tw1EjDBNjCBwsk6whIVKhoAFdaAGMKzOdIPgaAW2Fc2xlQmkKdFCkQbSA). Two tensors with different names and dimensions -> this is an outer product; the result is a two-dimensional tensor. | + +Inner product - observe optimized into `DenseDotProductFunction` with no temporary objects: + +```json +[ { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::DenseDotProductFunction", + "symbol": "vespalib::eval::(anonymous namespace)::my_cblas_double_dot_product_op(vespalib::eval::InterpretedFunction::State&, unsigned long)" + } ] +``` + +Outer product, parsed into a tensor multiplication (`DenseSimpleExpandFunction`), followed by a `Reduce` operation: + +```json +[ { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::DenseSimpleExpandFunction", + "symbol": "void vespalib::eval::(anonymous namespace)::my_simple_expand_op, true>(vespalib::eval::InterpretedFunction::State&, unsigned long)" + }, + { + "class": "vespalib::eval::tensor_function::Reduce", + "symbol": "void vespalib::eval::instruction::(anonymous namespace)::my_full_reduce_op >(vespalib::eval::InterpretedFunction::State&, unsigned long)" + } ] +``` + +Note that an inner product can also be run on mapped tensors ([Playground 
example](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEMSybIiFIAXA2gZywAnABQAPYAF8AlHGCiAjHHnEwogExw1K0QGY4OiZFYQJrCaQzVyuBkQttKaY3QYAjFjUwcoPfkLGSMnKKACwAdAAM2hoArJHaegBskYbOphjmrFaYNpx2zhT42eT0nACWtLQEgjiCWAAmAK4AxlwenoQMjABU7mlmKAC6IBJAA)): + +```json +[ { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::SparseFullOverlapJoinFunction", + "symbol": "void vespalib::eval::(anonymous namespace)::my_sparse_full_overlap_join_op, true>(vespalib::eval::InterpretedFunction::State&, unsigned long)" + } ] +``` + +### Mapped lookups + +`sum(model_id * models, m_id)` + +| tensor name | tensor type | +| :--- | :--- | +| model\_id | `tensor(m_id{})` | +| models | `tensor(m_id{}, x[3])` | + +Using a mapped dimension to select an indexed tensor can be considered a [mapped lookup](/en/ranking/tensor-examples#using-a-tensor-as-a-lookup-structure). This is similar to creating a slice but optimized into a single `MappedLookup` - see [Tensor Playground](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gFssATAgGwH0BLFksmpCIJIAFwK0AzlgBOACkY8WwAL4BKOMEYAmOAEYAdAAYVkARBUCVpDNXK4GRG4MppzdBszbtJ-GpmEocSlZBSVVYgAPRABmAF0NLT04RAAWY2IwAFYMsAA2YzjMnRSAdlyADlyATkLTd0sMawE7TAcRJ3cKfFbyehFJAFdGBVYOJTAAKjAvDklipTU-f0IGIZHZrl4pmbGfBd4lhqtnCF6odtXTzFdziEh+qEl2bgBjTpXVkVHvSUnNxZaJRwHT1fyNVDNWxdS5CZbkW7ue6PJgAQxwOAILE47CwWAA1oMcJwZFjBu94YJApBSSxyQQfnN-nslMR1sRFIczOCrCg4iAVEA) example. 
+ +```json +[ { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::MappedLookup", + "symbol": "void vespalib::eval::(anonymous namespace)::my_mapped_lookup_op(vespalib::eval::InterpretedFunction::State&, unsigned long)" + } ] +``` + +### Three-way dot product - mapped + +`sum(query(model_id) * model_weights * model_features)` + +| tensor name | tensor type | +| :--- | :--- | +| query(model\_id) | `tensor(model{})` | +| model\_weights | `tensor(model{}, feature{})` | +| model\_features | `tensor(feature{})` | + +Three-way mapped (sparse) dot product: [Tensor Playground](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEcBXAgJwE8AKAWywBMCAGwD6AS34BKEmRqQiCSABcCtAM5Y2AHmhCsAQyUA+XgOHAAvpLjAeABjgBGC5FkQLsi6QzVyuBkTecpRobnQMfIKiAO4EYgDmABZKajI0mApQKuqaOnqGJpHmXmDQBIbMbASW1sBgADq0tmZCcPbEpeVKlQRw0HYWTsSNzVFtdh1lFVV9znAATMNNRa08jpNdPX0DcADMS6PCbeud073QcwAsYC5hHhhesr6Y-oqBYRT4z+T0iisiU26VVSQXS8gY2Q02l0BmMXEBPRqNn6cAArJNHHAAGy3dL3VCPHwfV6ENLBL5hCCQX5QFjsbj-CSSMAAKjA-1iCWSalZ7JaAM2wLJYMyTFYnFMUXEUl5HLiSRSsv5CKFd08oNCYJJ4I1VJC33CijUzB4XDpEsZMrZcq5iutysFBDU0l1GQYxtN5oZ-KZSqlnIVPPtUpVTukaoeKAAuiALEA) + +```json +[ { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::Sparse112DotProduct", + "symbol": "void vespalib::eval::(anonymous namespace)::my_sparse_112_dot_product_op(vespalib::eval::InterpretedFunction::State&, unsigned long)" + } ] +``` + +### Three-way dot product - mixed + +`sum(query(model_id) * model_weights * model_features)` + +| tensor name | tensor type | +| :--- | :--- | +| query(model\_id) | `tensor(model{})` | +| model\_weights | `tensor(model{}, feature[2])` | +| model\_features | 
`tensor(feature[2])` | + +Three-way mapped (mixed) dot product: [Tensor Playground](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEcBXAgJwE8AKAWywBMCAGwD6AS34BKEmRqQiCSABcCtAM5Y2AHmhCsAQyUA+XgOHAAvpLjAeABjgBGC5FkQLsi6QzVyuBkTecpRobnQMfIKiAO4EYgDmABZKajI0mApQKuqaOnqGJpHmXmDQBIbMbASIAEwAutbAYAA6tLZmQnD2xKXlSpUEcHYWTsSt7VFddj1lFVVOIzVjbUWdPI4zfQNDIwDMyxPCXRu9c4POcAAsYC5hHhhesr6Y-oqBYRT4z+T0iqsis36VVSQXS8gY2Q02l0BmMXEBA1qDTgiAArMQAGx1Vzpe6oR4+D6vQhpYJfMIQSC-KAsdjcf4SSRgABUYH+sQSyTULLZHQBW2BpLBmSYrE4pii4ikPPZcSSKRlfIRgrunlBoTBxPB6spIW+4UUamYPC4tPFDOlrNlnIVVqVAoIamkOoyDCNJrN9L5jMVko58u5dslysd0lVDxQdRAFiAA) + +```json +[ { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::tensor_function::Inject", + "symbol": "" + }, + { + "class": "vespalib::eval::Mixed112DotProduct", + "symbol": "void vespalib::eval::(anonymous namespace)::my_mixed_112_dot_product_op(vespalib::eval::InterpretedFunction::State&, unsigned long)" + } ] +``` \ No newline at end of file diff --git a/mintlify-docs/en/performance/graceful-degradation.mdx b/mintlify-docs/en/performance/graceful-degradation.mdx new file mode 100644 index 0000000000..71a3fa732a --- /dev/null +++ b/mintlify-docs/en/performance/graceful-degradation.mdx @@ -0,0 +1,122 @@ +--- +title: "Graceful Query Coverage Degradation" +sidebarTitle: "Graceful degradation" +--- + +Ideally you want to query all data indexed in a Vespa cluster within the specified timeout, but that might not always be possible: + +- The system might be overloaded due to capacity constraints, and queries do not complete within the timeout, as they are sitting in a queue waiting for a resource. +- A complex query might take longer time to execute than the specified timeout, or the timeout is too low given the complexity of the query and available resource capacity. 
+ +This document describes how Vespa can gracefully degrade the result set if the query cannot be completed within the timeout specified. + +Definitions: + +- **Coverage**: The percentage of documents indexed which were evaluated by the query. The ideal coverage is 100%. +- **Timeout**: The total time a query is allowed to run for, see [timeout](/en/reference/api/query#timeout) (default 500 ms). Vespa is a distributed system where multiple components are involved in the query execution. +- **Soft Timeout**: Soft timeout allows coverage to be less than 100%, but larger than 0% if the query is approaching timeout. Soft timeout might also be considered as an *early termination* technique, and is enabled by default. Refer to [ranking.softtimeout.enable](/en/reference/api/query#ranking.softtimeout.enable). + +## Detection + +The default JSON renderer template will always render a *coverage* element below the root element. If the query execution was degraded in some way, this element contains a *degraded* element, and the *coverage* field will be less than 100. Example request with a query timeout of 200 ms and *ranking.softtimeout.enable=true*: + +```bash +/search/?searchChain=vespa&yql=select * from sources * where foo contains bar&presentation.format=json&timeout=200ms&ranking.softtimeout.enable=true +``` + +```json +{ + "root": { + "coverage": { + "coverage": 99, + "degraded": { + "adaptive-timeout": false, + "match-phase": false, + "non-ideal-state": false, + "timeout": true + }, + "documents": 167006201, + "full": false, + "nodes": 11, + "results": 1, + "resultsFull": 0 + }, + "fields": { + "totalCount": 16469732 + } + } +} +``` + +The result was delivered in 200 ms but the query was degraded as coverage is less than 100. In this case, 167,006,201 out of x documents were queried, and 16,469,732 documents were matched and ranked, using the first-phase ranking expression in the default rank profile. 
+ +The *degraded* field contains the following fields, which explain why the result had coverage less than 100: + +- *adaptive-timeout* is true if [adaptive node timeout](#adaptive-node-timeout) has been enabled, and one or more nodes fail to produce a result at all within the timeout. This could be caused by nodes with degraded hardware making them slower than peers in the cluster. +- *match-phase* is true if the rank profile has defined [match phase ranking degradation](/en/reference/schemas/schemas#match-phase). Match-phase can be used to control which documents are ranked within the timeout. +- *non-ideal-state* is true in cases where the system is not in [ideal state](/en/content/idealstate). This case is extremely rare. +- *timeout* is true if softtimeout was enabled, and not all documents could be matched and ranked within the query timeout. + +Note that the degraded reasons are not mutually exclusive. In the example, the softtimeout was triggered and only 99% of the documents were queried before the time budget ran out. One could imagine scenarios where 10 out of 11 nodes involved in the query execution were healthy and triggered soft timeout and delivered a result, while the last node was in a bad state (e.g. hw issues) and could not produce a result at all, and that would cause both *timeout* and *adaptive-timeout* to be true. 
+ +When working on Results in a [Searcher](/en/applications/searchers), get the coverage information programmatically: + +```java +@Override + public Result search(Query query, Execution execution) { + Result result = execution.search(query); + Coverage coverage = result.getCoverage(false); + if (coverage != null && coverage.isDegraded()) { + logger.warning("Got a degraded result for query " + query + " : " + + coverage.getResultPercentage() + "% was searched"); + } + return result; + } +``` + +## Adaptive node timeout + +For a content cluster with [flat](/en/performance/sizing-search#data-distribution) data distribution, query performance is no better than the slowest node. The worst case scenario happens when a node in the cluster is experiencing underlying HW issues. In such a state, a node might answer health checks and pings, but still not be able to serve queries within the timeout. + +Using [adaptive coverage](/en/reference/applications/services/content#coverage) allows ignoring slow node(s). The following example demonstrates how to use adaptive timeout. The example uses a flat content cluster with 10 nodes: + +```xml +<content id="mycluster" version="1.0"> + <search> + <coverage> + <minimum>0.9</minimum> + <min-wait-after-coverage-factor>0.2</min-wait-after-coverage-factor> + <max-wait-after-coverage-factor>0.3</max-wait-after-coverage-factor> + </coverage> + </search> +</content> +``` +- Assuming the default Vespa timeout of 500 ms, the stateless container dispatches the query to all 10 nodes in parallel and waits until 9 out of 10 have replied (minimum coverage 0.9). +- Assuming 9 could respond in 100ms, there is 400ms left. The dispatcher then waits a minimum of 80 ms (0.2\*400ms) for the last node to respond, and a maximum of 120 ms (0.3\*400ms), before giving up waiting for the slowest node and returning the result. +- The min wait setting is used to allow some per node response time variance. Using min wait 0 will cause the query to return immediately when min coverage has been reached (9 out of 10 nodes replied). A higher than 0 value for min allows a node to be slightly slower than the peers and overall still reach 100% coverage. 
+ +## Match phase degradation + +Refer to the [match-phase reference](/en/reference/schemas/schemas#match-phase). Concrete examples of using match phase are found in the [practical performance guide](/en/performance/practical-search-performance-guide#match-phase-limit---early-termination). + +Match-phase works by specifying an `attribute` that measures document quality in some way (popularity, click-through rate, pagerank, ad bid value, price, text quality). In addition, a `total.max-hits` value specifies how many hits in total over the content nodes are "more than enough" for the application. Then an estimate is made after collecting a reasonable number of hits for the query, and if the estimate is higher than the node's share of the `total-max-hits` value, an extra limitation is added to the query, ensuring that only the highest quality documents can become hits. + +In effect, this limits the documents actually queried to the highest quality documents, a subset of the full corpus, where the size of the subset is calculated in such a way that the query is estimated to give the node's share of `total-max-hits` hits. Since some (low-quality) hits will already have been collected to do the estimation, the actual number of hits returned will usually be higher than total-max-hits. But since the distribution of documents isn't perfectly smooth, you risk sometimes getting fewer than the configured `total-max-hits` hits back. + +Note that limiting hits in the match-phase also affects [aggregation/grouping](/en/querying/grouping) and total-hit-count, since it actually limits the documents evaluated, so the query gets fewer hits. Also note that it doesn't really make sense to use this feature together with a [WAND operator](/en/ranking/wand) that also limits hits, since they both operate in the same manner, and you would get interference between them that could cause unpredictable results. 
The graph shows possible hits versus actual hits in a corpus with 100 000 documents, where `total-max-hits` is configured to 10 000 per node. The corpus is a synthetic (slightly randomized) data set; in practice the graph will be less smooth: + +![Plot of possible vs. actual hits](/assets/img/relevance/match-phase-max-hits.png) + +There is a content node metric per rank-profile named *content.proton.documentdb.matching.rank\_profile.limited\_queries* which can be used to see how many of the queries are actually affected by these settings; compare with the corresponding *content.proton.documentdb.matching.rank\_profile.queries* metric to measure the percentage. + +### Match Phase Tradeoffs + +There are some important things to consider before using *match-phase*. In a normal query scenario, latency is directly proportional to the number of hits the query matches: a query that matches few documents will have low latency and a query that matches many documents will have high latency. Match-phase has the **opposite** effect. This means that if you have queries that match few documents, match-phase might make these queries significantly slower. It might actually be faster to run the query without the filter. + +Example: Let's say you have a corpus with a document attribute named *created\_time*. For all queries you want the newest content surfaced, so you enable match-phase on *created\_time*. So far, so good - you get great latency and always get your top-k hits. The problem might come if you introduce a filter. If you have a filter saying you only want documents from the last day, then match-phase can become suboptimal and in some cases much worse than running without match-phase. + +By design, Vespa will evaluate potential matches for a query in the order of their internal documentid. This means it will start evaluating documents in the order they were indexed on the node, and for most use-cases that means the oldest documents first. 
Without a filter, every document is a potential match, and match-phase will quickly figure out how it can optimize. With the filter, on the other hand, the algorithm needs to evaluate almost the full corpus before it reaches potential matches (1 day old corpus), and because of the way the algorithm is implemented, ends up doing a lot of unnecessary work and can have orders of magnitude higher latencies than running the query without the filter. + +Another important thing to mention is that the reported total-hits will be different when doing queries with match-phase enabled. This is because match-phase works on an estimated "virtual" corpus, which might have far fewer hits than are actually in the full corpus. + +If used correctly, match-phase can be a life-saver; however, it is not a straightforward fix-it-all silver bullet. Please test and measure your use of match-phase, and contact the Vespa team if your results are not what you expect. \ No newline at end of file diff --git a/mintlify-docs/en/performance/http2.mdx b/mintlify-docs/en/performance/http2.mdx new file mode 100644 index 0000000000..5eebbdee96 --- /dev/null +++ b/mintlify-docs/en/performance/http2.mdx @@ -0,0 +1,73 @@ +--- +title: "HTTP/2" +--- + +This document contains HTTP/2 performance considerations on the container—see [Container tuning](/en/performance/container-tuning) for general tuning of container clusters. + +## Enabling HTTP/2 on container + +HTTP/2 is enabled by default on a container for all connectors. We recommend HTTP/2 with TLS, both for added security and for a more robust connection upgrade mechanism. Web browsers will typically only allow HTTP/2 over TLS. + +### HTTP/2 with TLS + +Both HTTP/1.1 and HTTP/2 will be served over the same connector using the [TLS ALPN Extension](https://datatracker.ietf.org/doc/html/rfc7301). The Application-Layer Protocol Negotiation (ALPN) extension allows the client to send a list of supported protocols during TLS handshake. 
The container selects a supported protocol from that list. + +The [HTTP/2 specification](https://datatracker.ietf.org/doc/html/rfc7540) dictates multiple requirements for the TLS connection. Vespa may enforce some or all of these restrictions. See the HTTP/2 specification for the full list. The most significant are listed below: + +- Client must use at least TLSv1.2. +- Client must provide target domain with the TLS Server Name Indication (SNI) Extension. +- Client must not use any of the banned [TLSv1.2 ciphers](https://datatracker.ietf.org/doc/html/rfc7540#appendix-A). + +### HTTP/2 without TLS + +The jdisc container supports both mechanisms for HTTP/2 without TLS - see [testing](#testing): + +1. Upgrading to HTTP/2 from HTTP/1 +2. HTTP/2 with prior knowledge + +## Feeding over HTTP/2 + +One of the major improvements with HTTP/2 is multiplexing of multiple concurrent requests over a single TCP connection. This allows for high-throughput feeding through the [/document/v1/](/en/reference/api/document-v1) HTTP API, with a simple one-operation–one-request model, but without the overhead of hundreds of parallel connections that HTTP/1.1 would require for sufficient concurrency. + +`vespa feed` in the [Vespa CLI](/en/clients/vespa-cli#documents) and [vespa-feed-client](/en/clients/vespa-feed-client) use /document/v1/ over HTTP/2. + +## Performance tuning + +### Client + +The number of concurrent requests per connection is typically adjustable in HTTP/2 clients/libraries. Document v1 API is designed for high concurrency and can easily handle thousands of concurrent requests. Its implementation is asynchronous and max concurrency is not restricted by a thread pool size, so configure your client to allow enough concurrent requests/streams to saturate the feed container. 
Other APIs such as the [Query API](/en/querying/query-api) are backed by a synchronous implementation, and max concurrency is restricted by the [underlying thread pool size](/en/performance/container-tuning#container-worker-threads). Too many concurrent streams may result in the container rejecting requests with 503 responses. + +There are also still some reasons to use multiple TCP connections—even with HTTP/2: + +- **Utilize multiple containers**. A single container may not saturate the content layer. A client may have to use more connections than container nodes if the containers are behind a load balancer. +- **Higher throughput**. Many clients allow only for a single thread to operate each connection. Multiple connections may be required for utilizing several CPU cores. + +## Client recommendations + +Use [vespa-feed-client](/en/clients/vespa-feed-client) for feeding through Document v1 API (JDK8+). + +We recommend the [h2load benchmarking tool](https://nghttp2.org/documentation/h2load-howto.html) for load testing. [vespa-fbench](/en/reference/operations/tools#vespa-fbench) does not support HTTP/2 at the moment. + +For Java there are 4 good alternatives: + +1. [Jetty Client](https://javadoc.jetty.org/jetty-11/org/eclipse/jetty/client/HttpClient.html) +2. [OkHttp](https://square.github.io/okhttp/) +3. [Apache HttpClient 5.x](https://hc.apache.org/httpcomponents-client-5.1.x/) +4. [java.net.http.HttpClient (JDK11+)](https://docs.oracle.com/en/java/javase/11/docs/api/java.net.http/java/net/http/HttpClient.html) + +## Testing + +The server does not perform a protocol upgrade if a request contains content (POST, PUT, PATCH with payload). This might be a limitation in Jetty, the HTTP server used in Vespa. 
Any client should assume HTTP/2 is supported - example using `curl --http2-prior-knowledge`: + +```bash highlight={6} +$ curl -i --http2-prior-knowledge \ + -X POST -H 'Content-Type: application/json' \ + --data-binary @ext/A-Head-Full-of-Dreams.json \ + http://127.0.0.1:8080/document/v1/mynamespace/music/docid/a-head-full-of-dreams + +HTTP/2 200 +date: Tue, 06 Dec 2022 11:04:13 GMT +content-type: application/json;charset=utf-8 +vary: Accept-Encoding +content-length: 122 +``` \ No newline at end of file diff --git a/mintlify-docs/en/performance/index.mdx b/mintlify-docs/en/performance/index.mdx new file mode 100644 index 0000000000..e69de29bb2 diff --git a/mintlify-docs/en/performance/instance-types/aws-instance-types.mdx b/mintlify-docs/en/performance/instance-types/aws-instance-types.mdx new file mode 100644 index 0000000000..4c54ac5950 --- /dev/null +++ b/mintlify-docs/en/performance/instance-types/aws-instance-types.mdx @@ -0,0 +1,175 @@ +--- +title: "AWS Instance Type Reference" +sidebarTitle: "AWS instance types" +--- + +All instance types without Local SSD use [Amazon Elastic Block Store](https://aws.amazon.com/ebs/) for storage. + +These volumes can be any size from a minimum of 3 x memory, up to 16 TB. 
+ +| Architecture | CPU cores | Memory (GB) | Local SSD (GB) | GPU Memory (GB) | | +| :--- | :--- | :--- | :--- | :--- | :--- | +| arm64 | 1.0 | 8 | \- | \- | | +| arm64 | 1.0 | 8 | 59 | \- | | +| arm64 | 1.0 | 16 | \- | \- | | +| arm64 | 1.0 | 16 | 59 | \- | | +| arm64 | 2.0 | 8 | \- | \- | | +| arm64 | 2.0 | 8 | 118 | \- | | +| arm64 | 2.0 | 16 | \- | \- | | +| arm64 | 2.0 | 16 | 118 | \- | | +| arm64 | 2.0 | 16 | 468 | \- | | +| arm64 | 2.0 | 32 | \- | \- | | +| arm64 | 2.0 | 32 | 118 | \- | | +| arm64 | 4.0 | 8 | \- | \- | | +| arm64 | 4.0 | 8 | 237 | \- | | +| arm64 | 4.0 | 16 | \- | \- | | +| arm64 | 4.0 | 16 | 237 | \- | | +| arm64 | 4.0 | 32 | \- | \- | | +| arm64 | 4.0 | 32 | 237 | \- | | +| arm64 | 4.0 | 32 | 937 | \- | | +| arm64 | 4.0 | 64 | \- | \- | | +| arm64 | 4.0 | 64 | 237 | \- | | +| arm64 | 8.0 | 16 | \- | \- | | +| arm64 | 8.0 | 16 | 474 | \- | | +| arm64 | 8.0 | 32 | \- | \- | | +| arm64 | 8.0 | 32 | 474 | \- | | +| arm64 | 8.0 | 64 | \- | \- | | +| arm64 | 8.0 | 64 | 474 | \- | | +| arm64 | 8.0 | 64 | 1875 | \- | | +| arm64 | 8.0 | 128 | \- | \- | | +| arm64 | 8.0 | 128 | 475 | \- | | +| arm64 | 16.0 | 32 | \- | \- | | +| arm64 | 16.0 | 32 | 950 | \- | | +| arm64 | 16.0 | 64 | \- | \- | | +| arm64 | 16.0 | 64 | 950 | \- | | +| arm64 | 16.0 | 128 | \- | \- | | +| arm64 | 16.0 | 128 | 950 | \- | | +| arm64 | 16.0 | 128 | 3750 | \- | | +| arm64 | 16.0 | 256 | \- | \- | | +| arm64 | 16.0 | 256 | 950 | \- | | +| arm64 | 32.0 | 64 | \- | \- | | +| arm64 | 32.0 | 64 | 1900 | \- | | +| arm64 | 32.0 | 128 | \- | \- | | +| arm64 | 32.0 | 128 | 1900 | \- | | +| arm64 | 32.0 | 256 | \- | \- | | +| arm64 | 32.0 | 256 | 1900 | \- | | +| arm64 | 32.0 | 256 | 7500 | \- | | +| arm64 | 32.0 | 512 | \- | \- | | +| arm64 | 32.0 | 512 | 1900 | \- | | +| arm64 | 48.0 | 96 | \- | \- | | +| arm64 | 48.0 | 96 | 2850 | \- | | +| arm64 | 48.0 | 192 | \- | \- | | +| arm64 | 48.0 | 192 | 2850 | \- | | +| arm64 | 48.0 | 384 | \- | \- | | +| arm64 | 48.0 | 384 | 2850 | \- 
| | +| arm64 | 48.0 | 384 | 11250 | \- | | +| arm64 | 48.0 | 768 | \- | \- | | +| arm64 | 48.0 | 768 | 2850 | \- | | +| arm64 | 64.0 | 128 | \- | \- | | +| arm64 | 64.0 | 128 | 3800 | \- | | +| arm64 | 64.0 | 256 | \- | \- | | +| arm64 | 64.0 | 256 | 3800 | \- | | +| arm64 | 64.0 | 512 | \- | \- | | +| arm64 | 64.0 | 512 | 3800 | \- | | +| arm64 | 64.0 | 512 | 15000 | \- | | +| arm64 | 64.0 | 1024 | \- | \- | | +| arm64 | 64.0 | 1024 | 3800 | \- | | +| arm64 | 96.0 | 192 | \- | \- | | +| arm64 | 96.0 | 192 | 5700 | \- | | +| arm64 | 96.0 | 384 | \- | \- | | +| arm64 | 96.0 | 384 | 5700 | \- | | +| arm64 | 96.0 | 768 | \- | \- | | +| arm64 | 96.0 | 768 | 5700 | \- | | +| arm64 | 96.0 | 768 | 22500 | \- | | +| arm64 | 96.0 | 1536 | \- | \- | | +| arm64 | 192.0 | 384 | \- | \- | | +| arm64 | 192.0 | 384 | 11400 | \- | | +| arm64 | 192.0 | 768 | \- | \- | | +| arm64 | 192.0 | 768 | 11400 | \- | | +| arm64 | 192.0 | 1536 | \- | \- | | +| arm64 | 192.0 | 1536 | 11400 | \- | | +| arm64 | 192.0 | 1536 | 45000 | \- | | +| arm64 | 192.0 | 3072 | \- | \- | | +| x86\_64 | 2.0 | 8 | \- | \- | | +| x86\_64 | 2.0 | 8 | 75 | \- | | +| x86\_64 | 2.0 | 8 | 118 | \- | | +| x86\_64 | 2.0 | 16 | \- | \- | | +| x86\_64 | 2.0 | 16 | 75 | \- | | +| x86\_64 | 2.0 | 16 | 468 | \- | | +| x86\_64 | 2.0 | 16 | 1250 | \- | | +| x86\_64 | 4.0 | 8 | \- | \- | | +| x86\_64 | 4.0 | 8 | 100 | \- | | +| x86\_64 | 4.0 | 8 | 237 | \- | | +| x86\_64 | 4.0 | 16 | \- | \- | | +| x86\_64 | 4.0 | 16 | 125 | 16.0 | | +| x86\_64 | 4.0 | 16 | 150 | \- | | +| x86\_64 | 4.0 | 16 | 237 | \- | | +| x86\_64 | 4.0 | 32 | \- | \- | | +| x86\_64 | 4.0 | 32 | 150 | \- | | +| x86\_64 | 4.0 | 32 | 237 | \- | | +| x86\_64 | 4.0 | 32 | 937 | \- | | +| x86\_64 | 4.0 | 32 | 2500 | \- | | +| x86\_64 | 8.0 | 16 | \- | \- | | +| x86\_64 | 8.0 | 16 | 200 | \- | | +| x86\_64 | 8.0 | 16 | 474 | \- | | +| x86\_64 | 8.0 | 32 | \- | \- | | +| x86\_64 | 8.0 | 32 | 225 | 16.0 | | +| x86\_64 | 8.0 | 32 | 300 | \- | | +| x86\_64 | 8.0 | 
32 | 474 | \- | | +| x86\_64 | 8.0 | 64 | \- | \- | | +| x86\_64 | 8.0 | 64 | 300 | \- | | +| x86\_64 | 8.0 | 64 | 1875 | \- | | +| x86\_64 | 8.0 | 64 | 5000 | \- | | +| x86\_64 | 12.0 | 96 | 7500 | \- | | +| x86\_64 | 16.0 | 32 | \- | \- | | +| x86\_64 | 16.0 | 32 | 400 | \- | | +| x86\_64 | 16.0 | 32 | 950 | \- | | +| x86\_64 | 16.0 | 64 | \- | \- | | +| x86\_64 | 16.0 | 64 | 600 | \- | | +| x86\_64 | 16.0 | 64 | 950 | \- | | +| x86\_64 | 16.0 | 128 | \- | \- | | +| x86\_64 | 16.0 | 128 | 600 | \- | | +| x86\_64 | 16.0 | 128 | 3750 | \- | | +| x86\_64 | 24.0 | 192 | 900 | \- | | +| x86\_64 | 24.0 | 192 | 15000 | \- | | +| x86\_64 | 32.0 | 64 | \- | \- | | +| x86\_64 | 32.0 | 64 | 1200 | \- | | +| x86\_64 | 32.0 | 64 | 1900 | \- | | +| x86\_64 | 32.0 | 128 | \- | \- | | +| x86\_64 | 32.0 | 128 | 1200 | \- | | +| x86\_64 | 32.0 | 128 | 1900 | \- | | +| x86\_64 | 32.0 | 256 | \- | \- | | +| x86\_64 | 32.0 | 256 | 1200 | \- | | +| x86\_64 | 32.0 | 256 | 7500 | \- | | +| x86\_64 | 36.0 | 72 | \- | \- | | +| x86\_64 | 36.0 | 72 | 900 | \- | | +| x86\_64 | 48.0 | 96 | \- | \- | | +| x86\_64 | 48.0 | 96 | 1800 | \- | | +| x86\_64 | 48.0 | 96 | 2850 | \- | | +| x86\_64 | 48.0 | 192 | \- | \- | | +| x86\_64 | 48.0 | 192 | 1800 | \- | | +| x86\_64 | 48.0 | 192 | 2850 | \- | | +| x86\_64 | 48.0 | 384 | \- | \- | | +| x86\_64 | 48.0 | 384 | 1800 | \- | | +| x86\_64 | 48.0 | 384 | 30000 | \- | | +| x86\_64 | 64.0 | 128 | \- | \- | | +| x86\_64 | 64.0 | 128 | 3800 | \- | | +| x86\_64 | 64.0 | 256 | \- | \- | | +| x86\_64 | 64.0 | 256 | 2400 | \- | | +| x86\_64 | 64.0 | 256 | 3800 | \- | | +| x86\_64 | 64.0 | 512 | \- | \- | | +| x86\_64 | 64.0 | 512 | 2400 | \- | | +| x86\_64 | 64.0 | 512 | 15000 | \- | | +| x86\_64 | 72.0 | 144 | \- | \- | | +| x86\_64 | 72.0 | 144 | 1800 | \- | | +| x86\_64 | 72.0 | 576 | 45000 | \- | | +| x86\_64 | 96.0 | 192 | \- | \- | | +| x86\_64 | 96.0 | 192 | 3600 | \- | | +| x86\_64 | 96.0 | 192 | 5700 | \- | | +| x86\_64 | 96.0 | 384 | \- | \- | | +| 
x86\_64 | 96.0 | 384 | 3600 | \- | | +| x86\_64 | 96.0 | 768 | \- | \- | | +| x86\_64 | 96.0 | 768 | 3600 | \- | | +| x86\_64 | 96.0 | 768 | 60000 | \- | | +| x86\_64 | 128.0 | 1024 | 30000 | \- | | +| x86\_64 | 192.0 | 1536 | 120000 | \- | | \ No newline at end of file diff --git a/mintlify-docs/en/performance/instance-types/azure-instance-types.mdx b/mintlify-docs/en/performance/instance-types/azure-instance-types.mdx new file mode 100644 index 0000000000..fdc9130fa3 --- /dev/null +++ b/mintlify-docs/en/performance/instance-types/azure-instance-types.mdx @@ -0,0 +1,75 @@ +--- +title: "Azure Instance Type Reference" +sidebarTitle: "Azure instance types" +--- + +All instance types without Local SSD use [Azure Managed Disk](https://learn.microsoft.com/en-us/azure/virtual-machines/managed-disks-overview) for storage. + + +| Architecture | CPU cores | Memory (GB) | Local SSD (GB) | GPU Memory (GB) | | +| :--- | :--- | :--- | :--- | :--- | :--- | +| x86\_64 | 2.0 | 8 | \- | \- | | +| x86\_64 | 2.0 | 8 | 75 | \- | | +| x86\_64 | 2.0 | 8 | 118 | \- | | +| x86\_64 | 2.0 | 16 | \- | \- | | +| x86\_64 | 2.0 | 16 | 75 | \- | | +| x86\_64 | 2.0 | 16 | 118 | \- | | +| x86\_64 | 4.0 | 8 | \- | \- | | +| x86\_64 | 4.0 | 8 | 150 | \- | | +| x86\_64 | 4.0 | 16 | \- | \- | | +| x86\_64 | 4.0 | 16 | 150 | \- | | +| x86\_64 | 4.0 | 16 | 236 | \- | | +| x86\_64 | 4.0 | 32 | \- | \- | | +| x86\_64 | 4.0 | 32 | 150 | \- | | +| x86\_64 | 4.0 | 32 | 236 | \- | | +| x86\_64 | 8.0 | 16 | \- | \- | | +| x86\_64 | 8.0 | 16 | 300 | \- | | +| x86\_64 | 8.0 | 32 | \- | \- | | +| x86\_64 | 8.0 | 32 | 300 | \- | | +| x86\_64 | 8.0 | 32 | 472 | \- | | +| x86\_64 | 8.0 | 64 | \- | \- | | +| x86\_64 | 8.0 | 64 | 300 | \- | | +| x86\_64 | 8.0 | 64 | 472 | \- | | +| x86\_64 | 16.0 | 32 | \- | \- | | +| x86\_64 | 16.0 | 32 | 600 | \- | | +| x86\_64 | 16.0 | 64 | \- | \- | | +| x86\_64 | 16.0 | 64 | 600 | \- | | +| x86\_64 | 16.0 | 64 | 944 | \- | | +| x86\_64 | 16.0 | 128 | \- | \- | | +| x86\_64 | 16.0 
| 128 | 600 | \- | | +| x86\_64 | 16.0 | 128 | 944 | \- | | +| x86\_64 | 20.0 | 160 | \- | \- | | +| x86\_64 | 20.0 | 160 | 750 | \- | | +| x86\_64 | 20.0 | 160 | 1181 | \- | | +| x86\_64 | 32.0 | 64 | \- | \- | | +| x86\_64 | 32.0 | 64 | 1200 | \- | | +| x86\_64 | 32.0 | 128 | \- | \- | | +| x86\_64 | 32.0 | 128 | 1200 | \- | | +| x86\_64 | 32.0 | 128 | 1889 | \- | | +| x86\_64 | 32.0 | 256 | \- | \- | | +| x86\_64 | 32.0 | 256 | 1200 | \- | | +| x86\_64 | 32.0 | 256 | 1889 | \- | | +| x86\_64 | 48.0 | 96 | \- | \- | | +| x86\_64 | 48.0 | 96 | 1800 | \- | | +| x86\_64 | 48.0 | 192 | \- | \- | | +| x86\_64 | 48.0 | 192 | 1800 | \- | | +| x86\_64 | 48.0 | 192 | 2834 | \- | | +| x86\_64 | 48.0 | 384 | \- | \- | | +| x86\_64 | 48.0 | 384 | 1800 | \- | | +| x86\_64 | 48.0 | 384 | 2834 | \- | | +| x86\_64 | 64.0 | 128 | \- | \- | | +| x86\_64 | 64.0 | 128 | 2400 | \- | | +| x86\_64 | 64.0 | 256 | \- | \- | | +| x86\_64 | 64.0 | 256 | 2400 | \- | | +| x86\_64 | 64.0 | 256 | 3779 | \- | | +| x86\_64 | 64.0 | 512 | \- | \- | | +| x86\_64 | 64.0 | 512 | 2400 | \- | | +| x86\_64 | 64.0 | 512 | 3779 | \- | | +| x86\_64 | 96.0 | 192 | \- | \- | | +| x86\_64 | 96.0 | 192 | 3600 | \- | | +| x86\_64 | 96.0 | 384 | \- | \- | | +| x86\_64 | 96.0 | 384 | 3600 | \- | | +| x86\_64 | 96.0 | 384 | 5669 | \- | | +| x86\_64 | 96.0 | 672 | \- | \- | | +| x86\_64 | 96.0 | 672 | 3600 | \- | | +| x86\_64 | 96.0 | 672 | 5669 | \- | | \ No newline at end of file diff --git a/mintlify-docs/en/performance/instance-types/gcp-instance-types.mdx b/mintlify-docs/en/performance/instance-types/gcp-instance-types.mdx new file mode 100644 index 0000000000..60b513e3ad --- /dev/null +++ b/mintlify-docs/en/performance/instance-types/gcp-instance-types.mdx @@ -0,0 +1,220 @@ +--- +title: "GCP Instance Type Reference" +sidebarTitle: "GCP instance types" +--- + +All instance types without Local SSD use [GCP Persistent Disk](https://cloud.google.com/persistent-disk) for storage. 
+ +These volumes can be any size from a minimum of 3 x memory, up to 64TB. + + +| Architecture | CPU cores | Memory (GB) | Local SSD (GB) | GPU Memory (GB) | | +| :--- | :--- | :--- | :--- | :--- | :--- | +| arm64 | 2.0 | 8 | \- | \- | | +| arm64 | 2.0 | 16 | \- | \- | | +| arm64 | 4.0 | 8 | \- | \- | | +| arm64 | 4.0 | 16 | \- | \- | | +| arm64 | 4.0 | 16 | 375 | \- | | +| arm64 | 4.0 | 32 | \- | \- | | +| arm64 | 4.0 | 32 | 375 | \- | | +| arm64 | 8.0 | 16 | \- | \- | | +| arm64 | 8.0 | 32 | \- | \- | | +| arm64 | 8.0 | 32 | 750 | \- | | +| arm64 | 8.0 | 64 | \- | \- | | +| arm64 | 8.0 | 64 | 750 | \- | | +| arm64 | 16.0 | 32 | \- | \- | | +| arm64 | 16.0 | 64 | \- | \- | | +| arm64 | 16.0 | 64 | 1500 | \- | | +| arm64 | 16.0 | 128 | \- | \- | | +| arm64 | 16.0 | 128 | 1500 | \- | | +| arm64 | 32.0 | 64 | \- | \- | | +| arm64 | 32.0 | 128 | \- | \- | | +| arm64 | 32.0 | 128 | 2250 | \- | | +| arm64 | 32.0 | 256 | \- | \- | | +| arm64 | 32.0 | 256 | 2250 | \- | | +| arm64 | 48.0 | 96 | \- | \- | | +| arm64 | 48.0 | 192 | \- | \- | | +| arm64 | 48.0 | 192 | 3750 | \- | | +| arm64 | 48.0 | 384 | \- | \- | | +| arm64 | 48.0 | 384 | 3750 | \- | | +| arm64 | 64.0 | 128 | \- | \- | | +| arm64 | 64.0 | 256 | \- | \- | | +| arm64 | 64.0 | 256 | 5250 | \- | | +| arm64 | 64.0 | 512 | \- | \- | | +| arm64 | 64.0 | 512 | 5250 | \- | | +| arm64 | 72.0 | 144 | \- | \- | | +| arm64 | 72.0 | 288 | \- | \- | | +| arm64 | 72.0 | 288 | 6000 | \- | | +| arm64 | 72.0 | 576 | \- | \- | | +| arm64 | 72.0 | 576 | 6000 | \- | | +| x86\_64 | 2.0 | 8 | \- | \- | | +| x86\_64 | 2.0 | 8 | 375 | \- | | +| x86\_64 | 2.0 | 8 | 750 | \- | | +| x86\_64 | 2.0 | 8 | 1500 | \- | | +| x86\_64 | 2.0 | 8 | 3000 | \- | | +| x86\_64 | 2.0 | 8 | 6000 | \- | | +| x86\_64 | 2.0 | 8 | 9000 | \- | | +| x86\_64 | 2.0 | 16 | \- | \- | | +| x86\_64 | 2.0 | 16 | 375 | \- | | +| x86\_64 | 2.0 | 16 | 750 | \- | | +| x86\_64 | 2.0 | 16 | 1500 | \- | | +| x86\_64 | 2.0 | 16 | 3000 | \- | | +| x86\_64 | 2.0 | 16 | 6000 
| \- | | +| x86\_64 | 2.0 | 16 | 9000 | \- | | +| x86\_64 | 4.0 | 8 | \- | \- | | +| x86\_64 | 4.0 | 16 | \- | \- | | +| x86\_64 | 4.0 | 16 | 375 | \- | | +| x86\_64 | 4.0 | 16 | 750 | \- | | +| x86\_64 | 4.0 | 16 | 1500 | \- | | +| x86\_64 | 4.0 | 16 | 3000 | \- | | +| x86\_64 | 4.0 | 16 | 6000 | \- | | +| x86\_64 | 4.0 | 16 | 9000 | \- | | +| x86\_64 | 4.0 | 32 | \- | \- | | +| x86\_64 | 4.0 | 32 | 375 | \- | | +| x86\_64 | 4.0 | 32 | 750 | \- | | +| x86\_64 | 4.0 | 32 | 1500 | \- | | +| x86\_64 | 4.0 | 32 | 3000 | \- | | +| x86\_64 | 4.0 | 32 | 6000 | \- | | +| x86\_64 | 4.0 | 32 | 9000 | \- | | +| x86\_64 | 8.0 | 8 | \- | \- | | +| x86\_64 | 8.0 | 8 | 375 | \- | | +| x86\_64 | 8.0 | 8 | 750 | \- | | +| x86\_64 | 8.0 | 8 | 1500 | \- | | +| x86\_64 | 8.0 | 8 | 3000 | \- | | +| x86\_64 | 8.0 | 8 | 6000 | \- | | +| x86\_64 | 8.0 | 8 | 9000 | \- | | +| x86\_64 | 8.0 | 16 | \- | \- | | +| x86\_64 | 8.0 | 32 | \- | \- | | +| x86\_64 | 8.0 | 32 | 375 | \- | | +| x86\_64 | 8.0 | 32 | 750 | \- | | +| x86\_64 | 8.0 | 32 | 1500 | \- | | +| x86\_64 | 8.0 | 32 | 3000 | \- | | +| x86\_64 | 8.0 | 32 | 6000 | \- | | +| x86\_64 | 8.0 | 32 | 9000 | \- | | +| x86\_64 | 8.0 | 64 | \- | \- | | +| x86\_64 | 8.0 | 64 | 375 | \- | | +| x86\_64 | 8.0 | 64 | 750 | \- | | +| x86\_64 | 8.0 | 64 | 1500 | \- | | +| x86\_64 | 8.0 | 64 | 3000 | \- | | +| x86\_64 | 8.0 | 64 | 6000 | \- | | +| x86\_64 | 8.0 | 64 | 9000 | \- | | +| x86\_64 | 16.0 | 16 | \- | \- | | +| x86\_64 | 16.0 | 16 | 375 | \- | | +| x86\_64 | 16.0 | 16 | 750 | \- | | +| x86\_64 | 16.0 | 16 | 1500 | \- | | +| x86\_64 | 16.0 | 16 | 3000 | \- | | +| x86\_64 | 16.0 | 16 | 6000 | \- | | +| x86\_64 | 16.0 | 16 | 9000 | \- | | +| x86\_64 | 16.0 | 32 | \- | \- | | +| x86\_64 | 16.0 | 64 | \- | \- | | +| x86\_64 | 16.0 | 64 | 375 | \- | | +| x86\_64 | 16.0 | 64 | 750 | \- | | +| x86\_64 | 16.0 | 64 | 1500 | \- | | +| x86\_64 | 16.0 | 64 | 3000 | \- | | +| x86\_64 | 16.0 | 64 | 6000 | \- | | +| x86\_64 | 16.0 | 64 | 9000 | \- | | +| 
x86\_64 | 16.0 | 128 | \- | \- | | +| x86\_64 | 16.0 | 128 | 375 | \- | | +| x86\_64 | 16.0 | 128 | 750 | \- | | +| x86\_64 | 16.0 | 128 | 1500 | \- | | +| x86\_64 | 16.0 | 128 | 3000 | \- | | +| x86\_64 | 16.0 | 128 | 6000 | \- | | +| x86\_64 | 16.0 | 128 | 9000 | \- | | +| x86\_64 | 32.0 | 32 | \- | \- | | +| x86\_64 | 32.0 | 32 | 750 | \- | | +| x86\_64 | 32.0 | 32 | 1500 | \- | | +| x86\_64 | 32.0 | 32 | 3000 | \- | | +| x86\_64 | 32.0 | 32 | 6000 | \- | | +| x86\_64 | 32.0 | 32 | 9000 | \- | | +| x86\_64 | 32.0 | 64 | \- | \- | | +| x86\_64 | 32.0 | 128 | \- | \- | | +| x86\_64 | 32.0 | 128 | 750 | \- | | +| x86\_64 | 32.0 | 128 | 1500 | \- | | +| x86\_64 | 32.0 | 128 | 3000 | \- | | +| x86\_64 | 32.0 | 128 | 6000 | \- | | +| x86\_64 | 32.0 | 128 | 9000 | \- | | +| x86\_64 | 32.0 | 256 | \- | \- | | +| x86\_64 | 32.0 | 256 | 750 | \- | | +| x86\_64 | 32.0 | 256 | 1500 | \- | | +| x86\_64 | 32.0 | 256 | 3000 | \- | | +| x86\_64 | 32.0 | 256 | 6000 | \- | | +| x86\_64 | 32.0 | 256 | 9000 | \- | | +| x86\_64 | 48.0 | 48 | \- | \- | | +| x86\_64 | 48.0 | 48 | 750 | \- | | +| x86\_64 | 48.0 | 48 | 1500 | \- | | +| x86\_64 | 48.0 | 48 | 3000 | \- | | +| x86\_64 | 48.0 | 48 | 6000 | \- | | +| x86\_64 | 48.0 | 48 | 9000 | \- | | +| x86\_64 | 48.0 | 96 | \- | \- | | +| x86\_64 | 48.0 | 192 | \- | \- | | +| x86\_64 | 48.0 | 192 | 750 | \- | | +| x86\_64 | 48.0 | 192 | 1500 | \- | | +| x86\_64 | 48.0 | 192 | 3000 | \- | | +| x86\_64 | 48.0 | 192 | 6000 | \- | | +| x86\_64 | 48.0 | 192 | 9000 | \- | | +| x86\_64 | 48.0 | 384 | \- | \- | | +| x86\_64 | 48.0 | 384 | 750 | \- | | +| x86\_64 | 48.0 | 384 | 1500 | \- | | +| x86\_64 | 48.0 | 384 | 3000 | \- | | +| x86\_64 | 48.0 | 384 | 6000 | \- | | +| x86\_64 | 48.0 | 384 | 9000 | \- | | +| x86\_64 | 64.0 | 64 | \- | \- | | +| x86\_64 | 64.0 | 64 | 1500 | \- | | +| x86\_64 | 64.0 | 64 | 3000 | \- | | +| x86\_64 | 64.0 | 64 | 6000 | \- | | +| x86\_64 | 64.0 | 64 | 9000 | \- | | +| x86\_64 | 64.0 | 128 | \- | \- | | +| x86\_64 
| 64.0 | 256 | \- | \- | | +| x86\_64 | 64.0 | 256 | 1500 | \- | | +| x86\_64 | 64.0 | 256 | 3000 | \- | | +| x86\_64 | 64.0 | 256 | 6000 | \- | | +| x86\_64 | 64.0 | 256 | 9000 | \- | | +| x86\_64 | 64.0 | 512 | \- | \- | | +| x86\_64 | 64.0 | 512 | 1500 | \- | | +| x86\_64 | 64.0 | 512 | 3000 | \- | | +| x86\_64 | 64.0 | 512 | 6000 | \- | | +| x86\_64 | 64.0 | 512 | 9000 | \- | | +| x86\_64 | 80.0 | 80 | \- | \- | | +| x86\_64 | 80.0 | 80 | 1500 | \- | | +| x86\_64 | 80.0 | 80 | 3000 | \- | | +| x86\_64 | 80.0 | 80 | 6000 | \- | | +| x86\_64 | 80.0 | 80 | 9000 | \- | | +| x86\_64 | 80.0 | 160 | \- | \- | | +| x86\_64 | 80.0 | 320 | \- | \- | | +| x86\_64 | 80.0 | 320 | 1500 | \- | | +| x86\_64 | 80.0 | 320 | 3000 | \- | | +| x86\_64 | 80.0 | 320 | 6000 | \- | | +| x86\_64 | 80.0 | 320 | 9000 | \- | | +| x86\_64 | 80.0 | 640 | \- | \- | | +| x86\_64 | 80.0 | 640 | 1500 | \- | | +| x86\_64 | 80.0 | 640 | 3000 | \- | | +| x86\_64 | 80.0 | 640 | 6000 | \- | | +| x86\_64 | 80.0 | 640 | 9000 | \- | | +| x86\_64 | 96.0 | 96 | \- | \- | | +| x86\_64 | 96.0 | 96 | 3000 | \- | | +| x86\_64 | 96.0 | 96 | 6000 | \- | | +| x86\_64 | 96.0 | 96 | 9000 | \- | | +| x86\_64 | 96.0 | 384 | \- | \- | | +| x86\_64 | 96.0 | 384 | 3000 | \- | | +| x86\_64 | 96.0 | 384 | 6000 | \- | | +| x86\_64 | 96.0 | 384 | 9000 | \- | | +| x86\_64 | 96.0 | 768 | \- | \- | | +| x86\_64 | 96.0 | 768 | 3000 | \- | | +| x86\_64 | 96.0 | 768 | 6000 | \- | | +| x86\_64 | 96.0 | 768 | 9000 | \- | | +| x86\_64 | 128.0 | 128 | \- | \- | | +| x86\_64 | 128.0 | 128 | 3000 | \- | | +| x86\_64 | 128.0 | 128 | 6000 | \- | | +| x86\_64 | 128.0 | 128 | 9000 | \- | | +| x86\_64 | 128.0 | 512 | \- | \- | | +| x86\_64 | 128.0 | 512 | 3000 | \- | | +| x86\_64 | 128.0 | 512 | 6000 | \- | | +| x86\_64 | 128.0 | 512 | 9000 | \- | | +| x86\_64 | 224.0 | 224 | \- | \- | | +| x86\_64 | 224.0 | 224 | 3000 | \- | | +| x86\_64 | 224.0 | 224 | 6000 | \- | | +| x86\_64 | 224.0 | 224 | 9000 | \- | | +| x86\_64 | 224.0 | 896 | \- | 
\- | | +| x86\_64 | 224.0 | 896 | 3000 | \- | | +| x86\_64 | 224.0 | 896 | 6000 | \- | | +| x86\_64 | 224.0 | 896 | 9000 | \- | | \ No newline at end of file diff --git a/mintlify-docs/en/performance/memory-visualizer.mdx b/mintlify-docs/en/performance/memory-visualizer.mdx new file mode 100644 index 0000000000..38f76755c7 --- /dev/null +++ b/mintlify-docs/en/performance/memory-visualizer.mdx @@ -0,0 +1,57 @@ +--- +title: "Memory Visualizer" +--- + +The [schema](/en/basics/schemas) defines fields, types of fields and settings per field, e.g. + +```js expandable +schema product { + + document product { + + field productId type long { + indexing: summary | attribute + attribute: fast-search + rank: filter + } + + field description type string { + indexing: summary | index + } + + ... + } +} +``` + +The field types are often given by the application's data, but the *usage* of the fields is also important - examples: + +- high-speed updates to documents can be achieved by using attributes for memory-writes only, even though the field could be a summary or indexed field, use case permitting +- string fields can be faster than numeric, if the access is equality (not range like "price < 100") + +In short, there are functional, performance and cost tradeoffs. There are guides to help estimate resource use, see [attributes](/en/content/attributes), but often one does not know factors like the number of unique values in the data. It might well be easier to feed the data to Vespa Cloud, do schema changes online, and observe the effect. Vespa Cloud has two features that accelerate this process - the Memory Visualizer and Automated Reindexing: + +![Memory Visualizer](/assets/img/memory-visualizer-1.png) + +The Memory Visualizer lets you browse the attribute fields and observe absolute and relative size. This can help find the cost drivers for memory-bound applications, and identify bottlenecks for optimizations. 
+ +The Memory Visualizer is found in the "services" view in the console for an application. Click a node of type "documentation (type: search)" and use the *Memory visualizer* link. + +## Adding or changing fields + +Use the Memory Visualizer to track memory when adding a field. Attribute, index and summary fields have different behavior when it comes to empty fields and memory use, depending on data type - here, the tool indicates headroom for more data to assist in the evaluation. + +Use the [field change procedure](/en/operations/self-managed/procedure-change-attribute-index) to plan the schema changes for data availability in the transition. The Console will display reindexing progress: + +![Reindex progress](/assets/img/reindex-progress.png) + +This makes it easy to estimate when the reindexing is complete. Note that attribute memory usage might require a node restart for all data structures to drain, take note of this when using the Memory Visualizer again. + +## Using the visualizer + +Some fields have a different color code. To understand the types of fields, read more about the [content node data structures](/en/content/proton) - in short: + +- `Ready` are indexed documents that might or might not be included in queries +- `Not Ready` are document replicas stored on the nodes that might be indexed later +- `Removed` are deleted documents, either by the application, or the document replica has been moved to another node +- `Documentmetastore` is the document ID mapping - see [attributes](/en/content/attributes#document-meta-store) \ No newline at end of file diff --git a/mintlify-docs/en/performance/node-resources.mdx b/mintlify-docs/en/performance/node-resources.mdx new file mode 100644 index 0000000000..517502f025 --- /dev/null +++ b/mintlify-docs/en/performance/node-resources.mdx @@ -0,0 +1,104 @@ +--- +title: "Node Resources" +--- + +This guide goes through the following aspects of node resource configuration: + +1. 
Independent configuration of resource dimensions +2. Using automated resource suggestions +3. Deployment automation for rapid optimization cycles +4. Automated instance type migration for optimal performance over time + + +## Independent resource dimensions +In Vespa Cloud, a node's `resources` is configured like: + +```xml + + + +``` + +With this, you specify the dimensions independently. E.g., one can double the CPU, keeping all other dimensions constant. + +This is important when tuning for the optimal price/performance point, as the pieces of an application have different sweet spots. For example, the product search cluster of an application can be more CPU bound than product recommendations; the latter might need relatively more memory. + + +**Note:** + +The above is a simplified example; more resource dimensions, like GPU use or CPU architecture, are available. + + +Optimizing for cost/performance is therefore easy. Simplified, applications can be CPU, disk, or memory bound. A general rule of thumb is to be bound by the most expensive component, often CPU. Refer to the node resource [reference](/en/reference/applications/services/services#resources) for all dimensions. + +## Resource suggestions + +Applications change over time: + +- Data size growth +- Query rate growth +- Write rate growth +- Schema changes, like new fields or binarized embeddings + +Finding the optimal node configuration is an iterative process. It is simplified by using the Resource Suggestions view in the Vespa Console: + +![Resource Suggestions](/assets/img/resource-suggestions-1.png) + +Vespa Cloud tracks usage over time and suggests node configuration and [topology](/en/performance/topology-and-resizing) changes based on last week's load. In the example above, observe a suggestion that doubles the memory relative to CPU. + +This simplifies *what* to configure, and one can roll out isolated changes while observing latency and other business metrics like relevance quality. 
+ +## Automated resource configuration deployment + +Resource configuration is part of the [application package](/en/basics/applications). To change a cluster's resources, deploy the new version of the application package to Vespa Cloud and wait for the changes to apply: + + + +Changes to stateless Vespa Container clusters are almost instant, dependent on the cloud provider's provisioning latency. + + +Changes to stateful Vespa Content clusters (where the document indices are stored) take more time, as data is redistributed for uniform load: + 1. Changing the node `count` will modify the existing cluster. + 2. Changing the `resources` configuration will set up a parallel cluster and migrate data to it. This is generally slower than changing the node count, as more data moves. + + + + +**Important:** + +Vespa Cloud is designed for online changes. All of the above changes can be deployed to a running system, with zero to minimal disruption. See [content cluster elasticity](/en/content/elasticity) for details. + + +Making changes to the resource specifications is hence fully automated. The quickest way to the sweet spot is to initially deploy with enough capacity and do daily re-tuning to cut cost. + +Vespa Cloud provides performance dashboards with the relevant metrics in this phase: + +![performance dashboard](/assets/img/dashboard.png) + +Eventually, the application has its optimal price/performance characteristics, without lengthy benchmarking activities. + +## Automated instance type migration + +Resource configurations map to the cloud provider's real resources, like AWS EC2 compute instances. 
The instance inventory develops over time, like: + +- *r7g\_4xlarge (Graviton3)* +- *r8g\_4xlarge (Graviton4)* + +Both have 16 vCPU and 128G RAM, but *r8g\_4xlarge* is of a newer generation, and has presumably higher performance: *"R8g instances deliver around 30% higher performance over R7g instances, …"* + + +**Important:** + +As the `resources` configuration is general and independent of instance types, Vespa Cloud will automatically migrate load to more cost-effective compute instances over time. + + +This means, Vespa Cloud applications will migrate to more recent instance types of the same configuration, with **zero manual interventions**. This keeps the total cost in check, and performance tracking advances in hardware. + +Find a list of supported instance types at [AWS](/en/performance/instance-types/aws-instance-types), [GCP](/en/performance/instance-types/gcp-instance-types), and [Azure](/en/performance/instance-types/azure-instance-types) instance types. + +## Next reads + + + + + diff --git a/mintlify-docs/en/performance/practical-search-performance-guide.mdx b/mintlify-docs/en/performance/practical-search-performance-guide.mdx new file mode 100644 index 0000000000..eb072bb3e0 --- /dev/null +++ b/mintlify-docs/en/performance/practical-search-performance-guide.mdx @@ -0,0 +1,1907 @@ +--- +title: "Vespa query performance - a practical guide" +sidebarTitle: "Practical performance guide" +--- + +This is a practical Vespa query performance guide. It uses the [Last.fm](http://millionsongdataset.com/lastfm/) tracks dataset to illustrate Vespa query performance. Latency numbers mentioned in the guide are obtained from running this guide on a MacBook Pro x86. + +This guide covers the following query serving performance aspects: + + + + + + + + + + + + + + +The guide includes step-by-step instructions on how to reproduce the experiments. This guide is best read after having read the [Vespa Overview](/en/learn/overview) documentation first. 
+ + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` + - See [Docker Containers](/en/operations/self-managed/docker-containers) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 4 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). +- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [Github releases](https://github.com/vespa-engine/vespa/releases). +- Python3 for converting the dataset to Vespa JSON. +- `curl` to download the dataset and run the Vespa health-checks. + + +## Installing vespa-cli + +This tutorial uses [Vespa-CLI](/en/clients/vespa-cli), Vespa CLI is the official command-line client for Vespa.ai. It is a single binary without any runtime dependencies and is available for Linux, macOS and Windows. + +```bash +$ brew install vespa-cli +``` + +## Dataset + +This guide uses the [Last.fm](http://millionsongdataset.com/lastfm/) tracks dataset. Note that the dataset is released under the following terms: + +> *Research only, strictly non-commercial. For details, or if you are unsure, please contact Last.fm. 
Also, Last.fm has the right to advertise and refer to any work derived from the dataset.* + +To download the dataset directly (120 MB zip file), run: + +```bash +$ curl -L -o lastfm_test.zip \ + http://millionsongdataset.com/sites/default/files/lastfm/lastfm_test.zip +$ unzip lastfm_test.zip +``` + +The downloaded data needs to be converted to [the JSON format expected by Vespa](/en/reference/schemas/document-json-format). + +This [python](https://www.python.org/) script is used to traverse the dataset files and create a JSONL formatted feed file with Vespa feed operations. The schema for this feed is introduced in the next sections. + + +Paste the above into file create-vespa-feed.py + + +```python expandable +import os +import sys +import json +import unicodedata + +directory = sys.argv[1] +seen_tracks = set() + +def remove_control_characters(s): + return "".join(ch for ch in s if unicodedata.category(ch)[0]!="C") + +def process_file(filename): + global seen_tracks + with open(filename) as fp: + doc = json.load(fp) + title = doc['title'] + artist = doc['artist'] + hash = title + artist + if hash in seen_tracks: + return + else: + seen_tracks.add(hash) + + track_id = doc['track_id'] + tags = doc['tags'] + tags_dict = dict() + for t in tags: + k,v = t[0],int(t[1]) + tags_dict[k] = v + similars = doc['similars'] + tensor_cells = [] + keys_seen = dict() + for s in similars: + k,v = s[0],float(s[1]) + if k in keys_seen: + continue + else: + keys_seen[k] = 1 + cell = { + "address": { + "trackid": k + }, + "value": v + } + tensor_cells.append(cell) + + vespa_doc = { + "put": "id:music:track::%s" % track_id, + "fields": { + "title": remove_control_characters(title), + "track_id": track_id, + "artist": remove_control_characters(artist), + "tags": tags_dict, + "similar": { + "cells": tensor_cells + } + } + } + print(json.dumps(vespa_doc)) + +sorted_files = [] +for root, dirs, files in os.walk(directory): + for filename in files: + filename = os.path.join(root, filename) + 
sorted_files.append(filename) +sorted_files.sort() +for filename in sorted_files: + process_file(filename) +``` + + +Run the script and create the `feed.jsonl` file: + +```bash +$ python3 create-vespa-feed.py lastfm_test > feed.jsonl +``` + +## Create a Vespa Application Package + +A [Vespa application package](/en/basics/applications) is the set of configuration files and Java plugins that together define the behavior of a Vespa system: what functionality to use, the available document types, how ranking will be done, and how data will be processed during feeding and indexing. + +The minimum required files to create the basic search application are `track.sd` and `services.xml`. Create directories for the configuration files: + +```bash +$ mkdir -p app/schemas; mkdir -p app/search/query-profiles/ +``` + +### Schema + +A Vespa [schema](/en/basics/schemas) is a configuration of a document type and ranking and compute specifications. This app uses a `track` schema defined as: + +```js expandable +schema track { + + document track { + + field track_id type string { + indexing: summary | attribute + rank: filter + match: word + } + + field title type string { + indexing: summary | index + index: enable-bm25 + } + + field artist type string { + indexing: summary | index + } + + field tags type weightedset<string> { + indexing: summary | attribute + } + + field similar type tensor(trackid{}) { + indexing: summary | attribute + } + } + + fieldset default { + fields: title, artist + } +} +``` + + +Paste the above into file app/schemas/track.sd + + +Notice that the `track_id` field has: + +- [rank: filter](/en/reference/schemas/schemas#rank). This setting can save resources when matching against the field. +- [match: word](/en/reference/schemas/schemas#match). This is a database-style matching mode, preserving punctuation characters. 
+ +### Services Specification + +The [services.xml](/en/reference/applications/services/services) defines the services that make up the Vespa application — which services to run and how many nodes per service. + +```xml + + + + + + + + + + 1 + + + + + + + + +``` + + +Paste the above into file app/services.xml + + +The default [query profile](/en/reference/querying/query-profiles) can be used to override default query api settings for all queries. + +The following enables [presentation.timing](/en/reference/api/query#presentation.timing) and renders `weightedset` fields as JSON maps. + +```xml + + true + true + +``` + + +Paste the above into file app/search/query-profiles/default.xml + + +## Deploy the application package + +The application package can now be deployed to a running Vespa instance. See also the [Vespa quick start guide](/en/basics/deploy-an-application-local). + +Start the Vespa container image using Docker: + +```bash +$ docker run --detach --name vespa --hostname vespa-container \ + --publish 8080:8080 --publish 19071:19071 --publish 19110:19110 \ + vespaengine/vespa +``` + +Starting the container can take a short while. Before continuing, make sure that the configuration service is running by using `vespa status deploy`. + +```bash +$ vespa config set target local +$ vespa status deploy --wait 300 +``` + +Once ready, the Vespa application can be deployed using the Vespa CLI: + +```bash +$ vespa deploy --wait 300 app +``` + +## Index the dataset + +Feed the feed file generated in the previous section: + +```bash +$ vespa feed -t http://localhost:8080 feed.jsonl +``` + +## Basic text search query performance +The following sections use the Vespa [query api](/en/reference/api/query) and formulate queries using Vespa [query language](/en/querying/query-language). For readability, all query examples are expressed using the [vespa-cli](/en/clients/vespa-cli) command which supports running queries against a Vespa instance. 
The CLI uses the Vespa http search api internally. Use `vespa query -v` to see the actual http request sent: + +```bash +$ vespa query -v 'yql=select ..' +``` + +The first query uses `where true` to match all `track` documents. It also uses [hits](/en/reference/api/query#hits) to specify how many documents to return in the response: + +```bash +$ vespa query \ + 'yql=select artist, title, track_id, tags from track where true' \ + 'hits=1' +``` + +The [result json output](/en/reference/querying/default-result-format) for this query will look something like this: + +```json expandable +{ + "timing": { + "querytime": 0.009000000000000001, + "summaryfetchtime": 0.001, + "searchtime": 0.011 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 95666 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/632facf01973795ba294b7d5", + "relevance": 0.0, + "source": "tracks", + "fields": { + "track_id": "TRRRGWV128F92FC7E0", + "title": "Zombies", + "artist": "True Blood" + } + } + ] + } +} +``` + +Observations: + +- The query searched one node (`coverage.nodes`) and the coverage (`coverage.coverage`) was 100%, see [graceful-degradation](/en/performance/graceful-degradation) for more information about the `coverage` element, and Vespa timeout behavior. Vespa's default timeout is 0.5 seconds. +- The query matched a total of 95666 documents (`totalCount`) out of 95666 documents available (`coverage.documents`). + +The response `timing` has three fields. A Vespa query is executed in two protocol phases: + +- Query matching phase which fans the query out from the stateless container to a content group, each node in the group finds the nodes top-k documents and returns k. The stateless container then merges the nodes' k hits each to obtain a globally ordered top-k documents. 
+- Summary phase which asks the content nodes that produced the global top-k hits for summary data. + +See also [Life of a query in Vespa](/en/performance/sizing-search#life-of-a-query-in-vespa). The `timing` in the response measures the time it takes to execute these two phases: + +- `querytime` - Time to execute the first protocol phase/matching phase. +- `summaryfetchtime` - Time to execute the summary fill protocol phase for the globally ordered top-k hits. +- `searchtime` - Roughly the sum of the above, and close to what a client will observe (except network latency). + +All three metrics are reported in seconds. Moving on, the following query performs a free text query: + +```bash +$ vespa query \ + 'yql=select artist, title, track_id from track where userQuery()' \ + 'query=total eclipse of the heart' \ + 'hits=1' +``` + +This query request combines YQL [userQuery()](/en/reference/querying/yql#userquery) with Vespa's [simple query language](/en/reference/querying/simple-query-language). The default [query type](/en/reference/api/query#model.type) is `all`, requiring that all the terms match. + +The above example searches for *total AND eclipse AND of AND the AND heart* in the fieldset `default`, which in the schema includes the `title` and `artist` fields. Since the request did not specify any [ranking](/en/basics/ranking) parameters, the matched documents were ranked by Vespa's default text rank feature: [nativeRank](/en/ranking/nativerank). 
+ +The result output for the above query: + +```json expandable +{ + "timing": { + "querytime": 0.012, + "summaryfetchtime": 0.001, + "searchtime": 0.014 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 1 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/3f18869c19c25e3ae237702f", + "relevance": 0.13274821039905835, + "source": "tracks", + "fields": { + "track_id": "TRUKHZD128F92DF70A", + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler" + } + } + ] + } +} +``` + +This query only matched one document because the query terms were ANDed. Matching can be relaxed to `type=any` instead using [query model type](/en/reference/api/query#model.type). + +```bash +$ vespa query \ + 'yql=select artist, title, track_id from track where userQuery()' \ + 'query=total eclipse of the heart' \ + 'hits=1' \ + 'type=any' +``` + +Now, the query matches 24,053 documents and is considerably slower than the previous `all` query. Comparing `querytime` of these two query examples, the one which matches the most documents have the highest `querytime`. In worst case, the search query matches all documents, and without any techniques for early termination or skipping, all matches are exposed to ranking. Query performance is greatly impacted by the number of documents that matches the query specification. Generally, type `any` queries requires more query compute resources than type `all`. + +There is an algorithmic optimization available for `type=any` queries, using the `weakAnd` query operator which implements the WAND algorithm. See the [using wand with Vespa](/en/ranking/wand) for an introduction to the algorithm. 
+ +Run the same query, but instead of `type=any` use `type=weakAnd`: + +```bash +$ vespa query \ + 'yql=select artist, title, track_id from track where userQuery()' \ + 'query=total eclipse of the heart' \ + 'hits=1' \ + 'type=weakAnd' +``` + +Compared to the type `any` query which fully ranked 24,053 documents, `weakAnd` only fully ranks 3,679 documents. Also notice that the faster search returns the same document at the first position. Conceptually a search query is about finding the documents that match the query, then score the documents using a ranking model. In the worst case a search query can match all documents, which will expose all of them to the ranking. + +## Hits and summaries + +The previous examples used `hits=1` query parameter, and in the previous query examples the `summaryfetchtime` has been close to constant. + +The following query requests considerably more hits, note that the result is piped to `head` to increase readability: + +```bash +$ vespa query \ + 'yql=select artist, title, track_id from track where userQuery()' \ + 'query=total eclipse of the heart' \ + 'hits=200' \ + 'type=weakAnd' |head -40 +``` + +Increasing number of hits increases `summaryfetchtime` significantly from the previous query examples, while `querytime` is relatively unchanged. Repeating the query a second time will reduce the `summaryfetchtime` due to the content node summary cache, see [caches in Vespa](/en/performance/caches-in-vespa) for details. + +There are largely four factors which determines the `summaryfetchtime`: + +- The number of hits requested and number of content nodes that produced the query result which makes up the `querytime`. With many content nodes in the group the query was dispatched to, we expect that top-ranking hits would be distributed across the nodes, so that each node does less work. +- The network package size of each hit. Returning hits with larger fields costs more resources and higher `summaryfetchtime` than smaller docs. 
+- The summary used with the query, and which fields go into the summary. For example, a [document-summary](/en/querying/document-summaries) which only contain fields that are defined as `attribute` will be read from memory. For the `default` summary, or others containing at least one non-attribute field, a fill will potentially access data from summary storage on disk. Read more about in-memory [attribute](/en/content/attributes) fields. +- [summary-features](/en/reference/schemas/schemas#summary-features) used to return computed [rank features](/en/reference/ranking/rank-features) from the content nodes. + +Creating a dedicated [document-summary](/en/querying/document-summaries) which only contain the `track_id` field can improve performance, since `track_id` is defined in the schema with `attribute`, any summary fetches using this document summary will be reading in-memory data. In addition, since the summary only contain one field, it saves network time as less data is transferred during the summary fill phase. 
+ +```js +document-summary track_id { + summary track_id { } +} +``` + +The new schema then becomes: + +```js expandable +schema track { + + document track { + + field track_id type string { + indexing: summary | attribute + rank: filter + match: word + } + + field title type string { + indexing: summary | index + index: enable-bm25 + } + + field artist type string { + indexing: summary | index + } + + field tags type weightedset { + indexing: summary | attribute + } + + field similar type tensor(trackid{}) { + indexing: summary | attribute + } + } + + fieldset default { + fields: title, artist + } + + document-summary track_id { + summary track_id { } + } +} +``` + + +Paste the above into file app/schemas/track.sd + + +Re-deploy the application: + +```bash +$ vespa deploy --wait 300 app +``` + +Re-executing the query using the `track_id` `document-summary` is done by setting the [summary](/en/reference/api/query#presentation.summary) query request parameter: + +```bash +$ vespa query \ + 'yql=select artist, title, track_id from track where userQuery()' \ + 'query=total eclipse of the heart' \ + 'hits=200' \ + 'type=weakAnd' \ + 'summary=track_id' |head -40 +``` + +In this particular case the `summaryfetchtime` difference is not that large, but for larger number of hits and larger documents the difference is significant. Especially in single content node deployments. + +A note on select field scoping with YQL, e.g. `select title, track_id from ..`. When using the default summary by not using a summary parameter, all fields are delivered from the content nodes to the stateless search container in the summary fill phase, regardless of field scoping. The search container removes the set of fields not selected and renders the result. Hence, select scoping only reduces the amount of data transferred back to the client, and does not impact or optimize the performance of the internal communication and potential summary cache miss. 
For optimal performance for use cases asking for large number of hits to the client it is recommended to use dedicated document summaries. Note also that Vespa per default limits the max hits to 400 per default, the behavior can be overridden in the [default queryProfile](/en/reference/api/query#queryprofile). + +When requesting large amounts of data, consider how to reduce response size. Vespa supports gzip compression if the HTTP client uses the [Accept-Encoding](https://www.rfc-editor.org/rfc/rfc9110.html#name-accept-encoding) HTTP request header: + +```text +Accept-Encoding: gzip +``` + +Compression reduces data transfer but adds CPU overhead. The lowest latency is achieved without compression if network bandwidth is sufficient. [CBOR format](/en/reference/api/query#presentation.format) (`format=cbor` or `Accept: application/cbor`) is both more compact and faster to generate than JSON, especially for numeric data such as tensors and embeddings. CBOR can also be combined with gzip compression. CBOR is a drop-in replacement for JSON - when deserialized, the result is identical. + +## Searching attribute fields + +The previous section covered free text searching in a `fieldset` containing fields with `indexing:index`. See [indexing reference](/en/reference/schemas/schemas#indexing). Fields of [type string](/en/reference/schemas/schemas#field) are treated differently depending on having `index` or `attribute`: + +- `index` integrates with [linguistic](/en/linguistics/linguistics) processing and is matched using [match:text](/en/reference/schemas/schemas#match). +- `attribute` does not integrate with linguistic processing and is matched using [match:word](/en/reference/schemas/schemas#match). + +With `index` Vespa builds inverted index data structures which roughly consist of: + +- A dictionary of the unique text tokens (after linguistic processing) +- Posting lists for each unique text token in the collection. 
Posting lists come in different formats, and using `rank: filter` can help guide the decision on what format to use. The bitvector representation is the most compact posting list representation.
The following scans for documents where `tags` match *rock*: + +```bash +$ vespa query \ + 'yql=select track_id, tags from track where tags contains "rock"' \ + 'hits=1' +``` + +The query matches 8,160 documents, notice that for `match: word`, matching can also include whitespace, or generally punctuation characters which are removed and not searchable when using `match:text` with string fields that have `index`: + +```bash +$ vespa query \ + 'yql=select track_id, tags from track where tags contains "classic rock"' \ + 'hits=1' +``` + +The above query matches exactly tags with "classic rock", not "rock" and also not "classic rock music". + +Another query searching for *rock* or *pop*: + +```bash +$ vespa query \ + 'yql=select track_id, tags from track where tags contains "rock" or tags contains "pop"' \ + 'hits=1' +``` + +In all these examples searching the `tags` field, the matching is done by a linear scan through all `track` documents. The `tags` search can be combined with regular free text query terms searching fields that do have inverted index structures: + +```bash +$ vespa query \ + 'yql=select track_id, tags from track where tags contains "rock" and userQuery()' \ + 'hits=1' \ + 'query=total eclipse of the heart' +``` + +In this case - the query terms searching the default fieldset will restrict the number of documents that needs to be scanned for the *tags* constraint. This query is automatically optimized by the Vespa query planner. + +### Searching attribute fields using fast-search + +This section adds `fast-search` to the `tags` field to speed up searches where there are no other query filters which restricts the search. 
The schema with `fast-search`: + +```js expandable +schema track { + + document track { + + field track_id type string { + indexing: summary | attribute + rank: filter + match: word + } + + field title type string { + indexing: summary | index + index: enable-bm25 + } + + field artist type string { + indexing: summary | index + } + + field tags type weightedset { + indexing: summary | attribute + attribute: fast-search + } + + field similar type tensor(trackid{}) { + indexing: summary | attribute + } + } + + fieldset default { + fields: title, artist + } + + document-summary track_id { + summary track_id { } + } +} +``` + + +Paste the above into file app/schemas/track.sd + + +Re-deploy the application: + +```bash +$ vespa deploy --wait 300 app +``` + +The above will print a WARNING: + +```bash +vespa deploy --wait 300 app/ +Uploading application package ... done + +Success: Deployed app/ +WARNING Change(s) between active and new application that require restart: +In cluster 'tracks' of type 'search': + Restart services of type 'searchnode' because: + 1) Document type 'track': Field 'tags' changed: add attribute 'fast-search' + +Waiting up to 300 seconds for query service to become available ... +``` + +To enable `fast-search`, content node(s) needs to be restarted to re-build the fast-search data structures for the attribute. 
+ +The following uses [vespa-sentinel-cmd command tool](/en/reference/operations/self-managed/tools#vespa-sentinel-cmd) to restart the searchnode process: + +```bash +$ docker exec vespa vespa-sentinel-cmd restart searchnode +``` + +This step requires waiting for the searchnode, use the [health state api](/en/reference/operations/metrics#health-state-api): +```bash +$ curl -s http://localhost:19110/state/v1/health +``` + +Wait for status code to flip to `up` before querying again: + +```json +{ + "status": { + "code": "up" + } +} +``` + +Once up, execute the `tags` query again: + +```bash +$ vespa query \ + 'yql=select track_id, tags from track where tags contains "rock" or tags contains "pop"' \ + 'hits=1' +``` + +Now the `querytime` will be a few milliseconds, since Vespa has built index structures to support `fast-search` in the attribute. The downside of enabling `fast-search` is increased memory usage and slightly reduced indexing throughput. See also [when to use fast-search for attributes](/en/performance/feature-tuning#when-to-use-fast-search-for-attribute-fields). + +For use cases requiring `match:text` when searching multivalued string field types like [weightedset](/en/reference/schemas/schemas#weightedset), see [searching multi-value fields](/en/querying/searching-multivalue-fields). + +For fields that don't need any match ranking features, it's strongly recommended to use [rank: filter](/en/reference/schemas/schemas#rank). + +```js +field availability type int { + indexing: summary | attribute + rank: filter + attribute { + fast-search + } +} +``` + +With the settings above, bit vector posting list representations are used. This is especially efficient when used in combination with [TAAT (term at a time)](/en/performance/feature-tuning#hybrid-taat-daat) query evaluation. For some cases with many query terms, enabling `rank: filter` can reduce match latency by 75%. 
+ +## Multi-valued query operators + +This section covers [multi-value query operators](/en/ranking/multivalue-query-operators) and their query performance characteristics. Many real-world search and recommendation use cases involve structured multivalued queries. + +Assuming a process has learned a sparse user profile representation, which, for a given user, based on past interactions with a service, could produce a user profile with *hard rock*, *rock*, *metal* and *finnish metal*. Sparse features from a fixed vocabulary/feature space. + +Retrieving and ranking using sparse representations can be done using the dot product between the sparse user profile representation and document representation. In the track example, the `tags` field could be the document side sparse representation. Each document is tagged with multiple `tags` using a weight, and similar the sparse user profile representation could use weights. + +In the following examples, the [dotProduct()](/en/reference/querying/yql#dotproduct) and [wand()](/en/reference/querying/yql#wand) query operators are used. 
+ +To configure [ranking](/en/basics/ranking), add a `rank-profile` to the schema: + +```js expandable +schema track { + + document track { + + field track_id type string { + indexing: summary | attribute + rank: filter + match: word + } + + field title type string { + indexing: summary | index + index: enable-bm25 + } + + field artist type string { + indexing: summary | index + } + + field tags type weightedset { + indexing: summary | attribute + attribute: fast-search + } + + field similar type tensor(trackid{}) { + indexing: summary | attribute + } + } + + fieldset default { + fields: title, artist + } + + document-summary track_id { + summary track_id { } + } + + rank-profile personalized { + first-phase { + expression: rawScore(tags) + } + } +} +``` + + +Paste the above into file app/schemas/track.sd + + +The `dotProduct`and `wand` query operators produce a `rank feature` called [rawScore(name)](/en/reference/ranking/rank-features#rawScore\(field\)). This feature calculates the sparse dot product between the query and document weights. + +Deploy the application again: + +```bash +$ vespa deploy --wait 300 app +``` + +The [dotProduct](/en/reference/querying/yql#dotproduct) query operator accepts a field to match over and supports [parameter substitution](/en/reference/querying/yql#parameter-substitution). Using substitution is recommended for large inputs as it saves compute resources when parsing the YQL input. 
+ +The following example assumes a learned sparse representation, with equal weight: + +```js +userProfile={"hard rock":1, "rock":1,"metal":1, "finnish metal":1} +``` + +This userProfile is referenced as a parameter + +```text +where dotProduct(tags, @userProfile) +``` + +```bash +$ vespa query \ + 'yql=select track_id, title, artist, tags from track where dotProduct(tags, @userProfile)' \ + 'userProfile={"hard rock":1, "rock":1,"metal":1, "finnish metal":1}' \ + 'hits=1' \ + 'ranking=personalized' +``` + +The query also specifies the `rank-profile` `personalized`, if not specified, ranking would be using `nativeRank`. The above query returns the following response: + +```json expandable +{ + "timing": { + "querytime": 0.004, + "summaryfetchtime": 0.001, + "searchtime": 0.006 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 10323 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/74d3f4df2989650b2cc095be", + "relevance": 400.0, + "source": "tracks", + "fields": { + "track_id": "TRJTBAO128F932191C", + "title": "Vastarannan valssi", + "artist": "Viikate", + "tags": { + "Suomi": 100, + "rautalanka": 100, + "suomi rock": 100, + "hard rock": 100, + "melodic metal": 100, + "finnish": 100, + "finnish metal": 100, + "metal": 100, + "rock": 100 + } + } + } + ] + } +} +``` + +Notice that the query above will brute-force rank all tracks where the `tags` field matches *any* of the multivalued userProfile features. Due to this, the query ranks 10,323 tracks as seen by `totalCount`. Including for example *pop* in the userProfile list increases the number of hits to 13,638. + +For a large user profile with many learned features/tags, one would easily match and rank the entire document collection. Also notice the `relevance` score which is 400, since the document matches all the query input tags (4x100 = 400). 
+ +To optimize the evaluation the [wand query operator](/en/reference/querying/yql#wand) can be used. The `wand` query operator supports setting a target number of top ranking hits that gets exposed to the `first-phase` ranking function. + +Repeating the query from above, replacing `dotProduct` with `wand`: + +```bash +$ vespa query \ + 'yql=select track_id, title, artist, tags from track where {totalTargetHits:10}wand(tags, @userProfile)' \ + 'userProfile={"hard rock":1, "rock":1,"metal":1, "finnish metal":1}' \ + 'hits=1' \ + 'ranking=personalized' +``` + +The `wand` query operator retrieves the exact same hit at rank 1, which is the expected behavior. The `wand` query operator is safe, meaning it returns the same top-k results as the `dotProduct` query operator. + +For larger document collections, the *wand* query operator can significantly improve query performance compared to `dotProduct`. + +*wand* is a query operator which performs matching and ranking interleaved and skips documents which cannot make it into the top-k results. [Using wand with Vespa](/en/ranking/wand) guide has more details on the WAND algorithm. + +Finally, these multi-value query operators work on both single-valued fields and array fields, but optimal performance is achieved using the [weightedset](/en/reference/schemas/schemas#weightedset) field type. The `weightedset` field type only supports integer weights. The next section covers tensors that support more floating point number types. + +## Tensor computations + +The previous sections covered matching and ranking where query matching query operators also produced rank features which could be used to influence the order of the hits returned. In this section we look at ranking with [tensor computations](/en/ranking/tensor-examples) using [tensor expressions](/en/ranking/tensor-user-guide). + +Tensor computations can be used to calculate dense dot products, sparse dot products, matrix multiplication, neural networks and more. 
Tensor computations can be performed on documents that are retrieved by the query matching operators. The only exception to this is dense single order tensors (vectors) where Vespa also supports "matching" using [(approximate) nearest neighbor search](/en/querying/approximate-nn-hnsw). + +The `track` schema was defined with a `similar` tensor field with one named *mapped* dimension. *Mapped* tensors can be used to represent sparse feature representations, similar to the `weightedset` field, but in a more generic way, and here using `float` to represent the tensor cell value. + +```js +field similar type tensor(trackid{}) { + indexing: summary | attribute +} +``` + +Inspecting one document, using the vespa-cli (Wraps [Vespa document/v1 api](/en/writing/document-v1-api-guide)): + +```bash +$ vespa document get id:music:track::TRQIQMT128E0791D9C +``` + +Returns: + +```json expandable +{ + "pathId": "/document/v1/music/track/docid/TRUAXHV128F42694E8", + "id": "id:music:track::TRUAXHV128F42694E8", + "fields": { + "artist": "Bryan Adams", + "title": "Summer Of '69", + "similar": { + "cells": [ + { + "address": { + "trackid": "TRWJIPT128E0791D99" + }, + "value": 1.0 + }, + { + "address": { + "trackid": "TRKPGHH128F1453DD0" + }, + "value": 0.9129049777984619 + }, + { + "address": { + "trackid": "TRGVORX128F4291DF1" + }, + "value": 0.3269079923629761 + } + ] + }, + "tags": { + "All time favourites": 1, + "male vocalists": 7, + "singer-songwriter": 6, + "happy": 2, + "Driving": 3, + "classic rock": 59, + "loved": 1, + "Energetic": 2, + "male vocalist": 1, + "dance": 1, + "soft rock": 2, + "1980s": 1 + } + } +} +``` + +In the lastfm collection, each track lists similar tracks with a similarity score using float resolution, according to this similarity algorithm the most similar track to this sample document is `TRWJIPT128E0791D99`, with a similarity score of 1.0. 
+ +Searching for that doc using the query api: + +```bash +$ vespa query \ + 'yql=select title, artist from track where track_id contains "TRWJIPT128E0791D99"' \ + 'hits=1' +``` + +Note that `track_id` was not defined with `fast-search`, so searching it without any other query terms makes this query a linear scan over all tracks. + +The query returns: + +```json expandable +{ + "timing": { + "querytime": 0.01, + "summaryfetchtime": 0.002, + "searchtime": 0.013000000000000001 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 1 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/63eadd770a5dfde1f111aed7", + "relevance": 0.0017429193899782135, + "source": "tracks", + "fields": { + "title": "Run To You", + "artist": "Bryan Adams" + } + } + ] + } +} +``` + +Given a single track, one could just retrieve the document and display the offline computed similar tracks, but, if a user has listened to multiple tracks in a real time session, one could use a sparse dot product between the user recent activity and the track similarity fields. For example, listening to the following tracks: + +- `TRQIQMT128E0791D9C` Summer Of '69 by Bryan Adams +- `TRWJIPT128E0791D99` Run To You by Bryan Adams +- `TRGVORX128F4291DF1` Broken Wings by Mr. Mister + +could be represented as a query tensor `query(user_liked)` and passed with the query request like this: + +```bash +input.query(user_liked)={{trackid:TRUAXHV128F42694E8 }:1.0,{trackid:TRQIQMT128E0791D9C}:1.0,{trackid:TRGVORX128F4291DF1}:1.0} +``` + +Both the document tensor and the query tensor are defined with `trackid{}` as the *named* *mapped* dimension. 
The sparse tensor dot product can then be expressed in a `rank-profile`:
Mister + +Is represented as the `query(user_liked)` query tensor + +```bash +input.query(user_liked)={{trackid:TRUAXHV128F42694E8 }:1.0,{trackid:TRQIQMT128E0791D9C}:1.0,{trackid:TRGVORX128F4291DF1}:1.0} +``` + +The first query example runs the tensor computation over all tracks using `where true`, notice also `ranking=similar`, without it, ranking with `nativeRank` would not take into account the query tensor: + +```bash +$ vespa query \ + 'yql=select title, artist, track_id from track where true' \ + 'input.query(user_liked)={{trackid:TRUAXHV128F42694E8}:1.0,{trackid:TRQIQMT128E0791D9C}:1.0,{trackid:TRGVORX128F4291DF1}:1.0}' \ + 'ranking=similar' \ + 'hits=5 +``` + +This query also retrieved some of the previous *liked* tracks. These can be removed from the result set using the `not` query operator, in YQL represented as `!`. + +```bash +where !(track_id in (@userLiked)) +``` + +The [in query operator](/en/reference/querying/yql#in) is the most efficient multi-value *filtering* query operator, either using a positive filter (match if any of the keys matches) or negative filter using `not` (remove from result if any of the keys matches). + +See more examples in [feature-tuning set filtering](/en/performance/feature-tuning#multi-lookup-set-filtering). + +Run query with the `not` filter: + +```bash +vespa query \ + 'yql=select title, artist, track_id from track where !(track_id in (@userLiked))' \ + 'input.query(user_liked)={{trackid:TRQIQMT128E0791D9C}:1.0,{trackid:TRWJIPT128E0791D99}:1.0,{trackid:TRGVORX128F4291DF1}:1.0}' \ + 'ranking=similar' \ + 'hits=5' \ + 'userLiked=TRQIQMT128E0791D9C,TRWJIPT128E0791D99,TRGVORX128F4291DF1'' +``` + +Note that the tensor query input format is slightly different from the variable substitution supported for the multivalued query operators `wand`, `in` and `dotProduct`. 
The above query produces the following result: + +```json expandable +{ + "timing": { + "querytime": 0.121, + "summaryfetchtime": 0.004, + "searchtime": 0.125 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 95663 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/83b83fed0f2353b738591b15", + "relevance": 1.1211640238761902, + "source": "tracks", + "fields": { + "track_id": "TRGJNAN128F42AEEF6", + "title": "Holding Out For A Hero", + "artist": "Bonnie Tyler" + } + }, + { + "id": "index:tracks/0/be76cb3bc209be6f818c91a7", + "relevance": 1.0151770114898682, + "source": "tracks", + "fields": { + "track_id": "TRAONMM128F92DF7B0", + "title": "Africa", + "artist": "Toto" + } + }, + { + "id": "index:tracks/0/074b6b937d0ff7b59710c279", + "relevance": 1.0, + "source": "tracks", + "fields": { + "track_id": "TRFQRYC12903CD0BB9", + "title": "Kyrie", + "artist": "Mr. Mister" + } + }, + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 0.7835690081119537, + "source": "tracks", + "fields": { + "track_id": "TRKLIXH128F42766B6", + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler" + } + }, + { + "id": "index:tracks/0/1c3ab39c8ffa4fd2ba388b4e", + "relevance": 0.7503079921007156, + "source": "tracks", + "fields": { + "track_id": "TRAFGCY128F92E5F6C", + "title": "Hold The Line", + "artist": "Toto" + } + } + ] + } +} +``` + +This query retrieves 95,663 documents, and the three tracks previously *liked* were removed from the result. 
The following example filters by a tags query, `tags:popular`, reducing the complexity of the query as fewer documents get ranked by the tensor ranking expression:
The schema with `attribute: fast-search` added to the `similar` tensor field: + +```js expandable +schema track { + document track { + + field track_id type string { + indexing: summary | attribute + rank: filter + match: word + } + + field title type string { + indexing: summary | index + index: enable-bm25 + } + + field artist type string { + indexing: summary | index + } + + field tags type weightedset { + indexing: summary | attribute + attribute: fast-search + } + + field similar type tensor(trackid{}) { + indexing: summary | attribute + attribute: fast-search + } + } + + fieldset default { + fields: title, artist + } + + document-summary track_id { + summary track_id { } + } + + rank-profile personalized { + first-phase { + expression: rawScore(tags) + } + } + + rank-profile similar { + inputs { + query(user_liked) tensor(trackid{}) + } + first-phase { + expression: sum(attribute(similar) * query(user_liked)) + } + } +} +``` + + +Paste the above into file app/schemas/track.sd + + +Deploy the application again : + +```bash +$ vespa deploy --wait 300 app +``` + +And again, adding `fast-search`, requires a re-start of the searchnode process: + +```bash +$ docker exec vespa vespa-sentinel-cmd restart searchnode +``` + +Wait for the searchnode to start by waiting for `status:code:up`: + +```bash +$ curl -s http://localhost:19110/state/v1/health +``` + +Re-run the tensor ranking query: + +```bash +vespa query \ + 'yql=select title,artist, track_id from track where !(track_id in (@userLiked))' \ + 'input.query(user_liked)={{trackid:TRQIQMT128E0791D9C}:1.0,{trackid:TRWJIPT128E0791D99}:1.0,{trackid:TRGVORX128F4291DF1}:1.0}' \ + 'ranking=similar' \ + 'hits=5' \ + 'userLiked=TRQIQMT128E0791D9C,TRWJIPT128E0791D99,TRGVORX128F4291DF1' +``` + +The `querytime` dropped to 40 ms instead of 120 ms without the `fast-search` option. See also [performance considerations](/en/performance/feature-tuning#tensor-ranking) when using tensor expression. 
Vespa supports `int8`, `bfloat16`, `float` and `double` precision cell types. A tradeoff between speed, accuracy and memory usage. + +## Multithreaded search and ranking + +So far in this guide all search queries and ranking computations have been performed using single threaded execution. To enable multithreaded execution, a setting needs to be added to `services.xml`. Multithreaded search and ranking can improve query latency significantly and make better use of multi-cpu core architectures. + +The following adds a `tuning` element to `services.xml` overriding [requestthreads:persearch](/en/reference/applications/services/content#requestthreads-persearch). The default number of threads used `persearch` is one. + +```xml expandable + + + + + + + + + + + + + + + 4 + + + + + + 1 + + + + + + + + +``` + + +Paste the above into file app/services.xml + + +Deploy the application again : + +```bash +$ vespa deploy --wait 300 app +``` + +Changing the global threads per search requires a restart of the `searchnode` process: + +```bash +$ docker exec vespa vespa-sentinel-cmd restart searchnode +``` + +Wait for the `searchnode` to start: +```bash +$ curl -s localhost:19110/state/v1/health +``` + +Then repeat the tensor ranking query: + +```bash +vespa query \ + 'yql=select title,artist, track_id from track where !(track_id in (@userLiked))' \ + 'input.query(user_liked)={{trackid:TRQIQMT128E0791D9C}:1.0,{trackid:TRWJIPT128E0791D99}:1.0,{trackid:TRGVORX128F4291DF1}:1.0}' \ + 'ranking=similar' \ + 'hits=5' \ + 'userLiked=TRQIQMT128E0791D9C,TRWJIPT128E0791D99,TRGVORX128F4291DF1' +``` + +Now, the content node(s) will parallelize the matching and ranking using multiple search threads and `querytime` drops to about 15 ms. + +The setting in `services.xml` sets the global *persearch* value, It is possible to tune down the number of threads used for a query with `rank-profile` overrides using [num-threads-per-search](/en/reference/schemas/schemas#num-threads-per-search). 
Note that the per rank-profile setting can only be used to tune the number of threads to a lower number than the global default. + +Note that increasing `persearch` reduces the maximum number of queries that can execute concurrently, since the match engine executor has `search / persearch` slots. See [thread configuration](/en/performance/sizing-search#thread-configuration) for details on sizing the thread pool to avoid this bottleneck. + +This adds a new `rank-profile` `similar-t2` using `num-threads-per-search: 2` instead of the global 4 setting. It's also possible to set the number of threads in the query request using [ranking.matching.numThreadsPerSearch](/en/reference/api/query#ranking.matching). + +```js expandable +schema track { + + document track { + + field track_id type string { + indexing: summary | attribute + rank: filter + match: word + } + + field title type string { + indexing: summary | index + index: enable-bm25 + } + + field artist type string { + indexing: summary | index + } + + field tags type weightedset { + indexing: summary | attribute + attribute: fast-search + } + + field similar type tensor(trackid{}) { + indexing: summary | attribute + attribute: fast-search + } + } + + fieldset default { + fields: title, artist + } + + document-summary track_id { + summary track_id { } + } + + rank-profile personalized { + first-phase { + expression: rawScore(tags) + } + } + + rank-profile similar { + inputs { + query(user_liked) tensor(trackid{}) + } + first-phase { + expression: sum(attribute(similar) * query(user_liked)) + } + } + + rank-profile similar-t2 inherits similar { + num-threads-per-search: 2 + } +} +``` + + +Paste the above into file app/schemas/track.sd + + +Deploy the application again : + +```bash +$ vespa deploy --wait 300 app +``` + +Adding a new rank-profile does not require any restart, repeat the query again, now using the `similar-t2` profile: + +```bash +$ vespa query \ + 'yql=select title,artist, track_id from track where 
!(track_id in (@userLiked))' \ + 'input.query(user_liked)={{trackid:TRQIQMT128E0791D9C}:1.0,{trackid:TRWJIPT128E0791D99}:1.0,{trackid:TRGVORX128F4291DF1}:1.0}' \ + 'ranking=similar-t2' \ + 'hits=5' \ + 'userLiked=TRQIQMT128E0791D9C,TRWJIPT128E0791D99,TRGVORX128F4291DF1' +``` + +By using multiple rank profiles developers can find the sweet-spot where latency does not improve much by using more threads. Using more threads per search limits query concurrency as more threads will be occupied per query. Read more in [Vespa sizing guide:reduce latency with multithreaded search](/en/performance/sizing-search#reduce-latency-with-multi-threaded-per-search-execution). + +## Advanced range search with hitLimit + +Vespa has an advanced query operator that allows selecting the documents with the k-largest or k-smallest values of a `fast-search` attribute field. + +To demonstrate this query operator, this guide introduces a `popularity` field. Since the last.fm dataset does not have a real popularity metric, the number of tags per track is used as a *proxy* of the true track popularity. + +The following script runs through the dataset and counts the number of tags and creates a Vespa [partial update](/en/writing/partial-updates) feed operation per track. 
+ + +Paste the above into file create-popularity-updates.py + + +```python expandable +import os +import sys +import json + +directory = sys.argv[1] +seen_tracks = set() + +def process_file(filename): + global seen_tracks + with open(filename) as fp: + doc = json.load(fp) + title = doc['title'] + artist = doc['artist'] + hash = title + artist + if hash in seen_tracks: + return + else: + seen_tracks.add(hash) + + track_id = doc['track_id'] + tags = doc['tags'] + tags_dict = dict() + for t in tags: + k,v = t[0],int(t[1]) + tags_dict[k] = v + n = len(tags_dict) + + vespa_doc = { + "update": "id:music:track::%s" % track_id, + "fields": { + "popularity": { + "assign": n + } + } + } + print(json.dumps(vespa_doc)) + +sorted_files = [] +for root, dirs, files in os.walk(directory): + for filename in files: + filename = os.path.join(root, filename) + sorted_files.append(filename) +sorted_files.sort() +for filename in sorted_files: + process_file(filename) +``` + +With this script, run through the dataset and create the [partial update](/en/writing/partial-updates) feed : + +```bash +$ python3 create-popularity-updates.py lastfm\_test > updates.jsonl +``` + +Add the `popularity` field defined with `fast-search` to the `track` schema. 
Also, a `popularity` rank profile is added, which uses one thread per search: + +```js expandable +schema track { + + document track { + + field track_id type string { + indexing: summary | attribute + rank: filter + match: word + } + + field title type string { + indexing: summary | index + index: enable-bm25 + } + + field artist type string { + indexing: summary | index + } + + field tags type weightedset { + indexing: summary | attribute + attribute: fast-search + } + + field similar type tensor(trackid{}) { + indexing: summary | attribute + attribute: fast-search + } + + field popularity type int { + indexing: summary | attribute + attribute: fast-search + } + } + + fieldset default { + fields: title, artist + } + + document-summary track_id { + summary track_id { } + } + + rank-profile personalized { + first-phase { + expression: rawScore(tags) + } + } + + rank-profile similar { + inputs { + query(user_liked) tensor(trackid{}) + } + first-phase { + expression: sum(attribute(similar) * query(user_liked)) + } + } + + rank-profile similar-t2 inherits similar { + num-threads-per-search: 2 + } + + rank-profile popularity { + num-threads-per-search: 1 + first-phase { + expression: attribute(popularity) + } + } +} +``` + + +Paste the above into file app/schemas/track.sd + + +Deploy the application again : + +```bash +$ vespa deploy --wait 300 app +``` + +Adding a new field does not require a restart, apply the partial updates by: + +```bash +$ vespa feed -t http://localhost:8080 updates.jsonl +``` + +With that feed job completed, it is possible to select the five tracks with the highest popularity by using the [range()](/en/reference/querying/yql#range) query operator with [hitLimit](/en/reference/querying/yql#hitlimit): + +```bash +vespa query \ + 'yql=select track_id, popularity from track where {hitLimit:5,descending:true}range(popularity,0,Infinity)' \ + 'ranking=popularity' +``` + +The search returned 1,352 documents, while we asked for just five. 
The reason is that the `hitLimit` annotation for the `range` operator only specifies the lower bound. Documents that are tied with the same `popularity` value within the 5 largest values are returned. + +The `range()` query operator with `hitLimit` can be used to efficiently implement *top-k* selection for ranking a subset of the documents in the index. For example, use the `range` search with `hitLimit` to only run the track [recommendation tensor computation](#tensor-computations) over the most popular tracks: + +```bash +vespa query \ + 'yql=select title,artist, track_id, popularity from track where {hitLimit:5,descending:true}range(popularity,0,Infinity) and !(track_id in (@userLiked))' \ + 'input.query(user_liked)={{trackid:TRQIQMT128E0791D9C}:1.0,{trackid:TRWJIPT128E0791D99}:1.0,{trackid:TRGVORX128F4291DF1}:1.0}' \ + 'ranking=similar' \ + 'hits=5' \ + 'userLiked=TRQIQMT128E0791D9C,TRWJIPT128E0791D99,TRGVORX128F4291DF1' +``` + +Notice that this query returns 1,349 documents while the `range` search from the previous example returned 1,352 documents. This is due to the `not` filter. + +The range search with `hitLimit` can be used for cases where one wants to select efficiently *top-k* of a single valued numeric `attribute` with `fast-search`. Some use cases which can be efficiently implemented by using it: + +- Run ranking computations over the most recent documents using a `long` to represent a timestamp (e.g., using Unix epoch). +- Compute personalization tensor expressions over pre-selected content, e.g. using popularity. +- Optimize [sorting](/en/reference/querying/sorting-language) queries, instead of sorting a large result, find the smallest or largest values quickly by using range search with `hitLimit`. +- Autosuggest - see [#25333](https://github.com/vespa-engine/vespa/issues/25333). 
+ +Do note that any other query or filter terms in the query are applied after having found the top-k documents, so an aggressive filter removing many documents might end up recalling 0 documents. + +This behavior is illustrated with this query: + +```bash +vespa query \ + 'yql=select track_id, popularity from track where {hitLimit:5,descending:true}range(popularity,0,Infinity) and popularity=99' +``` + +This query fails to retrieve any documents because the range search finds 1,352 documents where popularity is 100; *and'ing* that top-k result with the popularity=99 filter constraint ends up with 0 results. + +Using the range search query operator with `hitLimit` is practical for search use cases like auto-complete or [search suggestions](https://github.com/vespa-engine/sample-apps/tree/master/incremental-search/search-suggestions) where one typically uses [match: prefix](/en/reference/schemas/schemas#match) or n-gram matching using [match: gram](/en/reference/schemas/schemas#match). Limiting the searches for the first few typed characters by including a `hitLimit` range on popularity can greatly improve the query performance and at the same time match against popular suggestions. As the user types more characters, the number of matches is greatly reduced, so ranking can focus on more factors than just the single popularity attribute and increase the `hitLimit`. + +## Match phase limit - early termination + +An alternative to `range` search with `hitLimit` is using early termination with [match-phase](/en/reference/schemas/schemas#match-phase), which enables early termination of search and `first-phase` ranking using a document field to determine the search evaluation order. + +Match-phase early termination uses an attribute field during matching and ranking to impact the order the search and ranking is performed in. 
If a query is likely to generate more than `ranking.matchPhase.maxHits` hits per node, the search will be terminated early and matching and evaluation of the query will be in the order dictated by the `ranking.matchPhase.attribute` attribute field. + +Match phase early termination requires using a single-value numeric field with `attribute` and `fast-search`. See [Match phase query parameters](/en/reference/api/query#ranking.matchPhase). Match-phase limit cannot terminate early or stop any potential `second-phase` ranking expression, only matching and `first-phase` ranking, hence the name: *match phase limit*. + +The following enables `matchPhase` early termination with `maxHits` target set to 100: + +```bash +$ vespa query \ + 'yql=select track_id, popularity from track where true' \ + 'ranking=popularity' \ + 'ranking.matchPhase.maxHits=100' \ + 'ranking.matchPhase.attribute=popularity' \ + 'hits=2' +``` + +Which will produce the following result: + +```json expandable +{ + "timing": { + "querytime": 0.007, + "summaryfetchtime": 0.002, + "searchtime": 0.01 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 1476 + }, + "coverage": { + "coverage": 0, + "documents": 252, + "degraded": { + "match-phase": true, + "timeout": false, + "adaptive-timeout": false, + "non-ideal-state": false + }, + "full": false, + "nodes": 1, + "results": 1, + "resultsFull": 0 + }, + "children": [ + { + "id": "index:tracks/0/63f963f1f9372275e12d9e9c", + "relevance": 100.0, + "source": "tracks", + "fields": { + "track_id": "TRGCNGP12903CFA2BA", + "popularity": 100 + } + }, + { + "id": "index:tracks/0/7a74f1cd064acef348a1a701", + "relevance": 100.0, + "source": "tracks", + "fields": { + "track_id": "TRFVTTT128F930D148", + "popularity": 100 + } + } + ] + } +} +``` + +In this case, totalCount became 1,476, a few more than the `range` search with `hitLimit`. 
Notice also the presence of `coverage:degraded` - this informs the client that this result was not fully evaluated over all matched documents. Read more about [graceful result degradation](/en/performance/graceful-degradation). Note that the example uses the `popularity` rank-profile which was configured with one thread per search, for low settings of `maxHits`, this is the recommended setting. + +```js +rank-profile popularity { + num-threads-per-search: 1 + first-phase { + expression: attribute(popularity) + } +} +``` + +The core difference from capped range search is that `match-phase` is safe, as filters work inline with the search and are not applied after finding the top-k documents. + +This query does not trigger match-phase early termination because there are few hits matching the query: + +```bash +vespa query \ + 'yql=select track_id, popularity from track where popularity=99' \ + 'ranking=popularity' \ + 'ranking.matchPhase.maxHits=100' \ + 'ranking.matchPhase.attribute=popularity' \ + 'hits=2' +``` + +Generally, prefer `match-phase` early termination over `range` search with `hitLimit`. Match phase limiting can also be used in combination with text search queries: + +```bash +vespa query \ + 'yql=select title, artist, popularity from track where userQuery()' \ + 'query=love songs' \ + 'type=any' \ + 'ranking=popularity' \ + 'ranking.matchPhase.maxHits=100' \ + 'ranking.matchPhase.attribute=popularity' \ + 'hits=2' +``` + +Since this query uses `type=any` the above query retrieves a lot more documents than the target `matchPhase.maxHits`, so early termination is triggered, which will then cause the search core to match and rank tracks with the highest popularity. + +Early termination using match-phase limits is a powerful feature that can keep latency and cost in check for many large scale serving use cases where a document quality signal is available. Match phase termination also supports specifying a result diversity constraint. 
See [Result diversification blog post](https://blog.vespa.ai/result-diversification-with-vespa/). Note that result diversity is normally obtained with Vespa [result grouping](/en/querying/grouping), the match-phase diversity is used to ensure that diverse hits are also collected **if** early termination kicks in. + +## Advanced query tracing + +This section introduces query tracing. Tracing helps understand where time (and cost) is spent, and how to best optimize the query or schema settings. Query tracing can be enabled using the following parameters: + + + + + + + +A simple example query with tracing enabled: + +```bash +vespa query 'yql=select track_id from track where tags contains "rock"' \ + 'trace.level=3' 'trace.timestamps=true' 'trace.explainLevel=1' 'hits=1' +``` + +The first part of the trace traces the query through the stateless container search chain. For each searcher invoked in the chain a timestamp relative to the start of the query request is emitted: + +```json +{ + "trace": { + "children": [ + { + "message": "Using query profile 'default' of type 'root'" + }, + { + "message": "Invoking chain 'vespa' [com.yahoo.search.querytransform.WeakAndReplacementSearcher@vespa -> com.yahoo.prelude.statistics.StatisticsSearcher@native -> ... -> federation@native]" + }, + { + "children": [ + { + "timestamp": 0, + "message": "Invoke searcher 'com.yahoo.search.querytransform.WeakAndReplacementSearcher in vespa'" + }] + } + ] + } +} +``` + +The trace runs all the way to the query is dispatched to the content node(s) and the merged response is returned up to the client. + +```json +{ + "timestamp": 2, + "message": "sc0.num0 search to dispatch: query=[tags:rock] timeout=9993ms offset=0 hits=1 restrict=[track]" +} +``` + +In this case, with tracing it has taken 2ms of processing in the stateless container, before the query is about to be put on the wire on its way to the content nodes. + +The first protocol phase is the next trace message. 
In this case, the reply is read from the wire at timestamp 6, so approximately 4 ms was spent in the first protocol matching phase, including network serialization and deserialization. + +```json +{ + "timestamp": 6, + "message": [ + { + "start_time": "2022-03-27 15:03:20.769 UTC", + "traces": [ + + ], + "distribution-key": 0, + "duration_ms": 1.9814 + } + ] +} +``` + +```text +Inside this message are the content node traces of the query; `timestamp_ms` is relative to the start of the query on the content node. In this case, the content node uses 1.98 ms to evaluate the first protocol phase of the query (`duration_ms`). +``` + +More explanation of the content node `traces` is coming soon. It includes information like + +- How much time was spent traversing the dictionary and setting up the query. +- How much time was spent on matching and first-phase ranking. +- How much time was spent on second-phase ranking (if enabled). + +These traces can help guide both feature tuning decisions and [scaling and sizing](/en/performance/sizing-search). + +Later in the trace one can also see the second query protocol phase, which is the summary fill: +```json +{ + "timestamp": 7, + "message": "sc0.num0 fill to dispatch: query=[tags:rock] timeout=9997ms offset=0 hits=1 restrict=[track] summary=[null]" +} +``` + +And finally an overall breakdown of the two phases: + +```json +{ + "timestamp": 9, + "message": "Query time query 'tags:rock': 7 ms" +} +{ + "timestamp": 9, + "message": "Summary fetch time query 'tags:rock': 2 ms" +} +``` + +Also try the [Trace Visualizer](https://github.com/vespa-engine/vespa/tree/master/client/js/app#trace-visualizer) for a flame-graph of the query trace. + +## Tear down the container + +This concludes this tutorial. 
The following removes the container and the data: + +```bash +$ docker rm -f vespa +``` diff --git a/mintlify-docs/en/performance/profiling.mdx b/mintlify-docs/en/performance/profiling.mdx new file mode 100644 index 0000000000..a2830a10ae --- /dev/null +++ b/mintlify-docs/en/performance/profiling.mdx @@ -0,0 +1,107 @@ +--- +title: "Profiling" +--- + +Guidelines when profiling: + +- Define clearly what to profile. +- Find a load that represents what to profile. This is often the hardest part, as there is a lot of noise if stressing other components. +- Make sure that there are no other bottlenecks that blocks stressing the profiled component. It makes little sense to do CPU profiling if the network or the disk is the limitation factor. +- If possible, write special unit-tests like benchmark programs that stress exactly what to profile. +- If the system is multithreaded: + - Always profile single threaded first - that gives a baseline for doing the scaling tests. Verify utilizing as many cores as expected. + - Increase scaling gradually to at least 2x numcores or until throughput degrades. + +Also see [using valgrind with Vespa](/en/performance/valgrind). + +## CPU profiling + +||| +|---|---| +| **vmstat** | *vmstat* can be used to figure out what kind of resources are used:

• cpu usage split in user, system, idle, and io wait: system should be low(`<10`)
• swap in/out: should be zero.

**Note:**
A maxed out system should have either maxed out disks or cpu (`idle == 0`). If not, there might be lock contention or the system is bottlenecked by upstream services.


Example:

`$ vmstat 1`

`procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----`
`r b swpd free buff cache si so bi bo in cs us sy id wa`
`0 0 5628 3315460 304024 23008616 0 0 14 34 0 0 0 0 99 0`
`1 0 5628 3298884 304024 23008640 0 0 0 396 33 4615 9 1 90 0`
`0 0 5628 3316336 304028 23008644 0 0 0 0 15 4469 4 1 95 0`
`0 0 5628 3316592 304028 23008644 0 0 0 0 24 4364 0 0 100 0`
`0 0 5628 3316592 304028 23008644 0 0 0 2948 20 4305 0 0 100 0`
`0 0 5628 3316468 304028 23008644 0 0 0 0 22 4259 0 0 100 0`
`0 0 5628 3316468 304028 23008644 0 0 0 180 20 4279 0 0 100 0`
`0 0 5628 3316468 304028 23008644 0 0 0 0 26 4349 0 0 100 0`
`16 0 5628 3284236 304056 23008688 0 0 12 188 17 9196 38 2 60 0`
`19 0 5628 3267020 304056 23008732 0 0 8 128 44 6408 99 1 0 0`
`16 0 5628 3245472 304060 23008840 0 0 20 0 9 7191 99 1 0 0`
`17 0 5628 3227784 304060 23008872 0 0 20 0 27 6420 99 1 0 0`| +| **top** | Use [top](https://linux.die.net/man/1/top) to get a real-time view of which processes consume CPU and memory. | +| **iostat** | Use [iostat](https://linux.die.net/man/1/iostat) to monitor disk IO. Note that the % busy is useless for SSD/NVMe storage disks, see [Two traps in iostat: %util and svctm](https://brooker.co.za/blog/2014/07/04/iostat-pct.html). | + + +## CPU Profiling using perf + +Sometimes, when debugging cpu usage in a remote cluster and debugging performance, it might be beneficial to get a performance profile snapshot. To use `perf` find the pid of the [vespa-proton-bin](/en/content/proton) process which can be obtained using [vespa-sentinel-cmd](/en/reference/operations/self-managed/tools#vespa-sentinel-cmd), or *top/ps*. Record: + +```bash +$ sudo perf record -g --pid= sleep 60 +``` + +View a performance profile report: + +```text +$ sudo perf report +``` + +Sometimes it is useful to have kernel debug info installed to get symbol info for the Linux kernel: + +```bash +$ sudo yum install --enablerepo=base-debuginfo kernel-debuginfo-$(uname -r) +``` + +It is important to get somewhat same version of *kernel-debuginfo* as the *kernel* package. + +### Container privileges + +When debugging an unprivileged docker container, `perf` commands can be executed from inside a privileged container sharing pid space: + +```bash +$ CONTAINER=host002-09 +$ sudo docker run -ti --rm --privileged --pid container:$CONTAINER \ + --entrypoint bash $(sudo docker ps --filter name=$CONTAINER --format "{{.Image}}") +``` + +This starts a privileged container that shares the pid namespace, using the same docker image as the container to debug. Run `perf record ...` inside this privileged container. 
+ +## Profiling the Query Container + +This section describes how you can configure the Container to allow for profiling custom searchers in order to identify performance bottlenecks - be it lock contention or CPU intensive algorithms. + +### Install YourKit profiler on the Container + +Yourkit is a good and simple tool for finding hotspots in Java code. It supports both sampling and tracing. Often it is necessary to use both modes. Tracing is accurate as to how many times a method is invoked and from where. That can be used to analyze if you are actually not computing the same thing from multiple places and overall doing more than you need. However, it will hide effects of cache miss and especially cost of atomic operations and synchronization costs. + +Assume there is an installation in a data center that you would like to profile, preferably with a nice UI running on your local desktop. All this is just a few steps away: +- Install yourkit +- Modify *services.xml*: + + ```xml + + + ... + + ``` + Read more about [jvm tuning](/en/performance/container-tuning). Disabling the freezedetector stops the container from shutting down during profiling. +- Re-deploy the application: + + ```bash + $ vespa deploy appdir + ``` +- restart Vespa on the node that runs the Container + + +Browse *$VESPA\_HOME/logs/vespa.log* for errors. You are now ready to perform profiling; you just need to install the UI on your desktop. + +### Install YourKit UI on the Desktop + +The server is ready for profiling, now install the YourKit profiler on the desktop. Download the distribution that fits the OS you are running from [YourKit](https://www.yourkit.com/). Follow the installation instructions, including setting the *license server*. + + +**Note:** + +By default the YourKit agent runs on port 10001. 
If Vespa is running on hosts not directly reachable from the desktop, setting up an SSH tunnel can work around this: + + +```bash +$ ssh -L 1080:$hostname:10001 +``` + +where *$hostname* is the node that is running the container with the YourKit agent profiler. All traffic to localhost (the desktop) port 1080 will be forwarded to the remote application running on port 10001. + +### Using Yourkit + +You are now ready to profile your application. (You will need to put some realistic load against the container instance, see the [Vespa benchmarking guide](/en/performance/benchmarking).) After having started the load simulation, you can start the profiling session: open the YourKit application installed locally and select *Monitor Remote Applications => Connect to remote application*. Enter *localhost:1080* and press Connect. You should now see the profiling screen with *Remote application "Server" (PID XXXXX) is being profiled at localhost:1080*. \ No newline at end of file diff --git a/mintlify-docs/en/performance/rate-limiting-searcher.mdx b/mintlify-docs/en/performance/rate-limiting-searcher.mdx new file mode 100644 index 0000000000..69cdc1cddd --- /dev/null +++ b/mintlify-docs/en/performance/rate-limiting-searcher.mdx @@ -0,0 +1,85 @@ +--- +title: "Rate Limiting Search Requests" +sidebarTitle: "Rate-limiting queries" +--- + +To avoid overloading a Vespa content cluster or to limit query load from e.g. certain clients, the bundled [Vespa Rate Limiting Searcher](https://github.com/vespa-engine/vespa/blob/master/container-search/src/main/java/com/yahoo/search/searchers/RateLimitingSearcher.java) can be configured to reject incoming requests to a search chain with [HTTP return code 429](https://en.wikipedia.org/wiki/List_of_HTTP_status_codes#429) if the number of requests per second exceeds a certain quota. The counter will reset once the quota is refilled the next second. 
+ +## Getting Started + +While the rate limiting searcher is bundled with Vespa, it needs to be explicitly configured in [services.xml](/en/reference/applications/services/services) before it is loaded. This example shows how the searcher is configured for the default search chain: + +```xml + + + + + + + +``` + +When this configuration is live, the rate limiting searcher is loaded, but not active. It is enabled on a per-request basis using either parameters directly in your HTTP search request or by configuring query profiles. Both approaches are shown below. + +## Activate With Query Parameters + +The searcher takes these query parameter arguments: + +| Argument | Type | Description | +| :--- | :--- | :--- | +| rate.id | String | The id of the client from rate limiting perspective. | +| rate.cost | Double | The cost Double of this query. This is read after executing the query and so can be set by downstream searchers inspecting the result to allow differencing the cost of various queries. Default is 1. | +| rate.quota | Double | The cost per second a particular id is allowed to consume in this system. | +| rate.idDimension | String | The name of the rate-id dimension used when logging metrics. If this is not specified, the metric will be logged without dimensions. | +| rate.dryRun | Boolean | Emit metrics on rejected requests but don't actually reject them. | + +In a typical scenario, the application logic constructing the HTTP search request will set `&rate.id` and `&rate.quota` in the request depending on where the traffic originated - example: + +```bash +http://localhost:8080/search?query=foo&rate.id=clientA&rate.quota=300 +``` + +## Activate With Query Profiles + +If you don't want to add the rate limiting parameters to every request or don't control the application logic constructing the search requests, you can enable the rate limiting using [query profiles](/en/reference/querying/query-profiles). 
An example default query profile enabling rate limiting in the application package: + +```xml + + 100 + default + +``` + +### Per Client Quotas + +In a shared service scenario, you may want to assign different quota based on a query parameter passed with the request, e.g. `&clientId`. The example below will assign different quotas based on the clientId parameter passed with the request: + +```xml + + + clientId + + + 100 + default + clientID + + + + 200 + clientA + clientID + + + + + 400 + clientB + clientID + + +``` + +## Metrics + +The searcher will emit the [count metric](/en/operations/metrics) `requestsOverQuota` with the dimension `[rate.idDimension=rate.id]`. \ No newline at end of file diff --git a/mintlify-docs/en/performance/sizing-feeding.mdx b/mintlify-docs/en/performance/sizing-feeding.mdx new file mode 100644 index 0000000000..50438e6f82 --- /dev/null +++ b/mintlify-docs/en/performance/sizing-feeding.mdx @@ -0,0 +1,226 @@ +--- +title: "Vespa Feed Sizing Guide" +sidebarTitle: "Feed sizing guide" +--- + +Vespa is optimized to sustain a high feed load while serving - also during planned and unplanned changes to the instance. This guide provides an overview of how to optimize feed performance and also understand bottlenecks. + +The [reads and writes](/en/writing/reads-and-writes) guide has an overview of the Vespa architecture and relevant APIs for feeding and searching. One key takeaway is that Vespa is split into two main service types: + +- Stateless container cluster(s) +- Stateful content cluster(s) + +The stateless container cluster is responsible for processing all document operations to Vespa. The stateful content cluster is responsible for writing and syncing all document operations (persisting state and managing data structures). + +Generally, Vespa cannot sustain a higher write rate than the underlying storage can handle (MB/s and IOPS). 
To understand resource utilization it is critical that resource usage like CPU, memory, disk and network are monitored. Only this way can a system be sized correctly and bottlenecks identified. + +## Stateless container cluster + +The processing of all document operations to Vespa are routed through the stateless *container* cluster. Processing includes both Vespa processing like [linguistic processing](/en/linguistics/linguistics) and custom [document processing](/en/applications/document-processors). The stateless container cluster is also responsible for [embedding](/en/rag/embedding) inference. Embedding inference can be compute-resource intensive, depending on number of *embed* calls and the size of the embedding model. See [embedding performance](/en/rag/embedding#embedder-performance). + +The stateless cluster is compute (CPU/GPU util) bound and processing rates should scale linearly with the number of nodes and the number of V-CPU's in the cluster as long as the client can deliver enough operations over the network. + +See [multiple container clusters](/en/writing/document-routing#multiple-container-clusters) for how to separate search and write to different container clusters. Isolated container clusters are useful for high load scenarios where the container cluster is compute bound and where there is concurrent search and write load and where we want to avoid write operations to impact search queries due to compute-related resource contention. + +The stateless container cluster is implemented in Java (JVM), ensure enough memory allocated for heap to avoid excessive JVM garbage collection. See [stateless container tuning](/en/performance/container-tuning) for tuning options. The default max heap size is 1.5GB in self-hosted deployments unless overridden. + +## Stateful content cluster + +**All feed operations** to Vespa are **written and synced** to the [transaction log](/en/content/proton#transaction-log) on the content node(s). 
This includes both writing new documents and updating existing documents. The Vespa transaction log is a write-ahead log (WAL) that ensures durability of the data. A reply is only sent back to the client when the operation is written successfully to the transaction log and applied (visible in search/get). + +The write pattern is append and sequential (not random) IO. Note that Vespa cannot sustain a higher write rate than the underlying storage can handle. Feeding might be impacted severely if the content nodes are using network attached storage where the sync operation (for durability) has a much higher cost than on local attached storage (e.g. SSD). See [sync-transactionlog](/en/reference/applications/services/content#sync-transactionlog). + +### Document store + +Documents are written to the [document store](/en/content/proton#document-store) in all [indexing modes](/en/reference/applications/services/content#document) - this is where the copy of the document is persisted. + +Adding new documents to the document store is append-only with a sequential IO pattern. Writing a new version of a document (PUT a document that already exists) is the same as for a document ID that does not exist. The in-memory mapping from document ID to summary data file position is updated to point to the latest version in both cases. The summary files are [defragmented](/en/content/proton#defragmentation) to remove old versions of documents. + +### Attribute store + +Fields that are defined with the [attribute](/en/content/attributes) property are in-memory fields that support in-place updates with higher [partial update](/en/writing/partial-updates) throughput than fields that are indexed with the `index` property (avoiding read-apply-write pattern). The attribute store is a memory-only data structure that is regularly persisted to disk in the [attribute store](/en/content/proton#attributes). 
+ +```js +schema ticker { + document ticker { + field volume type int { + indexing: summary | attribute + } + } +} +``` + +See [partial updates](/en/writing/partial-updates) for details. + +#### Redundancy settings + +To achieve memory-only updates (plus transaction log writing), make sure all attributes to update are [ready](/en/content/proton#sub-databases), meaning the content node has loaded the attribute field into memory: + +- One way to ensure this is to set [searchable copies](/en/reference/applications/services/content#searchable-copies) equal to [redundancy](/en/reference/applications/services/content#redundancy) - i.e. all nodes that has a replica of the document has loaded it as searchable +- Another way is by setting [fast-access](/en/reference/schemas/schemas#attribute) on each attribute to update + +### Index + +Changes to index fields are written to the [document store](#document-store) and the [index](/en/content/proton#index). Note that an UPDATE operation requires a read-modify-write to the document store and limits throughput. Refer to [partial updates](/en/writing/partial-updates) for more details. + +```js +schema music { + document music { + field artist type string { + indexing: summary | index + } + } +} +``` + +### Content Node Thread pools + +Several thread pools are involved when handling write operations on a content node. These are summarized in the following table. Not all mutating operations can be handled in parallel and tracking these metrics can help identify bottlenecks. For example, if you notice that feed throughput is not increasing beyond a certain CPU utilization, it might be that one of the thread pools is saturated. Metrics are available for each thread pool, see [searchnode metrics](/en/reference/operations/metrics/vespa-metric-set#searchnode-metrics) for details. + +To analyse performance and bottlenecks, the most relevant metrics are *.utilization* and *.queuesize*. 
In addition, *.saturation* is relevant for the [field writer](#field-writer-executor) thread pool. See [bottlenecks](#bottlenecks) for details. + +| Thread pool | Description || +|---|---|---| +| **master** | Updates the [document metastore](/en/content/attributes#document-meta-store), prepares tasks to the [index](#index-thread) and [summary](#summary-thread) threads, and splits a write operation into a set of tasks to update individual [attributes](/en/content/proton#attributes), executed by the threads in the [field writer](#field-writer-executor). | | +| | **Threads** | 1 | +| | **Instances** | One instance per document database. | +| | **Metric prefix** | *content.proton.documentdb.threading_service.master.* | +| **index** | Manages writing of index fields in the [memory index](/en/content/proton#index). It splits a write operation into a set of tasks to update individual index fields, executed by the threads in the [field writer](#field-writer-executor). | | +| | **Threads** | 1 | +| | **Instances** | One instance per document database. | +| | **Metric prefix** | *content.proton.documentdb.threading_service.index.* | +| **summary** | Writes documents to the [document store](/en/content/proton#document-store). | | +| | **Threads** | 1 | +| | **Instances** | One instance per document database. | +| | **Metric prefix** | *content.proton.documentdb.threading_service.summary.* | +| **field writer** | The threads in this thread pool are used to invert index fields, write changes to the memory index, and write changes to attributes. Index fields and attribute fields across all document databases are randomly assigned to one of the threads in this thread pool. A field that is costly to write or update might become the bottleneck during feeding. | | +| | **Threads** | Many, controlled by [feeding concurrency](/en/reference/applications/services/content#feeding). | +| | **Instances** | One instance shared between all document databases. 
| +| | **Metric prefix** | *content.proton.executor.field_writer.* | +| **shared** | The threads in this thread pool are among other used to compress and de-compress documents in the [document store](/en/content/proton#document-store), merge files as part of [disk index fusion](/en/content/proton#disk-index-fusion), and prepare for inserting a vector into a [HNSW index](/en/reference/schemas/schemas#index-hnsw). | | +| | **Threads** | Many, controlled by [feeding concurrency](/en/reference/applications/services/content#feeding). | +| | **Instances** | One instance shared between all document databases. | +| | **Metric prefix** | *content.proton.executor.shared.* | + +## Multivalue attribute + +[Multivalued attributes](/en/reference/schemas/schemas#field) are *weightedset*, *array of struct/map*, *map of struct/map* and *tensor*. The attributes have different characteristics, which affects write performance. Generally, updates to multivalue fields are more expensive as the field size grows: + +| Attribute | Description | +| :--- | :--- | +| **weightedset** | Memory-only operation when updating: read full set, update, write back. Make the update as inexpensive as possible using numeric types instead of strings, where possible Example: a weighted set of string with many (1000+) elements. Adding an element to the set means an enum store lookup/add and add/sort of the attribute multivalue map - details in [attributes](/en/content/attributes). Use a numeric type instead to speed this up - this has no string comparisons. | +| **array/map of struct/map** | Update to array of struct/map and map of struct/map requires a read from the [document store](/en/content/proton#document-store) and will reduce update rate - see [#10892](https://github.com/vespa-engine/vespa/issues/10892). | +| **tensor** | Updating tensor cell values is a memory-only operation: copy tensor, update, write back. 
For large tensors, this implies reading and writing a large chunk of memory for single cell updates. | + +## Parent/child + +[Parent documents](/en/schemas/parent-child) are global, i.e. have a replica on all nodes. Writing to fields in parent documents often simplifies logic, compared to the de-normalized case where all (child) documents are updated. Write performance depends on the average number of child documents vs number of nodes in the cluster - examples: + +- 10-node cluster, avg number of children=100, redundancy=2: A parent write means 10 writes, compared to 200 writes, or 20x better +- 50-node cluster, avg number of children=10, redundancy=2: A parent write means 50 writes, compared to 20 writes, or 2.5x worse + +Hence, the more children, the better performance effect for parent writes. + +## Conditional updates + +A conditional update looks like: + +```json +{ + "update" : "id:namespace:myDoc::1", + "condition" : "myDoc.myField == \"abc\"", + "fields" : { "myTimestamp" : { "assign" : 1570187817 } } +} +``` + +If the [document store](/en/content/proton#document-store) is accessed when evaluating the condition, performance drops significantly because you get random access instead of just appending to the persisted data structures. Conditions should be evaluated using attribute values for high performance - in the example above, *myField* should be an attribute. + + +**Note:** + +If the condition uses struct or map, values are read from the document store: + + +```bash + "condition" : "myDoc.myMap{1} == 3" +``` + +This is true even though all struct fields are defined as attributes. Improvements to this are tracked in [#10892](https://github.com/vespa-engine/vespa/issues/10892). 
+ +## Client roundtrips + +Consider the difference when sending two field assignments to the same document: + +```json +{ + "update" : "id:namespace:doctype::1", + "fields" : { + "myMap{1}" : { "assign" : { "timestamp" : 1570187817 } }, + "myMap{2}" : { "assign" : { "timestamp" : 1570187818 } } + } +} +``` + +vs. + +```json +{ + "update" : "id:namespace:doctype::1", + "fields" : { + "myMap{1}" : { "assign" : { "timestamp" : 1570187817 } } + } +} +{ + "update" : "id:namespace:doctype::1", + "fields" : { + "myMap{2}" : { "assign" : { "timestamp" : 1570187818 } } + } +} +``` + +In the first case, *one* update operation is sent from [vespa feed](/en/clients/vespa-cli) - in the latter, the client will send the second update operation *after* receiving an ack for the first. When updating multiple fields, put the updates in as few operations as possible. See [ordering details](/en/content/content-nodes#ordering). + +## Feed vs. search + +A content node normally has a fixed set of resources (CPU, memory, disk). Configure the CPU allocation for feeding vs. searching in [concurrency](/en/reference/applications/services/content#feeding) - value from 0 to 1.0 - a higher value means more CPU resources for feeding. + +In addition, you can also control priority of feed versus search, or rather how nice feeding shall be. Since a process needs root privileges for increasing priority, we have opted to reduce priority (be nice) of feeding. This is controlled by a [niceness](/en/reference/applications/services/content#feeding-niceness) number from 0 to 1.0 - higher value will favor search over feed. 0 is default. + +## Feed testing + +When testing for feeding capacity: + + + +Use [vespa feed](/en/clients/vespa-cli). + + +Test using one content node to find its capacity and where the bottlenecks are (resource utilization metrics) and Vespa metrics + + +Test feeding performance by adding feeder instances. 
Make sure network and CPU (content and container node) usage increases, until saturation. + + +See troubleshooting at end to make sure there are no errors. + + + +Other scenarios: Feed testing for capacity for sustained load in a system in steady state, during state changes, during query load. + +## Troubleshooting + + +**Note:** + +Use the [monitoring sample app](/en/operations/self-managed/monitoring#monitoring-with-grafana) to set up a sample system, with a document/query feed and dashboards, to familiarize with metrics. + + +||| +| :--- | :--- | +| **Metrics** | Use [metrics](/en/reference/operations/metrics/vespa-metric-set#storage-metrics) from content nodes and look at queues - queue wait time and queue size (all metrics in milliseconds):

`vds.filestor.averagequeuewait.sum`
`vds.filestor.queuesize`

Check content node metrics across all nodes to see if there are any outliers. Also check latency metrics per operation type:

`vds.filestor.allthreads.put.latency`
`vds.filestor.allthreads.update.latency`
`vds.filestor.allthreads.remove.latency` | +| **Bottlenecks** | One of the [threads](#content-node-thread-pools) used to handle write operations might become the bottleneck during feeding. Look at the *.utilization* metrics for all thread pools:

`content.proton.documentdb.threading_service.master.utilization`
`content.proton.documentdb.threading_service.index.utilization`
`content.proton.documentdb.threading_service.summary.utilization`
`content.proton.executor.field_writer.utilization`
`content.proton.executor.shared.utilization`

If utilization is high for [field writer](#field-writer-executor) or [shared](#shared-executor), adjust [feeding concurrency](/en/reference/applications/services/content#feeding) to allow more CPU cores to be used for feeding.

For the field writer also look at the *.saturation* metric:

`content.proton.executor.field_writer.saturation`

If this is close to 1.0 and higher than *.utilization* it indicates that one of its worker threads is a bottleneck. The reason can be that this particular thread is handling a large index or attribute field that is naturally expensive to write and update. Use the [custom component state API](/en/content/proton#custom-component-state-api) to find which index and attribute fields are assigned to which thread (identified by *executor\_id*), and look at the detailed statistics of the field writer to find which thread is the actual bottleneck:

`state/v1/custom/component/documentdb/mydoctype/subdb/ready/index`
`state/v1/custom/component/documentdb/mydoctype/subdb/ready/attributewriter`
`state/v1/custom/component/threadpools/field_writer` | +| **Failure rates** | Inspect these metrics for failures during load testing:

`vds.distributor.updates.latency`
`vds.distributor.updates.ok`
`vds.distributor.updates.failures.total`
`vds.distributor.puts.latency`
`vds.distributor.puts.ok`
`vds.distributor.puts.failures.total`
`vds.distributor.removes.latency`
`vds.distributor.removes.ok`
`vds.distributor.removes.failures.total` | +| **Blocked feeding** | This metric should be 0 - refer to [feed block](/en/writing/feed-block):

`content.proton.resource_usage.feeding_blocked` | +| **Concurrent mutations** | Multiple clients updating the same document concurrently will stall writes:

`vds.distributor.updates.failures.concurrent_mutations`

Mutating client operations towards a given document ID are sequenced on the [distributors](/en/content/content-nodes#distributor). If an operation is already active towards a document, a subsequently arriving one will be bounced back to the client with a transient failure code. Usually this happens when users send feed from multiple clients concurrently without synchronisation. Note that feed operations sent by a single client are sequenced client-side, so this should not be observed with a single client only. Bounced operations are never sent on to the backends and should not cause elevated latencies there, although the client will observe higher latencies due to automatic retries with back-off. | +| **Wrong distribution** | `vds.distributor.updates.failures.wrongdistributor`

Indicates that clients keep sending to the wrong distributor. Normally this happens infrequently (but it *does* happen on client startup or distributor state transitions), as clients update and cache all state required to route directly to the correct distributor (Vespa uses a deterministic CRUSH-based algorithmic distribution). Some potential reasons for this:

1. Clients are being constantly re-created with no cached state.
2. The system is in some kind of flux where the underlying state keeps changing constantly.
3. The client distribution policy has received so many errors that it throws away its cached state to start with a clean slate to e.g. avoid the case where it only has cached information for the bad side of a network partition.
4. The system has somehow failed to converge to a shared cluster state, causing parts of the cluster to have a different idea of the correct state than others. | +| **Cluster out of sync** | *update\_puts/gets* indicate "two-phase" updates:

`vds.distributor.update_puts.latency`
`vds.distributor.update_puts.ok`
`vds.distributor.update_gets.latency`
`vds.distributor.update_gets.ok`
`vds.distributor.update_gets.failures.total`
`vds.distributor.update_gets.failures.notfound`

If replicas are out of sync, updates cannot be applied directly on the replica nodes as they risk ending up with diverging state. In this case, Vespa performs an explicit read-consolidate-write (write repair) operation on the distributors. This is usually a lot slower than the regular update path because it doesn't happen in parallel. It also happens in the write-path of other operations, so risks blocking these if the updates are expensive in terms of CPU. Replicas being out of sync is by definition not the expected steady state of the system. For example, replica divergence can happen if one or more replica nodes are unable to process or persist operations. Track (pending) merges:

`vds.idealstate.buckets`
`vds.idealstate.merge_bucket.pending`
`vds.idealstate.merge_bucket.done_ok`
`vds.idealstate.merge_bucket.done_failed`| diff --git a/mintlify-docs/en/performance/sizing-search.mdx b/mintlify-docs/en/performance/sizing-search.mdx new file mode 100644 index 0000000000..42683d15b7 --- /dev/null +++ b/mintlify-docs/en/performance/sizing-search.mdx @@ -0,0 +1,343 @@ +--- +title: "Vespa Serving Scaling Guide" +sidebarTitle: "Serving sizing guide" +--- + +*Vespa can scale in multiple scaling dimensions:* + +- Scale document volume and write volume +- Scale query throughput +- Scale serving latency to meet service level agreements (SLA) + +The question one tries to answer during a sizing exercise is: *What the total cost would be to serve a use case using Vespa?*. + +This document helps sizing an application correctly with as low cost as possible. Vespa is used to implement many use cases, and this document is relevant for all of them: + +- Serving a [text ranking](/en/ranking/nativerank) use case or a [recommendation](/en/learn/tutorials/news-1-deploy-an-application) use case +- Serving a machine learned model, e.g., an [ONNX](/en/ranking/onnx), [XGBoost](/en/ranking/xgboost), or [LightGBM](/en/ranking/lightgbm) model + +With Vespa, it is possible to do benchmarking on a few nodes to infer the overall performance and cost of the chosen deployment architecture, and as Vespa supports [live resizing](/en/content/elasticity), it is easy to scale from a prototype to a full production size deployment. + +This document covers sizing and capacity planning for serving, see [feed performance sizing](/en/performance/sizing-feeding) for feed performance sizing and [Vespa serving feature tuning](/en/performance/feature-tuning). 
It also covers the following topics: +- [Data distribution](#data-distribution) in Vespa and how it impacts serving +- [Scaling Serving Latency and Throughput](#content-cluster-scalability-model) in Vespa +- [Scaling Data Volume](#scaling-document-volume-per-node) in Vespa + +## Data distribution in Vespa - flat versus grouped + +The basic element in the Vespa search architecture is a content node, which is part of a content cluster. A Vespa deployment can have several content clusters, which can be scaled independently. + +A content node holds a fraction of the entire data corpus. Data is distributed to nodes using a [distribution algorithm](/en/content/idealstate), which goal is to uniformly distribute data over the set of nodes. The goal is also to avoid distribution skew, while at the same time supporting re-distribution of data, with minimal data movement, if the size of the content cluster changes. Read [content cluster elasticity](/en/content/elasticity) to learn how data is distributed across nodes, and how adding or removing nodes works. See also [Vespa's consistency model](/en/content/consistency) documentation. + +### Flat content distribution + +![Flat content distribution](/assets/img/flat-content-distribution.svg) + +With a flat distribution, the content is distributed to content nodes using the [ideal state distribution algorithm](/en/content/idealstate). A query is dispatched in parallel from a container instance to **all** content nodes in the content cluster. Each content node searches the *active* part of the *ready* sub-database. The above figure illustrates a deployment using 4 nodes with *redundancy* 2 and *searchable-copies* 2 - see the [availability](#high-data-availability) section. + +When using flat data distribution, the only way to scale query throughput is to reduce the search latency. 
Given a fixed occupancy (users, load clients), this relationship between query throughput and latency is described by [Little's law](https://en.wikipedia.org/wiki/Little%27s_law) - more on this in [content cluster scalability model](#content-cluster-scalability-model) section. + +### Grouped content distribution + +![Grouped content distribution](/assets/img/grouped-content-distribution.svg) + +With a grouped distribution, content is distributed to a configured set of *groups*, such that the entire document collection is contained in each group. A *group* contains a set of content nodes where the content is distributed using the [distribution algorithm](/en/content/idealstate). In the above illustration, there are 4 nodes in total, 2 groups with 2 nodes in each group. *redundancy* is 2 and *searchable-copies* is also 2. As can be seen from the figure with this grouped configuration, the content nodes only have a populated ready sub-database. A query is dispatched in parallel to all nodes in **one group** at a time using a [dispatch-policy](/en/reference/applications/services/content#dispatch-policy). The default policy is *adaptive*, which load balances over the set of groups, aiming at even latency. + +### High Data Availability + +Ideally, the data is available and searchable at all times, even during node failures. High availability costs resources due to data replication. How many replicas of the data to configure depends on what kind of availability guarantees the deployment should provide. Configure availability vs cost: + +||| +| :--- | :--- | +| [redundancy](/en/reference/applications/services/content#redundancy) | Defines the total number of copies of each piece of data the cluster will store and maintain to avoid data loss. Example: with a redundancy of 2, the system tolerates 1 node failure before any further node failures may cause data to become unavailable. 
| +| [searchable-copies](/en/reference/applications/services/content#searchable-copies) | Configures how many of the copies (as configured with *redundancy*) to be indexed (*ready*) at any time. Configuring *searchable-copies* to be less than *redundancy* saves resources (memory, disk, cpu), as not all copies are indexed (*ready*). In case of node failure, the remaining nodes needs to index the *not ready* documents which belonged to the failed node. In this transition period, the search has reduced search coverage. | + +### Content node database + +![Content node databases](/assets/img/proton-databases.svg) + +The above figure illustrates the three [sub-databases](/en/content/proton#sub-databases) inside a Vespa content node. + +- The documents in the **Ready** DB are indexed, but only the documents in **Active** state are searchable. In a flat distributed system there is only one active instance of the same document, while with grouped distribution there is one active instance per group. +- The documents in the **Not Ready** DB are stored but not indexed. +- The documents in the **Removed** DB are stored but blocklisted, hidden from search. The documents are permanently deleted from storage by [Proton maintenance jobs](/en/content/proton#proton-maintenance-jobs). + +If the availability guarantees tolerate temporary search coverage loss during node failures (e.g. *searchable-copies*\=1), this is by far the most optimal for serving performance - the query work per node is less, as index structures like posting lists are smaller. The index structures only contains documents in *Active* state, not including *Not Active* documents. + +With *searchable-copies*\=2 and *redundancy*\=2, each replica is fully indexed on separate content nodes. Only the documents in *Active* state are searchable, the posting lists for a given term are (up to) doubled as compared to *searchable-copies*\=1. 
+ +## Life of a query in Vespa + +Find an overview in [query execution](/en/querying/query-api#query-execution): + +![Query execution - from query to response](/assets/img/query-to-response.svg) + +Vespa executes a query in two protocol phases (or more if using [result grouping features](/en/querying/grouping)) to optimize the network footprint of the parallel query execution. The first protocol phase executes the query in parallel over content nodes in a group to find the global top hits, the second protocol phase fetches the data of the global top hits. + +During the first phase, content nodes match and [rank](/en/basics/ranking) documents using the selected rank-profile/model. The hits are returned to the stateless container for merging and potentially blending when multiple content clusters are involved. + +When the global top ranking documents are found, the second protocol phase fetch the summary data for the global best hits (e.g. summary snippets, the original field contents, and ranking features). By doing the query in two protocol phases one avoids transferring summary data for hits which will not make it into the global best hits. + +Components Involved in query execution: + +- **Container** + - Parses the [API](/en/querying/query-api) request and the [query](/en/querying/query-language) and run time context features. + - Modifies the query according to the schema specification (stemming, etc.) for a text search application or creating run time query or user context tensors for an ML serving application. + - Invokes chains of custom [container components/plugins](/en/applications/components) which can work on the request and query input and also the results. + - Dispatching of query to content nodes in the content cluster(s) for parallel execution. 
With flat distribution queries are dispatched to all content nodes, while with a grouped distribution the query is dispatched to all content nodes within a group and the queries are load-balanced between the groups using a [dispatch-policy](/en/reference/applications/services/content#dispatch-policy). + - Blending of global top ranking results from cluster(s). + - Fetching the top ranking results with document summaries from cluster(s). + - Result processing and possible top-k re-ranking and finally rendering of results back to client. +- **Content node (Proton)** + - Finding all documents matching the [query specification](/en/querying/query-api). For an ML serving use case, the selection might be a subset of the content pool (e.g. limit the model to only be evaluated for content-type video documents), while for a text ranking application it might be a [WAND](/en/ranking/wand) text matching query. + - Calculating the score (which might be a text ranking relevancy score or the inferred score of a Machine Learned model) of each hit, using the chosen rank-profile. See [ranking with Vespa](/en/basics/ranking). + - Aggregating information over all the generated hits using [result grouping](/en/querying/grouping). + - Sorting hits on relevancy score (text ranking) or inference score (e.g. ML model serving), or on attribute(s). See *max-hits-per-partition* and *top-k-probability* in [dispatch tuning](/en/reference/applications/services/content#dispatch-tuning) for how to tune how many hits to return. + - Processing and returning the document summaries of the selected top hits (during summary fetch phase after merging and blending has happened on levels above). + +The detailed execution inside Proton during the matching and ranking first protocol phase is: + + + +Build up the query tree from the serialized network representation. 
+ + +Look up the query terms in the index and B-tree dictionaries and estimate the number of hits each term and parts of the query tree will produce. Terms which search attribute fields without [fast-search](/en/content/attributes#fast-search) will be given a hit count estimate equal to the total number of documents. + + +Optimize and re-arrange the query tree for most efficient performance, trying to move terms or operators with the lowest hit ratio estimate first in the query tree. + + +Prepare for query execution, by fetching posting lists from the index and B-tree structures. + + +Multithreaded execution per search starts using the above information. Each thread will do its own thread local setup. + + +Each search thread will evaluate the query over its document space. + + +The search threads complete first phase and agree which hits will continue to second phase ranking (if enabled per the used rank-profile). The threads operate over a shared heap with the global top ranking hits. + + +Each thread will then complete second phase and grouping/aggregation/sorting. + + +Merge the results from all threads and return up to the container. + + + +[Container](/en/applications/components) clusters are stateless and easy to scale horizontally, and don't require any data distribution during re-sizing. The set of stateful content nodes can be scaled independently and [re-sized](/en/content/elasticity) which requires re-distribution of data. Re-distribution of data in Vespa is supported and designed to be done without significant serving impact. Altering the number of nodes or groups in a Vespa content cluster does not require re-feeding of the corpus, so it's easy to start out with a sample prototype and scale it to production scale workloads. + +## Content cluster scalability model + +Vespa is a parallel computing platform where the work of matching and ranking is parallelized across a set of nodes and processors. 
The speedup one can get by altering the number of nodes in a Vespa content group follows [Amdahl's law](https://en.wikipedia.org/wiki/Amdahl%27s_law), which is a formula used to find the maximum improvement possible by improving a particular part of a system. In parallel computing, *Amdahl's law* is mainly used to predict the theoretical maximum speedup for program processing using multiple processors. In Vespa, as in any parallel computing system, there is work which can be parallelized and work which cannot. The relationship between these two work types determine how to best scale the system, using a flat or grouped distribution. + +||| +| :--- | :--- | +| **static query work** | Portion of the query work on a content node that does not depend on the number of documents indexed on the node. This is an administrative overhead caused by system design and abstractions, e.g. number of memory allocations per query term. Typically, a large query tree means higher static work, and this work cannot be parallelized over multiple processors, threads or nodes. The static query work portion is described in step 1 to 4 and step 9 in the detailed life of a query explanation above. | +| **dynamic query work** | Portion of the query work on a content node that depends on the number of documents indexed and active on the node. This portion of the work scales mostly linearly with the number of matched documents. The dynamic query work can be parallelized over multiple processors and nodes. Referenced later as *DQW*. The *DQW* also depends on the phase two protocol summary fill where the actual contents of the global best documents is fetched from the content nodes which produced the hit in the first protocol phase. | +| **Total query work** | The total query work is given as the dynamic query work (*DQW*) + static query work (*SQW*). 
| + +Adding content nodes to a content cluster (keeping the total document volume fixed) with flat distribution reduces the dynamic query work per node (*DQW*), but does not reduce the static query work (*SQW*). The overall system cost also increases as you need to rent another node. + +Since *DQW* depends on and scales almost linearly with the number of documents on the content nodes, one can try to distribute the work over more nodes. *Amdahl's law* specifies that the maximum speedup one can achieve by parallelizing the dynamic work (*DQW*) is given by the formula: + + +$$ +\text{max\_speedup}_{\text{group}} = \frac{1}{1 - \frac{DQW}{SQW + DQW}} +$$ + +For example, if one, by inspecting [metrics](#metrics-for-vespa-sizing), sees that the *DQW* = 0.50, the maximum speedup one can get by increasing parallelism by using more nodes and decreasing *DQW* is 2. With fixed occupancy (number of users, clients or load), [Little's Law](https://en.wikipedia.org/wiki/Little%27s_law) tells us that one could achieve two times the throughput if one is able to speed up the latency by a factor of two: + + +$$ +\frac{1}{1 - \frac{0.5}{0.5 + 0.5}} = 2 +$$ + + +When *SQW* is no longer significantly less than *DQW*, adding more nodes in a flat distributed cluster just increases the overall system cost. This comes without any serving performance gain, except increasing overall supported feed throughput, which increases almost linearly with number of nodes. + +Two different *DQW/(DQW+SQW)* factors are illustrated in the figures below. The overall query work *TQW* is the same for both cases (10 ms), but the *DQW* portion of the *TQW* is different. The throughput (QPS) is a function of the latency ([Little's Law](https://en.wikipedia.org/wiki/Little%27s_law)) and the number of cpu cores \* nodes.
Using 1 node with 24 v-cpu cores and 10 ms service time (*TQW*), one can expect reaching close to 2400 QPS at 100% utilization (unless there are other bottlenecks like network or stateless container processing). + +![Scaling throughput/latency where DQW/(SQW+DQW)=0.5](/assets/img/ScalingLatencyFactor0.5.svg) +![Scaling throughput/latency where DQW/(SQW+DQW)=0.5](/assets/img/ScalingLatencyFactor0.005.svg) + +In the first figure the overall latency is 10 ms, but the dynamic query work (latency) is only 50% and given *Amdahl's law* it follows that the maximum speedup one can get is two. This is true regardless of how many processors or nodes the dynamic query work is parallelized over. No matter how many nodes one adds, one doesn't get above 4800 queries/s. The only thing one achieves by adding more nodes is increasing the cost without *any* performance benefits. + +In the second figure there is a system where the dynamic work portion is much higher (0.9), and the theoretical maximum speedup becomes bound by 10x as given by *Amdahl's law*. Note that both figures are with a single flat distributed content cluster with a fixed document volume. + +Given the theory, one can derive two rules of thumb for scaling throughput and latency: + +||| +| :--- | :--- | +| **Add nodes in a flat distribution** | When DQW/TQW is large (close to 1.0), throughput QPS can be scaled by just adding more content nodes in a system using flat distribution. This will reduce the number of documents per node, and thus reduce the *DQW* per node. | +| **Add groups using grouped distribution** | When DQW/TQW is low, one can no longer just add more content nodes to scale throughput and must instead use a grouped distribution to scale throughput. | + +## Scaling latency in a content group + +Irrespective of using single group (flat distribution) or multiple groups, the serving latency depends on the factors already described; *DQW* and *SQW*.
For use cases where *DQW* dominates the total query work *TQW*, one can effectively scale latency down by parallelizing the *DQW* over more nodes per group. + +It is important to decide on a latency service level agreement (SLA) before sizing the Vespa deployment for the application and query features. A latency SLA is often specified as a latency percentile at a certain throughput level - example: + +- SLA Example 1: 95.0 percentile < 100 ms @ 2000 QPS +- SLA Example 2: 95.0 percentile < 40 ms @ 8000 QPS + +Different use cases might have different performance characteristics, depending on how the dynamic query work portion is compared to the static query work portion. This graph illustrates the relationship between overall latency versus number of documents indexed per node for two different use cases. + +![Latency vs document count per node](/assets/img/latency-documents.svg) + +- For the yellow use case the measured latency is almost independent of the total document volume. This is called sublinear latency scaling, which calls for scaling up using better flavor specification instead of scaling out. + The observed latency at 10M documents per node is almost the same as with 1M documents per node. Such a use case would be most cost-effective by storing as many documents as possible (within the memory/disk/feeding constraints set by the concurrency settings and node flavor) and scale throughput by using a grouped distribution. Efficient query operators which are sublinear have scaling properties like the yellow case. Examples of such query operators include the [wand operators](/en/ranking/wand), and [approximate nearest neighbor search operator](/en/querying/approximate-nn-hnsw) +- For the blue use case the measured latency shows a clear correlation with the document volume. This is a case where the dynamic query work portion is high, and adding nodes to the flat group will reduce the serving latency. The sweet spot is found where targeted latency SLA is achieved.
This sweet spot depends on which model or ranking features are in use, e.g. how expensive the model is per retrieved or ranked document. + For example, a [GBDT xgboost model](/en/ranking/xgboost) with 3000 trees might breach the targeted latency SLA already at 200K documents, while a 300 tree model might be below the SLA at 2M documents. Using exact [nearest neighbor search](/en/querying/nearest-neighbor-search) has scaling properties like the blue case. See also [feature tuning](/en/performance/feature-tuning). + +### Reduce latency with multithreaded per-search execution + +It is possible to reduce latency of queries where the [dynamic query work](#dynamic-query-work) portion is high. Using multiple threads per search for a use case where the static query work is high will be as wasteful as adding nodes to a flat distribution. + +![Content node search latency vs threads per search](/assets/img/Threads-per-search.svg) + +Using more threads per search will reduce latency as long as the dynamic portion of the query cost is high compared to the static query cost. The reduction in latency comes with the cost of higher cpu utilization. + +A search request with four threads will occupy all four threads until the last thread has completed, and the intra-node per thread document space partitioning must be balanced to give optimal results. + +For rank profiles with second phase ranking, see [phased ranking](/en/ranking/phased-ranking), the hits from first-phase ranking are rebalanced so that each matching thread scores about the same amount of hits using the second phase ranking expression. 
+ +From the above examples with the blue and yellow use case it follows that + +- Linear exact nearest neighbor search latency can be reduced by using more threads per search +- Sublinear approximate nearest neighbor search latency does not benefit from using more threads per search + +By default the number of threads per search is one, as that gives the best resource usage measured as CPU resources used per query. The optimal threads per search depends on the query use case, and should be evaluated by benchmarking. + +The threads per search settings globally is tuned by [persearch](/en/reference/applications/services/content#requestthreads-persearch). This can be overridden to a lower value in [rank profiles](/en/reference/schemas/schemas#num-threads-per-search) so that different query use cases can use different number of threads per search. Using multiple threads per search allows better utilization of multicore cpu architectures for low query volume applications. The `persearch` number in services should always be equal to the highest `num-threads-per-search` in your rank profiles. Setting it higher reduces the maximum number of concurrent queries without any latency benefit. + +#### Thread configuration + +The `search` and `persearch` settings together determine the maximum number of queries that can execute concurrently on a content node: + +```text +max concurrent queries = search / persearch +``` + +For example, with `search=64` (the default) and `persearch=8`, only 8 queries can execute simultaneously. Queries arriving when all slots are busy are queued in the match engine, adding latency without any backpressure to the caller. This can significantly reduce throughput even when CPU cores are available. + +To get started, set `search` to be 2x number of CPU cores. `persearch` should never be more than number of cores. `summary` should be equal to number of cores. 
Start with the default value of 1 for `persearch` and only increase it for lower query latency, as it will reduce throughput and efficiency. + +If you increase `persearch`, consider increasing `search` proportionally to maintain enough concurrent query slots. For example, if you need `persearch=8` on a 16-core machine, setting `search=128` gives 16 concurrent query slots — enough that queries using fewer threads (via per-rank-profile [num-threads-per-search](/en/reference/schemas/schemas#num-threads-per-search) overrides) are not starved of executor slots. + +Monitor `content.proton.executor.match.utilization` to detect when the match engine is saturated — a sustained value at or near 1.0 indicates all executor slots are busy and queries are queuing. A high `content.proton.executor.match.queuesize.max` relative to `search / persearch` confirms the bottleneck. + +## Scaling the size of the retrievable unit + +Retrieving units that are too small or too large can have a drastic impact on both the quality and performance of your search. Consider an example where we want to search in PDF files: Creating one document per PDF file seems like a logical solution, and with Vespa this is possible - up to a point. But as system architects we must consider the potential edge cases: some files may be entire books, or long reports with many hundreds or even thousands of pages. + +The current max document size is limited by the max protobuf message size of 2 GB, but we advise staying well below this limit, at least < 200MB and ideally < 1MB for even, predictable performance. For reference to scale, the complete text of the bible is about 4MB. + +*Split too-large documents into smaller units for better search quality and performance!* Natural subdivisions like chapters, parts or sections are good candidates for splitting into separate Vespa documents. 
+ +### When documents are too large + +If each document is a complete PDF file and some are very large, what problems could we run into? + +**Usefulness of the result** - knowing that there are relevant parts *somewhere* in several hundred pages of text is not very helpful to the user. As of 2025, this is still true also if the "user" is an LLM in a RAG or agentic workflow. + +We can improve the usefulness by providing [dynamic snippets](/en/querying/document-summaries#dynamic-snippets) or returning per-chunk similarity scores as [feature values](/en/ranking/ranking-expressions-features#accessing-feature-function-values-in-results) to be able to identify the most relevant portions of the returned summary. + +**Performance** - Returning large document values in the query response over HTTP has a significant cost, both in CPU time spent in rendering the response, compression, and network transfer time. This can easily become the largest contribution to the total end-to-end latency. + +Large documents can also contribute to poor performance in indexing and query execution, or greatly increase the amount of temporary memory required for complex ranking expressions like multi-dimensional ColBERT maxsim. As documents are processed, indexed, stored and ranked as individual units, working on a few very large documents at a time may not offer the system enough opportunity to parallelize and result in poor, uneven utilization of resources, and even a small fraction of very large documents may impact your mean (and especially higher percentile) latencies both for processing and query execution. + +### When documents are too small + +What problems can occur if documents are very small? Consider indexing small fragments of text, like a single sentence or even a word. + +**Granularity** - As the size of text fragments decreases, we are less likely to find good matches for queries as the relevant terms or context is spread across multiple documents.
The response may not contain enough information to resolve the user information need, or to even judge if it is likely to resolve the need if the source is examined in full. This problem is described in [traditional information retrieval literature](https://nlp.stanford.edu/IR-book/html/htmledition/choosing-a-document-unit-1.html) and has also been a popular topic in recent years as "chunking" for semantic search. + +**Overhead** - splitting a document into very small pieces means that more resources will be spent on per-document overhead. Shared metadata like e.g. the abstract or access permissions of a document will be replicated many times, and updates/deletes to the source document or its metadata must fan-out to all the sub-documents, increasing write load. Unlike too-large documents, having a fraction of very small documents is fine, what matters for efficiency is that the average size is not too small. + +## Scaling document volume per node + +One wants to fit as many documents as possible into a node given the node constraints (e.g. available cpu, memory, disk) while maintaining: + +- The targeted search latency SLA +- The targeted feed and update rate, and feed latency + +With the latency SLA in mind, benchmark with increasing number of documents per node and watch system level metrics and Vespa metrics. If latency is within the stated latency SLA and the system meets the targeted sustained feed rate, overall cost is reduced by fitting more documents into each node (e.g. by increasing memory, cpu and disk constraints set by the node flavor). + +A larger fan-out, using more nodes to partition the data, also comes with higher tail latency, as the search waits for results from all nodes. Therefore, the overall execution time depends on the slowest node at the time of the query. In such cases with large fan-out, using [adaptive timeout](/en/reference/applications/services/content#coverage) is recommended to keep tail latency in check.
+ +Vespa will block feed operations if [resource limits](/en/reference/applications/services/content#resource-limits) have been reached. + +### Disk usage sizing + +Disk usage of a content node increases as the document volume increases: The disk usage per document depends on various factors like the number of schemas, the number of indexed fields and their settings, and most important the size of the fields that are indexed and stored. The simplest way to determine the disk usage is to simply index documents and watch the disk usage along with the relevant metrics. The *redundancy* (number of copies) impact the disk usage footprint, obviously. + +Note that [content node maintenance jobs](/en/content/proton#proton-maintenance-jobs) temporarily increases disk usage. E.g. *index fusion* is an example, where new index files are written, causing an increase in used disk space while running. Space used depends on configuration and data - headroom must include the temporary usage. See [metrics for capacity planning](#metrics-for-vespa-sizing). + +### Memory usage sizing + +The memory usage on a content node increases as the document volume increases. The memory usage increases almost linearly with the number of documents. The vespa-proton-bin process (content node) uses the full 64-bit virtual address space, so the virtual memory usage reported might be high, as both index and summary files are mapped into memory using mmap and pages are paged into memory as needed. + +The memory usage per document depends on the number of fields, the raw size of the documents and how many of the fields are defined as [attributes](/en/content/attributes). Also see [metrics for capacity planning](#metrics-for-vespa-sizing). + +## Scaling Throughput + +As seen in the previous sections, when the static query work (*SQW*) becomes large, scale throughput using grouped distribution. 
Regardless, if throughput is scaled by grouped distribution for use cases with high static query work portion or a flat distribution for high dynamic query work portion, one should identify how much throughput the total system supports. + +Finding where the latency starts climbing exponentially versus throughput is important in order to make sure that the deployed system is scaled well below this throughput threshold. Also, that it has capacity to absorb load increases over time, as well as having sufficient capacity to sustain node outages during peak traffic. + +At some throughput level some resource(s) in the system will be fully saturated, and requests will be queued up causing latency to spike up, as requests are spending more time waiting for the saturated resource. + +This behaviour is illustrated in the figure below: + +![Latency vs throughput](/assets/img/QPS-scaling.svg) + +A small increase in serving latency is observed as throughput increases, until saturated at approximately 2200 QPS. Pushing more queries than this only increases queueing time, and latency increases sharply. + +## Scaling for failures and headroom + +It is important to also measure behaviour under non-ideal circumstances, to avoid getting too good results. E.g., by simulating node failures or node replacements, verifying feeding concurrency versus search and serving. + +Generally, the higher utilization a system has in production, the more fragile it becomes when changing query patterns or ranking models. + +The target system utilization should be kept sufficiently low for the response times to be reasonable and within latency SLA, even with some extra traffic occurring at peak hours. See also [graceful degradation](/en/performance/graceful-degradation). Also see [sizing write versus read](/en/performance/sizing-feeding#feed-vs-search). 
+ +## Metrics for Vespa Sizing + +The relevant [Vespa Metrics](/en/reference/operations/metrics) for measuring the cost factors, in addition to system level metrics like cpu util, are: + +*Metric capturing static query work (SQW) at content nodes* + +```text +content.proton.documentdb.matching.rank_profile.query_setup_time +``` + +*Metric capturing dynamic query work (DQW) at content nodes* + +```text +content.proton.documentdb.matching.rank_profile.query_latency +``` + +By sampling these metrics, one can calculate the theoretical speedup we can achieve by increasing number of nodes using flat distribution, by using *Amdahl's law*: + + +$$ +\text{max\_speedup} = \frac{1}{1 - \frac{\text{match\_time}}{\text{query\_setup\_time} + \text{match\_time}}} +$$ + + +In addition, the following metrics are used to find number of matches per query, per node: + +```text +content.proton.documentdb.matching.rank_profile.docs_matched +content.proton.documentdb.matching.rank_profile.queries +``` + +**Disk usage**: +- documentdb: *vespa.content.proton.documentdb.disk\_usage.last* +- transaction log: *vespa.content.proton.transactionlog.disk\_usage.last* + +**Memory usage**: +- documentdb: *vespa.content.proton.documentdb.memory\_usage.allocated\_bytes.last* diff --git a/mintlify-docs/en/performance/streaming-search.mdx b/mintlify-docs/en/performance/streaming-search.mdx new file mode 100644 index 0000000000..95e1b746d1 --- /dev/null +++ b/mintlify-docs/en/performance/streaming-search.mdx @@ -0,0 +1,180 @@ +--- +title: "Streaming Search" +--- + +Search engines make queries fast by creating indexes over the stored data. While the indexes cost extra resources to build and maintain, this is usually a good tradeoff because they make queries so much cheaper.
However, this does not hold for use cases where the data is split into many small subsets where each query just searches one (or a few) of these subsets, the canonical example being *personal indexes* where a user only searches their own data. + +For such use cases, Vespa provides *streaming search* - a mode where only the raw data of the documents is [stored](/en/content/proton#document-store) and searches are implemented by streaming - no indexes required. In addition, attributes are also only stored on disk so that the only data needed in memory is 45 bytes per document, meaning that streaming mode lets you store billions of documents on each node. + +This is especially important in personal data applications using vector embeddings, which otherwise require a lot of memory and require ANN to perform well, which is often unsuited for searching personal data as they don't surface all the most relevant documents. + +Streaming mode is suitable when subsets are *on average* small compared to the entire corpus. Vespa delivers low query latency also for the occasional large subset (say, users with huge amounts of data) by automatically sharding such data groups over multiple content nodes, searched in parallel. + + +**Note:** + +Using both streaming and indexed mode in the same cluster is discouraged. The resource usage and performance characteristics for the two modes are very different, and it might be very hard to operate and get good performance for such a system. + + +## Differences in streaming search + +Streaming search uses the same implementation of most features in Vespa, including matching, ranking, grouping and sorting, and mostly supports the same features. A [schema](/en/basics/schemas) used in [indexed mode](/en/reference/applications/services/content#document) can in most cases be used in streaming search without any changes. 
The following differences however apply: + +- Streaming search does not use the [linguistics](/en/linguistics/linguistics) module while feeding documents. Instead, the string fields of each streamed document are [tokenized](/en/linguistics/linguistics-opennlp#tokenization) and [normalized](/en/linguistics/linguistics-opennlp#normalization) on the fly as part of performing a search. Query terms are [normalized](/en/linguistics/linguistics-opennlp#normalization) in the same way. [Stemming](/en/linguistics/linguistics-opennlp#stemming) is not supported for streaming search. +- Since there are no indexes, the content nodes do not collect term statistics and average field length statistics. + - Term significance should be provided by a [global significance model](/en/ranking/significance#global-significance-model), if [text matching features](/en/reference/ranking/rank-features) that benefit from it are used. This includes among others *[bm25](/en/ranking/bm25)*, *nativeRank*, *nativeFieldMatch*, *nativeProximity* and *fieldMatch*. + - If using *bm25*, adjust the [averageFieldLength](/en/reference/ranking/rank-feature-configuration#properties) configuration for a more precise *bm25* score. +- Even without any indexes, fields must be specified as [index](/en/reference/schemas/schemas#index) or [attribute](/en/reference/schemas/schemas#attribute) to make them available for matching, ranking, grouping and sorting. The associated default [match](/en/reference/schemas/schemas#match) setting for a field is equivalent to [indexed mode](/en/reference/applications/services/content#document). +- Streaming search supports a wider range of matching options (such as substring and prefix), and these can be specified either at query time or at configuration time. See [matching options](#matching-options-in-streaming-search) for details. +- [HNSW](/en/reference/schemas/schemas#index-hnsw) indexes are not supported in streaming search. 
This means a [nearest neighbor search](/en/querying/nearest-neighbor-search#using-nearest-neighbor-search) is always *exact* when used in streaming search. The following parameters for adjusting *approximate* nearest neighbor search thus have no effect: + - [post-filter-threshold](/en/reference/schemas/schemas#post-filter-threshold) + - [approximate-threshold](/en/reference/schemas/schemas#approximate-threshold) + - [filter-first-threshold](/en/reference/schemas/schemas#filter-first-threshold) + - [filter-first-exploration](/en/reference/schemas/schemas#filter-first-exploration) + - [exploration-slack](/en/reference/schemas/schemas#exploration-slack) + - [target-hits-max-adjustment-factor](/en/reference/schemas/schemas#target-hits-max-adjustment-factor) +- [Parent/child relationships](/en/schemas/parent-child) are not supported in streaming search. Using such functionality will fail [deployment](/en/learn/glossary#deployment). +- [Predicate fields](/en/schemas/predicate-fields) are not supported in streaming search. They can exist as summary only fields in the document, but they are not searchable. +- [URI-fields](/en/reference/schemas/schemas#uri) are not supported in streaming search. They are handled as regular string fields, and do not support the uri search functionality. +- [firstPhaseRank](/en/reference/ranking/rank-features#firstPhaseRank) rank feature always returns the default value in streaming search. + +## Using streaming search + +These are the steps required to use streaming search: + + + +Set indexing mode to [streaming](/en/reference/applications/services/content#document): + +```xml + + + +``` + + +Use [document IDs](/en/schemas/documents) which contains a *group* value specifying the small subset the document belongs to (usually a userid). 
These have the form `id:myNamespace:myType:**g=myUserid**:myLocalid` and when represented as paths in [document/v1](/en/writing/document-v1-api-guide) requests, `document/v1/myNamespace/myType/**group/myUserId**/myLocalId` + + +Specify the subset to search using the query parameter [streaming.groupname](/en/reference/api/query#streaming.groupname). + + + +See the [vector streaming search sample application](https://github.com/vespa-engine/sample-apps/tree/master/vector-streaming-search) for a complete example. + +## Enabling indexing statements in streaming search + + +**Important:** + +Since Vespa 8.287, this section can be disregarded - it is not necessary to add `` tags. The configuration is identical to using indexed mode. + + +Indexing statements are - as the name indicates - mostly used for indexing, and so they are not executed by default with streaming search. + +However, sometimes it is convenient to run indexing statements also when using streaming, for example to use the `embed` function to turn text into an embedding vector, as in + +```js +indexing: input myTextField | embed | attribute +``` + +Indexing statements are run by a document processor, so to enable them with streaming, enable document processing on a container cluster and point to it as the one to do indexing processing from the content cluster: + +```xml + + + ... + + ... + + + + ... + + + + + ... + + +``` + +## Matching options in streaming search + +Streaming search offers more flexibility in matching text fields: Match settings can be specified at query time on any text field, and fields marked with `indexing: index` support [suffix](/en/reference/schemas/schemas#suffix) and [substring](/en/reference/schemas/schemas#substring) matching.
+ +To specify match settings at query time in YQL: + +```text +select * from sources * where artist contains ({prefix:true}"col") +select * from sources * where artist contains ({substring:true}"old") +select * from sources * where artist contains ({suffix:true}"play") +``` + +To specify a default match setting for a field in the schema: + +```js +field artist type string { + indexing: summary | index + match: substring +} +``` + +## Streaming search grouping extension + +[Grouping](/en/querying/grouping) works as normal with streaming search but offers two additional features, explained here. + +### Grouping over all documents + +Since streaming search "looks at" all documents matching the group name/selection regardless of the query, it is possible to group over all those documents and not just the ones matching the query. This is done by using `where(true)` in the grouping expression: + +```text +all( where(true) all(group(myfield) each(output(count()))) ) +``` + +When doing this, relevancy is not calculated for groups, as only matched hits have relevance. + +### The docidnsspecific function + +The `docidnsspecific` function returns the docid without namespace. + +```text +all( group(docidnsspecific()) each(output(count())) ) +``` + +## Resource usage with streaming search + +**Memory**: Streaming search requires 45 bytes of memory per document regardless of the document content. + +**Disk**: Streaming search requires disk space to store the raw document data in compressed form. The size is dependent on the actual data but can be extrapolated linearly with the number of documents. + +## Query tuning in streaming search + +Streaming search is a [visit](/en/writing/visiting) operation. 
Parallelism is configured using [persistence-threads](/en/reference/applications/services/content#persistence-threads): + +```xml + + +``` + +On [Vespa Cloud](/), this number is set automatically to match the number of VCPUs set in [resources](/en/reference/applications/services/services). If you cannot get lower latency by increasing VCPUs, it means your streaming searches have become IO bound. + +### Tuning document store: Direct IO and cache + +For better control of memory usage, use direct IO for reads when [document store cache](/en/reference/applications/services/content#summary) is enabled - this makes the OS buffer cache size smaller and more predictable performance. The document store cache will cache recent entries and increase performance for users or groups doing repeated accesses. This sets aside 1 GB for document store cache. + +```xml highlight= {7-8, 11-13} + + + + + + + directio + directio + + + + 1073741824 + +``` diff --git a/mintlify-docs/en/performance/topology-and-resizing.mdx b/mintlify-docs/en/performance/topology-and-resizing.mdx new file mode 100644 index 0000000000..483b263c20 --- /dev/null +++ b/mintlify-docs/en/performance/topology-and-resizing.mdx @@ -0,0 +1,127 @@ +--- +title: "Topology and Resizing" +sidebarTitle: "Topology and resizing" +--- + +Vespa has features to optimize cost, query latency, and throughput, at the same time, making tradeoffs for availability. This guide goes through various topologies by example, highlighting the most relevant tradeoffs and discusses operational events like node stop and changing the topology. + +Use cases for using a grouped topology is found in the [elasticity](/en/content/elasticity#grouped-distribution) guide. E.g., query latency can dictate the maximum number of documents per node and hence how many nodes are needed in a group - if query latency is at the maximum tolerated for 1M documents, 6 nodes are needed in a group for a 6M index. 
+ + +**Note:** + +Vespa Cloud supports a one-level grouped topology - a group of groups is hence not supported. + + +Content nodes are stateful, holding replicas of the documents to be queried. Content nodes can be deployed in different topologies - example using 6 nodes: + +![4 different topologies](/assets/img/grouped-topology.svg) + +Vespa Cloud requires a redundancy of at least 2. In this guide, it is assumed that redundancy, configured as [min-redundancy](/en/reference/applications/services/content#min-redundancy), is set to n=3. Redundancy is a function of data availability / criticality and cost, and varies from application to application. + +Redundancy is for storing a document replica on a node. Not all replicas are searchable - read [Proton](/en/content/proton) for a detailed understanding of sub-databases. + +## Out of the box: 1x6 + +Most applications should be configured without a grouped topology, until optimizing for a use case - see the elasticity guide linked above. Therefore, start with a *flat* configuration, like: + + +![1x6](/assets/img/1x6.svg) + +```xml +3 + + + +``` + + +This means the corpus is spread over 6 nodes, with 17% of documents active in queries each. This topology is called 1x6 in this guide. + +This is important to remember when benchmarking for latency, normally done on a single node with n=1. In the 6-node system with n=3, more memory and disk space are used for the redundant replicas - more on that later. + +This topology is the default topology, and works great: + +- When a node is stopped (unplanned, or planned like a software upgrade), there are 5 other nodes to serve queries, where each of the 5 will have 1/5 larger corpus to serve +- Adding capacity, say 17% is done by increasing the node count to 7 + + +**Note:** + +This topology is the default, and what most applications should start with. 
+ + +## 3-row topology: 3x2 + +Some applications, particularly the ones with extreme low-latency serving, will find that queries are dominated by the static part of query execution. This means that reducing the number of documents queried does not lower latency. + +The flip side is, increasing document count does not increase the latency much, either - consider 3x2: + + +![3x2](/assets/img/3x2.svg) + +```xml +3 + + + +``` + + +Here we have configured 3 groups, with n=3. This means the other node in the row does not have a replica - redundancy is between the rows. + +Each node now has 3x the number of documents per query (compared to 1x6), but query capacity is also tripled, as each row has the full document corpus. This can be a great way to scale query throughput! Notes: + +- At planned/unplanned node stop, the full row is eliminated from query serving - there are four nodes total left, in two rows. Query capacity is hence down to 67%. +- Feeding requirements are the same as in 1x6 - every document write is written to 3 replicas. +- Document reconciliation is independent of topology - replicas from all nodes are used when rebuilding nodes after a node stop. + +## 6-row topology: 6x1 + +Maximizing the number of documents per node is good for cases where the query latency is still within requirements, and less total work is done, as fewer nodes in a row calculate candidates in ranking. The extreme case is all documents on a single node replicated with 6 groups. This is a quite common configuration due to high throughput and simplicity: + + +![6x1](/assets/img/6x1.svg) + +```xml +6 + + + +``` + + +Notes: + +- Feeding *total work* is higher - with n=6, six replicas are written (compared to three above). See [feeding latency](#feeding) notes below. 
+ +## 2-row topology: 2x3 + +In this case, the application has a redundancy of 2 - it must be the same as the number of rows: + + +![2x3](/assets/img/2x3.svg) + +```xml +2 + + + +``` + + +By default, when a node stops (and it does daily for Vespa upgrades), the full row stops serving, which is 50% of the capacity out. + +When using this topology, we recommend setting [min-active-docs-coverage](/en/reference/applications/services/content#min-active-docs-coverage) to a lower number than the default 97%. E.g., if there are three nodes in a group, each node is 33.3%, and a node loss gives 66.7% coverage. Setting min-active-docs-coverage to 65 means that *more than one node* must stop for the group (row) to stop serving, which is normally what one would want in this topology. + +## Topology migration + +Migrating from one topology to another is easy, as Vespa Cloud will auto-migrate documents: + +- All rows must have the same node count, meaning `count / groups` must be an integer. +- When changing topology, Vespa Cloud will provision new nodes as needed to ensure no coverage loss. An increased node count is hence normal in the transition phase; superfluous nodes are de-provisioned after data migration. +- Topology migration is therefore a safe operation and makes it easy to optimize for the best price/performance. + +## Feeding + +Documents are fed to Vespa Cloud using the [`<document-api>`](/en/reference/api/document-v1#configuration) endpoint. This means one Vespa Container node forwards document writes to all the replicas in parallel. As all groups have a replica, adding a group will not add feed *latency* in theory due to the parallelism. However, there will be an increase in practice as more nodes mean more latency variation, and the slowest node sets the end latency. 
\ No newline at end of file diff --git a/mintlify-docs/en/performance/valgrind.mdx b/mintlify-docs/en/performance/valgrind.mdx new file mode 100644 index 0000000000..018f02b033 --- /dev/null +++ b/mintlify-docs/en/performance/valgrind.mdx @@ -0,0 +1,59 @@ +--- +title: "Using Valgrind with Vespa" +sidebarTitle: "Valgrind" +--- + +Valgrind is a useful tool to investigate bugs, and to get a detailed performance profile of an application after [profiling](/en/performance/profiling) to get the higher level picture. This documents how to run Vespa processes with valgrind. + +## Valgrind with callgrind + +Install valgrind. One might need to enable world writeable `$VESPA_HOME`: + +```bash +$ sudo chmod 777 $VESPA_HOME +``` + +Keep in mind to reset that after profiling session is completed. General use of valgrind - show memory errors: + +```bash +$ valgrind 'application' +``` + +Show call graph: + +```bash +$ valgrind --tool=callgrind 'application' +``` + +Show a detailed profiling graph - use this to optimize the application: + +```bash +$ valgrind --tool=callgrind --simulate-hwpref=yes --simulate-cache=yes \ + --dump-instr=yes --collect-jumps=yes 'application' +``` + +After running valgrind, copy *callgrind.out.\** to a host that has *kcachegrind* installed. Also copy the binary to the same path as it had while running. It might also be nice to have access to the code - path to code can be specified in kcachegrind. 
+ +## Start Vespa using valgrind + +Start Vespa with the following environment variables set: + +```bash +$ VESPA_USE_VALGRIND="vespa-proton" +``` + +Run Vespa under valgrind and check for memory errors (logs in `$VESPA_HOME/tmp/`): + +```bash +$ VESPA_USE_VALGRIND="vespa-proton" VESPA_VALGRIND_OPT="--tool=callgrind --simulate-hwpref=yes \ + --simulate-cache=yes --dump-instr=yes --collect-jumps=yes" +``` + +Profile the application: + +```bash +$ VESPA_USE_VALGRIND="vespa-proton" VESPA_VALGRIND_OPT="--tool=callgrind --simulate-hwpref=yes \ + --simulate-wb=yes --dump-instr=yes --collect-jumps=yes --collect-bus=yes --branch-sim=yes" +``` + +Remember to stop Vespa - the callgrind.* files are not generated until the program stops. diff --git a/mintlify-docs/en/querying/approximate-nn-hnsw.mdx b/mintlify-docs/en/querying/approximate-nn-hnsw.mdx new file mode 100644 index 0000000000..ff0fef4ecd --- /dev/null +++ b/mintlify-docs/en/querying/approximate-nn-hnsw.mdx @@ -0,0 +1,158 @@ +--- +title: "Approximate nearest neighbor search using HNSW index" +--- + +This document describes how to speed up searches for nearest neighbors in vector spaces by adding [HNSW index](../reference/schemas/schemas#index-hnsw) to tensor fields. For an introduction to nearest neighbor search, see [nearest neighbor search](/en/querying/nearest-neighbor-search) documentation, for practical usage of Vespa's nearest neighbor search, see [nearest neighbor search - a practical guide](/en/querying/nearest-neighbor-search-guide), and to have Vespa create vectors for you, see [embedding](../rag/embedding). + +Vespa implements a modified version of the Hierarchical Navigable Small World (HNSW) graph algorithm [paper](https://arxiv.org/abs/1603.09320). The implementation in Vespa supports: + +* **Filtering** - The search for nearest neighbors can be constrained by query filters. 
The [nearestNeighbor](../reference/querying/yql#nearestneighbor) query operator can be combined with other filters or query terms using the [Vespa query language](/en/querying/query-language). See the query examples in the [practical guide](/en/querying/nearest-neighbor-search-guide#combining-approximate-nearest-neighbor-search-with-query-filters). + +* **Multi-field vector Indexing** - A schema can include multiple indexed tensor fields and search any combination of them in a query. This is useful to support multiple models, multiple text sources, and multi-modal search such as indexing both a textual description and image for the same entity. + +* **Multi-vector Indexing** - A single document field can contain any number of vector values by defining it as a mixed tensor (a "map of vectors"). Documents will then be retrieved by the closest vector in each document compared to the query vector. See the [Multi-vector indexing sample application](https://github.com/vespa-engine/sample-apps/tree/master/multi-vector-indexing) for examples. This is commonly used to [index documents with multiple chunks](../rag/working-with-chunks). See also [this blog post](https://blog.vespa.ai/semantic-search-with-multi-vector-indexing/#implementation). + +* **Real Time Indexing** - CRUD (Create, Add, Update, Remove) vectors in the index in true real time. + +* **Mutable HNSW Graph** - No query or indexing overhead from searching multiple HNSW graphs. In Vespa, there is one graph per tensor field per content node. There is no segmented or partitioned graph where a query against a content node needs to scan multiple HNSW graphs. + +* **Multithreaded Indexing** - The costly part when performing real time changes to the *HNSW* graph is distance calculations while searching the graph layers to find which links to change. These distance calculations are performed by multiple indexing threads. 
+ +* **Multiple value types** - The cost driver of vector search is often storing the vectors in memory, which is required to produce accurate results at low latency. An effective way to reduce cost is to reduce the size of each vector value. Vespa supports double, float, bfloat16, int8 and [single-bit values](../rag/binarizing-vectors). Changing from float to bfloat16 can halve cost with negligible impact on accuracy, while single-bit values greatly reduce both memory and CPU costs, and can be effectively combined with larger vector values stored on disk as a paged attribute to be used for ranking. + +* **Optimized HNSW lookups** - ANN searches in Vespa [support](https://blog.vespa.ai/tweaking-ann-parameters/) both pre- and post-filtering, beam exploration, and filtering before distance calculation ("Acorn 1"). Tuning parameters for these makes it possible to strike a good balance between performance and accuracy for any data set. Vespa's [ANN tuning tool](https://vespa-engine.github.io/pyvespa/examples/ann-parameter-tuning-vespa-cloud) can be used to automate the process. + +## Using Vespa's approximate nearest neighbor search + +The query examples in [nearest neighbor search](/en/querying/nearest-neighbor-search) use exact search, which has perfect accuracy. However, this is computationally expensive for large document volumes as distances are calculated for every document which matches the query filters. + +To enable fast approximate matching, the tensor field definition needs an `index` directive. A Vespa [document schema](../basics/schemas) can declare multiple tensor fields with `HNSW` enabled. 
+ +```js expandable +field image_embeddings type tensor(i{},x[512]) { + indexing: summary | attribute | index + attribute { + distance-metric: angular + } + index { + hnsw { + max-links-per-node: 16 + neighbors-to-explore-at-insert: 100 + } + } +} + +field text_embedding type tensor(x[384]) { + indexing: summary | attribute | index + attribute { + distance-metric: prenormalized-angular + } + index { + hnsw { + max-links-per-node: 24 + neighbors-to-explore-at-insert: 200 + } + } +} +``` + +In the schema snippet above, fast approximate search is enabled by building an `HNSW` index for the `image_embeddings` and the `text_embedding` tensor fields. `image_embeddings` indexes multiple vectors per document, while `text_embedding` indexes one vector per document. + +The two vector fields use different [distance-metric](../reference/schemas/schemas#distance-metric) and `HNSW` index settings: + +* `max-links-per-node` - a higher value increases recall accuracy, but also memory usage, indexing and search cost. +* `neighbors-to-explore-at-insert` - a higher value increases recall accuracy, but also indexing cost. + +Choosing the value of these parameters affects accuracy, search performance, memory usage and indexing performance. See [Billion-scale vector search with Vespa - part two](https://blog.vespa.ai/billion-scale-knn-part-two/) for a detailed description of these tradeoffs. See [HNSW index reference](../reference/schemas/schemas#index-hnsw) for details on the index parameters. + +### Indexing throughput + + +![Real-time indexing throughput](https://blog.vespa.ai/assets/2022-01-27-billion-scale-knn-part-two/throughput.png) + +The `HNSW` settings impact indexing throughput. Higher values of `max-links-per-node` and `neighbors-to-explore-at-insert` reduce indexing throughput. Example from [Billion-scale vector search with Vespa - part two](https://blog.vespa.ai/billion-scale-knn-part-two/). 
+ +### Memory usage + +The value of `max-links-per-node` impacts memory usage; higher values mean higher memory usage: + +![Memory footprint](https://blog.vespa.ai/assets/2022-01-27-billion-scale-knn-part-two/memory.png) + +### Accuracy + +![Accuracy](https://blog.vespa.ai/assets/2022-01-27-billion-scale-knn-part-two/ann.png) + +Higher `max-links-per-node` and `neighbors-to-explore-at-insert` improve the quality of the graph and recall accuracy. As the search-time parameter [hnsw.exploreAdditionalHits](../reference/querying/yql#hnsw-exploreadditionalhits) is increased, the lower combination reaches about 70% recall@10, while the higher combination reaches about 92% recall@10. The improvement in accuracy needs to be weighed against the impact on indexing performance and memory usage. + +## Using approximate nearest neighbor search + +With an *HNSW* index enabled on the tensor field one can choose between approximate or exact (brute-force) search by using the [approximate query annotation](../reference/querying/yql#approximate): + +```json +{ + "yql": "select * from doc where {totalTargetHits: 10, approximate:false}nearestNeighbor(image_embeddings,query_image_embedding)", + "hits": 10, + "input.query(query_image_embedding)": [0.21,0.12,....], + "ranking.profile": "image_similarity" +} +``` + +By default, `approximate` is true when searching a tensor field with `HNSW` index enabled. The `approximate` parameter allows quantifying the accuracy loss of using approximate search. The loss can be calculated by performing an exact neighbor search using `approximate:false`, comparing the retrieved documents with those retrieved using `approximate:true`, and calculating the overlap@k metric. + +Note that exact searches over a large vector volume require adjustment of the [query timeout](../reference/api/query#timeout). The default [query timeout](../reference/api/query#timeout) is 500ms, which will be too low for an exact search over many vectors. 
+ +In addition to [totalTargetHits](../reference/querying/yql#totaltargethits), there is a [hnsw.exploreAdditionalHits](../reference/querying/yql#hnsw-exploreadditionalhits) parameter which controls how many extra nodes in the graph (in addition to `totalTargetHits`) that are explored during the graph search. This parameter is used to tune accuracy quality versus query performance. + +## Combining approximate nearest neighbor search with filters + +The [nearestNeighbor](../reference/querying/yql#nearestneighbor) query operator can be combined with other query filters using the [Vespa query language](../reference/querying/yql) and its query operators. There are two high-level strategies for combining query filters with approximate nearest neighbor search: +* [pre-filtering](https://blog.vespa.ai/constrained-approximate-nearest-neighbor-search/#pre-filtering-strategy) (the default) +* [post-filtering](https://blog.vespa.ai/constrained-approximate-nearest-neighbor-search/#post-filtering-strategy) + +These strategies can be configured in a rank profile using [approximate-threshold](../reference/schemas/schemas#approximate-threshold) and [post-filter-threshold](../reference/schemas/schemas#post-filter-threshold). See [Controlling the filtering behavior with approximate nearest neighbor search](https://blog.vespa.ai/constrained-approximate-nearest-neighbor-search/#controlling-the-filtering-behavior-with-approximate-nearest-neighbor-search) for more details. + +Note that when using `pre-filtering` the following query operators are not included when evaluating the filter part of the query: + +* [geoLocation](../reference/querying/yql#geolocation) +* [predicate](../reference/querying/yql#predicate) + +These are instead evaluated after the approximate nearest neighbors are retrieved, more like a `post-filter`. This might cause the search to expose fewer hits to ranking than the wanted `totalTargetHits`. 
+ +Since Vespa 8.78 the `pre-filter` can be evaluated using [multiple threads per query](../performance/practical-search-performance-guide#multithreaded-search-and-ranking). This can be used to reduce query latency for larger vector datasets where the cost of evaluating the `pre-filter` is significant. Note that searching the `HNSW` index is always single-threaded per query. Multithreaded evaluation when using `post-filtering` has always been supported, but this is less relevant as the `HNSW` index search first reduces the document candidate set based on `totalTargetHits`. + +## Nearest Neighbor Search Considerations + +* **totalTargetHits**: The [totalTargetHits](../reference/querying/yql#totaltargethits) parameter specifies how many hits one wants to expose to [ranking](../basics/ranking) in total over the content nodes participating in the query (you can also set this per node using [targetHits](../reference/querying/yql#targethits)). Approximate search exposes exactly `totalTargetHits` hits to `first-phase` ranking over the content nodes as long as `totalTargetHits` hits are actually found and not filtered out. Nearest neighbor search is typically used as an efficient retriever in a [phased ranking](../ranking/phased-ranking) pipeline. See [performance sizing](../performance/sizing-search). + +* **Pagination**: Pagination uses the standard [hits](../reference/api/query#hits) and [offset](../reference/api/query#offset) query api parameters. There is no caching of results in between pagination requests, so a query for a higher `offset` will cause the search to be performed over again. This aspect is no different from [sparse search](../ranking/wand) not using nearest neighbor query operator. + +* **Total hit count is not accurate**: Technically, all vectors in the searchable index are neighbors. There is no strict boundary between a match and no match. 
Neither exact (`approximate:false`) nor approximate (`approximate:true`) usage of the [nearestNeighbor](../reference/querying/yql#nearestneighbor) query operator produces an accurate `totalCount`. This is the same behavior as with sparse dynamic pruning search algorithms like [weakAnd](../reference/querying/yql#weakand) and [wand](../reference/querying/yql#wand). + +* **Grouping** counts are not accurate: Grouping counts from [grouping](/en/querying/grouping) are not accurate when using [nearestNeighbor](../reference/querying/yql#nearestneighbor) search. This is the same behavior as with other dynamic pruning search algorithms like [weakAnd](../reference/querying/yql#weakand) and [wand](../reference/querying/yql#wand). See the [Result diversification](https://blog.vespa.ai/result-diversification-with-vespa/) blog post on how grouping can be combined with nearest neighbor search. + +## Scaling Approximate Nearest Neighbor Search + +### Memory + +Vespa tensor fields are [in-memory](../content/attributes) data structures and so is the `HNSW` graph data structure. For large vector datasets the primary memory resource usage relates to the raw vector field memory usage. + +Using lower tensor cell type precision can reduce memory footprint significantly, for example using `bfloat16` instead of `float` saves close to 50% memory usage without significant accuracy loss. + +Vespa [tensor cell value types](../performance/feature-tuning#cell-value-types) include: + +* `int8` - 1 byte per value. Also used to represent [packed binary values](../rag/binarizing-vectors). +* `bfloat16` - 2 bytes per value. See [bfloat16 floating-point format](https://en.wikipedia.org/wiki/Bfloat16_floating-point_format). +* `float` - 4 bytes per value. Standard float. +* `double` - 8 bytes per value. Standard double. + +### Search latency and document volume + +The `HNSW` greedy search algorithm is sublinear (close to log(N) where N is the number of vectors in the graph). 
This has interesting properties when attempting to add more nodes horizontally using [flat data distribution](../performance/sizing-search#data-distribution). Even if the document volume per node is reduced by a factor of 10, the search latency is only reduced by 50%. Still, flat scaling helps scale document volume, and increases indexing throughput as vectors are partitioned randomly over a set of nodes. + +Pure vector search applications (without filtering, or re-ranking) should attempt to scale up document volume by using a larger instance type and maximize the number of vectors per node. To scale with query throughput, use [grouped data distribution](../performance/sizing-search#data-distribution) to replicate content. + +Note that strongly sublinear search is not necessarily true if the application uses nearest neighbor search for candidate retrieval in a multiphase ranking pipeline, or combines nearest neighbor search with filters. + +## HNSW Operations + +Changing the [distance-metric](../reference/schemas/schemas#distance-metric) for a tensor field with `hnsw` index requires [restarting](../reference/schemas/schemas#changes-that-require-restart-but-not-re-feed), but not re-indexing (re-feeding vectors). Similarly, changing the `max-links-per-node` and `neighbors-to-explore-at-insert` construction parameters requires restarting. diff --git a/mintlify-docs/en/querying/document-summaries.mdx b/mintlify-docs/en/querying/document-summaries.mdx new file mode 100644 index 0000000000..4ea0fd0b6e --- /dev/null +++ b/mintlify-docs/en/querying/document-summaries.mdx @@ -0,0 +1,194 @@ +--- +title: "Document Summaries" +--- + +A *document summary* is the information that is shown for each document in a query result. What information to include is determined by a *document summary class*: A named set of fields with config on which information they should contain. + +A special document summary named `default` is always present and used by default. 
This contains: + +- all fields which specify in their indexing statements that they may be included in summaries +- all fields specified in any document summary +- [sddocname](../reference/querying/default-result-format#sddocname) +- [documentid](../reference/querying/default-result-format#documentid). + +Summary classes are defined in the schema: + +```js +schema music { + + document music { + field artist type string { + indexing: summary | index + } + field album type string { + indexing: summary | index + index: enable-bm25 + } + field year type int { + indexing: summary | attribute + } + field category_scores type tensor(cat{}) { + indexing: summary | attribute + } + } + + document-summary my-short-summary { + summary artist {} + summary album {} + } +} +``` + +See the [schema reference](../reference/schemas/schemas#summary) for details. + +The summary class to use for a query is determined by the parameter [presentation.summary](../reference/api/query#presentation.summary): + +```bash +$ vespa query "select * from music where album contains 'head'" \ + "presentation.summary=my-short-summary" +``` + +A common reason to define a document summary class is [performance](#performance): By configuring a document summary which only contains attributes the result can be generated without disk accesses. Note that this is needed to ensure only memory is accessed even if all fields are attributes because the [document ID](../schemas/documents#document-ids) is not stored as an attribute. + +Document summaries may also contain [dynamic snippets and highlighted terms](#dynamic-snippets). + +The document summary class to use can also be issued programmatically to the `fill()` method from a searcher, and multiple fill operations interleaved with programmatic filtering can be used to optimize data access and transfer when programmatic filtering in a Searcher is used. 
+ +## Selecting summary fields in YQL + +A [YQL](/en/querying/query-language) statement can also be used to filter which fields from a document summary to include in results. Note that this is just a field filter in the container - a summary containing all fields of a summary class is always fetched from content nodes, so to optimize performance it is necessary to create custom summary classes. + +```js +$ vespa query "select artist, album, documentid, sddocname from music where album contains 'head'" +``` + +```json +{ + "root": { }, + "children": [ + { + "id": "id:mynamespace:music::a-head-full-of-dreams", + "relevance": 0.16343879032006284, + "source": "mycontentcluster", + "fields": { + "sddocname": "music", + "documentid": "id:mynamespace:music::a-head-full-of-dreams", + "artist": "Coldplay", + "album": "A Head Full of Dreams" + } + } + ] + } +} +``` + +Use `*` to select all the fields of the chosen document summary class used, (which is `default` by default). + +```js +$ vespa query "select * from music where album contains 'head'" +``` + +```json +{ + "root": { }, + "children": [ + { + "id": "id:mynamespace:music::a-head-full-of-dreams", + "relevance": 0.16343879032006284, + "source": "mycontentcluster", + "fields": { + "sddocname": "music", + "documentid": "id:mynamespace:music::a-head-full-of-dreams", + "artist": "Coldplay", + "album": "A Head Full of Dreams", + "year": 2015, + "category_scores": { + "type": "tensor(cat{})", + "cells": { + "pop": 1.0, + "rock": 0.20000000298023224, + "jazz": 0.0 + } + } + } + } + ] + } +} +``` + +## Summary field rename + +Summary classes may define fields by names not used in the document type: + +```js +document-summary rename-summary { + summary artist_name { + source: artist + } + } +``` + +Refer to the [schema reference](../reference/schemas/schemas#source) for adding [attribute](../reference/schemas/schemas#add-or-remove-an-existing-document-field-from-document-summary) and 
[non-attribute](../reference/schemas/schemas#add-or-remove-a-new-non-attribute-document-field-from-document-summary) fields - some changes require re-indexing. + +## Dynamic snippets + +Use [dynamic](../reference/schemas/schemas#summary) to generate dynamic snippets from fields based on the query keywords. Example from Vespa Documentation Search - see the [schema](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/src/main/application/schemas/doc.sd): + +```js +document doc { + + field content type string { + indexing: summary | index + summary : dynamic + } +``` + +A query for *document summary* returns: + +> *Use document summaries to configure which fields ... indexing: summary | index `}}` document-summary titleyear `{ summary title ...`* + +The example above creates a dynamic summary with the matched terms highlighted. The latter is called [bolding](../reference/schemas/schemas#bolding) and can be enabled independently of dynamic summaries. + +Refer to the [reference](../reference/schemas/schemas#summary) for the response format. + +### Dynamic snippet configuration + +You can configure generation of dynamic snippets by adding an instance of the [vespa.config.search.summary.juniperrc config](https://github.com/vespa-engine/vespa/blob/master/searchsummary/src/vespa/searchsummary/config/juniperrc.def) in services.xml inside the `<content>` cluster tag for the content cluster in question. E.g.: + +```xml + + ... + + 2 + 1000 + 500 + 300 + + ... + +``` + +Numbers here are in bytes. + +## Performance + +[Attribute](../content/attributes) fields are held in memory. This means summaries are memory-only operations if all fields requested are attributes, and is the optimal way to get high query throughput. The other document fields are stored as blobs in the [document store](../content/proton#document-store). Requesting these fields may therefore require a disk access, increasing latency. 
+ + +**Important:** + +The default summary class will access the document store as it includes the [documentid](../reference/querying/default-result-format#documentid) field which is stored there. For maximum query throughput using memory-only access, use a dedicated summary class with attributes only. + + +When using additional summary classes to increase performance, only the network data size is changed - the data read from storage is unchanged. Having "debug" fields with summary enabled will hence also affect the amount of information that needs to be read from disk. + +See [query execution](/en/querying/query-api#query-execution) - breakdown of the summary (a.k.a. result processing, rendering) phase: + +- The document summary latency on the content node, tracked by [content_proton_search_protocol_docsum_latency_average](../operations/metrics). +- Getting data across from content nodes to containers. +- Deserialization from internal binary formats (potentially) to Java objects if touched in a [Searcher](../applications/searchers), and finally serialization to JSON (default rendering) + rendering and network. + +The work, and thus latency, increases with more [hits](../reference/api/query#hits). Use [query tracing](/en/querying/query-api#query-tracing) to analyze performance. + +Refer to [content node summary cache](../performance/caches-in-vespa#content-node-summary-cache). diff --git a/mintlify-docs/en/querying/federation.mdx b/mintlify-docs/en/querying/federation.mdx new file mode 100644 index 0000000000..891f3331fa --- /dev/null +++ b/mintlify-docs/en/querying/federation.mdx @@ -0,0 +1,589 @@ +--- +title: "Federation" +--- + + +![](/assets/img/federation-simple.svg) + + +The Vespa Container allows multiple sources of data to be *federated* to a common search service. The sources of data may be both search clusters belonging to the same application, or external services, backed by Vespa or any other kind of service. 
The container may be used as a pure *federation platform* by setting up a system consisting solely of container nodes federating to external services. + +This document gives a short intro to federation, explains how to create an application package doing federation and shows what support is available for choosing the sources given a query, and the final result given the query and some source specific results. + +*Federation* allows users to access data from multiple sources of various kinds through one interface. This is useful to: + +- enrich the results returned from an application with auxiliary data, like finding appropriate images to accompany news articles. +- provide more comprehensive results by finding data from alternative sources in the cases where the application has none, like back-filling web results. +- create applications whose main purpose is not to provide access to some data set but to provide users or frontend applications a single starting point to access many kinds of data from various sources. Examples are browse pages created dynamically for any topic by pulling together data from external sources. + +The main tasks in creating a federation solution are: + + + +creating connectors to the various sources + + +selecting the data sources which will receive a given query + + +rewriting the received request to an executable query returning the desired data from each source + + +creating the final result by selecting from, organizing and combining the returned data from each selected source + + + +The container aids with these tasks by providing a way to organize a federated execution as a set of search chains which can be configured through the application package. Read the [Container intro](../applications/containers) and [Chained components](../applications/chaining) before proceeding. Refer to the `com.yahoo.search.federation` [Javadoc](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/federation/package-summary). 
+ +## Configuring Providers + +A *provider* is a search chain that produces data (in the form of a Result) from a data source. The provider must contain a Searcher which connects to the data source and produces a Result from the returned data. Configure a provider as follows: + +```xml + + + + + +``` + +You can add multiple searchers in the provider just like in other chains. + +Search chains that provide data from some content cluster in the same application are also *providers*. To explicitly configure a provider talking to internal content clusters, set the attribute type="local" on the provider. That will automatically add the searchers necessary to talk to internal content clusters to the search chain. Example: querying this provider will not lowercase / stem terms: + +```xml + +``` + +## Configuring Sources + +A single provider may be used to produce multiple kinds of results. To implement and present each kind of result, we can use *sources*. A *source* is a search chain that provides a specific kind of result by extending or modifying the behavior of one or more providers. + +Suppose that we want to retrieve two kinds of results from my-provider: Web results and java API documentation: + +```xml + + + + + + + + +``` + +This results in two *source search chains* being created, `web@my-provider` and `java-api@my-provider`. Each of them constitutes a source, namely `web` and `java-api` respectively. As the example suggests, these search chains are named after the source and the enclosing provider. The @-sign in the name should be read as *in*, so `web@my-provider` should for example be read as *web in my-provider*. + +The JavaApiSearcher is responsible for modifying the query so that we only get hits from the java API documentation. We added this searcher directly inside the source element; source search chains and providers are both instances of search chains. All the options for configuring regular search chains are therefore also available for them. 
+ +How do the `web@my-provider` and `java-api@my-provider` source search chains use the `my-provider` provider to send queries to the external service? Internally, the source search chains *inherit* from the enclosing provider. Since the provider contains searchers that know how to talk to the external service, the sources will also contain the same searchers. As an example, consider the "web" search chain; it will contain exactly the same searcher instances as the `my-provider` search chain. By organizing chains for talking to data providers, we can reuse the same connections and logic for talking to remote services ("providers") for multiple purposes ("sources"). + +The provider search chain `my-provider` is *not modified* by adding sources. To verify this, try to send queries to the three search chains `my-provider`, `web@my-provider` and `java-api@my-provider`. + +### Multiple Providers per Source + +You can create a source that consists of source search chains from several providers. Effectively, this lets you vary which provider should be used to satisfy each request to the source: + +```xml + + + + + + + + + + +``` + +Here, the two source search chains `common-search@news-search` and `common-search@my-provider` constitute a single source `common-search`. The source search chains using the `idref` attribute are called participants, while the ones using the `id` attribute are called leaders. Each source must consist of a single leader and zero or more participants. + +Per default, only the leader search chain is used when *federating* to a source. To use one of the participants instead, use [sources](../reference/api/query#model.sources) and *source*: + +```text +http://[host]:[port]/?sources=common-search&source.common-search.provider=news-search +``` + +## Federation + +Now we can search both the web and the java API documentation at the same time, and get a combined result set back.
We achieve this by setting up a *federation* searcher: + +```xml + + + + + + + + + + + + + + +``` + +Inside the Federation element, we list the sources we want to use. Do not let the name *source* fool you; if it behaves like a source, then you can use it as a source (i.e. all types of search chains including providers are accepted). As an example, try replacing the *web* reference with *my-provider*. + +When searching, select a subset of the sources specified in the federation element by specifying the [sources](../reference/api/query#model.sources) query parameter. + +## Built-in Federation + +The built-in search chains *native* and *vespa* contain a federation searcher named *federation*. This searcher has been configured to federate to: + +- All sources +- All providers that do not contain a source + +If configuring your own federation searcher, you are not limited to a subset of these sources - you can use any provider, source or search chain. + +## Inheriting default Sources + +To get the same sources as the built-in federation searcher, inherit the default source set: + +```xml + + + + + ... + + + +``` + +## Changing content cluster chains + +With the information above, we can create a configuration where we modify the search chain sending queries to and receiving results from a single content cluster (here, removing a searcher and adding another): + +```xml + + + + + + + + + + +``` + +## Timeout behavior + +What if we want to limit how much time a provider is allowed to use to answer a query? + +```xml + + + + + + + + + +``` + +The provider search chain will then be limited to use 100 ms to execute each query. The Federation layer allows all providers to continue until the non-optional provider with the longest timeout is finished or canceled. + +In some cases it is useful to be able to keep executing the request to a provider longer than we are willing to wait for it in that particular query.
This allows us to populate caches inside sources which can only meet the timeout after caches are populated. To use this option, specify a [request timeout](../reference/applications/services/search#federationoptions) for the provider: + +```xml + + + + ... + +``` + +Also see [Searcher timeouts](../applications/searchers#timeouts). + +## Non-essential Providers + +Now let us add a provider that retrieves ads: + +```xml + + + + +``` + +Suppose that it is more important to return the result to the user as fast as possible, than to retrieve ads. To signal this, we mark the ads provider as *optional*: + +```xml + + + + + +``` + +The Federation searcher will then only wait for ads as long as it waits for mandatory providers. If the ads are available in time, they are used, otherwise they are dropped. + +If only optional providers are selected for Federation, they will all be treated as mandatory. Otherwise, they would not get a chance to return any results. + +## Federation options inheritance + +The sources automatically use the same Federation options as the enclosing provider. *override* one or more of the federation options in the sources: + +```xml + + + + + + + + + + + +``` + +You can use a single source in different Federation searchers. If you send queries with different cost to the same source from different federation searchers, you might also want to *override* the federation options for when they are used: + +```xml + + + + + + + + + + + + + + + + + + + +``` + +## Selecting Search Chains programmatically + +If we have complicated rules for when a search chain should be used, we can select search chains programmatically instead of setting up sources under federation in services.xml. The selection code is implemented as a [TargetSelector](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/federation/selection/TargetSelector). This TargetSelector is used by registering it on a federation searcher. 
+ +```java expandable +package com.yahoo.example; + +import com.google.common.base.Preconditions; +import com.yahoo.component.chain.Chain; +import com.yahoo.processing.execution.chain.ChainRegistry; +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.result.Hit; +import com.yahoo.search.Searcher; +import com.yahoo.search.federation.selection.FederationTarget; +import com.yahoo.search.federation.selection.TargetSelector; +import com.yahoo.search.searchchain.model.federation.FederationOptions; + +import java.util.Arrays; +import java.util.Collection; + +class MyTargetSelector implements TargetSelector { + + @Override + public Collection> getTargets(Query query, + ChainRegistry searcherChainRegistry) { + Chain searchChain = searcherChainRegistry.getComponent("my-chain"); + Preconditions.checkNotNull(searchChain, "No search chain named 'my-chain' exists in services.xml"); + + return Arrays.asList(new FederationTarget<>(searchChain, new FederationOptions(), null)); + } + + @Override + public void modifyTargetQuery(FederationTarget target, Query query) { + query.setHits(10); + } + + @Override + public void modifyTargetResult(FederationTarget target, Result result) { + for (Hit hit: result.hits()) { + hit.setField("my-field", "hello-world"); + } + } + +} +``` + +The target selector chooses search chains for the federation searcher. In this example, MyTargetSelector.getTargets returns a single chain named "my-chain" that has been set up in `services.xml`. + +Before executing each search chain, the federation searcher allows the target selector to modify the query by calling modifyTargetQuery. In the example, the number of hits to retrieve is set to 10. + +After the search chain has been executed, the federation searcher allows the target selector to modify the result by calling modifyTargetResult. In the example, each hit gets a field called "my-field" with the value "hello-world". 
+ +Configure a federation searcher to use a target selector in ``services.xml``. Only a single target selector is supported. + +```js + + + + + + +``` + +We can also set up both a target-selector and normal sources. The federation searcher will then send queries both to programmatically selected sources and those that would normally be selected without the target selector: + +```js + + + + + + + ... + + +``` + +## Example: Setting up a Federated Service + +A federation application is created by providing custom searcher components performing the basic federation tasks and combining these into chains in a federation setup in [services.xml](/en/reference/applications/services/services). For example, this is a complete configuration which sets up a cluster of container nodes (having 1 node) which federates to another Vespa service (news) and to some web service: + +```js expandable + + + + + + + + + + + + + + + + + + + + +``` + +This creates a configuration of search chains like: + + +![](/assets/img/federation.svg) + + +Each provider is a search chain ending in a Searcher forwarding the query to a remote service. In addition, there is a main chain (included by default) ending in a FederationSearcher, which by default forwards the query to all the providers in parallel. The provider chains return their results upwards to the federation searcher which merges them into a complete result which is returned up the main chain. + +This services file, an implementation of the example classes (see below), and [hosts.xml](/en/reference/applications/hosts) listing the container nodes, is all that is needed to set up and [deploy](/en/basics/applications#deploying-applications) an application federating to multiple sources. For a reference to these XML sections, see the [chains reference](/en/reference/applications/services/search#chain). + +The following sections outline how this can be elaborated into a solution producing more user-friendly federated results.
+ +### Selecting Sources + +To do the best possible job of bringing relevant data to the user, we should send every query to all sources, and decide what data to include when all the results are available, and we have as much information as possible at hand. In general this is not advisable because of the resource cost involved, so we must select a subset based on information in the query. This is best viewed as a probabilistic optimization problem: The selected sources should be the ones having a high enough probability of being useful to offset the cost of querying it. + +Any Searcher which is involved in selecting sources or processing the entire result should be added to the main search chain, which was created implicitly in the examples above. To do this, the main chain should be created explicitly: + +```js expandable + + + + + + + + + + + + + + + + + + + + + + +``` + +This adds an explicit main chain to the configuration which has two additional searchers in addition to those inherited from the native chain, which includes the FederationSearcher. Note that if the full Vespa functionality is needed, the `vespa` chain should be inherited rather than `native`. + +The chain called default will be invoked if no searchChain parameter is given in the query. + +To learn more about creating Searcher components, see [searcher development](/en/applications/searchers). + +### Rewriting Queries to Individual Providers + +The provider searchers are responsible for accepting a Query object, translating it to a suitable request to the backend in question and deserializing the response into a Result object. 
There is often a need to modify the query to match the particulars of a provider before passing it on: + +- To get results from the provider which matches the determined interpretation and intent as well as possible, the query may need to be rewritten using detailed information about the provider +- Parameters beyond the basic ones supported by each provider searcher may need to be translated to the provider +- There may be a need for provider specific business rules + +These query changes may range in complexity from setting a query parameter, applying some source specific information to the query or transferring all the relevant query state into a new object representation which is consumed by the provider searcher. + +This example shows a searcher adding a customer id to the news request: + +```java expandable +package com.yahoo.example; + +import com.yahoo.search.searchchain.Execution; +import com.yahoo.search.*; + +public class NewsCustomerIdSearcher extends Searcher { + + @Override + public Result search(Query query, Execution execution) { + String customerId="provider.news.custid"; + if (query.properties().get(customerId) == null) + query.properties().set(customerId, "yahoo/test"); + if (query.getTraceLevel() >= 3) + query.trace("News provider: Will use " + + customerId + "=" + query.properties().get(customerId), false, 3); + return execution.search(query); + } + +} +``` + +This searcher should be added to the news source chain as shown above. + +You may have noticed that we have referred to the search chains talking to a service as a **provider** while referring to selection of **sources**. The reason for making this distinction is that it is sometimes useful to treat different kinds of processing of queries and results to/from the same service as different sources. Hence, it is possible to create source search chains in addition to the provider chains in services.xml. 
Each such source will refer to a provider (by inheriting the provider chain) but include some searchers specific to that source. Selection and routing of the query from the federation searchers is always to sources, not providers. By default, if no source tags are added in the provider, each provider implicitly creates a source by the same name. + +## Processing Results + +When we have selected the sources, created queries fitting to get results from each source and executed those queries, we have produced a result which contains a HitGroup per source containing the list of hits from that source. These results may be returned in XML as is, preserving the structure as XML, by requesting the [page](/en/reference/querying/page-result-format) result format: + +```bash +http://[host]:[port]/search/?query=test&presentation.format=page +``` + +However, this is not suitable for presenting to the user in most cases. What we want to do is select the subset of the hits having the highest probable utility to the user, organized in a way that maximizes the user experience. This is not an easy task, and we will not attempt to solve it here, other than noting that any solution should make use of both the information in the intent model and the information within the results from each source, and that this is a highly connected optimization problem because the utility of including some data in the result depends on what other data is included. 
+ +Here we will just use a searcher which shows how this is done in principle, this searcher flattens the news and web service hit groups into a single list of hits, where only the highest ranked news ones are included: + +```java expandable +package com.yahoo.example; + +import com.yahoo.search.*; +import com.yahoo.search.result.*; +import com.yahoo.search.searchchain.Execution; + +public class ResultBlender extends Searcher { + + @Override + public Result search(Query query,Execution execution) { + Result result = execution.search(query); + HitGroup news = (HitGroup)result.hits().remove("source:news"); + HitGroup webService = (HitGroup)result.hits().remove("source:webService"); + if (webService == null) return result; + result.hits().addAll(webService.asList()); + if (news == null) return result; + for (Hit hit : news.asList()) + if (shouldIncludeNewsHit(hit)) + result.hits().add(hit); + return result; + } + + private boolean shouldIncludeNewsHit(Hit hit) { + if (hit.isMeta()) return true; + if (hit.getRelevance().getScore() > 0.7) return true; + return false; + } + +} +``` + +The optimal result to return to the user is not necessarily one flattened list. In some cases it may be better to keep the source organization, or to pick some other organization. The [page result format](/en/reference/querying/page-result-format) requested in the query above is able to represent any hierarchical organization as XML. A more realistic version of this searcher will use that to choose between some predefined layouts which the frontend in question knows how to handle, and choose some way of grouping the available hits suitable for the selected layout. + +This searcher should be added to the main (`default`) search chain in services.xml together with the SourceSelector (the order does not matter). 
+ +## Unit Testing the Result Processor + +Unit test example for the Searcher above: + +```java expandable +package com.yahoo.search.example.test; + +import org.junit.Test; +import com.yahoo.search.searchchain.*; +import com.yahoo.search.example.ResultBlender; +import com.yahoo.search.*; +import com.yahoo.search.result.*; + +public class ResultBlenderTestCase { + + @Test + public void testBlending() { + Chain chain = new Chain(new ResultBlender(), new MockBackend()); + Context context = Execution.Context.createContextStub(null); + Result result = new Execution(chain, context).search(new Query("?query=test")); + assertEquals(4, result.hits().size()); + assertEquals("webService:1", result.hits().get(0).getId().toString()); + assertEquals("news:1", result.hits().get(1).getId().toString()); + assertEquals("webService:2", result.hits().get(2).getId().toString()); + assertEquals("webService:3", result.hits().get(3).getId().toString()); + } + + private static class MockBackend extends Searcher { + + @Override + public Result search(Query query,Execution execution) { + Result result = new Result(query); + HitGroup webService = new HitGroup("source:webService"); + webService.add(new Hit("webService:1",0.9)); + webService.add(new Hit("webService:2",0.7)); + webService.add(new Hit("webService:3",0.5)); + result.hits().add(webService); + HitGroup news = new HitGroup("source:news"); + news.add(new Hit("news:1",0.8)); + news.add(new Hit("news:2",0.6)); + news.add(new Hit("news:3",0.4)); + result.hits().add(news); + return result; + } + } + +} +``` + +This shows how a search chain can be created programmatically, with a mock backend producing results suitable for exercising the functionality of the searcher being tested. 
diff --git a/mintlify-docs/en/querying/geo-search.mdx b/mintlify-docs/en/querying/geo-search.mdx new file mode 100644 index 0000000000..c34b6c16a6 --- /dev/null +++ b/mintlify-docs/en/querying/geo-search.mdx @@ -0,0 +1,246 @@ +--- +title: "Geo Search" +--- + +To model a geographical position in documents, use a field where the type is [position](../reference/schemas/schemas#position) for a single, required position. To allow any number of positions (including none at all) use `array` instead. This can be used to limit hits (only those documents with a position inside a circular area will be hits), the distance from a point can be used as input to ranking functions, or both. + +A geographical point in Vespa is specified using the geographical [latitude](https://en.wikipedia.org/wiki/Latitude) and [longitude](https://en.wikipedia.org/wiki/Longitude). As an example, a location in [Sunnyvale, California](https://www.google.com/maps/place/721+1st+Ave,+Sunnyvale,+CA+94089/@37.4181488,-122.0256157,12z) could be latitude 37.4181488 degrees North, longitude 122.0256157 degrees West. This would be represented as `{ "lat": 37.4181488, "lng": -122.0256157 }` in JSON. + +As seen above, positive numbers are used for north (latitudes) and east (longitudes); negative numbers are used for south and west. This is the usual convention. + + +**Note:** + +Old formats for position (those used in Vespa 5, 6, and 7) are still accepted as feed input; enabling legacy output is temporarily possible also. See [legacy flag v7-geo-positions](../reference/querying/default-result-format#geo-position-rendering). 
+ + +## Sample schema and document + +A sample schema could be a business directory, where every business has a position (for its main office or contact point): + +```js +schema biz { + document biz { + field title type string { + indexing: index + } + field mainloc type position { + indexing: attribute | summary + } + } + fieldset default { + fields: title + } +} +``` + +Using this schema is one possible business entry with its location: + +```json +{ + "put": "id:mynamespace:biz::business-1", + "fields": { + "title": "Yahoo Inc (main office)", + "mainloc": { "lat": 37.4181488, "lng": -122.0256157 } + } +} +``` + +## Restrict + +The API for adding a geographical restriction is to use a [geoLocation](../reference/querying/yql#geolocation) clause in the YQL statement, specifying a point and a maximum distance from that point: + +```bash +$ curl -H "Content-Type: application/json" \ + --data '{"yql" : "select * from sources * where title contains \"office\" and geoLocation(mainloc, 37.416383, -122.024683, \"20 miles\")"}' \ + http://localhost:8080/search/ +``` + +One can also build or modify the query programmatically by adding a [GeoLocationItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/GeoLocationItem) anywhere in the query tree. + +To use a position for ranking only (without *any* requirement for a matching position), specify it as a ranking-only term. Use the [rank()](../reference/querying/yql#rank) operation in YQL for this, or a [RankItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/RankItem) when building the query programmatically. + +At the *same time*, specify a negative radius (for example `-1 m`). This matches any position, and computes distance etc. for the closest position in the document. 
Example: + +```bash +$ curl -H "Content-Type: application/json" \ + --data '{"yql" : "select * from sources * where rank(title contains \"office\", geoLocation(mainloc, 37.416383, -122.024683, \"-1 m\"))"}' \ + http://localhost:8080/search/ +``` + +## Ranking from a position match + +The main rank feature to use for the example above would be [distance(mainloc).km](../reference/ranking/rank-features#distance\(name\).km) and doing further calculation on it, giving better rank to documents that are closer to the wanted (query) position. Here one needs to take into consideration what sort of distances are practical; traveling on foot, by car, or by plane should have quite different ranking scales - using different rank profiles would be one natural way to support that. If the query specifies a maximum distance, that could be sent as an input to ranking as well, and used for scaling. + +There is also a [closeness(mainloc)](../reference/ranking/rank-features#closeness\(name\)) which goes from 1.0 at the exact location to 0.0 at a tunable maximum distance, which is enough for many needs. + +### Useful summary-features + +To do further processing, it may be useful to get the computed distance back. The preferred way to do this is to use the associated rank features as [summary-features](../reference/schemas/schemas#summary-features). In particular, [distance(*fieldname*).km](../reference/ranking/rank-features#distance\(name\).km) gives the geographical distance in kilometers, while [distance(*fieldname*).latitude](../reference/ranking/rank-features#distance\(name\).latitude) and [distance(*fieldname*).longitude](../reference/ranking/rank-features#distance\(name\).longitude) give the geographical coordinates for the best location directly, in degrees. These are easy to use programmatically from a searcher, accessing [feature values in results](../ranking/ranking-expressions-features#accessing-feature-function-values-in-results) for further processing.
+ + +**Note:** + +`geoLocation` doesn't do proper great-circle-distance calculations. It works well for 'local' search (city or metro area), using simpler distance calculations. For positions which are very distant or close to the international date line (e.g. the Bering sea), the computed results may be inaccurate. + + +## Using multiple position fields + +For some applications, it can be useful to have several position attributes that may be searched. For example, we could expand the above examples with the locations of subsidiary offices: + +```js +schema biz { + document biz { + field title type string { + indexing: index + } + field mainloc type position { + indexing: attribute | summary + } + field otherlocs type array { + indexing: attribute + } + } + fieldset default { + fields: title + } +} +``` + +Expanding the example business with an office in Australia and one in Norway could look like: + +```json +{ + "put": "id:mynamespace:biz::business-1", + "fields": { + "title": "Yahoo Inc (some offices)", + "mainloc": { "lat": 37.4, "lng": -122.0 }, + "otherlocs": [ { "lat": -33.9, "lng": 151.2 }, + { "lat": 63.4, "lng": 10.4 } ] + } +} +``` + +A single query item can only search in one of the position attributes. For a search that spans several fields, use YQL to combine several `geoLocation` items inside an `or` clause, or combine several fields into a combined array field (so in the above example, one could duplicate the "mainloc" position into the "otherlocs" array as well, possibly changing the name from "otherlocs" to "all_locs"). 
+ +## Example with airport positions + +To give some more example positions, here is a list of some airports with their locations in JSON format: + +| Airport code | City | Location | +|:---|:---|:---| +| SFO | San Francisco, USA | `{ "lat": 37.618806, "lng": -122.375416 }` | +| LAX | Los Angeles, USA | `{ "lat": 33.942496, "lng": -118.408048 }` | +| JFK | New York, USA | `{ "lat": 40.639928, "lng": -73.778692 }` | +| LHR | London, UK | `{ "lat": 51.477500, "lng": -0.461388 }` | +| SYD | Sydney, Australia | `{ "lat": -33.946110, "lng": 151.177222 }` | +| TRD | Trondheim, Norway | `{ "lat": 63.457556, "lng": 10.924250 }` | +| OSL | Oslo, Norway | `{ "lat": 60.193917, "lng": 11.100361 }` | +| GRU | São Paulo, Brazil | `{ "lat": -23.435555, "lng": -46.473055 }` | +| GIG | Rio de Janeiro, Brazil | `{ "lat": -22.809999, "lng": -43.250555 }` | +| BLR | Bangalore, India | `{ "lat": 13.198867, "lng": 77.705472 }` | +| FCO | Rome, Italy | `{ "lat": 41.804475, "lng": 12.250797 }` | +| NRT | Tokyo, Japan | `{ "lat": 35.765278, "lng": 140.385556 }` | +| PEK | Beijing, China | `{ "lat": 40.073, "lng": 116.598 }` | +| CPT | Cape Town, South Africa | `{ "lat": -33.971368, "lng": 18.604292 }` | +| ACC | Accra, Ghana | `{ "lat": 5.605186, "lng": -0.166785 }` | +| TBU | Nuku'alofa, Tonga | `{ "lat": -21.237999, "lng": -175.137166 }` | + +## Distance to path + +This example provides an overview of the [DistanceToPath](../reference/ranking/rank-features#distanceToPath\(name\).distance) rank feature. This feature matches *document locations* to a path given in the query. Not only does this feature return the closest distance for each document to the path, it also includes the length traveled *along* the path before reaching the closest point, or *intersection*. This feature has been nick named the *gas* feature because of its obvious use case of finding gas stations along a planned trip. 
+ +In this example we have been traveling from the US to Bangalore, and we are now planning our trip back. We have decided to rent a car in Bangalore that we are to return upon arrival at the airport in Chennai. We are already quite hungry and wish to stop for a meal once we are outside of town. To avoid having to pay an additional fueling premium, we also wish to refuel just before reaching the airport. We need to figure out what roads to take, what restaurants are available outside of Bangalore, and what fuel stations are available once we get close to Chennai. Here we have plotted our trip from Bangalore to the airport: + + +![](/assets/img/geo/path1.png) + +If we search for restaurants along the path, we only see a small subset of all restaurants present in the window of our quite large map. Here you see how the most relevant results are actually all in Bangalore or Chennai: + +![](/assets/img/geo/path2.png) + +To find the best results, move the map window to just about where we expect to be eating, and redo the search: + +![](/assets/img/geo/path3.png) + + +This has to be done similarly for finding a gas station near the airport. This illustrates searching for restaurants in a smaller window along the planned trip without *DistanceToPath*. Next, we outline how *DistanceToPath* can be used to quickly and easily improve this type of planning to be more convenient for the user. + +The nature of this feature requires that the search corpus contains documents with position data. A [searcher component](../applications/searchers) needs to be written that is able to pass paths with the queries that lie in the same coordinate space as the searchable documents. Finally, a [rank-profile](../basics/ranking) needs to be defined that scores documents according to how they match some target distance traveled and at the same time lie close "enough" to the path.
+ +### Query Syntax + +This document does not describe how to write a searcher plugin for the Container; refer to the [container documentation](../applications/searchers) for that. However, let us review the syntax expected by *DistanceToPath*. As noted in the [rank features reference](../reference/ranking/rank-features#distanceToPath\(name\).distance), the path is supplied as a query parameter by name of the feature and the `path` keyword: + +```js +yql=(…)&rankproperty.distanceToPath(name).path=(x1,y1,x2,y2,…,xN,yN) +``` + +Here `name` has to match the name of the position attribute that holds the positions data. + +The path itself is parsed as a list of `N` coordinate pairs that together form `N-1` line segments: + +$$ +(x_1,y_1) \rightarrow (x_2,y_2),\ (x_2,y_2) \rightarrow (x_3,y_3),\ (\ldots),\ (x_{N-1},y_{N-1}) \rightarrow (x_N,y_N) +$$ + + +**Note:** + +The path is *not* in a readable (latitude, longitude) format, but is a pair of integers in the internal format (degrees multiplied by 1 million). If a transform is required from geographic coordinates to this, the search plugin must do it; note that the first number in each pair (the 'x') is longitude (degrees East or West) while the second ('y') is latitude (degrees North or South), corresponding to the usual orientation for maps - *opposite* to the usual order of latitude/longitude. + + +### Rank profile + +If we were to disregard our scenario for a few moments, we could suggest the following rank profile: + +```js +rank-profile default { + first-phase { + expression: nativeRank + } + second-phase { + expression: firstPhase * if (distanceToPath(ll).distance < 10000, 1, 0) + } +} +``` + +This profile will first rank all documents according to Vespa's *nativeRank* feature, and then do a second pass over the top 100 results and order these based on their distance to our path. If a document lies within roughly 1 km of our path (10000 internal units, at approximately 10 units per meter) it retains its relevancy, otherwise its relevancy is set to 0.
Such a rank profile would indeed solve the current problem, but Vespa's ranking model allows for us to take this a lot further. + +The following is a rank profile that ranks documents according to a query-specified target distance to path and distance traveled: + +```js +rank-profile default { + first-phase { + expression { + max(0, query(distance) - distanceToPath(ll).distance) * + (1 - fabs(query(traveled) - distanceToPath(ll).traveled)) + } + } +} +``` + +The expression is two-fold; a first component determines a rank based on the document's distance to the given path as compared to the [query parameter](../reference/ranking/ranking-expressions) `distance`. If the allowed distance is exceeded, this component's contribution is 0. The distance contribution is then multiplied by the difference of the actual distance traveled as compared to the query parameter `traveled`. In short, this profile will include all documents that lie close enough to the path, ranked according to their actual distance and traveled measure. + + +**Note:** + +*DistanceToPath* is only compatible with *2D coordinates* because pathing in 1 dimension makes no sense. + + +### Results + +For the sake of this example, assume that we have implemented a custom path searcher that is able to pass the path found by the user's initial directions query to Vespa's [query syntax](#query-syntax). There are then two more parameters that must be supplied by the user; `distance` and `traveled`. Vespa expects these parameters to be supplied in a scale compatible with the feature's output, and should probably also be mapped by the container plugin. The feature's *distance* output is given in Vespa's internal resolution, which is approximately 10 units per meter. The *traveled* output is a normalized number between 0 and 1, where 0 represents the beginning of the path, and 1 is the end of the path. + +This illustrates how these parameters can be used to return the most appropriate hits for our scenario. 
Note that the figures only show the top hit for each query: + + +![](/assets/img/geo/path4.png) + + +![](/assets/img/geo/path5.png) + + +1. Searching for restaurants with the DistanceToPath feature. `distance = 1000, traveled = 0.1` +2. Searching for gas stations with the DistanceToPath feature. `distance = 1000, traveled = 0.9` + diff --git a/mintlify-docs/en/querying/grouping.mdx b/mintlify-docs/en/querying/grouping.mdx new file mode 100644 index 0000000000..837338e4f4 --- /dev/null +++ b/mintlify-docs/en/querying/grouping.mdx @@ -0,0 +1,1079 @@ +--- +title: "Grouping and aggregation" +--- + +## Grouping Interface + +Try running requests on the [grouping example data](https://github.com/vespa-engine/sample-apps/blob/master/examples/part-purchases-demo/ext/feed.jsonl): + +```js +all( group(customer) each(output(sum(price))) ) +``` + +The Vespa grouping language is a list-processing language which describes how the query hits should be grouped, aggregated, and presented in result sets. A grouping statement takes the list of all matches to a query as input and groups/aggregates it, possibly in multiple nested and parallel ways to produce the output. This is a logical specification and does not indicate how it is executed, as instantiating the list of all matches to the query somewhere would be too expensive, and execution is distributed instead. + +Refer to the [Query API reference](../reference/api/query#select) for how to set the *select* parameter, and the [Grouping reference](../reference/querying/grouping-language) for details. Fields used in grouping must be defined as [attribute](../content/attributes) in the document schema. Grouping supports continuation objects for [pagination](#pagination). + +The [Grouping Results](https://github.com/vespa-engine/sample-apps/tree/master/examples/part-purchases-demo) sample application is a practical example. 
+ +## The grouping language structure + +The operations defining the structure of a grouping are: + +- `all(statement)`: Execute the nested statement once on the input list as a whole. +- `each(statement)`: Execute the nested statement on each element of the input list. +- `group(specification)`: Turn the input list into a list of lists according to the grouping specification. +- `output`: Output some value(s) at the current location in the structure. + +The parallel and nested collection of these operations defines both the structure of the computation and of the result it produces. For example, `all(group(customer) each(output(count())))` will take all matches, group them by customer id, and for each group, output the count of hits in the group. + +Vespa distributes and executes the grouping program on content nodes and merges results on container nodes - in multiple phases, as needed. As realizing such programs over a distributed data set requires more network round-trips than a regular search query, these queries may be more expensive than regular queries - see [defaultMaxGroups](../reference/api/query#grouping.defaultmaxgroups) and the likes for how to control resource usage. 
+ +## Grouping by example + +For the entirety of this document, assume an index of engine part purchases: + +| Date | Price | Tax | Item | Customer | Is paid | +|:---|:---|:---|:---|:---|:---| +| 2006-09-06 09:00:00 | $1 000 | 0.24 | Intake valve | Smith | true | +| 2006-09-07 10:00:00 | $1 000 | 0.12 | Rocker arm | Smith | false | +| 2006-09-07 11:00:00 | $2 000 | 0.24 | Spring | Smith | true | +| 2006-09-08 12:00:00 | $3 000 | 0.12 | Valve cover | Jones | false | +| 2006-09-08 10:00:00 | $5 000 | 0.24 | Intake port | Jones | true | +| 2006-09-08 11:00:00 | $8 000 | 0.12 | Head | Brown | false | +| 2006-09-09 12:00:00 | $1 300 | 0.24 | Coolant | Smith | true | +| 2006-09-09 10:00:00 | $2 100 | 0.12 | Engine block | Jones | false | +| 2006-09-09 11:00:00 | $3 400 | 0.24 | Oil pan | Brown | true | +| 2006-09-09 12:00:00 | $5 500 | 0.12 | Oil sump | Smith | false | +| 2006-09-10 10:00:00 | $8 900 | 0.24 | Camshaft | Jones | true | +| 2006-09-10 11:00:00 | $1 440 | 0.12 | Exhaust valve | Brown | false | +| 2006-09-10 12:00:00 | $2 330 | 0.24 | Rocker arm | Brown | true | +| 2006-09-10 10:00:00 | $3 770 | 0.12 | Spring | Brown | false | +| 2006-09-10 11:00:00 | $6 100 | 0.24 | Spark plug | Smith | true | +| 2006-09-11 12:00:00 | $9 870 | 0.12 | Exhaust port | Jones | false | +| 2006-09-11 10:00:00 | $1 597 | 0.24 | Piston | Brown | true | +| 2006-09-11 11:00:00 | $2 584 | 0.12 | Connection rod | Smith | false | +| 2006-09-11 12:00:00 | $4 181 | 0.24 | Rod bearing | Jones | true | +| 2006-09-11 13:00:00 | $6 765 | 0.12 | Crankshaft | Jones | false | + +## Basic Grouping + +Example: *Return the total sum of purchases per customer* - steps: + + + +Select all documents: + +```js +/search/?yql=select * from sources * where true +``` + + +Take the list of all hits: + +```js +all(...) +``` + + +Turn it into a list of lists of all hits having the same customer id: + +```js +group(customer) +``` + + +For each of those lists of same-customer hits: + +```js +each(...) +``` 
+ + +Output the sum (an aggregator) of the price over all items in that list of hits: + +```js +output(sum(price)) +``` + + + + +Final query, producing the sum of the price of all purchases for each customer: + +```js +/search/?yql=select * from sources * where true limit 0 | + all( group(customer) each(output(sum(price))) ) +``` + +Here, limit is set to zero to get the grouping output only. URL encoded equivalent: + +```js +/search/?yql=select%20%2A%20from%20sources%20%2A%20where%20true%20limit%200%20%7C%20 + all%28%20group%28customer%29%20each%28output%28sum%28price%29%29%29%20%29 +``` + +Result: + +| GroupId | Sum(price) | +|:---|:---| +| Brown | $20 537 | +| Jones | $39 816 | +| Smith | $19 484 | + +Example: *Sum price of purchases [per date](#time-and-date):* + +```js +select (…) | all(group(time.date(date)) each(output(sum(price)))) +``` + +Note: in examples above, *all* documents are evaluated. Modify the query to add filters (and thus cut latency), like (remember to URL encode): + +```js +/search/?yql=select * from sources * where customer contains "smith" +``` + +## Ordering and Limiting Groups + +In many scenarios, a large collection of groups is produced, possibly too large to display or process. This is handled by ordering groups, then limiting the number of groups to return. + +The `order` clause accepts a list of one or more expressions. Each of the arguments to `order` is prefixed by either a plus/minus for ascending/descending order. + +Limit the number of groups using `max` and `precision` - the latter is the number of groups returned per content node to be merged to the global result. Larger document distribution skews hence require a higher `precision` for accurate results. + +An implicit limit can be specified through the [grouping.defaultMaxGroups](../reference/api/query#grouping.defaultmaxgroups) query parameter. This value will always be overridden if `max` is explicitly specified in the query. 
Use `max(inf)` to retrieve all groups when the query parameter is set. + +If `precision` is not specified, it will default to a factor times `max`. This factor can be overridden through the [grouping.defaultPrecisionFactor](../reference/api/query#grouping.defaultprecisionfactor) query parameter. + +Example: To find the 2 globally best groups, make an educated guess on how many samples need to be fetched from each node in order to get the right groups. This is the `precision`. An initial factor of 3 has proven to be quite good in most use cases. If, however, the data for customer 'Jones' was spread on 3 different content nodes, 'Jones' might be among the 2 best on only one node. But based on the distribution of the data, we have concluded by earlier tests that if we fetch 5.67 times as many groups as we need to, we will have a correct answer with at least 99.999% confidence. So then we just use 6 times as many groups when doing the merge. + +However, there is one exception. Without an `order` constraint, `precision` is not required. Then, local ordering will be the same as global ordering. Ordering will not change after a merge operation. + +### Example + +Example: *The two customers with most purchases, returning the sum for each:* + +```js +select (…) | all(group(customer) max(2) precision(12) order(-count()) + each(output(sum(price)))) +``` + +## Hits per Group + +Use `summary` to print the fields for a hit, and `max` to limit the number of hits per group. + +An implicit limit can be specified through the [grouping.defaultMaxHits](../reference/api/query#grouping.defaultmaxhits) query parameter. This value will always be overridden if `max` is explicitly specified in the query. Use `max(inf)` to retrieve all hits when the query parameter is set. 
+ +### Example + +Example: Return the three most expensive parts per customer: + +```js +/search/?yql=select * from sources * where true | + all(group(customer) each(max(3) each(output(summary())))) +``` + +Notes on ordering in the example above: + +- The `order` clause is a directive for *group* ordering, not *hit* ordering. Here, there is no order clause on the groups, so default ordering `-max(relevance())` is used. The *\-* denotes the sorting order, *\-* means descending (higher score first). In this case, the query is "all documents", so all groups are equally relevant and the group order is random. +- To order hits inside groups, use ranking. Add `ranking=pricerank` to the query to use the pricerank [rank profile](../basics/ranking) to rank by price: + + ```sd + rank-profile pricerank inherits default { + first-phase { + expression: attribute(price) + } + } + ``` + + +## Filter within a group + +Use the `filter` clause to select which values to keep in a group. See the [reference](../reference/querying/grouping-language#filtering-groups) for details. + +### Examples + +Example: Sum the price per customer of `Bonn.*` where price was over 1000. + +```js +/search/?yql=select * from sources * where true | + all(group(customer) filter(regex("Bonn.*", attributes{"sales_rep"}) and not range(0, 1000, price)) each(output(sum(price)) each(output(summary())))) +``` + +With many faceted search UIs, you often have a filtering problem: if the user clicks on a facet option, like a customer name, you want to still show all the customers, even though results are filtered by that customer. + +Still, if there's another option from another facet (e.g., item name), you want the other facet options, such as customer names, to only include results matching that item name. In other words, clicking on a facet option should apply that filter to everything else (other facet options and results list), but not to itself. 
+ +Here's how users expect a UI where they selected both a customer and an item: + + +![](/assets/img/ecommerce-facets.png) + + +To implement this, you need one grouping expression for each filter combination. In this case, one for the main results list, one for the customer facet, and one for the item facet. Using grouping filters for this is **a lot slower** than running separate queries and filtering there. + +```js +/search/?yql=select * from sources * where true | + all( group(1) filter(regex('Brown',customer) and regex('Rocker arm',item)) each(output(count()) each(output(summary())) ) ) | + all( group(customer) filter(regex('Rocker arm',item)) each(output(count()) ) ) | + all( group(item) filter(regex('Brown',customer)) each(output(count()) ) ) +``` + +Use `istrue` to filter using a boolean attribute field directly. For instance, if you want to output how many purchases have been paid by each customer, you could express that as: + +```js +/search/?yql=select * from sources * where true | + all(group(customer) filter(istrue(is_paid)) each(output(count()))) +``` + +To find out how many purchases have not been paid yet, prepend `istrue` with `not` like this: `not istrue(is_paid)`. + +## Global limit for grouping queries + +Use the [grouping.globalMaxGroups](../reference/api/query#grouping.globalmaxgroups) query parameter to restrict execution of queries that are potentially too expensive in terms of compute and bandwidth. Queries that may return a result exceeding this threshold are failed preemptively. This limit is compared against the total number of groups and hits that the query could return in the worst case. + +### Examples + +The following query may return 5 groups and 0 hits. It will be rejected when `grouping.globalMaxGroups < 5`. + +```js +select (…) | all(group(item) max(5) each(output(count()))) +``` + +The following query may return 5 groups and 35 hits. It will be rejected when `grouping.globalMaxGroups < 5+5*7`. 
+ +```js +select (…) | all( + group(customer) max(5) + each( + output(count()) max(7) + each(output(summary())) + ) + ) +``` + +The following query may return 6 groups and 30 hits. It will be rejected when `grouping.globalMaxGroups < 2*(3+3*5)`. + +```js +select (…) | +all( + all(group(item) max(3) + each(output(count()) max(5) + each(output(summary())))) + all(group(customer) max(3) + each(output(count()) max(5) + each(output(summary()))))) +``` + +### Combining with default limits for groups/hits + +The `grouping.globalMaxGroups` restriction will utilize the [grouping.defaultMaxGroups](../reference/api/query#grouping.defaultmaxgroups)/ [grouping.defaultMaxHits](../reference/api/query#grouping.defaultmaxhits) values for grouping statements without a `max`. The two queries below are identical, assuming `defaultMaxGroups=5` and `defaultMaxHits=7`, and both will be rejected when `globalMaxGroups < 5+5*7`. + +```js +select (…) | +all( + group(customer) max(5) + each( + output(count()) max(7) + each(output(summary())) + ) +) +``` + +```js +select (…) | +all( + group(customer) + each( + output(count()) + each(output(summary())) + ) +) +``` + +A grouping without `max` combined with `defaultMaxGroups=-1`/`defaultMaxHits=-1` will be rejected unless `globalMaxGroups=-1`. This is because the query produces an unbounded result, an infinite number of groups if `defaultMaxGroups=-1` or an infinite number of summaries if `defaultMaxHits=-1`. An unintentional DoS (Denial of Service) could be the ultimate consequence if a query returns thousands of groups and summaries. This is why setting `globalMaxGroups=-1` is risky. + +### Recommended settings + +The best practice is to always specify `max` in groupings, making it easy to reason about the worst-case cardinality of the query results. The performance will also benefit. Set `globalMaxGroups` to the overall worst-case result cardinality with some margin. 
The `defaultMaxGroups`/`defaultMaxHits` should be overridden in a query profile if some groupings do not use `max` and the default values are too low. + +```xml +<query-profile id="default"> + <field name="grouping.defaultMaxGroups">20</field> + <field name="grouping.defaultMaxHits">100</field> + <field name="grouping.globalMaxGroups">8000</field> +</query-profile> +``` + +## Performance and Correctness + +Grouping is, by default, tuned to favor performance over correctness. Perfect correctness may not be achievable; results of queries using [non-default ordering](#ordering-and-limiting-groups) can be approximate, and correctness can only be partially achieved by a larger `precision` value that sacrifices performance. + +The [grouping session cache](../reference/querying/grouping-language#grouping-session-cache) is enabled by default. Disabling it will improve correctness, especially for queries using `order` and `max`. The cost of multi-level grouping expressions will increase, though. + +Consider increasing the [precision](#ordering-and-limiting-groups) value when using `max` in combination with `order`. The default precision may not achieve the required correctness for your use case. + +## Nested Groups + +Groups can be nested. This offers great drilling capabilities, as there are no limits to nesting depth or presented information on any level. Example: How much each customer has spent per day by grouping on customer, then date: + +```js +select (…) | all(group(customer) each(group(time.date(date)) each(output(sum(price))))) +``` + +Use this to query for all items on a per-customer basis, displaying the most expensive hit for each customer, with subgroups of purchases on a per-date basis. Use the [summary](#hits-per-group) clause to show hits inside any group at any nesting level. 
Include the sum price for each customer, both as a grand total and broken down on a per-day basis: + +```js +/search/?yql=select * from sources * where true limit 0| + all(group(customer) + each(max(1) output(sum(price)) each(output(summary()))) + each(group(time.date(date)) + each(max(10) output(sum(price)) each(output(summary()))))) + &ranking=pricerank +``` + +| GroupId | sum(price) | | | | | | +|:---|:---|:---|:---|:---|:---|:---| +| Brown | $20 537 | | | | | | +| | Date | Price | Tax | Item | Customer | | +| | 2006-09-08 11:00 | $8 000 | 0.12 | Head | Brown | | +| | GroupId | Sum(price) | | | | | +| | 2006-09-08 | $8 000 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-08 11:00 | $8 000 | 0.12 | Head | Brown | +| | 2006-09-09 | $3 400 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-09 11:00 | $3 400 | 0.24 | Oil pan | Brown | +| | 2006-09-10 | $7 540 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-10 10:00 | $3 770 | 0.12 | Spring | Brown | +| | | 2006-09-10 12:00 | $2 330 | 0.24 | Rocker arm | Brown | +| | | 2006-09-10 11:00 | $1 440 | 0.12 | Exhaust valve | Brown | +| | 2006-09-11 | $1 597 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-11 10:00 | $1 597 | 0.24 | Piston | Brown | +| Jones | $39 816 | | | | | | +| | Date | Price | Tax | Item | Customer | | +| | 2006-09-11 12:00 | $9 870 | 0.12 | Exhaust port | Jones | | +| | GroupId | Sum(price) | | | | | +| | 2006-09-08 | $8 000 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-08 10:00 | $5 000 | 0.24 | Intake port | Jones | +| | | 2006-09-08 12:00 | $3 000 | 0.12 | Valve cover | Jones | +| | 2006-09-09 | $2 100 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-09 10:00 | $2 100 | 0.12 | Engine block | Jones | +| | 2006-09-10 | $8 900 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-10 10:00 | $8 900 | 0.24 | Camshaft | Jones | +| | 2006-09-11 | $20 816 | 
| | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-11 12:00 | $9 870 | 0.12 | Exhaust port | Jones | +| | | 2006-09-11 13:00 | $6 765 | 0.12 | Crankshaft | Jones | +| | | 2006-09-11 12:00 | $4 181 | 0.24 | Rod bearing | Jones | +| Smith | $19 484 | | | | | | +| | Date | Price | Tax | Item | Customer | | +| | 2006-09-10 11:00 | $6 100 | 0.24 | Spark plug | Smith | | +| | GroupId | Sum(price) | | | | | +| | 2006-09-06 | $1 000 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-06 09:00 | $1 000 | 0.24 | Intake valve | Smith | +| | 2006-09-07 | $3 000 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-07 11:00 | $2 000 | 0.24 | Spring | Smith | +| | | 2006-09-07 10:00 | $1 000 | 0.12 | Rocker arm | Smith | +| | 2006-09-09 | $6 800 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-09 12:00 | $5 500 | 0.12 | Oil sump | Smith | +| | | 2006-09-09 12:00 | $1 300 | 0.24 | Coolant | Smith | +| | 2006-09-10 | $6 100 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-10 11:00 | $6 100 | 0.24 | Spark plug | Smith | +| | 2006-09-11 | $2 584 | | | | | +| | | Date | Price | Tax | Item | Customer | +| | | 2006-09-11 11:00 | $2 584 | 0.12 | Connection rod | Smith | + +## Structured grouping + +Structured grouping is nested grouping over an array of structs or maps. In this case, each array element is treated as a sub-document and may be grouped separately. See the reference for grouping on [multivalue attributes](../reference/querying/grouping-language#multivalue-attributes) for details. It is also possible to [filter the groups](../reference/querying/grouping-language#filtering-groups) so only matching elements are considered. 
An example could be: + +```js +select (…) | all(group(attributes.value) filter(regex("delivery_method",attributes.key)) each(output(sum(price)) each(output(summary())))) +``` + +## Range grouping + +In the examples above, results are grouped on distinct values, like customer or date. To group on price: + +```js +select (…) | all(group(price) each(each(output(summary())))) +``` + +This gives one group per price. To group on price *ranges*, one could compress the price range. This gives prices in `$0` - `$999` in bucket 0, `$1 000` - `$1 999` in bucket 1 and so on: + +```js +select (…) | all(group(price/1000) each(each(output(summary())))) +``` + +An alternative is using [bucket expressions](../reference/querying/grouping-language#bucket-expressions) - think of a bucket as the range per group. Group on price, make groups have a width of 1000: + +```js +select (…) | all(group(fixedwidth(price,1000)) each(each(output(summary())))) +``` + +Use `predefined` to configure group sizes individually (the two below are equivalent): + +```js +select (…) | +all( + group(predefined(price, + bucket(0,1000), + bucket(1000,2000), + bucket(2000,5000), + bucket(5000,inf))) + each(each(output(summary()))) +) +``` + +This works with strings as well - put Jones and Smith in the second group: + +```js +select (…) | all(group(predefined(customer, bucket(-inf,"Jones"), bucket("Jones", inf))) each(each(output(summary())))) +``` + +... 
or have Jones in his own group: + +```js +select (…) | all(group(predefined(customer, bucket<-inf,"Jones">, bucket["Jones"], bucket<"Jones", inf>)) each(each(output(summary())))) +``` + +Use decimal numbers in bucket definitions if the expression evaluates to a double or float: + +```js +select (…) +| all( + group(predefined(tax, + bucket(0.0, 0.2), + bucket(0.2, 0.5), + bucket(0.5, inf))) + each( + each(output(summary())) + ) +) +``` + +## Pagination + +Grouping supports [continuation](../reference/querying/grouping-language#continuations) objects that are passed as annotations to the grouping statement. The `continuations` annotation is a list of zero or more continuation strings, returned in the grouping result. For example, given the result: + +```json expandable +{ + "root": { + "children": [ + { + "children": [ + { + "children": [ + { + "fields": { + "count()": 7 + }, + "value": "Jones", + "id": "group:string:Jones", + "relevance": 1.0 + } + ], + "continuation": { + "next": "BGAAABEBEBC", + "prev": "BGAAABEABC" + }, + "id": "grouplist:customer", + "label": "customer", + "relevance": 1.0 + } + ], + "continuation": { + "this": "BGAAABEBCA" + }, + "id": "group:root:0", + "relevance": 1.0 + } + ], + "fields": { + "totalCount": 20 + }, + "id": "toplevel", + "relevance": 1.0 + } +} +``` + +reproduce the same result by passing the *this*\-continuation along the original select: + +```js +select (…) | { 'continuations':['BGAAABEBCA'] }all(…) +``` + +To display the next page of customers, pass the *this*\-continuation of the root group, and the *next* continuation of the customer list: + +```js +select (…) | { 'continuations':['BGAAABEBCA', 'BGAAABEBEBC'] }all(…) +``` + +To display the previous page of customers, pass the *this*\-continuation of the root group, and the *prev* continuation of the customer list: + +```js +select (…) | { 'continuations':['BGAAABEBCA', 'BGAAABEABC'] }all(…) +``` + +The `continuations` annotation is an ordered list of continuation strings. 
These are combined by replacement so that a continuation given later will replace any shared state with a continuation given before. Also, when using the `continuations` annotation, always pass the *this*\-continuation as its first element. + + +**Note:** + +Continuations work best when the ordering of hits is stable - which can be achieved by using [ranking](../basics/ranking) or [ordering](../reference/querying/grouping-language#order). Adding a tie-breaker might be needed - like [random.match](../reference/ranking/rank-features#random) or a random double value stored in each document - to keep the ordering stable in case of multiple documents that would otherwise get the same rank score or the same value used for ordering. + + +## Expressions + +Instead of just grouping on some attribute value, the `group` clause may contain arbitrarily complex expressions - see `group` in the [grouping reference](../reference/querying/grouping-language) for an exhaustive list. Examples: + +- Select everything. For example, `group("all") each(output(sum(price)))` gives total revenue +- Select the minimum or maximum of sub-expressions +- Addition, subtraction, multiplication, division, and even modulo of sub-expressions +- Bitwise operations on sub-expressions +- Concatenation of the results of sub-expressions + +Sum the prices of purchases on a per-hour-of-day basis: + +```js +select (…) | all(group(mod(div(date,mul(60,60)),24)) each(output(sum(price)))) +``` + +These types of expressions may also be used inside `output` operations, so instead of simply calculating the sum price of the grouped purchases, calculate the sum income after taxes per customer: + +```js +select (…) | all(group(customer) each(output(sum(mul(price,sub(1,tax)))))) +``` + +Note that the validity of an expression depends on the current nesting level. For example, while `sum(price)` is a valid expression for a group of hits, `price` on its own is not. 
As a general rule, each operator within an expression either applies to a single hit or aggregates values across a group. + +## Search Container API + +As an alternative to a textual representation, one can use the programmatic API to execute grouping requests. This allows multiple grouping requests to run in parallel, and does not collide with the `yql` parameter - example: + +```js expandable +@Override +public Result search(Query query, Execution execution) { + // Create grouping request. + GroupingRequest request = GroupingRequest.newInstance(query); + request.setRootOperation(new AllOperation() + .setGroupBy(new AttributeValue("foo")) + .addChild(new EachOperation() + .addOutput(new CountAggregator().setLabel("count")))); + + // Perform grouping request. + Result result = execution.search(query); + + // Process grouping result. + Group root = request.getResultGroup(result); + GroupList foo = root.getGroupList("foo"); + for (Hit hit : foo) { + Group group = (Group)hit; + Long count = (Long)group.getField("count"); + // TODO: Process group and count. + } + + // Pass results back to calling searcher. + return result; +} +``` + +Refer to the [API documentation](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/grouping/package-summary) for the complete reference. + +## TopN / Full corpus + +Simple grouping: count the number of documents in each group: + +```js +select * from purchase where true | all( group(customer) each(output(count())) ) +``` + +Two parallel groupings: + +```js +select * from purchase where true | all( + all( + group(customer) each(output(count())) + ) + all( + group(item) each(output(count())) + ) +) +``` + +Only the 1000 best hits will be grouped at each content node. 
Lower accuracy, but higher speed: + +```js +select * from purchase where true limit 0 | + all( + max(1000) + all( + group(customer) + each(output(count())) + ) + ) +``` + +## Selecting groups + +Do a modulo 3 operation before selecting the group: + +```js +select * from purchase where true limit 0 | + all( + group(price % 3) + each(output(count())) + ) +``` + +Do `price + tax * price` before selecting the group: + +```js +select * from purchase where true limit 0 | all( + group(price + tax * price) + each(output(count())) +) +``` + +## Ordering groups + +Do a modulo 5 operation before selecting the group - the groups are then ordered by their aggregated sum of attribute "tax": + +```js +select * from purchase where true limit 0 | all( + group(price % 5) + order(sum(tax)) + each(output(count())) +) +``` + +Do `price + tax * price` before selecting the group. Ordering is given by the maximum value of attribute "price" in each group: + +```js +select * from purchase where true limit 0 | + all( + group(price + tax * price) + order(max(price)) + each(output(count())) + ) +``` + +Take the average relevance of the groups and multiply it with the number of groups to get a cumulative count: + +```js +select * from purchase where true limit 0 | + all( + group(customer) + order(avg(relevance()) * count()) + each(output(count())) +) +``` + +One can not directly reference an attribute in the order clause, as this: + +```js +select * from purchase where true limit 0 | + all( + group(customer) + order(price * count()) + each(output(count())) +) +``` + +However, one can do this: + +```js +select * from purchase where true limit 0 | all( + group(customer) + order(max(price) * count()) + each(output(count())) +) +``` + +Ordering alphabetically works in a similar way: + +```js +select * from purchase where true limit 0 | all( + group(customer) + order(max(customer)) + each(output(count())) +) +``` + + +**Note:** + +You can control non-ASCII character folding behavior with [unicode 
collation](../reference/querying/grouping-language#other-expressions). + + +## Collecting aggregates + +Simple grouping to count the number of documents in each group and return the best hit in each group: + +```js +select * from purchase where true limit 0 | + all( + group(customer) + each( + max(1) + each(output(summary())) + ) +) +``` + +Also return the sum of attribute "price": + +```js +select * from purchase where true limit 0 | + all( group(customer) each(max(1) output(count(), sum(price)) each(output(summary()))) ) +``` + +Also, return an XOR of the 64 most significant bits of an MD5 over the concatenation of attributes "customer", "price" and "tax": + +```js +select * from purchase where true limit 0 | + all(group(customer) each(max(1) output(count(), sum(price), xor(md5(cat(customer, price, tax), 64))) each(output(summary())))) +``` + +It is also possible to return quantiles, for instance, the p50 and p90 of the price. + +```js +select * from purchase where true limit 0 | + all(group(customer) each(output(quantiles([0.5,0.9], price)))) +``` + +## Grouping + +Single-level grouping on "customer" attribute, returning at most 5 groups with full hit count as well as the 69 best hits. 
+ +```js +select * from purchase where true limit 0 | all(group(customer) max(5) each(max(69) output(count()) each(output(summary())))) +``` + +Two level grouping on "customer" and "item" attribute: + +```js +select * from purchase where true limit 0 | all(group(customer) max(5) each(output(count()) all(group(item) max(5) each(max(69) output(count()) each(output(summary())))))) +``` + +Three-level grouping on "customer", "item" and "attributes.key(coupon)" attribute: + +```js +select * from purchase where true limit 0 | all(group(customer) max(1) each(output(count()) all(group(item) max(1) each(output(count()) max(1) all(group(attributes.key) max(1) each(output(count()) each(output(summary())))))))) +``` + +As above, but also collect best hit in level 2: + +```js +select * from purchase where true limit 0 | all(group(customer) max(5) each(output(count()) all(group(item) max(5) each(output(count()) all(max(1) each(output(summary()))) all(group(attributes.key) max(5) each(max(69) output(count()) each(output(summary())))))))) +``` + +As above, but also collect best hit in level 1: + +```js +select * from purchase where true limit 0 | all(group(customer) max(5) each(output(count()) all(max(1) each(output(summary()))) all(group(item) max(5) each(output(count()) all(max(1) each(output(summary()))) all(group(attributes.key) max(5) each(max(69) output(count()) each(output(summary())))))))) +``` + +As above, but using different document summaries on each level: + +```js +select * from purchase where true limit 0 | + all( group(customer) max(5) each(output(count()) + all(max(1) each(output(summary(complexsummary)))) + all(group(item) max(5) each(output(count()) + all(max(1) each(output(summary(simplesummary)))) + all(group(price) max(5) each(max(69) output(count()) + each(output(summary(fastsummary)))))) ) +``` + +Deep grouping with counting and hit collection on all levels: + +```js +select * from purchase where true limit 0 | + all( group(customer) max(5) 
each(output(count()) + all(max(1) each(output(summary()))) + all(group(item) each(output(count()) + all(max(1) each(output(summary()))) + all(group(price) each(output(count()) + all(max(1) each(output(summary())))))))) ) +``` + +## Time and date + +The field (`time` below, but can have any name) must be a [long](../reference/schemas/schemas#long), with second resolution (unix timestamp/epoch). See the [reference](../reference/querying/grouping-language#time-expressions) for all time-functions. + +Group by year: + +```js +select * from purchase where true limit 0 | all(group(time.year(date)) each(output(count()))) +``` + +Group by year, then by month: + +```js +select * from purchase where true limit 0 | + all( group(time.year(date)) each(output(count()) + all(group(time.monthofyear(date)) each(output(count())))) ) +``` + +Groups *today*, *yesterday*, *lastweek*, and *lastmonth* using `predefined` aggregator, and groups each day within each of these separately: + +```js +select * from purchase where true limit 0 | + all( + group( + predefined((now() - date) / (60 * 60 * 24), + bucket(0,1), + bucket(1,2), + bucket(3,7), + bucket(8,31)) + ) + each(output(count()) + all(max(2) each(output(summary()))) + all(group((now() - date) / (60 * 60 * 24)) + each(output(count()) + all(max(2) each(output(summary()))) + ) + ) + ) + ) +``` + +### Timezones in grouping + +The `timezone` query parameter can be used to rewrite each time-function with a timezone offset. See the [reference](../reference/api/query#timezone). Example: + +```bash +$ vespa query "select * from purchase where true | \ + all( group(time.hourofday(date)) each(output(count()))" \ + "timezone=America/Los_Angeles" +``` + +This query selects all documents from `purchase`, groups them by the hour they were made (adjusted to the local time in `America/Los_Angeles`), and counts how many purchases fall into each hour. 
+ +## Geo distance + +Use `geo_distance` to compute the great-circle distance from a [position](../reference/schemas/schemas#position) field to a given point. Append `.km` or `.miles` to select the output unit. The function works on both `position` and `array` fields. For arrays, the minimum distance is returned. See the [reference](../reference/querying/grouping-language#geo_distance). + +Group into 100 km distance buckets from a point: + +```js +select * from purchase where true limit 0 | + all( group(fixedwidth(geo_distance(attribute(location), 63.4, 10.4).km, 100.0)) each(output(count())) ) +``` + +Output the minimum and maximum distance per customer: + +```js +select * from purchase where true limit 0 | + all( group(customer) each(output(min(geo_distance(attribute(location), 63.4, 10.4).km), + max(geo_distance(attribute(location), 63.4, 10.4).km))) ) +``` + +## Counting unique groups + +The `count` aggregator can be applied on a list of groups to determine the number of unique groups without having to explicitly retrieve all groups. Note that this count is an estimate using HyperLogLog++ which is an algorithm for the count-distinct problem. To get an accurate count, one needs to explicitly retrieve all groups and count them in a custom component or in the middle tier calling out to Vespa. This is network intensive and might not be feasible in cases with many unique groups. + +Another use case for this aggregator is counting the number of unique instances matching a given expression. 
+ +Output an estimate of the number of groups, which is equivalent to the number of unique values for attribute "customer": + +```js +select * from purchase where true limit 0 | all( group(customer) each(output(count())) ) +``` + +Output an estimate of the number of unique string lengths for the attribute "item": + +```js +select * from purchase where true limit 0 | all(group(strlen(item)) each(output(count()))) +``` + +Output the sum of the "price" attribute for each group in addition to the accurate count of the overall number of unique groups as the inner each causes all groups to be returned. + +```js +select * from purchase where true limit 0 | all(group(customer) output(count()) each(output(sum(price)))) +``` + +The `max` clause is used to restrict the number of groups returned. The query outputs the sum for the 3 best groups. The `count` clause outputs the estimated number of groups (potentially >3). The `count` becomes an estimate here as the number of groups is limited by max, while in the above example, it's not limited by max: + +```js +select * from purchase where true limit 0 | all(group(customer) max(3) output(count()) each(output(sum(price)))) +``` + +Output the number of top-level groups, and for the 10 best groups, output the number of unique values for attribute "item": + +```js +select * from purchase where true limit 0 | all(group(customer) max(10) output(count()) each(group(item) output(count()))) +``` + +## Counting unique groups - multivalue fields + +A [multivalue](/en/querying/searching-multivalue-fields) attribute is a [weighted set](../reference/schemas/schemas#weightedset), [array](../reference/schemas/schemas#array) or [map](../reference/schemas/schemas#map). Most grouping functions will just handle the elements of multivalued attributes separately, as if they were all individual values in separate documents. If you are grouping over array of struct or maps, scoping will be used to preserve structure. 
Each entry in the array/map will be treated as a separate sub-document, so documents can be counted twice or more - see [#33646](https://github.com/vespa-engine/vespa/issues/33646) for details. + +This could be solved by adding an additional level of grouping, where you group on a field that is unique for each document (grouping on document ID is not supported). You may then count the unique groups to determine the unique document count: + +```js +select * from purchase where true limit 0 | all(group(customer) each(group(item) output(count()))) +``` + +## Impression forecasting + +Using impression logs for a given user, one can make a function that maps from rank score to the number of impressions an advertisement would get - example: + +```js +Score Integer (# impressions for this user) +0.200 0 +0.210 1 +0.220 2 +0.240 3 +0.320 4 +0.420 5 +0.560 6 +0.700 7 +0.800 8 +0.880 9 +0.920 10 +0.940 11 +0.950 12 +``` + +Storing just the first column (the rank scores, including a rank score for 0 impressions) in an array attribute named *impressions*, the grouping operation [interpolatedlookup(impressions, relevance())](../reference/querying/grouping-language#interpolatedlookup) can be used to figure out how many times a given advertisement would have been shown to this particular user. + +So if the rank score is 0.420 for a specific user/ad/bid combination, then `interpolatedlookup(impressions, relevance())` would return 5.0. If the bid is increased so the rank score gets to 0.490, it would get 5.5 as the return value instead. + +In this context, a count of 5.5 isn't meaningful for the past of a single user, but it gives more information that may be used as a forecast. Summing this across more, different users may then be used to forecast the total of future impressions for the advertisement. + +## Aggregating over all documents + +Grouping is useful for analyzing data. 
To aggregate over the full document set, create *one* group (which will have *all* documents) by using a constant (here 1) - example: + +```js +select rating from restaurant where true | all(group(1) each(output(avg(price)))) +``` + +Make sure all documents have a value for the given field; if not, NaN is used, and the final result is also NaN: + +```json +{ + "id": "group:long:1", + "relevance": 0.0, + "value": "1", + "fields": { + "avg(price)": "NaN" + } +} +``` + +## Count fields with NaN + +Count number of documents missing a value for an [attribute](../content/attributes) field (actually, in this example, unset or less than 0, see the bucket expression below). Set a higher query timeout, just in case. Example, analyzing a field called *price*: + +```js +select rating from restaurant where true | all( group(predefined(price, bucket[-inf, 0>, bucket[0, inf>)) each(output(count())) ) +``` + +Example output, counting 2 documents with `-inf` in *price*: + +```json expandable +"children": [ + { + "id": "group:long_bucket:-9223372036854775808:0", + "relevance": 0.0, + "limits": { + "from": "-9223372036854775808", + "to": "0" + }, + "fields": { + "count()": 2 + } + }, + { + "id": "group:long_bucket:0:9223372036854775807", + "relevance": 0.0, + "limits": { + "from": "0", + "to": "9223372036854775807" + }, + "fields": { + "count()": 8 + } + } +] +``` + +See [analyzing field values](../writing/visiting#analyzing-field-values) for how to export ids of documents meeting given criteria from the full corpus. 
+ +## List fields with NaN + +This is similar to the counting of NaN above, but instead of aggregating the count, for each hit, print a [document summary](../reference/schemas/schemas#document-summary): + +```js +select rating from restaurant where true | + all( group(predefined(price, bucket[-inf, 0>, bucket[0, inf>)) + order(max(price)) + max(1) + each( max(100) each(output(summary()))) ) +``` + +Notes: + +- We are only interested in the first group, so order by `max(price)` and use `max(1)` to get only the first +- Uses `max(100)` in order to limit result set sizes. Read more about [grouping.defaultmaxhits](../reference/api/query#grouping.defaultmaxhits). +- Use the [continuation token](#pagination) to iterate over the result set. + +## Grouping over a Map field + +In the example data, a record looks like: + +```json +{ + "fields": { + "attributes": { + "delivery_method": "Curbside Pickup", + "sales_rep": "Bonnie", + "coupon": "SAVE10" + }, + "customer": "Smith", + "date": 1157526000, + "item": "Intake valve", + "price": "1000", + "tax": "0.24" + } +} +``` + +The map field [schema definition](../reference/schemas/schemas#map) is: + +```js +field attributes type map { + indexing: summary + struct-field key { indexing: attribute } + struct-field value { indexing: attribute } +} +``` + +With this, one can group on both key (`delivery_method`, `sales_rep`, and `coupon`) and values (here counting each value). 
Try the link to see the output: + +```js +select * from purchase where true limit 0 | + all( + group(attributes.key) + each( group(attributes.value) each(output(count()))) + ) +``` + +A more interesting example is to see the sum per sales rep: + +```js +select * from purchase where true limit 0 | + all( + group(attributes.key) + each( group(attributes.value) each(output(sum(price)))) + ) +``` diff --git a/mintlify-docs/en/querying/nearest-neighbor-search-guide.mdx b/mintlify-docs/en/querying/nearest-neighbor-search-guide.mdx new file mode 100644 index 0000000000..d0ff249b8a --- /dev/null +++ b/mintlify-docs/en/querying/nearest-neighbor-search-guide.mdx @@ -0,0 +1,1825 @@ +--- +title: "Vespa nearest neighbor search - a practical guide" +sidebarTitle: "Nearest neighbor search guide" +--- + +This guide is a practical introduction to using Vespa nearest neighbor search query operator and how to combine nearest neighbor search with other Vespa query operators. The guide uses Vespa's [embedding](../rag/embedding) support to map text to vectors. The guide also covers diverse, efficient candidate retrievers which can be used as candidate retrievers in a [multiphase ranking](../ranking/phased-ranking) funnel. + +The guide uses the [Last.fm](http://millionsongdataset.com/lastfm/) tracks dataset for illustration. Latency numbers mentioned in the guide are obtained from running this guide on a M1. See also the generic [Vespa performance - a practical guide](../performance/practical-search-performance-guide). 
+ +This guide covers the following: + +- [Free text search using Vespa weakAnd](#free-text-search-using-vespa-weakand) +- [Sparse maximum inner product search using Vespa wand](#maximum-inner-product-search-using-vespa-wand) +- [Exact nearest neighbor search](#exact-nearest-neighbor-search) +- [Approximate nearest neighbor search](#approximate-nearest-neighbor-search) +- [Combining approximate nearest neighbor search with filters](#combining-approximate-nearest-neighbor-search-with-query-filters) +- [Strict filters and distant neighbors - distanceThresholding](#strict-filters-and-distant-neighbors) +- [Hybrid sparse and dense retrieval methods with Vespa](#hybrid-sparse-and-dense-retrieval-methods-with-vespa) +- [Using multiple nearest neighbor search operators in the same query](#multiple-nearest-neighbor-search-operators-in-the-same-query) +- [Controlling filter behavior](#controlling-filter-behavior) + +The guide includes step-by-step instructions on how to reproduce the experiments. + +This guide expects a Vespa deployment with at least **4 GB** memory. + + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + ``` + $ podman machine init --memory 6000 + $ podman machine start + ``` + - See [Docker Containers](/en/operations/self-managed/docker-containers.html) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 4 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block.html). 
+- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli.html), or download the Vespa CLI from [Github releases](https://github.com/vespa-engine/vespa/releases). +- Python3 for converting the dataset to Vespa json. +- `curl` to download the dataset. + + +## Installing vespa-cli + +This tutorial uses [Vespa-CLI](../clients/vespa-cli), the official command-line client for Vespa.ai. It is a single binary without any runtime dependencies and is available for Linux, macOS and Windows: + +```bash +$ brew install vespa-cli +``` + +## Dataset + +This guide uses the [Last.fm](http://millionsongdataset.com/lastfm/) tracks dataset. Note that the dataset is released under the following terms: + +> *Research only, strictly non-commercial. For details, or if you are unsure, please contact Last.fm. Also, Last.fm has the right to advertise and refer to any work derived from the dataset.* + +To download the dataset execute the following (120 MB zip file): + +```bash +$ curl -L -o lastfm_test.zip \ + http://millionsongdataset.com/sites/default/files/lastfm/lastfm_test.zip +$ unzip lastfm_test.zip +``` + +The downloaded data must be converted to [the Vespa JSON feed format](../reference/schemas/document-json-format). + +This [python](https://www.python.org/) script can be used to traverse the dataset files and create a JSONL formatted feed file with Vespa put operations. The [schema](/en/basics/schemas) is covered in the next section. The number of unique `tags` is used as a proxy for the popularity of the track. 
+ +`Paste the above into file create-vespa-feed.py` + +```python expandable +import os +import sys +import json +import unicodedata + +directory = sys.argv[1] +seen_tracks = set() + +def remove_control_characters(s): + return "".join(ch for ch in s if unicodedata.category(ch)[0]!="C") + +def process_file(filename): + global seen_tracks + with open(filename) as fp: + doc = json.load(fp) + title = doc['title'] + artist = doc['artist'] + hash = title + artist + if hash in seen_tracks: + return + else: + seen_tracks.add(hash) + + track_id = doc['track_id'] + tags = doc['tags'] + tags_dict = dict() + for t in tags: + k,v = t[0],int(t[1]) + tags_dict[k] = v + n = len(tags_dict) + + vespa_doc = { + "put": "id:music:track::%s" % track_id, + "fields": { + "title": remove_control_characters(title), + "track_id": track_id, + "artist": remove_control_characters(artist), + "tags": tags_dict, + "popularity": n + } + } + print(json.dumps(vespa_doc)) + +sorted_files = [] +for root, dirs, files in os.walk(directory): + for filename in files: + filename = os.path.join(root, filename) + sorted_files.append(filename) +sorted_files.sort() +for filename in sorted_files: + process_file(filename) +python +import os +import sys +import json +import unicodedata + +directory = sys.argv[1] +seen_tracks = set() + +def remove_control_characters(s): + return "".join(ch for ch in s if unicodedata.category(ch)[0]!="C") + +def process_file(filename): + global seen_tracks + with open(filename) as fp: + doc = json.load(fp) + title = doc['title'] + artist = doc['artist'] + hash = title + artist + if hash in seen_tracks: + return + else: + seen_tracks.add(hash) + + track_id = doc['track_id'] + tags = doc['tags'] + tags_dict = dict() + for t in tags: + k,v = t[0],int(t[1]) + tags_dict[k] = v + n = len(tags_dict) + + vespa_doc = { + "put": "id:music:track::%s" % track_id, + "fields": { + "title": remove_control_characters(title), + "track_id": track_id, + "artist": remove_control_characters(artist), + 
"tags": tags_dict, + "popularity": n + } + } + print(json.dumps(vespa_doc)) + +sorted_files = [] +for root, dirs, files in os.walk(directory): + for filename in files: + filename = os.path.join(root, filename) + sorted_files.append(filename) +sorted_files.sort() +for filename in sorted_files: + process_file(filename) +``` + +Process the dataset and convert it to [Vespa JSON document operation](../reference/schemas/document-json-format) format. + + +```bash +$ python3 create-vespa-feed.py lastfm_test > feed.jsonl +``` + +## Create a Vespa Application Package + +A [Vespa application package](../basics/applications) is the set of configuration files and Java plugins that together define the behavior of a Vespa system: what functionality to use, the available document types, how ranking will be done, and how data will be processed during feeding and indexing. + +The minimum required files to create the basic search application are `track.sd` and `services.xml`. Create directories for the configuration files and embedding model: + +```bash +$ mkdir -p app/schemas; mkdir -p app/search/query-profiles/; mkdir -p app/model +``` + +### Schema + +A [schema](/en/basics/schemas) is a configuration of a document type and additional synthetic fields and [ranking](../basics/ranking) configuration. + +For this application, we define a `track` document type. 
+ +Write the following to `app/schemas/track.sd`: + +`Paste the above into file app/schemas/track.sd` + +```js expandable +schema track { + + document track { + + field track_id type string { + indexing: summary | attribute + match: word + } + + field title type string { + indexing: summary | index + index: enable-bm25 + } + + field artist type string { + indexing: summary | index + } + + field tags type weightedset { + indexing: summary | attribute + attribute: fast-search + } + + field popularity type int { + indexing: summary | attribute + attribute: fast-search + rank: filter + } + } + + field embedding type tensor(x[384]) { + indexing: input title | embed e5 |attribute | index + attribute { + distance-metric: angular + } + index { + hnsw { + max-links-per-node: 32 + neighbors-to-explore-at-insert: 200 + multi-threaded-indexing: false + } + } + } + + fieldset default { + fields: title, artist + } + + document-summary track_id { + summary track_id { } + } + + rank-profile tags { + first-phase { + expression: rawScore(tags) + } + } + + rank-profile bm25 { + first-phase { + expression: bm25(title) + } + } + + rank-profile closeness { + num-threads-per-search: 1 + match-features: distance(field, embedding) + + inputs { + query(q) tensor(x[384]) + query(q1) tensor(x[384]) + } + + first-phase { + expression: closeness(field, embedding) + } + } + + rank-profile closeness-t4 inherits closeness { + num-threads-per-search: 4 + } + + rank-profile closeness-label inherits closeness { + match-features: closeness(label, q) closeness(label, q1) + } + + rank-profile hybrid inherits closeness { + inputs { + query(wTags) : 1.0 + query(wPopularity) : 1.0 + query(wTitle) : 1.0 + query(wVector) : 1.0 + } + first-phase { + expression { + query(wTags) * rawScore(tags) + + query(wPopularity) * log(attribute(popularity)) + + query(wTitle) * log(bm25(title)) + + query(wVector) * closeness(field, embedding) + } + } + match-features { + rawScore(tags) + attribute(popularity) + bm25(title) 
+ closeness(field, embedding) + distance(field, embedding) + } + } +} +sd +schema track { + + document track { + + field track_id type string { + indexing: summary | attribute + match: word + } + + field title type string { + indexing: summary | index + index: enable-bm25 + } + + field artist type string { + indexing: summary | index + } + + field tags type weightedset { + indexing: summary | attribute + attribute: fast-search + } + + field popularity type int { + indexing: summary | attribute + attribute: fast-search + rank: filter + } + } + + field embedding type tensor(x[384]) { + indexing: input title | embed e5 |attribute | index + attribute { + distance-metric: angular + } + index { + hnsw { + max-links-per-node: 32 + neighbors-to-explore-at-insert: 200 + } + } + } + + fieldset default { + fields: title, artist + } + + document-summary track_id { + summary track_id { } + } + + rank-profile tags { + first-phase { + expression: rawScore(tags) + } + } + + rank-profile bm25 { + first-phase { + expression: bm25(title) + } + } + + rank-profile closeness { + num-threads-per-search: 1 + match-features: distance(field, embedding) + + inputs { + query(q) tensor(x[384]) + query(q1) tensor(x[384]) + } + + first-phase { + expression: closeness(field, embedding) + } + } + + rank-profile closeness-t4 inherits closeness { + num-threads-per-search: 4 + } + + rank-profile closeness-label inherits closeness { + match-features: closeness(label, q) closeness(label, q1) + } + + rank-profile hybrid inherits closeness { + inputs { + query(wTags) : 1.0 + query(wPopularity) : 1.0 + query(wTitle) : 1.0 + query(wVector) : 1.0 + } + first-phase { + expression { + query(wTags) * rawScore(tags) + + query(wPopularity) * log(attribute(popularity)) + + query(wTitle) * log(bm25(title)) + + query(wVector) * closeness(field, embedding) + } + } + match-features { + rawScore(tags) + attribute(popularity) + bm25(title) + closeness(field, embedding) + distance(field, embedding) + } + } +} +``` + 
+This document schema is explained in the [practical search performance guide](../performance/practical-search-performance-guide), the addition is the `embedding` field which is defined as a synthetic field outside of the document. This field is populated by Vespa's [embedding](../rag/embedding) functionality. Using the [E5](https://huggingface.co/intfloat/e5-small-v2) text embedding model (described in this [blog post](https://blog.vespa.ai/enhancing-vespas-embedding-management-capabilities/)). + +Note that the `closeness` rank-profile defines two query input tensors using [inputs](../reference/schemas/schemas#inputs). + +```js +field embedding type tensor(x[384]) { + indexing: input title | embed e5 | attribute | index + attribute { + distance-metric: angular + } + index { + hnsw { + max-links-per-node: 32 + neighbors-to-explore-at-insert: 200 + } + } + } +``` + +See [Approximate Nearest Neighbor Search using HNSW Index](/en/querying/approximate-nn-hnsw) for an introduction to `HNSW` and the `HNSW` tuning parameters. + +### Services Specification + +The [services.xml](../reference/applications/services/services) defines the services that make up the Vespa application — which services to run and how many nodes per service. Write the following to `app/services.xml`: + +```js expandable + + + + + + + + + + + + + + + + + + + 4 + + + + + + 1 + + + + + + + + +``` + +`Paste the above into file app/services.xml` + +The default [query profile](/en/querying/query-profiles) can be used to override default query api settings for all queries. + +The following enables [presentation.timing](../reference/api/query#presentation.timing) and renders `weightedset` fields as a JSON maps. 
+ +```xml + + true + true + +``` + +`Paste the above into file app/search/query-profiles/default.xml` + +The final step is to download embedding model files: + +```bash +$ curl -L -o app/model/e5-small-v2-int8.onnx \ + https://github.com/vespa-engine/sample-apps/raw/master/examples/model-exporting/model/e5-small-v2-int8.onnx +$ curl -L -o app/model/tokenizer.json \ + https://github.com/vespa-engine/sample-apps/raw/master/examples/model-exporting/model/tokenizer.json +``` + +## Deploy the application package + +The application package can now be deployed to a running Vespa instance. See also the [Vespa quick start guide](../basics/deploy-an-application-local). + +Start the Vespa container image using Docker: + +```bash +$ docker run --detach --name vespa --hostname vespa-container \ + --publish 8080:8080 --publish 19071:19071 \ + vespaengine/vespa +``` + +Starting the container can take a short while. Before continuing, make sure that the configuration service is running by using `vespa status deploy`. + + +```bash +$ vespa config set target local +$ vespa status deploy --wait 300 +``` + +Once ready, deploy the application using `vespa deploy`: + +```bash +$ vespa deploy --wait 300 app +``` + +## Index the dataset + +Feed the dataset. During indexing, Vespa will invoke the embedding model (which is relatively computationally expensive), so feeding and indexing this dataset takes about 180 seconds on an M1 laptop (535 inserts/s). + + +```bash +$ vespa feed -t http://localhost:8080 feed.jsonl +``` + +## Free-text search using Vespa weakAnd + +The following sections use the Vespa [query api](../reference/api/query) and formulate queries using Vespa [query language](/en/querying/query-language). The examples use the [vespa-cli](../clients/vespa-cli) command which supports running queries. + +The CLI uses the [Vespa query api](/en/querying/query-api). Use `vespa query -v` to see the curl equivalent: + +```bash +$ vespa query -v 'yql=select ..' 
+``` + +The first example is searching and ranking using the `bm25` rank profile defined in the schema. It uses the [bm25](../ranking/bm25) rank feature as the `first-phase` relevance score: + + +```bash +$ vespa query \ + 'yql=select artist, title, track_id from track where userQuery()' \ + 'query=total eclipse of the heart' \ + 'hits=1' \ + 'type=all' \ + 'ranking=bm25' +``` + +This query combines YQL [userQuery()](../reference/querying/yql#userquery) with Vespa's [simple query language](../reference/querying/simple-query-language). The [query type](../reference/api/query#model.type) is using `all`, requiring that all the terms match. + +The above query example searches for *total AND eclipse AND of AND the AND heart* in the `default` fieldset, which in the schema includes the `title` and `artist` fields. + +The [result](/en/reference/querying/default-result-format) for the above query will look something like this: + +```json expandable +{ + "timing": { + "querytime": 0.007, + "summaryfetchtime": 0.002, + "searchtime": 0.01 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 1 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 22.590392521579684, + "source": "tracks", + "fields": { + "track_id": "TRKLIXH128F42766B6", + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler" + } + } + ] + } +} +``` + +This query only matched one document because the query terms were `AND`ed. We can change matching to use `type=any` instead of the default `type=all`. See [supported query types](../reference/api/query#model.type). 
+ +```bash +$ vespa query \ + 'yql=select artist, title, track_id from track where userQuery()' \ + 'query=total eclipse of the heart' \ + 'hits=1' \ + 'ranking=bm25' \ + 'type=any' +``` + +Now, the query matches 24,053 documents and is considerably slower than the previous query. Comparing `querytime` of these two query examples, the one which matches the most documents has the highest `querytime`. In the worst case, the search query matches all documents. + +Query matching performance is greatly impacted by the number of documents that match the query specification. Type `any` queries require more compute resources than type `all`. + +There is an optimization available for `type=any` queries, using the `weakAnd` query operator which implements the WAND algorithm. See the [using wand with Vespa](../ranking/wand) guide for more details. + +Run the same query, but instead of `type=any` use `type=weakAnd`, see [supported query types](../reference/api/query#model.type): + + +```bash +$ vespa query \ + 'yql=select artist, title, track_id from track where userQuery()' \ + 'query=total eclipse of the heart' \ + 'hits=1' \ + 'ranking=bm25' \ + 'type=weakAnd' +``` + +Compared to the type `any` query which fully ranked 24,053 documents, the query only exposes about 3,600 documents to the `first-phase` ranking expression. Also notice that the faster search returns the same document at the first position. + +```bash +$ vespa query \ + 'yql=select artist, title, track_id from track where userQuery()' \ + 'query="total eclipse of the heart"' \ + 'hits=1' \ + 'ranking=bm25' \ + 'type=weakAnd' +``` + +In this case, the query input *"total eclipse of the heart"* is parsed as a phrase query, and the search only finds 1 document matching the exact phrase. + +## Maximum Inner Product Search using Vespa WAND + +The previous section introduced the `weakAnd` query operator which integrates with [linguistic processing](../linguistics/linguistics) and string matching using `match: text`. 
+ +The following examples uses the [wand()](../reference/querying/yql#wand) query operator. The `wand` query operator calculates the maximum inner product between the sparse query and document feature integer weights. The inner product ranking score calculated by the `wand` query operator can be used in a ranking expression by the [rawScore(name)](../reference/ranking/rank-features#match-operator-scores) rank feature. + +```js +rank-profile tags { + first-phase { + expression: rawScore(tags) + } +} +``` + +This query searches the track document type using a learned sparse *userProfile* representation, performing a maximum inner product search over the `tags` weightedset field. + +```bash +$ vespa query \ + 'yql=select track_id, title, artist from track where {totalTargetHits:10}wand(tags, @userProfile)' \ + 'userProfile={"pop":1, "love songs":1,"romantic":10, "80s":20 }' \ + 'hits=2' \ + 'ranking=tags' +``` + +The query asks for 2 hits to be returned, and uses the `tags` rank profile. The [result](/en/reference/querying/default-result-format) for the above query will look something like this: + +```json expandable +{ + "timing": { + "querytime": 0.051000000000000004, + "summaryfetchtime": 0.004, + "searchtime": 0.057 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 66 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/57037bdeb9caadebd8c235e1", + "relevance": 2500.0, + "source": "tracks", + "fields": { + "track_id": "TRMIBBE128E078B487", + "title": "The Rose ***", + "artist": "Bonnie Tyler" + } + }, + { + "id": "index:tracks/0/8eb2e19ee627b054113ba4c9", + "relevance": 2344.0, + "source": "tracks", + "fields": { + "track_id": "TRKDRVK128F421815B", + "title": "Nothing's Gonna Change My Love For You", + "artist": "Glenn Medeiros" + } + } + ] + } +} +``` + +The `wand` query operator exposed a total of about 
60 documents to the `first-phase` ranking which uses the `rawScore(tags)` rank-feature directly, so the `relevance` is the result of the sparse dot product between the sparse user profile and the document tags. + +The `wand` query operator is safe, meaning that it returns the same top-k results as the brute-force `dotProduct` query operator. `wand` is a type of query operator which interleaves matching and ranking, skipping documents which cannot compete for the final top-k results. See the [using wand with Vespa](../ranking/wand) guide for more details on using `wand` and `weakAnd` query operators. + +## Exact nearest neighbor search + +Vespa's nearest neighbor search operator supports doing exact brute force nearest neighbor search using dense representations. The first query example uses [exact nearest neighbor search](/en/querying/nearest-neighbor-search) and Vespa embed functionality: + +```bash +$ vespa query \ + 'yql=select title, artist from track where {approximate:false,totalTargetHits:10}nearestNeighbor(embedding,q)' \ + 'hits=1' \ + 'ranking=closeness' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +Query breakdown: + +- Search for the ten (`totalTargetHits:10`) nearest neighbors of the `query(q)` query tensor over the `embedding` document tensor field. +- The annotation `approximate:false` tells Vespa to perform exact search. +- The `hits` parameter controls how many results are returned in the response. Number of `hits` requested does not impact `totalTargetHits`. +- `ranking=closeness` tells Vespa which [rank-profile](../basics/ranking) to use to score documents. One must specify how to *rank* the `totalTargetHits` documents retrieved and exposed to the `first-phase` ranking expression in the `rank-profile`. +- `input.query(q)` is the query vector produced by the [embedder](../rag/embedding#embedding-a-query-text). 
+ +Not specifying [ranking](../reference/api/query#ranking.profile) will cause Vespa to use [nativeRank](../ranking/nativerank) which does not use the vector similarity, causing results to be randomly sorted. + +The above exact nearest neighbor search will return the following [result](/en/reference/querying/default-result-format): + +```json expandable +{ + "timing": { + "querytime": 0.012, + "summaryfetchtime": 0.001, + "searchtime": 0.014 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 101 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 1.0, + "source": "tracks", + "fields": { + "matchfeatures": { + "distance(field,embedding)": 0.0 + }, + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler" + } + } + ] + } +} +``` + +The exact search takes approximately 14ms, performing 95,666 distance calculations. A total of about 101 documents were exposed to the first-phase ranking during the search as can be seen from `totalCount`. The `relevance` is the result of the `rank-profile` scoring. + +It is possible to reduce search latency of the exact search by throwing more CPU resources at it. Changing the rank-profile to `closeness-t4` makes Vespa use four threads per query: + +```bash +$ vespa query \ + 'yql=select title, artist from track where {approximate:false,totalTargetHits:10}nearestNeighbor(embedding,q)' \ + 'hits=1' \ + 'ranking=closeness-t4' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +Now, the exact search latency is reduced by using more threads, see [multithreaded searching and ranking](../performance/practical-search-performance-guide#multithreaded-search-and-ranking) for more on this topic. 
+ +```json +{ + "timing": { + "querytime": 0.008, + "summaryfetchtime": 0.001, + "searchtime": 0.008 + } +} +``` + +## Approximate nearest neighbor search + +This section covers using the faster, but approximate, nearest neighbor search. The `track` schema's `embedding` field has the `index` property, which means Vespa builds a `HNSW` index to support fast, approximate vector search. See [Approximate Nearest Neighbor Search using HNSW Index](/en/querying/approximate-nn-hnsw) for an introduction to `HNSW` and the tuning parameters. + +The default query behavior is using `approximate:true` when the `embedding` field has `index`: + +```bash +$ vespa query \ + 'yql=select title, artist from track where {totalTargetHits:10,hnsw.exploreAdditionalHits:20}nearestNeighbor(embedding,q)' \ + 'hits=1' \ + 'ranking=closeness' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +Which returns the following response: + +```json expandable +{ + "timing": { + "querytime": 0.004, + "summaryfetchtime": 0.001, + "searchtime": 0.004 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 10 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 0.5992897837210658, + "source": "tracks", + "fields": { + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler" + } + } + ] + } +} +``` + +Now, the query is faster, and also uses fewer resources during the search. To get latency down to 8 ms with the exact search one had to use 4 matching threads. In this case the result latency is down to 4 ms with a single matching thread. For this query example, the approximate search returned the exact same top-1 hit and there was no accuracy loss for the top-1 position. Note that the overall query time is dominated by the `embed` inference. 
+ +A few key differences between `exact` and `approximate` neighbor search: + +- `totalCount` is different: when using the approximate version, Vespa exposes exactly `totalTargetHits` hits to the configurable `first-phase` rank expression in the chosen `rank-profile`. The exact search is using a scoring heap during evaluation (chunked distance calculations), and documents which at some time were put on the top-k heap are exposed to first phase ranking. + +- The search is approximate and might not return the exact top 10 closest vectors as with exact search. This is a complex tradeoff between accuracy, query performance, and memory usage. See [Billion-scale vector search with Vespa - part two](https://blog.vespa.ai/billion-scale-knn-part-two/) for a deep-dive into these trade-offs. + +With the support for setting `approximate:false|true` a developer can quantify accuracy loss by comparing the results of exact nearest neighbor search with the results of the approximate search. By doing so, developers can quantify the recall@k or overlap@k, and find the right balance between search performance and accuracy. Increasing `hnsw.exploreAdditionalHits` improves accuracy (recall@k) at the cost of a slower query. + +## Combining approximate nearest neighbor search with query filters + +Vespa allows the search for nearest neighbors to be constrained by regular query filters. 
In this query example the `title` field must contain the term `heart`: + +```bash +$ vespa query \ + 'yql=select title, artist from track where {totalTargetHits:10}nearestNeighbor(embedding,q) and title contains "heart"' \ + 'hits=2' \ + 'ranking=closeness' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +Which returns the following response: + +```json expandable +{ + "timing": { + "querytime": 0.005, + "summaryfetchtime": 0.001, + "searchtime": 0.007 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 47 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 1.0, + "source": "tracks", + "fields": { + "matchfeatures": { + "distance(field,embedding)": 0.0 + }, + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler" + } + }, + { + "id": "index:tracks/0/d0cb22cbcdb30796eca3a731", + "relevance": 0.6831990558852824, + "source": "tracks", + "fields": { + "matchfeatures": { + "distance(field,embedding)": 0.46370225688355227 + }, + "title": "Cirrhosis of the Heart", + "artist": "Foetus" + } + } + ] + } +} +``` + +The query term `heart` does in this case not impact the ordering (ranking) of the results, as the rank-profile used only uses the vector similarity closeness. + +When using filtering, it is important for performance reasons that the fields that are included in the filters have been defined with `index` or `attribute:fast-search`. See [searching attribute fields](../performance/practical-search-performance-guide#searching-attribute-fields). 
+ +The optimal performance for combining nearestNeighbor search with filtering, where the query term(s) does not influence ranking, is achieved using `rank: filter` in the schema (See [ranking expressions](../reference/ranking/ranking-expressions)): + +```js +field popularity type int { + indexing: summary | attribute + rank: filter + attribute: fast-search +} +``` + +Matching against the popularity field does not influence ranking, and Vespa can use the most efficient posting list representation. Note that one can still access the value of the `popularity` attribute in [ranking expressions](../ranking/ranking-expressions-features). + +```js +rank-profile popularity { + first-phase { + expression: attribute(popularity) + } +} +``` + +In the following example, since the `title` field does not have `rank: filter` one can instead flag that the term should not be used by any ranking expression by using the [`ranked` query annotation](../reference/querying/yql#ranked). + +The following disables [term based ranking](../reference/querying/yql#ranked) and the matching against the `title` field can use the most efficient posting list representation. + +```bash +$ vespa query \ + 'yql=select title, artist from track where {totalTargetHits:10}nearestNeighbor(embedding,q) and title contains ({ranked:false}"heart")' \ + 'hits=2' \ + 'ranking=closeness' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +In the previous examples, since the rank-profile did only use the [closeness](../reference/ranking/rank-features#closeness(dimension,name)) rank feature, the matching would not impact the score anyway. + +Vespa also allows combining the [nearestNeighbor](../reference/querying/yql#nearestneighbor) query operator with any other Vespa query operator. 
+ +```bash +$ vespa query \ + 'yql=select title, popularity, artist from track where {totalTargetHits:10}nearestNeighbor(embedding,q) and popularity > 20 and artist contains "Bonnie Tyler"' \ + 'hits=2' \ + 'ranking=closeness' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +This query example restricts the search to tracks by `Bonnie Tyler` with `popularity > 20`. + +### Strict filters and distant neighbors + +When combining nearest neighbor search with strict filters that match less than 2 percent of the total number of documents, Vespa will instead of searching the HNSW graph, constrained by the filter, fall back to using exact nearest neighbor search. See [Controlling filter behavior](#controlling-filter-behavior) for how to adjust the threshold for which strategy that is used. Since exact search may expose more than `totalTargetHits` hits to the `first-phase` ranking expression, users will observe that `totalCount` increases and is higher than `totalTargetHits` when falling back to exact search. This can be seen in the previous examples. When using exact search with filters, the search can also use multiple threads to evaluate the query, which helps reduce the latency impact. + +With strict filters that remove many hits, the hits (nearest neighbors) might not be *near* in the embedding space, but *far*, or *distant* neighbors. Technically, all document vectors are a neighbor of the query vector, but with a varying distance. + +With restrictive filters, the neighbors that are returned might be of low quality (far distance). One way to combat this effect is to use the [distanceThreshold](../reference/querying/yql#distancethreshold) query annotation parameter of the `nearestNeighbor` query operator. The value of the `distance` depends on the [distance-metric](../reference/schemas/schemas#distance-metric) used. 
By adding the [distance(field,embedding)](../reference/ranking/rank-features#distance(dimension,name)) rank-feature to the `match-features` of the `closeness` rank-profiles, it is possible to analyze what distance could be considered too far. See [match-features reference](../reference/schemas/schemas#match-features). + +Note that distance of 0 is perfect, while distance of 1 is distant. The `distanceThreshold` remove hits that have a **higher** `distance(field, embedding)` than `distanceThreshold`. The `distanceThreshold` is applied regardless of performing exact or approximate search. + +The following query with a restrictive filter on popularity is used for illustration: + +```bash +$ vespa query \ + 'yql=select title, popularity, artist from track where {totalTargetHits:10}nearestNeighbor(embedding,q) and popularity > 80' \ + 'hits=2' \ + 'ranking=closeness-t4' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +The above query returns + +```json expandable +{ + "timing": { + "querytime": 0.008, + "summaryfetchtime": 0.002, + "searchtime": 0.011 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 63 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 0.5992897875290117, + "source": "tracks", + "fields": { + "matchfeatures": { + "distance(field,embedding)": 0.0 + }, + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler", + "popularity": 100 + } + }, + { + "id": "index:tracks/0/57c74bd2d466b7cafe30c14d", + "relevance": 0.6700445710774405, + "source": "tracks", + "fields": { + "matchfeatures": { + "distance(field,embedding)": 0.49243803049099677 + }, + "title": "Eclipse", + "artist": "Kyoto Jazz Massive", + "popularity": 100 + } + } + ] + } +} +``` + +By using a `distanceTreshold` of 0.2, the `Eclipse` track will be removed from the 
result because its `distance(field, embedding)` is close to 0.5. + +```bash +$ vespa query \ + 'yql=select title, popularity, artist from track where {distanceThreshold:0.2,totalTargetHits:10}nearestNeighbor(embedding,q) and popularity > 80' \ + 'hits=2' \ + 'ranking=closeness' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +```json expandable +{ + "timing": { + "querytime": 0.008, + "summaryfetchtime": 0.001, + "searchtime": 0.011 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 1 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 0.5992897875290117, + "source": "tracks", + "fields": { + "matchfeatures": { + "distance(field,embedding)": 0.0 + }, + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler", + "popularity": 100 + } + } + ] + } +} +``` + +Setting an appropriate `distanceThreshold` is best handled by supervised learning as the distance threshold should be calibrated based on the query complexity and possibly also the feature distributions of the returned top-k hits. Having the `distance` rank feature returned as `match-features` enables post-processing of the result using a custom [re-ranking/filtering searcher](../ranking/reranking-in-searcher). The post-processing searcher can analyze the score distributions of the returned top-k hits (using the features returned with `match-features`), remove low scoring hits before presenting the result to the end user, or not return any results at all. + +## Hybrid sparse and dense retrieval methods with Vespa + +In the previous filtering examples the ranking was not impacted by the filters. They were only used to impact recall, not the order of the results. The following examples demonstrate how to perform hybrid retrieval combining the efficient query operators in a single query. 
Hybrid retrieval can be used as the first phase in a multiphase ranking funnel, see Vespa's [phased ranking](../ranking/phased-ranking). + +The first query example combines the `nearestNeighbor` operator with the `weakAnd` operator, combining them using logical disjunction (`OR`). This type of query enables retrieval based on both semantic (vector distance) and traditional sparse (exact) matching. + +```bash +$ vespa query \ + 'yql=select title, artist from track where {totalTargetHits:100}nearestNeighbor(embedding,q) or userQuery()' \ + 'query=total eclipse of the heart' \ + 'type=weakAnd' \ + 'hits=2' \ + 'ranking=hybrid' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +The query combines the sparse `weakAnd` and dense `nearestNeighbor` query operators using logical disjunction. Both query operators retrieve the target number of hits (or more), each ranked by its inner raw score function. + +The hits exposed to the configurable `first-phase` ranking expression are a combination of the best hits from the two different retrieval strategies. The ranking is performed using the `hybrid` rank profile which serves as an example of how to combine the different efficient retrievers. 
+ +```js expandable +rank-profile hybrid inherits closeness { + inputs { + query(wTags) : 1 + query(wPopularity) : 1 + query(wTitle) : 1 + query(wVector) : 1 + } + first-phase { + expression { + query(wTags) * rawScore(tags) + + query(wPopularity) * log(attribute(popularity)) + + query(wTitle) * log(bm25(title)) + + query(wVector) * closeness(field, embedding) + } + } + match-features { + rawScore(tags) + attribute(popularity) + bm25(title) + closeness(field, embedding) + } + } +``` + +The query returns the following result: + +```json expandable +{ + "timing": { + "querytime": 0.005, + "summaryfetchtime": 0.0, + "searchtime": 0.005 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 1181 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 8.72273659535481, + "source": "tracks", + "fields": { + "matchfeatures": { + "attribute(popularity)": 100.0, + "bm25(title)": 22.591334631476823, + "closeness(field,embedding)": 1.0, + "distance(field,embedding)": 0.0, + "query(wVector)": 1.0, + "rawScore(tags)": 0.0 + }, + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler" + } + }, + { + "id": "index:tracks/0/57c74bd2d466b7cafe30c14d", + "relevance": 7.762818642331215, + "source": "tracks", + "fields": { + "matchfeatures": { + "attribute(popularity)": 100.0, + "bm25(title)": 12.03241051547921, + "closeness(field,embedding)": 0.6700445710774405, + "distance(field,embedding)": 0.49243803049099677, + "query(wVector)": 1.0, + "rawScore(tags)": 0.0 + }, + "title": "Eclipse", + "artist": "Kyoto Jazz Massive" + } + } + ] + } +} +``` + +The result hits also include [match-features](../reference/schemas/schemas#match-features) which can be used for feature logging for learning to rank, or to simply debug the various feature components used to calculate the `relevance` score. 
+ +In the below query, we lower the weight of the popularity factor by adjusting `query(wPopularity)` to 0.1: + +```bash +$ vespa query \ + 'yql=select title, artist from track where {totalTargetHits:100}nearestNeighbor(embedding,q) or userQuery()' \ + 'query=total eclipse of the heart' \ + 'type=weakAnd' \ + 'hits=2' \ + 'ranking=hybrid' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' \ + 'input.query(wPopularity)=0.1' +``` + +Which changes the order and a different hit is surfaced at position two: + +```json expandable +{ + "timing": { + "querytime": 0.005, + "summaryfetchtime": 0.0, + "searchtime": 0.006 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 1181 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 4.5780834279655265, + "source": "tracks", + "fields": { + "matchfeatures": { + "attribute(popularity)": 100.0, + "bm25(title)": 22.591334631476823, + "closeness(field,embedding)": 1.0, + "distance(field,embedding)": 0.0, + "query(wVector)": 1.0, + "rawScore(tags)": 0.0 + }, + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler" + } + }, + { + "id": "index:tracks/0/51bae2353aa0c9e9c70bf94e", + "relevance": 4.044676118507788, + "source": "tracks", + "fields": { + "matchfeatures": { + "attribute(popularity)": 23.0, + "bm25(title)": 20.07427771872962, + "closeness(field,embedding)": 0.7316874168710507, + "distance(field,embedding)": 0.3667038368328748, + "query(wVector)": 1.0, + "rawScore(tags)": 0.0 + }, + "title": "Total Eclipse", + "artist": "The Alan Parsons Project" + } + } + ] + } +} +``` + +The following query adds the personalization component using the sparse user profile into the retriever mix. 
+ +```text +userProfile={"love songs":1, "love":1,"80s":1} +``` + +Which can be used with the `wand` query operator to retrieve personalized hits for ranking. + +```bash +$ vespa query \ + 'yql=select title, artist from track where {totalTargetHits:100}nearestNeighbor(embedding,q) or userQuery() or ({totalTargetHits:10}wand(tags, @userProfile))' \ + 'query=total eclipse of the heart' \ + 'type=weakAnd' \ + 'hits=2' \ + 'ranking=hybrid' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' \ + 'input.query(wPopularity)=0.1' \ + 'userProfile={"love songs":1, "love":1,"80s":1}' +``` + +Now we have new top ranking documents. Notice that `totalCount` increases as the `wand` query operator retrieved more hits into `first-phase` ranking. Also notice that the `relevance` score changes. + +```json expandable +{ + "timing": { + "querytime": 0.01, + "summaryfetchtime": 0.003, + "searchtime": 0.014 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 1243 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/fc82cbb0d5d5b5747d65c451", + "relevance": 144.9997464905854, + "source": "tracks", + "fields": { + "matchfeatures": { + "attribute(popularity)": 100.0, + "bm25(title)": 6.722228506472074, + "closeness(field,embedding)": 0.633809749439362, + "distance(field,embedding)": 0.5777605202263811, + "query(wVector)": 1.0, + "rawScore(tags)": 0.0 + }, + "title": "Straight From The Heart", + "artist": "Bryan Adams" + } + }, + { + "id": "index:tracks/0/66b3ab21d5eb0a9078bf8787", + "relevance": 135.51757722374737, + "source": "tracks", + "fields": { + "matchfeatures": { + "attribute(popularity)": 34.0, + "bm25(title)": 4.7884050004449215, + "closeness(field,embedding)": 0.5987438006081908, + "distance(field,embedding)": 0.6701634304759765, + "query(wVector)": 1.0, + "rawScore(tags)": 0.0 + }, + "title": "Lady Of The Dawn", + 
"artist": "Mike Batt" + } + } + ] + } +} +``` + +Changing from logical `OR` to `AND` instead will intersect the result of the two efficient retrievers. The search for nearest neighbors is constrained to documents that at least match one of the query terms in the `weakAnd`. + +```bash +$ vespa query \ + 'yql=select title, artist from track where {totalTargetHits:100}nearestNeighbor(embedding,q) and userQuery()' \ + 'query=total eclipse of the heart' \ + 'type=weakAnd' \ + 'hits=2' \ + 'ranking=hybrid' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +In this case, the documents exposed to ranking must match at least one of the query terms (for WAND to retrieve it). It is also possible to combine hybrid search with filters; the following query filters both the sparse and dense retrieval on popularity: + +```bash +$ vespa query \ + 'yql=select title, artist from track where {totalTargetHits:100}nearestNeighbor(embedding,q) and userQuery() and popularity < 75' \ + 'query=total eclipse of the heart' \ + 'type=weakAnd' \ + 'hits=2' \ + 'ranking=hybrid' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +Another interesting approach for hybrid retrieval is to use Vespa's [rank()](../reference/querying/yql#rank) query operator. The first operand of the `rank()` operator is used for retrieval, and the remaining operands are only used to compute rank features for those hits retrieved by the first operand. 
+ +```bash +$ vespa query \ + 'yql=select title, artist from track where rank({totalTargetHits:100}nearestNeighbor(embedding,q), userQuery())' \ + 'query=total eclipse of the heart' \ + 'type=weakAnd' \ + 'hits=2' \ + 'ranking=hybrid' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +This query returns 100 documents, since only the first operand of the `rank` query operator was used for *retrieval*, the sparse `userQuery()` representation was only used to calculate sparse [rank features](../reference/ranking/rank-features.html) for the results retrieved by the `nearestNeighbor`. Sparse rank features such as `bm25(title)` for example. + +```json expandable +{ + "timing": { + "querytime": 0.005, + "summaryfetchtime": 0.003, + "searchtime": 0.009000000000000001 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 100 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 8.72273659535481, + "source": "tracks", + "fields": { + "matchfeatures": { + "attribute(popularity)": 100.0, + "bm25(title)": 22.591334631476823, + "closeness(field,embedding)": 1.0, + "distance(field,embedding)": 0.0, + "query(wVector)": 1.0, + "rawScore(tags)": 0.0 + }, + "title": "Total Eclipse Of The Heart", + "artist": "Bonnie Tyler" + } + }, + { + "id": "index:tracks/0/202014b34cdd67ac28585105", + "relevance": 7.063803473430664, + "source": "tracks", + "fields": { + "matchfeatures": { + "attribute(popularity)": 100.0, + "bm25(title)": 6.120690332283275, + "closeness(field,embedding)": 0.6469583978870177, + "distance(field,embedding)": 0.5456944422794805, + "query(wVector)": 1.0, + "rawScore(tags)": 0.0 + }, + "title": "Loose Heart", + "artist": "Riverside" + } + } + ] + } +} +``` + +One can also do this the other way around, retrieve using the sparse representation, and have Vespa 
calculate the `closeness(field, embedding)` or related rank features for the hits retrieved by the sparse query representation. + +```bash +$ vespa query \ + 'yql=select title, artist from track where rank(userQuery(),{totalTargetHits:100}nearestNeighbor(embedding,q))' \ + 'query=total eclipse of the heart' \ + 'type=weakAnd' \ + 'hits=2' \ + 'ranking=hybrid' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +The `weakAnd` query operator exposes more hits to ranking than approximate nearest neighbor search, similar to the `wand` query operator. Generally, using the `rank` query operator is more efficient than combining query retriever operators using `or`. See also the [Vespa passage ranking](https://github.com/vespa-engine/sample-apps/blob/master/msmarco-ranking/) for complete examples of different retrieval strategies for [multiphase ranking](../ranking/phased-ranking) funnels. + +One can also use the `rank` operator to first retrieve by some filter logic, and compute distance or similarity for the retrieved documents. + +```bash +$ vespa query \ + 'yql=select title, popularity, artist from track where rank(popularity>99,{totalTargetHits:10}nearestNeighbor(embedding,q))' \ + 'hits=2' \ + 'ranking=closeness' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +Queries that only use the `nearestNeighbor` operator as the second operand of `rank` do not need `HNSW` indexing, which saves considerable indexing and memory resources. + +## Multiple nearest neighbor search operators in the same query + +This section looks at how to use multiple `nearestNeighbor` query operator instances in the same Vespa query request. The following Vespa query combines two `nearestNeighbor` query operators using logical disjunction (`OR`), referencing two different query tensor inputs: + +- `input.query(q)` holding the *Total Eclipse Of The Heart* query vector. +- `input.query(q1)` holding the *Summer of '69* query vector. 
+ +```bash +$ vespa query \ + 'yql=select title from track where ({totalTargetHits:10}nearestNeighbor(embedding,q)) or ({totalTargetHits:10}nearestNeighbor(embedding,q1))' \ + 'hits=2' \ + 'ranking=closeness' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' \ + 'input.query(q1)=embed(e5, "Summer of 69")' +``` + +The query exposes 20 hits to first phase ranking, as seen from `totalCount`. Ten from each nearest neighbor query operator: + +```json expandable +{ + "timing": { + "querytime": 0.004, + "summaryfetchtime": 0.002, + "searchtime": 0.007 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 20 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/5b1c2ae1024d88451c2f1c5a", + "relevance": 1.0, + "source": "tracks", + "fields": { + "matchfeatures": { + "distance(field,embedding)": 0.0 + }, + "title": "Summer of 69" + } + }, + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 1.0, + "source": "tracks", + "fields": { + "matchfeatures": { + "distance(field,embedding)": 0.0 + }, + "title": "Total Eclipse Of The Heart" + } + } + ] + } +} +``` + +Utilizing a combination of various query embeddings within a single query request holds numerous applications, particularly in cases involving shorter queries with inherent ambiguity. In such scenarios, employing query expansion and query rewrites can facilitate retrieval by accommodating multiple interpretations. + +### Using label + +One can also use the [label](../reference/querying/yql#label) query term annotation when there are multiple `nearestNeighbor` operators in the same query to get the distance or closeness per query vector. 
Notice we use the `closeness-label` rank-profile defined in the schema: + +```js +rank-profile closeness-label inherits closeness { + match-features: closeness(label, q) closeness(label, q1) +} +``` + +```bash +$ vespa query \ + 'yql=select title from track where ({ label:"q", totalTargetHits:10}nearestNeighbor(embedding,q)) or ({label:"q1",totalTargetHits:10}nearestNeighbor(embedding,q1))' \ + 'hits=2' \ + 'ranking=closeness-label' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' \ + 'input.query(q1)=embed(e5, "Summer of 69")' +``` + +The above query annotates the two `nearestNeighbor` query operators using [label](../reference/querying/yql#label) query annotation. + +```json expandable +{ + "timing": { + "querytime": 0.004, + "summaryfetchtime": 0.0, + "searchtime": 0.005 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 20 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/5b1c2ae1024d88451c2f1c5a", + "relevance": 1.0, + "source": "tracks", + "fields": { + "matchfeatures": { + "closeness(label,q)": 0.6039000433424319, + "closeness(label,q1)": 1.0 + }, + "title": "Summer of 69" + } + }, + { + "id": "index:tracks/0/f13697952a0d5eaeb2c43ffc", + "relevance": 1.0, + "source": "tracks", + "fields": { + "matchfeatures": { + "closeness(label,q)": 1.0, + "closeness(label,q1)": 0.6039000433424319 + }, + "title": "Total Eclipse Of The Heart" + } + } + ] + } +} +``` + +Note that the previous examples used `or` to combine the two operators. Using `and` instead, requires that there are documents that is in both the top-k results. Increasing `totalTargetHits` to 500, finds a few tracks that overlap. 
+ +```bash +$ vespa query \ + 'yql=select title from track where ({label:"q", totalTargetHits:500}nearestNeighbor(embedding,q)) and ({label:"q1",totalTargetHits:500}nearestNeighbor(embedding,q1))' \ + 'hits=2' \ + 'ranking=closeness-label' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' \ + 'input.query(q1)=embed(e5, "Summer of 69")' +``` + +Note that the `closeness-label` rank profile uses `closeness(field, embedding)` which in the case of multiple nearest neighbor search operators uses the maximum score to represent the unlabeled `closeness(field,embedding)`. This can be seen from the `relevance` value, compared with the labeled [closeness()](../reference/ranking/rank-features#closeness(dimension,name)) rank features. + +```json expandable +{ + "timing": { + "querytime": 0.008, + "summaryfetchtime": 0.0, + "searchtime": 0.009000000000000001 + }, + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 5 + }, + "coverage": { + "coverage": 100, + "documents": 95666, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:tracks/0/439ca5f008b2b72704704b65", + "relevance": 0.6442831379812195, + "source": "tracks", + "fields": { + "matchfeatures": { + "closeness(label,q)": 0.6442831379812195, + "closeness(label,q1)": 0.6212453963738567 + }, + "title": "Dolorous Stroke" + } + }, + { + "id": "index:tracks/0/698485b7a93ddeb7574670ec", + "relevance": 0.6401157063988596, + "source": "tracks", + "fields": { + "matchfeatures": { + "closeness(label,q)": 0.6401157063988596, + "closeness(label,q1)": 0.6324869732783777 + }, + "title": "Fever of the Time" + } + } + ] + } +} +``` + +Vespa also supports having multiple document side embedding fields, which also can be searched using multiple `nearestNeighbor` operators in the query. 
+ +```js expandable +field embedding type tensor(x[384]) { + indexing: attribute | index + attribute { + distance-metric: euclidean + } + index { + hnsw { + max-links-per-node: 16 + neighbors-to-explore-at-insert: 50 + } + } + } + field embedding_two type tensor(x[768]) { + indexing: attribute | index + attribute { + distance-metric: euclidean + } + index { + hnsw { + max-links-per-node: 16 + neighbors-to-explore-at-insert: 50 + } + } + } +``` + +## Controlling filter behavior + +Vespa allows developers to control how filters are combined with the `nearestNeighbor` query operator, see [Query Time Constrained Approximate Nearest Neighbor Search](https://blog.vespa.ai/constrained-approximate-nearest-neighbor-search/) for a detailed description of *pre-filtering* and *post-filtering* strategies. The following query examples explore the two query-time parameters which can be used to control the filtering behavior. The parameters are: + +- [ranking.matching.postFilterThreshold](../reference/api/query#ranking.matching) default 1.0 +- [ranking.matching.approximateThreshold](../reference/api/query#ranking.matching) default 0.02 + +These parameters can be used per query or configured in the rank-profile in the [document schema](../reference/schemas/schemas#post-filter-threshold). + +The following query runs with the default setting for *ranking.matching.postFilterThreshold* which is 1, which means, do not perform post-filtering, use *pre-filtering* strategy: + +```bash +$ vespa query \ + 'yql=select title, artist, tags from track where {totalTargetHits:10}nearestNeighbor(embedding,q) and tags contains "rock"' \ + 'hits=2' \ + 'ranking=closeness' \ + 'ranking.matching.postFilterThreshold=1.0' \ + 'ranking.matching.approximateThreshold=0.05' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +The query exposes `totalTargetHits` to ranking as seen from the `totalCount`. 
Now, repeating the query, but forcing *post-filtering* instead by setting *ranking.matching.postFilterThreshold=0.0*: + + +```bash +$ vespa query \ + 'yql=select title, artist, tags from track where {totalTargetHits:10}nearestNeighbor(embedding,q) and tags contains "rock"' \ + 'hits=2' \ + 'ranking=closeness' \ + 'ranking.matching.postFilterThreshold=0.0' \ + 'ranking.matching.approximateThreshold=0.05' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +In this case, Vespa will estimate how many documents the filter matches and auto-adjust `targethits` internally to a higher number, attempting to expose the `totalTargetHits` to first phase ranking: + +The query exposes 16 documents to ranking as can be seen from `totalCount`. There are `8420` documents in the collection that are tagged with the `rock` tag, so roughly 8%. + +Auto adjusting `totalTargetHits` upwards for post-filtering is not always what you want, because it is slower than just retrieving from the HNSW index without constraints. We can change the `totalTargetHits` adjustment factor with the [ranking.matching.targetHitsMaxAdjustmentFactor](../reference/api/query#ranking.matching) parameter. In this case, we set it to 1, which disables adjusting the `totalTargetHits` upwards. + + +```bash +$ vespa query \ + 'yql=select title, artist, tags from track where {totalTargetHits:10}nearestNeighbor(embedding,q) and tags contains "rock"' \ + 'hits=2' \ + 'ranking=closeness' \ + 'ranking.matching.postFilterThreshold=0.0' \ + 'ranking.matching.approximateThreshold=0.05' \ + 'ranking.matching.targetHitsMaxAdjustmentFactor=1' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +Since we are post-filtering without upward adjusting totalTargetHits, we end up with fewer hits. 
+ +Changing the query to limit to a tag which is less frequent, for example, `90s`, which matches 1,695 documents or roughly 1.7%, will cause Vespa to fall back to exact search as the estimated filter hit count is less than the `approximateThreshold`. + +```bash +$ vespa query \ + 'yql=select title, artist, tags from track where {totalTargetHits:10}nearestNeighbor(embedding,q) and tags contains "90s"' \ + 'hits=2' \ + 'ranking=closeness' \ + 'ranking.matching.postFilterThreshold=0.0' \ + 'ranking.matching.approximateThreshold=0.05' \ + 'input.query(q)=embed(e5, "Total Eclipse Of The Heart")' +``` + +The fallback to exact search will expose more than `totalTargetHits` documents to ranking. Read more about combining filters with nearest neighbor search in the [Query Time Constrained Approximate Nearest Neighbor Search](https://blog.vespa.ai/constrained-approximate-nearest-neighbor-search/) blog post. + +## Tear down the container + +This concludes this tutorial. + +The following removes the container and the data: + +```bash +$ docker rm -f vespa +``` diff --git a/mintlify-docs/en/querying/nearest-neighbor-search.mdx b/mintlify-docs/en/querying/nearest-neighbor-search.mdx new file mode 100644 index 0000000000..1f174aa1d5 --- /dev/null +++ b/mintlify-docs/en/querying/nearest-neighbor-search.mdx @@ -0,0 +1,349 @@ +--- +title: "Nearest Neighbor Search" +--- + +Nearest neighbor search, or vector search, is a technique used to find the closest data points to a given query point in a high-dimensional vector space. This is supported in Vespa using the [nearestNeighbor](../reference/querying/yql#nearestneighbor) query operator. This operator can be combined with other filters or query terms using the [Vespa query language](/en/querying/query-language), making it easy to create hybrid solutions that combine modern vector based techniques with [traditional information retrieval](/en/querying/text-matching). 
+ +Also see the [pyvespa examples](https://vespa-engine.github.io/pyvespa/examples/pyvespa-examples#Neighbors). + +## Minimal example + +A nearest neighbor search has at least these components: a document vector, a query vector, a rank profile using `closeness()` and a query with the `nearestNeighbor` operator: + +```js expandable +# Schema definition of the vector in documents + document doc { + + field d_vector type tensor(d[3]) { + indexing: attribute | index + attribute { + distance-metric: angular + } + } + + } + + # Rank profile definition in schema: + # - defining the q_vector type that must match the d_vector type + # - using the closeness() rank feature in the ranking expression + rank-profile rank_docs inherits default { + inputs { + query(q_vector) tensor(d[3]) + } + first-phase { + expression: closeness(field, d_vector) + } + } + +# Documents with vectors +{ + "put": "id:mynamespace:music::a-head-full-of-dreams", + "fields": { + "d_vector": [0,1,2] + } +} + +# A query with +# - a nearestNeighbor operator with document and query vectors +# - selecting the rank_docs rank profile +$ vespa query 'select * from docs where {targetHits: 3}nearestNeighbor(d_vector, q_vector)' \ + 'ranking=rank_docs' \ + 'input.query(q_vector)'='[1,2,3]' +``` + +The `nearestNeighbor` query operator will calculate values used by the [closeness()](../reference/ranking/rank-features#closeness(dimension,name)) rank feature. + + +**Note:** + +closeness(`field`, d_vector) means that the closeness rank feature shall use the d_vector field. +Applications can have multiple vector fields. These cases assign labels to the different `nearestNeighbor` operators, so the closeness() rank feature refers to the different operators (using different fields). See other examples of using closeness(`label`, q) in the [nearest neighbor search guide](/en/querying/nearest-neighbor-search-guide#using-label). 
+ + +Read more in this guide on tensor types, distance metrics, rank profiles and approximate nearest neighbor search. + +## Vectors + +A vector is represented by a [tensor](../ranking/tensor-user-guide) with one indexed dimension. Example [tensor type](../reference/ranking/tensor#tensor-type-spec) representing a float vector with 384 dimensions: + +```js +tensor(x[384]) +``` + +Document vectors are stored in a [tensor field](../reference/schemas/schemas#tensor) defined in the document [schema](../reference/schemas/schemas). A tensor type (dense) with one indexed dimension stores a single vector per document: + +```js +field doc_embedding type tensor(x[384]) { + indexing: attribute +} +``` + +A tensor type (mixed) with one or more mapped dimensions and one indexed dimension stores multiple vectors per document: + +```js +field doc_embeddings type tensor(m{},x[384]) { + indexing: attribute +} +``` + +Similarly, the type of a query vector is defined in a [rank-profile](../reference/schemas/schemas#rank-profile): + +```js +rank-profile my_profile { + inputs { + query(query_embedding) tensor(x[384]) + } + ... +} +``` + +This all ties together with the [nearestNeighbor](../reference/querying/yql#nearestneighbor) query operator that expects two arguments; the document tensor field name which is searched and the input query tensor name. The operator finds the documents that are closest to the query vector using the [distance-metric](#distance-metrics-for-nearest-neighbor-search) defined in the tensor field. Note that the document schema can have multiple tensor fields storing vectors, and the query can have multiple `nearestNeighbor` operators searching different tensor fields. + +Support for using the `nearestNeighbor` operator with a mixed tensor with multiple vectors per document is available in `Vespa 8.144.19`. + +To learn how Vespa can create the vectors for you, see [embedding](../rag/embedding). 
+ +## Using nearest neighbor search + +The following sections demonstrate how to use the [nearestNeighbor](../reference/querying/yql#nearestneighbor) query operator. + +These examples use **exact** nearest neighbor search, which has perfect accuracy but is computationally expensive for large document volumes since the distance metric must be calculated for every document that matches the boolean query filters. See also the [practical nearest neighbor search guide](/en/querying/nearest-neighbor-search-guide) for more examples. + +Exact nearest neighbor search scales close to linearly with [number of threads used per query](../performance/practical-search-performance-guide#multithreaded-search-and-ranking). This can be used to make exact nearest neighbor search run with acceptable serving latency. Using more threads per search to reduce latency is still costly for larger vector volumes or high query throughput applications, as the number of distance calculations involved in the query does not change by changing number of threads performing the search. + +A cost-efficient approach is to use **approximate** search instead. See how to use **approximate** nearest neighbor search with `HNSW` in the [Approximate Nearest Neighbor Search](/en/querying/approximate-nn-hnsw) document. 
+ +The following [document schema](../basics/schemas) is used to illustrate Vespa's support for vector search, or nearest neighbor search: + +```js expandable +schema product { + + document product { + + field in_stock type bool { + indexing: summary | attribute + rank: filter + attribute: fast-search + } + + field popularity type float { + indexing: summary | attribute + } + + field text_embedding type tensor(x[384]) { + indexing: summary | attribute + attribute { + distance-metric: prenormalized-angular + } + } + + field image_embeddings type tensor(i{},x[512]) { + indexing: summary | attribute + attribute { + distance-metric: angular + } + } + + } + +} +``` + +The `product` document schema has 4 fields. The fields of type [tensor](../ranking/tensor-user-guide) represent vector embeddings: + +- `text_embedding` - float vector with 384 dimensions. +- `image_embeddings` - multiple float vectors with 512 dimensions. + +The `text_embedding` field stores a dense vectorized embedding representation of the product description and which use [prenormalized-angular](../reference/schemas/schemas#distance-metric) as `distance-metric`. See for example [Dense Retrieval using bi-encoders over Transformer models](https://blog.vespa.ai/pretrained-transformer-language-models-for-search-part-2/). + +The `image_embeddings` field stores multiple dense vectorized embedding representations of the product images and which use [angular](../reference/schemas/schemas#distance-metric) as `distance-metric`. See for example [text to image search using CLIP with Vespa](https://blog.vespa.ai/text-image-search/). + +### Distance metrics for nearest neighbor search + +Vespa supports six different [distance-metrics](../reference/schemas/schemas#distance-metric): + +* `euclidean` +* `angular` +* `dotproduct` +* `prenormalized-angular` +* `hamming` +* `geodegrees` + +The distance-metric is a property of the field. 
This is obvious when `index` is set on the vector field, as the distance metric is used when building the index structure. Without `index`, no extra data structures are built for the field, and the distance-metric setting is used when calculating the distance at query-time. It still makes sense to have the metric as a field-property, as the field values are often produced using a specific distance metric. + +### Configure rank profiles for nearest neighbor search + +Lastly, one needs to configure how to [rank](../basics/ranking) products which are retrieved by the nearest neighbor search: + +```js +rank-profile semantic_similarity { + inputs { + query(query_embedding) tensor(x[384]) + } + first-phase { + expression: closeness(field, text_embedding) + } +} + +rank-profile image_similarity { + inputs { + query(image_query_embedding) tensor(x[512]) + } + first-phase { + expression: closeness(field, image_embeddings) + } +} +``` + +The `rank-profile` specifies the query input tensor names and types. The query input tensors must be of the same dimensionality as the document vector and have the same dimension name. + +Skipping the query tensor definition will cause a query time error: + +```bash +Expected a tensor value of 'query(query_embedding)' but has [...] +``` + +The `closeness(field, text_embedding)` is a [rank-feature](../reference/ranking/rank-features#closeness(dimension,name)) calculated by the [nearestNeighbor](../reference/querying/yql#nearestneighbor) query operator. This calculates a score in the range [0, 1], where 0 is infinite distance, and 1 is zero distance. This is convenient because Vespa sorts hits by decreasing relevancy score, and one usually wants the closest hits to be ranked highest. + +The `closeness(field, image_embeddings)` [rank-feature](../reference/ranking/rank-features#closeness(dimension,name)) operates over a tensor field that stores multiple vectors per document. 
For each document, the vector that is closest to the query vector is used in the calculation. + +The `first-phase` is part of Vespa's [phased ranking](../ranking/phased-ranking) support. Phased ranking enables re-ranking of the top-k best scoring hits as ranked or retrieved from the previous ranking phase. The computed ranking score is rendered as `relevance` in the default [Vespa JSON result format](../reference/querying/default-result-format). If the `relevance` field of the hit becomes 0.0, one has usually forgotten to specify the correct ranking profile. + +An example of a `rank-profile` also specifying an additional [re-ranking phase](../ranking/phased-ranking): + +```js +rank-profile image_similarity_with_reranking { + inputs { + query(image_query_embedding) tensor(x[512]) + } + first-phase { + expression: closeness(field, image_embeddings) + } + second-phase { + total-rerank-count: 1000 + expression: closeness(field, image_embeddings) * attribute(popularity) + } +} +``` + +In this case, hits retrieved by the [nearestNeighbor](../reference/querying/yql#nearestneighbor) query operator are re-scored also using the product's popularity as a signal. The value of the `popularity` field can be read by the `attribute(popularity)` rank-feature. The `second-phase` [ranking expression](../ranking/ranking-expressions-features) combines the popularity with the `closeness(field, image_embeddings)` rank-feature using multiplication. + +## Indexing product data + +After deploying the application package with the document schema, you can [index](../writing/reads-and-writes) the product data using the [Vespa JSON feed format](../reference/schemas/document-json-format). + +In the example below there are two documents. 
The vector embedding fields are using [indexed tensor short form](../reference/schemas/document-json-format#tensor) and [mixed tensor short form](../reference/schemas/document-json-format#tensor): + +```json expandable +[ + { + "put": "id:shopping:product::998211", + "fields": { + "in_stock": true, + "popularity": 0.342, + "text_embedding": [ + 0.16766378547490635, + 0.3737005826272204, + 0.040492891373747675, + .. + ], + "image_embeddings": { + "0": [ + 0.9147281579191466, + 0.5453696694173961, + 0.7529545687063771, + .. + ], + "1": [0.3737005826272204, ..] + } + } + }, + { + "put": "id:shopping:product::97711", + "fields": { + "in_stock": false, + "popularity": 0.538, + "text_embedding": [ + 0.03515862084651311, + 0.24585168798559187, + 0.6123057708571111, + .. + ], + "image_embeddings": { + "0": [ + 0.9785931815169806, + 0.5697209315543527, + 0.5352198004501647, + .. + ], + "1": [0.24585168798559187, ..] + } + } + } +] +``` + +The above JSON formatted data can be fed to Vespa using any of the [Vespa feeding APIs](../writing/reads-and-writes#api-and-utilities). + +## Querying using nearestNeighbor query operator + +The [nearestNeighbor](../reference/querying/yql#nearestneighbor) query operator is used to search the product dataset. The operator expects two arguments; the document tensor field which is searched and the input query tensor name. + +The [targetHits](../reference/querying/yql#targethits) query annotation specifies the number of results to expose to `first-phase` ranking per content node involved in the query. `targetHits` is a required parameter and the query will fail if not specified. With exact search, `targetHits` is a lower bound per content node: At least but possibly more than `targetHits` hits are exposed to `first-phase` ranking on every content node as long as `targetHits` hits are actually found and not filtered out afterwards. 
+ +The query tensor is sent as a query input and the query tensor name is referenced in the second argument of the `nearestNeighbor` operator. In the following example, the `nearestNeighbor` operator is used to recommend similar products based on image similarity. For a given image (e.g. a product image shown in the product search result page) one can find products which have similar product images. + +Note that the nearest neighbors search is limited to products where `in_stock` is `true`. + +The overall query is specified using the [Vespa query language](/en/querying/query-language) using the [Query API](/en/querying/query-api#http): + +```json +{ + "yql": "select * from product where {totalTargetHits: 100}nearestNeighbor(image_embeddings, image_query_embedding) and in_stock = true", + "input.query(image_query_embedding)": [ + 0.22507139604882176, + 0.11696498718517367, + 0.9418422036734729, + .. + ], + "ranking.profile": "image_similarity", + "hits": 10 +} +``` + +The YQL query uses logical conjunction `and` to filter the `nearestNeighbor` by a constraint on the `in_stock` field. + +The query request also specifies [hits](../reference/api/query#hits), which determines how many hits are returned to the client using the [JSON result format](../reference/querying/default-result-format). + +The total number of hits which is ranked by the ranking profile depends on the query filters and how fast the nearest neighbor search algorithm converges (for exact search). + +The [ranking.profile](../reference/api/query#ranking.profile) parameter controls which ranking profile is used. In this case, it simply ranks documents based on how close they are in the CLIP embedding space. + +## Using Nearest Neighbor from a Searcher Component + +As with all query operators in Vespa, one can build the query tree programmatically in a custom [searcher component](../applications/searchers). 
See [Centroids](https://github.com/vespa-engine/sample-apps/blob/master/billion-scale-image-search/app/src/main/java/ai/vespa/examples/Centroids.java) in [Billion-Scale Image Search](https://github.com/vespa-engine/sample-apps/tree/master/billion-scale-image-search) for an example of how the `NearestNeighborItem` is used. + +## Using binary embeddings with hamming distance + +The following packs a 128 bit embedding representation into a 16 dimensional dense tensor using `int8` tensor value precision (16 x 8 = 128 bit): + +```js +document vector { + field vector type tensor(x[16]) { + indexing: summary | attribute + attribute { + distance-metric: hamming + } + } +} +rank-profile hamming-nn { + num-threads-per-search: 12 + first-phase { + expression: closeness(field,vector) + } +} +``` + +Hamming distance search over binary vectors is implemented with xor and pop count cpu instructions. The rank-profile specifies [num-threads-per-search](../reference/schemas/schemas#num-threads-per-search) to reduce serving latency (but not cost). + +See the [Billion Scale Vector Search with Vespa](https://blog.vespa.ai/billion-scale-knn-part-two/) blog post for a detailed introduction to using binary vectors with hamming distance. diff --git a/mintlify-docs/en/querying/page-templates.mdx b/mintlify-docs/en/querying/page-templates.mdx new file mode 100644 index 0000000000..c68f01808b --- /dev/null +++ b/mintlify-docs/en/querying/page-templates.mdx @@ -0,0 +1,282 @@ +--- +title: "Page Templates" +--- + +When multiple kinds of data is fetched for a request, the application must decide how to lay out the data to return to the user. *Page templates* allows such page layouts to be defined as XML configuration files - one file per layout, corresponding to one use case. + +The layouts are *structural* - they do not specify widths and heights, colors and similar, but define the various boxed components that will make up the page, and their ordering and nesting. 
It is also assumed that the complete application includes a *frontend* which is capable of rendering finished pages from results laid out by a template. + +Page layouts may contain *choices* which specify alternative versions of the template. The choices in a template are taken by a *resolver* component at run time. Given an optimizing resolver the system can then learn to make the right choices given each particular query and result. An optimizing resolver is not bundled with the platform but must be added as a component. + +This document describes how to get started, explains the [page template language](#introduction-to-page-templates) and [how to add a choice resolver](#using-choice-resolvers). A complete reference of all the permissible content of page templates is found in the [page template reference](../reference/querying/page-templates). + +## Getting Started + +A page template is an XML file which is placed in the directory `search/page-templates/` in the [application package](../basics/applications). To start using page templates: + +- Create template XML files as shown [below](#introduction-to-page-templates) in `[app-package]/page-templates/` +- Add the searcher [com.yahoo.search.pagetemplates.PageTemplateSearcher](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/pagetemplates/PageTemplateSearcher) to the default [search chain](../reference/applications/services/search) in *services.xml*. +- [Deploy](../basics/applications#deploying-applications) the application package. +- Add these query parameters: `page.id=[comma-separated list of page id's]` and `presentation.format=page` (see the [page result format](../reference/querying/page-result-format)). + +The results returned will be as defined by the page template selected for each query. 
+ +The source names used in page templates are the same as those defined in the [federation](/en/querying/federation) setup, and/or of any internal search clusters defined in the application. 
+ +A presentation layer (frontend) which understands the results created by the page templates in use must be set up or created to produce rendered pages. That is beyond the scope of this document. + +## Introduction to Page Templates + +A page template is an XML file which contains a `<page>` tag at the top level. The page element must have an `id` attribute, where the file name is the same as the id, followed by *.xml*. If the id is *default*, this template will be used whenever no template is specified in the query. The templates may also be versioned, see [Component Versioning](../reference/applications/components#component-versioning). + +A page template consists of nested *sections* which correspond to screen areas in the final layout. The top level section is defined by the page itself, while further sections can be defined by explicit `<section>` tags. Each section may set a layout which will be used by the frontend renderer to lay out its content - `column` and `row` must be supported by all renderers, while some renderers may specify additional layouts. Each section may also specify sources of data which should be placed in the section. Renderers must be able to render multiple data items from different sources in a section. 
+ +For example, this template creates a page consisting of four equally large regions containing one source each: + +```xml + +
+
+
+
+
+
+
+
+ +``` + +To use this template, save it as *[application-package]/page-templates/fourSquare.xml*. + +Suppose we want to extend this template to be able to also show blogs in the "news" section. This can be done as follows: + +```xml + +
+
+
+
+
+
+
+
+ +``` + +Data items from each possible source has a rendering implemented by the frontend. These renderers are used when nothing is specified in the template. If some alternative rendering is desired, this can be specified by a `renderer` tag. The same is true for rendering of the sections themselves. Here we specify a different renderer for blog data items (hits), as well as for the entire *news/blog* section. + +```xml + +
+
+ + + + +
+
+
+
+
+
+
+ +``` + +Note that in order to add a renderer subelement, we now specify the blog source by a tag rather than by an attribute. These two forms are equivalent - the attribute variant is just a shorthand syntax. + +Sources and renderers can be given arbitrary key-value parameters - see the [reference](../reference/querying/page-templates) for details. + +But what if we want to choose either news or blogs, but not both? This can be achieved using a choice: + +```xml + +
+ + + + + + + +
+
+
+
+
+
+
+ +``` + +We can insert choices anywhere in a template, for example choose to show either the first or the second row rather than both: + +```xml + + +
+ + + + + + + + +
+
+
+
+
+
+
+ + +``` + +If we wanted to choose between two groups of multiple sections (or sources), this can be done by adding an enclosing `alternative` tag around each group. For the common special case of assigning a set of elements to a set of placeholders, a choice can contain a `map` tag instead of a list of alternatives. See the [reference](../reference/querying/page-templates) for details. + +## Using Choice Resolvers + +If templates including choices are used, some component must resolve those choices given each query and result. The system includes some resolvers for demo and testing purposes, but a proper optimizing resolver must be deployed as part of the application. This section describes how to create, deploy and choose a resolver to use at runtime. + +### Writing a Resolver + +Resolvers are subclasses of [com.yahoo.search.pagetemplates.engine.Resolver](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/pagetemplates/engine/Resolver). This API defines a method which accepts the page template in use (which contains the choices), the Query/Result pair and returns a Resolution. It is called at runtime once for every query which uses a page template. + +There are also some helper methods which makes it simple to write resolvers which make each choice independently. Here is an example resolver which makes all choices by random using this helper methods: + +```java expandable +package com.yahoo.search.pagetemplates.engine.resolvers; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.pagetemplates.engine.*; +import com.yahoo.search.pagetemplates.model.*; + +import java.util.*; + +/** A resolver which makes all choices by random. 
*/ +public class RandomResolver extends Resolver { + + private Random random=new Random(System.currentTimeMillis()); // Use of this is multithread safe + + /** Chooses a random alternative of any choice */ + @Override + public void resolve(Choice choice, Query query, Result result, Resolution resolution) { + resolution.addChoiceResolution(choice,random.nextInt(choice.alternatives().size())); + } + + /** Chooses a random mapping of the values to the placeholders */ + @Override + public void resolve(MapChoice choice,Query query,Result result,Resolution resolution) { + Map<String,List<PageElement>> mapping=new HashMap<String,List<PageElement>>(); + // Draw a random element from the value list on each iteration and assign it to a placeholder + List<String> placeholderIds=choice.placeholderIds(); + List<List<PageElement>> valueList=new ArrayList<List<PageElement>>(choice.values()); + for (String placeholderId : placeholderIds) + mapping.put(placeholderId,valueList.remove(random.nextInt(valueList.size()))); + resolution.addMapChoiceResolution(choice,mapping); + } + +} +``` + +### Deploying a Resolver + +Resolvers must be packaged as [OSGI bundles](https://en.wikipedia.org/wiki/Osgi#Bundles) for deployment, see [container components](../applications/components). + +The packaged component is added to the `components/` directory of the [application package](../basics/applications). + +The page template searcher must be configured with a list of the resolvers which should be available. This is done by expanding the page template searcher configuration with a *components* configuration: + +```xml + + + + default + com.yahoo.my.Resolver1 + myBundleSymbolicName + + + + mySecondResolver + com.yahoo.my.Resolver2 + myBundleSymbolicName + + + +``` + +With this, the application is [deployed](../basics/applications#deploying-applications) as usual. 
+ +### Choosing a Resolver + +The resolver to use is determined by setting the query property `page.resolver` to the id (and optionally version) of the resolver component - either in the request, in a query profile or programmatically. + +Two resolvers suitable for testing purposes are always available: `native.random`, which makes each choice at random, and `native.deterministic`, which selects the last alternative of each choice. + +If the `page.resolver` parameter is not set, the resolver having the id `default` is used. If no default resolver is deployed, the random resolver is used. + +## Examples + +This section contains a few complete examples of page templates. + +A blending search result page: + +```xml + +
+
+ +``` + +A richer search result page: + +```xml + +
+
+
+ + + + +
+
+
+
+
+
+
+ +``` + +A mapping of multiple source modules to places on the page: + +```xml + + +
+
+
+
+
+
+
+
+ + + + + + + + + + +
+``` diff --git a/mintlify-docs/en/querying/query-api.mdx b/mintlify-docs/en/querying/query-api.mdx new file mode 100644 index 0000000000..e8996c1412 --- /dev/null +++ b/mintlify-docs/en/querying/query-api.mdx @@ -0,0 +1,807 @@ +--- +title: "Query API" +--- + +Use the Vespa Query API to query, rank and organize data. Example: + +```bash +$ vespa query "select * from music where year > 2001" \ + "ranking=rank_albums" \ + "input.query(user_profile)={{cat:pop}:0.8,{cat:rock}:0.2,{cat:jazz}:0.1}" +``` + +Simplified, a query has the following components: + +- Input data +- Ranking and grouping specification +- Results +- Other execution parameters + +This guide is an introduction to the more important elements in the API - refer to the [Query API reference](../reference/api/query) for details. See [query execution](#query-execution) below for data flow. + +## Input + +Input data is both structured data and unstructured text: + +```bash +$ vespa query "select * from music where artist contains \"coldplay\" and default contains text(@q)" \ + "q=head" +```bash +$ vespa query "select * from music where artist contains \"coldplay\" and default contains text(@q)" \ + "q=head" +```bash +$ vespa query "select * from music where artist contains \"coldplay\" and default contains text(@q)" \ + "q=head" \ + "ranking=rank_albums" \ + "input.query(user_profile)={{cat:pop}:0.8,{cat:rock}:0.2,{cat:jazz}:0.1}" +``` + +See [query execution](#query-execution) below. 
+ +### Input examples + +```bash +$ vespa query "select * from sources * where default contains text(@animal)" \ + "animal=panda" +``` + +The text() operator will access the query property "animal", and tokenize the property value into a [weakAnd](../reference/querying/yql#grammar) query, resulting in: + +`select * from sources * where weakAnd(default contains "panda")` + +Changing the value of "animal" without changing the rest of the expression: + +```bash +$ vespa query "select * from sources * where default contains text(@animal)" \ + "animal=panda smokey" +``` + +The result is: + +`select * from sources * where weakAnd(default contains "panda", default contains "smokey")` + +Combining multiple query properties, and having a more complex expression: + +```bash +$ vespa query "select * from sources * where range(year, 1963, 2014) and (default contains text(@animal) or default contains text(@teddy))" \ + "animal=panda" \ + "teddy=bear roosevelt" +``` + +The resulting YQL expression is: + +`select * from sources * where range(year, 1963, 2014) and (weakAnd(default contains "panda") or weakAnd(default contains "bear", default contains "roosevelt"))` + +### Fieldset + +Use a fieldset to query multiple fields in the `where` clause. In the schema: + +```sd +fieldset text_content { + fields: title, description +} +``` + +Query both fields using the fieldset: + +```bash +$ vespa query 'select * from sources where text_content contains text(@q)' \ + 'q=query a fieldset' +``` + +Fields in the fields set must have the same match modes - read more in the [reference](../reference/schemas/schemas#fieldset). + +### Query Profiles + +Use a [query profile](../reference/api/query#queryprofile) to store query parameters in configuration. This makes query strings shorter, and makes it easy to modify queries by modifying configuration only. Use cases are setting query properties for different markets, parameters that do not change, and so on. 
Query profiles can be nested, versioned and use inheritance. + +### Geo Filter and Ranking + +Filter by position using latitude and longitude to implement [geo search](/en/querying/geo-search). [DistanceToPath](../reference/ranking/rank-features#distanceToPath\(name\).distance) is a [rank function](../basics/ranking) based on closeness. Using ranking can often improve results instead of geo filtering. + +### Parameter substitution + +Parameter substitution lets you provide query values as request parameters instead of inserting this into the YQL string itself. This simplifies query generation, separating the value of the string/set/array from the YQL string - i.e. the value will not corrupt the YQL string if it contains YQL-like syntax: + +- Simplify query generation, separating the value of the set/array from the YQL string. +- Speed up query parsing. Using parameter substitution accelerates string parsing. +- Reduce duplication. + +In its simplest form, use [text()](../reference/querying/yql#text) for strings: + +```text +... where default contains text(@user_input)&user_input=free+text +``` + +Lists, maps and arrays can also be used - examples: + +```text +# Simple example: provide a set for the IN operator +... where id in (@my_set)&my_set=10,20,30 + +# Same set, but use the set as a block-list (exclude items in the set) +... where !(id in (@my_set))&my_set=10,20,30 + +# Use a weightedSet operator +... 
where weightedSet(field, @my_set)&my_set={a:1,b:2} +``` + +Parameter substitution is also a good way to eliminate data duplication. From Vespa 8.287, it can be used with `embed`: + +```bash +$ vespa query \ + 'yql=select id, from product where {targetHits:10}nearestNeighbor(embedding, query_embedding) or userQuery()' \ + 'input.query(query_embedding)=embed(transformer, @query)' \ + 'input.query(query_tokens)=embed(tokenizer, @query)' \ + 'query=running shoes for kids, white' +``` + +Note the use of the parameter named [query](../reference/api/query#model.querystring) used by the [userQuery()](../reference/querying/yql#userquery) operator. Also note the value substituted in the [embed](../rag/embedding#embedding-a-query-text) functions. + +See the [reference](../reference/querying/yql#parameter-substitution) for a complete list of formats. + +## Ranking + +[Ranking](../basics/ranking) specifies the computation of the query and data. It assigns scores to documents, and returns documents ordered by score. A [rank profile](../reference/api/query#ranking.profile) is a specification for how to compute a document's score. An application can have multiple rank profiles, to run different computations. 
Example, a query specifies query categories and a user embedding (from the [tensor user guide](../ranking/tensor-user-guide#ranking-with-tensors)): + +```sd +rank-profile product_ranking inherits default { + inputs { + query(q_category) tensor(category{}) + query(q_embedding) tensor(x[4]) + } + + function p_sales_score() { + expression: sum(query(q_category) * attribute(sales_score)) + } + + function p_embedding_score() { + expression: closeness(field, embedding) + } + + first-phase { + expression: p_sales_score() + p_embedding_score() + } + match-features: p_sales_score() p_embedding_score() +} +``` + +```bash +vespa query 'yql=select * from product where {targetHits:1}nearestNeighbor(embedding,q_embedding)' \ + 'input.query(q_embedding)=[1,2,3,4]' \ + 'input.query(q_category)={"Tablet Keyboard Cases":0.8, "Keyboards":0.3}' \ + 'ranking=product_ranking' +``` + + +**Note:** + +In this example, `input.query(q_embedding)` is short for `ranking.features.query(q_embedding)` - see the [reference](../reference/api/query#ranking.features) for tensor formats. + + +Results can be ordered using [sorting](../reference/querying/sorting-language) instead of ranking. + +The above rank profile does not do text ranking - there are however such profiles built-in. Text search is described in more detail in [Text Matching](/en/querying/text-matching) - find information about normalizing, prefix search and linguistics there. + +## Grouping + +[Grouping](/en/querying/grouping) is a way to group documents in the result set after ranking. Example, return max 3 albums per artist, grouped on year: + +```bash +$ vespa query "select * from music where true limit 0 | all(group(year) each(max(3) each(output(summary())) ) )" +``` + +Fields used in grouping must be [attributes](../content/attributes). The grouping expression is part of the YQL query string, appended at the end. + +Applications can group *all* documents (select all documents in YQL). Using `limit 0` returns grouping results only. 
+ +## Results + +All fields are returned in results by default. To specify a subset of fields, use [document summaries](/en/querying/document-summaries). When searching text, having a static abstract of the document in a field, or using a [dynamic summary](../reference/schemas/schemas#summary) can both improve the visual relevance of the search, and cut bandwidth used. + +The default output format is [JSON](../reference/querying/default-result-format). A binary [CBOR](https://cbor.io/) format is also available via [format=cbor](../reference/api/query#presentation.format) - responses are smaller and faster to render, especially for numeric data, and is a drop-in replacement that produces identical results when deserialized. Write a custom [Renderer](../applications/result-renderers) to generate results in other formats. + +Read more on [request-response](../applications/processing) processing - use this to write code to manipulate results. + +## Query execution + +![Query execution - from query to response](/assets/img/query-to-response.svg) + +Phases: + + + +**Query processing**: Normalizations, rewriting and enriching. Custom logic in search chains + + +**Matching, ranking and grouping/aggregation:** This phase dispatches the query to content nodes + + +**Result processing, rendering:** Content fetching and snippeting of the top global hits found in the query phase + + + +The above is a simplification - if the query also specifies [result grouping](/en/querying/grouping), the query phase might involve multiple phases or round-trips between the container and content nodes. See [life of a query](../performance/sizing-search#life-of-a-query-in-vespa) for a deeper dive into query execution details. + +Use [trace.explainlevel](../reference/api/query#trace.explainlevel) to analyze the query plan. 
Use these hints to modify the query plan: + +- Use [ranked: false](../reference/querying/yql#ranked) query annotations to speed up evaluation +- Use [capped range search](../reference/querying/yql#numeric) to efficiently implement top-k selection for ranking a subset of the documents in the index. + +### Query processing and dispatch + + + +A query is sent from a front-end application to a container node using the *Query API* or in any custom request format handled by a custom [request handler](../applications/request-handlers), which translates the custom request format to native Vespa APIs. + + +Query pre-processing, like [linguistic processing](../linguistics/linguistics) and [query rewriting](../linguistics/query-rewriting), is done in built-in and custom [search chains](../applications/chaining) - see [searcher development](../applications/searchers). + +The default search chain is *vespa* - find installed components in this chain by inspecting `ApplicationStatus` like in the [quick-start](../basics/deploy-an-application-local). Adding `&trace.level=4` (or higher) to the query will emit the components invoked in the query, and is useful to analyze ordering. + +This is the integration point to plug in code to enrich a query - example: Look up user profile data from a user ID in the request. Set *&trace.level=2* to inspect the search chain components. + + +The query is sent from the container to the *content cluster* - see [federation](/en/querying/federation) for more details. An application can have multiple content clusters - Vespa searches in all by default. [Federation](/en/querying/federation) controls how to query the clusters, [sources](../reference/api/query#model.sources) names the clusters. The illustration above has one content cluster, but multiple clusters are fully supported and allow scaling [document types](../basics/schemas) differently. E.g. 
a *tweet* document type can be indexed in a separate content cluster from a *user* document type, enabling independent scaling of the two. + +![Query processing and dispatch](/assets/img/query-dispatch.svg) + + + +### Matching, ranking, grouping + + + +At this point the query enters one or more [content clusters](../reference/applications/services/content). In a content cluster with [grouped distribution](../content/elasticity#grouped-distribution), the query is dispatched to all content nodes within a single group using a [dispatch policy](../reference/applications/services/content#dispatch-tuning), while with a flat single group content cluster the query is dispatched to all content nodes. + + +The query arrives at the content nodes, which perform matching, [ranking](../basics/ranking) and aggregation/grouping over the set of documents in the [Ready sub database](../content/proton). By default, Vespa uses [DAAT](../performance/feature-tuning#hybrid-taat-daat) where the matching and first-phase score calculation are interleaved and not two separate, sequential phases. *vespa-proton* does matching over the *ready* documents and [ranks](../basics/ranking) as specified with the request/schema. Each content node matches and ranks a subset of the total document corpus and returns the hits along with meta information like total hits and sorting and grouping data, if requested. + +![Queries](/assets/img/proton-query.svg) + + + +Once the content nodes within the group have replied within the [timeout](../performance/graceful-degradation), [max-hits / top-k](../reference/applications/services/content#dispatch-tuning) results are returned to the container for query phase result processing. In this phase, the only per hit data available is the internal global document ID (gid) and the ranking score. There is also result meta information like coverage and total hit count. 
Additional hit specific data, like the contents of fields, is not available until the result processing phase has completed the content fetching. + + + +### Result processing (fill) phase + + + +When the result from the query phase is available, a custom chained [searcher component](../applications/searchers#multiphase-searching) can process the limited data available from the first search phase before the contents of the hits are fetched from the content nodes. The fetching from content nodes is lazy and is not invoked before rendering the response, unless asked for earlier by a custom searcher component. + + +Only fields in the requested [document summaries](/en/querying/document-summaries) are fetched from content nodes. The summary request goes directly to the content nodes that produced the result from the query phase. + + +After the content node requests have completed, the full result set can be processed further by custom components (e.g. doing result deduping, top-k re-ranking), before [rendering](../applications/result-renderers) the response. + + + +## HTTP + + +**Note:** + +Vespa does not provide a Java client library for the query API. Best practice for queries is submitting the user-generated query as-is, then using [Searcher components](../applications/searchers) to implement additional logic. + + +The Vespa Team does not recommend any specific HTTP client, since we haven't done any systematic evaluation. We have most experience with the Apache HTTP client. See also [HTTP best practices](/en/clients/http-best-practices) (for Vespa Cloud, but most of it is generally applicable). Also see a discussion in [#24534](https://github.com/vespa-engine/vespa/issues/24534). + +Use GET or POST. 
Parameters can either be sent as GET-parameters or posted as JSON, these are equivalent: + +```bash +$ curl -H "Content-Type: application/json" \ + --data '{"yql" : "select * from sources * where default contains \"coldplay\""}' \ + http://localhost:8080/search/ + +$ curl http://localhost:8080/search/?yql=select+%2A+from+sources+%2A+where+default+contains+%22coldplay%22 +``` + +### Using POST + +The format is based on the [Query API reference](../reference/api/query), and has been converted from the *flat* dot notation to a *nested* JSON-structure. The request-method must be POST and the *Content-Type* must be *"application/json"*, e.g.: + +```bash +$ curl -X POST -H "Content-Type: application/json" --data ' + { + "yql": "select * from sources * where true", + "offset": 5, + "ranking": { + "matchPhase": { + "ascending": true, + "maxHits": 15 + } + }, + "presentation" : { + "bolding": false, + "format": "json" + } + }' \ + http://localhost:8080/search/ +``` + + +**Note:** + +Try the [Query Builder](https://github.com/vespa-engine/vespa/tree/master/client/js/app#query-builder) application! + +**Important:** + +Security filters can block GET and POST requests differently. This can block POSTed queries. + + +### HTTP + +Configure the [http server](../reference/applications/services/http#server) - e.g. set *requestHeaderSize* to configure URL length (including headers): + +```xml + + + + + 32768 + + + + +``` + +HTTP keepalive is supported. + +Values must be encoded according to standard URL encoding. Thus, space is encoded as +, + as %2b and so on - see [RFC 2396](https://www.ietf.org/rfc/rfc2396.txt). + +HTTP status codes are found in the [Query API reference](../reference/api/query#http-status-codes). Also see [Stack Overflow question](https://stackoverflow.com/questions/54340386/how-should-i-customize-my-search-result-in-vespa/54344429#54344429). 
+ +When implementing a client for the query API, consider the following guidelines for handling HTTP status codes: + +#### Client errors vs. server errors + +In general clients should only retry requests on *server errors* (5xx) - not on *client errors* (4xx). For example, a client should **not** retry a request after receiving a `400 Bad Request` response. + +#### Back-pressure handling + +Be careful when handling 5xx responses, especially `503 Service Unavailable` and `504 Gateway Timeout`. These responses typically indicate an overloaded system, and blindly retrying without backoff will only worsen the situation. For example, `503 Service Unavailable` is returned whenever there are no available search handler threads to serve the request. This is a clear indication of back-pressure from the system, and clients should reduce overall throughput and implement appropriate throttling mechanisms to avoid exacerbating the overload condition. + +## Timeout + +See the [reference](../reference/api/query#timeout) for how to set the query timeout. Common questions: + +- *Does the timeout apply to the whole query or just from when it is sent to the content cluster? If a [Searcher](../applications/searchers) goes to sleep in the container for 2*timeout, will the caller still get a response indicating a timeout?* + + The timeout applies to the whole query, both container and content node processing. However, the timeout handling is cooperative - if having Searchers that are time-consuming or access external resources, the Searcher code should check [Query.getTimeLeft()](https://github.com/vespa-engine/vespa/blob/master/container-search/src/main/java/com/yahoo/search/Query.java). So, in this case, you will time out, but only after 2*timeout + some more. 
+ +- *During multiphase searching, is the query timeout set for each individual searcher, or is the query timeout set for the entire search chain?* + + The timeout is for the entire query (and most Searchers don’t check timeout - use `Query.getTimeLeft()`). E.g., if a Search Chain has 3 Searchers, it is OK for 1 Searcher to take 497 ms and 2 Searchers to each take 1 ms for a query timeout of 500 ms. + +- *If we asynchronously execute several search chains, can we set different query timeouts for each of these chains plus a separate overall timeout for the searcher that performs the asynchronous executions?* + + You can set a different timeout in each cloned query you send to any of those chains, and you can specify the timeout when waiting for responses from them. + + +## Error handling + +Check for a `root: error` element in the [result](../reference/querying/default-result-format#error): + +```json +{ + "root": { + "errors": [ + { + "code": 8, + "summary": "Error in search reply.", + "source": "music", + "message": "Could not locate attribute for grouping number 0 : Failed locating attribute vector 'year'. Ignoring this grouping." + } + ], +``` + +## Troubleshooting + +If Vespa cannot generate a valid search expression from the query string, it will issue the error message *Null query*. To troubleshoot, add [&trace.level=2](../reference/api/query#trace.level) to the request. A missing *yql* parameter will also emit this error message. + +### Query tracing + +Use *query tracing* to debug query execution. Enable by using [trace.level=1](../reference/api/query#trace.level) (or higher). Add [trace.timestamps=true](../reference/api/query#trace.timestamps) for timing info for every searcher invoked. Find a trace example in the result examples below, and try the [practical search performance guide](../performance/practical-search-performance-guide#advanced-query-tracing). 
+ +In custom code, use [Query.trace](https://javadoc.io/page/com.yahoo.vespa/container-search/latest/com/yahoo/search/Query) to add trace output. + +### Large memory usage + +Queries that allocate more than 2G RAM will log messages like: + +```text +mmap 2727 of size 8589934592 from : search::attribute::PostingListMerger::reserveArray(unsigned int, unsigned long)(0x40001513eef0) + +(0x400013595334) from (0x400013593acc) from operator new(unsigned long)(0x400013592f88) from search::attribute::PostingListMerger::reserveArray(unsigned int, unsigned long)(0x40001513eef0) from search::attribute::PostingListSearchContextT::fetchPostings(search::queryeval::ExecuteInfo const&, bool)(0x400015159a38) from search::queryeval::SameElementBlueprint::fetchPostings(search::queryeval::ExecuteInfo const&)(0x4000154f36fc) from search::queryeval::IntermediateBlueprint::fetchPostings(search::queryeval::ExecuteInfo const&)(0x40001549ad38) from proton::matching::MatchToolsFactory::MatchToolsFactory(proton::matching::QueryLimiter&, vespalib::Doom const&, proton::matching::ISearchContext&, search::attribute::IAttributeContext&, search::engine::Trace&, std::basic_string_view >, std::__cxx11::basic_string, std::allocator > const&, ... + +1 mappings of accumulated size 8589934592 +``` + +This does not necessarily indicate that something is wrong, e.g., range searches use much memory. 
+ +## Result examples + +A regular query result: + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 1 + }, + "coverage": { + "coverage": 100, + "documents": 3, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "id:mynamespace:music::a-head-full-of-dreams", + "relevance": 0.16343879032006284, + "source": "music", + "fields": { + "sddocname": "music", + "documentid": "id:mynamespace:music::a-head-full-of-dreams", + "artist": "Coldplay", + "album": "A Head Full of Dreams", + "year": 2015, + "category_scores": { + "cells": [ + { + "address": { + "cat": "pop" + }, + "value": 1.0 + }, + { + "address": { + "cat": "rock" + }, + "value": 0.20000000298023224 + }, + { + "address": { + "cat": "jazz" + }, + "value": 0.0 + } + ] + } + } + } + ] + } +} +``` + +An empty result: + +```json +{ + "root": { + "fields": { + "totalCount": 0 + }, + "id": "toplevel", + "relevance": 1.0 + } +} +``` + +An error result: + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 2 + }, + "coverage": { + "coverage": 100, + "documents": 4, + "full": true, + "nodes": 2, + "results": 2, + "resultsFull": 2 + }, + "errors": [ + { + "code": 8, + "summary": "Error in search reply.", + "source": "music", + "message": "Could not locate attribute for grouping number 0 : Failed locating attribute vector 'year'. Ignoring this grouping." + } + ], +``` + +A simple search application, many undefined fields. Result for the query `/search/?query=blues&hits=3&trace.level=2` + +```json expandable +{ + + "trace": { + "children": [ + { + "message": "No query profile is used" + }, + { + "message": "Invoking chain 'vespa' [com.yahoo.prelude.statistics.StatisticsSearcher@native -> com.yahoo.prelude.querytransform.PhrasingSearcher@vespa -> ... 
-> federation@native]" + }, + { + "children": [ + { + "message": "Detected language: ENGLISH" + }, + { + "message": "Language ENGLISH determined by the characters in the terms." + }, + { + "message": "Query parsed to: select * from sources * where default contains \"blues\" limit 3" + }, + { + "message": "Child execution", + "children": [ + { + "message": "Stemming: [select * from sources * where default contains ({\"origin\": {\"original\": \"blues\", \"offset\": 0, \"length\": 5}, \"stem\": false}\"blue\") limit 3]" + }, + { + "message": "Lowercasing: [select * from sources * where default contains ({\"origin\": {\"original\": \"blues\", \"offset\": 0, \"length\": 5}, \"stem\": false, \"normalizeCase\": false}\"blue\") limit 3]" + }, + { + "message": "sc0.num0 search to dispatch: query=[blue] timeout=5000ms offset=0 hits=3 grouping=0 : collapse=false restrict=[music]" + }, + { + "message": "Current state of query tree: WORD[connectedItem=null connectivity=0.0 creator=ORIG explicitSignificance=false fromSegmented=false index=\"\" isRanked=true origin=\"(0 5)\" segmentIndex=0 significance=0.0 stemmed=true uniqueID=1 usePositionData=true weight=100 words=true]{\n \"blue\"\n}\n" + }, + { + "message": "YQL+ representation: select * from sources * where default contains ({\"origin\": {\"original\": \"blues\", \"offset\": 0, \"length\": 5}, \"stem\": false, \"normalizeCase\": false, \"id\": 1}\"blue\") limit 3" + }, + { + "message": "sc0.num0 dispatch response: Result (3 of total 10 hits)" + }, + { + "message": "sc0.num0 fill to dispatch: query=[blue] timeout=5000ms offset=0 hits=3 grouping=0 : collapse=false restrict=[music] summary=[null]" + }, + { + "message": "Current state of query tree: WORD[connectedItem=null connectivity=0.0 creator=ORIG explicitSignificance=false fromSegmented=false index=\"\" isRanked=true origin=\"(0 5)\" segmentIndex=0 significance=0.0 stemmed=true uniqueID=1 usePositionData=true weight=100 words=true]{\n \"blue\"\n}\n" + }, + { + "message": 
"YQL+ representation: select * from sources * where default contains ({\"origin\": {\"original\": \"blues\", \"offset\": 0, \"length\": 5}, \"stem\": false, \"normalizeCase\": false, \"id\": 1}\"blue\") limit 3" + } + ] + }, + { + "message": "Child execution" + } + ] + } + ] + }, + "root": { + "id": "toplevel", + "relevance": 1, + "fields": { + "totalCount": 10 + }, + "coverage": { + "coverage": 100, + "documents": 10, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "index:0/0/0/dfd9fcfa650b44545ef0b8b2", + "relevance": "-Infinity", + "source": "basicsearch", + "fields": { + "sddocname": "music", + "title": "Electric Blues", + "artist": "", + "song": "", + "bgndata": "", + "sales": "NaN", + "pto": -1, + "mid": 2, + "ew": "blues", + "surl": "https://shopping.yahoo.com/shop?d=hab&id=1807865261", + "userrate": "NaN", + "pid": "", + "weight": "NaN", + "url": "", + "isbn": "", + "fmt": "", + "albumid": "", + "disp_song": "", + "pfrom": "NaN", + "bgnpfrom": "NaN", + "categories": "Blues", + "data": "", + "numreview": "NaN", + "bgnsellers": 0, + "image": "", + "artistspid": "", + "newestedition": "NaN", + "bgnpto": "", + "year": "NaN", + "did": "NaN", + "scorekey": "NaN", + "cbid": "NaN", + "summaryfeatures": "", + "documentid": "id:test:music::https://shopping.yahoo.com/shop?d=hab&id=1807865261" + } + }, + { + "id": "index:0/0/0/273d384dc214386c934d793f", + "relevance": "-Infinity", + "source": "basicsearch", + "fields": { + "sddocname": "music", + "title": "Delta Blues", + "artist": "", + "song": "", + "bgndata": "", + "sales": "NaN", + "pto": -1, + "mid": 2, + "ew": "blues", + "surl": "https://shopping.yahoo.com/shop?d=hab&id=1804905714", + "userrate": "NaN", + "pid": "", + "weight": "NaN", + "url": "", + "isbn": "", + "fmt": "", + "albumid": "", + "disp_song": "", + "pfrom": "NaN", + "bgnpfrom": "NaN", + "categories": "Blues", + "data": "", + "numreview": "NaN", + "bgnsellers": 0, + "image": "", + "artistspid": "", + 
"newestedition": "NaN", + "bgnpto": "", + "year": "NaN", + "did": "NaN", + "scorekey": "NaN", + "cbid": "NaN", + "summaryfeatures": "", + "documentid": "id:test:music::https://shopping.yahoo.com/shop?d=hab&id=1804905714" + } + }, + { + "id": "index:0/0/0/b3c74a9bf3aea1e2260311c0", + "relevance": "-Infinity", + "source": "basicsearch", + "fields": { + "sddocname": "music", + "title": "Chicago Blues", + "artist": "", + "song": "", + "bgndata": "", + "sales": "NaN", + "pto": -1, + "mid": 2, + "ew": "blues", + "surl": "https://shopping.yahoo.com/shop?d=hab&id=1804905710", + "userrate": "NaN", + "pid": "", + "weight": "NaN", + "url": "", + "isbn": "", + "fmt": "", + "albumid": "", + "disp_song": "", + "pfrom": "NaN", + "bgnpfrom": "NaN", + "categories": "Blues", + "data": "", + "numreview": "NaN", + "bgnsellers": 0, + "image": "", + "artistspid": "", + "newestedition": "NaN", + "bgnpto": "", + "year": "NaN", + "did": "NaN", + "scorekey": "NaN", + "cbid": "NaN", + "summaryfeatures": "", + "documentid": "id:test:music::https://shopping.yahoo.com/shop?d=hab&id=1804905710" + } + } + ] + } +} +``` + +Result for the grouping query `/search/?hits=0&yql=select * from sources * where sddocname contains purchase | all(group(customer) each(output(sum(price))))` + +```json expandable +{ + + "trace": { + "children": [ + { + "children": [ + { + "message": "Child execution" + } + ] + } + ] + }, + "root": { + "id": "toplevel", + "relevance": 1, + "fields": { + "totalCount": 20 + }, + "coverage": { + "coverage": 100, + "documents": 20, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "group:root:0", + "relevance": 1, + "continuation": { + "this": "" + }, + "children": [ + { + "id": "grouplist:customer", + "relevance": 1, + "label": "customer", + "children": [ + { + "id": "group:string:Jones", + "relevance": 9870, + "value": "Jones", + "fields": { + "sum(price)": 39816 + } + }, + { + "id": "group:string:Brown", + "relevance": 8000, + 
"value": "Brown", + "fields": { + "sum(price)": 20537 + } + }, + { + "id": "group:string:Smith", + "relevance": 6100, + "value": "Smith", + "fields": { + "sum(price)": 19484 + } + } + ] + } + ] + } + ] + } +} +``` diff --git a/mintlify-docs/en/querying/query-language.mdx b/mintlify-docs/en/querying/query-language.mdx new file mode 100644 index 0000000000..b0f13fe7be --- /dev/null +++ b/mintlify-docs/en/querying/query-language.mdx @@ -0,0 +1,134 @@ +--- +title: "Vespa Query Language - YQL" +sidebarTitle: "The YQL query language" +--- + +Vespa accepts unstructured human input and structured queries for application logic separately, then combines them into a single data structure for executing. Human input is parsed heuristically, see [Query API input](/en/querying/query-api#input). Application logic is expressed in YQL, use this guide for examples - also see the [YQL reference](../reference/querying/yql). + +## Live query examples + +The following are live YQL examples: + + +Selection: Select all documents from the `doc` source. This is the easiest way to count all documents in a source: + +`select * from doc where true` + + +Filtering: Find all documents with `ranking` in the `title` field: + +`select * from doc where title contains "ranking"` + + +Filtering: Find all documents with `ranking` in the `default` [fieldset](../reference/schemas/schemas#fieldset). + +`select * from doc where default contains "ranking"` + + +Ordering: Order by number of terms in the document, descending. + +`select * from doc where true order by term_count desc` + + +Pagination: Select all documents, return hits 6-15. + +`select * from doc where true limit 15 offset 5` + + +Grouping: + +- Select all documents from the `doc` source. +- Group by term count in buckets of 100, display average term count per bucket. +- Note on `limit 0`: This returns zero regular hits, only the grouping result. 
+ +`select * from doc where true limit 0 | all( group( fixedwidth(term_count,100) ) each( output( avg(term_count) ) ) )` + +Find more [grouping examples](/en/querying/grouping). + + +Numeric: Select documents with attribute "last_updated" > 1646167144: + +`select * from doc where last_updated > 1646167144` + +Numeric: Select documents with integer in field - works both for single-value fields and multivalue, like `array<int>`: + +`select * from doc where term_count = 258` + + +Phrase: Select documents with the phrase "question answering": + +`select * from doc where default contains phrase("question","answering")` + + +Timeout: Time out query execution after 100 ms, returning hits found before timing out - see [ranking.softtimeout.enable](../reference/api/query#ranking.softtimeout.enable): + +`select * from doc where true timeout 100` + + +Regular expressions: Select documents matching the regular expression in the `namespace` [attribute](../content/attributes) field: + +`select * from doc where namespace matches "op.*"` + + +### Command-line queries + +Use the [Vespa CLI](../clients/vespa-cli) to run a query from the command-line: + +```bash +$ vespa query 'select * from doc where true' +``` + +To use any HTTP client, use `-v` to generate the encoded YQL string: + +```bash +$ vespa query -v 'select * from doc where true' + +curl http://127.0.0.1:8080/search/?timeout=10s&yql=select+%2A+from+doc+where+true +``` + +Run the query: + +```bash +$ curl http://127.0.0.1:8080/search/?timeout=10s&yql=select+%2A+from+doc+where+true +``` + +Alternatively, set the query as the `yql` parameter in a POST: + +```bash +$ curl --data-urlencode 'yql=select * from doc where true' \ + http://127.0.0.1:8080/search/ +``` + +## Query examples + +Boolean: + +```bash +$ vespa query 'select * from doc where is_public = true' +``` + +Map: + +```bash +$ vespa query 'select * from doc where my_map contains sameElement(key contains "Coldplay", value > 10)' + +# +# Schema definition for my_map: +# 
+field my_map type map<string, int> { + indexing: summary + struct-field key { indexing: attribute } + struct-field value { indexing: attribute } +} +``` + +Escapes - see the [FAQ](../learn/faq#how-does-backslash-escapes-work): + +```bash +# +# The artist field is: +# "artist": "Meta..ica" +# + +$ vespa query -v 'select * from music where artist matches "M.ta\\.\\.ica"' +``` diff --git a/mintlify-docs/en/querying/query-profiles.mdx b/mintlify-docs/en/querying/query-profiles.mdx new file mode 100644 index 0000000000..1302b7107d --- /dev/null +++ b/mintlify-docs/en/querying/query-profiles.mdx @@ -0,0 +1,358 @@ +--- +title: "Query Profiles" +--- + +A Query Profile is a named collection of search request parameters given in the configuration. The search request can specify a query profile whose parameters will be used as parameters of that request. This frees the client from having to manage and send a large number of parameters, and enables the request parameters to use for a use case to be changed without having to change the client. Query profiles enable [bucket tests](../applications/testing#feature-switches-and-bucket-tests), where a part of the query stream is given some experimental treatment, as well as differentiating behavior based on (OEM) customer, user type, region, frontend type etc. This document explains how to create and use query profiles. See also the [query profile reference](../reference/querying/query-profiles) for the full syntax. + +## Using a Query Profile + +A Query Profile is an XML file containing the request parameter names and their values, e.g.: + +```xml + + 20 + 2000 + merchantid + +``` + +See the [query profile reference](../reference/querying/query-profiles) for the full syntax. + + +**Important:** + +Note that full property names must be used, aliases like `input.query(...)` are only supported in requests and programmatic lookup. + + +See the [Query API reference](../reference/api/query) for a list of the built-in query properties. 
+ +To deploy a query profile: + + + +Create a file for the profile, using the format above, having the name *[my-profile-name].xml*, e.g. *MyProfile.xml* (replace any `/` in the name by `_`) + + +Put this in the directory *search/query-profiles* in the [application package](../basics/applications) root + + +[Redeploy](../basics/applications#deploying-applications) the application package + + + +Any number of query profile files may be added to this directory. If the query profiles contain errors, like incorrect syntax and/or infinite reference loops, deployment will fail. + +To use a query profile in a query request, send the name of the profile as the parameter `queryProfile`: + +```js +queryProfile=MyProfile +``` + +If the request does not specify a query profile, the profile named `default` will be used. If no `default` profile is configured, no profile will be used. If the queryProfile parameter is set but does not resolve to an existing profile, an error message is returned. Example, set default query timeout to 200ms for all queries not using a query profile: + +```bash +$ cat search/query-profiles/default.xml + + + 0.2 + +``` + +The query profile values (whether set from a configured query profile or by the request) are available as *query properties*. To look up a value from a [Searcher component](../applications/searchers), use: + +```js +query.properties().get("myVariable") +``` + +Note that property names are case-sensitive. 
+ +### Example + +Use a query profile to modify the YQL query string using an [IN](../reference/querying/yql#in) operator and [local substitution](#local-substitution) of *cities*: + +```js + + select * from restaurant where userQuery() and city in (%{cities}) + "" + +``` + +Use [match: word](../reference/schemas/schemas#match) for the IN operator: + +```js +field city type string { + indexing: summary | index + match: word +} +``` + +An example query passing the values for the IN operator to be substituted into the YQL string: + +```bash +$ vespa query \ + queryProfile=city-filter \ + cities='"berlin","paris"' \ + query='what the user typed' +``` + +With this, the application can use the *city-filter* query profile if there are cities in the filter, if empty, use a *no-filter* query profile: + +```js + + select * from restaurant where userQuery() + + +``` + +```bash +$ vespa query \ + queryProfile=no-filter \ + query='what the user typed' +``` + +### Overrides + +The parameter values set in *MyProfile.xml* will be used as if they were present directly in the request. If a parameter is present both directly in the request and in a profile, the request value takes precedence by default. Individual query profile fields can be made to take priority by setting the [overridable](../reference/querying/query-profiles#overridable) attribute to `false`. Example: + +```xml + + 0.2 + +``` + +## Nested Structure + +To support structure in the set of request variables, a query profile value need not be a string, but can instead be a reference to another query profile. In this case, the referenced query profile functions as a map (or struct, if types are used, see below) in the referencing query profile. The parameter names of the nested profile get preceded by the name of the reference variable and a dot. 
For example: + +```xml + + 10 + merchantid + MyUserProfile + +``` + +Where the referenced profile might look like: + +```xml + + 20 + student + +``` + +If `MyProfile` is referenced in a query now, it will contain the variables + +```js +hits=10 +unique=merchantid +user.age=20 +user.profession=student +``` + +References can be nested to provide a deeper structure, producing variables having multiple dots. The dotted variables can be overridden directly in the search request (using the dotted name) just as other parameters. + +Note that the id value of a profile reference can also be set in the request, making it possible to choose not just the top level profile but also any number of specific subprofiles in the request. For example, the request can contain + +```text +queryProfile=MyUserProfile&user=ref:MyOtherUserprofile +``` + +to change the reference in the above example to some other subprofile. Note the `ref:` prefix which is required to identify this as setting user to a query profile referenced by id rather than setting it to a string. + +## Inheritance + +A query profile may inherit one or more other query profiles. This is useful when there is some common set of parameters applicable to multiple use cases, and a smaller set of parameters which varies between them. To inherit another query profile, reference it as follows: + +```xml + + … + +``` + +The parameters of `MyBaseProfile` will be present when this profile is used exactly as if they were explicitly written in this profile. + +Multiple inheritance is supported by specifying multiple space-separated profile ids in the inheritance attribute. Order matters in this list - if a parameter is set in more than one of the inherited profiles, the first one encountered in the depth first, left to right search order is used. + +Parameters specified in the child query profile will always override the same parameters in an inherited one. 
+ +## Value Substitution + +Query profile values may contain substitution strings on the form `%{property-name}`. Example: + +```xml + + Hello %{world}! + Earth + +``` + +The value returned by looking up `message` will be *Hello Earth!*. + +### Global resolution + +Values are normally replaced by the value returned from `query.properties().get("property-name")` *at the time of the lookup*. Therefore, substituted values may be looked up in variants, in inherited profiles, in values set at run time and by following query profile references. Details: + +- No substitution will be performed *in* values set at run time +- If the value referenced in a substitution returns null, the reference is substituted by the empty string +- Unclosed substitutions cause an error at deploy time, but unknown values do not (they may exist at run time and will be replaced by an empty string if not) +- Recursive substitution works as expected. However, there is no loop detection + +### Local substitution + +To substitute by a value in the same query profile (or variant), prefix the property by a dot, as in + +```xml + + Hello %{.world}! + Earth + +``` + +Local substitutions can be verified at deploy time and will cause an error if not found. + +## Query Profile Variants + +In some cases, it is convenient to allow the values returned from a query profile to vary depending on the values of some properties input in the request. For example, a query profile may contain values which depend on both the market in which the request originated (`market`), the kind of device (`model`) *and* the bucket in question (`bucket`). + +Such variants over a set of request parameters may be represented in a single query profile, by defining nested variants of the query profile for the relevant combinations of request values. A complete example: + +```xml expandable + + + {/* Names of the request parameters defining the variant profiles of this. Order matters as described below. 
+ Each individual value looked up in this profile is resolved from the most specific matching virtual + variant profile */} + region,model,bucket + + {/* Values may be set in the profile itself as usual, this becomes the default values given no matching + virtual variant provides a value for the property */} + My general a value + + {/* The "for" attribute in a child profile supplies values in order for each of the dimensions */} + + My value of the combination us-nokia-test1-a + + + {/* Same as [us,*,*] - trailing "*"'s may be omitted */} + + My value of the combination us-a + My value of the combination us-b + + + {/* Given a request which matches both the below, the one which specifies concrete values to the left + gets precedence over those specifying concrete values to the right + (i.e the first one gets precedence here) */} + + My value of the combination us-nokia-a + My value of the combination us-nokia-b + + + My value of the combination us-test1-a + My value of the combination us-test1-b + + + +``` + +### Variants and Inheritance + +It is possible to define variants across several levels in an inheritance hierarchy. The variant dimensions are inherited from parent to child, with the usual precedence rules (depth first left to right), so a parent profile may define the dimensions and the child the values over which it should vary. + +Variant resolution within a profile has precedence over resolution in parents. This means e.g. that a default value for a given property in a sub-profile will be chosen over a perfect variant match in an inherited profile. + +Variants may specify their own inherited profiles, as in: + +```xml + + … + + … + + +``` + +Values are resolved in this profile and inherited profiles "interleaved" by the variant resolution order (which is specificity by default). E.g. by decreasing priority: + +```text +1. Highest prioritized variant value +2. Value in inherited from highest prioritized variant +3. 
Next highest prioritized variant value +4. Value in inherited from next highest prioritized variant +… +n. Value defined at top level in profile +n+1. Value in inherited from query profile +``` + +## Query Profile Types + +The query profiles may optionally be *type checked*. Type checking is turned on by referencing a *Query Profile Type* from the query profile. The type lists the legal set of parameters of the query profile, whether additional parameters are allowed, and so on. + +A query profile type is referenced by: + +```xml + + … + +``` + +And the type is defined as: + +```xml + + + + + +``` + +This specifies that these three parameters may be present in profiles using this type, as well as the query profile type of the `user` parameter. + +It is also possible to specify that parameters are mandatory, that no additional parameters are allowed (strict), to inherit other types and so on, refer to the full syntax in [the query profile reference](../reference/querying/query-profiles#query-profile-types). If the base profile type is strict, it *must* extend a built-in query profile type, see the [strict reference documentation](../reference/querying/query-profiles#strict). + +A query profile type is deployed by adding a file named *[query-profile-type-name].xml* in the *search/query-profiles/types* directory in the application package. + +Query profile types may be useful even if query profiles are not used to set values. As they define the names, types and structure of the parameters which can be accepted in the search request, they can also be used to define, restrict and check the content of search requests. For example, as the built-in search api parameters are also type checked if a typed query profile is used, types can be used to restrict the parameters that can be set in a request, or to mandate that some are always set. 
The built-in parameters are defined in a set of query profile types which are always present and which can be inherited and referenced in application-defined types. These built-in types are defined in the [Query API](../reference/api/query). + +## Path Matching + +By adding `<match path="true"/>` to a profile type, *path* name matching is used rather than the default exact matching when a profile is looked up from a name. Path matching interprets the profile name as a slash separated path and matches references which are subpaths (more specific paths) to super-paths. The most specific match becomes the target of the reference. For example: + +```text +Given the query profile names: + a1 + a1/b1 + +Then: + a1/b1/c1/d1 resolves to a1/b1 + a1/b resolves to a1 + a does not resolve +``` + +This is useful to assign specific query profile ids to every client or bucket without having to create a different configuration item for each of these cases. If there is a need to provide a differentiated configuration for any such client or bucket in the future, this can be done without having the client change its request parameter because a specific id is already used. + +## Versioning + +Query profiles (and types) may exist in multiple versions at the same time. Wherever a name of a query profile (or type) is referenced, the name may also append a version string, separated by a colon, e.g. `MyProfile:1.2.3`. The version number is *resolved* - if no version is given the highest version known is used. If the version number is only partially specified, as in `my-version:1`, the highest version starting by 1 is used. + +Where a query profile (or type) is defined, the id may specify the version after the name, separated by a colon: + +```xml + + … + +``` + +Any sub-number omitted is taken to mean 0 where a version is defined, so `id="MyProfile:1"` is the same as `id="MyProfile:1.0.0"`. 
+ +Query profiles (and types) which specify a version in their id must use a file name which includes the same version string after the name, separated by a dash, e.g. *MyProfile-1.2.3.xml*. + +For more information on versions, see [component versioning](../reference/applications/components#component-versioning). + +## Dump Tool + +It can sometimes be handy to be able to dump resolved query profiles offline. Run without arguments to get usage: + +```bash +$ vespa-query-profile-dump-tool +``` diff --git a/mintlify-docs/en/querying/result-diversity.mdx b/mintlify-docs/en/querying/result-diversity.mdx new file mode 100644 index 0000000000..42080528fb --- /dev/null +++ b/mintlify-docs/en/querying/result-diversity.mdx @@ -0,0 +1,101 @@ +--- +title: "Result Diversity" +--- + +In Search and Recommendation applications, the highest-ranking documents are displayed to the user. A document’s rank score is computed by a function over rank features. The score is computed per document, independently of other documents’ scores (except when using certain features in global-phase). + +This greedy approach gives the best overall result when documents are sufficiently dissimilar, but if not it might look like this: + +![Result diversity example](/assets/img/diversity-1.png) + +This can be improved by preferring some less similar documents even though they produce a lower individual rank score, which can usually be done by using the value of a field to create variation - like a domain field in the example above. + +Vespa has a set of features that can be used to create diversity; this guide outlines strategies and tradeoffs to create good-looking result sets. 
+ +## Diversity + +You can use the [diversity](../reference/schemas/schemas#diversity) element in the schema definition to filter out non-diverse results before second-phase ranking (and during match-phase if used): + +* The [attribute](../reference/schemas/schemas#diversity-attribute) value is the name of the attribute field that holds the value to diversify over (think of the domain in the example above). +* [min-groups](../reference/schemas/schemas#diversity-min-groups) is the minimum number of different values of that attribute that should be included in the result set (when available). + +Example: + +```js +field domain type string { + indexing: attribute | summary +} + +rank-profile diverse_example { + + first-phase { + expression: ... + } + + diversity { + attribute: domain + min-groups: 10 + } + + second-phase { + expression: ... + } + +} +``` + +## Grouping + +Grouping is a general feature for organizing and aggregating in results, which can be used for diversification. Grouping is specified at query time, so no rank-profile configuration is required. For example, to get the highest ranking result per domain with grouping, use: + +```bash +all( group(domain) max(1) each( output(summary() ) ) ) +``` + +With grouping, you have more accurate control over the behavior, such as being able to specify the number of items per group. You can also hierarchically group on multiple attributes, aggregate values etc. + +The grouping result structure comes in addition to (or instead of) the regular query results, so enabling diversity using grouping will change the rendered result structure (unless you flatten the grouped results in a Searcher before returning). + +Grouping runs on the results after second-phase ranking, and across all nodes. 
Generally, this approach uses more resources compared to using the diversity element to filter out results after first-phase, and means the second phase will be used to pick the best result also in each group (which gives better relevance at higher cost). + +## Match-phase diversity + +Diversity above runs over the result after first-phase ranking. + +The [match-phase](../reference/schemas/schemas#match-phase) feature lets you increase performance by limiting hits exposed to first-phase ranking to the highest (lowest) values of some attribute. Adding the diversity element when using match-phase means that the diversity field attribute is also used to produce the set of matches returned from the match-phase attribute. + +```js expandable +field popularity type int { + indexing: attribute | summary +} + +field domain type string { + indexing: attribute | summary +} + +rank-profile diverse_example { + + match-phase { + attribute: popularity + total-max-hits: 1000 + max-filter-coverage: 1.0 + } + + diversity { + attribute: domain + min-groups: 10 + } + + first-phase { + expression: attribute(popularity) + } +} +``` + +## Collapsefield + +The final processing before returning a result happens in a container node - refer to [query execution](/en/querying/query-api#query-execution) for details. + +Setting the [collapsefield](../reference/api/query#collapsefield) parameter lets you filter out hits which have the same value for one or more fields as a higher-ranked hit. + +Using collapsefield is a cheap option when results only contain a small number of duplicates on average, since it defers all diversification work until the end. When there may be many duplicates, using either the diversity element or grouping should be preferred. 
diff --git a/mintlify-docs/en/querying/searching-multivalue-fields.mdx b/mintlify-docs/en/querying/searching-multivalue-fields.mdx new file mode 100644 index 0000000000..2f73072f9e --- /dev/null +++ b/mintlify-docs/en/querying/searching-multivalue-fields.mdx @@ -0,0 +1,588 @@ +--- +title: "Searching and ranking of multivalued fields" +--- + +This guide explains how to search and rank over structured multivalued fields. The examples in this guide use the [weightedset](../reference/schemas/schemas#weightedset) [field type](../reference/schemas/schemas#field). The generic [map](../reference/schemas/schemas#map) field type does not currently support ranking and can only be used for matching and filtering. + +When building a search application we need to think about: + +- How to [match](/en/querying/query-language) a user specified query against a [document schema](../basics/schemas) using Vespa [query language](/en/querying/query-language). +- How to [rank](../basics/ranking) documents matching the query. + +## Matching documents + +There are many text matching options we should think about when designing and mapping our document model to a Vespa document schema: + +- For string fields we should think about using text style matching or database-style exact matching. +- For string fields there are also several [linguistic processing](../linguistics/linguistics) options like [tokenization](../linguistics/linguistics-opennlp#tokenization), normalization and language dependent [stemming](../linguistics/linguistics-opennlp#stemming). +- String fields which share the same [match](../reference/schemas/schemas#match) and linguistic processing settings can be combined using [fieldsets](../reference/schemas/schemas#fieldset). + +At query time, we can take the user query and translate it into a valid Vespa query request which implements our matching and retrieval strategy over the designed document schema. 
+ +## Ranking documents + +The documents which match the query and are retrieved by the query are scored using a ranking model. Once a document is retrieved by the query logic the document can be scored using the full flexibility of the Vespa [ranking](../basics/ranking) framework. + +## A minimal Vespa application + +In the following sections we explore matching and ranking over multivalued string fields. + + +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + ``` + $ podman machine init --memory 6000 + $ podman machine start + ``` + - See [Docker Containers](/en/operations/self-managed/docker-containers.html) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 4 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block.html). +- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli.html), or download the Vespa CLI from [Github releases](https://github.com/vespa-engine/vespa/releases). + + +Assuming we have the following sample data document where we have a structured tag-like field where there is a weight associated with each element. 
+ +```json +{ + "put": "id:photos:photo::0", + "fields": { + "title": "Mira in the sunset", + "description": "A sunny afternoon with our dogs", + "tags": { + "no filter":1, + "light": 3, + "black and white": 3, + "clear sky": 2, + "sunset dogs": 4 + } + } +} +``` + +`Paste the above into file doc.json` + +Structured data like the `tags`, where we both want to match and rank is best represented using the [weightedset](../reference/schemas/schemas#weightedset) [field type](../reference/schemas/schemas#field). The Vespa weightedset field type can be used to represent: + +- Document side tags like in the above example. +- [Document expansion by query prediction](https://github.com/castorini/docTTTTTquery). +- Editorial ranking overrides, for example sponsored search listings. + +How should we design our Vespa schema, and how should we match and search this data model for end-user free text queries? + +- We want to use text matching when searching the title and description. +- We also want to match the free form tags field as these tags might increase recall and the weight of the matched element(s) could influence ranking of documents matched - schema: + +```js expandable +schema photo { + + stemming: none + + document photo { + + field title type string { + indexing: summary | index + match:text + index: enable-bm25 + } + + field description type string { + indexing: summary | index + match:text + index: enable-bm25 + } + + field interestingness type float { + indexing: summary | attribute + } + + field tags type weightedset { + indexing: summary | index + match:text + index: enable-bm25 + } + + } + + fieldset default { + fields: title, description, tags + } + + rank-profile default { + first-phase { + expression: nativeRank + } + } +} +``` + +`Paste the above into file my-app/schemas/photo.sd` + +In the schema we disable [stemming](../reference/schemas/schemas#stemming) and also enable [bm25](../ranking/bm25) text ranking feature for all string fields. 
+ +Since all string fields shares the same [match](../reference/schemas/schemas#match) settings we can use a [fieldset](../reference/schemas/schemas#fieldset) so that queries does not need to mention all three fields. + +We also include a default rank profile (this is the implicit default rank profile) using the Vespa [nativeRank](../ranking/nativerank) text matching rank feature. + +Along with the schema, we also need a [services.xml](../reference/applications/services/services) file to make up a Vespa [application package](../reference/applications/application-packages): + +```xml + + + + + + + + + + + + + 1 + + + + + + + + + +``` + +`Paste the above into file my-app/services.xml` + +## Starting Vespa + +This example uses the vespa container image: + + +```bash +$ docker pull vespaengine/vespa +$ docker run --detach --name vespa --hostname vespa-container \ + --publish 8080:8080 --publish 19071:19071 \ + vespaengine/vespa +``` + +Install [Vespa-cli](../clients/vespa-cli) using Homebrew: + +```bash +$ brew install vespa-cli +``` + +Deploy the application: + + + +```bash +$ vespa deploy --wait 300 my-app +``` + +## Feeding to Vespa +Feed a sample document: + + + +```bash +$ vespa document -v doc.json +``` + +## Query the data + +Assuming a free text query *sunset photos featuring dogs*, translate the user query into a Vespa query request using YQL: + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=sunset photos featuring dogs' \ + 'type=all' +``` + +The above query returns 0 hits, since the query requires that *all* query terms matches the document. 
By adding [trace.level](../reference/api/query#trace.level) to the query request we can see how the query is parsed and executed against the content nodes: + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=sunset photos featuring dogs' \ + 'type=all' \ + 'trace.level=3' +``` + +In the trace we can see the query which is dispatched to the content nodes: `query=[AND sunset photos featuring dogs]` + +Using tracing is very useful when debugging why documents match or do not match. + +Since the sample document does not contain the term *featuring* or *photos*, the query fails to retrieve the example document. Relax the query matching from requiring that **all** terms match to requiring that **any** term matches. See [model.type](../reference/api/query#model.type) query api reference for supported query types: + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=sunset photos featuring dogs' \ + 'type=any' +``` + +Changing the type to `any` recalls the sample document as we no longer require that all query terms must match. With `type` it is also possible to require that individual query terms match by using `+`: + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=+sunset photos featuring +dogs' \ + 'type=any' +``` + +In this example `sunset` and `dogs` must be matched. Note that we have disabled stemming so querying for `dogs` won't recall documents with `dog`. This is one of the reasons we disabled stemming, to demonstrate that stemming has impact on recall. Requiring `dog` will cause the query to not recall our sample document. + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=+sunset photos featuring +dog' \ + 'type=any' +``` + +Now, explore how Vespa matches the multivalued tags field of type [weightedset](../reference/schemas/schemas#weightedset). Notice that we change back to `type=all`. 
In this example we also use the [default-index](../reference/api/query#model.defaultindex) query parameter to limit matching to the `tags` field. + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=clear sky' \ + 'type=all' \ + 'default-index=tags' +``` + +The query matches the document which is no surprise since a tag contains the exact content `clear sky`. Let us search for just `clear` instead: + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=clear' \ + 'type=all' \ + 'default-index=tags' +``` + +Also matches the document, this demonstrates that matching is partial, it does not require to match the set element exactly. `clear` matches `clear sky` and `sky` will match `clear sky`. + +But what about `black sky`: + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=black sky' \ + 'type=all' \ + 'default-index=tags' +``` + +Also matches the document. This is an example of cross-element matching. With weightedset using `indexing:index` with `match:text` multi term queries match across elements. + +This might be a good decision, as we increase recall, however in some cases we want to differentiate an exact match from a partial match during ranking, so that exact matches are ranked higher than partial matches. + +## Ranking + +We have now explored querying and matching, now it's time to focus on how to rank the documents matched. You might not have noticed, but in the above examples, each of the queries produced a `relevance` score per hit, this score was in our previous examples calculated using the `default` rank profile which in our case used [nativeRank](../ranking/nativerank). + +We can start by analyzing other [rank features](../reference/ranking/rank-features) by asking Vespa to produce them for us. We use [match-features](../reference/schemas/schemas#match-features) to return rank features with the retrieved documents. 
We explicitly mention which ranking features we want to have calculated and returned. Notice that we don't change the actual scoring, we still use `nativeRank` as the scoring function: + +```js expandable +schema photo { + + stemming: none + + document photo { + + field title type string { + indexing: summary | index + match:text + index: enable-bm25 + } + + field description type string { + indexing: summary | index + match:text + index: enable-bm25 + } + + field interestingness type float { + indexing: summary | attribute + } + + field tags type weightedset { + indexing: summary | index + match:text + index: enable-bm25 + } + + } + + fieldset default { + fields: title, description, tags + } + + rank-profile default { + first-phase { + expression: nativeRank + } + + match-features { + bm25(title) + bm25(description) + bm25(tags) + + nativeRank + nativeRank(title) + nativeRank(description) + + elementSimilarity(tags) + elementCompleteness(tags).elementWeight + elementCompleteness(tags).fieldCompleteness + elementCompleteness(tags).queryCompleteness + elementCompleteness(tags).completeness + } + } +} +``` + +`Paste the above into file my-app/schemas/photo.sd` + +Re-deploy with the changed rank profile: + + +```bash +$ vespa deploy --wait 300 my-app +``` + +Now we will see a list of features in the response: + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=clear sky' \ + 'type=any' +``` + +The output includes [matchfeatures](../reference/querying/default-result-format#matchfeatures) where we can see the various scores for the features: + +Especially look at the `elementCompleteness` and `elementSimilarity` rank features which are example of [features for indexed multivalued string fields](../reference/ranking/rank-features#features-for-indexed-multivalue-string-fields). 
+ +We can also notice that `elementCompleteness(tags).fieldCompleteness` is 1.0, which means that the tag was matched exactly, and that `elementCompleteness(tags).elementWeight` outputs the weight of the best matched element. + +The `elementSimilarity(tags)` ranking feature is very flexible and even allows us to override the [calculation and output new features](../reference/ranking/rank-feature-configuration#elementSimilarity). + +In this example we define two new ranking features: + +- `elementSimilarity(tags).sumWeight` which uses the sum of matching elements using field completeness x weight. +- `elementSimilarity(tags).maxWeight` which uses the max over the matching elements using field completeness x weight. + +```js expandable +schema photo { + + stemming: none + + document photo { + + field title type string { + indexing: summary | index + match:text + index: enable-bm25 + } + + field description type string { + indexing: summary | index + match:text + index: enable-bm25 + } + + field interestingness type float { + indexing: summary | attribute + } + + field tags type weightedset { + indexing: summary | index + match:text + index: enable-bm25 + } + + } + + fieldset default { + fields: title, description, tags + } + + rank-profile default { + rank-properties { + elementSimilarity(tags).output.sumWeight: "sum(f*w)" + elementSimilarity(tags).output.maxWeight: "max(f*w)" + } + + first-phase { + expression: nativeRank + } + + match-features { + bm25(title) + bm25(description) + bm25(tags) + + nativeRank + nativeRank(title) + nativeRank(description) + + elementSimilarity(tags) + elementSimilarity(tags).sumWeight + elementSimilarity(tags).maxWeight + + elementCompleteness(tags).elementWeight + elementCompleteness(tags).fieldCompleteness + elementCompleteness(tags).queryCompleteness + elementCompleteness(tags).completeness + } + } +} +``` + +`Paste the above into file my-app/schemas/photo.sd` + +Re-deploy with the changed rank profile: + +```bash +$ vespa deploy 
--wait 300 my-app +``` + +Now we will see a list of features in the response: + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=clear sky' 'type=any' +``` + +Each hit returned contains a [matchfeatures](../reference/querying/default-result-format#matchfeatures) field where we can see the various scores for the features. + +Now, we can include these features in a ranking expression used in `first-phase` to actually change the ranking. The actual _best_ scoring function is data dependent. A trained function using machine learning is by far the easiest way. + +The bag-of-words [bm25](../ranking/bm25) ranking feature is not normalized, so combining it in a linear function is challenging, as the score range of the feature is unbounded. To overcome this, and allow easy exploration without changing the rank profile, make the parameters in the function overridable on a per-query basis: + +```js +first-phase { + expression { + query(titleWeight)*bm25(title) + + query(descriptionWeight)*bm25(description) + + query(tagWeight)*elementSimilarity(tags).maxWeight + } +} +``` + +See [using query variables](../ranking/ranking-expressions-features#using-query-variables). 
+ +```js expandable +schema photo { + + stemming: none + + document photo { + + field title type string { + indexing: summary | index + match:text + index: enable-bm25 + } + + field description type string { + indexing: summary | index + match:text + index: enable-bm25 + } + + field interestingness type float { + indexing: summary | attribute + } + + field tags type weightedset { + indexing: summary | index + match:text + index: enable-bm25 + } + } + + fieldset default { + fields: title, description, tags + } + + rank-profile tunable inherits default { + inputs { + query(titleWeight): 2 + query(descriptionWeight): 1 + query(tagWeight): 2 + } + + rank-properties { + elementSimilarity(tags).output.sumWeight: "sum(f*w)" + elementSimilarity(tags).output.maxWeight: "max(f*w)" + } + + first-phase { + expression { + query(titleWeight)*bm25(title) + query(descriptionWeight)*bm25(description) + + query(tagWeight)*elementSimilarity(tags).maxWeight + } + } + + match-features { + bm25(title) + bm25(description) + bm25(tags) + elementSimilarity(tags).maxWeight + firstPhase + } + } +} +``` + +`Paste the above into file my-app/schemas/photo.sd` + +Re-deploy: + +```bash +$ vespa deploy --wait 300 my-app +``` + +Run a query with the new rank profile: + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=clear sky' 'type=any' 'ranking=tunable' +``` + +With the function above, since 'clear sky' does not match any of the title or description fields, the bm25 features becomes zero. + +Our `elementSimilarity(tags).maxWeight` feature is 2.0 and the first phase expression becomes 4 which is reflected in the hit relevance score. 
+ +Change the `query(tagWeight)` with the query request and observe that the relevance becomes 6.0: + +```bash +$ vespa query 'yql=select * from photos where userQuery()' \ + 'query=clear sky' \ + 'type=any' \ + 'ranking=tunable' \ + 'input.query(tagWeight)=3' +``` + +Similar, we could also include a document-only signal to our ranking function by: + +```js expandable +inputs { + query(titleWeight): 2 + query(descriptionWeight): 1 + query(tagWeight): 2 + query(staticWeight): 1 +} + +rank-properties { + elementSimilarity(tags).output.sumWeight: "sum(f*w)" + elementSimilarity(tags).output.maxWeight: "max(f*w)" +} + +first-phase { + expression { + query(titleWeight)*bm25(title) + query(descriptionWeight)*bm25(description) + + query(tagWeight)*elementSimilarity(tags).maxWeight + query(staticWeight)*attribute(interestingness) + } +} +``` + +That concludes the matching and ranking experiments. To shut down the container: + +```bash +$ docker rm -f vespa +``` diff --git a/mintlify-docs/en/querying/text-matching.mdx b/mintlify-docs/en/querying/text-matching.mdx new file mode 100644 index 0000000000..a8bf04fbcf --- /dev/null +++ b/mintlify-docs/en/querying/text-matching.mdx @@ -0,0 +1,368 @@ +--- +title: "Text Matching" +--- + +This guide demonstrates tokenization, linguistic processing and matching over [string](../reference/schemas/schemas#string) fields in Vespa. The guide features examples based on the [quick start](../basics/deploy-an-application-local). + +Refer to the [ranking](../basics/ranking) introduction for ranking, and review the different [match modes](../reference/schemas/schemas#match) that Vespa supports per field. See the [text search](../learn/tutorials/text-search) and [text search through ML](../learn/tutorials/text-search-ml) tutorials. Finally, refer to [linguistics](../linguistics/linguistics) for linguistic processing in Vespa. + +Using [query tracing](#query-trace) is useful when debugging text matching. 
+ +## Index and attribute + +Vespa string fields can have a mix of settings specified per field, such as the [indexing](../reference/schemas/schemas#indexing) and [match modes](../reference/schemas/schemas#match). + +- The *index* for free-text search with default match mode *text*, integrating with linguistic processing such as [tokenization and stemming](../linguistics/linguistics). +- The *attribute* indexing is used for database-style of string matching without linguistic processing and where the exact string contents are matched. + +Free-text search is normally solved using a string field in *index* mode: + +```js +field album type string { + indexing: summary | index +} +``` + +If both index and attribute are configured for string-type fields, Vespa will search and match against the index with default match `text`. When the field is both index and attribute, the index aspect is used for matching. (The attribute could still be useful in general, for grouping and sorting.) To get multiple match modes on a single source field you could define a synthetic field outside the document block: + +```js +schema music { + + document music { + + field album type string { + indexing: summary | index + } + + } + + field album_as_attribute type string { + indexing: input album | attribute + } + +} +``` + +If you want substring matching for indexed search, consider using [n-gram matching](#n-gram-match). + +Below, find details on transformations to the text for text indexing and search using the quick start sample application as an example. + +The *album* field has [index](../reference/schemas/schemas#indexing) mode. For text fields, this enables transformations of the string field to increase query recall. + +The following is useful for dumping the resulting text tokens after indexing, to understand the transformations. This coupled with [query tracing](#query-trace) can help us understand why a document field doesn't match or match a query. 
+ +Add another [document summary](/en/querying/document-summaries) to [schemas/music.sd](https://github.com/vespa-engine/sample-apps/blob/master/album-recommendation/app/schemas/music.sd) that contains an extra summary field using [tokens](../reference/schemas/schemas#tokens) with the [source](../reference/schemas/schemas#source) set to the proper index field: + +```js +document-summary my-debug-summary { + summary album { } + summary album_tokens { + source: album + tokens + } + from-disk +} + +fieldset default { + fields: artist, album +} +``` + +Redeploy the application to enable the new document summary: + +```bash +$ vespa deploy --wait 300 +``` + +Show original content of album field: + +```bash +$ vespa query "select * from music where true" summary=my-debug-summary | \ + jq -c '.root.children[].fields.album' +"Liebe ist für alle da" +"A Head Full of Dreams" +"Hardwired...To Self-Destruct" +"When We All Fall Asleep, Where Do We Go?" +"Love Is Here To Stay" +``` + +Show tokens used for indexing the album field: + +```bash +$ vespa query "select * from music where true" summary=my-debug-summary | \ + jq -c '.root.children[].fields.album_tokens' +["lieb","ist","fur","all","da"] +["a","head","full","of","dream"] +["hardwire","to","self","destruct"] +["when","we","all","fall","asleep","where","do","we","go"] +["love","is","here","to","stay"] +``` + +Observe the [linguistic transformations](../linguistics/linguistics) to the data before indexed: + +| Transformation | Type | +|:---|:---| +| Hardwired...To → hardwire to | Tokenization - split terms on non-characters, here "..." 
| +| Head → head | Lowercasing | +| für → fur | Normalizing | +| dreams → dream | Stemming | + +Then, change from *index* to [attribute](../reference/schemas/schemas#indexing) in [schemas/music.sd](https://github.com/vespa-engine/sample-apps/blob/master/album-recommendation/app/schemas/music.sd) (and remove all bm25 settings): + +```js +@@ -13,8 +13,7 @@ + } + + field album type string { +- indexing: summary | index +- index: enable-bm25 ++ indexing: summary | attribute + } + + field year type int { +@@ -51,7 +50,7 @@ + query(user_profile) tensor(cat{}) + } + first-phase { +- expression: bm25(album) + 0.25 * sum(query(user_profile) * attribute(category_scores)) ++ expression: 0.25 * sum(query(user_profile) * attribute(category_scores)) + } + } +``` + +Run the tutorial again using the new schema, stop after feeding. Show tokens used for indexing the album field: + +```bash +$ vespa query "select * from music where true" summary=my-debug-summary | \ + jq -c '.root.children[].fields.album_tokens' +["a head full of dreams"] +["love is here to stay"] +["when we all fall asleep, where do we go?"] +["liebe ist für alle da"] +["hardwired...to self-destruct"] +``` + +The most important observation is that the strings are added *as-is* for attributes and matching considers the full value, including whitespace (no tokenization). + +The only transformation is lowercasing, both query terms and attribute data are lowercased before matching unless the `match` setting for the field has been set to `cased`. Read more about the attribute [word match mode](../reference/schemas/schemas#match). + +## Prefix match + +Use the [prefix](../reference/querying/yql#prefix) annotation to match string prefixes in attributes of type string: + +```js +field album type string { + indexing: summary | attribute +} +``` + +Note that regular *index* fields does not support prefix matching. 
+ +```bash +$ vespa query 'select * from music where album contains ({prefix: true}"a hea")' +``` + +The [search-suggestions](https://github.com/vespa-engine/sample-apps/tree/master/incremental-search/search-suggestions) sample application uses prefix search, see README for a design discussion. + +To prefix-match individual terms in a string, use an attribute with array of strings in addition to the index string field, e.g.: + +```js +schema company { + document company { + field company_name_string type string { + indexing: index | summary + } + } + field company_name_array type array { + indexing: input company_name_string | trim | split " +" | attribute | summary + } +} +``` + +Use the [indexing-language](../reference/writing/indexing-language) to split the string, as shown above. Adding "Goldman" and "Sachs" will match query terms like "Gold" and "Sach". + +## Fuzzy match + +Use [fuzzy](../reference/querying/yql#fuzzy) matching to match in string attributes with configurable edit distance. Field configuration: + +```js +field album type string { + indexing: summary | attribute + attribute: fast-search +} +``` + +```bash +$ vespa query 'select * from music where album contains ({maxEditDistance: 1}fuzzy("A Head Full of Dreems"))' +``` + +Fuzzy matching is great for misspellings. See use of *prefixLength* and *fast-search* in the [reference](../reference/querying/yql#fuzzy). + +Character [normalization](../linguistics/linguistics-opennlp#normalization) is not performed for fuzzy matches. + +### Fuzzy prefix match + +By default, `fuzzy` matches *full* strings against the query. For use-cases such as type-ahead search this means a user query such as "Ahead Full" will fail to match the document string "A Head Full of Dreams", both when using fuzzy matching (too many characters missing) as well as regular, *non-fuzzy* prefix matching (prefixes do not exactly match). + +Adding `prefix:true` enables *fuzzy prefix* semantics. 
If a string has a *prefix* that can match the query string within the specified maximum number of edits, it will be considered a match. + +```bash +$ vespa query 'select * from music where album contains ({maxEditDistance: 1, prefix: true}fuzzy("Ahead Full"))' +``` + +This query will match strings such as "A Head Full of Dreams", "A Head Full of Clouds", "Ahead Full Steam" etc. + +Exact prefix locking (`prefixLength:n`) can be used alongside fuzzy prefix matching to constrain the candidate set to strings that have prefix that *exactly* matches *n* characters of the query. Fuzzy prefix matching then applies to the remainder (suffix) of the candidate string. This greatly speeds up dictionary scans since only a subset of the dictionary needs to be considered. + + +**Important:** + +Fuzzy prefix matching often matches many more documents than non-prefix fuzzy matching. For instance, a query such as `{maxEditDistance:2,prefix:true}fuzzy("XY")` will end up matching *every* document, since *all possible strings* can have their prefix transformed to "XY" with at most 2 edits. This is the case for all fuzzy prefix queries where the length of the query string is equal to, or lower than, `maxEditDistance`. This should be taken into consideration when constructing queries based on user input. + + +### Test fuzzy matching + +Use a synthetic (derived) field to easily test fuzzy matching: + +```js expandable +schema music { + + document music { + field songs type array { + indexing: summary | index + index: enable-bm25 + } + } + + field songs_attr type array { + indexing: input songs | attribute | summary + attribute: fast-search + } +} +``` + +In the example above, you have a field with an array of song titles (in the `document`) section in the schema. Take the value of that field, and input into an [attribute](../content/attributes), which is a field type that supports fuzzy matching. + +Add this field and deploy to Vespa. 
On Vespa Cloud, the data is automatically reindexed, so the `songs_attr` field is populated with data. This might take a little time on a large corpus. Then test with a query: + +```bash +$ vespa query 'select * from music where songs_attr contains ({maxEditDistance: 1}fuzzy("enter zandman"))' +``` + +## Regular expression match + +Using [regular expressions](../reference/querying/yql#matches) is supported in attributes. In the general case, however, there are no optimizing data structures for query speed; the expression is run over all attribute values. If *fast-search* is present on the attribute, explicitly prefix-anchored regex queries (e.g. `^abc.*`) will run more efficiently, i.e. only over the range of potentially matching attribute values (must start with "abc"). + +```js +field album type string { + indexing: summary | attribute +} +``` + +Example, matching from start of string: + +```bash +$ vespa query 'select * from music where album matches "^a head fu[l]+ of dreams"' +``` + +A substring search: + +```bash +$ vespa query 'select * from music where album matches "head"' +``` + +Character [normalization](../linguistics/linguistics-opennlp#normalization) is not performed for regular expression matches. + +## N-Gram match + +N-gram matching splits text into subword tokens ("grams") of the specified N size. This is useful with CJK languages, which do not use spaces, as well as for substring matching in text indexes. + +Vespa will by default combine the grams it creates from a text in queries into an AND item, requiring all of them to match. This can be overridden by sending the query parameter `gram.match`, which can be set to the name of any composite query item: `all` (default), `any`, `weakAnd`, `phrase`, `near`, or `onear`. 
+ +```js +Example: Input text "滿腦子的夢想" with these settings + + field album type string { + indexing: { + "zh-hant" | set_language; + summary | index + } + match { + gram + gram-size: 2 + } + } + +Produces the tokens 子的 夢想 滿腦 的夢 腦子 + +Any query which contains at least two tokens from this text will therefore be matched. +``` + +## Example use case + +*What is the best way to index short word-length documents, like names of all locations/towns in the world, such that they:* + +- *Are robust to misspelling in user queries, e.g.: "Amsterdam" --> "amstredam"* +- *Are cross-lingual for search, e.g.: "America" --> "美國"* + +To make this multilingual, use an [`array<string>`](../reference/schemas/schemas#array) field to store all the alternatives. One can also translate to a canonical single language used in indexing at query time, but in cases with very short documents, opt for doing it at indexing time. + +Alternatives for matching with spell checking: + 1. Make the field an attribute and use [fuzzy matching](#fuzzy-match). + 2. Make the field an index with [gram matching](#n-gram-match). + 3. Having an array of alternatives anyway, just stuff all the misspellings to match into it. Consider using a [weighted set](../reference/schemas/schemas#weightedset) instead to weight them by closeness to the original. + +Alternative 3 will give the cheapest queries and exact control over misspelled matching, but a larger index, more work for the developer, and adjusting spell correction becomes more complicated. Alternative 1 will be most expensive, but maybe also most convenient. There are currently no rank signals giving you the match quality. Alternative 2 is in between, and will probably work best when incorporating ranking signals that use proximity (such as [nativeRank](../ranking/nativerank) but not [bm25](../ranking/bm25)). + +Read [Simplify Search with Multilingual Embedding Models](https://blog.vespa.ai/simplify-search-with-multilingual-embeddings/) for semantic matching and ranking. 
+ +## Query Trace + +Adding [trace.level=2](../reference/api/query#trace.level) gives insight when testing queries - example attribute lowercasing (observe that queries with "Liebe" and "liebe" give the same result): + +```bash +$ vespa config set target local +$ vespa query 'select * from music where album contains "Liebe ist für alle da"' \ + ranking=rank_albums \ + trace.level=2 +``` + +Also try query tracing to see how query parsing changes with *index* and *attribute* indexing modes. + +## Appendix: Match Configuration Debugging + +Inspect generated configuration to understand or validate the match configuration. Run this to find the value of the -i argument used below: + +```bash +$ docker exec vespa sh -c vespa-configproxy-cmd | grep IndexingProcessor + + vespa.configdefinition.ilscripts,default/docprocchains/chain/indexing/component/com.yahoo.docprocs.indexing.IndexingProcessor, ... +``` + +Start over, deploy with the indexing settings below and feed data. Note the difference for the *artist* (with exact matching) and *album* fields: + +```js +field artist type string { + indexing: summary | index + match : exact +} +field album type string { + indexing: summary | index +} + +$ docker exec vespa sh -c 'vespa-get-config \ + -n vespa.configdefinition.ilscripts \ + -i default/docprocchains/chain/indexing/component/com.yahoo.docprocs.indexing.IndexingProcessor' + +maxtermoccurrences 100 +fieldmatchmaxlength 1000000 +ilscript[0].doctype "music" +ilscript[0].docfield[0] "artist" +ilscript[0].docfield[1] "album" +ilscript[0].docfield[2] "year" +ilscript[0].docfield[3] "category_scores" +ilscript[0].content[0] "clear_state | guard { input artist | exact | summary artist | index artist; }" +ilscript[0].content[1] "clear_state | guard { input album | tokenize normalize stem:"BEST" | summary album | index album; }" +ilscript[0].content[2] "clear_state | guard { input year | summary year | attribute year; }" +ilscript[0].content[3] "clear_state | guard { input 
category_scores | summary category_scores | attribute category_scores; }" +``` diff --git a/mintlify-docs/en/querying/vector-search-intro.mdx b/mintlify-docs/en/querying/vector-search-intro.mdx new file mode 100644 index 0000000000..5d6551291a --- /dev/null +++ b/mintlify-docs/en/querying/vector-search-intro.mdx @@ -0,0 +1,100 @@ +--- +title: "An intro to vector search" +--- + +Vector Search is a method to search objects using a digital representation of both the query and the objects - easier explained by example: + +Consider a user searching for the flower "Dandelion". A regular text search will match [this Wikipedia article](https://en.wikipedia.org/wiki/Taraxacum), as the "dandelion" term is in the text: + +***Taraxacum*** + +From Wikipedia, the free encyclopedia (Redirected from Dandelion) + +> "Dandelion" redirects here. It may refer to any species of the genus Taraxacum or specifically to Taraxacum officinale. For similar plants, see False dandelion. For other uses, see Dandelion (disambiguation) + +***Taraxacum*** (/təˈræksəkʊm/) is a large genus of flowering plants in the family Asteraceae, which consists of species commonly known as dandelions. + +... + + +This document is about dandelions, for sure. However, if the user searches for "blowball" (the mature spherical seed head of a dandelion), the text above will not match, although the document is a good match for "blowball". Example images of a dandelion: + + + +![](https://upload.wikimedia.org/wikipedia/commons/4/4f/DandelionFlower.jpg) + + +![](https://upload.wikimedia.org/wikipedia/commons/5/54/TaraxacumOfficinaleSeed.JPG) + + + + +This gets more complicated with more content types. It is hard to use text search to match the images without any text. The same goes for videos, podcasts, songs, TV shows, and so on - this is often called multi-modal search. A workaround is to search the object’s textual metadata, like a song’s title, the podcast episode summary, and the image’s *alt* text on the webpage. 
Such metadata is only sometimes available and is often too short/imprecise - this does not solve the root problem. + +The query can be an image itself or a song, where the users want *more like this*, in the context of the current song or an image being viewed. This is a problem found in recommender systems. + +## Digital representations + +A solution to the synonym or multi-modal problem is to change from matching in the textual to the digital domain. This means transforming text, images, and songs into sets of numbers that indicate what each object is *about*. + +Examples of different objects, and their vector representation: + +| ![DandelionFlower.jpg](https://upload.wikimedia.org/wikipedia/commons/4/4f/DandelionFlower.jpg) | [0.560, 0.001, 0.223, ...] | +|:---|:---| +| ![Dandelion by Anna of the North](/assets/img/dandelion-song.png) | [0.0, 0.011, 0.0, ...] | +| "Taraxacum is a large genus of flowering plants in the family Asteraceae, which consists of species commonly known as dandelions." | [0.002, 0.001, 0.411, ...] | +| dandelions | [0.002, 0.021, 0.355, ...] | + +The digital representation of the object is hence a sequence of numbers, called a *vector*, also called an *embedding*. + +By converting all the objects searched for *and* the query to vectors (digital representations), the search problem is changed into finding similar vectors - i.e., *vector search*. + +## How vectors are created + +Creating vectors from the objects can be done in multiple ways. Machine learning is often used; there are many ready-made models to get you started. + +The quality of the vectors will decide the quality of the search, so this is where organizations will want to spend their effort improving search. + +A vector has a *dimension* (length) and type (type of each cell). + +The cost/quality tradeoff is essential - given a vector, it can be represented with lower precision or shortened, keeping the most relevant dimensions. 
This reduces search precision, but cuts costs into a fraction. A 75% cost reduction might reduce precision, but acceptable for the use case. Vespa supports [four types](../reference/ranking/tensor), with an 8x difference in memory cost: + +| Int8 | 8 bits, 1 byte per dimension | +|:---|:---| +| bfloat16 | 16 bits, 2 bytes per dimension | +| float | 32 bits, 4 bytes per dimension | +| double | 64 bits, 8 bytes per dimension | + +Read more on selecting the optimal type: + +- [billion-scale-knn](https://blog.vespa.ai/billion-scale-knn/) + +Vectors are often some hundred numbers long, like 768 or 384. A longer vector can hold more information, but some dimensions are more information-rich than others. If most of the values in a dimension are equal or very close to each other, the dimension has little value and can be eliminated: + +- [Dimension reduction using Principal Component Analysis (PCA)](https://blog.vespa.ai/building-billion-scale-vector-search-part-two/#dimension-reduction-using-principal-component-analysis-pca) + +A better approach is often to use an ML model with the optimal size from the start for the use case, once found. + +## Doing a vector search + +"dandelion" matches "dandelion", but "rose" does not. Text search has a binary nature. However, both are flowers and considered more similar to each other than "airplane". Unlike text matching, a vector search will not match items exactly. In the following example, we are using a very short 3-dim vector for simplicity and using the dimensions as coordinates in a 3D space: + + +![](/assets/img/3Dplot.png) + + +We see that [0.000, 0.497, 0.110] is not equal to [0.101, 0.560, 0.093], but quite close - closer than [0.611, 0.000, 0.217]. + +In other words, if the values for each dimension are close, the vectors are similar - and this can be used in search to find the *nearest neighbor* or NN. 
There are multiple ways to calculate vector similarity using a *distance metric*: + +- [distance-metric](../reference/schemas/schemas#distance-metric) + +## Approximate for speed + +Calculating vector similarity can be thought of as multiplying the number of dimensions with the number of vectors. If you are a Canadian, represented by a 768-dim vector, this is 768 x 39,000,000 = 29,952,000,000, or 29 billion calculations to find the other Canadian most similar to you. + +There are ways to approximate this by doing fewer calculations, and still finding the closest vector with high probability - this is called *Approximate Nearest Neighbor search*, or *ANN search*. Vespa supports both exact nearest neighbor search and ANN search: + +- [approximate-nn-hnsw](/en/querying/approximate-nn-hnsw) + +Note that ANNs require some kind of indexing to speed up search, so inserts (adding a new vector) are more expensive (uses more CPU) and takes more space (memory and disk). When evaluating different kinds of ANN indexing, consider if your use case requires updates, including deletes, to the vectors - Vespa supports all. diff --git a/mintlify-docs/en/rag/binarizing-vectors.mdx b/mintlify-docs/en/rag/binarizing-vectors.mdx new file mode 100644 index 0000000000..78ab30ec08 --- /dev/null +++ b/mintlify-docs/en/rag/binarizing-vectors.mdx @@ -0,0 +1,595 @@ +--- +title: "Binarizing Vectors" +--- + +Binarization in this context is mapping numbers in a vector (embedding) to bits (reducing the value range), and representing the vector of bits efficiently using the `int8` data type. 
Examples: + +| input vector | binarized floats | pack\_bits (to INT8) | +| :--- | :--- | :--- | +| [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] | [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0] | -1 | +| [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] | 0 | +| [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] | 0 | +| [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] | [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] | -128 | +| [2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] | [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0] | -127 | + +Binarization is key to reducing memory requirements and, therefore, cost. Binarization can also improve feeding performance, as the memory bandwidth requirements go down accordingly. + +Refer to [embedding](/en/rag/embedding) for more details on how to create embeddings from text. + +## Summary + +This guide maps all the steps required to run a successful binarization project using Vespa only - there is no need to re-feed data. This makes a project feasible with limited incremental resource usage and man-hours required. + +Approximate Nearest Neighbor vector operations are run using an HNSW index in Vespa, with online data structures. The cluster is operational during the procedure, gradually building the required data structures. + +This guide is useful to map the steps and tradeoffs made for a successful vector binarization. Other relevant articles on how to reduce vector size in memory are: + +- [Exploring the potential of OpenAI Matryoshka 🪆 embeddings with Vespa](https://blog.vespa.ai/matryoshka-embeddings-in-vespa/) +- [Matryoshka 🤝 Binary vectors: Slash vector search costs with Vespa](https://blog.vespa.ai/combining-matryoshka-with-binary-quantization-using-embedder/) + +Adding to this, using algorithms like SPANN can solve problems with huge vector data sizes, read more in [Billion-scale vector search using hybrid HNSW-IF](https://blog.vespa.ai/vespa-hybrid-billion-scale-vector-search/). 
+ +A binarization project normally involves iteration over different configuration settings, measuring quality loss for each iteration - this procedure is built with that in mind. + +## Converters + +Vespa’s built-in indexing language [converters](/en/reference/writing/indexing-language#converters) `binarize` and `pack_bits` let you easily generate binarized vectors. Example schema definitions used to generate the vectors in the table above: + +```bash +schema doc { + + document doc { + field doc_embedding type tensor<float>(x[8]) { + indexing: summary | attribute + } + } + + field doc_embedding_binarized_floats type tensor<float>(x[8]) { + indexing: input doc_embedding | binarize | attribute + } + + field doc_embedding_binarized type tensor<int8>(x[1]) { + indexing: input doc_embedding | binarize | pack_bits | attribute + } +} +``` + +We see that the `binarize` function itself will not compress vectors to a smaller size, as the output cell type is the same as the input - it is only the values that are mapped to 0 or 1. Above, the vectors are binarized using a threshold value of 0, the Vespa default - any number \> 0 will map to 1 - this threshold is configurable. + +`pack_bits` reads binarized vectors and represents them using int8. In the example above: + +- `tensor<float>(x[8])` is 8 x sizeof(float) = 8 x 32 bits = 256 bits = 32 bytes +- `tensor<int8>(x[1])` is 1 x sizeof(int8) = 1 x 8 bits = 8 bits = 1 byte + +In other words, a compression factor of 32, which is expected, mapping a 32-bit float into 1 bit. + +As memory usage often is the cost driver for applications, this has huge potential. However, there is a loss of precision, so the tradeoff must be evaluated. Read more in [billion-scale-knn](https://blog.vespa.ai/billion-scale-knn/) and [combining-matryoshka-with-binary-quantization-using-embedder](https://blog.vespa.ai/combining-matryoshka-with-binary-quantization-using-embedder/).
+ +## Binarizing an existing embedding field + +In the example above, we see that `doc_embedding` has the original embedding data, and the fields `doc_embedding_binarized_floats` and `doc_embedding_binarized` are generated from `doc_embedding`. This is configured through the `indexing: input …` statement, and defining the generated fields outside the `document { … }` block. + + + **Note:** The `doc_embedding_binarized_floats` field is just for illustration purposes, as input to the `doc_embedding_binarized` field, which is the target binarized and packed field with low memory requirements. From here, we will call this the binarized embedding. + + +This is a common case for many applications - how to safely binarize and evaluate the binarized data for subsequent use. The process can be broken down into: + +- Pre-requisites. +- Define the new binarized embedding, normally as an addition to the original field. +- Deploy and re-index the data to populate the binarized embedding. +- Create new ranking profiles with the binarized embeddings. +- Evaluate the quality of the binarized embedding. +- Remove the original embedding field from memory to save cost. + +## Pre-requisites + +Adding a new field takes resources, on disk and in memory. A new binarized embedding field is smaller - above, it is 1/32 of the original field. Also note that embedding fields often have an index configured, like: + +```bash +field doc_embeddings type tensor(x[8]) { + indexing: summary | attribute | index + attribute { + distance-metric: angular + } + index { + hnsw { + max-links-per-node: 16 + neighbors-to-explore-at-insert: 100 + } + } +} +``` + +The index is used for approximate nearest neighbor (ANN) searches, and also consumes memory. + +Use the Vespa Cloud console to evaluate the size of original fields and size of indexes to make sure that there is room for the new embedding field, possibly with an index. 
+ + +**Note:** The size of an index is a function of the number of documents, regardless of tensor type. In this context, this means that when adding a new field with an index, the new index will have the same size as the index of the existing embedding field. + + +Use status pages to find the index size in memory - example: + +https://api-ctl.vespa-cloud.com/application/v4/tenant/ +TENANT\_NAME/application/APP\_NAME/instance/INSTANCE\_NAME/environment/prod/region/REGION/ +service/searchnode/NODE\_HOSTNAME/ +state/v1/custom/component/documentdb/SCHEMA/subdb/ready/attribute/ATTRIBUTE\_NAME + +### Example + +```json +tensor: { + compact_generation: 33946879, + ref_vector: { + memory_usage: { + used: 1402202052, + dead: 0, + allocated: 1600126976, + onHold: 0 + } + }, + tensor_store: { + memory_usage: { + used: 205348904436, + dead: 10248636768, + allocated:206719921232, + onHold: 0 + } + }, + nearest_neighbor_index: { + memory_usage: { + all: { + used: 10452397992, + dead: 360247164, + allocated:13346516304, + onHold: 0 + } +``` + +In this example, the index is 13G, the tensor data is 206G, so the index is 6.3% of the tensor data. The original tensor is of type `bfloat16`, a binarized version is 1/16 of this and hence 13G. As an extra index is 13G, the temporary incremental memory usage is approximately 26G during the procedure. + +## Define the binarized embedding field + +The new field is _added_ to the schema, example schema, before: + +```bash +schema doc { + + document doc { + field doc_embedding type tensor(x[8]) { + indexing: summary | attribute + } + } +} +``` + +After: + +```bash +schema doc { + + document doc { + field doc_embedding type tensor(x[8]) { + indexing: summary | attribute + } + } + + field doc_embedding_binarized type tensor(x[1]) { + indexing: input doc_embedding | binarize | pack_bits | attribute + } +} +``` + +The above are simple examples, with no ANN settings on the fields.
Following is a more complex example - schema before: + +```bash +schema doc { + + document doc { + field doc_embedding type tensor(x[8]) { + indexing: summary | attribute | index + attribute { + distance-metric: angular + } + index { + hnsw { + max-links-per-node: 16 + neighbors-to-explore-at-insert: 200 + } + } + } + } +} +``` + +Schema after: + +```bash +schema doc { + + document doc { + field doc_embedding type tensor(x[8]) { + indexing: summary | attribute | index + attribute { + distance-metric: angular + } + index { + hnsw { + max-links-per-node: 16 + neighbors-to-explore-at-insert: 200 + } + } + } + } + + field doc_embedding_binarized type tensor(x[1]) { + indexing: input doc_embedding | binarize | pack_bits | attribute | index + attribute { + distance-metric: hamming + } + index { + hnsw { + max-links-per-node: 16 + neighbors-to-explore-at-insert: 200 + } + } + } +} +``` + +Note that we replicate the index settings to the new field. + +## Deploy and reindex the binarized embedding field + +Deploying the field will trigger a reindexing on Vespa Cloud to populate the binarized embedding, fully automated. + +Self-hosted, the `deploy` operation will output the below - [trigger a reindex](/en/operations/reindexing). + +```sh +$ vespa deploy + +Uploading application package... done + +Success: Deployed '.' with session ID 3 +WARNING Change(s) between active and new application that may require re-index: +reindexing: Consider re-indexing document type 'doc' in cluster 'doc' because: + 1) Document type 'doc': Non-document field 'doc_embedding_binarized' added; this may be populated by reindexing +``` + +Depending on the size of the corpus and resources configured, the reindexing process takes time. + +## Create new ranking profiles and queries using the binarized embeddings + +After reindexing, you can query using the new, binarized embedding field. 
Assuming a query using the doc\_embedding field: + +```sh +$ vespa query \ + 'yql=select * from doc where {totalTargetHits:5}nearestNeighbor(doc_embedding, q)' \ + 'input.query(q)=[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]' \ + 'ranking=app_ranking' +``` + +The same query, with a binarized query vector, to the binarized field: + +```sh +$ vespa query \ + 'yql=select * from doc where {totalTargetHits:5}nearestNeighbor(doc_embedding_binarized, q_bin)' \ + 'input.query(q_bin)=[-119]' \ + 'ranking=app_ranking_bin' +``` + +See [tensor-hex-dump](/en/reference/schemas/document-json-format#tensor-hex-dump) for more information about how to create the int8-typed tensor. + +### Quick Hamming distance intro + +Example embeddings: + +| document embedding | binarized floats | pack\_bits (to INT8) | +| :--- | :--- | :--- | +| [-1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0] | [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] | 0 | +| **query embedding** | **binarized floats** | **to INT8** | +| [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0] | [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0] | -119 | + +Use [matchfeatures](/en/reference/schemas/schemas#match-features) to debug ranking (see ranking profile `app_ranking_bin` below): + +```json +"matchfeatures": { + "attribute(doc_embedding_binarized)": { + "type": "tensor(x[1])", + "values": [0] + }, + "distance(field,doc_embedding_binarized)": 3.0, + "query(q_bin)": { + "type": "tensor(x[1])", + "values": [-119] + } +} +``` + +See distance calculated to 3.0, which is the number of bits different in the binarized vectors, which is the hamming distance. 
+ +## Rank profiles and queries + +Assuming a rank profile like: + +```bash +rank-profile app_ranking { + match-features { + distance(field, doc_embedding) + query(q) + attribute(doc_embedding) + } + inputs { + query(q) tensor(x[8]) + } + first-phase { + expression: closeness(field, doc_embedding) + } +} +``` + +Query: + +```sh +$ vespa query \ + 'yql=select * from doc where {totalTargetHits:5}nearestNeighbor(doc_embedding, q)' \ + 'input.query(q)=[2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]' \ + 'ranking=app_ranking' +``` + +A binarized version is like: + +```bash +rank-profile app_ranking_bin { + match-features { + distance(field, doc_embedding_binarized) + query(q_bin) + attribute(doc_embedding_binarized) + } + inputs { + query(q_bin) tensor(x[1]) + } + first-phase { + expression: closeness(field, doc_embedding_binarized) + } +} +``` + +Query: + +```bash +$ vespa query \ + 'yql=select * from doc where {totalTargetHits:5}nearestNeighbor(doc_embedding_binarized, q_bin)' \ + 'input.query(q_bin)=[-119]' \ + 'ranking=app_ranking_bin' +``` + +Query with full-precision query vector, against a binarized vector - rank profile: + +```bash +rank-profile app_ranking_bin_full { + match-features { + distance(field, doc_embedding_binarized) + query(q) + query(q_bin) + attribute(doc_embedding_binarized) + } + function unpack_to_float() { + expression: 2*unpack_bits(attribute(doc_embedding_binarized), float)-1 + } + function dot_product() { + expression: sum(query(q) * unpack_to_float) + } + inputs { + query(q) tensor(x[8]) + query(q_bin) tensor(x[1]) + } + first-phase { + expression: closeness(field, doc_embedding_binarized) + } + second-phase { + expression: dot_product + } +} +``` + +Notes: + +- The first-phase ranking is as the binarized query above. +- The second-phase ranking is using the full-precision query vector query(q) with a bit-precision vector cast to float for type match. +- Both query vectors must be supplied in the query. 
+ +Note the differences when using full values in the query tensor, see the relevance score for the results: + +```sh +$ vespa query \ + 'yql=select * from music where {totalTargetHits:5}nearestNeighbor(doc_embedding_binarized, q_bin)' \ + 'input.query(q)=[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]' \ + 'input.query(q_bin)=[-119]' \ + 'ranking=app_ranking_bin_full' + +... + +"relevance": 3.0 +``` + +```sh +$ vespa query \ + 'yql=select * from music where {totalTargetHits:5}nearestNeighbor(doc_embedding_binarized, q_bin)' \ + 'input.query(q)=[2.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0]' \ + 'input.query(q_bin)=[-119]' \ + 'ranking=app_ranking_bin_full' + +"relevance": 4.0 +``` + +Read the [closeness](/en/reference/ranking/rank-features#closeness(dimension,name)) reference documentation. + +### TargetHits for ANN + +Given the lower precision with binarization, it might be a good idea to increase the `{targetHits:5}` annotation in the query, to generate more candidates for later ranking phases. + +## Evaluate the quality of the binarized embeddings + +This exercise is about evaluating a lower-precision retrieval phase, using the original full-sized (here we use floats) query-result pairs as reference. Experiments, query-document precision: + +1. float-float +2. binarized-binarized +3. float-binarized +4. float-float, with binarized retrieval + +To evaluate the precision, compute the differences for each query @10, like: + +```python +def compute_list_differences(list1, list2): + set1 = set(list1) + set2 = set(list2) + return len(set1 - set2) + +list1 = [1, 3, 5, 7, 9, 11, 13, 15, 17, 20] +list2 = [2, 3, 5, 7, 9, 11, 14, 15, 18, 20] +num_hits = compute_list_differences(list1, list2) +print(f"Hits different: {num_hits}") +``` + +## Remove the original embedding field from memory + +The purpose of the binarization is reducing memory footprint. Given the results of the evaluation above, store the full-precision embeddings on disk or remove them altogether. 
Example with paging the attribute to disk-only: + +```bash +schema doc { + + document doc { + field doc_embedding type tensor(x[8]) { + indexing: summary | attribute | index + attribute: paged + } + } + + field doc_embedding_binarized type tensor(x[1]) { + indexing: input doc_embedding | binarize | pack_bits | attribute | index + attribute { + distance-metric: hamming + } + index { + hnsw { + max-links-per-node: 16 + neighbors-to-explore-at-insert: 200 + } + } + } +} +``` + +This example only indexes the binarized embedding, with data binarized before indexing: + +```bash +schema doc { + + document doc { + field doc_embedding_binarized type tensor(x[1]) { + indexing: input doc_embedding | binarize | pack_bits | attribute | index + attribute { + distance-metric: hamming + } + index { + hnsw { + max-links-per-node: 16 + neighbors-to-explore-at-insert: 200 + } + } + } + } +} +``` + +## Appendix: Binarizing from text input + +To generate the embedding from other data types, like text, use the [converters](/en/reference/writing/indexing-language#converters) - example: + +```bash +field doc_embedding type tensor(x[1]) { + indexing: (input title || "") . " " . (input content || "") | embed | attribute + attribute { + distance-metric: hamming + } + } +``` + +Find examples in [Matryoshka 🤝 Binary vectors: Slash vector search costs with Vespa](https://blog.vespa.ai/combining-matryoshka-with-binary-quantization-using-embedder/). 
+ +## Appendix: conversion to int8 + +Find examples of how to binarize values in code: + +```python +import numpy as np + +def floats_to_bits(floats): + if len(floats) != 8: + raise ValueError("Input must be a list of 8 floats.") + bits = [1 if f > 0 else 0 for f in floats] + return bits + +def bits_to_int8(bits): + bit_string = ''.join(str(bit) for bit in bits) + int_value = int(bit_string, 2) + int8_value = np.int8(int_value) + return int8_value + +def floats_to_int8(floats): + bits = floats_to_bits(floats) + int8_value = bits_to_int8(bits) + return int8_value + +floats = [0.5, -1.2, 3.4, 0.0, -0.5, 2.3, -4.5, 1.2] +int8_value = floats_to_int8(floats) +print(f"The int8 value is: {int8_value}") +``` + +```python +import numpy as np + +def binarize_tensor(tensor: torch.Tensor) -> str: + """ + Binarize a floating-point 1-d tensor by thresholding at zero + and packing the bits into bytes. Returns the hex str representation of the bytes. + """ + if not tensor.is_floating_point(): + raise ValueError("Input tensor must be of floating-point type.") + return ( + np.packbits(np.where(tensor > 0, 1, 0), axis=0).astype(np.int8).tobytes().hex() + ) +``` + +Multivector example, from [ColPali: Efficient Document Retrieval with Vision Language Models](https://vespa-engine.github.io/pyvespa/examples/colpali-document-retrieval-vision-language-models-cloud.html): + +```python expandable +import numpy as np +from typing import Dict, List +from binascii import hexlify + +def binarize_token_vectors_hex(vectors: List[torch.Tensor]) -> Dict[str, str]: + vespa_tensor = list() + for page_id in range(0, len(vectors)): + page_vector = vectors[page_id] + binarized_token_vectors = np.packbits( + np.where(page_vector > 0, 1, 0), axis=1 + ).astype(np.int8) + for patch_index in range(0, len(page_vector)): + values = str( + hexlify(binarized_token_vectors[patch_index].tobytes()), "utf-8" + ) + if ( + values == "00000000000000000000000000000000" + ): # skip empty vectors due to padding of batch + 
continue + vespa_tensor_cell = { + "address": {"page": page_id, "patch": patch_index}, + "values": values, + } + vespa_tensor.append(vespa_tensor_cell) + + return vespa_tensor +``` \ No newline at end of file diff --git a/mintlify-docs/en/rag/document-enrichment.mdx b/mintlify-docs/en/rag/document-enrichment.mdx new file mode 100644 index 0000000000..d5bf4c8047 --- /dev/null +++ b/mintlify-docs/en/rag/document-enrichment.mdx @@ -0,0 +1,427 @@ +--- +title: "Document enrichment with LLMs" +--- + +Document enrichment enables automatic generation of document field values using large language models (LLMs) or custom code during feeding. It can be used to transform raw text into a more structured representation or expand it with additional contextual information. +Examples of enrichment tasks include: + +- Extraction of named entities (e.g., names of people, organizations, locations, and products) for fuzzy matching and customized ranking +- Categorization and tagging (e.g., sentiment and topic analysis) for filtering and faceting +- Generation of relevant keywords, queries, and questions to improve search recall and search suggestions +- Anonymization to remove personally identifiable information (PII) and reduction of bias in search results +- Translation of content for multilingual search +- LLM chunking + +These tasks are defined through prompts, which can be customized for a particular application. +Generated fields are indexed and stored as normal fields and can be used for searching without additional latency associated with LLM inference. + +## Setting up document enrichment components + +This section provides guidelines for configuring document enrichment, using the +[LLM document enrichment sample app](https://github.com/vespa-engine/sample-apps/tree/master/field-generator) as an example. + +### Defining generated fields + +Enrichments are defined in a schema using a [generate indexing expression](/en/reference/writing/indexing-language#generate). 
+For example, the following schema defines two [synthetic fields](/en/operations/reindexing) with `generate`: + +```yaml +schema passage { + document passage { + field id type string { + indexing: summary | attribute + } + + field text type string { + indexing: summary | index + index: enable-bm25 + } + } + + # Generate relevant questions to increase recall and search suggestions + field questions type array<string> { + indexing: input text | generate questions_generator | summary | index + index: enable-bm25 + } + + # Extract named entities for fuzzy matching with ngrams + field names type array<string> { + indexing: input text | generate names_extractor | summary | index + match { + gram + gram-size: 3 + } + } +} +``` + +The indexing statement `input text | generate questions_generator | summary | index` is interpreted as follows: +1. Take document field named `text` as an input +2. Pass the input to a field generator with id `questions_generator` +3. Store the output of the generator as summary +4. Index the output of the generator for lexical search + +Example of a document generated with this schema: +```json +{ + "id": "71", + "text": "Barley (Hordeum vulgare L.), a member of the grass family, is a major cereal grain. It was one of the first cultivated grains and is now grown widely. Barley grain is a staple in Tibetan cuisine and was eaten widely by peasants in Medieval Europe. Barley has also been used as animal fodder, as a source of fermentable material for beer and certain distilled beverages, and as a component of various health foods.", + "questions": [ + "What are the major uses of Barley (Hordeum vulgare L.) in different cultures and regions throughout history?", + "How has the cultivation and consumption of Barley (Hordeum vulgare L.) evolved over time, from its initial cultivation to its present-day uses?", + "What role has Barley (Hordeum vulgare L.) played in traditional Tibetan cuisine and Medieval European peasant diets?"
+ ], + "names": [ + "Barley", + "Hordeum vulgare L.", + "Tibetan", + "Medieval Europe" + ] +} +``` + +### Configuring field generators + +A schema can contain multiple generated fields that use one or multiple field generators. +All used field generators should be configured in `services.xml`, e.g. + +```xml expandable + + ... + + ... + + + local_llm + Generate 3 questions relevant for this text: {input} + + + + + + openai + files/names_extractor.txt + + + ... + + ... + +``` + +All field generators must specify `` that references a language model client, +which is either a local LLM, an OpenAI client or a custom component. + +In addition to the language model, field generators require a prompt. +Prompts are constructed from three parts: + +1. Prompt template, specified either inline inside `` or in a file within application package with the path in ``. +2. Input from the indexing statement, e.g. `input text` where `text` is a document field name. +3. Output type of the field being generated. + +If neither `` nor `` are provided, the default prompt is set to the input part. +When both are provided, `` has precedence. + +A prompt template must contain `{input}` placeholder, which will be replaced with the input value. +It is possible to combine several fields into one input by concatenating them into a single string, e.g. + +```yaml +input "title: " . title . " text: " . text | generate names_extractor | summary | index +``` + +A prompt template might also contain a `{jsonSchema}` placeholder which will be replaced with a JSON +schema based on the type of the field being generated, see the [structured output section](#structured-output) for details. +Including a JSON schema in your prompt can help language models generate output in a specific format. +However, it's important to understand that field generators already provide the JSON schema +as a separate inference parameter to the underlying language model client. 
+Both local LLM and OpenAI client utilize [structured output](#structured-output) functionality, +which forces LLMs to produce outputs that conform to the schema. +For this reason, explicitly including `{jsonSchema}` in your prompt template is unnecessary for most use cases. + +Structured output can be disabled by specifying `<responseFormatType>TEXT</responseFormatType>`. +In this case, the generated field must have a `string` type. +This is useful for very small models (less than a billion parameters) that struggle to generate structured output. +For most use cases, it is recommended to use structured output even for `string` fields. + +The last parameter in the field generator configuration is `<invalidResponseFormatPolicy>`, +which specifies what to do when the output from the underlying language model can't be converted to the generated field type. +This shouldn't happen when using structured output, but it can happen with `TEXT` response format. +The default value is `DISCARD`, which leaves the field empty (sets it to `null`). +The other values, `WARN` and `FAIL`, log a warning and throw an exception, respectively. + +Overview of all the field generator parameters is available in the +[configuration definition file](https://github.com/vespa-engine/vespa/blob/master/model-integration/src/main/resources/configdefinitions/language-model-field-generator.def). + + +## Configuring language models + +Field generators specify `<providerId>` to reference a language model client +to be used for generation, which is either a local LLM, an OpenAI client or a custom component. + +Configuration details for local LLM and OpenAI client are covered in [local LLM](/en/rag/local-llms) +and [OpenAI client](/en/rag/external-llms) documentation. +This section focuses on configuration parameters that are important for document enrichment. + +Both local LLM and OpenAI client can be configured with different models. +For efficient scaling of document enrichment, it is recommended to select the smallest +model that delivers acceptable performance for the task at hand.
+In general, larger models produce better results but are more expensive and slower. + +Document enrichment tasks such as information extraction, summarization, expansion and classification +are often less complex than the problem-solving capabilities targeted by larger models. +These tasks can be accomplished by smaller, cost-efficient models, +such as [Microsoft Phi-3.5-mini](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) for a local model +or [GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini) for OpenAI API. + +Here is an example of a OpenAI client configured with GPT-4o mini model: + +```xml + + ... + + ... + {/* OpenAI client */} + + + openai-key + gpt-4o-mini + + + ... + + ... + +``` + +For OpenAI client, model selection influences API cost and latency. + +In addition to the model, local LLM client has several other parameters +that are important for performance of document enrichment. +The following configuration is a good starting point: + +```xml expandable + + ... + + ... + + + {/* For Vespa Cloud, specify model by model-id to speed-up deployment */} + + + {/* For self-hosted Vespa and Vespa Cloud, specify model by URL */} + {/* */} + + {/* Number of tokens a LLM can do inference with. + This includes prompt and completion tokens for all parallel request. */} + 5000 + + {/* Requests are processed in parallel using continuous batching. + Each request is allocated 5000 / 5 = 1000 context tokens. */} + 5 + + {/* Request context size split between prompt and completion tokens: 500 + 500 = 1000 */} + 500 + 500 + + {/* A waiting line for requests to start processing. + It is reasonable to set it to <= parallelRequests */} + 3 + + {/* How long a request can wait until added to the queue, otherwise timeout. + On average, Ca. = number of milliseconds it takes to process all parallel requests. */} + 60000 + + {/* How long a request can wait in the queue until starting processing + In the worst case, ca. 
= number of milliseconds it takes to process all requests in the queue. */} + 60000 + + {/* Context overflow occurs when a request uses more context tokens than allocated in contextSize / parallelRequests. + This should not happen if contextSize, parallelRequests, maxPromptTokens, maxTokens are configured correctly. + In this case, we want the request to fail so we know if some configuration is wrong. */} + FAIL + + + ... + + ... + +``` + +There are three important aspects of this configuration in addition to the model used. + +1. `model`, `contextSize` and `parallelRequests` determine compute resources necessary to run the model. +2. `contextSize`, `parallelRequests`, `maxPromptTokens` and `maxTokens` should be configured to avoid context overflow - a situation when context size is too small to process multiple parallel requests with the given number of prompt and completion tokens. +3. `maxQueueSize`, `maxEnqueueWait` and `maxQueueWait` are related to managing the queue used for storing and feeding parallel requests into LLM runtime (llama.cpp). + +[Local LLMs documentation](/en/rag/local-llms) explains how to configure +`model`, `contextSize` and `parallelRequests` with respect to the model and compute resources used. +Memory usage (RAM or GPU VRAM) is especially important to consider when configuring these parameters. + +To avoid context overflow, configure `contextSize`, `parallelRequests`, `maxPromptTokens` and `maxTokens` +parameters so that `contextSize / parallelRequests >= maxPromptTokens + maxTokens`. Also consider that a larger `contextSize` takes longer to process. + +The queue-related parameters are used to balance latency with throughput. Values for these parameters heavily depend on the underlying compute resources. The local LLM configuration presented above is optimized for CPU nodes with 16 cores and 32GB RAM as well as GPU nodes with NVIDIA T4 GPUs 16GB VRAM.
+ +### Configuring compute resources + +Provisioned compute resources only affect local LLM performance, as OpenAI client merely calls a remote API that leverages the service provider's infrastructure. +In practice, GPU is highly recommended for running local LLMs. It provides an order-of-magnitude speedup compared to CPU. +For Vespa Cloud, a reasonable starting configuration is as follows: + +```xml + + ... + + ... + + + + + + ... + + ... + +``` + +This configuration provisions a container cluster with a single node containing NVIDIA T4 GPUs 16GB VRAM. +Local model throughput scales linearly with the number of nodes in the container cluster used for feeding. +For example, with 8 GPU nodes (`<nodes count="8">`) and throughput per node 1.5 generations/second, +combined throughput will be close to 12 generations/second. + +### Feeding configuration + +Generated fields introduce considerable latency during feeding. A large number of high-latency parallel requests might lead to timeouts in the document processing pipeline. +To avoid this, it is recommended to reduce the number of connections during feeding. +A reasonable starting point is to use three connections per GPU node and one connection per CPU node. +Example for one GPU node: + +```sh +vespa feed data.json --connections 3 +``` + +## Structured output + +Document enrichment generates field values based on the data types defined in a document schema. +Both local LLMs and the OpenAI client support structured output, forcing LLMs to produce JSON that conforms to a specified schema. This JSON schema is automatically constructed by a field generator according to the data type of the field being created.
For example, a JSON schema for `field questions type array<string>` in document `passage` will be as follows: + +```json +{ + "type": "object", + "properties": { + "passage.questions": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "passage.questions" + ], + "additionalProperties": false +} +``` + +Constructed schemas for different data types correspond to the +[document JSON format](/en/reference/schemas/document-json-format) used for feeding. +The following field types are supported: +- string +- bool +- int +- long +- byte +- float +- float16 +- double +- array of types mentioned above + +Types that are not supported: +- map +- struct +- weightset +- tensors +- references +- predicate +- position + +## Custom field generator + +As usual with Vespa, existing functionality can be extended by developing [custom application components](/en/applications/developer-guide). +A custom generator component can be used to implement application-specific logic to construct prompts, transform and validate LLM inputs and outputs, +combine outputs of several LLMs or use other sources such as a knowledge graph. + +A custom field generator compatible with `generate` should implement the `com.yahoo.language.process.FieldGenerator` +interface with a `generate` method that returns a field value.
+Here is a toy example of a custom field generator: + +```java expandable +package ai.vespa.test; + +import ai.vespa.llm.completion.Prompt; +import com.yahoo.document.datatypes.FieldValue; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.language.process.FieldGenerator; + +public class MockFieldGenerator implements FieldGenerator { + private final MockFieldGeneratorConfig config; + + public MockFieldGenerator(MockFieldGeneratorConfig config) { + this.config = config; + } + + @Override + public FieldValue generate(Prompt prompt, Context context) { + var stringBuilder = new StringBuilder(); + + for (int i = 0; i < config.repetitions(); i++) { + stringBuilder.append(prompt.asString()); + + if (i < config.repetitions() - 1) { + stringBuilder.append(" "); + } + } + + + return new StringFieldValue(stringBuilder.toString()); + } +} +``` + +The config definition for this component looks as follows: +```yaml +namespace=ai.vespa.test +package=ai.vespa.test + +repetitions int default=1 +``` + +To be used with the `generate` indexing expression, this component should be added to `services.xml`: +```xml expandable + + ... + + + 2 + + + ... + +``` + +The last step is to use it in a document schema, e.g.: +```yaml +schema passage { + document passage { + field text type string { + indexing: summary | index + index: enable-bm25 + } + } + + field mock_text type string { + indexing: input text | generate mock_generator | summary + } +} +``` diff --git a/mintlify-docs/en/rag/embedding.mdx b/mintlify-docs/en/rag/embedding.mdx new file mode 100644 index 0000000000..abeb16ca27 --- /dev/null +++ b/mintlify-docs/en/rag/embedding.mdx @@ -0,0 +1,700 @@ +--- +title: "Embedding" +--- + +A common technique is to map unstructured data - say, text or images - to points in an abstract vector space and then do the computation in that space.
For example, retrieve similar data by [finding nearby points in the vector space](/en/querying/approximate-nn-hnsw), or [using the vectors as input to a neural net](/en/ranking/onnx). This mapping is referred to as _embedding_. Read more about embedding and embedding management in this [blog post](https://blog.vespa.ai/tailoring-frozen-embeddings-with-vespa/). + +Embedding vectors can be sent to Vespa in queries and writes: + + + ![document- and query-embeddings](/assets/img/vespa-overview-embeddings-1.svg) + + +Alternatively, you can use the `embed` function to generate the embeddings inside Vespa to reduce vector transfer costs and make clients simpler: + + + ![Vespa's embedding feature, creating embeddings from text](/assets/img/vespa-overview-embeddings-2.svg) + + +Adding embeddings to schemas will change the characteristics of an application; Memory usage will grow, and feeding latency might increase. Read more on how to address this in [binarizing vectors](/en/rag/binarizing-vectors). + +## Configuring embedders + +Embedders are [components](/en/applications/components) which must be configured in your [services.xml](/en/reference/applications/services/services). Components are shared and can be used across schemas. + +```xml + + + + + + query: + passage: + + + ... + +``` + +You can [write your own](https://javadoc.io/doc/com.yahoo.vespa/linguistics/latest/com/yahoo/language/process/Embedder.html), or use [embedders provided in Vespa](#provided-embedders). + +If you have multiple container clusters that are using the same embedder, consider using an [include](/en/reference/applications/services/container#include) statement to avoid duplicating component config. 
+ +## Embedding a query text + +Where you would otherwise supply a tensor in a query request, you can (with an embedder configured) instead supply any text enclosed in `embed()`: + +```bash +input.query(q)=embed(myEmbedderId, "Hello%20world") +``` + +Both single and double quotes are permitted, and if you have only configured a single embedder, you can skip the embedder id argument and the quotes. + +The text argument can be supplied by a referenced parameter instead, using the `@parameter` syntax: + +```json +{ + "yql": "select * from doc where {totalTargetHits:10}nearestNeighbor(embedding_field, query_embedding)", + "text": "my text to embed", + "input.query(query_embedding)": "embed(@text)", +} +``` + +Remember that regardless of whether you are using embedders, input tensors must always be [defined in the schema's rank-profile](/en/reference/schemas/schemas#inputs). + +## Embedding a document field + +Use the `embed` function of the [indexing language](/en/reference/writing/indexing-language#indexing-statement) to convert strings into embeddings: + +```text +schema doc { + + document doc { + + field title type string { + indexing: summary | index + } + + } + + field embeddings type tensor(x[384]) { + indexing { + input title |embed embedderId| attribute | index + } + } + +} +``` + +Notice that the embedding field is defined outside the `document` clause in the schema. If you have only configured a single embedder, you can skip the embedder id argument. + +The input field can also be an array, where the output becomes a rank two tensor, see [this blog post](https://blog.vespa.ai/semantic-search-with-multi-vector-indexing/): + +```bash +schema doc { + + document doc { + + field chunks type array { + indexing: index | summary + } + + } + + field embeddings type tensor(p{},x[5]) { + indexing: input chunks |embed embedderId| attribute | index + } + +} +``` + +## Provided embedders + +Vespa provides several embedders as part of the platform. 
+ +### Huggingface Embedder + +An embedder using any [Huggingface tokenizer](https://huggingface.co/docs/tokenizers/index), including multilingual tokenizers, to produce tokens which are then input to a supplied transformer model in [ONNX](https://onnx.ai/) model format: + +```xml + + + + + + ... + +``` + +The huggingface-embedder supports all [Huggingface tokenizer implementations](https://huggingface.co/docs/tokenizers/index). + +- The `transformer-model` specifies the embedding model in [ONNX](https://onnx.ai/) format. See [exporting models to ONNX](/en/ranking/onnx#using-optimum-to-export-models-to-onnx-format) for how to export embedding models from Huggingface to be compatible with Vespa's `hugging-face-embedder`. See [Limitations on Model Size and Complexity](/en/ranking/onnx#limitations-on-model-size-and-complexity) for details on the ONNX model format supported by Vespa. +- The `tokenizer-model` specifies the Huggingface `tokenizer.json` formatted file. See [HF loading tokenizer from a JSON file.](https://huggingface.co/transformers/v4.8.0/fast_tokenizers.html#loading-from-a-json-file) + +Use `path` to supply the model files from the application package, `url` to supply them from a remote server, or `model-id` to use a [model supplied by Vespa Cloud](/en/rag/model-hub). You can also use a model hosted in private Huggingface Model Hub by adding your Huggingface API token to the [secret store](/en/security/secret-store) and referring to the secret using `secret-ref` in the model tag. See [model config reference](/en/reference/rag/embedding#model-config-reference) for more details. + +```xml + + + + + + ... + +``` + +See the [reference](/en/reference/rag/embedding#huggingface-embedder-reference-config) for all configuration parameters. + +#### Huggingface embedder models + +The following are examples of text embedding models that can be used with the hugging-face-embedder and their output [tensor](/en/ranking/tensor-user-guide) dimensionality. 
The resulting [tensor type](/en/reference/ranking/tensor#tensor-type-spec) can be `float`, `bfloat16` or using binarized quantization into `int8`. See blog post [Combining matryoshka with binary-quantization](https://blog.vespa.ai/combining-matryoshka-with-binary-quantization-using-embedder/) for more examples of using the Huggingface embedder with binary quantization. + +The following models use `pooling-strategy` `mean`, which is the default [pooling-strategy](/en/reference/rag/embedding#huggingface-embedder-reference-config): + +- [intfloat/e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) produces `tensor(x[384])` +- [intfloat/e5-base-v2](https://huggingface.co/intfloat/e5-base-v2) produces `tensor(x[768])` +- [intfloat/e5-large-v2](https://huggingface.co/intfloat/e5-large-v2) produces `tensor(x[1024])` +- [intfloat/multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) produces `tensor(x[768])` + +The following models are useful for binarization and Matryoshka dimensionality flexibility where only the first k dimensions are retained. [Matryoshka 🤝 Binary vectors: Slash vector search costs with Vespa](https://blog.vespa.ai/combining-matryoshka-with-binary-quantization-using-embedder/) is a great read on this subject. When enabling binarization with `int8` use [distance-metric hamming](/en/reference/schemas/schemas#hamming): + +- [mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1) produces `tensor(x[1024])`. This model is also useful for binarization, which can be triggered by using destination `tensor(x[128])`. Use `pooling-strategy` `cls` and `normalize` `true`. +- [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) produces `tensor(x[768])`. This model is also useful for binarization, which can be triggered by using destination `tensor(x[96])`. Use `normalize` `true`. 
+ +Snowflake arctic model series: + +- [snowflake-arctic-embed-xs](https://huggingface.co/Snowflake/snowflake-arctic-embed-xs) produces `tensor(x[384])`. Use `pooling-strategy` `cls` and `normalize` `true`. +- [snowflake-arctic-embed-m](https://huggingface.co/Snowflake/snowflake-arctic-embed-m) produces `tensor(x[768])`. Use `pooling-strategy` `cls` and `normalize` `true`. + +All of these example text embedding models can be used in combination with Vespa's [nearest neighbor search](/en/querying/nearest-neighbor-search) using the appropriate [distance-metric](/en/reference/schemas/schemas#distance-metric). Notice that to use the [distance-metric: prenormalized-angular](/en/reference/schemas/schemas#prenormalized-angular), the `normalize` configuration must be set to `true`. + +Check the [Massive Text Embedding Benchmark](https://huggingface.co/blog/mteb) (MTEB) benchmark and [MTEB leaderboard](https://huggingface.co/spaces/mteb/leaderboard) for help with choosing an embedding model. + +### Bert embedder + +DEPRECATED; prefer using the [Huggingface Embedder](#huggingface-embedder) instead of the Bert embedder. + +An embedder using the [WordPiece](/en/reference/rag/embedding#wordpiece-embedder) embedder to produce tokens which are then input to a supplied [ONNX](https://onnx.ai/) model on the form expected by a BERT base model: + +```xml + + + + + 128 + last_hidden_state + + +``` + +- The `transformer-model` specifies the embedding model in [ONNX](https://onnx.ai/) format. See [exporting models to ONNX](/en/ranking/onnx#using-optimum-to-export-models-to-onnx-format), for how to export embedding models from Huggingface to compatible [ONNX](https://onnx.ai/) format. +- The `tokenizer-vocab` specifies the Huggingface `vocab.txt` file, with one valid token per line. Note that the Bert embedder does not support the `tokenizer.json` formatted tokenizer configuration files. This means that tokenization settings like max tokens should be set explicitly. 
+- The `transformer-output` specifies the name given to the embedding output in the `model.onnx` file; this will differ depending on how the model is exported to ONNX format. One common name is `last_hidden_state`, especially in transformer-based models. Other common names are `output` or `output_0`, `embedding` or `embeddings`, `sentence_embedding`, `pooled_output`, or `encoder_last_hidden_state`. The default is `output_0`. + +The Bert embedder is limited to English ([WordPiece](/en/reference/rag/embedding#wordpiece-embedder)) and BERT-styled transformer models with three model inputs (_input\_ids, attention\_mask, token\_type\_ids_). Prefer using the [Huggingface Embedder](#huggingface-embedder) instead of the Bert embedder. + +See [configuration reference](/en/reference/rag/embedding#bert-embedder-reference-config) for all configuration options. + +### ColBERT embedder + +An embedder supporting [ColBERT](https://github.com/stanford-futuredata/ColBERT) models. The ColBERT embedder maps text to _token_ embeddings, representing a text as multiple contextualized embeddings. This produces better quality than reducing all tokens into a single vector. + +Read more about ColBERT and the ColBERT embedder in blog post form [Announcing the Vespa ColBERT embedder](https://blog.vespa.ai/announcing-colbert-embedder-in-vespa/) and [Announcing Vespa Long-Context ColBERT](https://blog.vespa.ai/announcing-long-context-colbert-in-vespa/). + +```xml + + + + + 32 + 128 + + +``` + +- The `transformer-model` specifies the ColBERT embedding model in [ONNX](https://onnx.ai/) format. See [exporting models to ONNX](/en/ranking/onnx#using-optimum-to-export-models-to-onnx-format) for how to export embedding models from Huggingface to compatible [ONNX](https://onnx.ai/) format. The [vespa-engine/col-minilm](https://huggingface.co/vespa-engine/col-minilm) page on the HF model hub has a detailed example of how to export a colbert checkpoint to ONNX format for accelerated inference. 
+- The `tokenizer-model` specifies the Huggingface `tokenizer.json` formatted file. See [HF loading tokenizer from a JSON file.](https://huggingface.co/transformers/v4.8.0/fast_tokenizers.html#loading-from-a-json-file) +- The `max-query-tokens` controls the maximum number of query text tokens that are represented as vectors, and similarly, `max-document-tokens` controls the document side. These parameters can be used to control resource usage. + +See [configuration reference](/en/reference/rag/embedding#colbert-embedder-reference-config) for all configuration options and defaults. + +The ColBERT token embeddings are represented as a [mixed tensor](/en/ranking/tensor-user-guide#tensor-concepts): `tensor(token{}, x[dim])` where `dim` is the vector dimensionality of the contextualized token embeddings. The [colbert model checkpoint](https://huggingface.co/colbert-ir/colbertv2.0) on Hugging Face hub uses 128 dimensions. + +The embedder destination tensor is defined in the [schema](/en/basics/schemas), and depending on the target [tensor cell precision](/en/reference/ranking/tensor#tensor-type-spec) definition the embedder can compress the representation: If the target tensor cell type is `int8`, the ColBERT embedder compresses the token embeddings with binarization for the document to reduce storage to 1-bit per value, reducing the token embedding storage footprint by 32x compared to using float. The _query_ representation is not compressed with binarization. The following demonstrates two ways to use the ColBERT embedder in the document schema to [embed a document field](#embedding-a-document-field). 
+ +```text +schema doc { + document doc { + field text type string {..} + } + field colbert_tokens type tensor(token{}, x[128]) { + indexing: input text | embed colbert | attribute + } + field colbert_tokens_compressed type tensor(token{}, x[16]) { + indexing: input text | embed colbert | attribute + } +} +``` + +The first field `colbert_tokens` stores the original representation as the tensor destination cell type is float. The second field, the `colbert_tokens_compressed` tensor is compressed. When using `int8` tensor cell precision, one should divide the original vector size by 8 (128/8 = 16). + +You can also use `bfloat16` instead of `float` to reduce storage by 2x compared to `float`. + +```text +field colbert_tokens type tensor(token{}, x[128]) { + indexing: input text | embed colbert | attribute +} +``` + +You can also use the ColBERT embedder with an array of strings (representing chunks): + +```text +schema doc { + document doc { + field chunks type array {..} + } + field colbert_tokens_compressed type tensor(chunk{}, token{}, x[16]) { + indexing: input text | embed colbert chunk | attribute + } +} +``` + +Here, we need a second mapped dimension in the target tensor and a second argument to embed, telling the ColBERT embedder the name of the tensor dimension to use for the chunks. + +Notice that the examples above did not specify the `index` function for creating a [HNSW](/en/querying/approximate-nn-hnsw) index. The colbert representation is intended to be used as a ranking model and not for retrieval with Vespa's nearestNeighbor query operator, where you can e.g., use a document-level vector and/or lexical matching. + +To reduce memory footprint, use [paged attributes](/en/content/attributes#paged-attributes). + +#### ColBERT ranking + +See the sample applications for using ColBERT in ranking with variants of the MaxSim similarity operator expressed using Vespa tensor computation expressions. 
See: [colbert](https://github.com/vespa-engine/sample-apps/tree/master/colbert) and [colbert-long](https://github.com/vespa-engine/sample-apps/tree/master/colbert-long). + +### SPLADE embedder + +An embedder supporting [SPLADE](https://github.com/naver/splade) models. The SPLADE embedder maps text to mapped tensor, representing a text as a sparse vector of unique tokens and their weights. + +```xml + + + + + + +``` + +- The `transformer-model` specifies the SPLADE embedding model in [ONNX](https://onnx.ai/) format. See [exporting models to ONNX](/en/ranking/onnx#using-optimum-to-export-models-to-onnx-format) for how to export embedding models from Huggingface to compatible [ONNX](https://onnx.ai/) format. +- The `tokenizer-model` specifies the Huggingface `tokenizer.json` formatted file. See [HF loading tokenizer from a JSON file.](https://huggingface.co/transformers/v4.8.0/fast_tokenizers.html#loading-from-a-json-file) + +See [configuration reference](/en/reference/rag/embedding#splade-embedder-reference-config) for all configuration options and defaults. + +The splade token weights are represented as a [mapped tensor](/en/ranking/tensor-user-guide#tensor-concepts): `tensor(token{})`. + +The embedder destination tensor is defined in the [schema](/en/basics/schemas). The following demonstrates how to use the SPLADE embedder in the document schema to [embed a document field](#embedding-a-document-field). + +```text +schema doc { + document doc { + field text type string {..} + } + field splade_tokens type tensor(token{}) { + indexing: input text | embed splade | attribute + } +} +``` + +You can also use the SPLADE embedder with an array of strings (representing chunks). 
Here, also using lower tensor cell precision `bfloat16`: + +```text +schema doc { + document doc { + field chunks type array {..} + } + field splade_tokens type tensor(chunk{}, token{}) { + indexing: input text | embed splade chunk | attribute + } +} +``` + +Here, we need a second mapped dimension in the target tensor and a second argument to embed, telling the splade embedder the name of the tensor dimension to use for the chunks. + +To reduce memory footprint, use [paged attributes](/en/content/attributes#paged-attributes). + +#### SPLADE ranking + +See the [splade](https://github.com/vespa-engine/sample-apps/tree/master/splade) sample application for how to use SPLADE in ranking, including also how to use the SPLADE embedder with an array of strings (representing chunks). + +#### SPLADE retrieval + +[Sparse retrieval in Vespa](https://blog.vespa.ai/redefining-hybrid-search-possibilities-with-vespa/#sparse-retrieval-in-vespa) is a good discussion on how SPLADE can be used for retrieval. + +### VoyageAI Embedder + +An embedder that uses the [VoyageAI](https://www.voyageai.com/) embedding API to generate high-quality embeddings for semantic search. This embedder calls the VoyageAI API service and does not require local model files or ONNX inference. All embeddings returned by VoyageAI are normalized to unit length, making them suitable for cosine similarity and [prenormalized-angular](/en/reference/schemas/schemas#prenormalized-angular) distance metrics (see [VoyageAI FAQ](https://docs.voyageai.com/docs/faq#which-similarity-function-should-i-use)). + +```xml + + + voyage-4 + voyage_api_key + 1024 + + +``` + +- The `model` specifies which VoyageAI model to use. +- The `api-key-secret-ref` references a secret in Vespa's [secret store](/en/security/secret-store) containing your VoyageAI API key. This is required for authentication. + +See the [reference](/en/reference/rag/embedding#voyageai-embedder-reference-config) for all configuration parameters. 
+ +#### VoyageAI embedder models + +For the complete list of available models and their specifications, see: + +- [VoyageAI Embeddings Documentation](https://docs.voyageai.com/docs/embeddings) - General-purpose and specialized models +- [Contextualized Chunk Embeddings](https://docs.voyageai.com/docs/contextualized-chunk-embeddings) - Models for embedding document chunks with surrounding context. See [Working with chunks](/en/rag/working-with-chunks). +- [Multimodal Embeddings](https://docs.voyageai.com/docs/multimodal-embeddings) - Multimodal models for text, images, and video + +#### Contextualized chunk embeddings + +To use [contextualized chunk embeddings](https://docs.voyageai.com/docs/contextualized-chunk-embeddings), configure the VoyageAI embedder with a `voyage-context-*` model and use it to embed an `array<string>` field containing your document chunks: + +```js +schema doc { + document doc { + field chunks type array<string> { + indexing: index | summary + } + } + field embeddings type tensor(chunk{}, x[1024]) { + indexing: input chunks | embed voyage | attribute | index + attribute { + distance-metric: prenormalized-angular + } + } +} +``` + +When embedding array fields with a contextualized chunk embedding model, Vespa sends all chunks from a document in a single API request, allowing Voyage to encode each chunk with context from the other chunks. Be aware that the combined size of all chunks in a document must fit within the VoyageAI API's input token limit. See [Working with chunks](/en/rag/working-with-chunks) for chunking strategies. + +#### Input type detection + +VoyageAI models distinguish between query and document embeddings for improved retrieval quality. The embedder automatically detects the context and sets the appropriate input type based on whether the embedding is performed during feed ([indexing](/en/writing/indexing)) or query processing in Vespa. 
+ +For advanced use cases where you need to control the input type programmatically, you can use the `destination` property of the [Embedder.Context](https://javadoc.io/static/com.yahoo.vespa/linguistics/8.620.35/com/yahoo/language/process/Embedder.Context.html) when calling the embedder from Java code. + +#### Using voyage-4-nano for local query inference + +The [voyage-4-nano](/en/rag/model-hub#voyage-4-nano) model is available as an ONNX model for use with the [Hugging Face embedder](#huggingface-embedder). Since it shares the same embedding space as the larger [Voyage 4](https://blog.voyageai.com/2026/01/15/voyage-4/) models, it can be used for query embeddings with local inference — trading some accuracy for lower cost by eliminating API usage for queries entirely. + +### OpenAI Embedder + +Available since Vespa 8.678 + +An embedder that uses the [OpenAI](https://platform.openai.com/docs/guides/embeddings) embeddings API to generate embeddings for semantic search. The embedder can target any OpenAI-compatible API. + +```xml + + + text-embedding-3-small + openai_api_key + 1536 + + +``` + +- The `model` specifies which OpenAI model to use. +- The `api-key-secret-ref` references a secret in Vespa's [secret store](/en/security/secret-store) containing your OpenAI API key. For self-hosted OpenAI-compatible endpoints that do not require authentication, this element can be omitted. + +See the [reference](/en/reference/rag/embedding#openai-embedder-reference-config) for all configuration parameters. + +### Mistral Embedder + +Available since Vespa 8.678 + +An embedder that uses the [Mistral](https://docs.mistral.ai/capabilities/embeddings/overview/) embeddings API to generate embeddings for semantic search. + +```xml + + + mistral-embed + mistral_api_key + 1024 + + +``` + +- The `model` specifies which Mistral model to use. +- The `api-key-secret-ref` references a secret in Vespa's [secret store](/en/security/secret-store) containing your Mistral API key. 
This is required for authentication. + +Mistral supports output quantization on models that offer it, such as `codestral-embed`. See the [reference](/en/reference/rag/embedding#mistral-embedder-reference-config) for all configuration parameters. + +## Embedder performance + +Embedding inference can be resource-intensive for larger embedding models. Factors that impact performance: + +- The embedding model parameters. Larger models are more expensive to evaluate than smaller models. +- The sequence input length. Transformer models scale quadratically with input length. Since queries are typically shorter than documents, embedding queries is less computationally intensive than embedding documents. +- The number of inputs to the `embed` call. When encoding arrays, consider how many inputs a single document can have. For local CPU inference, increasing [feed timeout](/en/reference/api/document-v1#timeout) settings might be required when documents have many `embed` inputs. + +For local ONNX-based embedders (such as the [Hugging Face](#huggingface-embedder), [Bert](#bert-embedder), [ColBERT](#colbert-embedder), and [SPLADE](#splade-embedder) embedders), using [GPU](/en/reference/rag/embedding#embedder-onnx-reference-config), especially for longer sequence lengths (documents), can dramatically improve performance and reduce cost. See the blog post on [GPU-accelerated ML inference in Vespa Cloud](https://blog.vespa.ai/gpu-accelerated-ml-inference-in-vespa-cloud/). With GPU-accelerated instances, using fp16 models can increase throughput by as much as 3x compared to fp32. + +For cloud embedders that call an external API ([VoyageAI](#voyageai-embedder), [OpenAI](#openai-embedder), [Mistral](#mistral-embedder)), throughput is bound by API latency and rate limits rather than local hardware. See [Thread pool tuning for cloud embedders](#thread-pool-tuning) and [dynamic batching](#dynamic-batching) for tuning guidance. 
+ +Refer to [binarizing vectors](/en/rag/binarizing-vectors) for how to reduce vector size. + +## Metrics + +Vespa's built-in embedders emit metrics for computation time and token sequence length. These metrics are prefixed with `embedder.` and listed in the [Container Metrics](/en/reference/operations/metrics/container) reference documentation. Third-party embedder implementations may inject the `ai.vespa.embedding.Embedder.Runtime` component to easily emit the same predefined metrics, although emitting custom metrics is perfectly fine. + +## Sample applications + +These sample applications use embedders: + +- [commerce-product-ranking](https://github.com/vespa-engine/sample-apps/tree/master/commerce-product-ranking) - demonstrates using multiple embedders +- [multi-vector-indexing](https://github.com/vespa-engine/sample-apps/tree/master/multi-vector-indexing) demonstrates how to use embedders with multiple document field inputs +- [colbert](https://github.com/vespa-engine/sample-apps/tree/master/colbert) demonstrates how to use the colbert-embedder +- [colbert-long](https://github.com/vespa-engine/sample-apps/tree/master/colbert-long) demonstrates how to use the colbert-embedder with long contexts (array input) +- [splade](https://github.com/vespa-engine/sample-apps/tree/master/splade) demonstrates how to use the splade-embedder. + +## Tricks and tips + +Various tricks that are useful with embedders. + +### Adding a fixed string to a query text + +Embedding models might require text to be prepended with a fixed string, e.g.: + +```xml + + + + + query: + passage: + + +``` + +The above configuration prepends text in queries and field data. Find a complete example in the [ColBERT](https://github.com/vespa-engine/sample-apps/tree/master/colbert) sample application. 
+ +The `<prepend>` element is also supported by the [OpenAI embedder](/en/reference/rag/embedding#openai-embedder-reference-config), which is useful for OpenAI-compatible instruction-tuned models that expect a task-specific prefix. + +An alternative approach is using query profiles to prepend query data. If you need to add a standard wrapper or a prefix instruction around the input text you want to embed, use parameter substitution to supply the text, as in `embed(myEmbedderId, @text)`, and let the parameter (`text` here) be defined in a [query profile](/en/querying/query-profiles), which in turn uses [value substitution](/en/querying/query-profiles#value-substitution) to place another query request with a supplied text value within it. The following is a concrete example where queries should have a prefix instruction before being embedded in a vector representation. The following defines a `text` input field to `search/query-profiles/default.xml`: + +```xml + + "Represent this sentence for searching relevant passages: %{user_query} + +``` + +Then, at query request time, we can pass `user_query` as a request parameter; this parameter is then used to produce the `text` value, which is then embedded. + +```json +{ + "yql": "select * from doc where userQuery() or ({totalTargetHits: 100}nearestNeighbor(embedding, e))", + "input.query(e)": "embed(mxbai, @text)", + "user_query": "space contains many suns" +} +``` + +The text that is embedded by the embedder is then: _Represent this sentence for searching relevant passages: space contains many suns_. 
+ +### Concatenating input fields + +You can concatenate values in indexing using "`.`", and handle missing field values using [choice](/en/writing/indexing#choice-example) to produce a single input for an embedder: + +```text +schema doc { + + document doc { + + field title type string { + indexing: summary | index + } + + field body type string { + indexing: summary | index + } + + } + + field embeddings type tensor(x[384]) { + indexing { + (input title || "") . " " . (input body || "") |embed embedderId| attribute | index + } + index: hnsw + } + +} +``` + +You can also use concatenation to add a fixed preamble to the string to embed. + +### Combining with foreach + +The indexing expression can also use `for_each` and include other document fields. For example, the _E5_ family of embedding models uses instructions along with the input. The following expression prefixes the input with _passage:_ followed by a concatenation of the title and a text chunk. + +```text +schema doc { + + document doc { + + field title type string { + indexing: summary | index + } + + field chunks type array<string> { + indexing: index | summary + } + + } + field embedding type tensor(p{}, x[384]) { + indexing { + input chunks | + for_each { + "passage: " . (input title || "") . " " . ( _ || "") + } | embed e5 | attribute | index + } + attribute { + distance-metric: prenormalized-angular + } + } +} +``` + +See [Indexing language execution value](/en/writing/indexing#execution-value-example) for details. + +### Separate feed and search embedders + +In Vespa Cloud, it is general practice to configure separate container clusters for feed and search, so that bursty feed load cannot affect query latency. When using HTTP-based cloud embedders ([VoyageAI](#voyageai-embedder), [OpenAI](#openai-embedder), [Mistral](#mistral-embedder)), configure a separate embedder component in each cluster. 
This lets you pick different models and API keys per workload, and gives two additional benefits: **cost optimization** (via model variants) and **rate limit isolation**. + +```xml + + + voyage-4-large + 1024 + voyage_feed_api_key + + + + + + + voyage-4-lite + 1024 + voyage_search_api_key + + + +``` + +#### Cost optimization with model variants + +When a provider offers multiple model sizes that share the same embedding space, you can use a more powerful (and more expensive) model for document embeddings while using a smaller, cheaper model for query embeddings. Since document embedding happens once during indexing but query embedding occurs on every search request, this can significantly reduce operational costs while maintaining retrieval quality. + +For example, the [Voyage 4 model family](https://blog.voyageai.com/2026/01/15/voyage-4/) shares a vector space across sizes, making it a natural fit for this pattern: use `voyage-4-large` in the feed cluster and `voyage-4-lite` in the search cluster as shown above. See also [Using voyage-4-nano for local query inference](#voyageai-local-query-inference) for an even more cost-effective query-side option. + +#### Rate limit isolation + +Separating feed and search operations is particularly important for managing API rate limits. Bursty document feeding operations can consume significant API quota, potentially causing rate limit errors that affect search queries. By using **separate API keys** for feed and search embedders, you ensure that feeding bursts don't negatively impact search. + +### Thread pool tuning for cloud embedders + +When using an HTTP-based cloud embedder (VoyageAI, OpenAI, Mistral), container feed throughput is primarily limited by embedding API latency combined with the document processing thread pool size, not by CPU. Each document being fed blocks a thread while waiting for the embedding API response. 
To improve throughput, you likely have to increase the [document processing thread pool size](/en/reference/applications/services/docproc#threadpool), assuming the content cluster is not the bottleneck. + +For example, consider a container cluster with 2 nodes, each with 8 vCPUs. With the default document processing thread pool size of 1 thread per vCPU, you have 16 total threads. If the average embedding API latency is 200ms, the maximum throughput is approximately 16 / 0.2 = 80 documents/second. See [container tuning](/en/performance/container-tuning) for more on container tuning. + +Note that the effective throughput can never exceed the rate limit of your API key. Use the [embedder metrics](/en/reference/operations/metrics/container) to determine embedder latency and throughput. For additional throughput improvements, consider enabling [dynamic batching](#dynamic-batching). + +### Dynamic batching + +Dynamic batching combines multiple concurrent embedding requests into a single embedding invocation. This is useful when throughput is constrained by the provider's requests-per-minute (RPM) limit rather than the tokens-per-minute (TPM) limit. Batching reduces RPM usage by combining requests; TPM usage is unaffected. + +Dynamic batching is supported by the [VoyageAI](#voyageai-embedder), [OpenAI](#openai-embedder), and [Mistral](#mistral-embedder) embedders. + +```xml + + + voyage-4-large + 1024 + voyage_feed_api_key + + + + +``` + +The `max-size` attribute sets the maximum number of requests in a single batch, and `max-delay` sets the maximum time to wait for a full batch before sending a partial one. Batching is disabled by default. + +The [document processing thread pool size](/en/reference/applications/services/docproc#threadpool) should be at least `max-size`, since each thread contributes one request to the batch. + +## Troubleshooting + +This section covers common issues and how to resolve them. 
+ +### Model download failure + +If models fail to download, it will cause the Vespa stateless container service to not start with `RuntimeException: Not able to create config builder for payload` - see [example](/en/applications/components#component-load). + +This usually means that the model download failed. Check the Vespa log for more details. The most common reasons for download failure are network issues or incorrect URLs. + +This will also be visible in the Vespa status output as the container will not listen to its port: + +```sh +vespa status -t http://127.0.0.1:8080 +Container at http://127.0.0.1:8080 is not ready: unhealthy container at http://127.0.0.1:8080/status.html: Get "http://127.0.0.1:8080/status.html": EOF +Error: services not ready: http://127.0.0.1:8080 +``` + +### Tensor shape mismatch + +The native embedder implementations expect that the output tensor has a specific shape. If the shape is incorrect, you will see an error message during feeding like: + +```json +feed: got status 500 ({"pathId":"..","..","message":"[UNKNOWN(252001) @ tcp/vespa-container:19101/chain.indexing]: +Processing failed. Error message: java.lang.IllegalArgumentException: Expected 3 output dimensions for output name 'sentence_embedding': [batch, sequence, embedding], got 2 -- See Vespa log for details. "}) for put xx:not retryable +``` + +This means that the exported ONNX model output tensor does not have the expected shape. For example, the above is logged by the [hf-embedder](#huggingface-embedder) that expects the output shape to be [batch, sequence, embedding] (A 3D tensor). This is because the embedder implementation performs the [pooling-strategy](/en/reference/rag/embedding#huggingface-embedder) over the sequence dimension to produce a single embedding vector. The batch size is always 1 for Vespa embeddings. 
+ +See [onnx export](/en/ranking/onnx#using-optimum-to-export-models-to-onnx-format) for how to export models to ONNX format with the correct output shapes and [onnx debug](/en/ranking/onnx#debugging-onnx-models) for debugging input and output names. + +### Input names + +The native embedder implementations expect that the ONNX model accepts certain input names. If the names are incorrect, it will cause the Vespa container service to not start, and you will see an error message in the vespa log like: + +```sh +WARNING container Container.com.yahoo.container.di.Container +Caused by: java.lang.IllegalArgumentException: Model does not contain required input: 'input_ids'. Model contains: my_input +``` + +This means that the ONNX model accepts "my\_input", while our configuration attempted to use "input\_ids". The default input names for the [hf-embedder](#huggingface-embedder) are "input\_ids", "attention\_mask" and "token\_type\_ids". These are overridable in the configuration ([reference](/en/reference/rag/embedding#huggingface-embedder)). Some embedding models do not use the "token\_type\_ids" input. We can specify this in the configuration by setting `transformer-token-type-ids` to empty, illustrated by the following example. + +```xml + + + + + +``` + +### Output names + +The native embedder implementations expect that the ONNX model produces certain output names. It will cause the Vespa stateless container service to not start, and you will see an error message in the vespa log like: + +```sh +Model does not contain required output: 'test'. Model contains: last_hidden_state +``` + +This means that the ONNX model produces "last\_hidden\_state", while our configuration attempted to use "test". The default output name for the [hf-embedder](#huggingface-embedder) is "last\_hidden\_state". This is overridable in the configuration. See [reference](/en/reference/rag/embedding#huggingface-embedder). 
+ +### EOF + +If vespa status shows that the container is healthy, but you observe an EOF error during feeding, this means that the stateless container service has crashed and stopped listening to its port. This could be related to the embedder ONNX model size, docker container memory resource constraints, or the configured JVM heap size of the Vespa stateless container service. + +```sh +vespa feed ext/1.json +feed: got error "Post "http://127.0.0.1:8080/document/v1/doc/doc/docid/1": unexpected EOF" (no body) for put id:doc:doc::1: giving up after 10 attempts +``` + +This could be related to insufficient stateless container (JVM) memory. Check the container logs for OOM errors. See [jvm-tuning](/en/performance/container-tuning#jvm-tuning) for JVM tuning options (The default heap size is 1.5GB). Container crashes could also be caused by too little memory allocated to the docker or podman container, which can cause the Linux kernel to kill processes to free memory. See the [docker containers memory](/en/operations/self-managed/docker-containers#memory) documentation. Vespa estimates the memory needed for embedder model inference automatically, but the estimate can be inaccurate. Override it with [`inference-memory`](/en/reference/applications/services/container#inference-memory) to reserve a specific amount of container memory for inference. diff --git a/mintlify-docs/en/rag/external-llms.mdx b/mintlify-docs/en/rag/external-llms.mdx new file mode 100644 index 0000000000..66ffd1ca77 --- /dev/null +++ b/mintlify-docs/en/rag/external-llms.mdx @@ -0,0 +1,125 @@ +--- +title: "External LLMs in Vespa" +sidebarTitle: "Using external LLMs" +--- + +Please refer to [Large Language Models in Vespa](/en/rag/llms-in-vespa) for an +introduction to using LLMs in Vespa. + +Vespa provides a client for integration with OpenAI compatible APIs. 
+This includes, but is not limited to +[OpenAI](https://platform.openai.com/docs/overview), +[Google Gemini](https://ai.google.dev/), +[Anthropic](https://www.anthropic.com/api), +[Cohere](https://docs.cohere.com/docs/compatibility-api) +and [Together.ai](https://docs.together.ai/docs/openai-api-compatibility). +You can also host your own OpenAI-compatible server using for example +[VLLM](https://docs.vllm.ai/en/latest/getting_started/quickstart.html#quickstart-online) or +[llama-cpp-server](https://llama-cpp-python.readthedocs.io/en/latest/server/). + + +**Note:** This is currently a Beta feature so changes can be expected. + + +### Configuring the OpenAI client + +To set up a connection to an LLM service such as OpenAI's ChatGPT, you need to +define a component in your application's +[services.xml](/en/reference/applications/services/services): + +```xml + + + + ... + + + + {/* Optional configuration: */} + + ... + {/* endpoint example: https://openai-compatible-api.com/v1/ */} + ... + + + + + ... + + + +``` + +To see the full list of available configuration parameters, refer to the [llm-client config definition file](https://github.com/vespa-engine/vespa/blob/master/model-integration/src/main/resources/configdefinitions/llm-client.def). + +This sets up a client component that can be used in a +[searcher](/en/learn/glossary#searcher) or a [document processor](/en/learn/glossary#document-processor). + +### API key configuration + +Vespa provides several options to configure the API key used by the client. + +1. Using the [Vespa Cloud secret store](/en/security/secret-store) to store the API key. + This is done by setting the `apiKeySecretRef` configuration parameter to the name of the secret + in the secret store. This is the recommended way for Vespa Cloud users. +2. For self-managed Vespa, you can provide secrets via environment variables. 
+ Set the `apiKeySecretRef` configuration parameter and expose the secret as an environment variable + named `VESPA_SECRET_<SECRET_NAME>`, where `<SECRET_NAME>` is the secret reference name converted to + upper snake case. For example, if `apiKeySecretRef` is set to `myApiKey`, the environment variable + should be named `VESPA_SECRET_MY_API_KEY`. +3. Providing the API key in the `X-LLM-API-KEY` HTTP header of the Vespa query. + +You can set up multiple connections with different settings. For instance, you +might want to run different LLMs for different tasks. To distinguish between the +connections, modify the `id` attribute in the component specification. We will +see below how this is used to control which LLM is used for which task. + +As a reminder, Vespa also has the option of running custom LLMs locally. Please refer to +[running LLMs in your application](/en/rag/local-llms) for more information. + +### Inference parameters + +Please refer to the general discussion in [LLM parameters](/en/rag/llms-in-vespa#llm-parameters) for setting inference +parameters. + +The OpenAI-client also has the following inference parameters that can be sent along +with the query: + +| Parameter (Vespa) | Parameter (OpenAI) | Description | +| :--- | :--- | :--- | +| `maxTokens` | `max_completion_tokens` | Maximum number of tokens that can be generated in the chat completion. | +| `temperature` | `temperature` | Number between 0 and 2. Higher values like 0.8 make output more random, while lower values like 0.2 make it more focused and deterministic. | +| `topP` | `top_p` | An alternative to temperature sampling. Model considers tokens with top\_p probability mass (0-1). Value of 0.1 means only tokens comprising top 10% probability are considered. | +| `seed` | `seed` | If specified, the system will attempt to sample deterministically, so repeated requests with the same seed should return similar results. Determinism is not guaranteed. 
| +| `npredict` | `n` | How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all choices. | +| `frequencypenalty` | `frequency_penalty` | Number between -2.0 and 2.0. Positive values penalize new tokens based on their frequency in the text so far, decreasing the likelihood of repetition. Negative values encourage repetition. | +| `presencepenalty` | `presence_penalty` | Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics. Negative values encourage repeating content from the prompt. | + + +Any parameter sent with the query will override configuration specified for the client component in `services.xml`. + +Note that if you are not using OpenAI's API, the parameters may be handled differently than the descriptions above. + + +### Connecting to other OpenAI-compatible providers + +By default, this particular client connects to the OpenAI service, but can be used against any [OpenAI chat completion compatible API](https://platform.openai.com/docs/guides/text-generation/chat-completions-api) by changing the `endpoint` configuration parameter. + +### FAQ + + + +The OpenAI client is compatible with any LLM that implements the OpenAI chat completion API. You can check the documentation of your LLM provider to see if they support this API. + + +[Responses](https://platform.openai.com/docs/api-reference/responses/create) + +No, currently only the [Chat Completion API](https://platform.openai.com/docs/api-reference/chat) is supported. + + +Yes, but currently, you need to implement a [custom searcher](/en/applications/searchers) that uses the OpenAI client to rerank the results. + + +No, currently, only the [Chat Completion API](https://platform.openai.com/docs/api-reference/chat) is supported. 
+ + \ No newline at end of file diff --git a/mintlify-docs/en/rag/llms-in-vespa.mdx b/mintlify-docs/en/rag/llms-in-vespa.mdx new file mode 100644 index 0000000000..bab3f30331 --- /dev/null +++ b/mintlify-docs/en/rag/llms-in-vespa.mdx @@ -0,0 +1,389 @@ +--- +title: "Large Language Models in Vespa" +sidebarTitle: "LLMs in Vespa" +--- + +Large Language Models (LLMs) are AI systems that generate human-like text, supporting a variety of applications like chatbots and content generation. In Vespa, LLMs can enhance search relevance, create dynamic content based on search +results, and understand natural language by integrating into Vespa's processing chain structure, which handles querying and data ingestion. This allows Vespa to apply LLMs' deep linguistic and semantic capabilities across different stages, improving tasks such as document enrichment, query comprehension, summarization and question-answering. + +Vespa is ideally suited for retrieval-augmented generation (RAG). This technique allows these models to access relevant and up-to-date information beyond their training in real-time, enabling Vespa's output to be contextually informed. For more information, refer to [Retrieval-Augmented Generation in Vespa](/en/rag/rag). + +The advantage of setting up a client connection to an LLM from within your Vespa application compared to doing the API call(s) from your client after responses are returned from Vespa is that you eliminate an extra network hop, which means lower latency for end users. The importance of this is amplified if you want to leverage multiple LLM calls for, e.g., agentic applications or reranking. + +Vespa supports LLMs in three ways: + +1. [**External LLMs**](/en/rag/external-llms): Vespa can connect to any external LLM provider that serves an +OpenAI-compatible API. +2. [**Local LLMs**](/en/rag/local-llms): Vespa can run LLMs within the Vespa application itself. +This allows for customized models and avoids sending data outside the application. 
+This is particularly useful for applications with strict data privacy requirements or those needing specific model configurations. +3. [**Custom language models**](/en/rag/llms-in-vespa#custom-language-model-components): Vespa can be extended to support any language model, including those not based on OpenAI's API. This allows for flexibility in integrating various LLMs into Vespa applications. + +This document will focus on features that are common to both external and local LLMs. +For more information on configuration details for each type, please refer to the respective sections. + +For a quick start, check out the [RAG sample app](https://github.com/vespa-engine/sample-apps/tree/master/retrieval-augmented-generation), +which demonstrates setting up Vespa for RAG, using either an external LLM service or a local LLM. + +### Using LLMs + + +**Note:** This feature is available in Vespa versions >= 8.327 + + +Vespa distinguishes between the clients used to connect to LLMs and components that use these clients. You can, for instance, set up a single client connection to an LLM, and use this connection for both [document enrichment](/en/rag/document-enrichment) and retrieval-augmented generation (RAG). + + +![LLM/RAG searcher](/assets/img/llm-rag-searcher.svg) + +After adding a client connection to your `services.xml`, you can use the same client for various tasks such as retrieval-augmented generation. To do this, you need to set up the searchers or document processors that will use them. An example of a simple searcher that uses the client component is the `LLMSearcher`, which can be set up like this: + +```xml + + + + ... + + + {/* Configure as required */} + + + + + + + openai + + + + + + ... + + + +``` + +This sets up a new [search chain](/en/reference/applications/services/search#chain) which includes an `LLMSearcher`. This searcher has the responsibility of calling out to the LLM connection using some prompt that has been sent along with the query. 
+ +Note the `providerId` configuration parameter: this must match the `id` given in the component specification. Using this, one can set up as many clients and searchers and combinations of these as one needs. If you do not specify a +`providerId`, the searcher will use the first available LLM connection. + +This particular searcher doesn't provide a lot of functionality, it only calls out to the LLM service using a provided prompt sent along with the query. The searcher expects the prompt to be passed in the query parameter `prompt`. For instance, using the Vespa CLI: + +```sh +$ vespa query \ + --header="X-LLM-API-KEY:..." \ + searchChain=llm \ + prompt="what was the manhattan project?" +``` + +Here, we first pass along the API key to the OpenAI API. You need to provide your own OpenAI key for this. The `searchChain` parameter selects the `llm` chain set +up in `services.xml`. Finally, the `prompt` parameter determines what is sent to the language model. + +Note that if the `prompt` query parameter is not provided, the `LLMSearcher` will try to use the `query` query parameter. + +By running the above command you will get something like the following: + +```json expandable +{ + "root": { + "id": "token_stream", + "relevance": 1.0, + "fields": { + "totalCount": 0 + }, + "children": [ + { + "id": "event_stream", + "relevance": 1.0, + "children": [ + { + "id": "1", + "relevance": 1.0, + "fields": { + "token": "The" + } + }, + { + "id": "2", + "relevance": 1.0, + "fields": { + "token": " Manhattan" + } + }, + { + "id": "3", + "relevance": 1.0, + "fields": { + "token": " Project" + } + }, + { + "id": "4", + "relevance": 1.0, + "fields": { + "token": " was" + } + }, + ... + ] + } +} +``` + +### Streaming with Server-Sent Events + +By running the above, you will have to wait until the entire response is generated from the underlying LLM. This can take a while, as LLMs generate one token at a time. 
To stream the tokens as they arrive, use the `sse` (Server-Sent +Events) renderer by adding the `format` query parameter: + +```sh +$ vespa query \ + --header="X-LLM-API-KEY:..." \ + searchChain=llm \ + prompt="what was the manhattan project?" \ + format=sse + +The Manhattan Project was a research and development project during World War II that produced the first nuclear weapons. It was led by the United States with the support of the United Kingdom and Canada, and aimed to develop the technology necessary to build an atomic bomb. The project culminated in the bombings of the Japanese cities of Hiroshima and Nagasaki in August 1945. +``` + +The Vespa CLI understands this format and will stream the tokens as they arrive. +The underlying format is [Server-Sent Events](https://html.spec.whatwg.org/multipage/server-sent-events.html), and the +output from Vespa is like this: + +```sh +$ vespa query \ + --format=plain \ + --header="X-LLM-API-KEY:..." \ + searchChain=llm \ + prompt="what was the manhattan project?" \ + format=sse + +event: token +data: {"token":"The"} + +event: token +data: {"token":" Manhattan"} + +event: token +data: {"token":" Project"} + +event: token +data: {"token":" was"} + +event: token +data: {"token":" a"} + +... +``` + +Notice the use of the `--format=plain` in the Vespa CLI here to output exactly +what is sent from Vespa. + +These events can be consumed by using an `EventSource` as described in the [HTML specification](https://html.spec.whatwg.org/multipage/server-sent-events.html#server-sent-events), +or however you see fit as the format is fairly simple. Each `data` element contains a small JSON object which must be parsed, and contains a single `token` element containing the actual token. + +Errors are also sent in such events: + +```sh +$ vespa query \ + --header="X-LLM-API-KEY: banana" \ + prompt="what was the manhattan project?" 
\ + searchChain=llm \ + format=sse + +event: error +data: { + "source": "openai", + "error": 401, + "message": "{ \"error\": { \"message\": \"Incorrect API key provided: banana. You can find your API key at https://platform.openai.com/account/api-keys.\", \"type\": \"invalid_request_error\", \"param\": null, \"code\": \"invalid_api_key\" }}" +} +``` + +### LLM parameters + +The LLM service typically has a set of inference parameters that can be set. This can be parameters such as: + +- `model` - for OpenAI can be any valid model such as `gpt-4o` or `gpt-4o-mini` etc. +- `temperature` - for setting the model temperature +- `maxTokens` - for setting the maximum number of tokens to produce + +Note that these parameters are common to both [Local LLMs](/en/rag/local-llms) and [External LLMs](/en/rag/external-llms), but each of them also supports additional inference parameters. See the respective sections for more details on these. + +To provide inference parameters, you pass these along with the query: + +```sh +$ vespa query \ + --header="X-LLM-API-KEY: ..." \ + prompt="what was the manhattan project?" \ + searchChain=llm \ + format=sse \ + llm.model=gpt-4 \ + llm.maxTokens=10 +``` + +Note that these parameters are prepended with `llm`. This is so that you can have multiple LLM searchers and control them independently by setting them up with different property prefixes in `services.xml`. For instance: + +```xml + + + + openai + rag + + + + + openai + llm + + + +``` + +Here, we have set up a chain with two LLM searchers, that have set up different `propertyPrefix`s. The searchers use this to get their specific properties. This also includes prompts. The prompt for the first searcher would thus be `rag.prompt` and the second would be `llm.prompt`. + +Note that if this `propertyPrefix` is not set, the default is `llm` and all LLM searchers would share the same parameters. 
+ +Also note that `prompt` does not need to be prefixed in the query, however the other parameters do need to. + +If you are using different LLM services, you can also distinguish between API keys sent along with the query by prepending them as well with the `propertyPrefix`. + +### Retrieval-Augmented Generation (RAG) + +Above we used the `LLMSearcher` to call out to LLMs using a pre-specified prompt. Vespa provides the `RAGSearcher` to construct a prompt based on search results. This enables a flexible way of first searching for content in Vespa, and using the results to generate a response. + +Please refer to [RAG in Vespa](/en/rag/rag) for more details. + +### Structured output + +Both the `OpenAI` and `LocalLLM` clients in Vespa can also be configured to return [structured output](https://platform.openai.com/docs/guides/structured-outputs). +This is done by providing an `llm.json_schema` in the query. (Assuming you are using the `LLMSearcher` or `RAGSearcher` with `propertyPrefix=llm`). + +This can be useful for different use cases. Examples include applying moderation of the output or providing the response in different styles and/or languages. + +```json expandable +{ + "type": "object", + "properties": { + "answer-short": { + "type": "string" + }, + "answer-short-french": { + "type": "string", + "description": "exact translation of short answer in French language" + }, + "answer-short-eli5": { + "type": "string", + "description": "explain the answer like I am 5 years old" + } + }, + "required": [ + "answer-short", + "answer-short-french", + "answer-short-eli5" + ], + "additionalProperties": false +} +``` + +The `json_schema` can be passed with the query using the `llm.json_schema` parameter: + +```sh +$ vespa query \ + --timeout 60 \ + --header="X-LLM-API-KEY:" \ + query="what was the manhattan project?" 
\ + hits=5 \ + searchChain=openai \ + format=sse \ + llm.json_schema="{\"type\":\"object\",\"properties\":{\"answer-short\":{\"type\":\"string\"},\"answer-short-french\":{\"type\":\"string\",\"description\":\"exact translation of short answer in French language\"},\"answer-short-eli5\":{\"type\":\"string\",\"description\":\"explain the answer like I am 5 years old\"}},\"required\":[\"answer-short\",\"answer-short-french\",\"answer-short-eli5\"],\"additionalProperties\":false}" \ + traceLevel=1 +``` + +Which for example, using `gpt-4o-mini` returns +```json +{ + "answer-short": "The Manhattan Project was a World War II research and development program that produced the first atomic bombs, led by the United States with help from the UK and Canada, overseen by Major General Leslie Groves and physicist Robert Oppenheimer.", + "answer-short-french": "Le Projet Manhattan était un programme de recherche et développement de la Seconde Guerre mondiale qui a produit les premières bombes atomiques, dirigé par les États-Unis avec l'aide du Royaume-Uni et du Canada, sous la supervision du général Leslie Groves et du physicien Robert Oppenheimer.", + "answer-short-eli5": "The Manhattan Project was a secret and important project during World War II where scientists worked together to make the first big bombs that could make huge explosions, which changed the world." +} +``` + +This can also be leveraged for automated [Document Enrichment](/en/rag/document-enrichment) during ingestion. With this approach, the `json_schema` is automatically generated based on the Vespa schema (and your prompt). + +### Query profiles + +In all the above you have sent parameters along with each query. It is worth mentioning that Vespa supports [query profiles](/en/querying/query-profiles), which are +named collections of search parameters. 
This frees the client from having to manage and send a large number of parameters, and enables the request parameters for a use case to be changed without having to change the client. + +### Custom language model components + +Vespa also allows you to create your own language model components. This is useful in cases where you want to use a language model that is not [supported](/en/rag/local-llms#valid-llm-models) as a local LLM through [llama.cpp](https://github.com/ggml-org/llama.cpp), or if you want to use an external LLM service that is incompatible with the OpenAI API. + +To create your own language model component, you need to implement the `ai.vespa.llm.LanguageModel` interface. +Minimal example shown below: + +```java expandable +// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +package ai.vespa.test; + +import ai.vespa.llm.InferenceParameters; +import ai.vespa.llm.completion.Completion; +import ai.vespa.llm.completion.Prompt; + +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.function.Consumer; + +public class MockLanguageModel implements ai.vespa.llm.LanguageModel { + private final MockLanguageModelConfig config; + + public MockLanguageModel(MockLanguageModelConfig config) { + this.config = config; + } + + @Override + public List<Completion> complete(Prompt prompt, InferenceParameters params) { + var stringBuilder = new StringBuilder(); + + for (int i = 0; i < config.repetitions(); i++) { + stringBuilder.append(prompt.asString()); + + if (i < config.repetitions() - 1) { + stringBuilder.append(" "); + } + } + + return List.of(Completion.from(stringBuilder.toString().trim())); + } + + @Override + public CompletableFuture<Completion.FinishReason> completeAsync(Prompt prompt, + InferenceParameters params, + Consumer<Completion> consumer) { + throw new UnsupportedOperationException(); + } +} +``` + +You can also create a [config definition](/en/applications/configuring-components#config-definition) that will 
make your component configurable through the `services.xml` file. + +Example of a minimal config definition: + +```txt +# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. +namespace=ai.vespa.test +package=ai.vespa.test + +repetitions int default=1 +``` + +See also [developer guide](/en/applications/developer-guide) for more information on how to create your own components. + +### Creating your own searchers in Java + +The above example uses the `LLMSearcher` +[class](https://github.com/vespa-engine/vespa/blob/master/container-search/src/main/java/ai/vespa/search/llm/LLMSearcher.java). You can easily create your own LLM searcher in Java by either specifically [injecting](/en/applications/dependency-injection) the connection component, or subclassing the `LLMSearcher`. Please refer to [Searcher Development](/en/applications/searchers) or [Document Processor Development](/en/applications/document-processors) for more information on creating your own components. + +Note that it should not be necessary to create your own components in Java to use this functionality. diff --git a/mintlify-docs/en/rag/local-llms.mdx b/mintlify-docs/en/rag/local-llms.mdx new file mode 100644 index 0000000000..4d13d37aa5 --- /dev/null +++ b/mintlify-docs/en/rag/local-llms.mdx @@ -0,0 +1,249 @@ +--- +title: "Running LLMs inside your Vespa application" +sidebarTitle: "Using local LLMs" +--- + +Please refer to [Large Language Models in Vespa](/en/rag/llms-in-vespa) for an introduction to using LLMs in Vespa. + +Vespa supports evaluating LLMs within your application, both on CPU and GPU. + + +**Note:** This is currently a Beta feature so changes can be expected. + + +Running large language models (LLMs) directly in your Vespa application offers +various advantages, particularly in terms of data security and privacy. 
By +running LLMs locally, sensitive information remains within the confines of the +application or network, eliminating the risks associated with data transmission +to external services. This is especially important for [RAG use cases](/en/rag/rag) that handle sensitive or proprietary data, such as +healthcare, finance, and legal services, where privacy compliance and data +security are valid concerns. + +Moreover, hosting LLMs locally allows applications to select from a wider range of models that best suit their specific needs, rather than being limited to the models offered by external providers. This flexibility enables +businesses to optimize performance, cost, and efficiency tailored to their operational requirements. Additionally, managing LLMs in-house provides control over model versions, allowing companies to maintain stable and consistent +outputs by choosing when and how to update their models. + +Finally, while massively large foundation models dominate the generalist use case, the smaller, more specialized, models (sometimes called "small language models") have become much more capable. + +For a quick start, check out the [RAG sample app](https://github.com/vespa-engine/sample-apps/tree/master/retrieval-augmented-generation) +which demonstrates how to set up a local LLM. + + +### Setting up LLM clients in services.xml + + +**Note:** This feature is available in Vespa versions >= 8.331 + + +To set up the required inference engine for running your model, you need to +define a `LocalLLM` component in your application's +[services.xml](/en/reference/applications/services/services): + +```xml + + + + ... + + + + + + + + ... + + + +``` + +This component will ensure that the underlying inference engine is started and load the model when the container nodes are started. Each container node in the +cluster will load the LLM. Note that you can set up +[multiple clusters of container nodes](/en/applications/containers). 
This can be helpful for instance if you have multiple LLMs that don't fit in the available GPU memory, or you would like to offload LLM inference to dedicated +nodes for performance reasons. + +The [`model`](/en/reference/applications/config-files#parameter-types) configuration +parameter can be either set to a known `model-id` for Vespa Cloud, a `url` or a `path` to the model inside the application package. Usually, however, LLM files +are too large to practically be included in the application package, so the `url` attribute is used. See [below](/en/rag/local-llms#valid-llm-models) for more information on +model types that can be used in Vespa. + +There are many other configuration parameters to customize how inference is run, please see the [configuration](/en/rag/local-llms#local-llm-configuration) section for more details. + +### Valid LLM models + +Under the hood, Vespa uses [llama.cpp](https://github.com/ggerganov/llama.cpp). +Any model file that works with `llama.cpp` can be used in Vespa. This includes the following base models and finetunes of them: + +- LLama 2/3 +- Mistral 7B +- Mixtral MoE +- Gemma +- Command R+ +- Phi 2/3 +- And many more + +Refer to the [supported models](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#description) +section of `llama.cpp` for a full list of updated models. + +Vespa supports the `GGUF` file format. `GGUF` models can be found on [HuggingFace](https://huggingface.co/models), by searching for `GGUF`. Other LLM formats such as safetensors and pytorch.bin models need to be converted to GGUF +before use. Please refer to `llama.cpp` for conversion tools. + +Quantized models are also supported. Models are typically trained to [FP16](https://en.wikipedia.org/wiki/Half-precision_floating-point_format) precision, but `GGUF` files support reduced precision to 8-bit or lower. This +can save space so larger models can fit in less memory. 
Be aware however, that inference time can increase when using reduced precision, so be sure to benchmark your application accordingly, both in terms of token generation performance and output quality. + +### Local LLM configuration + +LLM model inference has a number of configuration parameters that are set in +`services.xml` and are applied during model loading. There is also a set of parameters +that can be set during inference which are passed as query parameters. Please refer +below to [inference parameters](#inference-parameters) for more information on those. + +The most significant model configuration parameters are: + +- `model`: the model file to use. The attributes are either `model-id` which + specifies a known model in Vespa Cloud, `url` which specifies a URL to a + model, for instance in HuggingFace, or `path` which specifies a file found in + the application package. +- `parallelRequests`: the maximum number of parallel requests to handle. This + is the batch size of concurrent texts to generate. +- `contextSize`: the size of context window. A model is typically trained with + a given context size, but this can typically be increased if required. This + setting has a direct impact on memory usage. +- `useGpu`: toggle the use of GPU if available. Default is `true`, which means + GPU will be used if it is found. See the [GPU section below](#using-gpus) for + more details. +- `gpuLayers`: number of layers in the model to offload to GPU. This setting + allows partial evaluation on CPU and GPU, so models larger than available GPU + memory can be used. Default is to offload all layers to the GPU. +- `threads`: the number of threads to use when using CPU only inference. The + default is the number of available cores - 2. Do not set this higher than the + core count, as this will severely impact performance. +- `maxTokens`: the maximum number of tokens that will be generated. Default is + 512. 
+- `maxPromptTokens`: the maximum number of tokens in the prompt. If the prompt + exceeds this number, it will be truncated. + Default is -1, which means that the prompt will not be truncated. +- `contextOverflowPolicy`: determines what to do when `contextSize` is too small + to fit prompt and completion tokens for all parallel requests. + The default is `NONE`, which allows new tokens to overwrite older ones. + This may result in lower quality completions and performance issues. + `DISCARD` ignores the request silently, returning without generating any tokens. + `FAIL` raises an error. + +Please refer to the [local LLM client configuration definition](https://github.com/vespa-engine/vespa/blob/master/model-integration/src/main/resources/configdefinitions/llm-local-client.def) +for an updated list of configuration parameters. + +Some important points are worth considering here. First is the context window, +given by the `contextSize` parameter, which is the size (in number of tokens) +that the model uses to generate the next token. In general, larger context +windows are required in [RAG applications](/en/rag/rag) to hold the context +from the retrieval stage. Models are trained with a certain context length, +but this context length can typically be increased up to 4x without much loss +in text generation quality. + +The size of the context window has a direct impact on memory use. For instance, +a typical 7B model such as Mistral 7B, with a size of 7 billion parameters will +use around 14Gb memory when using FP16 precision, and 7Gb with 8-bit +quantization. Assuming we use an 8-bit quantization: + +- A context window of `4096` will use 7.3Gb for the model, 512Mb for the context + window and 296Mb for the compute buffer, requiring around 8Gb memory in total. +- A context window of `32768` will use 7.3Gb for the model, 4Gb for the context + window and 2.2Gb for the compute buffer, requiring almost 14Gb memory in total. 
+ +So, a single GPU with 16Gb memory can just about hold the 7B model with a +context size of `32768`. For reference, the Mistral 7B model is trained with +this context size. + +Now, when running in context of Vespa, we generally would like to handle multiple +requests in parallel. The number of parallel requests we can handle per container +node is set with the `parallelRequests` parameter. This in effect sets up a number +of **slots** that can be evaluated simultaneously. Each sequence that should +be generated requires a significant amount of memory to keep the context for +each generated token. + +The total amount of memory that is set for this task is given by the +`contextSize` parameter. The effective context size for each request is this +size divided by number of parallel requests. So, for a total context size of +`32768` tokens and `10` parallel requests, each request effectively has a +context window of `3276` tokens. To increase the context size per request, the +total context size must be increased, which naturally has significant impact on +memory use. This is most acute on GPU which has a limited available memory. + +Memory restrictions thus drive the settings of these two parameters. For +reference, a 16Gb GPU can hold a 7B 8-bit model with a context size of `4096` +for `10` parallel requests by setting the context size to `40960`. If a larger +context window is required, the number of parallel requests must be decreased. +Likewise, the number of parallel requests can be increased by decreasing the +context size. This depends on the requirements of your application. + + +### Inference parameters + +Please refer to the general discussion in [LLM parameters](/en/rag/llms-in-vespa#llm-parameters) for setting inference +parameters. + +Local LLM inference has the following inference parameters that can be sent along +with the query: + +- `npredict`: the number of tokens to generate. Overrides the `maxTokens` + setting in the model configuration. 
+- `temperature`: the temperature setting of the model, typically between `0.0` + and `1.0`. +- `repeatpenalty`: the penalty for repeating tokens. +- `topk` and `topp`: the probability of token sampling. Lower values tend to + produce more coherent and focused text, while higher values introduce more + diversity and creativity but potentially more errors or incoherence. +- `jsonSchema`: JSON schema to use for structured output. + Specifying this parameter also enables structured output. + See [structured output](/en/rag/llms-in-vespa#structured-output) for more details. + +The most significant of these is `npredict`, which will stop the token generation +process after a certain number of tokens has been generated. Some models can +for certain prompts enter a loop where an infinite number of tokens are +generated. This is clearly not a beneficial situation, so this number should be +set to a high enough value, so all tokens for a response can be generated, but +low enough to stop the model from generating tokens infinitely. + + +## Using GPUs + +Using a GPU can significantly speed up token generation and is generally +recommended. The memory requirements discussed above are especially +acute when running on GPUs due to memory limitations. In Vespa, the default is +to offload the entire model to the GPU if it is available, but by using the +`gpuLayers` parameter one can experiment with offloading parts of the model to +GPU. + +```xml + + + + {/* Sets up the inference on a mistral 7B model */} + + + + 10 + 40960 + true {/* default is true */} + 100 + + + + + + +``` + +Here, the model itself has 33 layers, and all are offloaded to the GPU. If your +model is too large to fit on the GPU, you can speed up model evaluation by +offloading parts of the model to the GPU. + +To set up GPUs on self-hosted, please refer to [Container GPU setup](/en/operations/self-managed/vespa-gpu-container) +for more details. + +It is very easy to use GPU acceleration on Vespa Cloud. 
To enable GPU +inference, you need to [request GPUs](/en/reference/applications/services/services#gpu) on the container +nodes. For a more practical introduction, please take a look at the +[RAG sample app](https://github.com/vespa-engine/sample-apps/tree/master/retrieval-augmented-generation) +which also demonstrates how to evaluate the LLM on GPUs on Vespa Cloud. \ No newline at end of file diff --git a/mintlify-docs/en/rag/model-hub.mdx b/mintlify-docs/en/rag/model-hub.mdx new file mode 100644 index 0000000000..915f2415fc --- /dev/null +++ b/mintlify-docs/en/rag/model-hub.mdx @@ -0,0 +1,322 @@ +--- +title: Using machine-learned models from Vespa Cloud +sidebarTitle: "Model hub" +--- + +Vespa Cloud provides a set of machine-learned models that you can use in your applications. These models will always be available on Vespa Cloud and are [frozen models](https://blog.vespa.ai/tailoring-frozen-embeddings-with-vespa/). You can also bring your own embedding model, by deploying it in the Vespa application package. + +You specify to use a model provided by Vespa Cloud by setting the `model-id` attribute where you specify a model config. For example, when configuring the [Huggingface embedder](/en/rag/embedding#huggingface-embedder) provided by Vespa, you can write: + +```xml + + + + + ... + +``` + +With this, your application will have support for [text embedding](/en/rag/embedding#embedding-a-query-text) inference for both queries and documents. Nodes that have been provisioned with GPU acceleration, will automatically use GPU for embedding inference. + +## Vespa Cloud Embedding Models + +Models on Vespa model hub are selected open-source embedding models with great performance. See the [Vespa blog on embedding tradeoffs](https://blog.vespa.ai/embedding-tradeoffs-quantified/) for details on performance and quality. These embedding models are useful for retrieval (semantic search), re-ranking, clustering, classification, and more. 
+ +### Huggingface Embedder + +These models are available for the Huggingface Embedder `type="hugging-face-embedder"`. All these models support mapping from `string` or `array` to tensor representations. + +The output tensor [cell-precision](/en/performance/feature-tuning#cell-value-types) can be ``, ``, or ``. + +Most models also support [binarization](/en/rag/binarizing-vectors), which requires using [distance-metric](/en/reference/schemas/schemas#distance-metric) `hamming` instead of `angular`. The E5 and multilingual-e5 models do not support binarization. See the [nanobeir hybrid evaluation leaderboard](https://huggingface.co/spaces/vespa-engine/nanobeir-hybrid-evaluation) for details on quality impact. + + +#### alibaba-gte-modernbert + +| | | +| :--- | :--- | +| GTE (General Text Embedding) model trained from ModernBERT-base. | | +| Model id | `alibaba-gte-modernbert` | +| Tensor definition | `tensor(x[768])` | +| Matryoshka dimensions | `x[768]`, `x[256]` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) @ 3ab3f8c | +| Language | English | +| Component declaration | ```8192cls ``` | + +#### alibaba-gte-modernbert-int8 + +| | | +| :--- | :--- | +| INT8 quantized variant of alibaba-gte-modernbert. Offers faster inference with minimal accuracy loss. 
| | +| Model id | `alibaba-gte-modernbert-int8` | +| Tensor definition | `tensor(x[768])` | +| Matryoshka dimensions | `x[768]`, `x[256]` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/Alibaba-NLP/gte-modernbert-base](https://huggingface.co/Alibaba-NLP/gte-modernbert-base) @ e7f32e3 | +| Language | English | +| Component declaration | ```8192cls ``` | + +#### e5-base-v2 +| | | +| :--- | :--- | +| The base model of the _E5_ family. | | +| Model id | `e5-base-v2` | +| Tensor definition | `tensor(x[768])` or `tensor(p{},x[768])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [MIT](https://github.com/microsoft/unilm/blob/master/LICENSE) | +| Source | [https://huggingface.co/intfloat/e5-base-v2](https://huggingface.co/intfloat/e5-base-v2) @ 121b23b | +| Language | English | +| Component declaration | ```512query: passage: ``` | + + +#### e5-large-v2 + +| | | +| :--- | :--- | +| The largest model of the _E5_ family, at time of writing, this is the best performing embedding model on the MTEB benchmark. | | +| Model id | `e5-large-v2` | +| Tensor definition | `tensor(x[1024])` or `tensor(p{},x[1024])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [MIT](https://github.com/microsoft/unilm/blob/master/LICENSE) | +| Source | [https://huggingface.co/intfloat/e5-large-v2](https://huggingface.co/intfloat/e5-large-v2) | +| Language | English | +| Component declaration | ```512query: passage: ``` | + +#### e5-small-v2 + +| | | +| :--- | :--- | +| The smallest and most cost-efficient model from the _E5_ family. 
| | +| Model id | `e5-small-v2` | +| Tensor definition | `tensor(x[384])` or `tensor(p{},x[384])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [MIT](https://github.com/microsoft/unilm/blob/master/LICENSE) | +| Source | [https://huggingface.co/intfloat/e5-small-v2](https://huggingface.co/intfloat/e5-small-v2) | +| Language | English | +| Component declaration | ```512query: passage: ``` | + +#### lightonai-modernbert-large + +| | | +| :--- | :--- | +| Trained from ModernBERT-large on the Nomic Embed datasets, bringing the new advances of ModernBERT to embeddings. | | +| Model id | `lightonai-modernbert-large` | +| Tensor definition | `tensor(x[1024])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/lightonai/modernbert-embed-large](https://huggingface.co/lightonai/modernbert-embed-large) @ b3a781f | +| Language | English | +| Component declaration | ```8192search_query: search_document: ``` | +#### lightonai-modernbert-large-int8 + + +| | | +| :--- | :--- | +| INT8 quantized variant of lightonai-modernbert-large. Offers faster inference with minimal accuracy loss. | | +| Model id | `lightonai-modernbert-large-int8` | +| Tensor definition | `tensor(x[1024])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/lightonai/modernbert-embed-large](https://huggingface.co/lightonai/modernbert-embed-large) @ 95a19bf | +| Language | English | +| Component declaration | ```8192search_query: search_document: ``` | + +#### multilingual-e5-base + +| | | +| :--- | :--- | +| The multilingual model of the _E5_ family. Use this model for multilingual queries and documents. 
| | +| Model id | `multilingual-e5-base` | +| Tensor definition | `tensor(x[768])` or `tensor(p{},x[768])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [MIT](https://github.com/microsoft/unilm/blob/master/LICENSE) | +| Source | [https://huggingface.co/intfloat/multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) | +| Language | Multilingual | +| Component declaration | ```512query: passage: ``` | + + +#### nomic-ai-modernbert + + +| | | +| :--- | :--- | +| Trained from ModernBERT-base on the Nomic Embed datasets, bringing the new advances of ModernBERT to embeddings. | | +| Model id | `nomic-ai-modernbert` | +| Tensor definition | `tensor(x[768])` | +| Matryoshka dimensions | `x[768]`, `x[256]` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) @ 92168cb | +| Language | English | +| Component declaration | ```token_embeddings8192search_query: search_document: ``` | + +#### nomic-ai-modernbert-int8 + + +| | | +| :--- | :--- | +| INT8 quantized variant of nomic-ai-modernbert. Offers faster inference with minimal accuracy loss. | | +| Model id | `nomic-ai-modernbert-int8` | +| Tensor definition | `tensor(x[768])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) @ d556a88 | +| Language | English | +| Component declaration | ```token_embeddings8192search_query: search_document: ``` | + +#### snowflake-arctic-embed-m-v2.0 + +| | | +| :--- | :--- | +| Embedding model based on snowflake-arctic-embed-m-v2.0. 
| | +| Model id | `snowflake-arctic-embed-m-v2.0` | +| Tensor definition | `tensor(x[768])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v2.0](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v2.0) @ 95c2741 | +| Language | Multilingual | +| Component declaration | ```8192token_embeddingsclstruequery: ``` | + +#### snowflake-arctic-embed-m-v2.0-int8 + | | | +| :--- | :--- | +| INT8 quantized variant of snowflake-arctic-embed-m-v2.0. Offers faster inference with minimal accuracy loss. | | +| Model id | `snowflake-arctic-embed-m-v2.0-int8` | +| Tensor definition | `tensor(x[768])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v2.0](https://huggingface.co/Snowflake/snowflake-arctic-embed-m-v2.0) @ 95c2741 | +| Language | Multilingual | +| Component declaration | ```8192token_embeddingsclstruequery: ``` | + + +#### voyage-4-nano + +| | | +| :--- | :--- | +| Embedding model based on voyage-4-nano-ONNX. | | +| Model id | `voyage-4-nano` | +| Tensor definition | `tensor(x[2048])` | +| Matryoshka dimensions | `x[2048]`, `x[1024]`, `x[512]`, `x[256]` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/thomasht86/voyage-4-nano-ONNX](https://huggingface.co/thomasht86/voyage-4-nano-ONNX) @ fcf290d | +| Language | English | +| Component declaration | ```32768meantrueRepresent the query for retrieving supporting documents: Represent the document for retrieval: ``` | + +#### voyage-4-nano-int8 +| | | +| :--- | :--- | +| INT8 quantized variant of voyage-4-nano. 
Offers faster inference with minimal accuracy loss. | | +| Model id | `voyage-4-nano-int8` | +| Tensor definition | `tensor(x[2048])` | +| Matryoshka dimensions | `x[2048]`, `x[1024]`, `x[512]`, `x[256]` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/thomasht86/voyage-4-nano-ONNX](https://huggingface.co/thomasht86/voyage-4-nano-ONNX) @ fcf290d | +| Language | English | +| Component declaration | ```32768meantrueRepresent the query for retrieving supporting documents: Represent the document for retrieval: ``` | + +### Bert Embedder + +These models are available for the [Bert Embedder](/en/rag/embedding#bert-embedder) `type="bert-embedder"`: + +```xml + + + + + + ... + +``` + +Note that `bert-embedder` requires both `transformer-model` and `tokenizer-vocab`. + +#### minilm-l6-v2 + +A small, fast sentence-transformer model. + +| | | +| :--- | :--- | +| Model-id | minilm-l6-v2 | +| Tensor definition | `tensor(x[384])` or `tensor(p{},x[384])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | +| Language | English | + +#### mpnet-base-v2 + +A larger, but better, sentence-transformer model than **minilm-l6-v2**. 
+ +| | | +| :--- | :--- | +| Model-id | mpnet-base-v2 | +| Tensor definition | `tensor(x[768])` or `tensor(p{},x[768])` | +| [distance-metric](/en/reference/schemas/schemas#distance-metric) | `angular` | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) | +| Language | English | + +### Tokenization Embedders + +These are embedder implementations that tokenize text and embed string to the vocabulary identifiers. These are most useful for creating the tensor inputs to re-ranking models that take both the query and document token identifiers as input. Find examples in the [sample applications](https://github.com/vespa-engine/sample-apps/blob/master/README.md#vector-search-hybrid-search-and-embeddings). + + +#### bert-base-uncased +| | | +| :--- | :--- | +| A vocabulary text (_vocab.txt_) file on the format expected by [WordPiece](/en/rag/embedding#wordpiece-embedder): A text token per line. | +| Model-id | bert-base-uncased | +| License | [apache-2.0](https://www.apache.org/licenses/LICENSE-2.0) | +| Source | [https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | + +#### e5-base-v2-vocab +| | | +| :--- | :--- | +| A _tokenizer.json_ configuration file on the format expected by [HF tokenizer](/en/rag/embedding#huggingface-tokenizer-embedder). This tokenizer configuration can be used with `e5-base-v2`, `e5-small-v2` and `e5-large-v2`. 
| +| Model-id | e5-base-v2-vocab | +| License | [MIT](https://github.com/microsoft/unilm/blob/master/LICENSE) | +| Source | [https://huggingface.co/intfloat/e5-base-v2](https://huggingface.co/intfloat/e5-base-v2) | +| Language | English | + +#### multilingual-e5-base-vocab +| | | +| :--- | :--- | +| A _tokenizer.json_ configuration file on the format expected by [HF tokenizer](/en/rag/embedding#huggingface-tokenizer-embedder). This tokenizer configuration can be used with `multilingual-e5-base`. | +| Model-id | multilingual-e5-base-vocab | +| License | [MIT](https://github.com/microsoft/unilm/blob/master/LICENSE) | +| Source | [https://huggingface.co/intfloat/multilingual-e5-base](https://huggingface.co/intfloat/multilingual-e5-base) | +| Language | Multilingual | + +### Significance models + +These are [global significance models](/en/ranking/significance#significance-models-in-servicesxml) that can be added to the [significance element in services.xml](/en/reference/applications/services/search#significance). + +#### significance-en-wikipedia-v1 +| | | +| :--- | :--- | +| This significance model was generated from [English Wikipedia dump data from 2024-08-01](https://dumps.wikimedia.org/enwiki/). Available in Vespa as of version 8.426.8. | +| Model-id | significance-en-wikipedia-v1 | +| License | [Creative Commons Attribution-ShareAlike 3.0 Unported (CC BY-SA 3.0) License](https://creativecommons.org/licenses/by-sa/3.0/deed.en). 
| +| Source | [https://data.vespa-cloud.com/significance\_models/significance-en-wikipedia-v1.json.zst](https://data.vespa-cloud.com/significance_models/significance-en-wikipedia-v1.json.zst) | +| Language | English | + +## Creating applications working both self-hosted and on Vespa Cloud + +You can also specify both a `model-id`, which will be used on Vespa Cloud, and a url/path, which will be used on self-hosted deployments: + +```bash + +``` + +This can be useful for example to create an application package which uses models from Vespa Cloud for production and a scaled-down or dummy model for self-hosted development. + +## Using Vespa Cloud models with any config + +Specifying a model-id can be done for any [config field of type `model`](/en/applications/configuring-components#adding-files-to-the-component-configuration), whether the config is from Vespa or defined by you. + diff --git a/mintlify-docs/en/rag/rag.mdx b/mintlify-docs/en/rag/rag.mdx new file mode 100644 index 0000000000..6bf676212e --- /dev/null +++ b/mintlify-docs/en/rag/rag.mdx @@ -0,0 +1,167 @@ +--- +title: "Retrieval-augmented generation (RAG) in Vespa" +sidebarTitle: "RAG in Vespa" +--- + +Please refer to [Large Language Models in Vespa](/en/rag/llms-in-vespa) for an +introduction to using LLMs in Vespa. + +Retrieval-Augmented Generation (RAG) is a technique that merges retrieval systems with generative models to enhance language model outputs. It works by first using a retrieval system like Vespa to fetch relevant documents based on +an input query, and then a generative model, like an LLM, to generate more contextually relevant responses. This method allows language models to access up-to-date or specific domain knowledge beyond their training, improving performance in tasks such as question answering and dynamic content creation. 
+ +In Vespa, the `RAGSearcher` first performs the query as specified by the user, +creates a prompt based on the results, and queries the language model to +generate a response. + +For a quick start, check out the [RAG sample app](https://github.com/vespa-engine/sample-apps/tree/master/retrieval-augmented-generation) +which demonstrates using either an external LLM service or a local LLM. + + +### Setting up the RAGSearcher + +In `services.xml`, specify your LLM connection and the `RAGSearcher`: + +```xml + + + + ... + + + {/* Configure as required */} + + + + + + + openai + + + + + + ... + + + +``` + +As mentioned in [LLMs in Vespa](/en/rag/llms-in-vespa), you can call this chain +using the Vespa CLI: + +```bash +$ vespa query \ + --header="X-LLM-API-KEY:..." \ + query="what was the manhattan project?" \ + searchChain=rag \ + format=sse +``` + +However, notice here the use of the `query` query parameter. In [LLMs in +Vespa](/en/rag/llms-in-vespa), we used a `prompt` parameter to set up the prompt +to send to the LLM. You can also do that in the `RAGSearcher`, however this means +that no actual query is run in Vespa. For Vespa to run a search, you need to +specify a `yql` or `query` parameter. By using `query` here, this text is +used as both query text for the document retrieval, and in the prompt sent to +the LLM, as we will see below. + +Indeed, with the `RAGSearcher` you can use any type of [search in Vespa](/en/querying/query-api), +including [text search based on BM25](/en/learn/tutorials/text-search) +and advanced [approximate vector search](/en/querying/approximate-nn-hnsw). +This makes the retrieval part of RAG very flexible. + +### Controlling the prompt + +Based on the query, Vespa will retrieve a set of documents. The `RAGSearcher` +will create a context from these documents looking like this: + +```text +field1: ... +field2: ... +field3: ... + +field1: ... +field2: ... +field3: ... + +... 
+ +``` + +Here, `field1` and so on are the actual fields as returned from the search. For +instance, the [text search tutorial](/en/learn/tutorials/text-search) defines a +document schema consisting of fields: `id`, `title`, `url`, and `body`. If you +only want to include the `title` and `body` fields for use in the context, you +can issue a query like this: + +```bash +$ vespa query \ + --header="X-LLM-API-KEY:..." \ + yql="select title,body from msmarco where userQuery()" \ + query="what was the manhattan project?" \ + searchChain=rag \ + format=sse +``` + +The actual prompt that will be sent to the LLM will, by default, look like this: + +```text +{context} + +{@prompt or @query} +``` + +where `{context}` is as given above, and `@prompt` is replaced with the `prompt` query parameter if given, and `@query` is replaced with the user query if given. +This means you can customize the actual prompt by passing in a `prompt` parameter, and thus distinguish between what is searched for in Vespa, and what is asked for from the LLM. + +For instance: + +```bash +$ vespa query \ + --header="X-LLM-API-KEY:..." \ + yql="select title,body from msmarco where userQuery()" \ + query="what was the manhattan project?" \ + prompt="{context} @query Be as concise as possible." \ + searchChain=rag \ + format=sse +``` + +will result in a prompt like this: + +```text +title: <title of first document> +body: <body of first document> + +title: <title of second document> +body: <body of second document> + +<rest of documents> + +what was the manhattan project? Be as concise as possible. +``` + +Note that if your `prompt` does not contain `{context}`, the context will automatically be prepended to your prompt. However, if `@query` is not found in the prompt, it will not automatically be added to the prompt. + +Please be advised that all documents as returned by Vespa will be used in the context. Most LLMs have some form of limit for how large the prompt can be. 
LLM services also typically have a cost per query based on the number of tokens both in input and output. To reduce context size it is important to control the number of results by using the `hits` [query parameter](/en/reference/api/query#hits). Also, as in the queries above, +limit the returned fields to only what is strictly required. + +To debug the prompt, i.e. what is actually sent to the LLM, you can use the +`traceLevel` query parameter, and set that to a value larger than `0`: + +```bash +$ vespa query \ + --header="X-LLM-API-KEY:..." \ + query="what was the manhattan project?" \ + searchChain=rag \ + format=sse \ + traceLevel=1 + +event: prompt +data: {"prompt":"<the actual prompt sent to the LLM>"} + +event: token +data: {"token":"<first token of response>"} + +... +``` diff --git a/mintlify-docs/en/rag/working-with-chunks.mdx b/mintlify-docs/en/rag/working-with-chunks.mdx new file mode 100644 index 0000000000..6c0a83c491 --- /dev/null +++ b/mintlify-docs/en/rag/working-with-chunks.mdx @@ -0,0 +1,236 @@ +--- +title: "Working with chunks" +--- + +A key technique in RAG applications, and vector search applications in general, is to split longer text into chunks. This lets you: + +- Generate a vector embedding for each chunk rather than for an entire document text, to capture the semantic information of the text at a meaningful level. +- Select specific chunks to add to the context window in GenAI applications rather than the entire document content. + +Vespa contains the following functionality for working with chunks. Each is covered in a section below. 
+ +<CardGroup> +<Card title="Including chunks in documents" icon="file" href="#including-chunks-in-documents" horizontal/> +<Card title="Creating vector embeddings from chunks" icon="file" href="#creating-vector-embeddings-from-chunks" horizontal/> +<Card title="Searching chunks" icon="file" href="#searching-chunks" horizontal/> +<Card title="Ranking with chunks" icon="file" href="#ranking-with-chunks" horizontal/> +<Card title="Layered ranking: Selecting chunks to return" icon="file" href="#layered-ranking-selecting-chunks-to-return" horizontal/> +</CardGroup> + +## Including chunks in documents + +Chunks that belong to the same text should be added to the same document. The chunks are represented as arrays of string. + +You can split text into chunks yourself, using a schema like this: + +```js +search myDocumentType { + document myDocumentType { + field myChunks type array<string> { + indexing: summary | index + } + } +} +``` + +You can then write your chunks into Vespa using this [document JSON](../reference/schemas/document-json-format.html): + +```json +"myChunks": ["My first chunk text", "My second chunk text"] +``` + +Alternatively you can let Vespa do the chunking for you, by using a synthetic field outside the document: + +```js +search myDocumentType { + document myDocumentType { + field myText type string { + } + } + field myChunks type array<string> { + indexing: input myText | chunk fixed-length 500 | summary | index + } +} +``` + +In the [chunk expression](/en/reference/writing/indexing-language#chunk) you can choose between chunkers provided by Vespa, or plug in your own, see the [chunking reference documentation](/en/reference/rag/chunking). 
+ +## Creating vector embeddings from chunks + +To add embeddings to your documents, use a tensor field: + +```js +search myDocumentType { + document myDocumentType { + field myEmbedding type tensor<float>(x[384]) { + indexing: attribute | index + } + } +} +``` + +This lets you add a single embedding to each document, but usually you want to have many. In Vespa you can do that by adding [mapped dimensions](/en/ranking/tensor-user-guide#tensor-concepts) to your tensor: + +```js +search myDocumentType { + document myDocumentType { + field myEmbeddings type tensor<float>(chunk{}, x[384]) { + indexing: attribute | index + } + } +} +``` + +With this you can feed [tensors in JSON format](../reference/schemas/document-json-format#tensor-short-form-mixed) as part of your writes, e.g. writing an embedding tensor with chunks numbered 1 and 2: + +```js +"myEmbeddings": { + "1":[2.0,3.0,...], + "2":[4.0,5.0,...] +} +``` + +You may notice that parsing such JSON consumes a lot of CPU on container clusters. To avoid that you can also feed embeddings [hex encoded raw data](/en/reference/schemas/document-json-format#tensor-hex-dump). + +You can also let Vespa do the embedding for you, either using a model provided by Vespa, or one you decide in your application package: + +```js +search myDocumentType { + document myDocumentType { + field myChunks type array<string> { + } + } + field myEmbeddings type tensor<float>(chunk{}, x[384]) { + indexing: input myChunks | embed | attribute | index + } +} +``` + +See the [embedding guide](/en/rag/embedding) on how to configure embedders. 
+ +You can of course combine this with chunking to have a single text field chunked and embedded automatically: + +```js +search myDocumentType { + document myDocumentType { + field myText type string { + } + } + field myChunks type array<string> { + indexing: input myText | chunk sentence | summary | index + } + field myEmbeddings type tensor<float>(chunk{}, x[384]) { + indexing: input myText | chunk sentence | embed | attribute | index + } +} +``` + +Some things to note: + +- All fields of Vespa documents are stored and here we represent the text both as a single field and as chunks of text, won't that consume a lot of unnecessary space? No, thanks to the wonders of modern compression, the overhead from this can be ignored. +- Why return the chunk array in results and not the full text field? This is because for large text we need to select a subset of the text chunks rather than returning the full text. +- We are chunking twice here, won't this be inefficient? No, Vespa will reuse the result of the first invocation in cases like this. + +## Searching chunks + +You can search in chunk text (if you added `index`), and in chunk embeddings (if you created embeddings). Usually, you want to do both ([hybrid search](/en/learn/tutorials/hybrid-search)) since text search gives you precise matches, and embedding nearest neighbor search gives you imprecise semantic matching. + +A simple hybrid query can look like this: + +```bash +yql=select * from doc where myField contains text(@query) or ({totalTargetHits:10}nearestNeighbor(myEmbeddings, e)) +input.query(e)=embed(@query) +query=Do Cholesterol Statin Drugs Cause Breast Cancer? +``` + +The `embed` function shown here can be used to embed a query text using the same model(s) as used for chunks. If embedding outside Vespa you can [pass the tensor value](/en/reference/ranking/tensor#tensor-literal-form) instead. 
See the [nearest neighbor guide](/en/querying/nearest-neighbor-search-guide#hybrid-sparse-and-dense-retrieval-methods-with-vespa) for more. + +Text matching works across chunks as if the chunks were re-joined into one text field. However, a proximity gap is inserted between chunks so that tokens in different chunks are by default very (infinitely) far away when evaluating phrase and near matches (however, see [on configuring this](/en/reference/schemas/schemas#rank-element-gap)). + +Nearest neighbor search with many chunks will retrieve the documents where any single chunk embedding is close to the query embedding. + +## Ranking with chunks + +Ranking in Vespa is done by [mathematical expressions](/en/ranking/ranking-expressions-features) (hand-written or machine-learned) combining rank features. You'll typically want to use features that capture both how well vector embeddings and textual query terms matched the chunks. + +For vector search, the `closeness(dimension,field)` feature will contain the distance between the query vector and the _closest_ chunk embedding. In addition, the `closest(field)` feature will return a tensor providing the label(s) of the chunk which was closest. + +For text matching, all features are available as if the entire chunk array was a single string field, but with an infinitely large proximity gap between each element to treat each element as independent. When the array elements are chunks of the same text, you'd prefer to get a relevance contribution from matching adjacent elements since it means you are matching adjacent words in the source text. To achieve this, configure the `element-gap` of your chunk array to a low value (e.g.
0 to 3, depending on how well your chunking strategy identifies semantic transitions): + +```js +rank-profile myProfile { + rank myChunks { + element-gap: 1 + } + } +``` + +Using vector closeness and the normal text match features will help you rank documents mostly based on the text having the single best match to the query. Sometimes it is also useful to capture how well the text as a whole matches the query. For vectors, you can do this by computing and aggregating closeness to each vector using a [tensor expression](/en/ranking/tensor-user-guide#ranking-with-tensors) in your ranking expression, while for text matching you can use the `elementSimilarity(field)` feature, or the [elementwise(bm25(field),dimension,cell_type)](/en/reference/ranking/rank-features#elementwise-bm25) feature which returns a tensor containing the bm25 score of each chunk. + +## Layered ranking: Selecting chunks to return + +A search result will contain the top ranked documents including all fields you are requesting or [configuring](/en/querying/document-summaries), including all chunks of those documents, whether relevant or not. This is fine when every document has few chunks, but when they can have many, there are two problems: + +- Putting many irrelevant chunks into the context window of the LLM decreases quality, or may make the context window infeasibly large. +- Sending many chunks over the network increases latency and can impact other queries running at the same time. + +To solve both of these, we can use [layered ranking](https://blog.vespa.ai/introducing-layered-ranking-for-rag-applications/): Rank the chunks in the highest ranked documents, and select only the best ones. + +To do this, specify the ranking function that will select the chunks to return, using `select-elements-by`.
Here's a full example: + +```js expandable +schema docs { + document docs { + + field myEmbeddings type tensor<float>(chunk{}, x[386]) { + indexing: attribute + } + + field myChunks type array<string> { + indexing: index | summary + summary { + select-elements-by: best_chunks + } + } + + } + + rank-profile default { + + inputs { + query(embedding) tensor<float>(x[386]) + } + + function my_distance() { + expression: euclidean_distance(query(embedding), attribute(myEmbeddings), x) + } + + function my_distance_scores() { + expression: 1 / (1+my_distance) + } + + function my_text_scores() { + expression: elementwise(bm25(myChunks), chunk, float) + } + + function chunk_scores() { + expression: merge(my_distance_scores, my_text_scores, f(a,b)(a+b)) + } + + function best_chunks() { + expression: top(3, chunk_scores) + } + + first-phase { + expression: sum(chunk_scores()) + } + + summary-features { + best_chunks + } + + } +} +``` + +With this, we can use the powerful ranking framework in Vespa to select the best chunks to provide to the LLM, without sending any chunks that won't be used over the network. diff --git a/mintlify-docs/en/ranking/bm25.mdx b/mintlify-docs/en/ranking/bm25.mdx new file mode 100644 index 0000000000..d78258e5f4 --- /dev/null +++ b/mintlify-docs/en/ranking/bm25.mdx @@ -0,0 +1,57 @@ +--- +title: "The BM25 rank feature" +--- + +The [bm25 rank feature](/en/reference/ranking/rank-features#bm25) implements the [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) ranking function used to estimate the relevance of a text document given a search query. It is a pure text ranking feature which operates over an [indexed string field](/en/reference/schemas/schemas#indexing-index). The feature is cheap to compute, about 3-4 times faster than [nativeRank](/en/ranking/nativerank), while still providing a good rank score quality wise. It is a good candidate to use in a first phase ranking function when ranking text documents.
+ +## Ranking function + +The *bm25* feature calculates a score for how good a query with terms $q_{1}, \ldots, q_{n}$ matches an indexed string field *t* in a document *D*. The score is calculated as follows: + +$$ +\sum_{i}^{n} \text{IDF}(q_{i}) \cdot \frac{f(q_{i}, D) \cdot (k_{1} + 1)}{f(q_{i}, D) + k_{1} \cdot \left(1 - b + b \cdot \frac{\text{field\_len}}{\text{avg\_field\_len}}\right)} +$$ + +Where the components in the function are: + +- $\text{IDF}(q_{i})$: The [inverse document frequency](https://en.wikipedia.org/wiki/Tf%E2%80%93idf#Inverse_document_frequency) (*IDF*) of query term *i* in field *t*. This is calculated as: + $$ + \log\left(1 + \frac{N - n(q_{i}) + 0.5}{n(q_{i}) + 0.5}\right) + $$ + *N* is the total number of documents on the content node. $n(q_{i})$ is the number of documents containing query term *i* for field *t*, which is calculated per index existing for that field. The max value among the indexes is used in the calculation, which typically comes from the largest [disk index](/en/content/proton#index). + As the *IDF* is calculated per content node and index, slight variations might occur. To use the same *IDF* across all content nodes, set it as the *significance* on each query term using [annotations](/en/reference/querying/yql#annotations). +- $f(q_{i}, D)$: The number of occurrences (term frequency) of query term *i* in the field *t* of document *D*. For multi-value fields we use the sum of occurrences over all elements. +- $\text{field\_len}$: The field length (in number of words) of field *t* in document *D*. For multi-value fields we use the sum of field lengths over all elements.
+- $\text{avg\_field\_len}$: The average field length of field *t* among the documents on the content node. Can be configured using [rank-properties](/en/reference/ranking/rank-feature-configuration#bm25). +- $k_{1}$: A parameter used to limit how much a single query term can affect the score for document *D*. With a higher value the score for a single term can continue to go up relatively more when more occurrences for that term exist. Default value is 1.2. Can be configured using [rank-properties](/en/reference/ranking/rank-feature-configuration#bm25). +- $b$: A parameter used to control the effect of the field length of field *t* compared to the average field length. Default value is 0.75. Can be configured using [rank-properties](/en/reference/ranking/rank-feature-configuration#bm25). + +## Example + +In the following example we have an indexed string field *content*, and a rank profile using the *bm25* rank feature. Note that the field must be enabled for usage with the bm25 feature by setting the *enable-bm25* flag in the [index](/en/reference/schemas/schemas#index) section of the field definition. + +```js +schema example { + document example { + field content type string { + indexing: index | summary + index: enable-bm25 + } + } + rank-profile default { + first-phase { + expression { + bm25(content) + } + } + } +} +``` + +If the *enable-bm25* flag is turned on after documents are already fed then [proton](/en/content/proton) performs a [memory index flush](/en/content/proton#memory-index-flush) followed by a [disk index fusion](/en/content/proton#disk-index-fusion) to prepare the posting lists for use with *bm25*.
+ +Use the [custom component state API](/en/content/proton#custom-component-state-api) on each content node and examine `pending_urgent_flush` to determine if the preparation is still ongoing: + +```bash +/state/v1/custom/component/documentdb/mydoctype/subdb/ready/index +``` \ No newline at end of file diff --git a/mintlify-docs/en/ranking/cross-encoders.mdx b/mintlify-docs/en/ranking/cross-encoders.mdx new file mode 100644 index 0000000000..3cbda39e55 --- /dev/null +++ b/mintlify-docs/en/ranking/cross-encoders.mdx @@ -0,0 +1,286 @@ +--- +title: "Ranking With Transformer Cross-Encoder Models" +sidebarTitle: "Cross-encoder transformer ranking" +--- + +[Cross-Encoder Transformer](https://blog.vespa.ai/pretrained-transformer-language-models-for-search-part-4/) based text ranking models are generally more effective than [text embedding](/en/rag/embedding) models as they take both the query and the document as input with full cross-attention between all the query and document tokens. + +The downside of cross-encoder models is the computational complexity. This document is a guide on how to export cross-encoder Transformer based models from [huggingface](https://huggingface.co/), and how to configure them for use in Vespa. + + +## Exporting cross-encoder models + +For exporting models from HF to [ONNX](/en/ranking/onnx), we recommend the [Optimum](https://huggingface.co/docs/optimum/main/en/index) library. Example usage for two relevant ranking models. 
+ +Export [intfloat/simlm-msmarco-reranker](https://huggingface.co/intfloat/simlm-msmarco-reranker), which is a BERT-based transformer model for English texts: + + +```bash +$ optimum-cli export onnx --task text-classification -m intfloat/simlm-msmarco-reranker ranker +``` + + +Export [BAAI/bge-reranker-base](https://huggingface.co/BAAI/bge-reranker-base), which is a ROBERTA-based transformer model for English and Chinese texts (multilingual): + + +```bash +$ optimum-cli export onnx --task text-classification -m BAAI/bge-reranker-base ranker +``` + + +These two example ranking models use different language model [tokenization](/en/reference/rag/embedding#huggingface-tokenizer-embedder) and also different transformer inputs. + +After the above Optimum export command you have two important files that are needed for importing the model to Vespa: + + +```bash +├── ranker +│   └── model.onnx + └── tokenizer.json +``` + + +The Optimum tool also supports various Transformer optimizations, including quantization to optimize the model for faster inference. + + +## Importing ONNX and tokenizer model files to Vespa + +Add the generated `model.onnx` and `tokenizer.json` files from the `ranker` directory created by Optimum to the Vespa [application package](/en/basics/applications): + + +```bash +├── models +│   └── model.onnx + └── tokenizer.json +├── schemas +│   └── doc.sd +└── services.xml +``` + + + +## Configure tokenizer embedder + +To speed up inference, Vespa avoids re-tokenizing the document tokens, so we need to configure the [huggingface-tokenizer-embedder](/en/reference/rag/embedding#huggingface-tokenizer-embedder) in the `services.xml` file: + + +```xml +<container id="default" version="1.0"> + .. + <component id="tokenizer" type="hugging-face-tokenizer"> + <model path="models/tokenizer.json"/> + </component> + ..
+</container> +``` + + +This allows us to use the tokenizer while indexing documents in Vespa and also at query time to map (embed) query text to language model tokens. + + +## Using tokenizer in schema + +Assuming we have two fields that we want to index and use for re-ranking (title, body), we can use the `embed` indexing expression to invoke the tokenizer configured above: + + +```js +schema my_document { + document my_document { + field title type string {..} + field body type string {..} + } + field tokens type tensor<float>(d0[512]) { + indexing: (input title || "") . " " . (input body || "") | embed tokenizer | attribute + } +} +``` + + +The above will concat the title and body input document fields, and input to the `hugging-face-tokenizer` tokenizer which saves the output tokens as float (101.0). To use the generated `tokens` tensor in ranking, the tensor field must be defined with [attribute](/en/content/attributes). + +## Using the cross-encoder model in ranking + +Cross-encoder models are not practical for *retrieval* over large document volumes due to their complexity, so we configure them using [phased ranking](/en/ranking/phased-ranking). + +### Bert-based model + +Bert-based models have three inputs: + +- input_ids +- token_type_ids +- attention_mask + + +The [onnx-model](/en/reference/schemas/schemas#onnx-model) configuration specifies the input names of the model and how to calculate them. It also specifies the file `models/model.onnx`. Notice also the [GPU](/en/operations/self-managed/vespa-gpu-container). GPU inference is not required, and Vespa will fall back to CPU if no GPU device is found. See the section on [performance](#performance). 
+ + +```js expandable +rank-profile bert-ranker inherits default { + inputs { + query(q_tokens) tensor<float>(d0[32]) + } + onnx-model cross_encoder { + file: models/model.onnx + input input_ids: my_input_ids + input attention_mask: my_attention_mask + input token_type_ids: my_token_type_ids + gpu-device: 0 + } + function my_input_ids() { + expression: tokenInputIds(256, query(q_tokens), attribute(tokens)) + } + + function my_token_type_ids() { + expression: tokenTypeIds(256, query(q_tokens), attribute(tokens)) + } + + function my_attention_mask() { + expression: tokenAttentionMask(256, query(q_tokens), attribute(tokens)) + } + + first-phase { + expression: #depends on the retriever used + } + + # The output of this model is a tensor of size ["batch", 1] + global-phase { + rerank-count: 25 + expression: onnx(cross_encoder){d0:0,d1:0} + } +} +``` + + +The example above limits the sequence length to `256` using the built-in [convenience functions](/en/reference/ranking/rank-features#tokenInputIds(length, input_1, input_2, ...)) for generating token sequence input to Transformer models. Note that `tokenInputIds` uses 101 as start of sequence and 102 as padding. This is only compatible with BERT-based tokenizers. See section on [performance](#performance) about sequence length and impact on inference performance. + + +### Roberta-based model + +ROBERTA-based models only have two inputs (input_ids and attention_mask). In addition, the default tokenizer start of sequence token is 1 and end of sequence is 2. In this case we use the `customTokenInputIds` function in `my_input_ids` function. See [customTokenInputIds](/en/reference/ranking/rank-features#customTokenInputIds(start_sequence_id, sep_sequence_id, length, input_1, input_2, ...)). 
+ + +```js expandable +rank-profile roberta-ranker inherits default { + inputs { + query(q_tokens) tensor<float>(d0[32]) + } + onnx-model cross_encoder { + file: models/model.onnx + input input_ids: my_input_ids + input attention_mask: my_attention_mask + gpu-device: 0 + } + function my_input_ids() { + expression: customTokenInputIds(1, 2, 256, query(q_tokens), attribute(tokens)) + } + + function my_attention_mask() { + expression: tokenAttentionMask(256, query(q_tokens), attribute(tokens)) + } + + first-phase { + expression: #depends on the retriever used + } + + # The output of this model is a tensor of size ["batch", 1] + global-phase { + rerank-count: 25 + expression: onnx(cross_encoder){d0:0,d1:0} + } +} +``` + +## Using the cross-encoder model at query time + +At query time, we need to tokenize the user query using the [embed](/en/rag/embedding#embedding-a-query-text) support. + +Embedding the query text sets the `query(q_tokens)` tensor that we defined in the ranking profile. + + +```json +{ + "yql": "select title,body from doc where userQuery()", + "query": "semantic search", + "input.query(q_tokens)": "embed(tokenizer, \"semantic search\")", + "ranking": "bert-ranker" +} +``` + + +The retriever (query + first-phase ranking) can be anything, including [nearest neighbor search](/en/querying/nearest-neighbor-search) a.k.a. dense retrieval using bi-encoders. + + +## Performance + +There are three major scaling dimensions: + +- The number of hits that are re-ranked ([rerank-count](/en/reference/schemas/schemas#globalphase-rerank-count)). Complexity is linear with the number of hits that are re-ranked. +- The size of the transformer model used. +- The sequence input length. Transformer models scale quadratically with the input sequence length. + +For models larger than 30-40M parameters, we recommend using GPU to accelerate inference. Quantization of model weights can drastically improve serving efficiency on CPU.
See [Optimum Quantization](https://huggingface.co/docs/optimum/onnxruntime/usage_guides/quantization) + + +## Examples + +The [MS Marco](https://github.com/vespa-engine/sample-apps/tree/master/msmarco-ranking) sample application demonstrates using cross-encoders. + +## Using cross-encoders with multi-vector indexing + +When using [multi-vector indexing](https://blog.vespa.ai/semantic-search-with-multi-vector-indexing/) we can do the following to feed the best (closest) paragraph using the [closest()](../reference/ranking/rank-features#closest(name)) feature into re-ranking with the cross-encoder model. + + +```js +schema my_document { + document my_document { + field paragraphs type array<string> {..} + } + field tokens type tensor<float>(p{}, d0[512]) { + indexing: input paragraphs | embed tokenizer | attribute + } + field embedding type tensor<float>(p{}, x[768]) { + indexing: input paragraphs | embed embedder | attribute + } +} +``` + + +Notice that both tensor fields use the same mapped dimension name `p`.
+ + +```js expandable +rank-profile max-paragraph-into-cross-encoder inherits default { + inputs { + query(tokens) tensor<float>(d0[32]) + query(q) tensor<float>(x[768]) + } + first-phase { + expression: closeness(field, embedding) + } + function best_input() { + expression: reduce(closest(embedding)*attribute(tokens), max, p) + } + function my_input_ids() { + expression: tokenInputIds(256, query(tokens), best_input) + } + function my_token_type_ids() { + expression: tokenTypeIds(256, query(tokens), best_input) + } + + function my_attention_mask() { + expression: tokenAttentionMask(256, query(tokens), best_input) + } + match-features: best_input my_input_ids my_token_type_ids my_attention_mask + global-phase { + rerank-count: 25 + expression: onnx(cross_encoder){d0:0,d1:0} #Slice + } +} +``` + + +The `best_input` uses a tensor join between the `closest(embedding)` tensor and the `tokens` tensor, which then returns the tokens of the best-matching (closest) paragraph. + +This tensor is used in the other Transformer-related functions (`tokenTypeIds tokenAttentionMask tokenInputIds`) as the document tokens. + + diff --git a/mintlify-docs/en/ranking/lightgbm.mdx b/mintlify-docs/en/ranking/lightgbm.mdx new file mode 100644 index 0000000000..475b08a79f --- /dev/null +++ b/mintlify-docs/en/ranking/lightgbm.mdx @@ -0,0 +1,197 @@ +--- +title: "Ranking with LightGBM Models" +sidebarTitle: "Using LightGBM models" +--- + +[LightGBM](https://github.com/microsoft/LightGBM) is a gradient boosting framework, similar to [XGBoost](/en/ranking/xgboost). Among other [advantages](https://github.com/microsoft/LightGBM/blob/master/docs/Experiments.rst#comparison-experiment), one defining feature of LightGBM over XGBoost is that it directly supports categorical features. If you have models that are trained with [LightGBM](https://github.com/microsoft/LightGBM), Vespa can import the models and use them directly. 
+ +## Exporting models from LightGBM + +Vespa supports importing LightGBM's [`dump_model`](https://lightgbm.readthedocs.io/en/latest/pythonapi/lightgbm.Booster.html#lightgbm.Booster.dump_model). This dumps the tree model and other useful data such as feature names, objective functions, and values of categorical features to a JSON file. An example of training and saving a model suitable for use in Vespa is as follows. + +```python expandable +import json +import lightgbm as lgb +import numpy as np +import pandas as pd + +# Create random training set +features = pd.DataFrame({ + "feature_1": np.random.random(100), + "feature_2": np.random.random(100), + }) +targets = ((features["feature_1"] + features["feature_2"]) > 1.0) * 1.0 +training_set = lgb.Dataset(features, targets) + +# Train the model +params = { + 'objective': 'binary', + 'metric': 'binary_logloss', + 'num_leaves': 3, +} +model = lgb.train(params, training_set, num_boost_round=5) + +# Save the model +with open("lightgbm_model.json", "w") as f: + json.dump(model.dump_model(), f, indent=2) +``` + +While this particular model isn't doing anything really useful, the output file `lightgbm_model.json` can be imported directly into Vespa. + +See also a complete example of how to train a ranking function, using learning to rank with ranking losses, in this [notebook](https://github.com/vespa-engine/sample-apps/blob/master/commerce-product-ranking/notebooks/Train-lightgbm.ipynb). + +## Importing LightGBM models + +To import the LightGBM model into Vespa, add the model file to the application package under a directory named `models`, or a subdirectory under `models`. For instance, for the above model `lightgbm_model.json`, add it to the application package resulting in a directory structure like this: + +```bash +├── models +│ └── lightgbm_model.json +├── schemas +│ └── main.sd +└── services.xml +``` + +Note that an application package can have multiple models. 
After putting the model in the `models` directory, it is available for both ranking and [stateless model evaluation](/en/ranking/stateless-model-evaluation). + +## Ranking with LightGBM models + +Vespa has a [ranking feature](/en/reference/ranking/rank-features) called `lightgbm`. This ranking feature specifies the model to use in a ranking expression, relative under the `models` directory. Consider the following example: + +```js +schema test { + rank-profile classify inherits default { + first-phase { + expression: lightgbm("lightgbm_model.json") + } + } +} +``` + +Here, we specify that the model `lightgbm_model.json` (directly under the `models` directory) is applied to all documents matching a query which uses rank-profile `classify`. One important issue to consider is how to map features in the model to features that are available for Vespa to use in ranking. + +Take a look at the JSON file dumped from the example above: + +```json +{ + "name": "tree", + "version": "v3", + "num_class": 1, + "num_tree_per_iteration": 1, + "label_index": 0, + "max_feature_idx": 1, + "average_output": false, + "objective": "binary sigmoid:1", + "feature_names": [ + "feature_1", + "feature_2" + ], + "monotone_constraints": [], + "tree_info": [ + .... + ], + "pandas_categorical": [] +} +``` + +Here, the section `feature_names` consists of the feature names used in the training set. When this model is evaluated in Vespa, Vespa expects that these feature names are valid [rank features](/en/reference/ranking/rank-features). Examples are `attribute(field_name)` for a value that should be retrieved from a document, `query(name)` for a value that should be retrieved from the query, or possibly from other more complex rank features such as `fieldMatch(name)`. You can also define [functions](/en/ranking/ranking-expressions-features#function-snippets) (which are valid rank features) with the LightGBM feature name to perform the mapping. 
An example: + +```js +schema test { + document test { + field doc_attrib type double { + indexing: summary | attribute + } + } + rank-profile classify inherits default { + function feature_1() { + expression: attribute(doc_attrib) + } + function feature_2() { + expression: query(query_value) + } + first-phase { + expression: nativeRank + } + second-phase { + expression: lightgbm("lightgbm_model.json") + } + } +} +``` + +Here, when Vespa evaluates the model, it retrieves the value of `feature_1` from a document attribute called `doc_attrib`, and the value of `feature_2` from a query value passed along with the query. + +One can also use `attribute(doc_attrib)` directly as a feature name when training the LightGBM model. This allows dumping rank features from Vespa to train a model directly. + +Here, we specify that the model `lightgbm_model.json` is applied to the top ranking documents by the first-phase ranking expression. The query request must specify `classify` as the [ranking.profile](/en/reference/api/query#ranking.profile). See also [Phased ranking](/en/ranking/phased-ranking) on how to control the number of data points/documents that are exposed to the model. + +Generally the run time complexity is determined by: + +- The number of documents evaluated [per thread](/en/performance/sizing-search) / number of nodes and the query filter +- The complexity of computing features. For example `fieldMatch` features are 100x more expensive than `nativeFieldMatch/nativeRank`. +- The number of trees and the maximum depth per tree + +Serving latency can be brought down by [using multiple threads per query request](/en/performance/practical-search-performance-guide#multithreaded-search-and-ranking). + +## Objective functions + +If you have used XGBoost with Vespa previously, you might have noticed you have to wrap the `xgboost` feature in for instance a `sigmoid` function if using a binary classifier.
That should not be needed in LightGBM, as that information is passed along in the model dump as seen in the `objective` section in the JSON output above. + +Currently, Vespa supports importing models trained with the following objectives: + +- `binary` +- `regression` +- `lambdarank` +- `rank_xendcg` + +For more information on LightGBM and objective functions, see [`objective`](https://lightgbm.readthedocs.io/en/latest/Parameters.html#objective). + +## Using categorical features + +LightGBM has the option of directly training on categorical features. Example: + +```python +features = pd.DataFrame({ + "numerical": np.random.random(5), + "categorical": pd.Series(np.random.permutation(["a", "b", "c", "d", "e"]), dtype="category"), + }) +``` + +Here, the `categorical` feature is marked with the Pandas dtype `category`. This tells LightGBM to send the categorical values in the `pandas_categorical` section in the JSON example above. This allows Vespa to extract the proper categorical values to use. This is important, because other methods of using categorical variables will result in the category values being "1", "2", … "n", and sending in "a" in this case for model evaluation will probably result in an erroneous result. To ensure that categorical variables are properly handled, construct training data based on Pandas tables and use the `category` dtype on categorical columns.
+ +In Vespa categorical features are strings, so mapping the above feature for instance to a document field would be: + +```js +schema test { + document test { + field numeric_attrib type double { + indexing: summary | attribute + } + field string_attrib type string { + indexing: summary | attribute + } + } + rank-profile classify inherits default { + function numerical() { + expression: attribute(numeric_attrib) + } + function categorical() { + expression: attribute(string_attrib) + } + first-phase { + expression: lightgbm("lightgbm_model.json") + } + } +} +``` + +Here, the string value of the document would be used as the feature value when evaluating this model for every document. + +## Debugging Vespa inference score versus LightGBM predict score + +- When dumping LightGBM models to a JSON representation some of the model information is lost (e.g. the `base_score` or the optimal number of trees if trained with early stopping). +- For training, features should be scraped from Vespa, using either `match-features` or `summary-features` so that features from offline training match the online Vespa computed features. Dumping features can also help debug any differences by zooming into specific query-document pairs using the [recall](/en/reference/api/query#recall) parameter. +- It's also important to use the highest possible precision when reading Vespa features for training as Vespa outputs features using `double` precision. If the training routine rounds features to `float` or other more compact floating number representations, feature split decisions might differ in Vespa versus LightGBM. +- In a distributed setting when multiple nodes use the model, text matching features such as `nativeRank`, `nativeFieldMatch`, `bm25` and `fieldMatch` might differ, depending on which node produced the hit. The reason is that all these features use [term(n).significance](/en/reference/ranking/rank-features#query-features), which is computed from the locally indexed corpus.
The `term(n).significance` feature is related to *Inverse Document Frequency (IDF)*. The `term(n).significance` should be set by a searcher in the container for global correctness as each node will estimate the significance values from the local corpus. \ No newline at end of file diff --git a/mintlify-docs/en/ranking/multivalue-query-operators.mdx b/mintlify-docs/en/ranking/multivalue-query-operators.mdx new file mode 100644 index 0000000000..60e4642a5c --- /dev/null +++ b/mintlify-docs/en/ranking/multivalue-query-operators.mdx @@ -0,0 +1,238 @@ +--- +title: "Multivalue Query Operators" +--- + +This article is a followup to the [ranking introduction](/en/ranking/ranking-intro). Some use cases in this guide are better solved using [tensors](/en/ranking/tensor-user-guide). + +## dotProduct and wand + +_wand_ (aka Parallel Wand) is a search operator that can be used for efficient top-k retrieval. It implements the _Weak AND/Weighted AND_ algorithm as described by Broder et al. in [Efficient query evaluation using a two-level retrieval process](https://dl.acm.org/doi/10.1145/956863.956944). See [using Wand with Vespa](/en/ranking/wand) for details. + +_dotProduct_ is the brute force equivalent to _wand_. They are both used to search for documents where weighted tokens in a field matches a subset of weighted tokens in the query. The [raw scores](#raw-scores-and-query-item-labeling) produced by _dotProduct_ are equivalent to those produced by _wand_. + +The difference is that _wand_ will perform local optimizations in order to retrieve the top-k targetHits results that would be returned by inner maximum _dotproduct_. Which one of these are most cost-efficient is complex as it depends on the size of the vocabulary (features) and: + +- Number of query terms and their weight distribution +- Number of document terms and their weight distribution + +It is easy to compare the two approaches. 
One can run benchmarks using either and compare latency and total number of hits, if on average, total number of hits approaches the total number of documents matching the other filters in the query, it is cheaper to use tensor dot product. + +## dotProduct example + +Refer to the [dotProduct](/en/reference/querying/yql#dotproduct) reference. _dotProduct_ calculates the dot product of a weighted set in the query and a weighted set in a field - and stores the result in [raw scores](#raw-scores-and-query-item-labeling), which is used in ranking expressions. + +Use a weighted set field (use [attribute](/en/content/attributes) with _fast-search_ for higher performance) in the document to store the tokens: + +```bash +field features type weightedset<string> { + indexing: summary | attribute + attribute: fast-search +} +``` + +The query needs to be prepared by a custom searcher or sent using [YQL](/en/reference/querying/yql#dotproduct). The code below shows the relevant part. If using multiple dot products in the same query it is a good idea to label them. This enables us to use individual dot product scores when ranking results later. + +```java +Item makeDotProduct(String label, String field, Map<String, Integer> token_map) { + DotProductItem item = new DotProductItem(field); + item.setLabel(label); + for (Map.Entry<String, Integer> entry : token_map.entrySet()) { + item.addToken(entry.getKey(), entry.getValue()); + } + return item; +} +``` + +_dotProduct_ produces [raw scores](#raw-scores-and-query-item-labeling) that can be used in a ranking expression. 
The simplest approach is to use the sum of all raw scores for the field containing the tokens: + +```bash +rank-profile default { + first-phase { + expression: rawScore(features) + } +} +``` + +For better control, label each dot product in the query and use their scores individually: + +```bash +rank-profile default { + first-phase { + expression: itemRawScore(dp1) + itemRawScore(dp2) + } +} +``` + +## IN operator example + +Refer to the [in operator](/en/reference/querying/yql#in) reference. The use cases for the _in_ operator are for limiting the search result to documents with specific properties that can have a large number of distinct values, like: + +- We know who the user is, and want to restrict to documents written by one of the user's friends +- We have the topic area the user is interested in, and want to restrict to the top-ranked authors for this topic +- We have recorded the documents that have been clicked by users in the last 10 minutes, and want to search only in these + +Using the _in_ operator is more performant than a big OR expression: + +```bash +select * from data where category in ('cat1', 'cat2', 'cat3') +select * from data where category = 'cat1' OR category = 'cat2' OR category = 'cat3' ... +``` + +See [multi-lookup set filtering](/en/performance/feature-tuning#multi-lookup-set-filtering) for details. + +Note that in most actual use cases, the field we are searching in is some sort of user ID, topic ID, group ID, or document ID and can often be modeled as a number - usually in a field of type `long` (or `array<long>` if multiple values are needed). If a string field is used, it will usually also be some sort of ID; if you have data in a string field intended for searching with the _in_ operator, then using `match: word` (default for attribute string fields) is recommended. 
+ +The following example shows how to use the _in_ operator programmatically in a [Java Searcher](/en/applications/searchers) to search a category field: + +```js +field category type string { + indexing: attribute | summary + attribute: fast-search + rank: filter +} +``` + +The Searcher will typically do the following: + +- Create a new `StringInItem` (or `NumericInItem`) for the field you want to use as a filter. +- Find the tokens to insert into the query item. +- Combine the new `StringInItem` with the original query by using an `AndItem`. + +A simple code example adding a hardcoded filter containing 10 tokens: + +```java +private Result hardCoded(Query query, Execution execution) { + var filter = new StringInItem("category"); + filter.addToken("magazine1"); + filter.addToken("magazine2"); + filter.addToken("magazine3"); + filter.addToken("tv"); + filter.addToken("tabloid1"); + filter.addToken("tabloid2"); + filter.addToken("tabloid3"); + filter.addToken("tabloid4"); + filter.addToken("tabloid5"); + filter.addToken("tabloid6"); + var tree = query.getModel().getQueryTree(); + var oldRoot = tree.getRoot(); + var newRoot = new AndItem(); + newRoot.addItem(oldRoot); + newRoot.addItem(filter); + tree.setRoot(newRoot); + query.trace("MyCustomFilterSearcher added hardcoded filter: ", true, 2); + return execution.search(query); +} +``` + +The biggest challenge here is finding the tokens to insert; normally the incoming search request URL might not contain all the tokens directly. For example, the search request could contain the user id, and a lookup (in a database or a Vespa index) would fetch the relevant categories for this user. + +Refer to javadoc for more details: [NumericInItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/NumericInItem.html) and [StringInItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/StringInItem.html). 
+ +## Pin results example + +To pin a result to a fixed position, use a tiered ranking function. This means, define a range in ranking scores for the pinned queries, and rank the organic results below. (It is _possible_ to mix organic/pinned, to keep the example below simple, these are separate ranges) + +First, define the key for the query - the key is the ID used to pin results. How to define the key is user defined, a simple way is to hash the query string, like: + +```bash +$ md5 -q -s "65 inch tv set" +``` + +As a document can be pinned for multiple queries, use a [weightedset](/en/reference/schemas/schemas#weightedset) to store multiple key/position entries: + +```js +field pinned_query_position type weightedset<string> { + indexing: index | summary +} +``` + +In this set, we will map from key to a value - the value (_elementWeight_) will be used in a ranking profile for position. In this example, the "organic" ranking is a closeness function: + +```js expandable +rank-profile semantic_pin inherits default { + inputs { + query(query_embedding) tensor<float>(x[384]) + } + + function pin_topten(fieldname) { + expression { + if (elementCompleteness(fieldname).elementWeight > 0, + 11-elementCompleteness(fieldname).elementWeight, 0) + } + } + + first-phase { + expression { + # restrict the "organic" ranking contribution to [0-1>, add top ten contributions + min(closeness(field, doc_embedding), 0.99999) + + pin_topten(pinned_query_position) + } + } + + match-features { + # Use match-features to easily debug ranking + closeness(field, doc_embedding) + elementCompleteness(pinned_query_position).elementWeight + } +} +``` + +In short, we cap the organic range to 0-1, and add more to the ranking the higher the pinned position (11-pos). 
Add an OR-term that adds the rank contribution from matching in the weighted set `pinned_query_position`: + +```bash +$ vespa query 'select * from items where true + or rank (pinned_query_position contains "af6f44472a1d3b00d40d04309067b739")' \ + ranking=semantic_pin +``` + +Above, a `true` statement is used for simplicity, replace this with the real query. + +A sample document, with query keys and positions - here, a document is pinned for two queries: + +```json +[ + { + "put": "id:mynamespace:items::1", + "fields": { + "pinned_query_position": { + "af6f44472a1d3b00d40d04309067b739": 1, + "fe5e6fc6358aa1c59b8838852040bfb4": 2 + } + } + } +] +``` + +Snippet from a sample query result: + +```json +{ + "id": "id:mynamespace:items::1", + "relevance": 10.0, + "source": "items", + "fields": { + "matchfeatures": { + "closeness(field,doc_embedding)": 0.0, + "elementCompleteness(pinned_query_position).elementWeight": 1.0 + }, + "sddocname": "items", + "documentid": "id:mynamespace:items::1", + "pinned_query_position": { + "fe5e6fc6358aa1c59b8838852040bfb4": 2, + "af6f44472a1d3b00d40d04309067b739": 1 + } + } +} +``` + +Observe how a pinned result for position 1 gets a 10 relevance (rank) score. + +## Raw scores and query item labeling + +Vespa ranking is flexible and relatively decoupled from document matching. The output from the matching pipeline typically indicates how the different words in the query matches a specific document and lets the ranking framework figure out how this translates to match quality. + +However, some of the more complex match operators will produce scores directly, rather than expose underlying match information. A good example is the _wand_ operator. During ranking, a wand will look like a single word that has no detailed match information, but rather a numeric score attached to it. This is called a _raw score_, and can be included in ranking expressions using the `rawScore` feature. 
+ +The `rawScore` feature takes a field name as parameter and gives the sum of all raw scores produced by the query for that field. If more fine-grained control is needed (the query contains multiple operators producing raw scores for the same field, but we want to handle those scores separately in the ranking expression), the `itemRawScore` feature may be used. This feature takes a query item _label_ as parameter and gives the raw score produced by that item only. + +Query item labeling is a generic mechanism that can be used to attach symbolic names to query items. A query item is labeled by using the `setLabel` method on a query item in the search container query API. \ No newline at end of file diff --git a/mintlify-docs/en/ranking/nativerank.mdx b/mintlify-docs/en/ranking/nativerank.mdx new file mode 100644 index 0000000000..86b140d7e5 --- /dev/null +++ b/mintlify-docs/en/ranking/nativerank.mdx @@ -0,0 +1,125 @@ +--- +title: "The nativeRank rank feature" +--- + +The *nativeRank* text match score is a reasonably good text feature score which is computed at an acceptable performance by Vespa. It computes a normalized rank score which tries to capture how well query terms matched the set of searched index fields. + +The *nativeRank* feature is computed as a linear combination of three other matching features: *nativeFieldMatch*, *nativeProximity* and *nativeAttributeMatch*, see [the nativeRank reference for details](/en/reference/ranking/nativerank). + +Ranking signals that might be useful, like freshness (the age of the document compared to the time of the query) or any other document or query features, are not a part of the *nativeRank* calculation. These need to be added to the final ranking function depending on application specifics. 
+ +### Weight, significance and connectedness + +Modify the values of the match features from the query by sending *weight*, *significance* and *connectedness* with the query: + +| Feature input | Description | +| --- | --- | +| Weight | Set query term [weight](/en/reference/querying/yql#weight). Example: `... where (title contains ({weight:200}"heads") AND title contains "tails")` specifies that `heads` is twice as important for the final rank score than `tails` (the default weight is 100). <br/><br/> The term weight is used in several text scoring features, including [fieldMatch(*name*).weight](/en/reference/ranking/rank-features#fieldMatch(name).weight) and [nativeRank](/en/ranking/nativerank). Note that the term weight is not applicable for all text scoring features, for example [bm25](/en/ranking/bm25) does not use the term weight.<br/><br/> Configure static field weights in the [schema](/en/reference/schemas/schemas#weight). | +| Significance | Significance is an indication of how rare a term is in the corpus of the language, used by a number of text matching [rank features](/en/reference/ranking/rank-features). This can be set explicitly for each term in [the query](/en/reference/querying/yql#significance), or by calling item.setSignificance() in a [Searcher](/en/applications/searchers).<br/><br/> With *indexed search*, default significance values are calculated automatically during indexing. However, unless the indexed corpus is representative of the word frequencies in the user's language, relevance can be improved by passing significances derived from a representative corpus. Relative significance is accessible in ranking through the [fieldMatch(*name*).significance](/en/reference/ranking/rank-features#fieldMatch(name).significance) feature. 
Weight and significance are also averaged into [fieldMatch(*name*).importance](/en/reference/ranking/rank-features#fieldMatch(name).importance) for convenience.<br/><br/> *Streaming search* does not compute term significance, queries should pass this with the query terms. [Read more](/en/performance/streaming-search#differences-in-streaming-search). | +| Connectedness | Signify the degree of connection between adjacent terms in the query - set query term [connectivity](/en/reference/querying/yql#connectivity) to another term. <br/><br/> For example, the query `new york newspaper` should have a higher connectedness between the terms "new" and "york" than between "york" and "newspaper" to rank documents higher if they contain "new york" as a phrase. <br/><br/> Term connectedness is taken into account by [fieldMatch(*name*).proximity](/en/reference/ranking/rank-features#fieldMatch(name).proximity), which is also an important contribution to [fieldMatch(*name*)](/en/reference/ranking/rank-features#fieldMatch(name)). Connectedness is a normalized value which is 0.1 by default. It must be set by a custom Searcher, looking up connectivity information from somewhere - there is no query syntax for it. 
| + +## Using nativeRank + +In this section we describe a blog search application that uses *nativeRank* as the core text matching rank feature, in combination with other signals that could be important for a blog search type of application: + +```js +schema blog { + document blog { + field title type string { + indexing: summary | index + } + field body type string { + indexing: summary | index + } + #The quality of the source in the range 0 - 1.0 + field sourcequality type float { + indexing: summary | attribute + } + #seconds since epoch + field timestamp type long { + indexing: summary | attribute + } + field url type uri { + indexing: summary + } + } + fieldset default { + fields: title, body + } +} +``` + +In addition to the core text match feature (*nativeRank*), we have a pre-calculated document feature which indicates the quality of the document represented by the field *sourcequality* of type float. The *sourcequality* field has the [attribute](/en/reference/schemas/schemas#attribute) property which is required to refer to that field in a ranking expression: *attribute(name)*. The sourcequality score could be calculated from a web map, or any other source and is outside the scope of this document. + +We also know when the document was published (timestamp) and this document attribute can be used to calculate the age of the document. To summarize, we have three main rank signals that we would like our blog ranking function to consist of: + +- How well the query matches the document text, where we use the *nativeRank* feature score. +- How fresh the document is, where we use the built-in *age(name)* feature to build our own feature score. +- The quality of the document, calculated outside of Vespa and referenced in a ranking expression by *attribute(name)*. 
+ +## Designing our own blog freshness ranking function + +Vespa has several [built in rank-features](/en/reference/ranking/rank-features) that we can use directly, or we can design our own as well if the built-in features don't meet our requirements. The built in *freshness(name)* rank-feature is linearly decreasing from 0 age (now) to the configured max age. Ideally we would like to have a different shape for our blog application, so we define the following feature which has the characteristic we want: + +```js +function freshness() { + expression: exp(-1 * age(timestamp)/(3600*12)) +} +``` + + +Timestamp resolution is seconds, so we divide by 3600 to go to an hour resolution, and further we divide by 12 to control the slope of the freshness function. Below is a plot of two freshness functions with different slope numbers for comparison: + +<Frame> +![Blog freshness ranking plot: freshness score](/assets/img/relevance/blog-freshness.png) +</Frame> + +The beauty is that we can control and experiment with the freshness rank score given the document age. We can define any shape over any resolution that we think will fit the exact application requirements. In our case we would like to have a non-linear relationship between the age of the document and the freshness score. We achieve this with an exponentially decreasing function (exp(-x)), where the sensitivity of x is higher when the document is really fresh compared to an old blog post (24 hours). + +## Putting our features together into a ranking expression + +We now need to put our three main ranking signals together into one ranking expression. We would like to control the weight of each component at query time, so we can at query time do analysis to figure out if a certain signal should be weighted more than others. We chose to combine our three signals into a normalized weighted sum of the three signals. 
The shape of each of the three signals might be tuned individually as we have seen with design of our own freshness feature and *nativeRank* tuning. Below is the final blog rank-profile with all relevant settings (properties) and ranking expressions: + +```js expandable +rank-profile blog inherits default { + weight title: 200 + weight body: 100 + rank-type body: about + rank-properties { + nativeFieldMatch.occurrenceCountTable.title: "linear(0,8000)" + } + + # our freshness rank feature + function freshness() { + expression: exp(-1 * age(timestamp)/(3600*12)) + } + + # our quality rank feature + function quality() { + expression: attribute(sourcequality) + } + + # normalization factor for the weighted sum + function normalization() { + expression: query(textMatchWeight) + query(qualityWeight) + query(deservesFreshness) + } + + # ranking function that runs over all matched documents, determined by the boolean query logic + first-phase { + expression: (query(textMatchWeight) * (nativeRank(title,body) + query(qualityWeight) * quality + query(deservesFreshness) * freshness))/normalization + } + + summary-features: nativeRank(title,body) age(timestamp) freshness quality + } +} +``` + +We can override the weight of each signal at query time with the [query api](/en/reference/api/query), passing down the weights: + +```bash +/search/?query=vespa+ranking&datetime=now&ranking.profile=blog&input.query(textMatchWeight)=0.1&input.query(deservesFreshness)=0.85 +``` + +It is also possible to override the user-defined rank-features in a custom searcher plugin, note that we also use the *datetime* parameter to be able to calculate the age of the document. + +The [summary-features](/en/reference/schemas/schemas#summary-features) allows us to have access to the individual ranking signals along with the hit's summary fields. 
\ No newline at end of file diff --git a/mintlify-docs/en/ranking/onnx.mdx b/mintlify-docs/en/ranking/onnx.mdx new file mode 100644 index 0000000000..f5f354b8fa --- /dev/null +++ b/mintlify-docs/en/ranking/onnx.mdx @@ -0,0 +1,320 @@ +--- +title: "Ranking With ONNX Models" +sidebarTitle: "Using ONNX Models" +--- + +Vespa supports advanced ranking models through its tensor API. +If your model is in the [ONNX format](https://onnx.ai/), Vespa can import and use the model directly. +You can use ONNX models with Vespa [embedder](/en/rag/embedding) functionality or in [ranking](/en/ranking/ranking-intro). + + + +## Importing ONNX model files + +Add the file containing the ONNX models somewhere under the application package. +For instance, if your model file is `my_model.onnx`, +you could add it to the application package under a `files` directory, something like: + +```bash +├── files +│   └── my_model.onnx +├── schemas +│   └── main.sd +└── services.xml +``` + +An application package can have multiple onnx models. To download models during deployment, +see [deploying remote models](/en/applications/deployment#deploying-remote-models). + + + +## Ranking with ONNX models + +To make the above model available for ranking, you define the model in the schema, +and then you can refer to the model using the `onnx` (or `onnxModel`) ranking feature: + +```js +schema my_schema { + + document my_document { + field my_field type tensor(d0[1],d1[10]) { + indexing: attribute | summary + } + } + + rank-profile my_rank_profile { + + inputs { + query(myTensor) tensor(d0[1],d1[784]) + } + + onnx-model my_onnx_model { + file: files/my_model.onnx + input "model_input_0": attribute(my_field) + input "model_input_1": my_function + output "model_output_0": output_name + } + + function my_function() { + expression: tensor<float>(d0[1],d1[10])(d1) + } + + first-phase { + expression: sum( onnx(my_onnx_model).output_name ) + } + + } + +} +``` + +This defines the model called `my_onnx_model`. 
It is evaluated using the +`onnx` [rank feature](/en/reference/ranking/rank-features). +This rank feature specifies which model to evaluate in the ranking expression +and, optionally, which output to use from the model. + +The `onnx-model` section defines three things: + +1. The model's location under the applications package +2. The inputs to use for evaluation and where they should come from +3. The outputs to use for evaluation + +In the example above, the model should be found in `files/my_model.onnx`. This +model has two inputs. For inputs, the first name specifies the input as +named in the ONNX model file. The source is where the input should +come from. This can be either: + +- A document field: `attribute(field_name)` +- A query parameter: `query(query_param)` +- A constant: `constant(name)` +- A user-defined function: `function_name` + +For outputs, the output name is the name used in Vespa to specify the output. +If this is omitted, the first output in the ONNX file will be used. + +The output of a model is a tensor, however the rank score should result +in a single scalar value. In the example above we use `sum` to sum all the elements +of the tensor to a single value. You can also slice out parts of +the result using Vespa's [tensor API](/en/reference/ranking/ranking-expressions#tensor-functions). +For instance, if the output of the example above is a tensor with the two dimensions `d0` and `d1`, +and you want to extract the first value, this can be expressed by: + +```js +onnx(my_onnx_model).output_name{d0:0,d1:0} +``` + +Note that inputs to the ONNX model must be tensors; scalars are not supported. +The input tensors must have dimension names starting with `"d0"` for the first +dimension, and increasing for each dimension (i.e. `"d1"`, `"d2"`, etc.). The +result of the evaluation will likewise be a tensor with names `"d0"`, `"d1"`, etc. + +The types of document and input tensors are specified in the schema, as shown above. 
+You can pass tensors in HTTP requests by using the HTTP parameter +"input.query(myTensor)" (assuming the ranking expression contains "query(myTensor)"). + +A tensor example can be found in the +[sample application](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation). + + +#### Batch dimensions + +When training your model you will typically have an input that contains a dimension for batches, for instance an input with sizes `[-1, 784]`. Here, -1 typically denotes the batch dimension. During ONNX inference in ranking, Vespa uses batch size 1. + + +## Limitations on model size and complexity + +Note that in the above rank profile example, the `onnx` model +was evaluated in the first phase. In general, evaluating these types of models is +more suitable in the `second-phase` or `global-phase` phases. +See [phased ranking](/en/ranking/phased-ranking). + +Models in which data tensors are located in external data files are **only** supported when the following conditions are met: +- The model is used in an _embedder_, e.g. `hugging-face-embedder` (see [embedding](/en/rag/embedding)). +- The model is referenced using a URL, e.g. `url="https://example.com/my-onnx-model/model.onnx"`. +- All external data files are located in the same parent path/directory as the model file, + e.g. `https://example.com/my-onnx-model/model.onnx_data`. + +For _ranking_, Vespa supports only ONNX models that are self-contained (have no external data files) and below 2GB in size. +The same restriction applies to models included in the application package. + + + +## Examples + +The [MS Marco](https://github.com/vespa-engine/sample-apps/tree/master/msmarco-ranking) +sample application uses a cross-encoder model to re-rank documents. +The [model-exporting](https://github.com/vespa-engine/sample-apps/tree/master/examples/model-exporting) example +uses onnx models for embedding inference. 
+[custom-embeddings](https://github.com/vespa-engine/sample-apps/tree/master/custom-embeddings) +has an example of a PyTorch model that is exported to onnx format for use in re-ranking. + + +### Using Optimum to export models to ONNX format + +We can highly recommend using the [Optimum](https://huggingface.co/docs/optimum/index) library +for exporting models hosted on the Hugging Face model hub. + +For example, to export [BAAI/bge-small-en](https://huggingface.co/BAAI/bge-small-en) from the model hub to onnx format: + +```bash +$ python3 -m pip install optimum onnx onnxruntime +$ optimum-cli export onnx --library transformers --task feature-extraction -m BAAI/bge-small-en --optimize O3 model-output-dir +``` + +The exported files in `model-output-dir`: `model.onnx` and `tokenizer.json` can be imported directly +into the Vespa [huggingface-embedder](/en/rag/embedding#huggingface-embedder). + +Refer to [debugging onnx](#debugging-onnx-models). + +In many cases, there are also onnx checkpoints available, +for example [mixedbread-ai/mxbai-embed-large-v1](https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1/tree/main), +these models can then be downloaded and used in Vespa. + +```xml +<container id="default" version="1.0"> + <component id="mixedbread" type="hugging-face-embedder"> + <transformer-model url="https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1/resolve/main/onnx/model_quantized.onnx"/> + <tokenizer-model url="https://huggingface.co/mixedbread-ai/mxbai-embed-large-v1/raw/main/tokenizer.json"/> + <pooling-strategy>cls</pooling-strategy> + </component> + ... +</container> +``` + +Vespa defaults to `mean` pooling-strategy. +Consult the model card for the pooling method used. +Note the url pattern above. +The url must point to the actual file, not the model card. + +See [Limitations on Model Size and Complexity](#limitations-on-model-size-and-complexity) for how to use models +having external data files. 
+ +See [cross-encoders](/en/ranking/cross-encoders#exporting-cross-encoder-models) documentation for examples on how to +export cross-encoder re-rankers using the Optimum library. + + +### Using Auto Classes to export HF models to ONNX format + +Transformer-based models have named inputs and outputs that must be compatible +with the input and output names used by the [embedder](/en/rag/embedding). + +The [model-exporting](https://github.com/vespa-engine/sample-apps/tree/master/examples/model-exporting) +example includes two scripts to export models and vocabulary files using the default expected input and output names +for embedders using ONNX models. +The input and output names to the embedder are tunable via the `transformer-`parameters in the +[config of the embedder in question](/en/reference/rag/embedding). + + +### Debugging ONNX models + +When loading [ONNX](https://onnx.ai/) models for Vespa native [embedders](/en/rag/embedding), +the model must have correct inputs and output parameters. +Vespa offers tools to inspect ONNX model files. +Here, _minilm-l6-v2.onnx_ is in the current working directory: + +```bash +$ docker run -v `pwd`:/w \ + --entrypoint /opt/vespa/bin/vespa-analyze-onnx-model \ + vespaengine/vespa \ + /w/minilm-l6-v2.onnx + +... +model meta-data: + input[0]: 'input_ids' long[batch][sequence] + input[1]: 'attention_mask' long[batch][sequence] + input[2]: 'token_type_ids' long[batch][sequence] + output[0]: 'output_0' float[batch][sequence][384] + output[1]: 'output_1' float[batch][384] +... +``` + +The above model input and output names conform with the +default [bert-embedder parameters](/en/reference/rag/embedding#bert-embedder-reference-config). 
+ +Similarly, a model that conforms with the [hugging-face-embedder parameters](/en/reference/rag/embedding#huggingface-embedder-reference-config) defaults: +```bash +model meta-data: + input[0]: 'input_ids' long[batch][sequence] + input[1]: 'attention_mask' long[batch][sequence] + input[2]: 'token_type_ids' long[batch][sequence] + output[0]: 'last_hidden_state' float[batch][sequence][384] +``` + +The shape of the output should be: +```bash +float[batch][sequence][vector-embedding-dimensionality] +``` +Vespa embedders implement the pooling strategy over the output vectors (one per sequence length). + +If loading models without the expected input and output parameter names, the container service will not start +(check _vespa.log_ in the container running Vespa): + +```bash + WARNING container Container.com.yahoo.container.di.Container + Caused by: java.lang.IllegalArgumentException: Model does not contain required input: 'input_ids'. Model contains: input +``` + +When this happens, a deploy looks like: +```bash +$ vespa deploy --wait 300 +Uploading application package ... done + +Success: Deployed . + +Waiting up to 5m0s for query service to become available ... +Error: service 'query' is unavailable: services have not converged +``` + +Embedders support changing the input and output names; +consult the [embedding reference](/en/reference/rag/embedding) documentation. 
+ + +### Using vespa-analyze-onnx-model + +[vespa-analyze-onnx-model](/en/reference/operations/tools#vespa-analyze-onnx-model) +is useful to find model inputs and outputs - +example run on a config server where an application package with a model is deployed to: + +```bash +$ docker exec vespa vespa-analyze-onnx-model \ + /opt/vespa/var/db/vespa/config_server/serverdb/tenants/default/sessions/1/files/Network.onnx + +unspecified option[0](optimize model), fallback: true +vm_size: 230228 kB, vm_rss: 44996 kB (before loading model) +vm_size: 233792 kB, vm_rss: 54848 kB (after loading model) +model meta-data: +input[0]: 'input' float[input][4] +output[0]: 'output' float[output][3] +unspecified option[1](symbolic size 'input'), fallback: 1 +test setup: +input[0]: tensor<float>(d0[1],d1[4]) -> float[1][4] +output[0]: float[1][3] -> tensor<float>(d0[1],d1[3]) +unspecified option[2](max concurrent evaluations), fallback: 1 +vm_size: 233792 kB, vm_rss: 54848 kB (no evaluations yet) +vm_size: 233792 kB, vm_rss: 54848 kB (concurrent evaluations: 1) +estimated model evaluation time: 0.00227701 ms +``` + +The corresponding input/output tensors should be defined as: +```js +document doc { + ... + field flowercategory type tensor<float>(d0[1],d1[3]) { + indexing: attribute | summary + } +} + +rank-profile myRank { + inputs { + query(myTensor) tensor<float>(d0[1],d1[4]) + } + onnx-model my_onnx_model { + file: files/Network.onnx + input "input" : query(myTensor) + output "output": outputTensor + } + first-phase { + expression: sum( onnx(my_onnx_model).outputTensor * attribute(flowercategory) ) + } +} +``` diff --git a/mintlify-docs/en/ranking/phased-ranking.mdx b/mintlify-docs/en/ranking/phased-ranking.mdx new file mode 100644 index 0000000000..cee7d63842 --- /dev/null +++ b/mintlify-docs/en/ranking/phased-ranking.mdx @@ -0,0 +1,341 @@ +--- +title: "Phased Ranking" +--- + +Vespa allows expressing multiphased retrieval and ranking of documents. 
The retrieval phase is done close to the data in the content nodes, while the ranking phase(s) can be done in the content nodes or in the stateless container after scatter-and-gather from content node(s). The retrieval part of a phased pipeline is expressed by using the [query API](/en/querying/query-api) and the ranking part is expressed by using the [rank-profile](/en/reference/schemas/schemas#rank-profile) in the schema. + +- **Retrieval:** [_Top-k_ query operators](#top-k-query-operators), like [weakAnd/wand](/en/ranking/wand) and [nearest neighbor search](/en/querying/nearest-neighbor-search), allow retrieval with sublinear complexity. These query operators use simple scoring functions that are computationally cheap to evaluate over Vespa indexes. Using the expressiveness of the Vespa query language, developers can combine multiple retrievers in the query, and expose the union of retrieved documents to the Vespa ranking phases. +- **Per node ranking:** The query specification retrieves documents and ranks them using declarative phases evaluated within the [content nodes](#two-phase-ranking-on-content-nodes): + - [first-phase expression](#first-phase-ranking-on-content-nodes); configured in [rank-profile](/en/reference/schemas/schemas#rank-profile). This phase is evaluated for _all_ hits retrieved by the query logic. This phase can also remove retrieved documents using [rank-score-drop-limit](/en/reference/schemas/schemas#rank-score-drop-limit). + - [second-phase ranking](#two-phase-ranking-on-content-nodes); configured in [rank-profile](/en/reference/schemas/schemas#rank-profile). Optionally re-rank the top-scoring hits from the first-phase ranking using a more complex expression. The [total-rerank-count](/en/reference/schemas/schemas#secondphase-total-rerank-count) sets a strict upper bound on the number of documents that are re-ranked in total over the nodes.
+ +- **Global ranking:** Following the per content node local ranking phases, re-ranking can occur after the content nodes have returned the merged top-scoring hits to the stateless container. This phase is specified using a [global-phase](#using-a-global-phase-expression) expression in the [rank-profile](/en/reference/schemas/schemas#rank-profile). Additionally, the global-phase can conduct [cross-hit normalization](#cross-hit-normalization-including-reciprocal-rank-fusion) to combine unrelated scoring methods. +- Finally, for customized ranking that is difficult to express in declarative phases, one can implement re-ranking using [reranking in searcher](/en/ranking/reranking-in-searcher). + +<Frame> + ![Ranking in 3 phases](/assets/img/phased-ranking.png) +</Frame> + +## First-phase ranking on content nodes + +Normally, you will always start by having one ranking expression that is evaluated on the content nodes. This is configured in the `rank-profile` section of a [schema](/en/reference/schemas/schemas#rank-profile) as a `first-phase` expression. This expression can use various [rank features](/en/ranking/ranking-expressions-features) with information extracted during the matching phase to evaluate the relevance of each document. The first-phase expression is computed for every document retrieved by the query. The computational cost is bounded by the number of documents exposed to the ranking phase on each content node multiplied with the complexity of the first-phase expression; therefore the expression needs to be simple and cheap to allow scaling to large amounts of retrieved docs. Alternatively, use retrieval operators that will expose only the top-k hits to the first-phase expression. 
+ +## Two-phase ranking on content nodes + +While some use cases only require one (simple) first-phase ranking expression, for more advanced use cases it's possible to add a `second-phase` ranking expression in a [rank-profile](/en/reference/schemas/schemas#rank-profile) in the schema. This enables more expensive computations than would be feasible to run as a first-phase computation, with predictable upper bounds for the cost. + +By default, second-phase ranking (if specified) is evaluated for the 100 best hits from the first-phase ranking per content node. The number that is reranked over all nodes can be set by [total-rerank-count](/en/reference/schemas/schemas#secondphase-total-rerank-count). + +```js +schema myapp { + … + rank-profile title-freshness inherits default { + first-phase { + expression { + bm25(title) + 3*freshness(timestamp) + } + } + second-phase { + expression { + xgboost("my-model.json") + } + total-rerank-count: 50 + } + } +} +``` + +In this example, the first phase uses the text matching feature [bm25](/en/ranking/bm25) scoped to the _title_ field plus one of the built-in document [rank features](/en/reference/ranking/rank-features) named _freshness_ over a _timestamp_ field which stores the epoch time in second resolution. The top 50 hits from the first-phase function, counted in total over all content nodes, are re-ranked using a trained [xgboost](/en/ranking/xgboost) model. + +## Using a global-phase expression + +Using a rank expression configured as a [global-phase](/en/reference/schemas/schemas#globalphase-rank) in the `rank-profile` section of a schema, you can add a ranking phase that will run in the stateless container after gathering the best hits from the content node phases; this can be used instead of or in addition to [second-phase](#two-phase-ranking-on-content-nodes) ranking. The global-phase can also perform [cross-hit normalization](#cross-hit-normalization-including-reciprocal-rank-fusion) to combine unrelated scoring methods.
+ +By default, global-phase ranking runs on the 100 globally best hits for a schema; this can be tuned in the rank-profile using [`rerank-count`](/en/reference/schemas/schemas#globalphase-rank) or per-query using the [`ranking.globalPhase.rerankCount`](/en/reference/api/query#ranking.globalphase.rerankcount) query property. + +This phase is optimized for inference with [ONNX](/en/ranking/onnx) models, taking some input data from the document and some from the query, and finding a score for how well they match. A typical use case is re-ranking using [cross-encoders](/en/ranking/cross-encoders). + +It's possible to specify _gpu-device_ to get GPU-accelerated computation of the model as well. You can compute or re-shape the inputs to the ONNX model in a function if necessary, and use the output in some further calculation to compute the final score. + +If you have large and complex expressions (including [xgboost](/en/ranking/xgboost), [lightgbm](/en/ranking/lightgbm)), instead of an ONNX model, it's more efficient to use the highly optimized [second-phase](#two-phase-ranking-on-content-nodes) computation on content nodes. This is also true for sub-expressions that require lots of vector data, moving vector data across the network is expensive. + +<Info> + **Note:** You can force a sub-expression to be computed on the content nodes by making it a function and adding it to match-features +</Info> + +By adding the feature to [match-features](/en/reference/schemas/schemas#match-features) in the ranking profile, the global-phase expression can re-use the function output without the complexity of transferring the data across the network and performing inference in the stateless container (which is less optimized). 
+ +```js expandable +schema myapp { + document myapp { + field per_doc_vector type tensor<float>(x[784]) { + indexing: attribute + } + … + } + … + rank-profile with-global-model inherits default { + inputs { + query(per_query_vector) tensor<float>(d0[32]) + } + first-phase { + expression: bm25(title) + } + + function my_expensive_function() { + expression: # some expensive computation better done on content nodes + } + + function per_doc_input() { + # simple reshaping: ONNX input wants the dimension name "d0" + expression: rename(attribute(per_doc_vector), x, d0) + } + onnx-model my_ranking_model { + file: files/my_ranking_model.onnx + input "model_input_1": per_doc_input + input "model_input_2": query(per_query_vector) + output "model_output_1": out + } + global-phase { + expression { + my_expensive_function + sum(onnx(my_ranking_model).out) + } + rerank-count: 50 + } + match-features { + my_expensive_function + } + } +} +``` + +In the example above, _my\_expensive\_function_ will be evaluated on the content nodes for the 50 top-ranking documents from the first-phase so that the global-phase does not need to re-evaluate. + +## Cross-hit normalization including reciprocal rank fusion + +The ranking expressions configured for global-phase may perform cross-hit normalization of input rank features or functions. This is designed to make it easy to combine unrelated scoring methods into one final relevance score. The syntax looks like a special pseudo-function call: + +- `normalize_linear(my_function_or_feature)` +- `reciprocal_rank(my_function_or_feature)` +- `reciprocal_rank(my_function_or_feature, k)` +- `reciprocal_rank_fusion(score_1, score_2 ...)` + +The normalization will be performed across the hits that global-phase reranks (see [configuration](#globalphase-rerank-count) above). 
This means that first, the input (_my\_function\_or\_feature_) is computed or extracted from each hit that global-phase will rerank; then the normalization step is applied; afterwards, when computing the actual global-phase expression, the normalized output is used. As an example, assume some text fields with bm25 enabled, an ONNX model (from the [example](#myapp-with-global-model) in the previous section), and a "popularity" numeric attribute: + +```js expandable +rank-profile with-normalizers inherits with-global-model { + function my_bm25_sum() { + expression: bm25(title) + bm25(abstract) + } + function my_model_out() { + expression: sum(onnx(my_ranking_model).out) + } + global-phase { + expression { + normalize_linear(my_bm25_sum) + normalize_linear(my_model_out) + normalize_linear(attribute(popularity)) + } + rerank-count: 200 + } + } +``` + +The `normalize_linear` normalizer takes a single argument, which must be a rank-feature or the name of a function. It computes the maximum and minimum values of that input and scales linearly to the range [0, 1], basically using the formula `output = (input - min) / (max - min)` + +The `reciprocal_rank` normalizer takes one or two arguments; the first must be a rank-feature or the name of a function, while the second (if present) must be a numerical constant, called `k` with default value 60.0. It sorts the input values and finds their _rank_ (so the highest score gets rank 1, next highest 2, and so on). The output from reciprocal\_rank is computed with the formula ` output = 1.0 / (k + rank) `, so note that even the best input only gets `1.0 / 61 = 0.016393` as output with the default k. + +The `reciprocal_rank_fusion` pseudo-function takes at least two arguments and expands to the sum of their `reciprocal_rank`; it's just a convenient way to write + +```js +reciprocal_rank(a) + reciprocal_rank(b) + reciprocal_rank(c) +``` + +as + +```js +reciprocal_rank_fusion(a,b,c) +``` + +for example. 
+ +See the [Simple Hybrid Search with ColBERT](https://github.com/vespa-engine/sample-apps/tree/master/colbert) sample application for a practical example of using reciprocal rank fusion. + +## Stateless re-ranking + +If the logic required is not suited for the [global-phase](#using-a-global-phase-expression) above, it's possible to write [stateless searchers](/en/applications/searchers) which can re-rank hits using any custom scoring function or model. The searcher can also blend and re-order hits from multiple sources when using [federation](/en/querying/federation) of content sources. + +The searcher might request rank features calculated by the content nodes to be returned along with the hit fields using [summary-features](/en/applications/inspecting-structured-data). The features returned can be configured in the _rank-profile_ as [summary-features](/en/reference/schemas/schemas#summary-features). + +The number of _hits_ is limited by the query API [hits](/en/reference/api/query#hits) parameter and [maxHits](/en/reference/api/query#queryprofile) setting. The hits available for container-level re-ranking are the global top-ranking hits after content nodes have retrieved and ranked the hits, and global top-ranking hits have been found by merging the responses from the content nodes. + +## Top-K Query Operators + +If the first-phase ranking function can be approximated as a simple linear function, and the query mode is _weakAnd_, the [Weak And/WAND](/en/ranking/wand) implementations in Vespa allow avoiding full evaluation of all the documents matching the query with the _first-phase_ function. Instead, only the top-K hits using the internal wand scoring are exposed to the _first-phase_ ranking expression. + +The [nearest neighbor search](/en/querying/nearest-neighbor-search) operator is also a top-k retrieval operator, and the two operators can be combined in the same query.
+ +## Choosing phased ranking functions + +A good quality ranking expression will for most applications consume too much CPU to be runnable on all retrieved or matched documents within the latency budget/SLA. The application ranking function should hence in most cases be a second-phase function. The task then becomes to find a first-phase function, which correlates sufficiently well with the second-phase function. + +## Rank phase statistics + +Use [match-features](/en/reference/schemas/schemas#match-features) and [summary-features](/en/reference/schemas/schemas#summary-features) to export detailed match- and rank-information per query. This requires post-processing and aggregation in an external system for analysis. + +To evaluate how well the document corpus matches the queries, use [mutable attributes](/en/reference/schemas/schemas#mutate) to track how often each document survives each match- and ranking-phase. This is aggregated per document and makes it easy to analyse using the query and grouping APIs in Vespa - and no other processing/storage is required. + +A mutable attribute is a number where an operation can be executed in 4 phases: + +1. on-match +2. on-first-phase +3. on-second-phase +4. on-summary + +The common use case is to increase the value by 1 for each execution. With this, it is easy to evaluate the document's performance to the queries, e.g. find the documents that appear in most queries, or the ones that never matched - run a query and order by the mutable attribute. + +<Info> + **Note:** The mutable attributes are just counters and memory-operations only - the values might or might not survive content node restarts. The values cannot be compared across nodes. Use the values to assess relative document matching and ranking performance since Vespa start +</Info> + +This example is based on the album-recommendation sample application (see [deploying an application](/en/basics/deploy-an-application)). 
It uses 4 attributes that each track how many times a document participates in any of the 4 phases. This is tracked only if using rank-profile `rank_albums_track` in the query: + +```js expandable +schema music { + + document music { + + field artist type string { + indexing: summary | index + } + + field album type string { + indexing: summary | index + } + + field year type int { + indexing: summary | attribute + } + + field category_scores type tensor<float>(cat{}) { + indexing: summary | attribute + } + + } + + field match_count type long { + indexing: attribute | summary + attribute: mutable + } + field first_phase_count type long { + indexing: attribute | summary + attribute: mutable + } + field second_phase_count type long { + indexing: attribute | summary + attribute: mutable + } + field summary_count type long { + indexing: attribute | summary + attribute: mutable + } + + fieldset default { + fields: artist, album + } + + rank-profile rank_albums inherits default { + first-phase { + expression: sum(query(user_profile) * attribute(category_scores)) + } + second-phase { + expression: attribute(year) + rerank-count: 1 + } + summary-features: attribute(year) + } + + rank-profile rank_albums_track inherits rank_albums { + mutate { + on-match { match_count += 1 } + on-first-phase { first_phase_count += 1 } + on-second-phase { second_phase_count += 1 } + on-summary { summary_count += 1 } # this only happens when summary-features are present! 
+ } + } + + rank-profile rank_albums_reset_on_match inherits rank_albums { + mutate { + on-match { match_count = 0 } + } + } + rank-profile rank_albums_reset_on_first_phase inherits rank_albums { + mutate { + on-match { first_phase_count = 0 } + } + } + rank-profile rank_albums_reset_on_second_phase inherits rank_albums { + mutate { + on-match { second_phase_count = 0 } + } + } + rank-profile rank_albums_reset_on_summary inherits rank_albums { + mutate { + on-match { summary_count = 0 } + } + } +} +``` + +```bash +$ vespa query \ + "select * from music where album contains 'head'" \ + "ranking=rank_albums_track" +``` + +### Usage + +The framework is flexible in use; the normal use case is: + +1. Reset the mutable attribute on all content nodes - use [searchPath](/en/reference/api/query#model.searchpath) to make sure all nodes are reset by sending a query using a rank profile that resets the value. For each phase, run a query that _matches_ all documents, and reset the attribute - e.g.: +```bash +$ for phase in match first_phase second_phase summary; do \ + for node in {0..3}; do vespa query \ + "select * from music where true" \ + "ranking=rank_albums_reset_on_$phase" \ + "model.searchPath=$node/0"; \ + done \ + done +``` + Alternatively, run a query against a group and verify that [coverage](/en/reference/querying/default-result-format) is 100%. +2. Run query load, using the tracking rank-profile, like `rank_albums_track` above +3. Run queries using [sorting](/en/reference/querying/sorting-language) or [grouping](/en/querying/grouping) on the mutable attributes. + +<Info> +**Note:** Make sure that only the relevant query load uses the tracking rank profile. E.g. exclude monitoring queries / automation by using a separate ranking profile. +</Info> + +To initialize a mutable attribute with a different value than 0 when a document is PUT, use: + +```js +field match_count type long { + indexing: 7 | to_long | attribute | summary # Initialized to 7 for a new document. 
The default is 0. + attribute: mutable +} +``` + +To dump values fast, from memory only (assuming the schema has an `id` field): + +```js +document-summary rank_phase_statistics { + summary id {} + summary match_count {} + summary first_phase_count {} + summary second_phase_count {} + summary summary_count {} +} +``` + +```bash +$ vespa query \ + "select * from music where true" \ + "presentation.summary=rank_phase_statistics" +``` diff --git a/mintlify-docs/en/ranking/ranking-expressions-features.mdx b/mintlify-docs/en/ranking/ranking-expressions-features.mdx new file mode 100644 index 0000000000..3443eec417 --- /dev/null +++ b/mintlify-docs/en/ranking/ranking-expressions-features.mdx @@ -0,0 +1,280 @@ +--- +title: "Ranking Expressions and Features" +--- + +Read the [ranking introduction](/en/basics/ranking) first. This guide is about [ranking expressions](/en/reference/ranking/ranking-expressions) and [rank features](/en/reference/ranking/rank-features); find guides and examples below. + +## Ranking expressions + +Vespa uses [ranking expressions](/en/reference/ranking/ranking-expressions) to rank documents matching a query, computing a rank score per document. A ranking expression is stored in a _rank profile_. + +Ranking expressions are mathematical functions. The function may contain anything from a single reference to a built-in feature to a machine-learned model. Ranking expressions support the usual operators and functions, as well as an _if_ function - enabling decision trees and conditional business logic. They support a comprehensive set of [tensor functions](/en/reference/ranking/tensor), which allows expressing machine-learned functions such as deep neural nets. Refer to [multivalue query operators](/en/ranking/multivalue-query-operators) for details on using dot products, tensors and wand. + +Ranking is most often the resource driver - this is where the application's logic is implemented.
Use [two-phase ranking](/en/ranking/phased-ranking) to optimize, using an inexpensive _first-phase_ ranking to eliminate the lowest ranked candidates, then focus the resources on a strong _second-phase_ ranking. + +Ranking expressions can be _handwritten_ - works well if the ranking is well-defined enough to be easily mappable into a ranking expression. Alternatively, make the ranking expression automatically using _machine learning_. Ranking expressions can be large, and can be imported using [file:filename](/en/reference/schemas/schemas#expression). + +## Rank features + +The primitive values used in ranking expressions are called [rank features](/en/reference/ranking/rank-features). Rank features can be [tensors](/en/ranking/tensor-user-guide), [multivalue fields](/en/querying/searching-multivalue-fields) or scalars, and one of: + +- Constants set in the application package +- Values sent with the query or set in the document +- Computed by Vespa from the query and the document to provide information about how well the query matched the document + +Vespa's [rank feature set](/en/reference/ranking/rank-features) contains a large set of low level features, as well as some higher level features. If automated training is used, all features can often just be handed to the training algorithm to let it choose which ones to use. Depending on the algorithm, it can be a good idea to leave out the un-normalized features to avoid spending learning power on having to learn to normalize these features and determine that they really represent the same information as some of the normalized features. + +Include [default rank features](https://github.com/vespa-engine/system-test/blob/master/tests/search/rankfeatures/dump.txt) in query results by adding [ranking.listFeatures](/en/reference/api/query#ranking.listfeatures) to the query. 
This is useful for tasks like recording the rank feature values for automated training - learn more in the [tutorial](/en/learn/tutorials/text-search-ml). If more rank features than are available in the default set are wanted, they can be added to the set in the [rank profile](/en/reference/schemas/schemas#rank-features): + +```bash +rank-features: feature1 feature2 … +``` + +It is also possible to control which features to dump - add this to the rank profile: + +```bash +ignore-default-rank-features +``` + +This will make the explicitly listed rank features the only ones dumped when requesting rankfeatures in the query. + +### Normalization + +The rank features provided include both features normalized to the range 0-1, and un-normalized features like counts and positions. Whenever possible, prefer the normalized features. They capture the same information (and more), but are easier to use because they can be combined more easily with other features. In addition, try to write ranking expressions such that the combined rank score is also normalized, for example by taking averages not sums. The resulting normalized rank scores make it possible to implement relevance based blending, search assistance triggering when there are no good hits, and so on. + +### Configuration + +Some features, most notably the [fieldMatch](/en/reference/ranking/rank-features#fieldMatch(name)) features, have configuration parameters that enable the feature calculation to be tweaked per field for performance or relevance. Feature configuration values are set by adding to the rank profile: + +```bash +rank-properties { + featureName.configurationProperty: "value" +} +``` + +The values are set per field, like: + +```js +rank-properties { + fieldMatch(title).maxAlternativeSegmentations: 10 + fieldMatch(title).maxOccurrences: 5 + fieldMatch(description).maxOccurrences: 20 +} +``` + +Refer to the [rank feature configuration](/en/reference/ranking/rank-feature-configuration) reference.
+ +### Feature contribution functions + +Vespa ranking features are linear. For example, the [earliness](/en/reference/ranking/rank-features#fieldMatch(name).earliness) feature is 0 if the match is at the end of the field, 1 if the match is at the start of the field, and 0.5 if the match is exactly in the middle of the field. In many cases, the contribution of a feature should not be linear with its "goodness". For example, _earliness_ could decay quickly in the beginning and slowly at the end of the field. This follows from the intuition that it matters more whether the match is on the first or the twentieth word in the field, but it doesn't matter as much whether the match is on the thousandth or the thousand-and-twentieth. + +To achieve this, pass the feature value through a function which turns the line into a curve matching the intent. This is easiest with normalized features. The function begins and ends in the same points as the line, f(0)=0 and f(1)=1, but curves in between. To get the effect described above, a curve which starts almost flat and ends steep works - example: + +```bash +pow(0-x,2) +``` + +The second number decides how pronounced the curving is. A larger number will make changes to higher x values even more important relative to the same change to lower x values. + +### Dumping rank features for specific documents + +For a training set containing judgements for certain documents, it is useful to select those documents in the query by adding a term matching the document ID, but without impacting the values of any rank features. To do this, add that term with [ranked](/en/reference/querying/yql#ranked) set to false: + +```bash +select * from mydocumenttype where myidfield contains ({ranked: false}"mydocumentid" and ...) +``` + +### Accessing feature/function values in results + +Any feature can be returned in the hit producing it by adding it to the list of [summary-features](/en/reference/schemas/schemas#summary-features) of the rank profile.
As all functions are features this allows the result of any computation to be accessible in results. Example: + +```bash +rank-profile test { + + summary-features: tensor_join join_sum + + function tensor_join() { + expression: attribute(my_tensor_field) * query(my_query_tensor) + } + + function join_sum() { + expression: sum(tensor_join()) + } + +} +``` + +The results of these functions will be available in the Hits of the result as follows: + +```js +import com.yahoo.search.result.FeatureData; + + FeatureData featureData = (FeatureData)hit.getField("summaryfeatures"); + Tensor tensor_join_value = featureData.getTensor("rankingExpression(tensor_join)"); + double join_sum_value = featureData.getDouble("rankingExpression(join_sum)"); +``` + +Do further computation on the returned tensors, such as e.g `Tensor larger = tensor_join_value.map((value) -> 3 * value)`. + +If also leveraging [multiphase searching](/en/applications/searchers#multiphase-searching), it is possible to get rank features returned in the first phase using [match-features](/en/reference/schemas/schemas#match-features). This pre-populates the [matchfeatures](/en/reference/querying/default-result-format#matchfeatures) field. The effects which can be observed in the results are the same, so this may seem like the same functionality, but the performance trade-off is different: + +- The expressions in _match-features_ must be computed for all hits returned in the first phase, before selecting which hits to _fill_. But that also means it's possible to use the `matchfeatures` field to select which hits to keep and which to remove before calling `fill()` at all. +- The expressions in _summary-features_ are not available before the _fill_ phase, but only need to be calculated for those hits that are actually filled. + +The difference is most pronounced when the corpus is divided onto many content nodes. Consider a case with 7 content nodes, fetching 100 matches from each. 
These are merged (by relevance score) into a list of 700 hits, and the 100 with the best relevance are selected and _filled_. If you use _match-features_, they need to be calculated for all 700 hits. Compare with _summary-features_, where only the final 100 hits need to be considered for calculating those. + +## Conditional expressions + +Ranking expressions support conditional logic to choose between different sub-expressions based on document attributes, query parameters, or other features. This enables ranking to adapt to different document types, user segments, or business rules within a single rank profile. + +### The if function + +`if` can be used for other purposes than encoding MLR trained decision trees. One use is to choose different ranking functions for different types of documents in the same search. Ranking expressions are able to do string equality tests, so to choose between different ranking sub-functions based on the value of a string attribute (say, "category"), use an expression like: + +```py +if (attribute(category)=="restaurant",_…restaurant function_, if (attribute(category)=="hotel",_…hotel function_, …)) +``` + +This method is also used automatically when multiple schemas are deployed to the same cluster, and all are included in the same query, to choose the ranking expression from the correct schema for each document. + +By using `if` functions, one can also implement strict tiering, ensuring that documents meeting some criterion always get a higher score than the other documents. Example: + +```py +if (fieldMatch(business).fieldCompleteness==1, 0.8+document.distance*0.2, + if (attribute(category)=="shop", 0.6+fieldMatch(title)*0.2, + match*attribute(popularity)*0.6 ) ) +``` + +This function puts all exact matches on business names first, sorted by geographical distance, followed by all shops sorted by title match, followed by everything else sorted by the overall match quality and popularity.
+ +Also see [pin results](/en/ranking/multivalue-query-operators#pin-results-example) for a comprehensive example of using a tiered ranking function to pin queries and results. + +### The switch function + +When comparing many values against the same discriminant value, the `switch` function provides a more readable alternative to deeply nested `if` statements. For example, the nested `if` expression on `attribute(category)` shown earlier can be written more clearly as: + +```py +switch (attribute(category)) { + case "restaurant":_…restaurant function_, + case "hotel":_…hotel function_, + default:_…default function_} +``` + +Use `switch` when testing a single expression for equality against multiple values. Continue using `if` for different comparison operators or when each condition tests different expressions, such as in the tiering example above. See the [switch function reference](/en/reference/ranking/ranking-expressions#the-switch-function) for details. + +## Using constants + +Ranking expressions can refer to constants defined in a `constants` clause: + +```bash +first-phase { + expression: myConst1 + myConst2 +} +constants { + myConst1: 1.5 + myConst2: 2.5 + ... +} +``` + +Constant lists are inherited and can be overridden in sub-profiles. This is useful to create a set of rank profiles that use the same broad ranking but differ by constant values. + +For performance, always prefer constants to query variables (see below) whenever the constant values to use can be enumerated in a set of rank profiles. Constants are applied to ranking expressions at configuration time, and the resulting constant parts of expressions are calculated, which may lead to reduced execution cost, especially with tensor constants. + +## Using query variables + +As ranking expressions can refer to any feature by name, one can use [query features](/en/reference/ranking/rank-features#feature-list) as ranking variables.
These variables can be used for example to allow the query to specify the degree of importance to various parts of a ranking expression, or to quickly search large parameter spaces to find a good ranking, by trying different values in each query. These variables can be assigned default values in the [rank profile](/en/reference/schemas/schemas#rank-profile) by adding: + +```bash +inputs { + query(myvalue) double: 0.5 +} +``` + +to the rank profile. These variables can then be overridden in the query by adding: + +```bash +input.query(myvalue)=0.1 +``` + +to it - see the [Query API](/en/reference/api/query#ranking.features). + +## Query feature types + +The default type of all features is scalar. To use query feature _tensors_ we must [define their type in the rank profile](/en/reference/schemas/schemas#inputs). + +Without the correct tensor type, a passed query feature is handled as a string to be converted to a scalar, which will _not give an error but will produce incorrect results_. + +Tensors can be passed in requests using the [tensor literal form](/en/reference/ranking/tensor#tensor-literal-form), for example: + +```bash +input.query(user_profile)=%7B%7Bcat%3Apop%7D%3A0.8%2C%7Bcat%3Arock%7D%3A0.2%2C%7Bcat%3Ajazz%7D%3A0.1%7D +``` + +However, it is usually preferable instead to create them in a [Searcher](/en/applications/searchers). Set the tensor value using the [RankFeatures](https://github.com/vespa-engine/vespa/blob/master/container-search/src/main/java/com/yahoo/search/query/ranking/RankFeatures.java) instance associated with the [Query](https://github.com/vespa-engine/vespa/blob/master/container-search/src/main/java/com/yahoo/search/Query.java) instance.
This example makes a tensor with a single cell with value 500: + +```java +package com.yahoo.example; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.searchchain.Execution; +import com.yahoo.tensor.Tensor; +import com.yahoo.tensor.TensorType; + +public class TensorInQuerySearcher extends Searcher { + @Override + public Result search(Query query, Execution execution) { + // The Tensor type must also be in the application package as described in the above section + query.getRanking().getFeatures().put("query(tensor_feature)", + Tensor.Builder.of(TensorType.fromSpec("tensor<float>(x{})")).cell().label("x", "foo").value(500).build()); + return execution.search(query); + } +} +``` +Refer to the [Tensor](https://javadoc.io/doc/com.yahoo.vespa/vespajlib/latest/com/yahoo/tensor/Tensor.html) Java API for details on how to construct tensors programmatically. +## Function snippets + +When using machine learned ranking, we are searching a function space which is much more limited than the space of functions supported by ranking expressions. We can increase the space of functions available to MLR because the primitive features used in MLR training do not need to be primitive features in Vespa - they can just as well be ranking expression snippets. If there are certain mathematical combinations of features believed to be useful in an application, these can be pre-calculated from the actual primitive features of Vespa and given to MLR as primitives. Such primitives can then be replaced textually by the corresponding ranking expression snippet, before the learned expression is deployed on Vespa. + +Vespa supports [expression functions](/en/reference/schemas/schemas#function-rank). Functions having zero arguments can be used as summary- and rank-features. 
For example, the function "myfeature": + +```bash +rank-profile myrankprofile inherits default { + function myfeature() { + expression: fieldMatch(title).completeness * pow(0 - fieldMatch(title).earliness, 2) + } +} +``` + +becomes available as a feature as follows: + +```bash +summary-features { + myfeature +} +``` + +## Tracking relevance variations over time + +Vespa comes with a few simple metrics for relevance that enable applications to see how relevance changes over time, either as a result of changes to how relevance is computed, changes to query construction, changes to the content ingested, or as a result of changing user behavior. + +The relevance metrics are `relevance.at_1`, `relevance.at_3` and `relevance.at_10`. See [metrics](/en/operations/metrics) for more information. + +## Examples + +If the user is underage, assign 0 to adult content and use the average of match quality in the title field and popularity among kids. If the user is not, use the match quality in the title field: + +```py +if ( query(userage) < 18, + if ( attribute(adultness) > 0.1, 0 , (fieldMatch(title)+attribute(kidspopularity)) / 2 ), + fieldMatch(title) ) +``` + +Use a weighted average of the match quality in some fields, multiplied by 1-exp of the document age: + +```py +( 10*fieldMatch(title) + 5*fieldMatch(description) + + 7*attributeMatch(tags).normalizedWeight ) /22 * ( 1 - age(creationtime) ) +``` \ No newline at end of file diff --git a/mintlify-docs/en/ranking/ranking-intro.mdx b/mintlify-docs/en/ranking/ranking-intro.mdx new file mode 100644 index 0000000000..c57a65fa8f --- /dev/null +++ b/mintlify-docs/en/ranking/ranking-intro.mdx @@ -0,0 +1,329 @@ +--- +title: "Introduction to ranking" +--- + +Learn how [ranking](/en/basics/ranking) works in Vespa by using the open [query API](/en/querying/query-api) of [vespa-documentation-search](https://github.com/vespa-cloud/vespa-documentation-search). In this article, find a set of queries invoking different `rank-profiles`,
which is the ranking definition. + +Ranking is the user-defined computation that scores documents to a query, here configured in [doc.sd](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/src/main/application/schemas/doc.sd), also see [schema documentation](/en/reference/schemas/schemas). This schema has a set of (contrived) ranking functions, to help learn Vespa ranking. + + +## Ranking using document features only + +Let's start with something simple: _Irrespective of the query, score all documents by the number of in-links to it_. That is, for any query, return the documents with most in-links first in the result set (these queries are clickable!): + +[`yql=select * from doc where true&ranking=inlinks`](https://api.search.vespa.ai/search/?yql=select%20*%20from%20doc%20where%20true&ranking=inlinks&_gl=1*2d8dpa*_gcl_au*MjA3MzE0NDM4Ny4xNzc4ODIyODQ4) + +The score, named `relevance` in query results, is the size of the `inlinks` attribute array in the document, as configured in the `expression`: + + +```js +rank-profile inlinks { + first-phase { + expression: attribute(inlinks).count + } + summary-features { + attribute(inlinks).count + } +} +``` + +Count the number of entries in `inlinks` in the result and compare with `relevance` - it will be the same. Observe that the ranking expression does not use any features from the query, it only uses `attribute(inlinks).count`, which is a [document feature](/en/reference/ranking/rank-features#document-features). + + +## Observing values used in ranking + +When developing ranking expressions, it is useful to observe the input values. Output the input values using [summary-features](/en/reference/schemas/schemas#summary-features). 
In this experiment, we will use another rank function, still counting in-links but scoring older documents lower: + +$$ +\text{num\_inlinks} \times {\text{decay\_const}}^{\frac{\text{doc\_age\_seconds}}{3600}} +$$ + + +Notes: +- use of the `now` [ranking feature](/en/reference/ranking/rank-features#now) +- use of `pow`, a mathematical function in [ranking expressions](/en/reference/ranking/ranking-expressions) +- use of constants and functions to write better code + +[`yql=select * from doc where true&ranking=inlinks_age`](https://api.search.vespa.ai/search/?yql=select%20*%20from%20doc%20where%20true&ranking=inlinks_age) + + +```js expandable +rank-profile inlinks_age { + first-phase { + expression: rank_score + } + summary-features { + attribute(inlinks).count + attribute(last_updated) + now + doc_age_seconds + age_decay + num_inlinks + rank_score + } + constants { + decay_const: 0.9 + } + function doc_age_seconds() { + expression: now - attribute(last_updated) + } + function age_decay() { + expression: pow(decay_const, doc_age_seconds/3600) + } + function num_inlinks() { + expression: attribute(inlinks).count + } + function rank_score() { + expression: num_inlinks * age_decay + } +} +``` + +In the query results, here we observe a document with 27 in-links, 9703 seconds old, getting a relevance of 20.32 (the age of documents will vary with query time): + +```js expandable +"relevance": 20.325190122213748, +... +"summaryfeatures": { + "attribute(inlinks).count": 27.0, + "attribute(last_updated)": 1.615971522E9, + "now": 1.615981225E9, + "rankingExpression(age_decay)": 0.7527848193412499, + "rankingExpression(doc_age_seconds)": 9703.0, + "rankingExpression(num_inlinks)": 27.0, + "rankingExpression(rank_score)": 20.325190122213748, +} +``` + +Using `summary-features` makes it easy to validate and develop the ranking expression.
+ + +## Ranking with query features + +Let's assume we want to find similar documents, and we define document similarity as having the same number of words. From most perspectives, this is a poor similarity function, better functions are described later. + +The documents have a `term_count` field - so let's add an [input.query()](/en/reference/api/query#ranking.features) for term count: + +[`yql=select * from doc where true &ranking=term_count_similarity&input.query(q_term_count)=1000`](https://api.search.vespa.ai/search/?yql=select%20*%20from%20doc%20where%20true;&ranking=term_count_similarity&input.query(q_term_count)=1000&_gl=1*2d8dpa*_gcl_au*MjA3MzE0NDM4Ny4xNzc4ODIyODQ4) + + +$$ +1 - \frac{fabs(attribute(term\_count) - query(q\_term\_count))}{1 + attribute(term\_count) + query(q\_term\_count)} +$$ + + +```js +rank-profile term_count_similarity { + first-phase { + expression { + 1 - + fabs( attribute(term_count) - query(q_term_count) ) / + (1 + attribute(term_count) + query(q_term_count) ) + } + } + summary-features { + attribute(term_count) + query(q_term_count) + } +} +``` + +This rank function will score documents [0-1>, closer to 1 is more similar: + +```js +"relevance": 0.9985029940119761, +... +"summaryfeatures": { + "attribute(term_count)": 1003.0, + "query(q_term_count)": 1000.0, +} +``` + +The key learning here is how to transfer ranking features in the query, using `input.query()`. Use different names for more query features. + + +## Ranking with a query tensor + +Another similarity function can be overlap in in-links. We will map the inlinks [weightedset](/en/reference/schemas/schemas#weightedset) into a [tensor](/en/reference/schemas/schemas#tensor), query with a tensor of same type and create a scalar using a tensor product as the rank score. 
We use a [mapped](/en/reference/ranking/tensor#general-literal-form) query tensor, where the document name is the address in the tensor, using a value of 1 for each in-link: + +```js +{ + {links:/en/query-profiles.html}:1, + {links:/en/page-templates.html}:1, + {links:/en/overview.html}:1 +} +``` + + +<Warning> +**Important:** + +Vespa cannot know the query tensor type from looking at it - it must be configured using [inputs](/en/reference/schemas/schemas#inputs). +</Warning> + + +As the in-link data is represented in a weightedset, we use the [tensorFromWeightedSet](/en/reference/ranking/rank-features#tensorFromWeightedSet(source,dimension)) rank feature to transform it into a tensor named _links_: + +```js +rank-profile inlink_similarity { + inputs { + query(links) tensor<float>(links{}) + } + first-phase { + expression: sum(tensorFromWeightedSet(attribute(inlinks), links) * query(links)) + } + summary-features { + query(links) + tensorFromWeightedSet(attribute(inlinks), links) + } +} +``` + +[`yql=select * from doc where true&ranking=inlink_similarity&input.query(links)={{links:/en/query-profiles.html}:1,{links:/en/page-templates.html}:1,{links:/en/overview.html}:1}`](https://api.search.vespa.ai/search/?yql=select%20*%20from%20doc%20where%20true&ranking=inlink_similarity&input.query(links)=%7B%20%7Blinks:/en/query-profiles.html%7D:1,%20%7Blinks:/en/page-templates.html%7D:1,%20%7Blinks:/en/overview.html%7D:1%20%7D&_gl=1*j8zfax*_gcl_au*MjA3MzE0NDM4Ny4xNzc4ODIyODQ4) + +Inspect relevance and summary-features: + + +```js expandable +"relevance": 2.0 +... 
+"summaryfeatures": { + "query(links)": { + "type": "tensor<float>(links{})", + "cells": [ + { "address": { "links": "/en/query-profiles.html" }, "value": 1 }, + { "address": { "links": "/en/page-templates.html" }, "value": 1 }, + { "address": { "links": "/en/overview.html" }, "value": 1 } ] + }, + "tensorFromWeightedSet(attribute(inlinks),links)": { + "type": "tensor(links{})", + "cells": [ + { "address": { "links": "/en/page-templates.html" }, "value": 1 }, + { "address": { "links": "/en/jdisc/container-components.html" }, "value": 1 }, + { "address": { "links": "/en/query-profiles.html" }, "value": 1 } ] + } +} +``` + + +Here, the tensors have one dimension, so they are vectors - the sum of the tensor product is hence the dot product. As all values are 1, all products are 1 and the sum is 2: + + +| document | query | value | +| :--- | :--- | :--- | +| /en/jdisc/container-components.html | | 0 | +| | /en/overview.html | 0 | +| /en/page-templates.html | /en/page-templates.html | 1 | +| /en/query-profiles.html | /en/query-profiles.html | 1 | + + +Change values in the query tensor to see difference in rank score, setting different weights for links. + +Summary: The problem of comparing two lists of links is transformed into a numerical problem of multiplying two occurrence vectors, summing co-occurrences and ranking by this sum: + +```js +sum(tensorFromWeightedSet(attribute(inlinks), links) * query(links)) +``` + + +Notes: +- Query tensors can grow large. Applications will normally create the tensor in code using a [Searcher](/en/applications/searchers), also see [example](/en/ranking/ranking-expressions-features#query-feature-types). +- Here the document tensor is created from a weighted set - a better way would be to store this in a tensor in the document to avoid the transformation. + + +## Retrieval and ranking + +So far in this guide, we have run the ranking function over _all_ documents. This is a valid use case for many applications.
However, ranking documents is generally CPU-expensive, optimizing by reducing the candidate set will increase performance. Example query using text matching, dumping [calculated rank features](/en/reference/api/query#ranking.listfeatures): + +[`yql=select * from doc where title contains "document"&ranking.listFeatures`](https://api.search.vespa.ai/search/?yql=select%20*%20from%20doc%20where%20title%20contains%20%22document%22&ranking.listFeatures&_gl=1*1jqth3v*_gcl_au*MjA3MzE0NDM4Ny4xNzc4ODIyODQ4) + +See the **long** list of rank features calculated per result. However, the query filters on documents with "document" in the title, so the features are only calculated for the small set of matching documents. + +Running a filter like this is _document retrieval_. Another good example is web search - the user query terms are used to _retrieve_ the candidate set cheaply (from billions of documents), then one or more _ranking functions_ are applied to the much smaller candidate set to generate the ranked top-ten. Another way to look at it is: +- In the retrieval (recall) phase, _find all relevant documents_ +- In the ranking phase, _show only relevant documents_. + +Still, the candidate set after retrieval can be big, a query can hit all documents. Ranking all candidates is not possible in many applications. + +Splitting the ranking into two phases is another optimization - use an inexpensive ranking expression to sort out the least promising candidates before spending most resources on the highest ranking candidates. 
In short, use increasingly more power per document as the candidate set shrinks: + +<Frame> + <img src="/assets/img/retrieval-ranking.svg" width="584" height="auto" alt="Retrieval and ranking"/> +</Frame> + +Let's try the same query again, with a two-phase rank-profile that also does an explicit rank score cutoff: + +[`yql=select * from doc where title contains "attribute"&ranking=inlinks_twophase`](https://api.search.vespa.ai/search/?yql=select%20*%20from%20doc%20where%20title%20contains%20%22attribute%22&ranking=inlinks_twophase&_gl=1*1jqth3v*_gcl_au*MjA3MzE0NDM4Ny4xNzc4ODIyODQ4) + + +```js +rank-profile inlinks_twophase inherits inlinks_age { + first-phase { + total-keep-rank-count : 50 + rank-score-drop-limit : 10 + expression : num_inlinks + } + second-phase { + expression : rank_score + } +} +``` + + +Note how using rank-profile `inherits` is a smart way to define functions once, then use them in multiple rank-profiles. Read more about [schema inheritance](/en/schemas/inheritance-in-schemas). Here, `num_inlinks` and `rank_score` are defined in a rank profile we used earlier: + + +```js +function num_inlinks() { + expression: attribute(inlinks).count + } +``` + + +In the results, observe that no document has a _rankingExpression(num_inlinks)_ less than or equal to 10.0, meaning all such documents were purged in the first ranking phase due to the `rank-score-drop-limit`. Normally, the `rank-score-drop-limit` is not used, as the `total-keep-rank-count` is most important. Read more in the [reference](/en/reference/schemas/schemas#rank-score-drop-limit). + +For a dynamic limit, pass a ranking feature like `query(threshold)` and use an `if` statement to check if the score is above the threshold or not - if below, assign -1 (something lower than the `rank-score-drop-limit`) and have it dropped. Read more in [ranking expressions](/en/ranking/ranking-expressions-features#the-if-function).
+ +Two-phased ranking is a performance optimization - this guide is about functionality, so the rest of the examples will only be using one ranking phase. Read more in [first-phase](/en/reference/schemas/schemas#firstphase-rank). + + +## Retrieval: AND, OR, weakAnd + +This guide will not go deep in query operators in the retrieval phase, see [query-api](/en/querying/query-api) for details. + +Consider a query like _"vespa documents about ranking and retrieval"_. A query AND-ing these terms hits less than 3% of the document corpus, missing some of the documents about ranking and retrieval: + +[`yql=select * from doc where (default contains "vespa"AND default contains "documents"AND default contains "about"AND default contains "ranking"AND default contains "and"AND default contains "retrieval")`](https://api.search.vespa.ai/search/?yql=select%20*%20from%20doc%20where%20(default%20contains%20%22vespa%22%20AND%20default%20contains%20%22documents%22%20AND%20default%20contains%20%22about%22%20AND%20default%20contains%20%22ranking%22%20AND%20default%20contains%20%22and%22%20AND%20default%20contains%20%22retrieval%22)&_gl=1*1jqth3v*_gcl_au*MjA3MzE0NDM4Ny4xNzc4ODIyODQ4) + +Alternatively, OR-ing the terms hits more than 95% of the documents, unable to filter out irrelevant documents in the retrieval phase: [`yql=select * from doc where (default contains "vespa"OR default contains "documents"OR default contains "about"OR default contains "ranking"OR default contains "and"OR default contains "retrieval")`](https://api.search.vespa.ai/search/?yql=select%20*%20from%20doc%20where%20(default%20contains%20%22vespa%22%20OR%20default%20contains%20%22documents%22%20OR%20default%20contains%20%22about%22%20OR%20default%20contains%20%22ranking%22%20OR%20default%20contains%20%22and%22%20OR%20default%20contains%20%22retrieval%22)&_gl=1*n39r85*_gcl_au*MjA3MzE0NDM4Ny4xNzc4ODIyODQ4) + +Using a "weak AND" can address the problems of too few (AND) or too many (OR) hits in the retrieval phase. 
Think of it as an _optimized OR_, where the least relevant candidates are discarded from further evaluation. To find the least relevant candidates, a simple scoring function is used: + +```bash + rank_score = sum_n(term(n).significance * term(n).weight) +``` + +As the point of [weakAnd](/en/reference/querying/yql#weakand) is to early discard the worst candidates, _totalCount_ is an approximation: + +[`yql=select * from doc where{scoreThreshold: 0, totalTargetHits: 10}weakAnd(default contains "vespa", default contains "documents", default contains "about", default contains "ranking", default contains "and", default contains "retrieval")`](https://api.search.vespa.ai/search/?yql=select%20*%20from%20doc%20where%20%7BscoreThreshold:%200,%20totalTargetHits:%2010%7DweakAnd(%20default%20contains%20%22vespa%22,%20default%20contains%20%22documents%22,%20default%20contains%20%22about%22,%20default%20contains%20%22ranking%22,%20default%20contains%20%22and%22,%20default%20contains%20%22retrieval%22)&_gl=1*1psdofr*_gcl_au*MjA3MzE0NDM4Ny4xNzc4ODIyODQ4) + +Note that this blurs the distinction between filtering (retrieval) and ranking a little - here the `weakAnd` does **both** filtering and ranking to optimize the number of candidates for the later rank phases. The default rank-profile is used: + +```js +rank-profile documentation inherits default { + inputs { + query(titleWeight): 2.0 + query(contentsWeight): 1.0 + } + first-phase { + expression: query(titleWeight) * bm25(title) + query(contentsWeight) * bm25(content) + } +} +``` + +Observe we are here using text matching rank features, which fits well with weakAnd's scoring function that also uses text matching features. + +Read more in [the wand documentation](/en/ranking/wand). + +## Next steps +- Read more about custom re-ranking of the final result set in [reranking in searcher](/en/ranking/reranking-in-searcher). 
diff --git a/mintlify-docs/en/ranking/reranking-in-searcher.mdx b/mintlify-docs/en/ranking/reranking-in-searcher.mdx new file mode 100644 index 0000000000..7e6845f047 --- /dev/null +++ b/mintlify-docs/en/ranking/reranking-in-searcher.mdx @@ -0,0 +1,338 @@ +--- +title: "Re-ranking using a custom Searcher" +sidebarTitle: "Searcher re-ranking" +--- + +This guide demonstrates how to deploy a [searcher](/en/applications/searchers) implementing a last stage of [phased ranking](/en/ranking/phased-ranking). The searcher re-ranks the global top 200 documents which have been ranked by the content nodes using the configurable [ranking](/en/ranking/ranking-intro) specification in the document [schema(s)](/en/basics/schemas). + +The reranking searcher uses [multiphase searching](/en/applications/searchers#multiphase-searching): + +**Matching query protocol phase:** The matching protocol phase asks each content node involved in the query to return the locally best ranking hits (ranked by the configurable ranking expressions defined in the schema). This matching query protocol phase can include several ranking phases which are executed per content node. In the query protocol phase the content nodes can also return [match-features](/en/reference/schemas/schemas#match-features) which a re-ranking searcher can use to re-rank results (or for feature logging). In the custom searcher one is working on the global best ranking hits from the content nodes, and can have access to aggregated features which are calculated across the top-ranking documents (the global best documents). + +**Fill query protocol phase:** Fill summary data for the global top ranking hits after all ranking phases. If one needs access to the document fields, the searcher would need to call `execution.fill` before the re-ranking logic; this would then cost more resources than just using `match-features`, which are delivered in the first protocol matching phase.
If one needs access to a subset of fields during stateless re-ranking, consider configuring a dedicated [document summary](/en/querying/document-summaries). + +See also [life of a query in Vespa](/en/performance/sizing-search#life-of-a-query-in-vespa). + +<Info> +**Prerequisites:** + +- Linux, macOS or Windows 10 Pro on x86\_64 or arm64, with [Podman Desktop](https://podman.io/) or [Docker Desktop](https://www.docker.com/products/docker-desktop/) installed, with an engine running. + - Alternatively, start the Podman daemon: + + ```bash + $ podman machine init --memory 6000 + $ podman machine start + ``` + + - See [Docker Containers](/en/operations/self-managed/docker-containers) for system limits and other settings. +- For CPUs older than Haswell (2013), see [CPU Support](/en/operations/self-managed/cpu-support). +- Memory: Minimum 4 GB RAM dedicated to Docker/Podman. [Memory recommendations](/en/operations/self-managed/node-setup#memory-settings). +- Disk: Avoid `NO_SPACE` - the vespaengine/vespa container image + headroom for data requires disk space. [Read more](/en/writing/feed-block). +- [Homebrew](https://brew.sh/) to install the [Vespa CLI](/en/clients/vespa-cli), or download the Vespa CLI from [GitHub releases](https://github.com/vespa-engine/vespa/releases). +- [Java 17](https://openjdk.org/projects/jdk/17/). +- [Apache Maven](https://maven.apache.org/install.html) is used to build the application. +</Info> + +### A minimal Vespa application + +To define the Vespa app package using our custom reranking searcher, four files are needed: + +- The schema +- The deployment specification `services.xml` +- The custom reranking searcher +- [pom.xml](https://maven.apache.org/guides/introduction/introduction-to-the-pom.html) + +Start by defining a simple schema with two fields.
We also define a rank profile with two [rank features](/en/reference/ranking/rank-features) to be used in the searcher for re-ranking: + +```js +schema doc { + + document doc { + field name type string { + indexing: summary | index + match: text + index: enable-bm25 + } + + field downloads type int { + indexing: summary | attribute + } + } + + fieldset default { + fields: name + } + + rank-profile rank-profile-with-match { + first-phase { + expression: bm25(name) + } + match-features { + bm25(name) + attribute(downloads) + } + } +} +``` + +<Card> +Paste the above into file my-app/src/main/application/schemas/doc.sd +</Card> + +The searcher implementing the re-ranking logic: + +```java expandable +package ai.vespa.example.searcher; + +import com.yahoo.search.Query; +import com.yahoo.search.Result; +import com.yahoo.search.Searcher; +import com.yahoo.search.result.FeatureData; +import com.yahoo.search.result.Hit; +import com.yahoo.search.searchchain.Execution; + +public class ReRankingSearcher extends Searcher { + @Override + public Result search(Query query, Execution execution) { + int hits = query.getHits(); + query.setHits(200); //Re-ranking window + query.getRanking().setProfile("rank-profile-with-match"); + Result result = execution.search(query); + if(result.getTotalHitCount() == 0 + || result.hits().getErrorHit() != null) + return result; + double max = 0; + //Find max value of the window + for (Hit hit : result.hits()) { + FeatureData featureData = (FeatureData) hit.getField("matchfeatures"); + if(featureData == null) + throw new RuntimeException("No 'matchfeatures' found - wrong rank profile used?"); + double downloads = featureData.getDouble("attribute(downloads)"); + if (downloads > max) + max = downloads; + } + //re-rank using normalized value + for (Hit hit : result.hits()) { + FeatureData featureData = (FeatureData) hit.getField("matchfeatures"); + if(featureData == null) + throw new RuntimeException("No 'matchfeatures' found - wrong rank profile 
used?"); + double downloads = featureData.getDouble("attribute(downloads)"); + double normalizedByMax = downloads / max; //Change me + double bm25Name = featureData.getDouble("bm25(name)"); + double newScore = bm25Name + normalizedByMax; + hit.setField("rerank-score",newScore); + hit.setRelevance(newScore); + } + result.hits().sort(); + //trim the result down to the requested number of hits + result.hits().trim(0, hits); + return result; + } +} +``` + +<Card> +Paste the above into file my-app/src/main/java/ai/vespa/example/searcher/ReRankingSearcher.java +</Card> + +[services.xml](/en/reference/applications/services/services) is needed to make up a Vespa [application package](/en/reference/applications/application-packages). Here we include the custom searcher in the `default` [search chain](/en/applications/chaining): + +```xml expandable +<?xml version="1.0" encoding="utf-8" ?> +<services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties"> + <container id="default" version="1.0"> + <document-api/> + <search> + <chain id="default" inherits="vespa"> + <searcher id="ai.vespa.example.searcher.ReRankingSearcher" bundle="ranking"/> + </chain> + </search> + <nodes> + <node hostalias="node1" /> + </nodes> + </container> + + <content id="docs" version="1.0"> + <redundancy>2</redundancy> + <documents> + <document type="doc" mode="index" /> + </documents> + <nodes> + <node hostalias="node1" distribution-key="0" /> + </nodes> + </content> +</services> +``` + +<Card> +Paste the above into file my-app/src/main/application/services.xml +</Card> + +Notice the `bundle` name of the searcher, this needs to be in synch with the `artifactId` defined in `pom.xml`: + +```xml expandable +<?xml version="1.0"?> +<project xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 + http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + 
<groupId>ai.vespa.example</groupId> + <artifactId>ranking</artifactId> <!-- Note: When changing this, also change bundle names in services.xml --> + <version>1.0.0</version> + <packaging>container-plugin</packaging> + <parent> + <groupId>com.yahoo.vespa</groupId> + <artifactId>cloud-tenant-base</artifactId> + <version>[7,999)</version> <!-- Use the latest Vespa release on each build --> + <relativePath/> + </parent> + <properties> + <bundle-plugin.failOnWarnings>true</bundle-plugin.failOnWarnings> + <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> + <test.hide>true</test.hide> + </properties> +</project> +``` + +<Card> +Paste the above into file my-app/pom.xml +</Card> + +### Starting Vespa + +Now, we have the files and can start Vespa: + +```bash +$ docker pull vespaengine/vespa +$ docker run --detach --name vespa --hostname vespa-container \ + --publish 8080:8080 --publish 19071:19071 \ + vespaengine/vespa +``` + +Install [vespa-cli](/en/clients/vespa-cli) using Homebrew: + +```bash +$ brew install vespa-cli +``` + +Build the Maven project; this step creates the application package including the custom searcher: + +```bash +$ (cd my-app && mvn package) +``` + +Deploy the application to Vespa using vespa-cli: + +```bash +$ vespa deploy --wait 300 my-app +``` + +### Feed data + +Create a few sample docs: + +```json +{ + "put": "id:docs:doc::0", + "fields": { + "name": "A sample document", + "downloads": 100 + } +} +``` + +<Card> +Paste the above into file doc-1.json +</Card> + +```json +{ + "put": "id:docs:doc::1", + "fields": { + "name": "Another sample document", + "downloads": 10 + } +} +``` + +<Card> +Paste the above into file doc-2.json +</Card> + +Feed them using the CLI: + +```bash +$ vespa document doc-1.json && vespa document doc-2.json +``` + +### Query the data + +Run a query - this will invoke the reranking searcher since it was included in the `default` search chain: + +```bash +$ vespa query 'yql=select * from doc where
userQuery()' \ + 'query=sample' +``` + +```json expandable +{ + "root": { + "id": "toplevel", + "relevance": 1.0, + "fields": { + "totalCount": 2 + }, + "coverage": { + "coverage": 100, + "documents": 2, + "full": true, + "nodes": 1, + "results": 1, + "resultsFull": 1 + }, + "children": [ + { + "id": "id:docs:doc::0", + "relevance": 1.1823215567939547, + "source": "docs", + "fields": { + "matchfeatures": { + "attribute(downloads)": 100.0, + "bm25(name)": 0.1823215567939546 + }, + "rerank-score": 1.1823215567939547, + "sddocname": "doc", + "documentid": "id:docs:doc::0", + "name": "A sample document", + "downloads": 100 + } + }, + { + "id": "id:docs:doc::1", + "relevance": 0.2823215567939546, + "source": "docs", + "fields": { + "matchfeatures": { + "attribute(downloads)": 10.0, + "bm25(name)": 0.1823215567939546 + }, + "rerank-score": 0.2823215567939546, + "sddocname": "doc", + "documentid": "id:docs:doc::1", + "name": "Another sample document", + "downloads": 10 + } + } + ] + } +} +``` + +### Teardown + +Remove app and data: + +```bash +$ docker rm -f vespa +``` \ No newline at end of file diff --git a/mintlify-docs/en/ranking/significance.mdx b/mintlify-docs/en/ranking/significance.mdx new file mode 100644 index 0000000000..0c21b65f56 --- /dev/null +++ b/mintlify-docs/en/ranking/significance.mdx @@ -0,0 +1,164 @@ +--- +title: "Significance Model" +--- + +*Significance* is a measure of how rare a term is in a collection of documents. Rare terms like "neurotransmitter" are weighted higher during ranking than common terms like "the". Significance is often calculated as the inverse document frequency (IDF): + + +$$ +\mathrm{IDF}(t, N) = \log\left(\frac{N}{n_{t}}\right) +$$ + + +where: + +- $N$ is the total number of documents in the collection +- $n_{t}$ is the number of documents containing the term $t$ + +Variations of IDF are used in [bm25](/en/ranking/bm25) and [nativeRank](/en/ranking/nativerank). 
+ +*Significance model* provides the data necessary to calculate IDF, i.e. $n_{t}$ for each term and $N$ for the document collection. We distinguish between *local and global* significance models. A local model is node-specific and a global model is shared across nodes. + +## Local significance model + +For `string` fields indexed with [bm25](/en/ranking/bm25) or [nativeRank](/en/ranking/nativerank), Vespa creates a local significance model on each content node from the documents it contains. For each query term, the document frequency is computed by aggregating the per-field document frequencies across all fields being searched. + +Different nodes can have different significance values for the same term. In large collections, this difference is usually small and doesn’t affect ranking quality. + +One issue with the local models is that ranking is non-deterministic in the following cases: + +1. When new documents are added, local models on affected content nodes are updated. +2. When the content cluster [redistributes documents](/en/content/elasticity) across nodes, e.g. adding, removing nodes for scaling and failure recovery, the models change on the nodes involved. +3. When using [grouped distribution](/en/content/elasticity#grouped-distribution), queries can return different results depending on which group processes them. + +Another issue is that local significance models are not available in [streaming search](/en/performance/streaming-search) because inverted indexes are not constructed so IDF values can't be extracted. All significance values are set to 1, which is the default value for unknown terms. The lack of significance values may degrade the ranking quality. + +A global significance model addresses these issues. + +## Global significance model + +In a *global significance model*, significance values are shared across nodes and don’t change when new documents are added. There are three ways to provide a global model: + +1. 
Include [significance values in a query](/en/ranking/significance#significance-values-in-a-query) +2. Set [significance values in a searcher](/en/ranking/significance#significance-values-in-a-searcher) +3. Specify [models in services.xml](/en/ranking/significance#significance-models-in-services-xml) + + +### Significance values in a query + +Document frequency and document count can be specified in YQL, e.g.: + +```js +select * from example where content contains ({documentFrequency: {frequency: 13, count: 101}}"colors") +``` + +Alternatively, significance values can be specified in YQL directly and used instead of computed IDF values, e.g.: + +```js +select * from example where content contains ({significance:0.9}"neurotransmitter") +``` + +### Significance values in a searcher + +Document frequency and significance values can be also set in a [custom searcher](../applications/searchers#writing-a-searcher): + +```js +private void setDocumentFrequency(WordItem item, long frequency, long numDocuments) { + var word = item.getWord(); + word.setDocumentFrequency(new DocumentFrequency(frequency, numDocuments)); +} + +private void setSignificance(WordItem item, float significance) { + var word = item.getWord(); + word.setSignificance(significance); +} +``` + +### Significance models in services.xml + +[`significance` element in services.xml](/en/reference/applications/services/search#significance) specifies one or more models: + +```xml +<container version="1.0"> + <search> + <significance> + <model model-id="significance-en-wikipedia-v1"/> + <model url="https://some/uri/mymodel.multilingual.json" /> + <model path="models/mymodel.no.json.zst" /> + </significance> + </search> +</container> +``` + +Vespa Cloud users have access to [pre-built models](/en/rag/model-hub), identified by `model-id`. In addition, all users can specify their own models by providing a `url` to an external resource or a `path` to a model file within the application package. 
Vespa provides a [command line tool](/en/reference/operations/self-managed/tools#vespa-significance) to generate [model files](#significance-model-file) from documents. The order in which the models are specified determines the model precedence, see [model resolution](#model-resolution) for details. + +In addition to adding models in [services.xml](/en/reference/applications/services/search#significance), the `significance` feature must be enabled in the [`rank-profile` section of the schema](/en/reference/schemas/schemas#significance), e.g. + +```js +schema example { + document example { + field content type string { + indexing: index | summary + index: enable-bm25 + } + } + + rank-profile default { + significance { + use-model: true + } + } +} +``` + +The model will be applied to all query terms except those that already have significance values from the query. + +Specifying significance models in services.xml is available in Vespa as of version 8.426.8. + +#### Significance model file + +The significance model file is a JSON file that contains term document frequencies and document count for one or more languages, e.g. + +```json expandable highlight= {12,21} +{ + "version": 1, + "id": "wikipedia", + "description": "Some optional description", + "languages": { + "en": { + "description": "Some optional description for English model", + "document-count": 1000, + "document-frequencies": { + "and": 500, + "car": 100, + ... + } + }, + "no": { + "description": "Some optional description for Norwegian model", + "document-count": 800, + "document-frequencies": { + "bil": 80, + "og": 400, + ... + } + } + } +} +``` + +A significance model file can be compressed with [zstandard](https://facebook.github.io/zstd/) when included in the application package or made available via a URL. + +Vespa provides a [CLI tool](/en/reference/operations/self-managed/tools#vespa-significance) for generating significance model files from Vespa documents. 
The tool uses the same [linguistic module](/en/linguistics/linguistics) as query processing to extract tokens and their document frequencies. The CLI can also export local significance models from content nodes to an intermediate file format and merge these intermediate files into a single file, which can then be converted into a global significance model file. + +#### Model resolution + +Model resolution selects a model from the models specified in [services.xml](#significance-models-in-servicesxml) based on the language of the query. The language can be either [explicitly tagged](/en/reference/api/query#model.language) or [implicitly detected](/en/linguistics/linguistics#query-language-detection). + +The resolution logic is as follows: + +- When language is explicitly tagged + - Select the last specified model that has the tagged language. Fail if none are available. + - If the language is tagged as “un” (unknown), select the model for “un” first, fall back to “en” (English). Fail if none are available. +- When language is implicitly detected + - Select the last specified model with the detected language. If not available, try “un” and then “en” languages. Fail if none are available. \ No newline at end of file diff --git a/mintlify-docs/en/ranking/stateless-model-evaluation.mdx b/mintlify-docs/en/ranking/stateless-model-evaluation.mdx new file mode 100644 index 0000000000..c269157bb0 --- /dev/null +++ b/mintlify-docs/en/ranking/stateless-model-evaluation.mdx @@ -0,0 +1,151 @@ +--- +title: "Stateless Model Evaluation" +--- + +Vespa's speciality is evaluating machine-learned models quickly over large numbers of data points. However, it can also be used to evaluate models once on request in stateless containers. 
By enabling a feature in [services.xml](/en/reference/applications/services/services), all machine-learned models - [TensorFlow](/en/ranking/tensorflow), [Onnx](/en/ranking/onnx), [XGBoost](/en/ranking/xgboost), [LightGBM](/en/ranking/lightgbm) and [Vespa stateless models](/en/reference/ranking/model-files) - added to the `models/` directory of the [application package](/en/reference/applications/application-packages), are made available through both a REST API and a Java API where you can compute inferences from your own code. + +An example application package can be found in the [model-evaluation system test](https://github.com/vespa-engine/system-test/tree/master/tests/container/model_evaluation/app). + +### The model evaluation tag + +To enable both the REST API and the Java API, add the `model-evaluation` tag inside the [container](/en/applications/containers) clusters where it is needed in [services.xml](/en/reference/applications/services/services): + +```bash highlight= {3} +<container> + ... + <model-evaluation/> + ... +</container> +``` + +The `model-evaluation` section can optionally contain inference session options for ONNX models. See [ONNX inference options](#onnx-inference-options). + +## Model inference using the REST API + +The simplest way to evaluate the model is to use the REST API. After enabling it as above, a new API path is made available: `/model-evaluation/v1/`. To discover and find information about the models (including expected input parameters to the model) in your application package, simply follow the links from this root. To evaluate a model add `/eval` to the query path: + +```bash +http://host:port/model-evaluation/v1/<model-name>/<function>/eval?<param1=...>&... +``` + +Here `<model-name>` signifies which model to evaluate as you can deploy multiple models in your application package. The `<function>` specifies which signature and output to evaluate as a model might have multiple signatures and outputs you can evaluate. 
If a model only has one function, this can be omitted. Inputs to the model are specified as query parameters for GET requests, and they can also be in the body part of the request for POST requests. Input parameters are expected to be tensors specified in the [literal form](/en/reference/ranking/tensor#tensor-literal-form). + +See the [model-inference sample app](https://github.com/vespa-engine/sample-apps/tree/master/model-inference) for an example of this. + +### Model evaluation REST API parameters + +Model evaluation requests accept these request parameters: + +| Parameter | Type | Description | +| --- | --- | --- | +| **format.tensors** | String | Controls how tensors are rendered in the result.<br/><br/> Value: `short` <br/>Description:<br/> **Default**. Render the tensor value in a JSON object having two keys, "type" containing the value, and "cells"/"blocks"/"values" ([depending on the type](/en/reference/schemas/document-json-format#tensor)) containing the tensor content. Render the tensor content in the [type-appropriate short form](/en/reference/schemas/document-json-format#tensor). <br/><br/> Value: `long`<br/>Description:<br/> Render the tensor value in a JSON object having two keys, "type" containing the value, and "cells" containing the tensor content. <br/> Render the tensor content in the [general verbose form](/en/reference/schemas/document-json-format#tensor). <br/><br/> Value: `short-value` <br/>Description:<br/> Render the tensor content directly as a JSON value.<br/> Render the tensor content in the [type-appropriate short form](/en/reference/schemas/document-json-format#tensor). <br/><br/> Value: `long-value` <br/>Description:<br/> Render the tensor content directly as a JSON value. <br/> Render the tensor content in the [general verbose form](/en/reference/schemas/document-json-format#tensor). 
<br/><br/> Value: `string` <br/>Description:<br/> Render the tensor content as a string on the [appropriate literal short form](/en/reference/ranking/tensor#tensor-literal-form). <br/><br/> Value: `string-long`<br/>Description:<br/> Render the tensor content as a string on the [general literal form](/en/reference/ranking/tensor#general-literal-form). | + +## Model inference using Java + +While the REST API gives a basic interface to run model inference, the Java interface offers far more control allowing you to for instance implement custom input and output formats. + +First, add the following dependency in `pom.xml`: + +```xml +<dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>container</artifactId> + <scope>provided</scope> +</dependency> +``` + +(Or, if you want the minimal dependency, depend on `model-evaluation` instead of `container`.) + +With the dependency above and the `model-evaluation` element added to `services.xml`, you can now have your Java component that should evaluate models take a `ai.vespa.models.evaluation.ModelsEvaluator` (see [ModelsEvaluator.java](https://github.com/vespa-engine/vespa/blob/master/model-evaluation/src/main/java/ai/vespa/models/evaluation/ModelsEvaluator.java)) instance as a constructor argument (Vespa will [automatically inject it](/en/applications/dependency-injection)). + +Use the `ModelsEvaluator` API (from any thread) to make inferences. Sample code: + +```java expandable +import ai.vespa.models.evaluation.ModelsEvaluator; +import ai.vespa.models.evaluation.FunctionEvaluator; +import com.yahoo.tensor.Tensor; + +// ... 
+ +// Create evaluator +FunctionEvaluator evaluator = modelsEvaluator.evaluatorOf("myModel", "mySignature", "myOutput"); // Unambiguous args may be skipped + +// Get model inputs for instance from query (here we just construct a sample tensor) +Tensor.Builder b = Tensor.Builder.of(new TensorType.Builder().indexed("d0", 3)); +b.cell(0.1, 0); +b.cell(0.2, 1); +b.cell(0.3, 2); +Tensor input = b.build(); + +// Bind inputs to the evaluator +evaluator.bind("myInput", input); + +// Evaluate model. Note: Evaluator must be discarded after a single use +Tensor result = evaluator.evaluate(); + +// Do something with the result +``` + +The [model-inference sample app](https://github.com/vespa-engine/sample-apps/tree/master/model-inference) also has an example of this. + +## Unit testing model evaluation in Java + +When developing your application it can be helpful to unit test your models and/or your searchers and document processors during development. Vespa provides a `ModelsEvaluatorTester` which can be constructed from the contents of your "models" directory. This allows for testing that the model works as expected in the context of Vespa, and that your searcher or document processor gets the correct results from your models. + +The following dependency is needed in `pom.xml`: + +```xml +<dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>container-test</artifactId> + <scope>test</scope> +</dependency> +``` + +With this you can construct a testable `ModelsEvaluator`: + +```java +import com.yahoo.vespa.model.container.ml.ModelsEvaluatorTester; + +public class ModelsTest { + @Test + public void testModels() { + ModelsEvaluator modelsEvaluator = ModelsEvaluatorTester.create("src/main/application/models"); + + // Test the modelsEvaluator directly or construct a searcher and pass it in + + } +} +``` + +The `ModelsEvaluator` object that is returned contains all models found under the directory passed in. Note that this should only be used in unit testing. 
+ +The [model-inference sample app](https://github.com/vespa-engine/sample-apps/tree/master/model-inference) uses this for testing handlers, searchers, and document processors. + +## ONNX inference options + +ONNX models are evaluated using [ONNX Runtime](https://onnxruntime.ai/). Vespa provides the following options to tune inference: + +```bash +<model-evaluation> + <onnx> + <models> + <model name="reranker_margin_loss_v4"> + <intraop-threads>[number]</intraop-threads> + <interop-threads>[number]</interop-threads> + <execution-mode>parallel | sequential</execution-mode> + <gpu-device>[number]</gpu-device> + </model> + </models> + </onnx> +</model-evaluation> +``` + +| Attribute | Required | Value | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| intraop-threads | optional | number | max(1, CPU count / 4) | The number of threads available for running operations with multithreaded implementations. | +| interop-threads | optional | number | `max(1, CPU count / 4)` if execution mode `parallel` | The number of threads available for running multiple operations in parallel. This is only applicable for `parallel` execution mode. | +| execution-mode | optional | string | sequential | Controls how the operators of a graph are executed, either `sequential` or `parallel`. | +| gpu-device | optional | number | | Set the GPU device number to use for computation, starting at 0, i.e. if your GPU is `/dev/nvidia0` set this to 0. This must be an Nvidia CUDA-enabled GPU. | + +Since stateless model evaluation is based on auto-discovery of models under the `models` directory in the application package, the above would only be needed for models that should not use the default settings, or should run on a GPU. 
\ No newline at end of file diff --git a/mintlify-docs/en/ranking/tensor-examples.mdx b/mintlify-docs/en/ranking/tensor-examples.mdx new file mode 100644 index 0000000000..aeb4551eab --- /dev/null +++ b/mintlify-docs/en/ranking/tensor-examples.mdx @@ -0,0 +1,275 @@ +--- +title: "Tensor Computation Examples" +sidebarTitle: "Tensor Examples" +--- + +Tensors can be used to express machine-learned models such as neural nets, but they can be used for much more than that. The tensor model in Vespa is powerful, since it supports sparse dimensions, dimension names and lambda computations. Whatever you want to compute, it is probably possible to express it succinctly as a tensor expression - the problem is learning how. This page collects some real-world examples of tensor usage to provide some inspiration. + +## Tensor playground + +The tensor playground is a tool to get familiar with and explore tensor algebra. It can be found at [docs.vespa.ai/playground](https://docs.vespa.ai/playground/). Below are some examples of common tensor compute operations using [tensor functions](../reference/ranking/ranking-expressions#tensor-functions). 
Feel free to play around with them to explore further: + +<CardGroup> + <Card title="Dense tensor dot product" icon="grid-2" href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QGNIAaFDSPBdDTAF30gBEBTAOwGdmxa32sAnMABMstMDn5YhAVwK0AdGABiAS37sxAdy5Dm0Fay4BDbrwHswAAwCClsEdZCrAITtGLujlx4dzcSGSoAL6BQaQY1OS4DMwkgRAU+JE0kKwM1nE0mLEIkD58-AAUAB6IAMwAugCUcIgADMRgdfIAjI11FQFZIRhhgckJ0bmx4SmUaPGYabnOmVlQOVD5AiXl1bXNbc0ATMQdXTQ9waMQA1BDhHPk42cJ9LkAKgAW3ppYpr7qYE9GAG7eLzA7CMAFsdCowRwVFhWGBWKDmOxGnx7GAQdIADa0FQ4DEqAhGbEwsAqCzMDHMSG0AC0mlJzH8kyOED6EUm2HGkBG7JukwS0ygVxSi0g1jAACowLMmaEThMshyGEQ5QlefMlgxnjpROJJDI5CSLLRAexpCCLFh-oJjVwCFgQSDiUIIbxoaxGd1Zf12RcuULVUk+VABZB-dkGKaQYVrOLnFUDr1QigKiAgkA" horizontal /> + <Card title="Sparse tensor dot product" icon="grid" href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QGNIAaFDSPBdDTAF30gGUcBDAJwGcBTMWrgOw5Y2YACZZaYHGyyiArgVoA6MADEAlp1rEwAdx6iu0dfx61dWMB1aczAoZzAADAIJOwLfqOcAhJyoAdfgA5CXUCMwALHgiAG1iOXkiWSWSANwEAckkAIy4BMSMTLm9xLkT+CTB0nhYwNJZYuS44SDJUAF92jtIManJcBi4SdogKfH6aSH4GFxGaTGGESD5BYQAKAA9gDoBKOGAARjgABiVDjraFrowe9smxweXh3qnKNFHMGeWfeYWoJZQVYOLY7fbAE6nc7EY5nABMl0+N06rwgDygT0If3I73RY3oywAkvwkjwALZyWK0dQ4WLhFLqLD8HRM2IAT1JYDJKQIkRMAHMwHEEh42DFGgRKSkSq0kd1UR8Fth3pAXp9xlRPmNvlBsVNAZAXGAAFRgX5y24K9HKhhEBVjXFaoEMAAq0TEVWksgUknUiVo7opVJpdIIDKZYGgWHiWH03hyHLqHDkZLAWAyIgDMSwZLJEdE6jJ9kZ-Fl13l93VmNVeodEyd0wYtcWDGTZPWLmNPh0m12VxoyIgdz6VZVdvVjv+zuWbr9YDnXAAjnJ1A1YgJJLQsGWBxWR0rq2qlZP-o3ls2xgaxfIIusAFZYEwdnTdyNbYhs3ZbY2f3Y6Nt9hanQoAAuiAHRAA" horizontal /> + <Card title="Vector-matrix product" icon="chart-bar" 
href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QGNIAaFDSPBdDTAF30gDUBTA2rAJwFoBbAQ1ocAlgA8wODlgAmAVzYA6MADEhHAM60wAd2ZgpzaEIB2uvmABurdhzAADRrbB8jUp2H6DRdgLK24kMlQAX0Cg0gxqclwGZhJAiAp8SJpIIwZGOJpMWIRIWmYjNU4AChFEAGYAXQBKOEQABnkAVmIwRpa25sqArJCMMMDkhOjc2PCUyjR4zDTc70ysqByofMKSggrK4jKq2sQG+XrWxoBGY-kAJi2kE8PW27OwW6vWxAu7sHfH96vu6b7guMIEMoCNCAtyJMQQl6LkAIIWKycXgCYRiCTSOSaIRqMC0AAWuh4MgANrQhDgSUICAIhFgjGAsNA8YTEWxOE4XO5UV5nK41DIeDxjABzRmWGwE3QELBC+l6IQ8ApqOlGfz-UJAqZZbCTSBjaaJKjTBKzKAQlLLSACnjFRhgABUYG8rRE1R6NABEAGEUNYMgRC1CShJpWDAAclh8iyBCzdJx9DYme5SeTKdTafKpFhmGojAByTQefIcRQASWxuKlCqVhVVeKwYBt4uYkvxseLrbU8g9-U1gz9eoNOpDiygZsgFvIVptxW8jrAjFd7o1-S1IN1DEDhtHizyDAAKqy+AAjLCWMA4sDMACOMiE5j4JIKmnY6t6-d9Ov9w4mSVDqQMFO2QMBwzCyAQzDFAAVlgxh2q0LpgNApTEAAntUpQLhh1StDay69sEoQoJUIBBEAA" horizontal /> + <Card title="Matrix multiplication" icon="table-layout" href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QGNIAaFDSPBdDTAF30gFEAPAQwFscAbAUzC2hhWYdq1oAnAJbMAtKInSwOcVgAmAVwK0AdGABik8QGdaYAO59VPaJIB2fWmawixUgjyPEwAAwCC3oVtVHwAhb21IMlQAXyjo0gxqclwGHhIoiAp8JJpIWwZfdJpMNIRIWh5bIyxxAApmRAAmAF1iAE9EAGZmgEo4REQABm0ARmJhxvHtbuJEEdHiecn57ubI4tiMeKiczJSytITcyjQMzHyykKLiqFKoCqqa2o6ZgC8m3v6hxdahhfmRr8JottC01mdNjEjhBdlB9oRruQTrDMvQygAVAAWfGUak0pgELgU7iMPn8gWC3jC5kkXC4YExrAAbnxWHSwAQsOwAEZ2HjBVSSdiVIySLBVXRYvic9jscVgQXCqpi2w+NoBdjqEwAHVsjJZYFo2LARg4fFFrz4djA3KwRsNIpqpNq0BqCpFDkdxj66xokIg20SZ2wJ0gh2DyLOmQuUERuTukF8YAAVGArhC4tDTsUQwwiFnMpGbvcGFKiVJmCJ1FxaJJuJICGIVWBJKSjeb1Ox+CzxIbjTK5arFSLm951V5xB5q7XbABzFuq4TyCv8QQWvjeZjEV4BViknjMHA8LT8uC+raZnbB+FhuOF7JRqAxyB3koMIyd2pJ1MhLxtHrnjEl5BjmN75hGD7FuUpaYq2Lb7gAjuokhMmylSmLQWBnhmWxZrCuYHK+WRUI+eQMK+mQJpOGjuLUABWWB2F+xC-tA9TtD09TJv+PTEB+7AcYBAZxCgzQgNEQA" horizontal /> + <Card title="Tensor generation, dimension renaming and concatenation" icon="shuffle" 
href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QGNIAaFDSPBdDTAF30gBUBTAOwGcsAnMAczeZcAhrQCWWVsTAATUQFs27cazBc2QuaNa8wQ1tLAEJBEerESAdGABioru1pgA7sxnNoW17SdYwtRdzsYAAUAG7MBLSBAJRgADqsAAYAgom6+mCJAEJpAK5K2rp+ATz8rIIiymDMAB44auxKEnCQZKgAvm3tpBjU5LgMzCRtEBT4fTSQrAzJwzSYQwiQ-hzcwaKIAMwAutHr0a3znRjdbROjA0tDPZOUaCOY00tZc-NQi1ArnFzriABMu3WAGpNgcHscOjcIOcoJdCK9yHcYaN6EsAOquEwqIysEwrUx+AAWzDkfiw-FoxJ4EiJmKwcjkNNkCg4yha4K6UPu82wd0g1weYyoD1GTygCMmH0gOLxwWSxCyxFEYKOnLOgrh0oloyRIs+DAASupNIVKa5mYoqolRGktGAsmTMgArRLs1UnLkw3mDbVQXVvKBiyAAYV9oylalYGmYwUVomITpVNAhEFOvQ1fKIXJ14z1ywYGMMekMxlMUf8mVS6QMiWDaRpzFEZp4FtZNOt9Z4iRdbuTavTPM1Ap5-reUwYYfeDBlIjlxGDSqTJ37qC9Q99Qu5Y6Dk-509LtDnC8Thz7y4g2xA7SAA" horizontal /> + <Card title="Jaccard similarity between mapped (sparse) tensors" icon="code-branch" href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEMAXZgJwEsAjAV2YIAUAYxYEA5lk4EAzgEoSZGpCIJI-WtMkAeDrWYAOAHzCWwAL6y4wADqRpOSc2m2EARmJhbtAgHdn8d08YXUZaIQIXVzNIRQgzRTNSDGpyXAYiJKVKNFi6BgBHHgI2AE8TfgkpOQUaTBUodU02HT0jcvNLGzsHNicXMHdbaBCwiIDo3PiMRMUUzDTVDNyKfDnyelUAK0YhETYAEwB9LG9DgBtdJdqIZQZpHgBbAULispEKyQ4ZWTAAKjAWOxuHxBO9xJ9vrIAPRgARge5PB7FMSCF6lcrgqqyDyAzi8fgYypfOQeaACAAexBKsgErlk9LA8kmCRQAF0QGYgA" horizontal /> + <Card title="Neural network" icon="microchip" 
href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QGNIAaFDSPBdDTAF30gDkBTAVwCcBDAGzADtmtAO5Z2AazDMAHpwC2ObszAAKAMxSArFICMASgB0YAGIBLdgGdaYIUoAmzaCYFhaACyUDhoiU5ytacJBkqAC+wSGkGNTkuAzMJMEQFPjRNJB8DL7+CTSY8QiQtMx85qLKWbSIqgC6unCIAAz62sRgTS1tzdVBuWEYEcGpSbEFRJFplGiJdAwAKu581nYOTkpuSq4mtvaL3JwAnszsSyYA5q605mCcfLZgAEYmnOaB032h4xBDUCNQ8Z+YSbfJIZApCTbbYo5XJ-BhFEplCpVarECE7RAaWr1AA6iyQAFomgB2bQADgAnAAmYjafTk8naSmk4hNBoAFjZZOIhOapMpbNULP0DX59JRuIgjX0ADZSUTSRpubTpfyNMympTVNL6ULpRoiUS2UKNOS9aTxXjEPjaRoOdKjRryQ12ULtNLpdpFbTDdpVOr9KptALMbjum9wgDvthJpB-tNklRpiCGPc0VCAeR8lB4aV2Mo03wMVipUyRUShSL9YrWc6PULKR7tC0eW7pQ1yWHehHBvHfpAxvGgUns3MFkswPZHM51mApzwwHtDscbGcLlcbndHs9Xl3+pHezG47kE1MYVBQVAhHPuNDj1nCsVc-mtujMcRr4htMWJQTpfo+RospKjaGiUvK3KsoG5Llqo+gaPqDYQQG0pEqoRLVKGPQ0O8EADFEB5xLegIpMO6QpteRGZnCj5lB+X51FKDR6kSbadth3b4cefYDseQ5noUDCmBYViLkc1wELQJgAG6cJJWB8K0rDmE4pzXGAAAG7DMNwrDqeJkkyXJizQKwfASSY8n6Fh-QcagUZ9keEwkfxF6QJReQMOYrCyOUfB+FYABU1gFq0FS6GAADUDwFtZoS2V8BEFI55B8TCZEFKJ7DaO5ST3lpOnKF5PkVGAQXgi+xShX5-jhVFqYVXwuixbh8Wnmk3HuSewIjgUACiACOrDSTwxS0Nw+yKcpfCqTO6lSEIAD6ChKQt9x6SZfBGTu7F7j2XGHp1qXHq5PBFOwfCydJzALZl2UZh5BT5awyjzUtOnmKtvn+a05WQgp0UNVV-m6E14a7Zx7UxjxTmJvx9AFKYF28Jl+nDUZk0qWpymnLIWBbKjhkWcZplbc1YA4eT+77YR91daRrk5bCBRFcot2ldY16tAWtUPBRYMfHtkM04Ozlpa5mWUozsaeWcuNbIV3mswcRzaOzV5ODwXMNTz9zXqDu4CxDMRQ4dovHvDUCI-OXB8BI5gEKIzDbTZ4N2YlsK00daQM-duWeYrEv6ztcUQNUIAhEAA" horizontal /> +</CardGroup> + +## Values that depend on the current time + +In an ecommerce application you may have promotions that sets a different product price in given time intervals. Since the price is used for ranking, the correct price must be computed in ranking. Can tensors be used to specify prices in arbitrary time intervals in documents and pick the right price during ranking? 
+ +To do this, add three tensors to the document type as follows: + +```js +field startTime type tensor(id{}) { + indexing: attribute +} +field endTime type tensor(id{}) { + indexing: attribute +} +field price type tensor(id{}) { + indexing: attribute +} +``` + +Here the id is an arbitrary label for the promotion which must be unique within the document, and startTime and endTime are epoch timestamps. + +Now documents can include promotions as follows ([document JSON syntax](/en/reference/schemas/document-json-format)): + +```json +"startTime": { "cells": { "promo1": 40, "promo2": 60, "promo3": 80 } }, +"endTime": { "cells": { "promo1": 50, "promo2": 70, "promo3": 90 } }, +"price": { "cells": { "promo1": 16, "promo2": 18, "promo3": 10 } } +``` + +And we can retrieve the currently valid price by the expression + +```js +reduce((attribute(startTime) < now) * (attribute(endTime) > now) * attribute(price), max) +``` + +This will return 0 if there is no matching interval, so a full expression will probably wrap this in a function, check whether it returns 0 (using an if expression), and return the default price of the product in that case. + +To see why this retrieves the right price, notice that `(attribute(startTime) < now)` [is a shorthand for](../reference/ranking/ranking-expressions#non-primitive-functions) + +```js +join(attribute(startTime), now, f(x,y)(x < y)) +``` + +That is joining all the cells of the `startTime` tensor by the zero-dimensional `now` tensor (i.e. a number), and setting the cell value in the joined tensor to 1 if now is larger than the cell timestamp and 0 otherwise. When this tensor is joined by multiplication with one that has 1's only where now is smaller, the result is a tensor with 1's for promotion ids whose interval is currently valid and 0 otherwise. Then we can just join by multiplication with the price tensor to get the final tensor (on which we just pick the max value to retrieve the non-zero value). 
+ +<CardGroup cols={1}> + <Card title="Play around with this example in the playground" icon="play-circle" href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gGcAXAQwCdmAVASwFsipGpiIJIzArUZZ2ACh4ATYAF8AlHGA52WPlgCMCACwAGYmC06sAJgQA2U+e26AzAgAcx5ZDKplP5UKo1OS4DII+EBT4wTSQ9GKSCrwCJBEiDBJSMvJKahoWugZgAKwOBdYIAOxlTliuYACcnt7CfhgBPjGRoQmpwlFUaZHxUFo8AMbhwpGiUJnScooq6pq1RXq2ZuU2YHpuW7X1es1pbb6BEF1QPYR9sZRoQ1AjcVgA7nf9s5C2xS00ZwgHQwV2wD0gU3u0SecQYn1i3xYHG4-AIYAAPGBaO9-u1-BdHv0bhD4ZEHldhnCCeRvolkmiAHxYnGnfGdNJgsKk0bQ6bPKkw76yJGcekY5lvVRgABUYFkdNRYCZ2MluN8bJBHOJkPI5JhL25MwYwrYosVmJVUtl8toSUVyveVscEyIrPaBNB2u5A0J01hYkNtzE7AICgArpNZCbkWKLY6ZXKFQIlRKnWNJmY+KwAB6qNVA-woAC6IGUQA" horizontal /> +</CardGroup> + +## Adding scalars to a tensor + +A common situation is that you have dense embedding vectors to which you want to add some scalar attributes (or function return values) as input to a machine-learned model. This can be done by the following expression (assuming the dense vector dimension is named "x": + +```js +concat(concat(query(embedding),attribute(embedding),x), tensor(x[2]):[bm25(title),attribute(popularity)], x) +``` + +This creates a tensor from a set of scalar expressions, and concatenates it to the query and document embedding vectors. 
+ +<CardGroup cols={1}> + <Card title="Play around with this example in the playground" icon="play-circle" href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEMAXZgJwEsAjAV2YIAUBALZcCAE3EdaAcwCUJMjUhEEkfrQDOWNgIAeiAEwBdOXEQBGYichKIAXyX3SGauVwMiL5ZTR26DACOPARsAJ5CohJSsgreNIQMGtq6BiZmiADMAHQADMRgACx5xrYJjhjOSm6YHmpe-hT4NeT0alzChgCsAswczAA2BHH+ECoMhsWZZTQVqFWujXWJ8Zi+LQFqLOzcfII4uDwDjJzMYSMJY6pQXTOVTqsb2L7jq2Pro1BtUIqXiWoAYywtABLAEQJBYOCoQiIjEkmk8mI204vH4kXhMSRejkBWSOn0RlM5g63V6-SGuJRu3RBxwRxO-XOxgKOLu8ycKGMIHsQA" horizontal /> +</CardGroup> + +## Dot Product between query and document vectors + +Assume we have a set of documents where each document contains a vector of size 4. We want to calculate the dot product between the document vectors and a vector passed down with the query and rank the results according to the dot product score. + +The following schema file defines an attribute tensor field with a tensor type that has one indexed dimension `x` of size 4. In addition, we define a rank profile with the input and the dot product calculation: + +```js +schema example { + document example { + field document_vector type tensor<float>(x[4]) { + indexing: attribute | summary + } + } + rank-profile dot_product { + inputs { + query(query_vector) tensor<float>(x[4]) + } + first-phase { + expression: sum(query(query_vector)*attribute(document_vector)) + } + } +} +``` + +Example [JSON](/en/reference/schemas/document-json-format#tensor) document with the vector [1.0, 2.0, 3.0, 5.0], using indexed tensors short form: + +```json +[ + { + "put": "id:example:example::0", + "fields": { + "document_vector" : [1.0, 2.0, 3.0, 5.0] + } + } +] +``` + +Example query set in a searcher with the vector [1.0, 2.0, 3.0, 5.0]: + +```java +public Result search(Query query, Execution execution) { + query.getRanking().getFeatures().put("query(query_vector)", + Tensor.Builder.of(TensorType.fromSpec("tensor<float>(x[4])")). + cell().label("x", 0).value(1.0). 
+ cell().label("x", 1).value(2.0). + cell().label("x", 2).value(3.0). + cell().label("x", 3).value(5.0).build()); + return execution.search(query); +} +``` + +<CardGroup cols={1}> + <Card title="Play around with this example in the playground" icon="play-circle" href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEcBXAgJwE8AKF9jgfQBuBAMYAXLGwCUJMjUhEEkMQVoBnSVwAeiACwBdKXEQBGYgCZiAZmIBWfZDkQAvnOekM1crgZEP8yjQnOgYAQzExNgBLACNmFS4AEywRZgBbVTEhUQlpWRpMRSgVdU0dAyNTC2s7B2DXDHc5L0wfJT9ginwW8nolfILCBjV0nlZOMb5s8UkpMAAqMHDI2PiCJJT0zOncqRl6txR9EGcgA" horizontal /> +</CardGroup> + +Note that this example calculates the dot product for every document retrieved by the query. Consider using [approximate nearest neighbor](/en/querying/approximate-nn-hnsw) search with `distance-metric` [dotproduct](/en/reference/schemas/schemas#distance-metric). + +## Logistic regression models with cross features + +One simple way to use machine-learning is to generate cross features from a set of base features and then do a logistic regression on these. How can this be expressed as Vespa tensors? + +Assume we have three base features: + +```js +query(interests): tensor(interest{}) - A sparse, weighted set of the interests of a user. +query(location): tensor(location{}) - A sparse set of the location(s) of the user. +attribute(topics): tensor(topic{}) - A sparse, weighted set of the topics of a given document. +``` + +From these we have generated all 3d combinations of these features and trained a logistic regression model, leading to a weight for each possible combination: + +```js +tensor(interest{}, location{}, topic{}) +``` + +This weight tensor can be added as a [constant tensor](/en/reference/schemas/schemas#constant) to the application package, say `constant(model)`. 
With that we can compute the model in a rank profile by the expression + +```js +sum(query(interests) * query(location) * attribute(topics) * constant(model)) +``` + +Where the first three factors generates the 3d cross feature tensor and the last combines them with the learned weights. + +<CardGroup cols={1}> + <Card title="Play around with this example in the playground" icon="play-circle" href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEcBXAgJwE8AKAS1oBd2BAM79hAShJkakIgkiDawrG14Cho4AF9xcYAGMAhm2FwADADoAHMTABbZsJ77zFgKxbI0iFulbSGNTkuAxEATKUaN50DCzs3AA2WEb8PFi0kuE0hAyKyqpJKWm02rrAtAQA7gD6HCoA1nAAjJ7Rvhj+0kGYIfJh0RT43eT08ob8-Gw8AEbMglz8uM4SUtkQsrkESioLS-qlek0ATADMzRZmrdntqJ2BA705WZiRwzHy+umihgJcdlgAEwICUy0XWcigeR2fEEbBE-G0xEK42KiMWOGcBzQAB1aBBgDCNPw4EYTEjkij0nAKjU6mx6sR0c5mqctK4msRcfjCXDRCTjMJyUUqTTag1GXs4CcACweVxHTl4tA8+H8snI1JU4Q-arQNg-fQ8YSfCUYlzHE5sywnRXc9S84mkwUa4pwbW0XX62iG41YU3MmVyyzS23K+2qhxOfRCym0alVMX0-3m1muNyhgnhvmR5wxzVx0V0hlMlyBq0WABsGZV2ccuZdWp1eoNRpNJZZltcVa5YdhEbr0YbcfdnpbvuTUtl5YA7NWs8TplgsPVnRT8-HaeL2xbyzYe5m+3zF8vV8KCwmixOy64AJxzw8LpcrvOukfN72tv3b1NNC73olwMez5Dm6TZej6baStev5NLiVw0DcPjPG82CRBszzrK8YJQKMUCrGsGzyMIzB2FwcScGoD4SGAABUYDkYka7FOItFgOMkwzHMBC7Ga1F0Z8Sj8D8-B-ICwLiJIbR+CgAC6IBaEAA" horizontal /> +</CardGroup> + +## Matrix Product between 1d vector and 2d matrix + +Assume we have a 3x2 matrix represented in an attribute tensor field `document_matrix` with a tensor type `tensor<float>(x[3],y[2])` with content: + +```json +{ {x:0,y:0}:1.0, {x:1,y:0}:3.0, {x:2,y:0}:5.0, {x:0,y:1}:7.0, {x:1,y:1}:11.0, {x:2,y:1}:13.0 } +``` + +Also assume we have 1x3 vector passed down with the query as a tensor with type `tensor<float>(x[3])` with content: + +```json +{ {x:0}:1.0, {x:1}:3.0, {x:2}:5.0 } +``` + +that is set as `query(query_vector)` in a searcher as specified in [query feature](/en/ranking/ranking-expressions-features#using-query-variables). 
+ +To calculate the matrix product between the 1x3 vector and 3x2 matrix (to get a 1x2 vector) use the following ranking expression: + +```bash +sum(query(query_vector) * attribute(document_matrix),x) +``` + +This is a sparse tensor product over the shared dimension `x`, followed by a sum over the same dimension. + +<CardGroup cols={1}> + <Card title="Play around with this example in the playground" icon="play-circle" href="https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEMAXZgJwEsAjAV2YIAUAEywBjHgFsCtZgH0JLTgA8AlCTI1IRBJH60AzljYAeaABssLAHwCliAMwBdYgE9EAJkcq4iRAEZiAHZnB2I-PxCAVjCnR0gNCABfDUTSDGpyXAYiNM1KNAS6BgBHHgI2FwFS8pdZADcCUWYjNVyaQgY9QxNzS2YbOydvf2J7Yki4wuSMVI0MzCydHMKKfHnyeh11dogtBn1JKrKKo5r6xua2FTAAKjAWdm4+QRFxKRl5RQ5VYlV49umqEBjhAiSAA" horizontal /> +</CardGroup> + +## Using a tensor as a lookup structure + +Tensors with mapped dimensions look similar to maps, but are more general. What if all needed is a simple map lookup? See [tensor performance](/en/performance/feature-tuning#mapped-lookups) for more details. + +Assume a tensor attribute `my_map` and this is the value for a specific document: + +```json +tensor<float>(x{},y[3]):{a:[1,2,3],b:[4,5,6],c:[7,8,9]} +``` + +To create a query to select which of the 3 named vectors (a,b,c) to use for some other calculation, wrap the wanted label to look up inside a tensor. Assume a query tensor `my_key` with type/value: + +```json +tensor<float>(x{}):{b:1.0} +``` + +Do the lookup, returning a tensor of type `tensor<float>(y[3])`: + +```js +sum(query(my_key)*attribute(my_map),x) +``` + +If the key does not match anything, the result will be empty: `tensor<float>(y[3]):[0,0,0]`. 
For something else, add a check up-front to check if the lookup will be successful and run a fallback expression if it is not, like: + +```js +if(reduce(query(my_key)*attribute(my_map),count) == 3, + reduce(query(my_key)*attribute(my_map),sum,x), + tensor<float>(y[3]):[0.5,0.5,0.5]) +``` + +<Note> + **Note:** + +The above can be considered the same as creating a [slice](../reference/ranking/ranking-expressions#slice), like `(y*x){x:b}`. The above syntax allows an optimized execution, find an example in the [Tensor Playground](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gFssATAgGwGcSybIiEkAC4FanLACcAFIwD6ASxbAAvsQAeiAMwBdAJRxgjAIxxEAFgB0ABmJgArNdsA2a9tuMATKYDsjsAA4-AE5XZUheCGVeVV5qclwGIlIaKEo0CLoGZjZ2BRYeFIh+BhExSRk8lX1DLyNrMIyojBiMOMwEwSSMinw28npBRgBDHBwCFll2LCwAawBXPGTC4sFOOcYZVg5OACpsjjziOUVdcJSm1BbUPqgOwgK+NJuigahOdnkAYy7C+8FNnK7fa5E6GPJwTwNc7RFDaEDKIA). +</Note> + +## Slicing with lambda + +A common use case is to use a tensor lambda function to slice out the first `k` dimensions of a vector representation of `m` dimensions where `m` is larger than `k`. Slicing with lambda functions is great for representing vectors from [Matryoshka Representation Learning](https://arxiv.org/abs/2205.13147). + +> Matryoshka Representation Learning (MRL) which encodes information at different granularities and allows a single embedding to adapt to the computational constraints of downstream tasks. + +The following slices the first 256 dimensions of a tensor `t`: + +```js +tensor<float>(x[256])(t{x:(x)}) +``` + +Importantly, this does only reference into the original tensor, avoiding copying the tensor to a smaller tensor. + +The following is a complete example where we have stored an original vector representation with 3072 dimensions, And we slice the first 256 dimensions of the original representation to perform a dot product in the first-phase expression, followed by a full computation over all dimensions in the second-phase expression. 
See [phased ranking](/en/ranking/phased-ranking) for context on using Vespa phased computations and [customizing reusable frozen embeddings with Vespa](https://blog.vespa.ai/tailoring-frozen-embeddings-with-vespa/). + +```js +schema example { + document example { + field document_vector type tensor<float>(x[3072]) { + indexing: attribute | summary + } + } + rank-profile small-256-first-phase { + inputs { + query(query_vector) tensor<float>(x[3072]) + } + function slice_first_dims(t) { + expression: l2_normalize(tensor<float>(x[256])(t{x:(x)}), x) + } + first-phase { + expression: sum( slice_first_dims(query(query_vector)) * slice_first_dims(attribute(document_vector)) ) + } + second-phase { + expression: sum( query(query_vector) * attribute(document_vector) ) + } + } +} +``` + +See also a runnable example in this [tensor playground example](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gFsBPAfQAsBLAc3dYBNOjVgDcCAYwAuWAE4kyNSEQSRJBWgGdZAHmgAbLAENJAPgAUAD0QBGAEwBdAJRwbxW8QDMxACzEArMQAbMQA7MQAHMQAnMTWAAyx1rEOkAoQAL4K6aQY1OS4DEQ5ipRoaXQM0JwyGpKsgQJCGqxY0KwsHDx8gsJiUrLyNJjKUGqaspaIgU5mHVy8jb0S0jLAFnCWjumOqUOZGNkKeZgFKkXlFPjH5PQqtLKMhnqcAF7nQxBKDHq2rPcyj2ebzMVRqdQaPWarXabHm3SEomWsmIYAsO3K+1QmPsIHSQA). diff --git a/mintlify-docs/en/ranking/tensor-user-guide.mdx b/mintlify-docs/en/ranking/tensor-user-guide.mdx new file mode 100644 index 0000000000..3c7d40590c --- /dev/null +++ b/mintlify-docs/en/ranking/tensor-user-guide.mdx @@ -0,0 +1,340 @@ +--- +title: "Tensor Guide" +sidebarTitle: "Tensor user guide" +--- + +Vespa provides a _tensor_ data model and computation engine to support advanced computations over data. This guide explains the tensor support in Vespa. See also the [tensor reference](/en/reference/ranking/tensor), and our [published paper](https://dl.acm.org/doi/10.1145/3459104.3459152) ([pdf](/assets/a_tensor_formalism_for_computer_science.pdf)). 
+ +## Tensor concepts + +A tensor in Vespa is a data structure which generalizes scalars, vectors and matrices to any number of dimensions: + +- A scalar is a tensor of rank 0 +- A vector is a tensor of rank 1 +- A matrix is a tensor of rank 2 +- ... + +Tensors consist of a set of scalar valued _cells_, with each cell having a unique _address_. A cell's address is specified by its index or label in all the dimensions of that tensor. The number of dimensions in a tensor is the _rank_ of the tensor. Each dimension can be either _mapped_ or _indexed_. Mapped dimensions are sparse and allow any label (string identifier) designating their address, while indexed dimensions use dense numeric indices starting at 0. + +Example: Using [literal form](/en/reference/ranking/tensor#tensor-literal-form), the tensor: + +```js +{ + {user:bob, movie:"Heat"}:0.1, + {user:alice, movie:"Frozen"}:0.9, + {user:carol, movie:"Top Gun"}:0.3, +} +``` + +has two dimensions named `user` and `movie`, and has three cells with defined values: + +<Frame> + ![Tensor graphical representation](/assets/img/tensor-mapped.png) +</Frame> + +A tensor has a _type_, which consists of a set of dimension names, dimension types, and a [tensor cell value type](/en/reference/ranking/tensor#tensor-type-spec). The dimension name can be anything. This defines a 2-dimensional mapped tensor (sparse 2D matrix) of floats as illustrated above: + +```bash +tensor<float>(user{},movie{}) +``` +This is a 2-dimensional indexed tensor (a 2D 1280x720 matrix). For example, used to represent an image: +```bash +tensor<int8>(x[1280],y[720]) +``` +This is a 3-dimensional indexed tensor. For example, used to represent spatial data: +```bash +tensor<float>(x[256], y[256], z[128]) +``` +This is a _mixed_ tensor combining a _mapped_ dimension and an _indexed_ dimension. 
For example, used to represent word2vec: +```bash +tensor<bfloat16>(word_id{},vec[300]) +``` +Another mixed tensor used to represent paragraph [embeddings](/en/rag/embedding) used to power [multi-vector indexing](https://blog.vespa.ai/semantic-search-with-multi-vector-indexing/). +```bash +tensor<float>(paragraph{},embedding[768]) +``` + +Vespa uses the tensor type information to optimize tensor expression execution plans at configuration time. + +## Tensor document fields + +Document fields in [schemas](/en/basics/schemas) can be of any tensor type: + +```js expandable +schema product { + + document product { + + field title type string { + indexing: summary | index + } + + field price type int { + indexing: summary | attribute + } + + field popularity type float { + indexing: summary | attribute + } + + field sales_score type tensor<float>(category{}) { + indexing: summary | attribute + } + + field embedding type tensor<float>(x[4]) { + indexing: summary | attribute | index + attribute { + distance-metric: dotproduct + } + } + } +} +``` + +The above schema exemplifies a _product_ with two tensor fields. The `sales_score` tensor field represents how popular a product is per unique _category_. This information could be used when [ranking](/en/basics/ranking) products for a user query. The `embedding` tensor field represents an embedding vector representation of the product. + +- **sales\_score** is a _mapped_ tensor with a single mapped dimension `category`. Mapped dimensions are sparse and allow any label (string identifier) designating their address. +- **embedding** is an _indexed_ tensor. Indexed dimensions use dense numeric indices starting at 0. + +To perform computations over a document tensor field in [ranking](/en/ranking/ranking-intro), the field must be defined with [attribute](/en/content/attributes). 
+ +Tensors with the following types can be indexed with [HNSW](/en/querying/approximate-nn-hnsw) and searched efficiently using the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator: + +- One indexed dimension - single vector per tensor field +- One mapped and one indexed dimension - multiple vectors per tensor field + +See [nearest neighbor search](/en/querying/nearest-neighbor-search) and [approximate nearest neighbor search](/en/querying/approximate-nn-hnsw). + +## Feeding tensors + +An example _product_ document in Vespa JSON format. This example uses the product category string as the mapped label key. The _embedding_ tensor stores and indexes ([HNSW](/en/querying/approximate-nn-hnsw)) a dense [embedding](/en/rag/embedding). + +```json +{ + "put": "id:shopping:product::B0BFW5SXX2", + "fields": { + "title": "Keyboard Case for iPad Pro 12.9 inch", + "price": 29, + "popularity": 0.8, + "sales_score": { + "Tablet Keyboard Cases": 8.0, + "Keyboards": 2.0, + "Personal Computers": 0.1 + }, + "embedding": [ + 1, + 2, + 3, + 4 + ] + } +} +``` + +The JSON feed format example above uses short value form. Tensor fields can be represented using different [JSON format](/en/reference/schemas/document-json-format#tensor) verbosity. + +You can use [partial updates](/en/writing/partial-updates) of tensor fields with [add](/en/reference/schemas/document-json-format#tensor-add), [remove](/en/reference/schemas/document-json-format#tensor-remove) and [modify](/en/reference/schemas/document-json-format#tensor-modify) tensor cells, or [assign](/en/reference/schemas/document-json-format#tensor-field) a completely new tensor value. From [container components](/en/applications/components) you can create and modify tensor values using the [tensor Java API](https://javadoc.io/doc/com.yahoo.vespa/vespajlib/latest/com/yahoo/tensor/Tensor.html). 
+ +You would typically re-calculate the per category sales scores outside Vespa and update them continuously using [partial updates](/en/writing/partial-updates) to avoid re-feeding or re-indexing of other fields. + +## Querying with tensors + +Query input tensors **must** be defined in the schema [rank-profile](/en/ranking/ranking-intro) using [inputs](/en/reference/schemas/schemas#inputs): + +```js +rank-profile product_ranking inherits default { + inputs { + query(q_category) tensor<float>(category{}) + query(q_embedding) tensor<float>(x[4]) + } + ..... +} +``` + +The above defines two query input tensors that we can reference in [ranking](/en/ranking/ranking-intro) expressions. With the tensor query name and tensor type defined, you can: + +- Add it to the query in a [Searcher](/en/applications/searchers) using the [Tensor class](https://javadoc.io/doc/com.yahoo.vespa/vespajlib/latest/com/yahoo/tensor/Tensor.html) and setting it by `Query.getRanking().getFeatures().put("query(q_embedding)", myTensorInstance)`, or +- Pass it in the request, using an HTTP parameter like `input.query(q_embedding)` and passing a tensor [value](/en/reference/ranking/tensor#tensor-literal-form). + +An example query request using [Vespa CLI query request](/en/clients/vespa-cli#queries): + +```bash +vespa query 'yql=select * from product where {targetHits:1}nearestNeighbor(embedding,q_embedding)' \ + 'input.query(q_embedding)=[1,2,3,4]' \ + 'input.query(q_category)={"Tablet Keyboard Cases":0.8, "Keyboards":0.3}' \ + 'ranking=product_ranking' +``` + +This query request example assumes that the user query has been mapped (classified) to be related to the _Tablet Keyboard Cases_ and _Keyboards_ categories. Similarly, the user query has been mapped to a dense vector representation (`query(q_embedding)`) and is used as input to the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator, expressed with the [YQL query language](/en/querying/query-language). 
+ +The Vespa CLI uses HTTP GET and you can use the _-v_ flag to see the curl GET equivalent. For [POST](/en/querying/query-api#using-post) query requests using JSON, the equivalent JSON is: + +```json +{ + "yql": "select * from product where {targetHits:1}nearestNeighbor(embedding,q_embedding)", + "input": { + "query(q_embedding)": [ + 1, + 2, + 3, + 4 + ], + "query(q_category)": { + "Tablet Keyboard Cases": 0.8, + "Keyboards":0.3 + } + }, + "ranking": "product_ranking" +} +``` + +If the input query tensor used for the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) operator is not defined in the schema rank-profile, the request will fail: + +```json +"Expected 'query(q_embedding)' to be a tensor, but it is the string '[1,2,3,4]'" +``` + +## Ranking with tensors + +Tensors can be used in making inference computations over documents that are matched by a query. These computations are expressed with [ranking expressions](/en/reference/ranking/ranking-expressions) in schema [rank profiles](/en/reference/schemas/schemas#rank-profile). We can use this support to rank products by both the dense embedding dot product similarity and the category sales score. + +```js +rank-profile product_ranking inherits default { + + inputs { + query(q_category) tensor<float>(category{}) + query(q_embedding) tensor<float>(x[4]) + } + + function p_sales_score() { + expression: sum(query(q_category) * attribute(sales_score)) + } + + function p_embedding_score() { + expression: closeness(field, embedding) + } + + first-phase { + expression: p_sales_score() + p_embedding_score() + } + match-features: p_sales_score() p_embedding_score() +} +``` + +The above profile uses a combination of two dot product calculations in the [first phase](/en/ranking/phased-ranking) expression. The `first-phase` expression is invoked for all documents that are **retrieved** by the [YQL query language](/en/querying/query-language). 
+ +- The _p\_sales\_score_ function calculates the sparse tensor dotproduct between the _query(q\_category)_ and _attribute(sales\_score)_ tensors. +- The _p\_embedding\_score_ calculates the dense tensor dotproduct between the _query(q\_embedding)_ and _attribute(embedding)_ tensors. The function uses the [closeness(dimension,name)](/en/reference/ranking/rank-features#closeness(dimension,name)) [rank-feature](/en/reference/ranking/rank-features) which is calculated by the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator. Alternatively, if we don't use the _nearestNeighbor_ operator in the query request, we could use a dense tensor dotproduct: +```js +function p_embedding_score() { + expression: sum(query(q_embedding) * attribute(embedding)) +} +``` + +The full list of tensor functions is available in the [ranking expression reference](/en/reference/ranking/ranking-expressions#tensor-functions). Using [match-features](/en/reference/schemas/schemas#match-features), developers can debug or log function outputs in the search result. 
+ +```json +"matchfeatures": { + "p_embedding_score": 30.0, + "p_sales_score": 8.0, +}, +"documentid": "id:shopping:product::B0BFW5SXX2", +"title": "Keyboard Case for iPad Pro 12.9 inch" +``` + +## Creating tensors from document fields + +If you need to make tensor computations from non-tensor single-valued attributes, arrays, weighted sets, or array\<struct\> fields, you can convert them in a ranking expression: + +- Creating an _indexed_ tensor where the _values_ are lifted from single-value attributes (price and popularity) using the tensor generate function: +```js +function to_indexed_tensor() { + expression: tensor(x[2]):[attribute(price),attribute(popularity)] +} +``` +- Creating a _mapped_ tensor where the _values_ are lifted from single-value attributes using the tensor generate function: +```js +function to_mapped_tensor() { + expression: tensor(x{}):{key1:attribute(price),key2:attribute(popularity)} +} +``` +- Creating a _mapped_ tensor where the _label(s)_ are lifted from a string array or single-value attribute can be done with the [document feature](/en/reference/ranking/rank-features#document-features) `tensorFromLabels`. +- Creating a _mapped_ tensor where the labels _and_ values are lifted from a weighted set can be done with the [document feature](/en/reference/ranking/rank-features#document-features) `tensorFromWeightedSet`. +- Creating a _mapped_ tensor where labels and values are lifted from an `array<struct>` attribute can be done with the [document feature](/en/reference/ranking/rank-features#document-features) `tensorFromStructs`. + +Converting non-tensor fields to tensors at query runtime has a performance penalty that is linear with the number of elements in the array, weighted set, or struct array. Prefer using native tensor fields instead. The benefit of converting non-tensor fields is that non-tensor fields like `int`, `float`, `weightedset`, or `array<struct>` can be efficiently queried. 
Only specific tensor types can be searched efficiently using the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator. + +## Constant tensors + +In addition to document tensors and query tensors, [constant tensors](/en/reference/schemas/schemas#constant) can be put in the [application package](/en/reference/applications/application-packages). This is useful for adding machine learned models. Example: + +```js +constants { + my_tensor_constant tensor<float>(x[4]): file: constants/constant_tensor_file.json +} +``` + +This defines a new tensor with the type as defined and the contents distributed with the application package in the file _constants/constant\_tensor\_file.json_. The format of this file is the [constant tensor JSON format](/en/reference/ranking/constant-tensor-json-format): + +```json +{ + "type": "tensor<float>(x[4])", + "values": [ + 0, + 0, + 0, + 1.0 + ] +} +``` + +To use this constant tensor in a ranking expression, encapsulate the constant name with `constant(...)`: + +```js +rank-profile use_constant_tensor inherits product_ranking { + constants { + my_tensor_constant tensor<float>(x[4]): file: constants/constant_tensor_file.json + } + first-phase { + expression: sum(query(q_embedding) * attribute(embedding) * constant(my_tensor_constant)) + } +} +``` + +Note that the rank profile `inherits` the inputs we defined in the `product_ranking` profile. With the example data used, the first-phase expression returns 16.0 since: + +```json +"embedding": [ + 1.0, + 2.0, + 3.0, + 4.0 + ], + "query(q_embedding)": [ + 1.0, + 2.0, + 3.0, + 4.0 + ], + "constant(my_tensor_constant)": [ + 0.0, + 0.0, + 0.0, + 1.0 + ] +``` + +## Tensors with strings + +Tensors in Vespa cannot have strings as values, since the mathematical tensor functions would be undefined for such "tensors". However, you can still represent sets of strings in tensors by using the strings as keys in a mapped tensor dimension, using e.g. 1.0 as values. 
This allows you to perform set operations on strings and similar without making those tensors incompatible with other tensors and with normal tensor operations. + +## Further reading + +See also: + +- [Blog post: Computing with tensors in Vespa](https://blog.vespa.ai/computing-with-tensors/). +- [Using ONNX models](/en/ranking/onnx) to use machine-learned models taking tensor input in Vespa. +- Some [practical tensor computation examples](/en/ranking/tensor-examples). +- The [tensor playground](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QGNIAaFDSPBdDTAF30gAkBTAJ2bAE8sBXMAgIYA7MDgA2AzmAGteQgCbSFYZgA8cbAJYBbZkNpgA7ptoALMLT0BnLKytLFloTdYr17K1c1ZnAOjAs7GCa9gJgVqa2BgA6QmoC2uLMxFy8-MJg0JrK2rYc2dC22gK03iIlYLGIZhxOLmA8VmxgAOY8mvLMALoAFKa0tDhWcAD0I-JYBFa+AG7MVjgCvgKaI3ojdbYAtI1sW20dzL792mIAlJBkqAC+V9ekGNTkuAxEDzQU+E8f9AiQAIIWay2YKhMDyEphKy0Vg8Ai0HhBMwVELSVp6NgCMSaABeJTKYCw0HCggkdhSc3htlCORKrE0BHm-liABVgXYwKYBHM0UIeNoAEbNIngnTWMr2ZEGQQiIUqEymZqdZwcEELGRNQmuMIELCC7L4ny+WL-ETxRJiVXEqGLOy1dlwS40MC3DD3K7fCDYShQZgkK5en2ezBCBjqu0AfU2rH9zq9fr+0Z6qmA1zOcGAAjgAEZiAK4AAma5Omium7vCDB72vWMfIMBugMQHKzXRowKiyGLCi3TOCUpTS+I5o4owzSqR0NssQd2PBvVv5+iuYetxqChv4t5hR9m152QBNQJOqRAFrrETiIADMXXTiEQuYLxBvxEQABZiABWYgANi6XRLN07mXKsXj+N551XONIF+KA2Wcal0hELkeQFLAzHCW1NWERQtx7cUfCsFIPB4MRSiEFpgnKMBtHHZhHHZfxAg4YwMJqMAAANoDELASg42IIVoMJaE4DRJ2dacXRA+cwN9PcVy+BsvQ3KBaNUeidwQmNl3IQ8YPZAAebjeNoAA+ZNUwva9bwzLMH2IJ8X3zd8v1-LpiynYCPRkn1IAg-coP3WDIHglx7BQjgoWYAwRVwTFSkI6QxB44wKMyEEwnkTQ5jtcJosJYldUSHghISoR-AAMRBbJoWERkUjCDiACssGyDjxNLLy533WSD3kwNFLXSAVMgfrMD0lrsh6cMmk0lwUjUjToxSaAegEPMzjWgAqAUzguTy3WknrfP8utBug4KWVMVFUWYABHdoZixPQDFoLAOqAw7vOOhgl0g879xGsb4zDLDtzbLaaLo+Q5tsQCbi61BQJOsbPioJSjwYKrXFyIJzSSIi8o4RAJimWZ5kWZZVnWaMtnxy1phOMRen6QZhjGUnpjmdUqbWIQNnZOnVASAnjloU59oku4UC6EBriAA) which allows you to create and compute with Vespa tensors in your browser. +- The [Tensor Java API](https://javadoc.io/doc/com.yahoo.vespa/vespajlib/latest/com/yahoo/tensor/Tensor.html). 
+- [Tensor ranking performance](/en/performance/feature-tuning#tensor-ranking). \ No newline at end of file diff --git a/mintlify-docs/en/ranking/tensorflow.mdx b/mintlify-docs/en/ranking/tensorflow.mdx new file mode 100644 index 0000000000..157a9f2737 --- /dev/null +++ b/mintlify-docs/en/ranking/tensorflow.mdx @@ -0,0 +1,67 @@ +--- +# Copyright Vespa.ai. All rights reserved. +title: "Ranking with TensorFlow Models" +sidebarTitle: "Using TensorFlow models" +--- + +Vespa can import TensorFlow models converted to the ONNX format. The tutorial [TensorFlow: Deploy model to Vespa through ONNX](https://vespa-engine.github.io/learntorank-DEPRECATED/notebooks/tensorflow-via-onnx.html) shows an end-to-end example from training a Learning-to-Rank (LTR) model to deploying it to Vespa. The tutorial can be reproduced by running the [Jupyter Notebook](https://github.com/vespa-engine/learntorank-DEPRECATED/blob/main/notebooks/tensorflow-via-onnx.ipynb). + +Key steps covered in the tutorial above: + +- Define and train a `tf_model`. 
+- Save the model to disk: `tf_model.save("tf_model_file")` +- Convert the model to ONNX with the `tf2onnx` library: + +```sh +$ python3 -m tf2onnx.convert --saved-model tf_model_file --output tf_model.onnx +``` + +- Inspect expected input/output format with the `onnx` library: + +```python +import onnx +m = onnx.load("simpler_keras_model.onnx") +m.graph.input # check input format +m.graph.output # check output format +``` + +- Include the model on Vespa .sd file + +```js expandable +schema msmarco { + document msmarco { + field id type string { + indexing: summary | attribute + } + field text type string { + indexing: summary | index + } + } + onnx-model ltr_tensorflow { + file: files/tf_model.onnx + input input: vespa_input + output dense: dense + } + rank-profile tensorflow { + function vespa_input() { + expression { + tensor<float>(x[1],y[3]):[ + [fieldMatch(text).queryCompleteness, + fieldMatch(text).significance, + nativeRank(text)] + ] + } + } + first-phase { + expression: sum(onnx(ltr_tensorflow).dense) + } + summary-features { + onnx(ltr_tensorflow) + fieldMatch(text).queryCompleteness + fieldMatch(text).significance + nativeRank(text) + } + } + } +``` + diff --git a/mintlify-docs/en/ranking/wand.mdx b/mintlify-docs/en/ranking/wand.mdx new file mode 100644 index 0000000000..b3e6bcfe8e --- /dev/null +++ b/mintlify-docs/en/ranking/wand.mdx @@ -0,0 +1,177 @@ +--- +title: "WAND: Accelerated OR search" +description: "This document describes how to use the Weak And algorithm for accelerated OR like search." +--- + +The WAND algorithm is described in detail in [Efficient Query Evaluation using a Two-Level Retrieval Process (PDF)](https://www.researchgate.net/profile/David-Carmel-3/publication/221613425_Efficient_query_evaluation_using_a_two-level_retrieval_process/links/02bfe50e6854500153000000/Efficient-query-evaluation-using-a-two-level-retrieval-process.pdf) by Andrei Z. 
Broder et al.: + +> We have determined that our algorithm significantly reduces the total number of full evaluations by more than 90%, almost without any loss in precision or recall. At the heart of our approach there is an efficient implementation of a new Boolean construct called WAND or Weak AND that might be of independent interest + +Vespa has two different implementations of the WAND dynamic pruning algorithm. Both implementations attempt to retrieve the best top-k scoring documents without exhaustively scoring all documents which match any of the terms in the query. + +Consider a query example ***is cdg airport in main paris?*** from the [MS Marco Passage Ranking](https://github.com/microsoft/MSMARCO-Passage-Ranking) query set. If we run this query over the 8.8M passage documents using OR we retrieve and rank *7,926,256* documents out of *8,841,823* documents. That is in other words [ranking](/en/ranking/ranking-intro) 89% of the total corpus size. It's close to brute force evaluating all documents (100%). + +If we instead change the boolean retrieval logic to AND, we only retrieve 2 documents and fail to retrieve the relevant document(s). + +The WAND algorithm tries to address this problem by starting the search for candidate documents using OR, limiting the number of documents that are ranked, saving both latency and resource usage (cost) while still returning the same or almost the same top-k results as the brute force OR. For the example, using WAND with *K* or *totalTargetHits* set to 1000, only 196,900 documents are fully ranked. That is a huge improvement over the exhaustive OR search which retrieves and ranks *7,926,256* documents, while at the same time retrieving the same results as the exhaustive OR search. + +So what is the catch? Why not use the WAND algorithm all the time? 
The catch is that the inner wand scoring function can only be a linear sum of the ranking contribution from each of the query terms, and one cannot override the score calculation (with a [ranking expression](/en/ranking/ranking-expressions-features)). The inner scoring function of both WAND implementations in Vespa cannot be overridden like the ranking expression controlled by the `first-phase` ranking expression in a rank profile. Users can only control the weight of the terms or, more generally, features in the query and in the document. The weights can be adjusted and both WAND implementations in Vespa attempt to calculate the top-k documents with the highest maximum inner dot product. WAND could be looked at as performing the maximum inner product search in a sparse vector space, without exhaustively calculating it over all candidates by brute force. + +Vespa has two query operators which implement the WAND algorithm: `weakAnd` and `wand`. These two query operators have different characteristics: + +- `weakAnd` is designed for single valued indexed string fields (or fieldset combining multiple indexed string fields). `weakAnd` integrates with linguistic processing (tokenization and stemming). It uses the per-term inverse document frequency and query term weight in the inner scoring but does not use document term frequency in the scoring. Note that when searching a fieldset, the document frequency is aggregated across all member fields, which can cause common terms like *"the"* to be pruned from the query — see [this FAQ entry](/en/learn/faq#why-can-common-words-like-the-hurt-recall-and-collapse-significance-across-a-fieldset). +- The `wand` query operator does not integrate with linguistic processing like tokenization, stemming and normalization, and the user (you) must specify the query features and their weight and the document features and their weight. The features do not need to be strings, and it is recommended to map from string to numeric types. 
For example, the pre-trained language model [BERT](https://en.wikipedia.org/wiki/BERT_\(language_model\)) uses a fixed vocabulary of about 30K tokens and text snippets are tokenized into a set of token ids. We can then represent the document as a bag of BERT token ids (e.g. using the `weightedset<int>` data type) where each token id has a weight which is computed during document processing, e.g. using [DeepCT or HDCT](https://github.com/AdeDZY/DeepCT) weighting. Similar approaches exist for other high dimensional sparse vector spaces which do not relate to text matching but where one wants to efficiently perform a maximum inner dot product search. + +If you are in doubt whether you can use the WAND algorithm to accelerate retrieval, you can evaluate it using a query set: run the queries exhaustively using brute force OR and compare the top-k results with those returned when using the approximate WAND. If top-k as measured by Recall@K is high, you could save compute resources (and get lower latency) by using WAND. + +In the following sections we discuss these two WAND implementations in detail. + +## weakAnd + +The [weakAnd query operator](/en/reference/querying/yql#weakand) accepts terms searching over multiple fields and also logical conjunctions using OR/AND. It's designed to retrieve over indexed string fields and fieldsets (single-valued or multivalued) and integrates fully with linguistic processing like tokenization and stemming. + +When using weakAnd via [YQL](/en/querying/query-language) or a [Searcher plugin](/en/applications/searchers), specify the target for the minimum number of hits the operator should produce per content node involved in the query. + +The effect of tuning `totalTargetHits` may not be intuitive. To ensure that you get the best hits possible with a weakAnd, set the target number somewhat higher than the number of hits returned to the user; setting it 10 times higher should be more than enough. 
+ +The reason for increasing the target number is that weakAnd uses a ranking function internally (inner product) and the hits which are evaluated by the weakAnd scorer are also evaluated by the `first-phase` ranking expression. + +Anything similar to classic vector ranking should correlate well with weakAnd inner product scoring, e.g. `nativeFieldMatch` or `bm25` ranking features. + +Note that because weakAnd relies on feedback identifying which hits are used for first phase ranking to increase its threshold for what's considered a good hit, the special [unranked rank profile](/en/reference/schemas/schemas#rank-profile) (which turns off ranking completely) may cause weakAnd queries to become slower than using a real rank profile. + +The query example expressed in YQL: + +```sql +select * from passages where ( + default contains "is" OR default contains "cdg" OR + default contains "airport" OR default contains "in" OR default contains "main" OR + default contains "paris" +) +``` + +Alternatively, using a combination of YQL and user query language: + +```json +{ + "yql": "select * from passages where userQuery()", + "query": "is cdg airport in main paris?", + "type": "any" +} +``` + +Where type *any* means OR. + +Using the weakAnd query operator, the query is: + +```sql +select * from passages where ( + {totalTargetHits: 200} + weakAnd( + default contains "is", default contains "cdg", default contains "airport", + default contains "in", default contains "main", default contains "paris" + ) +) +``` + +We specify that the [target number of hits (top k)](/en/reference/querying/yql#targethits) should be 200 (default 100), and this number is used per content node if the content is distributed over more than one node. 
+ +### weakAnd inner scoring + +The weakAnd query operator uses the following ranking features when calculating the inner score dot product: + +- [term(n).significance](/en/reference/ranking/rank-features#term(n).significance) +- [term(n).weight](/en/reference/ranking/rank-features#term(n).weight) + +Note that the number of times the term occurs in the document is not used in the inner scoring. + +Both term significance and weight features could be overridden in the query using [annotations](/en/reference/querying/yql#annotations). If the term significance is not overridden with the query, the significance is calculated from the indexed corpus using a formula loosely based on [Inverse Document Frequency](https://en.wikipedia.org/wiki/Tf%E2%80%93idf). + +Documents that could not potentially compete with any of the hits already in heap (size targetHits) of top hits are skipped, while the weakAnd implementation still exposes the hits which were evaluated to the first phase ranking function, and not only the top k hits. When configured to use multiple threads per search, each thread maintains a top-k scoring heap but communicates score thresholds. + +Often times, for the performance reasons listed above, it is preferable to use weakAnd instead of OR. To enable this behavior, set the query property [weakAnd.replace](/en/reference/api/query#weakand.replace) to true. + +## wand + +The [wand query operator](/en/reference/querying/yql#wand) works over a single [weightedset field](/en/reference/schemas/schemas#weightedset) which can be both string or numeric (int/long) - the weight is always int. + +Weighted sets of string must be configured with `match:word` or `match:exact` - see [match documentation](/en/reference/schemas/schemas#match). There is no linguistic processing of strings for string features when using the wand query operator. 
+ +Below is an example passage document type where we pre-process the text using a BERT tokenizer and map text to bert token ids and assign a weight to each unique token id. + +```js +document passage { + field text type string { + indexing: summary | index + } + field deep_ct_tokens type weightedset<int> { + indexing: summary |attribute + attribute:fast-search + } +} +``` + +We can process text and tokenize the text with a BERT tokenizer and set the weight per token id: + +```json +{ + "put": "id:msmarco:passage::8433854", + "fields": { + "text": "Charles de Gaulle airport (CDG) is the main international airport for Paris", + "deep_ct_tokens": { + "2248": 12, + "1996": 5, + "3729": 9, + "2003": 8, + "2798": 1, + "2139": 1, + "1006": 3, + "1007": 1, + "2290": 5, + "28724": 6, + "3000": 3, + "2005": 5, + "2364": 1, + "3199": 15 + } + } +} +``` + +The wand query operator allows full control over both query side weights and document side weights, and it is guaranteed that it will find the top k best hits ranked by the inner dot product between the sparse query vector and the sparse document vector. + +The Vespa [rank query operator](/en/reference/querying/yql#rank) can be used to create a query tree, where a bag of features is used in the wand for efficient retrieval with normal lexical query terms to produce matching ranking features like bm25 for the configurable first phase/second-phase ranking. + +The example below uses the `rank` operator to also produce normal text matching features for those top-k documents which are retrieved by the inner product search performed by the wand operator. The userQuery() does not impact recall, but creates "normal" ranking features for first-phase or second-phase ranking. 
Similarly, at query time we can use the same type of text-to-feature mapping (in this case all query terms have weight 1): + +```json +{ + "yql":"select * from passages where rank( + ({totalTargetHits: 25} + wand(deep_ct_tokens, @tokens)), + userQuery())", + "tokens": "{2003: 1, 3729: 1, 2290: 1, 3199: 1, 1999: 1, 2364: 1, 3000: 1}", + "query": "is cdg airport in main paris?", + "type": "any", + "ranking.profile": "deep_bm25" +} +``` + + +With the rank profile *deep\_bm25* defined as: + +```js +rank-profile deep_bm25 { + first-phase { + expression: rawScore(deep_ct_tokens) + bm25(text) + } + summary-features { + bm25(text) + rawScore(deep_ct_tokens) + } +} +``` + +The [rawScore](/en/reference/ranking/rank-features#rawScore(field)) ranking feature is the inner dot product calculated by the wand query operator. For the 25 documents (per node) with the highest inner product score there is also a bm25(text) score which we combine with the inner product score. Note that the bm25 is **only** calculated for the top-k hits returned by the wand. + +In this example, the inner product score is 41, and the bm25 of the text is 35.003. \ No newline at end of file diff --git a/mintlify-docs/en/ranking/xgboost.mdx b/mintlify-docs/en/ranking/xgboost.mdx new file mode 100644 index 0000000000..476a939199 --- /dev/null +++ b/mintlify-docs/en/ranking/xgboost.mdx @@ -0,0 +1,347 @@ +--- +title: "Ranking with XGBoost Models" +sidebarTitle: "Using XGBoost models" +--- + +Vespa supports importing Gradient Boosting Decision Tree (GBDT) models trained with [XGBoost](https://xgboost.readthedocs.io/). + +## Exporting models from XGBoost + +Vespa supports two XGBoost model formats: UBJ (recommended) and JSON (legacy). + +### UBJ format (recommended) + +`Vespa 8.656.31+` + +The recommended way to export an XGBoost model for Vespa is using [`save_model()`](https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.Booster.save_model) with the `.ubj` (Universal Binary JSON) extension. 
UBJ has been the default XGBoost model format since XGBoost 2.1.0 and preserves all model information: tree structure, `base_score`, feature names, and objective. + + +```python +import xgboost as xgb +import numpy as np + +# Train a model +dtrain = xgb.DMatrix(np.random.rand(100, 2), label=np.random.randint(2, size=100), + feature_names=["feature_1", "feature_2"]) +param = {"max_depth": 2, "objective": "binary:logistic"} +model = xgb.train(param, dtrain, num_boost_round=10) + +# Export as UBJ +model.save_model("my_model.ubj") +``` + + + +<Warning> +**Important:** + +Do **not** use `save_model("model.json")` — this produces a different JSON structure +(with a `learner` wrapper) that Vespa cannot parse. Only `dump_model()` with `dump_format="json"` is supported for the JSON path. +</Warning> + + +Since the UBJ format preserves the objective, Vespa automatically applies the correct transformation (e.g. sigmoid for logistic objectives) — no need to wrap the ranking expression manually. + +### JSON format (legacy) + +Vespa also supports importing XGBoost's JSON model dump via +[`dump_model()`](https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.Booster.dump_model) +with `dump_format="json"`. 
+ + +```python +import xgboost as xgb + +dtrain = xgb.DMatrix("training-vectors.txt") +param = {"base_score": 0, "max_depth": 1, "objective": "reg:squarederror"} +bst = xgb.train(param, dtrain, 2) +bst.dump_model("trained-model.json", fmap="feature-map.txt", with_stats=False, dump_format="json") +``` + + +This produces a JSON array of tree objects: + + +```json +[ + { "nodeid": 0, "depth": 0, "split": "fieldMatch(title).completeness", "split_condition": 0.772132337, "yes": 1, "no": 2, "missing": 1, "children": [ + { "nodeid": 1, "leaf": 0.673938096 }, + { "nodeid": 2, "leaf": 0.791884363 } + ]}, + { "nodeid": 0, "depth": 0, "split": "fieldMatch(title).importance", "split_condition": 0.606320798, "yes": 1, "no": 2, "missing": 1, "children": [ + { "nodeid": 1, "leaf": 0.469432801 }, + { "nodeid": 2, "leaf": 0.55586201 } + ]} +] +``` + + +The `split` attribute represents the Vespa feature name and must resolve to a Vespa [rank feature](/en/reference/ranking/rank-features) defined in the [document schema](/en/basics/schemas), or a user-defined [function](/en/ranking/ranking-expressions-features#function-snippets). + +The training data is represented using [LibSVM text format](https://xgboost.readthedocs.io/en/latest/tutorials/input_format.html). See also a complete [XGBoost training notebook](https://github.com/vespa-engine/sample-apps/blob/master/commerce-product-ranking/notebooks/Train-xgboost.ipynb) using `ranking` objective. + + +<Warning> +**Important:** + +`dump_model()` JSON does **not** preserve `base_score`. +Set `base_score=0` during training, or accept that Vespa predictions will be offset. +For logistic objectives, you must manually wrap the expression in `sigmoid()` (see [Objective types](#xgboost-objective-types)). +</Warning> + + +## Feature mappings from XGBoost to Vespa + +Model feature names must map to Vespa [rank features](/en/reference/ranking/rank-features). The mapping method depends on the model format. 
+ +### UBJ feature mapping + +For UBJ models, place a features file named `<model_name>-features.txt` alongside the `.ubj` file in the `models` directory. The file contains one feature name per line, matching the training column order: + + +```txt +feature_1 +feature_2 +feature_3 +``` + + +For a model file named `my_model.ubj`, the features file must be named `my_model-features.txt`. + +Then define rank profile [functions](/en/ranking/ranking-expressions-features#function-snippets) that match the feature names and map them to Vespa document attributes or query features: + + +```js +schema my_app { + document my_app { + field price type double { + indexing: summary | attribute + } + field popularity type double { + indexing: summary | attribute + } + } + rank-profile my_rank_profile inherits default { + function feature_1() { + expression: attribute(price) + } + function feature_2() { + expression: attribute(popularity) + } + function feature_3() { + expression: query(user_context) + } + first-phase { + expression: xgboost("my_model.ubj") + } + } +} +``` + + +If the model was trained with feature names that are valid Vespa rank features (e.g. `attribute(price)`), the functions are not needed — Vespa resolves them directly. + +### JSON feature mapping + +When using `dump_model()`, XGBoost names features by array index (`f0`, `f1`, ...) unless a feature map file (`fmap`) is provided. The `fmap` maps feature indices to named Vespa features: + + +```bash +$ cat feature-map.txt | egrep "fieldMatch\(title\).completeness|fieldMatch\(title\).importance" +36 fieldMatch(title).completeness q +39 fieldMatch(title).importance q +``` + + +In this example, feature at index 36 maps to [fieldMatch(title).completeness](/en/reference/ranking/rank-features#fieldMatch(name).completeness) and index 39 maps to [fieldMatch(title).importance](/en/reference/ranking/rank-features#fieldMatch(name).importance). 
+ +Format of `feature-map.txt`: `<featureid> <featurename> <q or i or int>\n`: + - Feature id must be from 0 to number of features, in sorted order + - `i` means this feature is a binary indicator feature + - `q` means this feature is a quantitative value, such as age, time, can be missing + - `int` means this feature is an integer value (when int is hinted, the decision boundary will be integer) + +When using Pandas `DataFrame`s with column names, the feature names are embedded directly in the JSON dump and a feature map file is not needed. + +## Importing XGBoost models + +To import an XGBoost model, add the model file to your application package under the `models` directory. For UBJ models, also include the corresponding `-features.txt` file: + + +```bash +├── models +│ ├── my_model.ubj +│ ├── my_model-features.txt +│ └── legacy_model.json +├── schemas +│ └── main.sd +└── services.xml +``` + + +An application package can have multiple models. + +## Ranking with XGBoost models + +Vespa has an `xgboost` [ranking feature](/en/reference/ranking/rank-features). This ranking feature specifies the model to use in a ranking expression. Both UBJ and JSON models use the same ranking feature: + + +```js +schema my_app { + rank-profile prediction inherits default { + first-phase { + expression: nativeRank + } + second-phase { + expression: xgboost("my_model.ubj") + } + } +} +``` + + +Here, we specify that the model `my_model.ubj` is applied to the top ranking documents by the first-phase ranking expression. The query request must specify `prediction` as the [ranking.profile](/en/reference/api/query#ranking.profile). See also [Phased ranking](/en/ranking/phased-ranking) on how to control the number of data points/documents that are exposed to the model. + +Generally the run time complexity is determined by: + +* The number of documents evaluated [per thread](/en/performance/sizing-search) / number of nodes and the query filter +* The complexity of computing features. 
For example `fieldMatch` features are 100x more expensive than `nativeFieldMatch/nativeRank`. +* The number of XGBoost trees and the maximum depth per tree + +Serving latency can be brought down by [using multiple threads per query request](/en/performance/practical-search-performance-guide#multithreaded-search-and-ranking). + +## Categorical features + + +<Warning> +**Important:** + +Vespa does **not** support XGBoost's native categorical splits +(`enable_categorical=True`). Deploying a model with native categorical splits will **silently produce +wrong predictions** — Vespa interprets the categorical split condition as a numerical threshold. +</Warning> + + +To use categorical features with XGBoost models in Vespa, integer-encode them before training: + + +```python +import xgboost as xgb +import pandas as pd + +# Integer-encode categorical features +category_map = {"small": 0, "medium": 1, "large": 2} +df["size"] = df["size_raw"].map(category_map).astype(float) + +# Train without enable_categorical — XGBoost uses numerical splits on the integers +dtrain = xgb.DMatrix(df[feature_cols], label=targets) +param = {"max_depth": 4, "objective": "binary:logistic"} +model = xgb.train(param, dtrain, num_boost_round=100) +model.save_model("my_model.ubj") +``` + + +In the Vespa schema, store integer-encoded categoricals as `int` attributes and map them via rank profile functions like any other numerical feature. + +Note: Vespa's [LightGBM](/en/ranking/lightgbm) importer does support native categorical splits. + +## XGBoost objective types + +Vespa can import XGBoost models trained with any [objective](https://xgboost.readthedocs.io/en/stable/parameter.html#learning-task-parameters). Common objectives include: + +* Regression `reg:squarederror` / `reg:logistic` +* Classification `binary:logistic` +* Ranking `rank:pairwise`, `rank:ndcg` and `rank:map` + +Vespa evaluates XGBoost models by summing the tree outputs. 
The only objective-specific behavior is for logistic objectives (`reg:logistic` and `binary:logistic`), where the raw tree sum must be passed through a sigmoid function to produce a probability. + +### UBJ models + +For UBJ models, Vespa reads the objective from the model file. For logistic objectives, the `base_score` is automatically transformed (logit) so the model output matches XGBoost's predictions without manual adjustment: + + +```js +schema my_app { + rank-profile classify inherits default { + first-phase { + expression: xgboost("my_classifier.ubj") + } + } +} +``` + + +Note that UBJ does not automatically apply a sigmoid to the final output. For logistic objectives, wrap the expression in `sigmoid()` if you need a probability: + + +```js +schema my_app { + rank-profile classify inherits default { + first-phase { + expression: sigmoid(xgboost("my_classifier.ubj")) + } + } +} +``` + + +For ranking objectives and `reg:squarederror`, the raw tree sum can be used directly. + +### JSON models + +For JSON models exported with `dump_model()`, the objective and `base_score` are **not** preserved. + +For `reg:logistic` and `binary:logistic`, the raw margin tree sum needs to be passed through the [sigmoid function](/en/reference/ranking/ranking-expressions) to represent the probability of class 1. For regression, the model can be directly imported but `base_score` should be set to 0 during training as it is not included in the dump. 
+ +An example using the sklearn toy datasets: + + +```python +from sklearn import datasets +import xgboost as xgb +breast_cancer = datasets.load_breast_cancer() +c = xgb.XGBClassifier(n_estimators=20, objective="binary:logistic") +c.fit(breast_cancer.data, breast_cancer.target) +c.get_booster().dump_model("binary_breast_cancer.json", fmap="feature-map.txt", dump_format="json") +c.predict_proba(breast_cancer.data)[:, 1] +``` + + +To represent the `predict_proba` function of XGBoost for the binary classifier in Vespa, use the [sigmoid function](/en/reference/ranking/ranking-expressions): + + +```js +schema my_app { + rank-profile prediction-binary inherits default { + first-phase { + expression: sigmoid(xgboost("binary_breast_cancer.json")) + } + } +} +``` + + +When the `base_score` is not the default (0.5), the sigmoid alone is insufficient. The full formula accounting for `base_score` is: + + +```js +schema my_app { + rank-profile prediction-binary inherits default { + constants { + base_score: 0.5 + } + first-phase { + expression: 1.0 / (1.0 + (1.0 - base_score) / base_score * exp(-(xgboost("binary_breast_cancer.json")))) + } + } +} +``` + + +Replace `0.5` with the actual `base_score` used during training. See the [XGBoost System Test](https://github.com/vespa-engine/system-test/tree/master/tests/search/xgboost) for a complete working example. + +## Debugging Vespa inference score versus XGBoost predict score + +* For JSON models, the `base_score` and optimal number of trees (if trained with early stopping) are lost in the dump. UBJ models preserve this information. XGBoost also has different predict functions (e.g. predict/predict_proba). The following [XGBoost System Test](https://github.com/vespa-engine/system-test/tree/master/tests/search/xgboost) demonstrates how to represent different types of XGBoost models in Vespa. 
+* For training, features should be scraped from Vespa, using either `match-features` or `summary-features` so that features from offline training match the online Vespa computed features. Dumping features can also help debug any differences by zooming into specific (query, document) pairs using the [recall](/en/reference/api/query#recall) parameter. +* In a distributed setting when multiple nodes use the model, text matching features such as `nativeRank`, `nativeFieldMatch`, `bm25` and `fieldMatch` might differ, depending on which node produced the hit. The reason is that all these features use [term(n).significance](/en/reference/ranking/rank-features#query-features), which is computed from the locally indexed corpus. The `term(n).significance` feature is related to *Inverse Document Frequency (IDF)*. The `term(n).significance` should be set by a searcher in the container for global correctness as each node will estimate the significance values from the local corpus. diff --git a/mintlify-docs/en/reference/api/api.mdx b/mintlify-docs/en/reference/api/api.mdx new file mode 100644 index 0000000000..0b9c053b01 --- /dev/null +++ b/mintlify-docs/en/reference/api/api.mdx @@ -0,0 +1,60 @@ +--- +title: "Vespa API and interfaces" +description: "Vespa API and interfaces in Vespa applications." 
+--- + +## Deployment and configuration + +- [Deploy API](/en/reference/api/deploy-v2): +Deploy [application packages](/en/basics/applications) +to configure a Vespa application +- [Config API](/en/reference/api/config-v2): +Get and Set configuration +- [Tenant API](/en/reference/api/application-v2): +Configure multiple tenants in the config servers + +## Document API + +- [Reads and writes](/en/writing/reads-and-writes): +APIs and binaries to read and update documents +- [/document/v1/](/en/reference/api/document-v1): +REST API for operations based on document ID (get, put, remove, update) +- [Feeding API](/en/clients/vespa-feed-client): +High performance feeding API, the recommended API for feeding data +- [JSON feed format](/en/reference/schemas/document-json-format): +The Vespa Document format +- [Vespa Java Document API](/en/writing/document-api-guide) + +## Query and grouping + +- [Query API](/en/querying/query-api), +[Query API reference](/en/reference/api/query) +- [Query Language](/en/querying/query-language), +[Query Language reference](/en/reference/querying/yql), +[Simple Query Language reference](/en/reference/querying/simple-query-language), +[Predicate fields](/en/schemas/predicate-fields) +- [Vespa Query Profiles](/en/querying/query-profiles) +- [Grouping API](/en/querying/grouping), +[Grouping API reference](/en/reference/querying/grouping-language) + +## Processing + +- [Vespa Processing](/en/applications/processing): +Request-Response processing +- [Vespa Document Processing](/en/applications/document-processors): +Feed processing + +## Request processing + +- [Searcher API](/en/applications/searchers) +- [Federation API](/en/querying/federation) +- [Web service API](/en/applications/web-services) + +## Result processing + +- [Custom renderer API](/en/applications/result-renderers) + +## Status and state + +- [Health and Metric APIs](/en/operations/metrics) +- [/cluster/v2 API](/en/reference/api/cluster-v2) diff --git 
a/mintlify-docs/en/reference/api/application-v2.mdx b/mintlify-docs/en/reference/api/application-v2.mdx new file mode 100644 index 0000000000..45aa099d34 --- /dev/null +++ b/mintlify-docs/en/reference/api/application-v2.mdx @@ -0,0 +1,14 @@ +--- +title: "/application/v2/tenant API reference" +description: "/application/v2/tenant API reference in Vespa applications." +--- +| HTTP request | application/v2/tenant operation | Description | +| :--- | :--- | :--- | +| GET | List tenant information. | | +| | List tenants | `/application/v2/tenant/` Example response: `[` `"default"` `]` | +| | Get tenant | `/application/v2/tenant/default` Example response: `{` `"message": "Tenant 'default' exists."` `}` | +| PUT | Create a new tenant. | | +| | Create tenant | `/application/v2/tenant/default` Response: A message with the name of the tenant created - example: `{` `"message" : "Tenant default created."` `}` **Note:** This operation is asynchronous, it will eventually propagate to all config servers. | +| DELETE | Delete a tenant. | | +| | Delete tenant | `/application/v2/tenant/default` Response: A message with the deleted tenant: `{` `"message" : "Tenant default deleted."` `}` **Note:** This operation is asynchronous, it will eventually propagate to all config servers. | + diff --git a/mintlify-docs/en/reference/api/cluster-v2.mdx b/mintlify-docs/en/reference/api/cluster-v2.mdx new file mode 100644 index 0000000000..a75362084a --- /dev/null +++ b/mintlify-docs/en/reference/api/cluster-v2.mdx @@ -0,0 +1,38 @@ +--- +title: "/cluster/v2 API reference" +description: "/cluster/v2 API reference in Vespa applications." +--- +| HTTP request | cluster/v2 operation | Description | +| :--- | :--- | :--- | +| GET | List cluster and nodes. Get cluster, node or disk states. 
| | +| | List content clusters | `/cluster/v2/` | +| | Get cluster state and list service types within cluster | `/cluster/v2/<cluster>` | +| | List nodes per service type for cluster | `/cluster/v2/<cluster>/<service-type>` | +| | Get node state | `/cluster/v2/<cluster>/<service-type>/<node>` | +| PUT | Set node state | | +| | Set node user state | `/cluster/v2/<cluster>/<service-type>/<node>` | + +| State | Description | +| :--- | :--- | +| `Up` | The node is up and available to keep buckets and serve requests. | +| `Down` | The node is not available, and cannot be used. | +| `Stopping` | This node is stopping and is expected to be down soon. This state is typically only exposed to the cluster controller to tell why the node stopped. The cluster controller will expose the node as down or in maintenance mode for the rest of the cluster. This state is thus not seen by the distribution algorithm. | +| `Maintenance` | This node is temporarily unavailable. The node is available for bucket placement, so redundancy is lower. Using this mode, new replicas of the documents stored on this node will not be created, allowing the node to be down with less of a performance impact on the rest of the cluster. This mode is typically used to mask a down state during controlled node restarts, or by an administrator who needs to do some short maintenance work, like upgrading software or restarting the node. | +| `Retired` | A retired node is available and serves requests. This state is used to remove nodes while keeping redundancy. Buckets are moved to other nodes (with low priority), until empty. Special considerations apply when using [grouped distribution](/en/content/elasticity#grouped-distribution) as buckets are not necessarily removed. | + +| Type | Spec | Description | +| :--- | :--- | :--- | +| cluster | *`<identifier>`* | The name given to a content cluster in a Vespa application. | +| description | *.** | Description can contain anything that is valid JSON. 
However, as the information is presented in various interfaces, some of which may present reasons for all the states in a cluster or similar, keeping it short and to the point makes it easier to fit the information neatly into a table and get a better cluster overview. | +| group-spec | *`<identifier>`* (\. *`<identifier>`* )* | The hierarchical group assignment of a given content node. This is a dot separated list of identifiers given in the application services.xml configuration. | +| node | [0-9]+ | The index or distribution key identifying a given node within the context of a content cluster and a service type. | +| service-type | (distributor\|storage) | The type of the service to look at state for, within the context of a given content cluster. | +| state-disk | (up\|down) | One of the valid disk states. | +| state-unit | [up](#up) \| [stopping](#stopping) \| [down](#down) | The cluster controller fetches states from all nodes, called *unit states*. States reported from the nodes are either `up` or `stopping`. If the node can not be reached, a `down` state is assumed. This means, the cluster controller detects failed nodes. The subsequent *generated states* will have nodes in `down`, and the [ideal state algorithm](/en/content/idealstate) will redistribute [buckets](/en/content/buckets) of documents. | +| state-user | [up](#up) \| [down](#down) \| [maintenance](#maintenance) \| [retired](#retired) | Use tools for [user state management](/en/operations/self-managed/admin-procedures#cluster-state). Retire a node from a cluster: use `retired` to move buckets to other nodes. Short-lived maintenance work: use `maintenance` to avoid merging buckets to other nodes. Fail a bad node: the cluster controller or an operator can set a node `down`. | +| state-generated | [up](#up) \| [down](#down) \| [maintenance](#maintenance) \| [retired](#retired) | The cluster controller generates the cluster state from the `unit` and `user` states, over time. 
The generated state is called the *cluster state*. | + +| Parameter | Type | Description | +| :--- | :--- | :--- | +| recursive | number | Number of levels, or `true` for all levels. Examples: Use `recursive=1` for a node request to also see all data; use `recursive=2` to see all the node data within each service type. In recursive mode, you will see the same output as found in the spec below. However, where there is a `{ "link" : "<url-path>" }` element, this element will be replaced by the content of that request, given a recursive value of one less than the request above. | + diff --git a/mintlify-docs/en/reference/api/config-v2.mdx b/mintlify-docs/en/reference/api/config-v2.mdx new file mode 100644 index 0000000000..420d5a6462 --- /dev/null +++ b/mintlify-docs/en/reference/api/config-v2.mdx @@ -0,0 +1,32 @@ +--- +title: "Config API" +description: "Config API in Vespa applications." +--- +| Term | Description | +| :--- | :--- | +| Parameters | **Parameter:** recursive; **Default:** false; **Description:** If true, include each config id in the model which produces the config, and list only the links to the config payload. If false, include the first level of the config ids in the listing of new list URLs, as explained above. | +| Request body | None | +| Response | A list response includes two arrays: List-links to descend one level down in the config id hierarchy, named `children`. [Config payload](#payload) links for the current (top) level, named `configs`. | +| Error Response | N/A | + +| Term | Description | +| :--- | :--- | +| Parameters | Same as above. 
| +| Request body | None | +| Response | List the configs in the model with the given namespace and name, and for which the given config id segment is a prefix. | +| Error Response | 404 if the given namespace.name is not known to the config model. 404 if the given config id is not in the model. | + +| Term | Description | +| :--- | :--- | +| Parameters | None | +| Request body | None | +| Response | Returns the config payload of the given `namespace.name/config/id`, formatted as JSON. | +| Error Response | Same as above. | + diff --git a/mintlify-docs/en/reference/api/deploy-v2.mdx b/mintlify-docs/en/reference/api/deploy-v2.mdx new file mode 100644 index 0000000000..d47cd6e891 --- /dev/null +++ b/mintlify-docs/en/reference/api/deploy-v2.mdx @@ -0,0 +1,93 @@ +--- +title: "Deploy API" +description: "Example:" +--- +| Term | Description | +| :--- | :--- | +| session-id | The session-id used in this API is generated by the server and is required for all operations after [creating](#create-session) a session. The session-id is valid if it is an active session, or it was created before [session lifetime](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/configserver.def) has expired, the default value being 1 hour. | +| path | An application file path in a request URL or parameter refers to a relative path in the application package. A path ending with "/" refers to a directory. | + +| Term | Description | +| :--- | :--- | +| Parameters | | +| Request body | **Required:** Yes; **Content:** A compressed [application package](/en/reference/applications/application-packages) (with gzip or zip compression); **Note:** Set `Content-Type` HTTP header to `application/x-gzip` or `application/zip` . | +| Response | See [active](#activate-session). | + +| Term | Description | +| :--- | :--- | +| Parameters | **Name:** from; **Default:** N/A; **Description:** Use when you want to create a new session based on an active application. 
The value supplied should be a URL to an active application. | +| Request body | **Required:** Yes, unless `from` parameter is used; **Content:** A compressed [application package](/en/reference/applications/application-packages) (with gzip or zip compression); **Note:** It is required to set the `Content-Type` HTTP header to `application/x-gzip` or `application/zip` , unless the `from` parameter is used. | +| Response | The response contains: A [session-id](#session-id) to the application that was created. A [prepared](#prepare-session) URL for preparing the application. | + +| Term | Description | +| :--- | :--- | +| Parameters | None | +| Request body | If path is a directory, none. If path is a file, the contents of the file. | +| Response | None Any errors or warnings from writing the file/creating the directory. | + +| Term | Description | +| :--- | :--- | +| Parameters | **Name:** recursive; **Default:** false; **Description:** If *true* , directory content will be listed recursively.<br />**Name:** return; **Default:** content; **Description:** If set to content and path refers to a file, the content will be returned. If set to content and path refers to a directory, the files and subdirectories in the directory will be listed. If set to status and path refers to a file, the file status and hash will be returned. If set to status and path refers to a directory, a list of file/subdirectory statuses and hashes will be returned. | +| Request body | None. | +| Response | If path is a directory: a JSON array of URLs to the files and subdirectories of that directory. If path is a file: the contents of the file. If status parameter is set, the status and hash will be returned. | + +| Term | Description | +| :--- | :--- | +| Parameters | None | +| Request body | None | +| Response | Any errors or warnings from deleting the resource. 
| + +| Term | Description | +| :--- | :--- | +| Parameters | **Parameter:** applicationName; **Default:** N/A; **Description:** Name of the application to be deployed<br />**Parameter:** environment; **Default:** default; **Description:** Environment where application should be deployed<br />**Parameter:** region; **Default:** default; **Description:** Region where application should be deployed<br />**Parameter:** instance; **Default:** default; **Description:** Name of application instance<br />**Parameter:** debug; **Default:** false; **Description:** If true, include stack trace in response if prepare fails.<br />**Parameter:** timeout; **Default:** 360 seconds; **Description:** Timeout in seconds to wait for session to be prepared. | +| Request body | None | +| Response | Returns a [session-id](#session-id) and a link to activate the session. Log with any errors or warnings from preparing the application. An [activate](#activate-session) URL for activating the application with this [session-id](#session-id), if there were no errors. A list of actions (possibly empty) that must be performed in order to apply some config changes between the current active application and this next prepared application. These actions are organized into three categories; *restart*, *reindex*, and *refeed*: *Restart* actions are done after the application has been activated and are handled by restarting all listed services. See [schemas](/en/reference/schemas/schemas#modifying-schemas) for details. *Reindex* actions are special refeed actions that Vespa [handles automatically](/en/operations/reindexing), if the [reindex](#reindex) endpoint below is used. *Refeed* actions require several steps to handle. See [schemas](/en/reference/schemas/schemas#modifying-schemas) for details. 
| + +| Term | Description | +| :--- | :--- | +| Parameters | **Parameter:** timeout; **Default:** 60 seconds; **Description:** Timeout in seconds to wait for session to be activated (when several config servers are used, they might need to sync before activate can be done). | +| Request body | None | +| Response | Returns a [session-id](#session-id), a message and a URL to the activated application. [session-id](#session-id) Message | + +| Term | Description | +| :--- | :--- | +| Parameters | None | +| Request body | None | +| Response | Returns a list of applications. Array of active applications | + +| Term | Description | +| :--- | :--- | +| Parameters | None | +| Request body | None | +| Response | Returns information about the application specified. config generation | + +| Term | Description | +| :--- | :--- | +| Parameters | N/A | +| Request body | N/A | +| Response | JSON detailing current reindexing status for the application, with all its clusters and document types. Status for each content cluster in the application, by name: Status of each document type in the cluster, by name: Last time reindexing was triggered for this document type. Current status of reindexing. Optional start time of reindexing. Optional end time of reindexing. Optional progress of reindexing, from 0 to 1. Pseudo-speed of reindexing. | + +| Term | Description | +| :--- | :--- | +| Parameters | **Name:** clusterId; **Description:** A comma-separated list of content clusters to limit reindexing to. All clusters are reindexed if this is not present.<br />**Name:** documentType; **Description:** A comma-separated list of document types to limit reindexing to. All document types are reindexed if this is not present.<br />**Name:** indexedOnly; **Description:** Boolean: whether to mark reindexing ready only for document types with indexing mode *index* and at least one field with the indexing statement `index` . 
Default is `false` .<br />**Name:** speed; **Description:** Number (0–10], default 1: Indexing pseudo speed - balance speed vs. resource use. Example: speed=0.1 | +| Request body | N/A | +| Response | A human-readable message indicating what reindexing was marked as ready. | + +| Term | Description | +| :--- | :--- | +| Parameters | **Name:** clusterId; **Description:** A comma-separated list of content clusters to limit the changes to. Reindexing for all clusters is modified if this is not present.<br />**Name:** documentType; **Description:** A comma-separated list of document types to limit the changes to. Reindexing for all document types is modified if this is not present.<br />**Name:** indexedOnly; **Description:** Boolean: whether to modify reindexing only for document types with indexing mode *index* and at least one field with the indexing statement `index` . Default is `false` .<br />**Name:** speed; **Description:** Number [0–10], required: Indexing pseudo speed - balance speed vs. resource use. Example: speed=0.1 | +| Request body | N/A | +| Response | A human-readable message indicating what reindexing was modified. | + +| Term | Description | +| :--- | :--- | +| Parameters | None | +| Request body | None | +| Response | Returns a message stating if the operation was successful or not | + +| Term | Description | +| :--- | :--- | +| Parameters | None | +| Request body | None | +| Response | Returns a message with tenant and application details. | + diff --git a/mintlify-docs/en/reference/api/document-v1.mdx b/mintlify-docs/en/reference/api/document-v1.mdx new file mode 100644 index 0000000000..7ab2556f11 --- /dev/null +++ b/mintlify-docs/en/reference/api/document-v1.mdx @@ -0,0 +1,18 @@ +--- +title: "/document/v1 API reference" +description: "/document/v1 API reference in Vespa applications." +--- +| HTTP request | document/v1 operation | Description | +| :--- | :--- | :--- | +| GET | *Get* a document by ID or *Visit* a set of documents by selection. 
| | +| | Get | Get a document: `/document/v1/<namespace>/<document-type>/docid/<document-id>` `/document/v1/<namespace>/<document-type>/number/<numeric-group-id>/<document-id>` `/document/v1/<namespace>/<document-type>/group/<text-group-id>/<document-id>` Optional parameters: [cluster](#cluster) [fieldSet](#fieldset) [timeout](#timeout) [tracelevel](#tracelevel) | +| | Visit | Iterate over and get all documents, or a [selection](#selection) of documents, in chunks, using [continuation](#continuation) tokens to track progress. Visits are a linear scan over the documents in the cluster. `/document/v1/` It is possible to specify namespace and document type with the visit path: `/document/v1/<namespace>/<document-type>/docid` Documents can be grouped to limit accesses to a subset. A group is defined by a numeric ID or string — see [id scheme](/en/schemas/documents#id-scheme) . `/document/v1/<namespace>/<document-type>/group/<group>` `/document/v1/<namespace>/<document-type>/number/<number>` Mandatory parameters: [cluster](#cluster) - Visits can only retrieve data from *one* content cluster, so `cluster` **must** be specified for requests at the root `/document/v1/` level, or when there is ambiguity. This is required even if the application has only one content cluster. Optional parameters: [bucketSpace](#bucketspace) - Parent documents are [global](/en/reference/applications/services/content#document) and in the `global` [bucket space](#bucketspace). By default, visit will visit non-global documents in the `default` bucket space, unless document type is indicated, and is a global document type. [concurrency](#concurrency) - Use to configure backend parallelism for each visit HTTP request. 
[continuation](#continuation) [fieldSet](#fieldset) [selection](#selection) [sliceId](#sliceid) [slices](#slices) - Split visiting of the document corpus across more than one HTTP request—thus allowing the concurrent use of more HTTP containers—use the `slices` and `sliceId` parameters. [stream](#stream) - It's recommended enabling streamed HTTP responses, with the [stream](#stream) parameter, as this reduces memory consumption and reduces HTTP overhead. [timeout](#timeout) [tracelevel](#tracelevel) [wantedDocumentCount](#wanteddocumentcount) [fromTimestamp](#fromtimestamp) [toTimestamp](#totimestamp) [includeRemoves](#includeRemoves) Optional request headers: [Accept](#accept) - specify the desired response format. | +| POST | *Put* a given document, by ID, or *Copy* a set of documents by selection from one content cluster to another. | | +| | Copy | Write documents visited in source [cluster](#cluster) to the [destinationCluster](#destinationcluster) in the same application. A [selection](#selection) is mandatory — typically the document type. Supported paths (see [visit](#visit) above for semantics): `/document/v1/` `/document/v1/<namespace>/<document-type>/docid/` `/document/v1/<namespace>/<document-type>/group/<group>` `/document/v1/<namespace>/<document-type>/number/<number>` Mandatory parameters: [cluster](#cluster) [destinationCluster](#destinationcluster) [selection](#selection) Optional parameters: [bucketSpace](#bucketspace) [continuation](#continuation) [timeChunk](#timechunk) [timeout](#timeout) [tracelevel](#tracelevel) | +| PUT | *Update* a document with the given partial update, by ID, or *Update where* the given selection is true. | | +| | Update | Update a document with the partial update contained in the request body in the [document update JSON format](/en/reference/schemas/document-json-format#update) . 
`/document/v1/<namespace>/<document-type>/docid/<document-id>` Optional parameters: [condition](#condition) - use for conditional writes [create](#create) - use to create empty documents when updating non-existent ones. [route](#route) [timeout](#timeout) [tracelevel](#tracelevel) | +| | Update where | Update visited documents in [cluster](#cluster) with the partial update contained in the request body in the [document update JSON format](/en/reference/schemas/document-json-format#update). Supported paths (see [visit](#visit) above for semantics): `/document/v1/<namespace>/<document-type>/docid/` `/document/v1/<namespace>/<document-type>/group/<group>` `/document/v1/<namespace>/<document-type>/number/<number>` Mandatory parameters: [cluster](#cluster) [selection](#selection) Optional parameters: [bucketSpace](#bucketspace) - See [visit](#visit), `default` or `global` bucket space [continuation](#continuation) [stream](#stream) [timeChunk](#timechunk) [timeout](#timeout) [tracelevel](#tracelevel) | +| DELETE | *Remove* a document, by ID, or *Remove where* the given selection is true. | | +| | Remove | Remove a document. `/document/v1/<namespace>/<document-type>/docid/<document-id>` Optional parameters: [condition](#condition) [route](#route) [timeout](#timeout) [tracelevel](#tracelevel) | +| | Delete where | Delete visited documents from [cluster](#cluster). 
Supported paths (see [visit](#visit) above for semantics): `/document/v1/` `/document/v1/<namespace>/<document-type>/docid/` `/document/v1/<namespace>/<document-type>/group/<group>` `/document/v1/<namespace>/<document-type>/number/<number>` Mandatory parameters: [cluster](#cluster) [selection](#selection) Optional parameters: [bucketSpace](#bucketspace) - See [visit](#visit), `default` or `global` bucket space [continuation](#continuation) [stream](#stream) [timeChunk](#timechunk) [timeout](#timeout) [tracelevel](#tracelevel) | + diff --git a/mintlify-docs/en/reference/api/metrics-v1.mdx b/mintlify-docs/en/reference/api/metrics-v1.mdx new file mode 100644 index 0000000000..d2f22e399f --- /dev/null +++ b/mintlify-docs/en/reference/api/metrics-v1.mdx @@ -0,0 +1,25 @@ +--- +title: "/metrics/v1 API reference" +description: "/metrics/v1 API reference in Vespa applications." +--- +| HTTP request | metrics/v1 operation | Description | +| :--- | :--- | :--- | +| GET | | | +| | Node metrics | `/metrics/v1/values` See [monitoring](/en/operations/self-managed/monitoring#metrics-v1-values) for examples. | + +| Parameter | Type | Description | +| :--- | :--- | :--- | +| consumer | String | Specify response [consumer](/en/reference/applications/services/admin#consumer), i.e. set of metrics. An unknown / empty value will return the `default` metric set. Built-in: `default` - see [DefaultMetrics](/en/reference/operations/metrics/default-metric-set). `vespa` - see [VespaMetricSet](/en/reference/operations/metrics/vespa-metric-set). | + +| Element | Parent | Type | Description | +| :--- | :--- | :--- | :--- | +| services | | Object | Root for /metrics/v1/values. Contains service objects. | +| name | services | String | Service name. | +| timestamp | services | Number | EPOCH in seconds - time of metrics fetch from service. | +| status | services | Object | Status from metrics fetch. 
| +| code | status | String | The status for each service is one of: `up` `down` `unknown` `unknown` is used if the service seems to be alive, but does not report metrics. | +| description | status | String | Textual status. | +| metrics | services | Array | Array of metric objects. | +| values | metrics | Object | Set of metric-name/value pairs. | +| dimensions | metrics | Object | Set of metric dimension-name/value pairs. | + diff --git a/mintlify-docs/en/reference/api/metrics-v2.mdx b/mintlify-docs/en/reference/api/metrics-v2.mdx new file mode 100644 index 0000000000..16c9ab72bf --- /dev/null +++ b/mintlify-docs/en/reference/api/metrics-v2.mdx @@ -0,0 +1,13 @@ +--- +title: "/metrics/v2 API reference" +description: "/metrics/v2 API reference in Vespa applications." +--- +| HTTP request | metrics/v2 operation | Description | +| :--- | :--- | :--- | +| GET | | | +| | Application metrics | `/metrics/v2/values` See [monitoring](/en/operations/self-managed/monitoring#metrics-v2-values) for examples. | + +| Parameter | Type | Description | +| :--- | :--- | :--- | +| consumer | String | Specify response [consumer](/en/reference/applications/services/admin#consumer), i.e. set of metrics. See [metrics/v1](/en/reference/api/metrics-v1#consumer) for details. | + diff --git a/mintlify-docs/en/reference/api/prometheus-v1.mdx b/mintlify-docs/en/reference/api/prometheus-v1.mdx new file mode 100644 index 0000000000..cdc3088a45 --- /dev/null +++ b/mintlify-docs/en/reference/api/prometheus-v1.mdx @@ -0,0 +1,13 @@ +--- +title: "/prometheus/v1 API reference" +description: "/prometheus/v1 API reference in Vespa applications." +--- +| HTTP request | prometheus/v1 operation | Description | +| :--- | :--- | :--- | +| GET | | | +| | Node metrics | `/prometheus/v1/values` See [monitoring](/en/operations/self-managed/monitoring#prometheus-v1-values) for examples. 
| + +| Parameter | Type | Description | +| :--- | :--- | :--- | +| consumer | String | Specify response [consumer](/en/reference/applications/services/admin#consumer), i.e. set of metrics. An unknown / empty value will return the `default` metric set. Built-in (note: case-sensitive): `default` `Vespa` | + diff --git a/mintlify-docs/en/reference/api/query.mdx b/mintlify-docs/en/reference/api/query.mdx new file mode 100644 index 0000000000..cf380c9224 --- /dev/null +++ b/mintlify-docs/en/reference/api/query.mdx @@ -0,0 +1,387 @@ +--- +title: "Query API Reference" +description: "Query API Reference in Vespa applications." +--- + +Refer to the [Query API guide](/en/querying/query-api) for API examples. + +All the request parameters listed below can be set in query profiles. +The first four blocks of properties are also modeled as +[query profile types](/en/querying/query-profiles#query-profile-types). +These types can be referred from query profiles (and inheriting types) +to provide type checking on the parameters. + +Parameters have *full name* and can have one or more shorter aliases, shown in square brackets below. +Both names can be used in requests and query profiles. +Aliases are case-insensitive in requests. + +When accessing parameters from Searcher components, the full names correspond to the path from +the root Query object to that parameter. 
+ +## Parameters + +#### Query + +- [yql](#yql) + +#### Native Execution Parameters + +- [hits](#hits) [*count*] +- [offset](#offset) [*start*] +- [queryProfile](#queryprofile) +- [groupingSessionCache](#groupingsessioncache) +- [searchChain](#searchchain) +- [timeout](#timeout) + +#### Query Model + +- [model.defaultIndex](#model.defaultindex) [*default-index*] +- [model.encoding](#model.encoding) [*encoding*] +- [model.filter](#model.filter) [*filter*] +- [model.locale](#model.locale) [*locale*] +- [model.language](#model.language) [*lang, language*] +- [model.queryString](#model.querystring) [*query*] +- [model.restrict](#model.restrict) [*restrict*] +- [model.searchPath](#model.searchpath) [*path*] +- [model.sources](#model.sources) [*search, sources*] +- [model.type](#model.type) [*type*] +- [model.type.composite](#model.type.composite) +- [model.type.tokenization](#model.type.tokenization) +- [model.type.syntax](#model.type.syntax) +- [model.type.profile](#model.type.profile) +- [model.type.isYqlDefault](#model.type.isYqlDefault) + +#### Ranking + +- [ranking.elementGap.*fieldName*](#ranking.elementGap) +- [ranking.features](#ranking.features) [*input*, *rankfeature*] +- [ranking.freshness](#ranking.freshness) +- [ranking.keepRankCount](#ranking.keeprankcount) +- [ranking.totalKeepRankCount](#ranking.totalkeeprankcount) +- [ranking.listFeatures](#ranking.listfeatures) [*rankfeatures*] +- [ranking.matchPhase](#ranking.matchPhase) +- [ranking.matching](#ranking.matching) +- [ranking.profile](#ranking.profile) [*ranking*] +- [ranking.properties](#ranking.properties) [*rankproperty*] +- [ranking.queryCache](#ranking.querycache) +- [ranking.rankScoreDropLimit](#ranking.rankscoredroplimit) +- [ranking.matchPhase.attribute](#ranking.matchphase.attribute) +- [ranking.matchPhase.totalMaxHits](#ranking.matchphase.totalmaxhits) +- [ranking.matchPhase.maxHits](#ranking.matchphase.maxhits) +- [ranking.matchPhase.ascending](#ranking.matchphase.ascending) +- 
[matchPhase.diversity.attribute](#ranking.matchphase.diversity.attribute) +- [matchPhase.diversity.minGroups](#ranking.matchphase.diversity.mingroups) +- [ranking.secondPhase.totalRerankCount](#ranking.secondphase.totalrerankcount) +- [ranking.secondPhase.rerankCount](#ranking.secondphase.rerankcount) +- [ranking.secondPhase.rankScoreDropLimit](#ranking.secondphase.rankscoredroplimit) +- [ranking.globalPhase.rankScoreDropLimit](#ranking.globalphase.rankscoredroplimit) +- [ranking.globalPhase.rerankCount](#ranking.globalphase.rerankcount) +- [ranking.significance.useModel](#ranking.significance.useModel) +- [ranking.softtimeout.enable](#ranking.softtimeout.enable) +- [ranking.sorting](#ranking.sorting) [*sorting*] + +#### Presentation + +- [presentation.bolding](#presentation.bolding) [*bolding*] +- [presentation.format](#presentation.format) [*format*] +- [presentation.template](#presentation.template) +- [presentation.summary](#presentation.summary) [*summary*] +- [presentation.timing](#presentation.timing) + +#### Grouping + +- [select](#select) +- [collapse.summary](#collapse.summary) +- [collapsefield](#collapsefield) +- [collapsesize](#collapsesize) +- [collapsesize](#collapsesize.fieldname) [*fieldname*] +- [grouping.defaultMaxGroups](#grouping.defaultmaxgroups) +- [grouping.defaultMaxHits](#grouping.defaultmaxhits) +- [grouping.globalMaxGroups](#grouping.globalmaxgroups) +- [grouping.defaultPrecisionFactor](#grouping.defaultprecisionfactor) +- [timezone](#timezone) + +#### Streaming + +- [streaming.groupname](#streaming.groupname) +- [streaming.selection](#streaming.selection) +- [streaming.maxbucketspervisitor](#streaming.maxbucketspervisitor) + +#### Tracing + +- [trace.profile](#trace.profile) +- [trace.level](#trace.level) +- [trace.explainLevel](#trace.explainlevel) +- [trace.profileDepth](#trace.profiledepth) +- [trace.profiling.matching.depth](#trace.profiling.matching.depth) +- 
[trace.profiling.firstPhaseRanking.depth](#trace.profiling.firstPhaseRanking.depth) +- [trace.profiling.secondPhaseRanking.depth](#trace.profiling.secondPhaseRanking.depth) +- [trace.timestamps](#trace.timestamps) +- [trace.query](#trace.query) + +#### Semantic Rules + +- [rules.off](#rules.off) +- [rules.rulebase](#rules.rulebase) +- [tracelevel.rules](#tracelevel.rules) + +#### Dispatch + +- [dispatch.topKProbability](#dispatch.topkprobability) + +#### Other + +- [recall](#recall) +- [user](#user) +- [hitcountestimate](#hitcountestimate) +- [metrics.ignore](#metrics.ignore) +- [weakAnd.replace](#weakand.replace) +- [wand.hits](#wand.hits) +- [sorting.degrading](#sorting.degrading) +- [noCache](#nocache) + + +## Query + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| yql | | String | | See the [YQL query guide](/en/querying/query-language) for examples, and the [reference](/en/reference/querying/yql) for details. | + +## Native Execution Parameters + +These parameters are defined in the `native` query profile type. + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| hits | count | Number | 10 | A positive integer, including 0. The maximum number of hits to return from the result set. `hits` is capped at `maxHits`, default 400. `maxHits` can be set in a [query profile](/en/querying/query-profiles). Number of hits can also be set in [YQL](/en/reference/querying/yql#limit-offset). | +| offset | start | Number | 0 | To implement pagination: The number of hits to skip when returning the result. A positive integer, including 0. `offset` is capped at `maxOffset`, default 1000. `maxOffset` can be set in a [query profile](/en/querying/query-profiles). Offset can also be set in [YQL](/en/reference/querying/yql#limit-offset). | +| queryProfile | | String | `default` | A query profile id with format `name:version`, where version can be omitted or partially specified, e.g. 
`myprofile:2.1`. A [query profile](/en/querying/query-profiles) has default properties for a query. The default query profile is named *default*. | +| groupingSessionCache | | Boolean | true | Set to true to enable grouping session cache. See the [grouping reference](/en/reference/querying/grouping-language#grouping-session-cache) for details. | +| searchChain | | String | `default` | A search chain id with format `name:version`, where version can be omitted or partially specified, e.g. `mychain:2.1.3`. The [search chain](/en/applications/chaining) initially invoked when processing the query. This search chain may invoke other chains. | +| timeout | | String | 0.5s | Positive floating point number with an optional unit. Default unit is seconds (s), valid unit strings are e.g. *ms* and *s*. To set a timeout of one minute, the argument could be set to *60 s*. Space between the number and the unit is optional. It specifies the overall timeout of the query execution and can be defined in a [query profile](/en/querying/query-profiles). Different classes of queries can then easily have a different latency budget/timeout using different profiles. At timeout, the hits generated thus far are returned, refer to [ranking.softtimeout.enable](#ranking.softtimeout.enable) for details on HTTP status codes and response elements. Refer to the [Query API guide](/en/querying/query-api#timeout) for more details on timeout handling. | + +## Query Model Parameters + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| model.defaultIndex | default-index | String | `default` | An index name. The field which is searched for query terms which doesn't explicitly specify an index. Also see the [defaultIndex](/en/reference/querying/yql#defaultindex) query annotation. | +| model.encoding | encoding | String | `utf-8` | Encoding names or aliases defined in the [IANA character sets](https://www.iana.org/assignments/character-sets/character-sets.xhtml). 
Sets the encoding to use when returning a result. The query is always encoded as UTF-8, independently of how the result will be encoded. The encodings `big5`, `euc-jp`, `euc-kr`, `gb2312`, `iso-2022-jp` and `shift-jis` also influences how [tokenization](/en/linguistics/linguistics-opennlp#tokenization) is done in the absence of an explicit language setting. | +| model.filter | filter | String | | A filter string in the [Simple Query Language](/en/reference/querying/simple-query-language). Sets a filter to be combined with the [model.queryString](#model.querystring). Typical use of a filter is to add machine generated or preferences based filter terms to the user query. Terms which are passed in the filter are not [bolded](#presentation.bolding). The filter is parsed the same way as a query of type `any`, the full syntax is available. The positive terms (preceded by +) and phrases act as AND filters, the negative terms (preceded by -) act as NOT filters, while the unprefixed terms will be used to RANK the results. Unless the query has no positive terms, the filter will only restrict and influence ranking of the result set, never cause more matches than the query. The [model.queryString](#model.querystring) must be present for this to have any effect. To add filters to the YQL string, use query profiles. See [example](/en/querying/query-profiles#example). | +| model.locale | locale | String | | A language tag from [RFC 5646](https://www.rfc-editor.org/rfc/rfc5646). Sets the locale and language to use when parsing queries from a language tag, such as `en-US`. This attribute should always be set when it is known. If this parameter is not set, it will be guessed from the query and encoding, and default to english if it cannot be guessed. | +| model.language | lang, language | String | | A language tag from [RFC 5646](https://www.rfc-editor.org/rfc/rfc5646), but allowing underscore instead of dash as separator character. A legacy alternative to locale. 
When this value is accessed, underscores will be replaced by dashes in the returned value. Also see the [language](/en/reference/querying/yql#language) query term annotation. | +| model.queryString | query | String | | A query string in the [Simple Query Language](/en/reference/querying/simple-query-language). It is combined with [model.filter](#model.filter). See the [userQuery](/en/reference/querying/yql#userquery) operator for how to combine with YQL. Can also be used without YQL. | +| model.restrict | restrict | String | | A comma-delimited list of document type (schema) names, defaulting to all schemas if not set. See [federation](/en/querying/federation). Use [model.sources](#model.sources) to restrict to content cluster names or other source names. | +| model.searchPath | searchpath | String | | Specification of which content nodes a query should be sent to. This is useful for debugging/monitoring and when using [Rank phase statistics](/en/ranking/phased-ranking#rank-phase-statistics). Note that in a content cluster with flat distribution (i.e. no <group> element in *services.xml*), there is 1 implicit group. If not set, defaults to all nodes in one group, selected by load balancing. `searchpath::ELEMENT [';' ELEMENT]*` `ELEMENT::NODE ['/' GROUP]` `NODE::EXP [',' EXP]*` `EXP::NUM \| RANGE` `GROUP::NUM \| '*'` `RANGE::'['NUM ',' NUM ' >'` Examples: `7/3` = node 7, group 3. `7/` = node 7, any group. `*/0` = all nodes in group 0 `7,1,9/0` = nodes 1,7 and 9, group 0. `1,[3,9>/0` = nodes 1,3,4,5,6,7,8, group 0. | +| model.sources | search, sources | String | | A comma-separated list of content cluster names or other source names, defaulting to all sources/clusters if not set. The names of the sources to query, e.g., one or more content clusters and/or federated sources - see [federation](/en/querying/federation). Use [model.restrict](#model.restrict) to only search a subset of the schemas in a cluster. 
| +| model.type | type | String | `weakAnd` | Sets all the model.type parameters (composite, tokenization, and syntax) specifying how to parse a [model.queryString](#model.querystring) parameter at once, according to the given table: `all` → composite `and`, tokenization `internal`, syntax `simple` `any` → composite `or`, tokenization `internal`, syntax `simple` `linguistics` → composite `weakAnd`, tokenization `linguistics`, syntax `none` `phrase` → composite `phrase`, tokenization `internal`, syntax `none` `tokenize` → composite `weakAnd`, tokenization `internal`, syntax `none` `weakAnd` → composite `weakAnd`, tokenization `internal`, syntax `simple` `web` → composite `and`, tokenization `internal`, syntax `web` `yql` → composite `and`, tokenization `internal`, syntax `yql` Also see [YQL grammar](/en/reference/querying/yql#userinput). | +| model.type.composite | | String | `Determined by model.type` | Sets the Vespa query composite type that will collect parsed terms of the query by default. and — Create an AndItem which only matches if *all* terms are present. near — Create a NearItem which matches if all the terms appear near each other (gap of 1 by default). oNear — Create an ONearItem which matches if all the terms appear near each other (gap of 1 by default), in the given order. or — Create an OrItem which matches if *any* of the terms are present. phrase — Create a PhraseItem which matches if all the terms are present in the given order with no gaps. weakAnd — Create a [WeakAndItem](/en/ranking/wand#weakand) which has the semantics of `or` with performance approaching `and` . | +| model.type.tokenization | | String | `Determined by model.type` | Sets the tokenizer used to split the query string into tokens. internal — Use the tokenizer built into the query parser. 
linguistics — Pass the full query string as-is to the linguistics component for tokenization, exactly as on the indexing side, and collect any text and numeric token returned as-is, with no further stemming or normalization even when specified in the schema. This is only supported in conjunction with the `none` syntax option. | +| model.type.syntax | | String | `Determined by model.type` | Sets the syntax used to interpret the query string. Options: `none`: No syntax: Disregard any non-searchable terms. `simple`: Use the [simple query language](/en/reference/querying/simple-query-language) suitable for end users. `web`: Like the [simple query language](/en/reference/querying/simple-query-language) , but '+' in front of a term means "search for this term as-is", and 'a OR b' (capital OR) means match either a or b. `yql`: Parse as a [YQL query](/en/reference/querying/yql) . | +| model.type.profile | | String | `*(null)*` | Overrides the linguistics profile assigned to the field(s) searched. The linguistics profile is used to choose the processing done in the [linguistics component](/en/linguistics/linguistics). | +| model.type.isYqlDefault | | Boolean | `false` | Whether the model.type settings should be used as the default settings for terms in YQL queries. With this parameter turned on, the model.type settings become the default "grammar" settings in userQuery, and with tokenization set to `linguistics` this will also cause "contains" terms to not undergo stemming, normalization and lowercasing as separate operations, as using this mode delegates all token processing to a single pass through the linguistics module. | + +## Ranking + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| ranking.location | | String | | See [Geo search](/en/querying/geo-search). Point (two-dimensional location) to use as base for location ranking. 
**Important:** Deprecated in favor of adding a [geoLocation](/en/reference/querying/yql#geolocation) item to the query tree. (Use inside a [rank](/en/reference/querying/yql#rank) operator if it should be used only for ranking). | +| ranking.features<br />.*featurename* | input <br /> . *featurename* , rankfeature <br /> . *featurename* | String | | Set a query rank feature input to a value. The key must be a query feature - `query(anyname)`, and the value must be a double, string (to be hashed to a double), or a tensor matching the [declared input type](/en/reference/schemas/schemas#inputs) on [tensor literal form](/en/reference/ranking/tensor#tensor-literal-form) - see the [tensor user guide](/en/ranking/tensor-user-guide#querying-with-tensors). Examples: `input.query(userageDouble)=42.1` `input.query(stringToBeHashed)=abcd` `input.query(myIndexedTensor)=[1.0, 2.0, 3.0]` `input.query(myMappedTensor)={"Tablet Keyboard Cases": 0.8, "Keyboards":0.3}` | +| ranking.listFeatures | rankfeatures | Boolean | false | Set to true to request *all* [rank-features](/en/reference/schemas/schemas#rank-features) to be calculated and returned. The rank features will be returned in the summary field *rankfeatures*. This option is typically used for MLR training and should not be used in production. | +| ranking.profile | ranking | String | `default` | Sets [rank profile](/en/reference/schemas/schemas#rank-profile) to use for assigning rank scores for documents. The `default` rank profile will be used for backends which do not have the given rank profile. | +| ranking.properties<br />.*propertyname* | rankproperty <br /> . *propertyname* | String | | Set a [rank property](/en/reference/schemas/schemas#rank-properties) that is passed to, and used by a feature executor for this query. Example: `query=foo&ranking.properties.dotProduct.X={a:1,b:2}` | +| ranking.softtimeout<br />.enable | | Boolean | true | By default, the hits available are returned on [timeout](#timeout). 
To return no hits at timeout instead, set `ranking.softtimeout.enable=false`. Softtimeout uses `ranking.softtimeout.factor` of the [timeout](#timeout), default 70%. The rest of the time budget is spent on later ranking phases. The factor is adaptive, per rank profile - the factor is adjusted based on remaining time after all ranking phases, unless overridden in the query using `ranking.softtimeout.factor`. A [timeout](/en/reference/querying/default-result-format#timeout) element is returned in the query response at timeout. Example: query with 500ms timeout, use 300ms in first-phase ranking: `&ranking.softtimeout.enable=true<br />&ranking.softtimeout.factor=0.6<br />&timeout=0.5` The `ranking.softtimeout` settings control what the content nodes should do in the case where the latency budget has almost been used (timeout times a factor). Return the documents recalled and ranked with the [first phase function](/en/ranking/phased-ranking) within the time used, or simply don't produce a result: With soft timeout disabled, the Vespa container will return a 504 timeout without any results. When enabled, it will return the documents matched and ranked up until the timeout was reached, with a 200 OK response along with the reason the result set was degraded. The container might respond with a timeout error with HTTP response code 504 even with soft timeout enabled if the timeout is set so low that the query does not make it to the content nodes, or the container does not have any time left after input and query processing to dispatch the query to the content nodes. Read more about soft timeout in [coverage degradation](/en/performance/graceful-degradation). | +| ranking.softtimeout<br />.factor | | Number | 0.7 | See [ranking.softtimeout.enable](#ranking.softtimeout.enable). | +| ranking.sorting | sorting | String | | A valid [sort specification](/en/reference/querying/sorting-language). 
Fields you want to sort on must be stored as document attributes in the index structure by adding [attribute](/en/reference/schemas/schemas#attribute) to the indexing statement. | +| ranking.significance.useModel | | Boolean | false | Enables or disables the use of significance models specified in [services.xml](/en/reference/applications/services/search#significance). Overrides [use-model](/en/reference/schemas/schemas#significance) set in the rank profile. | +| ranking.freshness | | String | | Sets the time which will be used as *now* during execution. `[integer]`, an absolute time in seconds since epoch, or `now-[number]`, to use a time [number] seconds into the past, or `now` to use the current time. | +| ranking.queryCache | | Boolean | false | Turns query cache on or off. Query is a two-phase process. If the query cache is on, the query is stored on the content nodes between the first and second phase, saving network bandwidth and also query setup time, at the expense of using more memory. It only affects the protocol phase two, see [caches in Vespa](/en/performance/caches-in-vespa). It does not cache the result, it just saves resources by not forwarding the query twice (one for the first protocol phase which is to find the best k documents from all nodes, to the second phase which is to fill summary data and potentially ranking features listed in summary-features in the rank profile). The [summary-features](/en/reference/schemas/schemas#summary-features) are re-calculated but this setting avoids sending the query down once more. There is little downside of using it, and it can save resources and latency in cases where the query tree and query ranking features (e.g. tensors used in ranking) are large. As this is a protocol optimization, it also works with changing filter, it's not cached across independent queries, it's just saving having to send the same query twice. 
| +| ranking.secondPhase.totalRerankCount | | Number | | Specifies the number of hits that should be ranked in the second ranking phase in total over the queried content nodes. Overrides the [total-rerank-count](/en/reference/schemas/schemas#secondphase-total-rerank-count) set in the rank profile. Setting to 0 disables second phase reranking. | +| ranking.secondPhase.rerankCount | | Number | | Specifies the number of hits that should be ranked in the second phase *per node*. Prefer using [totalRerankCount](#ranking.secondphase.totalrerankcount) over this. | +| ranking.totalKeepRankCount | | Number | | Specifies the number of hits for which the rank score should be kept after first phase ranking in total over the nodes participating in the query. Overrides the [total-keep-rank-count](/en/reference/schemas/schemas#total-keep-rank-count) set in the rank profile. | +| ranking.keepRankCount | | Number | | Specifies the number of hits for which the rank score should be kept after first phase ranking on each node. Overrides the [keep-rank-count](/en/reference/schemas/schemas#keep-rank-count) set in the rank profile. Prefer [total-keep-rank-count](#ranking.totalkeeprankcount) over this. | +| ranking.rankScoreDropLimit | | Number | | Minimum rankscore for a document to be considered a hit. Overrides the [rank-score-drop-limit](/en/reference/schemas/schemas#rank-score-drop-limit) set in the rank profile. | +| ranking.secondPhase.rankScoreDropLimit | | Number | | Minimum rank score for a document to be considered a hit after second phase reranking or rescoring. Overrides the [second phase rank-score-drop-limit](/en/reference/schemas/schemas#secondphase-rank-score-drop-limit) set in the rank profile. | +| ranking.globalPhase.rerankCount | | Number | | Specifies the number of hits that should be re-ranked in the global ranking phase. Overrides the [rerank-count](/en/reference/schemas/schemas#globalphase-rerank-count) set in the rank profile. 
Setting to 0 disables the global phase reranking. | +| ranking.globalPhase.rankScoreDropLimit | | Number | | Minimum rank score for a document to be considered a hit after global phase reranking or rescoring. Overrides the [global phase rank-score-drop-limit](/en/reference/schemas/schemas#globalphase-rank-score-drop-limit) set in the rank profile. | +| ranking.elementGap.*fieldName* | | Integer | | Set or overrides [element-gap](/en/reference/schemas/schemas#rank-element-gap) configured for a given *fieldName* in the rank profile. Note: Can be the integer "0" to consider elements to be adjacent, or the string "infinity" to signal that words in different elements never are considered "close". | + +## ranking.matching + +Settings to control behavior during matching of query evaluation. + If these are set in the query, they will override any equivalent settings in the + [rank profile](/en/reference/schemas/schemas#rank-profile). + Detailed descriptions are found in the rank profile documentation. + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| ranking.matching<br />.numThreadsPerSearch | | integer | | Rank profile equivalent: [num-threads-per-search](/en/reference/schemas/schemas#num-threads-per-search) Overrides the global [persearch](/en/reference/applications/services/content#requestthreads-persearch) threads to a **lower** value. | +| ranking.matching<br />.minHitsPerThread | | integer | | Rank profile equivalent: [min-hits-per-thread](/en/reference/schemas/schemas#min-hits-per-thread) After estimating the number of hits for a query, this number is used to decide how many search threads to use. | +| ranking.matching<br />.numSearchPartitions | | integer | | Rank profile equivalent: [num-search-partitions](/en/reference/schemas/schemas#num-search-partitions) Number of logical partitions the corpus on a content node is divided in. A partition is the smallest unit a search thread will handle. 
| +| ranking.matching<br />.termwiseLimit | | double [0.0, 1.0] | | Rank profile equivalent: [termwise-limit](/en/reference/schemas/schemas#termwise-limit) If estimated number of hits > corpus * termwise-limit, document candidates are pruned with a [TAAT](/en/performance/feature-tuning#hybrid-taat-daat) evaluation for query terms not needed for ranking. | +| ranking.matching<br />.postFilterThreshold | | double [0.0, 1.0] | 1.0 | Rank profile equivalent: [post-filter-threshold](/en/reference/schemas/schemas#post-filter-threshold) Threshold value deciding if a query with an approximate [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) operator combined with filters is evaluated using post-filtering. | +| ranking.matching<br />.approximateThreshold | | double [0.0, 1.0] | 0.02 | Rank profile equivalent: [approximate-threshold](/en/reference/schemas/schemas#approximate-threshold) Threshold value deciding if a query with an approximate [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) operator combined with filters is evaluated by searching for approximate or exact nearest neighbors. | +| ranking.matching<br />.filterFirstThreshold | | double [0.0, 1.0] | 0.2 | Rank profile equivalent: [filter-first-threshold](/en/reference/schemas/schemas#filter-first-threshold) Threshold value deciding if the filter is checked before computing a distance (*filter-first heuristic*) while searching the [HNSW](/en/reference/schemas/schemas#index-hnsw) graph for approximate neighbors with filtering. | +| ranking.matching<br />.filterFirstExploration | | double [0.0, 1.0] | 0.01 | Rank profile equivalent: [filter-first-exploration](/en/reference/schemas/schemas#filter-first-exploration) Value specifying how aggressively the filter-first heuristic searches the [HNSW](/en/reference/schemas/schemas#index-hnsw) graph for approximate neighbors with filtering. 
| +| ranking.matching<br />.explorationSlack | | double [0.0, 1.0] | 0.0 | Rank profile equivalent: [exploration-slack](/en/reference/schemas/schemas#exploration-slack) Value specifying slack to delay the termination of the search of the [HNSW](/en/reference/schemas/schemas#index-hnsw) graph for nearest neighbors with or without filtering. | +| ranking.matching<br />.targetHitsMaxAdjustmentFactor | | double [1.0, inf] | | Rank profile equivalent: [target-hits-max-adjustment-factor](/en/reference/schemas/schemas#target-hits-max-adjustment-factor) Value used to control the auto-adjustment of [totalTargetHits](/en/reference/querying/yql#totaltargethits) used when evaluating an approximate [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) operator with post-filtering. | +| ranking.matching<br />.filterThreshold | | double [0.0, 1.0] | | Rank profile equivalent: [filter-threshold](/en/reference/schemas/schemas#filter-threshold) Threshold value (in the range [0, 1]) deciding when matching in *index* fields should be treated as filters. This happens for query terms with [estimated hit ratios](/en/learn/glossary#estimated-hit-ratio) that are above the *filterThreshold*. | +| ranking.matching.weakand<br />.stopwordLimit | | double [0.0, 1.0] | | Rank profile equivalent: [weakand stopword-limit](/en/reference/schemas/schemas#weakand-stopword-limit) A number in the range [0, 1] representing the maximum [normalized document frequency](/en/learn/glossary#document-frequency-normalized) a query term can have in the corpus before it's considered a stopword and dropped entirely from being a part of the `weakAnd` evaluation. | +| ranking.matching.weakand<br />.adjustTarget | | double [0.0, 1.0] | | Rank profile equivalent: [weakand adjust-target](/en/reference/schemas/schemas#weakand-adjust-target) A number in the range [0, 1] representing [normalized document frequency](/en/learn/glossary#document-frequency-normalized). 
Used to derive a per-query document score threshold, where documents scoring lower than the threshold will not be considered as potential hits from the `weakAnd` operator. | +| ranking.matching.weakand<br />.allowDropAll | | boolean | false | Rank profile equivalent: [weakand allow-drop-all](/en/reference/schemas/schemas#weakand-allow-drop-all) A boolean value that, if set to `true`, will allow the `weakAnd` operator to drop *all* terms from the query if all terms are considered stopwords (i.e. by setting `weakAnd.stopwordLimit`). Typically used in conjunction with [nearestNeighbor](/en/querying/nearest-neighbor-search#querying-using-nearestneighbor-query-operator) or other operators to ensure that the query will return hits even when all terms are considered stopwords. | + +## ranking.matchPhase + +Settings to control behavior during the match phase of query evaluation. + If these are set in the query, they will override any + [match-phase](/en/reference/schemas/schemas#match-phase) settings in the rank profile. + Detailed descriptions are found in the rank profile documentation. + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| ranking.matchPhase<br />.attribute | | string | | Rank profile equivalent: [match-phase: attribute](/en/reference/schemas/schemas#match-phase-attribute) The attribute used to limit matches by if more than maxHits hits will be produced. | +| ranking.matchPhase<br />.totalMaxHits | | long | | The max number of hits that should be generated in total over the content nodes during the match phase. Setting the value to `0` disables match phase early termination. Rank profile equivalent: [match-phase: total-max-hits](/en/reference/schemas/schemas#match-phase-total-max-hits) | +| ranking.matchPhase<br />.maxHits | | long | | The max number of hits that should be generated on each content node during the match phase. Prefer using [totalMaxHits](#ranking.matchphase.totalmaxhits) over this. 
Rank profile equivalent: [match-phase: max-hits](/en/reference/schemas/schemas#match-phase-max-hits) | +| ranking.matchPhase<br />.ascending | | boolean | | Rank profile equivalent: [match-phase: order](/en/reference/schemas/schemas#match-phase-order) Whether to keep the documents having the highest (false) or lowest (true) values of the match phase attribute. | +| ranking.matchPhase<br />.diversity.attribute | | string | | Rank profile equivalent: [diversity: attribute](/en/reference/schemas/schemas#diversity-attribute) The attribute to use when deciding diversity. | +| ranking.matchPhase<br />.diversity.minGroups | | long | | Rank profile equivalent: [diversity: min-groups](/en/reference/schemas/schemas#diversity-min-groups) The minimum number of groups that should be returned from the match phase grouped by the diversity attribute. | + +## Dispatch + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| dispatch.topKProbability | | double | | Probability to use when computing how many hits to fetch from each partition when merging and creating the final result set. See [services](/en/reference/applications/services/content#top-k-probability) for details. Default: [none](/en/reference/applications/services/content#top-k-probability). | + +## Presentation + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| presentation.bolding | bolding | Boolean | true | Whether to bold query terms in [schema](/en/reference/schemas/schemas) fields defined with [bolding: on](/en/reference/schemas/schemas#bolding) or [summary: dynamic](/en/reference/schemas/schemas#summary). | +| presentation.format | format | String | `default` | `Value`: Description `*No value* or [default](/en/reference/querying/default-result-format)`: The default, builtin JSON format `[json](/en/reference/querying/default-result-format)`: Builtin JSON format ``cbor``: Builtin [CBOR](https://cbor.io/) format. 
Binary encoding, responses are smaller and faster to render than JSON, especially for numeric data. Semantically equivalent to JSON. Cannot be used with `jsoncallback` (JSONP). Requires Vespa 8.623.5 or later. ``xml``: Builtin XML format. **Important:** See [deprecations](/en/reference/release-notes/vespa8). `[page](/en/reference/querying/page-result-format)`: XML format which is suitable for use with [page templates](/en/querying/page-templates) . **Important:** See [deprecations](/en/reference/release-notes/vespa8). `*Any other value*`: A custom [result renderer](/en/applications/result-renderers) supplied by the application The response format can also be selected via the HTTP `Accept` header. If the Accept header specifies `application/cbor` with higher priority than `application/json`, CBOR will be used. The `format` query parameter overrides the Accept header. | +| presentation.summary | summary | String | | The name of the [summary class](/en/querying/document-summaries) used to select fields in results. Default: The default summary class of the schema. | +| presentation.template | | String | | The id of a deployed page template to use for this result. This should be used with the [page](/en/reference/querying/page-result-format) result format. | +| presentation.timing | | Boolean | false | Whether a result renderer should try to add optional timing information to the rendered page - see the [result reference](/en/reference/querying/default-result-format#timing). | +| presentation.format.tensors | | String | `short` | Controls how tensors are rendered in the result. `Value`: Description ``short``: Render the tensor value in an object having two keys, "type" containing the value, and "cells"/"blocks"/"values" ( [depending on the type](/en/reference/schemas/document-json-format#tensor) ) containing the tensor content. <br /> Render the tensor content in the [type-appropriate short form](/en/reference/schemas/document-json-format#tensor) . 
``long``: Render the tensor value in an object having two keys, "type" containing the value, and "cells" containing the tensor content. <br /> Render the tensor content in the [general verbose form](/en/reference/schemas/document-json-format#tensor) . ``short-value``: Render the tensor content directly. <br /> Render the tensor content in the [type-appropriate short form](/en/reference/schemas/document-json-format#tensor) . ``long-value``: Render the tensor content directly. <br /> Render the tensor content in the [general verbose form](/en/reference/schemas/document-json-format#tensor) . ``hex``: Use `short` form, and render dense values [hex encoded](/en/reference/ranking/tensor#indexed-hex-form) . <br /> ``hex-value``: Use `short-value` form, and render dense values [hex encoded](/en/reference/ranking/tensor#indexed-hex-form) . <br /> | + +## Grouping and Aggregation + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| select | | String | | Requests specific multi-level result set statistics and/or hit groups to be returned in the result. Fields you want to retrieve statistics or hit groups for must be stored as document attributes in the index structure by adding attribute to the indexing statement. Default is no grouping. See the [grouping guide](/en/querying/grouping) for examples. | +| collapsefield | | String | | Comma-separated list of [field names](/en/reference/schemas/schemas#summary), that should only appear uniquely in a result. Hits with values in these fields which are already present in a higher-ranked hit will be filtered out. Read more in [result diversity](/en/querying/result-diversity) to compare this with other options. Default is no field collapsing. | +| collapsesize | | Number | 1 | The number of hits to keep in each collapsed bucket - used for all collapsefields. | +| collapsesize.*fieldname* | | Number | 1 | The number of hits to keep in each collapsed bucket - used for the specified field. 
This value takes precedence over the value specified in `collapsesize`. | +| collapse.summary | | String | | A valid name of a document summary class. Use this summary class to fetch the fields used for collapsing. Default: Use default summary or attributes. | +| grouping.defaultMaxGroups | | Number | 10 | Positive integer or `-1` to disable. The default number of groups to return when [max](/en/querying/grouping#ordering-and-limiting-groups) is not specified. | +| grouping.defaultMaxHits | | Number | 10 | Positive integer or `-1` to disable. The default number of hits to return when [max](/en/querying/grouping#hits-per-group) is not specified. | +| grouping.globalMaxGroups | | Number | 10000 | Positive integer or `-1` to disable. A cost limit for grouping queries. Any query that may exceed this threshold will be preemptively failed by the container. The limit is defined as the total number of groups and document summaries a query may produce. A query that does not have an implicit or explicit `max` defined for all levels will always fail if limit is enabled. This parameter can only be overridden in a [query profile](/en/querying/query-profiles). See the [grouping guide](/en/querying/grouping#global-limit) for practical examples. | +| grouping.defaultPrecisionFactor | | Decimal <br /> number | 2.0 | The default precision scale factor when [precision](/en/querying/grouping#ordering-and-limiting-groups) is not specified. The final precision value is calculated by multiplying the effective `max` value with the scale factor. | +| timezone | | String | `utc` | Specifies a timezone that will be used to offset all `time` related expressions in grouping. See [Java's definition](https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/TimeZone.html#getTimeZone(java.lang.String)) for valid timezones. See the [grouping guide](/en/querying/grouping#timezone-grouping) for examples. 
| + +## Streaming + +Parameters for [streaming search mode](/en/performance/streaming-search). + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| streaming.groupname | | A string | | Sets the group (specified by [g=\<groupname>](/en/schemas/documents#id-scheme)) of the documents to stream through. | +| streaming.selection | | A [document selection](/en/reference/writing/document-selector-language) | | Restricts streaming search using a selection expression instead of a group id. If the selection is on the form `id.group == "foo" or id.group == "bar" or id.group == ...` this will only stream documents in those groups, which is efficient for a small number of groups. If any other selection is used, this will stream through *all* groups, which is very costly. | +| streaming.maxbucketspervisitor | | An integer | Positive infinity | If set, limit backend bucket concurrency to the specified number of buckets. Can be used to explicitly control resource usage for extremely large streaming search locations. This is an expert option. | + +## Tracing + +Parameters controlling trace information returning with the result for diagnostics. + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| trace.profile | profile | Boolean | false | True to produce a structured trace for performance analysis. Returns a structured trace for performance analysis. This is a shorthand to set various other parameters to the suitable values to generate a performance trace. | +| trace.level | tracelevel | Number | 0 | A positive number. Default is no tracing. Collect trace information for debugging when running a query. Higher numbers give progressively more detail on query transformations, searcher execution and content node(s) query execution. See [query tracing](/en/querying/query-api#query-tracing) for details and examples. 
Tracing is subject to change at any time, the below is a guide: `Level`: Description `1`: Basic tracing in container `2`: Basic tracing, more details `3`: Basic tracing, even more details `4`: Include timing info from content nodes `5`: Even more timing info from content nodes `6`: Include the query execution plan (blueprint) `7`: Include the query execution tree | +| trace.explainLevel | explainlevel | Number | 0 | Set to a positive number to collect query execution information for debugging when running a query. Higher numbers give progressively more detail on content node query execution. Tuning this parameter is useful if we want to get more information from the content nodes without gathering lots of trace information from the container chain. Explanation is subject to change at any time, the below is a guide: `Level`: Description `1`: Timing and overall query plan (blueprint) from each content node `2`: Timing per search thread and execution tree (search iterator tree) Note that you might get the same at [trace.level](#trace.level) 5 and above. Default is no explanation. Tracing with `trace.explainLevel` also requires that [trace.level](#trace.level) is positive. | +| trace.profileDepth | | Number | 0 | Turns on performance profiling of the content node query execution for [matching](#trace.profiling.matching.depth), [first-phase ranking](#trace.profiling.firstPhaseRanking.depth), and [second-phase ranking](#trace.profiling.secondPhaseRanking.depth). How profiling is performed is based on whether `trace.profileDepth` is positive or negative: `Type`: Description `Tree`: A positive number specifies the depth used by a tree profiler. A higher number means more profiler data. The output resembles the structure of the search iterator tree or rank expression tree being profiled, with total time and self time tracked per component (node in the tree). `Flat`: A negative number specifies the topn (cut-off) used by a flat profiler. 
The output returns the topn components that use the most self time. The performance profiling output is subject to change at any time. Default is no information. Tracing with `trace.profileDepth` also requires that [trace.level](#trace.level) is positive. | +| trace.profiling.matching.depth | | Number | 0 | Turns on profiling of [matching](/en/performance/sizing-search#life-of-a-query-in-vespa) of the content node query execution. This exposes information about how time spent on matching is distributed between individual search iterators. The profiling output is tagged *match_profiling* and is subject to change at any time. Default is no information. See [trace.profileDepth](#trace.profiledepth) for semantics of this parameter. Tracing with `trace.profiling.matching.depth` requires that [trace.level](#trace.level) is positive. | +| trace.profiling.firstPhaseRanking.depth | | Number | 0 | Turns on profiling of the [first-phase ranking](/en/basics/ranking) of the content node query execution. This exposes information about how time spent on first-phase ranking is distributed between individual [rank features](/en/reference/ranking/rank-features). The profiling output is tagged *first_phase_profiling* and is subject to change at any time. Default is no information. See [trace.profileDepth](#trace.profiledepth) for semantics of this parameter. Tracing with `trace.profiling.firstPhaseRanking.depth` also requires that [trace.level](#trace.level) is positive. | +| trace.profiling.secondPhaseRanking.depth | | Number | 0 | Turns on profiling of the [second-phase ranking](/en/basics/ranking) of the content node query execution. This exposes information about how time spent on second-phase ranking is distributed between individual [rank features](/en/reference/ranking/rank-features). The profiling output is tagged *second_phase_profiling* and is subject to change at any time. Default is no information. 
See [trace.profileDepth](#trace.profiledepth) for semantics of this parameter. Tracing with `trace.profiling.secondPhaseRanking.depth` also requires that [trace.level](#trace.level) is positive. | +| trace.timestamps | | Boolean | false | Enable to get timing information already at [trace.level=1](#trace.level). This is useful for debugging latency spent at different components in the container search chain without rendering a lot of string data which is associated with higher trace levels. | +| trace.query | | Boolean | true | Whether to include the query in any trace messages. This is useful for avoiding query serialization with very large queries to avoid impact from it on performance and excessively large traces. | + +## Semantic Rules + +Refer to [semantic rules](/en/reference/querying/semantic-rules). + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| rules.off | | Boolean | true | Turn rule evaluation off for this query. | +| rules.rulebase | | String | | A rule base name - the name of the rule base to use for these queries. | +| tracelevel.rules | | Number | | The amount of rule evaluation trace output to show, higher number means more details. This is useful to see a trace from rule evaluation without having to see trace from all other searchers at the same time. | + +## Other + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| recall | | String | | Any allowed collection of recall terms. Sets a recall parameter to be combined with the query. This is identical to [filter](#model.filter), except that recall terms are not exposed to the ranking framework and thus not ranked. As such, one can not use unprefixed terms; they must either be positive or negative. | +| user | | String | | The id of the user making the query. The content of the argument is made available to the search chain, but it triggers no features in Vespa apart from being propagated to the access log. 
| +| hitcountestimate | | Boolean | false | Make this an estimation query. No hits will be returned, and total hit count will be set to an estimate of what executing the query as a normal query would give. | +| metrics.ignore | | Boolean | false | Ignore metric collection for this query request, useful for [warm-up queries](/en/performance/container-tuning#container-warmup). | +| weakAnd.replace | | Boolean | false | Replace all instances of OR in the query tree with weakAnd. | +| wand.hits | | Number | 100 | Used in combination with [weakAnd.replace](#weakand.replace). Sets the targetHits of the new weakAnds to the specified value. | +| sorting.degrading | | Boolean | true | When sorting on a [single-value numeric attribute with fast-search](/en/content/attributes) an optimization is activated to return early, with an inaccurate total-hits count. Set `sorting.degrading` to false to disable this optimization. This optimization sets the primary sorting attribute as the [match phase attribute](#ranking.matchphase.attribute), and [match phase maxHits](#ranking.matchphase.maxhits) equal to `max(10000, maxHits+maxOffset)`. [maxHits](#hits) and [maxOffset](#offset) can be set in a query profile. | +| noCache | nocache | Boolean | false | Sets whether this query should never be served from a cache. Vespa has [few caches](/en/performance/caches-in-vespa), and this parameter does not control any of them. Therefore, this parameter has no effect. | + +## HTTP status codes + +The following rules determine which HTTP status code is returned: + +- If the Result contains no errors (Result.hits().getError()==null): 200 OK is returned. + +- If the Result contains errors and no regular hits: + + + If the error code of any ErrorMessage in the Result + (Result.hits().getErrorHit().errorIterator()) is a "WEB SERVICE ERROR CODE", + the first of those is returned. + + + Otherwise, if it is an "HTTP COMPATIBLE ERROR CODE", the mapping of it is returned. 
+ + + Otherwise 500 INTERNAL_SERVER_ERROR is returned. + +*List of possible HTTP status codes and their descriptions.* + +| Code | Description | +| :--- | :--- | +| 200 | OK | +| 400 | Bad Request | +| 401 | Unauthorized | +| 403 | Forbidden | +| 404 | Not Found | +| 405 | Method Not Allowed | +| 408 | Request Timeout | +| 428 | Precondition Required | +| 431 | Request Header Fields Too Large | +| 500 | Internal Server Error | +| 502 | Bad Gateway | +| 503 | Service Unavailable; no available search handler threads in the jdisc container to serve the request. See [Container Tuning](/en/performance/container-tuning#container-worker-threads) on sizing thread pools. | +| 504 | Gateway Timeout | +| 507 | Insufficient Storage | + +Mapping of internal error codes to HTTP status codes. + +| Error Code | HTTP Code | +| :--- | :--- | +| com.yahoo.container.protect.Error.BAD_REQUEST | 400 | +| com.yahoo.container.protect.Error.UNAUTHORIZED | 401 | +| com.yahoo.container.protect.Error.FORBIDDEN | 403 | +| com.yahoo.container.protect.Error.NOT_FOUND | 404 | +| com.yahoo.container.protect.Error.INTERNAL_SERVER_ERROR | 500 | +| com.yahoo.container.protect.Error.INSUFFICIENT_STORAGE | 507 | + +## select + +A `select` query is equivalent in structure to YQL, written in JSON. + Contains subparameters `where`, `grouping` and `fields`. + +| Parameter | Alias | Type | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| where | | String | | A string with JSON. Refer to the [select reference](/en/reference/querying/json-query-language) for details. | +| grouping | | String | | A string with JSON. Refer to the [select reference](/en/reference/querying/json-query-language) for details. | +| fields | | String | | A JSON array of [summary field](/en/querying/document-summaries#selecting-summary-fields-in-yql) names to include in each hit. Equivalent to the field list in a YQL `select` clause. 
Refer to the [select reference](/en/reference/querying/json-query-language) for details. | diff --git a/mintlify-docs/en/reference/api/state-v1.mdx b/mintlify-docs/en/reference/api/state-v1.mdx new file mode 100644 index 0000000000..6546b046e2 --- /dev/null +++ b/mintlify-docs/en/reference/api/state-v1.mdx @@ -0,0 +1,40 @@ +--- +title: "/state/v1 API reference" +description: "/state/v1 API reference in Vespa applications." +--- +| HTTP request | state/v1 operation | Description | +| :--- | :--- | :--- | +| GET | | | +| | Service config generation | `/state/v1/config` In the response, [config](#config) has a mandatory [generation](#generation) and one or more \<service> elements: sentinel container distributor logd slobroks servicelayer proton Note: Other configuration elements can also be added as a service. A \<service> has a mandatory [generation](#generation). An optional [message](#message) can be returned. Example: ```{ "config": {"generation": 11, "slobroks": {"generation": 11, "message": "ok"}}}``` | +| | Service version | `/state/v1/version` Returns a mandatory service [version](#version). Example: ```{ "version": "8.43.64"}``` | +| | Service health | `/state/v1/health` Returns the service status, with [time](#time), [status](#status) and [metrics](#metrics). Metrics contains `requestsPerSecond` and `latencySeconds`, see [StateHandler](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/java/com/yahoo/container/jdisc/state/StateHandler.java). 
Example: `{` `"time": 1661863544346,` `"status": {` `"code": "up"` `},` `"metrics": {` `"snapshot": {` `"from": 1661863483.422,` `"to": 1661863543.38` `},` `"values": [` `{` `"name": "requestsPerSecond",` `"values": {` `"count": 30,` `"rate": 0.5` `}` `},` `{` `"name": "latencySeconds",` `"values": {` `"average": 0.001,` `"sum": 0,` `"count": 0,` `"last": 0.001,` `"max": 0.001,` `"min": 0.001,` `"rate": 0` `}` `}` `]` `}` `}` | +| | Service metrics | `/state/v1/metrics` Same as `/state/v1/health`, but with a full metrics set. A metric has a [name](#name) and [values](#values), and can have a [description](#description) and a set of [dimensions](#dimensions): `{` `"name": "content.proton.documentdb.matching.rank_profile.query_setup_time",` `"description": "Average time (sec) spent setting up and tearing down queries",` `"values": {` `"average": 0,` `"sum": 0,` `"count": 0,` `"rate": 0,` `"min": 0,` `"max": 0,` `"last": 0` `},` `"dimensions": {` `"documenttype": "music",` `"rankProfile": "default"` `}` `}` | +| | Service metric histograms | `/state/v1/metrics/histograms` See [histograms](/en/operations/self-managed/monitoring#histograms) for usage. The histograms are implemented using [HdrHistogram](http://hdrhistogram.org/), and the CSV result is what that library generates. | + +| Element | Parent | Type | Description | +| :--- | :--- | :--- | :--- | +| config | | Object | Root element for /state/v1/config. | +| generation | config | Number | The generation number is the number for the config that is active in the application. | +| message | config | String | An info or error message. | +| version | | String | Vespa version. | +| time | | Number | Epoch in milliseconds. 
| +| status | | Object | | +| code | status | String | Service status code - one of: up down initializing Containers with the [query API](/en/querying/query-api) enabled return `initializing` while waiting for content nodes to start, see [example](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA). `up` means that the service is fully up. Assume status `down` if no response. Refer to [StateMonitor](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/java/com/yahoo/container/jdisc/state/StateMonitor.java) for implementation. | +| message | status | String | Message is optional - it is normally empty if the service is up, while it is set to a textual reason for why it is unavailable, if so. | +| metrics | | Object | Snapshot of metric values. | +| snapshot | metrics | Object | Time period for metrics snapshot. | +| from | snapshot | Number | Epoch in seconds, with microseconds fraction. | +| to | snapshot | Number | Epoch in seconds, with microseconds fraction. | +| values | metrics | Array | Array of metric objects. | +| name | values | String | Metric name. | +| description | values | String | Textual description of the metric. | +| dimensions | values | Object | Set of dimension name/value pairs. | +| values | values | Object | Set of metric values. | +| average | values | Number | Average metric value, typically *sum* divided by *count* . | +| sum | values | Number | Sum of metric values in snapshot. | +| count | values | Number | Number of times metric has been set. For instance in a metric counting number of operations done, it will give the number of operations added for that snapshot period. For a value metric, for instance latency of operations, the count will give how many times latencies have been added to the metric. | +| last | values | Number | Last metric value. | +| max | values | Number | Max metric value in snapshot. | +| min | values | Number | Min metric value in snapshot. 
| +| rate | values | Number | Metric rate: *count* divided by *snapshot interval* . | + diff --git a/mintlify-docs/en/reference/applications/application-packages.mdx b/mintlify-docs/en/reference/applications/application-packages.mdx new file mode 100644 index 0000000000..3d781ce0b6 --- /dev/null +++ b/mintlify-docs/en/reference/applications/application-packages.mdx @@ -0,0 +1,102 @@ +--- +title: "Application package reference" +sidebarTitle: "Application packages" +--- + +This is the [application package](/en/basics/applications) reference. An application package is the deployment unit in Vespa. To deploy an application, create an application package and [vespa deploy](/en/clients/vespa-cli#deployment) or use the [deploy API](/en/reference/api/deploy-v2). The application package is a directory of files and subdirectories: + +| Directory/file | Required | Description | +| --- | --- | --- | +| [services.xml](/en/reference/applications/services/services) | Yes | Describes which services to run where, and their main configuration. | +| [hosts.xml](/en/reference/applications/hosts) | No | Vespa Cloud: Not used. See node counts in [services.xml](/en/reference/applications/services/services). <br/><br/> Self-managed: The mapping from logical nodes to actual hosts. | +| [deployment.xml](/en/reference/applications/deployment) | Yes, for Vespa Cloud | Specifies which environments and regions the application is deployed to during automated application deployment, as which application instances. <br/><br/> This file also specifies other deployment-related configurations like [cloud accounts](/en/operations/enclave/enclave) and [private endpoints](/en/operations/private-endpoints). <br/><br/> The file is required when deploying to the [prod environment](/en/operations/environments#prod) - it is ignored (with some exceptions) when deploying to the *dev* environment. 
| +| [validation-overrides.xml](/en/reference/applications/validation-overrides) | No | Override, allowing this package to deploy even if it fails validation. | +| [.vespaignore](/en/applications/vespaignore) | No | Contains a list of path patterns that should be excluded from the `application.zip` deployed to Vespa. | +| [models](/en/reference/ranking/model-files)/ | No | Machine-learned models in the application package. Refer to [stateless model evaluation](/en/ranking/stateless-model-evaluation), [Tensorflow](/en/ranking/tensorflow), [Onnx](/en/ranking/onnx), [XGBoost](/en/ranking/xgboost), and [LightGBM](/en/ranking/lightgbm). | +| [schemas](/en/basics/schemas)/ | No | Contains the \*.sd files describing the document types of the application and how they should be queried and processed. | +| [schemas/\[schema\]](/en/reference/schemas/schemas#rank-profile)/ | No | Contains \*.profile files defining [rank profiles](/en/basics/ranking#rank-profiles). This is an alternative to defining rank profiles inside the schema. | +| [security/clients.pem](/en/security/guide) | Yes, for Vespa Cloud | PEM encoded X.509 certificates for data plane access. See the [security guide](/en/security/guide) for how to generate and use. | +| [components](/en/applications/components)/ | No | Contains \*.jar files containing searcher(s) for the JDisc Container. 
| +| [rules](/en/reference/querying/semantic-rules)/ | No | Contains \*.sr files containing rule bases for semantic recognition and translation of the query | +| [search/query-profiles](/en/reference/querying/query-profiles)/ | No | Contains \*.xml files containing a named set of search request parameters with values | +| [constants](/en/ranking/tensor-user-guide#constant-tensors)/ | No | Constant tensors | +| [tests](/en/reference/applications/testing)/ | No | Test files for automated tests | +| ext/ | No | Files that are guaranteed to be ignored by Vespa: They are excluded when processing the application package and cannot be referenced from any other element in it. | + +Additional files and directories can be placed anywhere in the application package. These will not be processed explicitly by Vespa when deploying the application package (i.e. they will only be considered if they are referred to from within the application package), but there is no guarantee as to how these might be processed in a future release. To extend the application package in a way that is guaranteed to be ignored by Vespa in all future releases, use the *ext/* directory. + +## Deploy + +| Command | Description | +| --- | --- | +| **upload** | Uploads an application package to the config server. Normally not used, as *prepare* includes *upload* | +| **prepare** | 1. Verifies that a configuration server is up and running <br/><br/>2. Uploads the application to the configuration server, which stores it in *`$VESPA_HOME/var/db/vespa/config_server/serverdb/tenants/default/sessions/[sessionid]`*. *\[sessionid\]* increases for each *prepare*\-call. The config server also stores the application in a [ZooKeeper](/en/operations/self-managed/configuration-server.html) instance at */config/v2/tenants/default/sessions/\[sessionid\]* - this distributes the application to all config servers <br/><br/>3. 
Creates metadata about the deployed application package (which user deployed it, which directory it was deployed from, and at what time it was deployed) and stores it in *...sessions/\[sessionid\]/.applicationMetaData*<br/><br/> 4. Verifies that the application package contains the required files and performs a consistency check <br/><br/>5. Validates the xml config files using the [schema](https://github.com/vespa-engine/vespa/tree/master/config-model/src/main/resources/schema), found in *`$VESPA_HOME/share/vespa/schema`* <br/><br/>6. Checks if there are config changes between the active application and this prepared application that require actions like restart or re-feed (like changes to [schemas](/en/basics/schemas)). These actions are returned as part of the prepare step in the [deployment API](/en/reference/api/deploy-v2#prepare-session).<br/> This prevents breaking changes to production - also read about [validation overrides](/en/reference/applications/validation-overrides)<br/><br/> 7. Distributes constant tensors and bundles with [components](/en/applications/components) to nodes using [file distribution](/en/applications/deployment#file-distribution). Files are downloaded to *`$VESPA_HOME/var/db/vespa/filedistribution`*, URL download starts downloading to *`$VESPA_HOME/var/db/vespa/download`* | +| **activate** | 1. Waits for prepare to complete <br/><br/>2. Activates new configuration version <br/><br/>3. Signals to containers to load new bundles - read more in [container components](/en/applications/components) | +| **fetch** | Use *fetch* to download the active application package | + +An application package can be zipped for deployment: + +```bash +$ zip -r ../app.zip . +``` + +Use any name for the zip file - then refer to the file instead of the path in [deploy](/en/clients/vespa-cli#deployment) commands. + +<Warning> +**Important:** + +Using `tar` / `gzip` is not supported. [Details](https://github.com/vespa-engine/vespa/issues/17837). 
+</Warning> + +## Preprocess directives + +Use preprocess directives to: + +- *preprocess:properties*: define properties that one can refer to everywhere in *services.xml* +- *preprocess:include*: split *services.xml* in smaller chunks + +Below, *`${container.port}`* is replaced by *4099*. The contents of *content.xml* are placed at the *include* point. This is applied recursively; one can use preprocess directives in included files, as long as namespaces are defined in the top level file: + +```xml +<services version="1.0" xmlns:preprocess="properties"> + <preprocess:properties> + <container.port>4099</container.port> + </preprocess:properties> + <container version="1.0"> + <http> + <server id="container" port="${container.port}" /> + </http> + <search /> + </container> + <preprocess:include file="content.xml" /> +</services> +``` + +Sample *content.xml*: + +```xml +<content version="1.0" > + <redundancy>1</redundancy> + <documents> + <document type="music.sd" mode="index" /> + </documents> + <nodes> + <node hostalias="node0"/> + <node hostalias="node1"/> + <node hostalias="node2"/> + </nodes> +</content> +``` + +## Versioning application packages + +An application can be given a user-defined version, available at [/ApplicationStatus](/en/applications/components#monitoring-the-active-application). Configure the version in [services.xml](/en/reference/applications/services/services) (at top level): + +```xml +<services> + <config name="container.handler.observability.application-userdata"> + <version>42</version> + </config> + ... 
+</services> +``` diff --git a/mintlify-docs/en/reference/applications/components.mdx b/mintlify-docs/en/reference/applications/components.mdx new file mode 100644 index 0000000000..c5160b3ac7 --- /dev/null +++ b/mintlify-docs/en/reference/applications/components.mdx @@ -0,0 +1,138 @@ +--- +title: "Component reference" +sidebarTitle: "Components" +--- + + +A component is any Java class whose lifetime is controlled by the container, see the [Developer Guide](/en/applications/developer-guide) for an introduction. Components are specified and configured in services.xml and can have other components, and config (represented by generated "Config" classes) [injected](/en/applications/dependency-injection) at construction time, and in turn be injected into other components. + +Whenever a component or a resource your component depends on is changed by a redeployment, your component is reconstructed. Once all changed components are reconstructed, new requests are atomically switched to use the new set and the old ones are destructed. + +If you have multiple constructors in your component, annotate the one to use for injection by `@com.yahoo.component.annotation.Inject`. + +Identifiable components must implement `com.yahoo.component.Component`, and components that need to destruct resources at removal must subclass `com.yahoo.component.AbstractComponent` and implement `deconstruct()`. + +See the [example](/en/operations/metrics#example-qa) for common questions about component uniqueness / lifetime. + +## Component Types + +Vespa defined various component types (superclasses) for common tasks: + +| Component type | Description | +| --- | --- | +| **Request handler** | [Request handlers](/en/applications/request-handlers) allow applications to implement arbitrary HTTP APIs. A request handler accepts a request and returns a response. 
Custom request handlers are subclasses of [ThreadedHttpRequestHandler](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/container/jdisc/ThreadedHttpRequestHandler.html). | +| **Processor** | The [processing framework](/en/applications/processing) can be used to create general composable synchronous request-response systems. Searchers and search chains are an instantiation (through subclasses) of this general framework for a specific domain. Processors are invoked synchronously and the response is a tree of arbitrary data elements. Custom output formats can be defined by adding [renderers](#renderers). | +| **Renderer** | Renderers convert a Response (or query Result) into a serialized form sent over the network. Renderers are subclasses of [com.yahoo.processing.rendering.Renderer](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/java/com/yahoo/processing/rendering/Renderer.java). | +| **Searcher** | Searchers process Queries and their Results. Since they are synchronous, they can issue multiple queries serially or in parallel to e.g. implement federation or decorate queries with information fetched from a content cluster. Searchers are composed into *search chains* defined in services.xml. A query request selects a particular search chain which implements the logic of that query. [Read more](/en/applications/searchers). | +| **Document processor** | Document processors process incoming document operations. Similar to Searchers and Processors they can be composed in chains, but document processors are asynchronous. [Read more](/en/applications/document-processors). | +| **Binding** | A binding matches a request URI to the correct [filter chain](#filter) or [request handler](#request-handlers), and routes outgoing requests to the correct [client](#client). For instance, the binding *http://\*/\** would match any HTTP request, while *http://\*/processing* would only match that specific path. 
If several bindings match, the most specific one is chosen.<br/><br/> **Server binding**<br/> A server binding is a rule for matching incoming requests to the correct request handler, basically the JDisc building block for implementing RESTful APIs. <br/><br/> **Client binding** <br/>A client binding is a pattern which is used to match requests originating inside the container, e.g. when doing federation, to a client provider. That is, it is a rule which determines what code should handle a given outgoing request. \| | +| **Filter** | A filter is a lightweight request checker. It may set some specific request property, or it may do security checking and simply block requests missing some mandatory property or header. | +| **Client** | Clients, or client providers, are implementations of clients for different protocols, or special rules for given protocols. When a JDisc application acts as a client, e.g. fetches a web page from another host, it is a client provider that handles the transaction. Bindings are used, as with request handlers and filters, to choose the correct client, matching protocol, server, etc., and then hands off the request to the client provider. There is no problem in using arbitrary other types of clients for external services in processors and request handlers. | + +## Component configurations + +This illustrates a typical component configuration set up by the Vespa container: + +<Frame> +![Vespa container component configuration](/assets/img/container-components.svg) +</Frame> + +The network layer associates a Request with a *response handler* and routes it to the correct type of [request handler](#request-handlers) (typically based on URI binding patterns). + +If an application needs lightweight request-response processing using decomposition by a series of chained logical units, the [processing framework](/en/applications/processing) is the correct family of components to use. 
The request will be routed from ProcessingHandler through one or more chains of [Processor](#processors) instances. The exact format of the output is customizable using a [Renderer](#renderers). + +If doing queries, SearchHandler will create a Query object, route that to the pertinent chain of [Searcher](#searchers) instances, and associate the returned Result with the correct [Renderer](#renderers) instance for optional customization of the output format. + +The DocumentProcessingHandler is usually invoked from messagebus, and used for feeding documents into an index or storage. The incoming data is used to build a Document object, and this is then fed through a chain of [DocumentProcessor](#document-processors) instances. + +If building an application with custom HTTP APIs, for instance arbitrary REST APIs, the easiest way is building a custom [RequestHandler](#request-handlers). This gets the Request, which is basically a set of key-value pairs, and returns a stream of arbitrary data back to the network. + +## Injectable Components + +These components are available from Vespa for [injection](/en/applications/dependency-injection) into applications in various contexts: + +| Component | Description | +|---|---| +| **Always available** | | +| [AthenzIdentityProvider](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/java/com/yahoo/container/jdisc/athenz/AthenzIdentityProvider.java) | Provides the application's Athenz-identity and gives access to identity/role certificate and tokens. | +| [BertBaseEmbedder](https://github.com/vespa-engine/vespa/blob/master/model-integration/src/main/java/ai/vespa/embedding/BertBaseEmbedder.java) | A BERT-Base compatible embedder, see [BertBase embedder](/en/rag/embedding#bert-embedder). 
| +| [ConfigInstance](https://github.com/vespa-engine/vespa/blob/master/config-lib/src/main/java/com/yahoo/config/ConfigInstance.java) | Configuration is injected into components as `ConfigInstance` components - see [configuring components](/en/applications/configuring-components). | +| [Executor](https://docs.oracle.com/javase/7/docs/api/java/util/concurrent/Executor.html) | Default threadpool for processing requests in threaded request handler | +| [Linguistics](https://github.com/vespa-engine/vespa/blob/master/linguistics/src/main/java/com/yahoo/language/Linguistics.java) | Inject a Linguistics component like [SimpleLinguistics](https://github.com/vespa-engine/vespa/blob/master/linguistics/src/main/java/com/yahoo/language/simple/SimpleLinguistics.java) or provide a custom implementation - see [linguistics](/en/linguistics/linguistics). | +| [Metric](https://github.com/vespa-engine/vespa/blob/master/jdisc_core/src/main/java/com/yahoo/jdisc/Metric.java) | Jdisc core interface for metrics. Required by all subclasses of ThreadedRequestHandler. | +| [MetricReceiver](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/java/com/yahoo/metrics/simple/MetricReceiver.java) | Use to emit metrics from a component. Find an example in the [metrics](/en/operations/metrics#metrics-from-custom-components) guide. | +| [ModelsEvaluator](https://github.com/vespa-engine/vespa/blob/master/model-evaluation/src/main/java/ai/vespa/models/evaluation/ModelsEvaluator.java) | Evaluates machine-learned models added to Vespa applications and available as config form. | +| [SentencePieceEmbedder](https://github.com/vespa-engine/vespa/blob/master/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEmbedder.java) | A native Java implementation of SentencePiece, see [SentencePiece embedder](/en/reference/rag/embedding#sentencepiece-embedder). 
| +| [VespaCurator](https://github.com/vespa-engine/vespa/blob/master/zkfacade/src/main/java/com/yahoo/vespa/curator/api/VespaCurator.java) | A client for ZooKeeper. For use in container clusters that have ZooKeeper enabled. See [using ZooKeeper](/en/applications/using-zookeeper). | +| [VipStatus](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/java/com/yahoo/container/handler/VipStatus.java) | Use this to gain control over the service status (up/down) to be emitted from this container. | +| [WordPieceEmbedder](https://github.com/vespa-engine/vespa/blob/master/linguistics-components/src/main/java/com/yahoo/language/wordpiece/WordPieceEmbedder.java) | An implementation of the WordPiece embedder, usually used with BERT models. Refer to [WordPiece embedder](/en/reference/rag/embedding#wordpiece-embedder). | +| [SystemInfo](https://github.com/vespa-engine/vespa/blob/master/hosted-zone-api/src/main/java/ai/vespa/cloud/SystemInfo.java) | Vespa Cloud: Provides information about the environment the component is running in. [Read more](/en/applications/components#the-systeminfo-injectable-component). | +| **Available in containers having `search`** | | +| [DocumentAccess](https://github.com/vespa-engine/vespa/blob/master/documentapi/src/main/java/com/yahoo/documentapi/DocumentAccess.java) | To use the [Document API](/en/writing/document-api-guide). | +| [ExecutionFactory](https://github.com/vespa-engine/vespa/blob/master/container-search/src/main/java/com/yahoo/search/searchchain/ExecutionFactory.java) | To execute new queries from code. [Read more](/en/applications/web-services#queries). | +| [Map`<String, Model>`](https://github.com/vespa-engine/vespa/blob/master/model-evaluation/src/main/java/ai/vespa/models/evaluation/Model.java) | Use to inject a set of Models, see [Stateless Model Evaluation](/en/ranking/stateless-model-evaluation). 
| +| **Available in containers having `document-api` or `document-processing`** | | +| [DocumentAccess](https://github.com/vespa-engine/vespa/blob/master/documentapi/src/main/java/com/yahoo/documentapi/DocumentAccess.java) | To use the [Document API](/en/writing/document-api-guide). | + +## Component Versioning + +Components as well as many other artifacts in the container can be versioned. This document explains the format and semantics of these versions and how they are referred. + +### Format + +Versions are on the form: + +```js +version ::= major [ "." minor [ "." micro [ "." qualifier ]]] +``` + +Where `major`, `minor`, and `micro` are integers and `qualifier` is any string. + +A version is appended to an id separated by a colon. In cases where a file is created for each component version, the colon is replaced by a dash in the file name. + +### Ordering + +Versions are ordered first by major, then minor, then micro and then by doing a lexical ordering on the qualifier. This means that `a:1 < a:1.0 < a:1.0.0 < a:1.1 < a:1.1.0 < a:2` + +### Referencing a versioned Component + +Whenever a component is referenced by id (in code or configuration), a fully or partially specified version may be included in the reference by using the form `id:versionSpecification`. Such references are resolved using the following rules: + +- An id without any version specification resolves to the highest version not having a qualifier. +- A partial or full version specification resolves to the highest version not having a qualifier which matches the specification. +- Versions with qualifiers are matched only by exact match. 
+ +Example: Given a component with id `a` having these versions: `[1.1, 1.2, 1.2, 1.3.test, 2.0]` + +- The reference `a` will resolve to `a:2.0` +- The reference `a:1` will resolve to `a:1.2` +- The only way to resolve to the "test" qualified version is by using the exact reference `a:1.3.test` +- These references will not resolve: `a:1.3`, `a:3`, `1.2.3` + +### Merging specifications for chained Components + +In some cases, there is a need for merging multiple references into one. An example is inheritance of chains of version references, where multiple inherited chains may reference the same component. + +Two version references are said to be *compatible* if one is a prefix of the other. In this case the most specific version is used. If they are not compatible they are *conflicting*. Example: + +```xml +<search> + <searcher id="Searcher:2.3" class="com.yahoo.search.example.Searcher" bundle="the name in <artifactId> in your pom.xml" /> + <searcher id="Searcher:2.4" class="com.yahoo.search.example.Searcher" bundle="the name in <artifactId> in your pom.xml" /> + <chain id="parenta"> + <searcher id="Searcher:2"> bundle="the name in <artifactId> in your pom.xml" </searcher> + </chain> + <chain id="parentb"> + <searcher id="Searcher:2.3"> bundle="the name in <artifactId> in your pom.xml" </searcher> + </chain> + <chain id="parentc"> + <searcher id="Searcher:2.4"> bundle="the name in <artifactId> in your pom.xml" </searcher> + </chain> + + <!-- This chain will get Searcher:2.3 --> + <chain id="childa" inherits="parenta parentb" /> + + <!-- Error, as Searcher:2.3 and Searcher:2.4 are conflicting --> + <chain id="childb" inherits="parentb parentc" /> +</search> +``` diff --git a/mintlify-docs/en/reference/applications/config-files.mdx b/mintlify-docs/en/reference/applications/config-files.mdx new file mode 100644 index 0000000000..57270f6f39 --- /dev/null +++ b/mintlify-docs/en/reference/applications/config-files.mdx @@ -0,0 +1,155 @@ +--- +title: "Custom Configuration 
File Reference" +sidebarTitle: "Configuration files" +--- + + +This is the reference for config file definitions. It is useful for developing applications that have [configurable components](/en/applications/configuring-components) for the [Vespa Container](/en/applications/containers), where configuration for individual components may be provided by defining [`<config>`](#generic-configuration-in-services-xml) elements within the component's scope in services.xml. + +## Config definition files + +Config definition files are part of the source code of your application and have a *.def* suffix. Each file defines and documents the content and semantics of one configuration type. Vespa's builtin *.def* files are found in `$VESPA_HOME/share/vespa/configdefinitions/`. + +### Package + +Package is a mandatory statement that is used to define the package for the java class generated to represent the file. For [container component](/en/applications/components) developers, it is recommended to use a separate package for each bundle that needs to export config classes, to avoid conflicts between bundles that contain configurable components. Package must be the first non-comment line, and can only contain lower-case characters and dots: + +```bash +package=com.mydomain.mypackage +``` + +### Parameter names + +Config definition files contain lines on the form: + +```bash +parameterName type [default=value] [range=[min,max]] +``` + +camelCase in parameter names is recommended for readability. + +### Parameter types + +Supported types for variables in the *.def* file: + +| int | 32 bit signed integer value | +| --- | --- | +| long | 64 bit signed integer value | +| double | 64 bit IEEE float value | +| enum | Enumerated types. A set of strings representing the valid values for the parameter, e.g: <br/><br/>`foo enum {BAR, BAZ, QUUX} default=BAR` | +| bool | A boolean (true/false) value | +| string | A String value. 
Default values must be enclosed in quotation marks (" "), and any internal quotation marks must be escaped by backslash. Likewise, newlines must be escaped to `\n` | +| path | A path to a physical file or directory in the application package. This makes it possible to access files from the application package in container components. The path is relative to the root of the [application package](/en/basics/applications). A path parameter cannot have a default value, but may be optional (using the *optional* keyword after the type). An optional path does not have to be set, in which case it will be an empty value. The content will be available as a `java.nio.file.Path` instance when the component accessing this config is constructed, or an `Optional<Path>` if the *optional* keyword is used. | +| url | Similar to `path`, an arbitrary URL of a file that should be downloaded and made available to container components. The file content will be available as a java.io.File instance when the component accessing this config is constructed. Note that if the file takes a long time to download, it will also take a long time for the container to come up with the configuration referencing it. See also the [note about changing contents for such a url](/en/applications/configuring-components#adding-files-to-the-component-configuration). | +| model | A pointer to a machine-learned model. This can be a model-id, url or path, and multiple of these can be specified as a single config value, where one is used depending on the deployment environment: <br/><br/>• If a model-id is specified and the application is deployed on Vespa Cloud, the model-id is used. <br/>• Otherwise, if a URL is specified, it is used. <br/>• Otherwise, path is used. <br/><br/> You may also use remote URLs protected by bearer-token authentication by supplying the optional `secret-ref` attribute. See [using private Huggingface models](/en/reference/rag/embedding#private-model-hub). 
<br/><br/> On the receiving side, this config value is simply represented as a file path regardless of how it is resolved. This makes it easy to refer to models in multiple ways such that the appropriate one is used depending on the context. The special syntax for setting these config values is documented in [adding files to the configuration](/en/applications/configuring-components#adding-files-to-the-component-configuration). | +| reference | A config id to another configuration (only for internal vespa usage) | + +### Structs + +Structs are used to group a number of parameters that naturally belong together. A struct is declared by adding a '.' between the struct name and each member's name: + +```bash +basicStruct.foo string +basicStruct.bar int +``` + +### Arrays + +Arrays are declared by appending square brackets to the parameter name. Arrays can either contain simple values, or have children. Children can be simple parameters and/or structs and/or other arrays. Arbitrarily complex structures can be built to any depth. Examples: + +```bash +intArr[] int # Integer value array +row[].column[] int # Array of integer value arrays +complexArr[].foo string # Complex array that contains +complexArr[].bar double # … two simple parameters +complexArr[].coord.x int # … and a struct called 'coord' +complexArr[].coord.y int +complexArr[].coord.depths[] double # … that contains a double array +``` + +Note that arrays cannot have default values, even for simple value arrays. An array that has children cannot contain simple values, and vice versa. In the example above, `intArr` and `row.column` could not have children, while `row` and `complexArr` are not allowed to contain values. + +### Maps + +Maps are declared by appending curly brackets to the parameter name. Arbitrarily complex structures are supported also here. 
Examples: + +```bash +myMap{} int +complexMap{}.nestedMap{}.id int +complexMap{}.nestedMap{}.name string +``` + +## Generic configuration in services.xml + +`services.xml` has four types of elements: + +| individual service elements | (e.g. *searcher*, *handler*, *searchnode*) - creates a service, but has no child elements that create services | +| --- | --- | +| **service group elements** | (e.g. *content*, *container*, *document-processing*) - creates a group of services and can have all types of child elements | +| **dedicated config elements** | (e.g. *accesslog*) - configures a service or a group of services and can only have other dedicated config elements as children | +| **generic config elements** | always named *config* | + +Generic config elements can be added to most elements that lead to one or more services being created - i.e. service group elements and individual service elements. The config is then applied to all services created by that element and all descendant elements. + +For example, by adding *config* for *container*, the config will be applied to all container components in that cluster. Config at a deeper level has priority, so this config can be overridden for individual components by setting the same config values in e.g. *handler* or *server* elements. 
+ +Given the following config definition, let's say its name is `type-examples.def`: + +```bash +package=com.mydomain + +stringVal string +myArray[].name string +myArray[].type enum {T1, T2, T3} default=T1 +myArray[].intArr[] int +myMap{} string +basicStruct.foo string +basicStruct.bar int default=0 range=[-100,100] +boolVal bool +myFile path +myUrl url +myOptionalPath path optional +``` + +To set all the values for this config in `services.xml`, add the following xml at the desired element (the name should be *`<package>.<config definition file name>`*): + +```xml expandable +<config name="com.mydomain.type-examples"> + <stringVal>val</stringVal> + <myArray> + <item> + <name>elem_0</name> + <type>T2</type> + <intArr> + <item>0</item> + <item>1</item> + </intArr> + </item> + <item> + <name>elem_1</name> + <type>T3</type> + <intArr> + <item>0</item> + <item>1</item> + </intArr> + </item> + </myArray> + <myMap> + <item key="key1">val1</item> + <item key="key2">val2</item> + </myMap> + <basicStruct> + <foo>str</foo> + <bar>3</bar> + </basicStruct> + <boolVal>true</boolVal> + <myFile>components/file1.txt</myFile> + <myUrl>/en/reference/query-api-reference.html</myUrl> +</config> +``` + +Note that each '.' in the parameter's definition corresponds to a child element in the xml. It is not necessary to set values that already have a default in the *.def* file, if you want to keep the default value. Hence, in the example above, `basicStruct.bar` and `myArray[].type` could have been omitted in the xml without generating any errors when deploying the application. + +### Configuring arrays + +Assigning values to *arrays* is done by using the `<item>` element. This ensures that the given config values do not overwrite any existing array elements from higher-level xml elements in services, or from Vespa itself. 
diff --git a/mintlify-docs/en/reference/applications/deployment.mdx b/mintlify-docs/en/reference/applications/deployment.mdx new file mode 100644 index 0000000000..26b112cb3a --- /dev/null +++ b/mintlify-docs/en/reference/applications/deployment.mdx @@ -0,0 +1,473 @@ +--- +title: "deployment.xml reference" +sidebarTitle: "deployment.xml" +--- + + +*deployment.xml* controls how an application is deployed. + +*deployment.xml* is placed in the root of the [application package](/en/basics/applications) and specifies which environments and regions the application is deployed to during [automated application deployment](/en/operations/automated-deployments), as which application instances. + +Deployment progresses through the `test` and `staging` environments to the `prod` environments listed in *deployment.xml*. + +Simple example: + +```xml +<deployment version="1.0"> + <prod> + <region>aws-us-east-1c</region> + <region>aws-us-west-2a</region> + </prod> +</deployment> +``` + +More complex example: + +```xml expandable +<deployment version="1.0"> + <instance id="beta"> + <prod> + <region>aws-us-east-1c</region> + </prod> + </instance> + <instance id="default"> + <block-change revision="false" + days="mon,wed-fri" + hours="16-23" + time-zone="UTC" /> + <backup frequency="7d" granularity="cluster" /> + <prod> + <region>aws-us-east-1c</region> + <delay hours="3" minutes="7" seconds="13" /> + <parallel> + <region>aws-us-west-1c</region> + <steps> + <region>aws-eu-west-1a</region> + <delay hours="3" /> + <test>aws-us-west-2a</test> + </steps> + </parallel> + </prod> + <endpoints> + <endpoint container-id="my-container-service"> + <region>aws-us-east-1c</region> + </endpoint> + </endpoints> + </instance> + <endpoints> + <endpoint id="my-weighted-endpoint" + container-id="my-container-service" + region="aws-us-east-1c"> + <instance weight="1">beta</instance> + </endpoint> + </endpoints> +</deployment> +``` + +Some of the elements can be declared *either* under the 
`<deployment>` root, **or**, if one or more `<instance>` tags are listed, under these. These have a bold **or** when listing where they may be present. + +## deployment + +The root element. + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| version | Yes | 1.0 | +| major-version | No | The major version number this application is valid for. | +| cloud-account | No | Account to deploy to with [Vespa Cloud Enclave](/en/operations/enclave/enclave). | + +## instance + +In `<deployment>` or `<parallel>` (which must be a direct descendant of the root). An instance of the application; several of these may be simultaneously deployed in the same zone. If no `<instance>` is specified, all children of the root are implicitly children of an `<instance>` with `id="default"`, as in the simple example at the top. + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| id | Yes | The unique name of the instance. | +| tags | No | Space-separated tags which can be referenced to make [deployment variants](/en/operations/deployment-variants). | +| cloud-account | No | Account to deploy to with [Vespa Cloud Enclave](/en/operations/enclave/enclave). Overrides parent's use of cloud-account. | + +## block-change + +In `<deployment>`, **or** `<instance>`. This blocks changes from being deployed to production in the matching time interval. Changes are nevertheless tested while blocked. + +By default, both application revision changes and Vespa platform changes (upgrades) are blocked. It is possible to block just one kind of change using the `revision` and `version` attributes. + +Any combination of the attributes below can be specified. Changes on a given date will be blocked if all conditions are met. Invalid `<block-change>` tags (i.e. tags containing conditions that never match an actual date) are rejected by the system. + +This tag must be placed after any `<test>` and `<staging>` tags, and before `<prod>`. It can be declared multiple times. 
+ +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| revision | No, default `true` | Set to `false` to allow application deployments | +| version | No, default `true` | Set to `false` to allow Vespa platform upgrades | +| maintenance | No, default `false` | Set to `true` to disallow Vespa maintenance operations. This is best effort, maintenance can still happen (e.g. for security reasons). The block window for maintenance should be open at least 10% of the time calculated over a week, that is, at least 17 hours per week. | +| days | No, default `mon-sun` | List of days this block is effective - a comma-separated list of single days or day intervals where the start and end day are separated by a dash and are inclusive. Each day is identified by its english name or three-letter abbreviation. | +| hours | No, default `0-23` | List of hours this block is effective - a comma-separated list of single hours or hour intervals where the start and end hour are separated by a dash and are inclusive. Each hour is identified by a number in the range 0 to 23. | +| time-zone | No, default UTC | The name of the time zone used to interpret the hours attribute. Time zones are full names or short forms, when the latter is unambiguous. See [ZoneId.of](https://docs.oracle.com/javase/8/docs/api/java/time/ZoneId.html#of-java.lang.String-) for the full spec of acceptable values. | +| from-date | No | The inclusive starting date of this block (ISO-8601, `YYYY-MM-DD`). | +| to-date | No | The inclusive ending date of this block (ISO-8601, `YYYY-MM-DD`). 
| + +The below example blocks all changes on weekends, and blocks revisions outside working hours, in the PST time zone: + +```xml +<block-change days="sat-sun" + hours="0-23" + time-zone="America/Los_Angeles"/> +<block-change revision="false" + days="mon-fri,sat,sun" + hours="0-8,16-23" + time-zone="America/Los_Angeles"/> +``` + +The below example blocks: + +- all changes on Sundays starting on 2022-03-01 +- all changes in the hours 16-23 between 2022-02-10 and 2022-02-15 +- all changes until 2022-01-05 + +```xml +<block-change days="sun" + from-date="2022-03-01" + time-zone="America/Los_Angeles"/> +<block-change hours="16-23" + from-date="2022-02-10" + to-date="2022-02-15" + time-zone="America/Los_Angeles"/> +<block-change to-date="2022-01-05" + time-zone="America/Los_Angeles"/> +``` + +## backup + +In `<deployment>`, **or** `<instance>`. Configures scheduled backups of production content clusters. When present, backups will be created at the specified frequency. Must be placed after any `<test>` and `<staging>` tags, and before `<prod>`. + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| frequency | Yes | A positive integer with a suffix `h` (hours) or `d` (days), e.g. `12h` or `7d`. Minimum 1h. | +| granularity | No, default `cluster` | • `cluster`: all content nodes in the cluster<br/><br/> • `group`: all content nodes in a single group | + +Backup activity does not affect service availability, but has costs in terms of performance. You can use `granularity` to control the tradeoff between backup and restoration speed. + +- A `cluster` backup will take longer, as each content node must be temporarily suspended to ensure data integrity. Restoration will however require effectively zero content redistribution. +- A `group` backup will be faster, as an entire group will be suspended and backed up simultaneously. Restoration may however require a significant amount of content redistribution, depending on the cluster topology. 
+ +In most situations we recommend `cluster` backups. + +[Block windows](#block-change) also prevent new backups from starting in the given period. If the available time is too short for a full backup to complete, the process will, however, extend beyond the block window. [Read more](/en/operations/data-management#backup). + +## resource-tags + +In `<deployment>`, **or** `<instance>`. Specifies custom tags to apply to cloud resources (virtual machines and attached disks) provisioned in the tenant cloud account. Only available for [Vespa Cloud Enclave](/en/operations/enclave/enclave) deployments, where a `cloud-account` is set. Commonly used for cost tracking and resource management. + +Tags declared at the `<deployment>` level apply to all instances. Tags at the `<instance>` level are merged with deployment-level tags; on key conflict, the instance-level value wins. + +```xml +<deployment version="1.0" cloud-account="aws:123456789012"> + <resource-tags> + <tag key="cost-center" value="engineering"/> + <tag key="env" value="${environment}"/> + </resource-tags> + + <instance id="prod"> + <resource-tags> + <tag key="cost-center" value="search-team"/> + </resource-tags> + <prod> + <region>aws-us-east-1c</region> + </prod> + </instance> +</deployment> +``` + +The `<resource-tags>` element contains one or more `<tag>` children. Each `<tag>` has two mandatory attributes: + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| key | Yes | The tag key. Must be non-empty. Allowed characters and maximum length depend on the target cloud; see [per-cloud rules](#resource-tags-per-cloud-rules) below. The `vai_` prefix is reserved for internal use. | +| value | Yes | The tag value. Must be non-empty. May contain [template variables](#resource-tags-template-variables). Allowed characters and maximum length depend on the target cloud; see [per-cloud rules](#resource-tags-per-cloud-rules) below. 
| + +The maximum number of tags per instance (after merging deployment-level and instance-level tags) depends on the cloud; see [per-cloud rules](#resource-tags-per-cloud-rules) below. + +**Per-cloud rules.** Allowed characters and length limits vary by cloud provider. A single deployment can span multiple clouds, so tags are validated against the rules of each target cloud at deploy time. If a tag is valid for AWS but not for GCP, the deployment will succeed in AWS regions but fail in GCP regions. + +| Constraint | AWS | Azure | GCP | +| :--- | :--- | :--- | :--- | +| Key characters | `[a-zA-Z0-9 +-=._:/@]` | Unicode, except `< > % & \ ? /` | `[a-z][a-z0-9_-]*` (must start with lowercase letter) | +| Value characters | `[a-zA-Z0-9 +-=._:/@]` | No restrictions | `[a-z0-9_-]*` | +| Key max length | 128 | 512 | 63 | +| Value max length | 256 | 256 | 63 | +| Max tags | 50 | 50 | 64 | + +**Template variables.** Tag values may reference the following template variables. Resolved values are always lowercased regardless of cloud. Template-variable placeholders are excluded when checking per-cloud character rules, so only the literal parts of the value are validated. Referencing an unknown variable causes the deployment to fail. Variables can be combined, e.g. `value="${environment}-${clustertype}"`. + +| Variable | Description | +| :--- | :--- | +| `${tenant}` | The tenant name, e.g. `mytenant`. | +| `${application}` | The application name, e.g. `myapp`. | +| `${instance}` | The instance name, e.g. `default`, `beta`. | +| `${environment}` | The deployment environment, e.g. `prod`, `dev`. | +| `${region}` | The deployment region, e.g. `aws-us-east-1c`. | +| `${clustername}` | The cluster ID from [services.xml](/en/reference/applications/services/services), e.g. `default`, `music`. | +| `${clustertype}` | The Vespa cluster type: `container`, `content`, or `admin`. | + +**Reconciliation.** Tags are applied to virtual machines and attached disks. 
When tags are changed, added, or removed in *deployment.xml*, the existing resources are updated by a background reconciliation process. Tags that were previously applied by Vespa Cloud but are no longer listed are removed from the resources. Tags added manually by the tenant in the cloud console are preserved. + +## upgrade + +In `<deployment>`, or `<instance>`. Determines the strategy for upgrading the application, or one of its instances. By default, application revision changes deploy independently of platform upgrades, and an application revision can catch up to and pass an ongoing platform upgrade. See the `rollout` attribute below to change this behavior. + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| rollout | No, default `simultaneous` | • `separate`: When a revision catches up to a platform upgrade, it stays behind, unless the upgrade alone fails.<br/><br/> • `leading`: When a revision catches up to a platform upgrade, they fuse and roll out together.<br/><br/>• `simultaneous` is the default, and favors revision roll-out. Revision changes deploy independently of platform upgrades. When a revision catches up to a platform upgrade, it joins, and then passes the upgrade. | +| revision-target | No, default `latest` | • `latest` is the default. When rolling out a new revision to an instance, the latest available revision is chosen.<br/><br/> • `next` trades speed for smaller changes. When rolling out a new revision to an instance, the next available revision is chosen.<br/><br/> The available revisions for an instance are revisions which are not yet deployed, or revisions which have rolled out in previous instances. | +| revision-change | No, default `when-failing` | • `always` is the most aggressive setting. A new, available revision may always replace the one which is currently rolling out.<br/><br/> • `when-failing` is the default. 
A new, available revision may replace the one which is currently rolling out if this is failing.<br/><br/> • `when-clear` is the most conservative setting. A new, available revision may never replace one which is currently rolling out.<br/><br/> Revision targets will never automatically change inside [revision block window](#block-change), but may be set by manual intervention at any time. | +| max-risk | No, default `0` | May only be used with `revision-change="when-clear"` and `revision-target="next"`. The maximum amount of *risk* to roll out per new revision target. The default of `0` results in the next build always being chosen, while a higher value allows skipping intermediate builds, as long as the cumulative risk does not exceed what is configured here. | +| min-risk | No, default `0` | Must be less than or equal to the configured `max-risk`. The minimum amount of *risk* to start rolling out a new revision. The default of `0` results in a new revision rolling out as soon as anything is ready, while a higher value lets the system wait until enough cumulative risk is available. This can be used to avoid blocking a lengthy deployment process with trivial changes. | +| max-idle-hours | No, default `8` | May only be used when `min-risk` is specified, and greater than `0`. The maximum number of hours to wait for enough cumulative risk to be available, before rolling out a new revision. | + +## test + +Meaning depends on where it is located: + +| Parent | Description | +| :--- | :--- | +| `<deployment>` `<instance>` | If present, the application is deployed to the [`test`](/en/operations/environments#test) environment, and system tested there, even if no prod zones are deployed to. Also, when specified, system tests *must* be present in the application test package. See guides for [getting to production](/en/operations/production-deployment). 
<br/><br/> If present in an `<instance>` element, system tests are run for that specific instance before any production deployments of the instance may proceed — otherwise, previous system tests for any instance are acceptable. | +| `<prod>` `<parallel>` `<steps>` | If present, production tests are run against the production region with id contained in this element. A test must be *after* a corresponding [region](#region) element. When specified, production tests *must* be present in the application test package. See guides for [getting to production](/en/operations/production-deployment). | + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| cloud-account | No | For [system tests](/en/operations/automated-deployments#system-tests) only: account to deploy to with [Vespa Cloud Enclave](/en/operations/enclave/enclave). Overrides parent's use of cloud-account. Cloud account *must not* be specified for [production tests](/en/operations/automated-deployments#production-tests), which always run in the account of the corresponding deployment. | + +## staging + +In `<deployment>`, or `<instance>`. If present, the application is deployed to the [`staging`](/en/operations/environments#staging) environment, and tested there, even if no prod zones are deployed to. If present in an `<instance>` element, staging tests are run for that specific instance before any production deployments of the instance may proceed — otherwise, previous staging tests for any instance are acceptable. When specified, staging tests *must* be present in the application test package. See guides for [getting to production](/en/operations/production-deployment). + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| cloud-account | No | Account to deploy to with [Vespa Cloud Enclave](/en/operations/enclave/enclave). Overrides parent's use of cloud-account. | + +## prod + +In `<deployment>`, **or** in `<instance>`. 
If present, the application is deployed to the production regions listed inside this element, under the specified instance, after deployments and tests in the `test` and `staging` environments. + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| cloud-account | No | Account to deploy to with [Vespa Cloud Enclave](/en/operations/enclave/enclave). Overrides parent's use of cloud-account. | + +## region + +In `<prod>`, `<parallel>`, `<steps>`, or `<group>`. The application is deployed to the production [region](/en/operations/zones) with id contained in this element. + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| fraction | No | Only when this region is inside a group: The fractional membership in the group. | +| cloud-account | No | Account to deploy to with [Enclave](/en/operations/enclave/enclave). Overrides parent's use of cloud-account. | + +## dev + +In `<deployment>`. Optionally used to control deployment settings for the [dev environment](/en/operations/environments). This can be used to specify a different cloud account, tags, and private endpoints. + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| tags | No | Space-separated tags which can be referenced to make [deployment variants](/en/operations/deployment-variants). | +| cloud-account | No | Account to deploy to with [Vespa Cloud Enclave](/en/operations/enclave/enclave). Overrides parent's use of cloud-account. | + +## delay + +In `<deployment>`, `<instance>`, `<prod>`, `<parallel>`, or `<steps>`. Introduces a delay which must pass after completion of all previous steps, before subsequent steps may proceed. This may be useful to allow some grace time to discover errors before deploying a change in additional zones, or to gather higher-level metrics for a production deployment for a while, before evaluating these in a production test. The maximum total delay for the whole deployment spec is 48 hours. 
The delay is specified by any combination of the `hours`, `minutes` and `seconds` attributes. + +## parallel + +In `<deployment>`, `<prod>`, or `<steps>`. Runs the contained steps in parallel: instances if in `<deployment>`, or primitive steps (deployments, tests or delays) or a series of these (see [steps](#steps)) otherwise. Multiple `<parallel>` elements are permitted. The following example will deploy to `us-west-1` first, then to `us-east-3` and `us-central-1` simultaneously, and, finally to `eu-west-1`, once both parallel deployments have completed: + +```xml +<region>us-west-1</region> +<parallel> + <region>us-east-3</region> + <region>us-central-1</region> +</parallel> +<region>eu-west-1</region> +``` + +## steps + +In `<parallel>`. Runs the contained parallel or primitive steps (deployments, tests or delays) serially. The following example will in parallel: + +<Steps> +<Step> +deploy to `us-east-3`, +</Step> +<Step> +deploy to `us-west-1`, then delay 1 hour, and run tests for `us-west-1`, and +</Step> +<Step> +delay for two hours. +</Step> +</Steps> + +Thus, the parallel block is complete when both deployments are complete, tests are successful for the second deployment, and at least two hours have passed since the block began executing. + +```xml +<parallel> + <region>us-east-3</region> + <steps> + <region>us-west-1</region> + <delay hours="1" /> + <test>us-west-1</test> + </steps> + <delay hours="2" /> +</parallel> +``` + +## tester + +In `<test>`, `<staging>` and `<prod>`. Specifies container settings for the tester application container, which is used to run system, staging and production verification tests. + +The allowed elements inside this are [`<nodes>`](/en/reference/applications/services/services#nodes). 
+ +```xml +<staging> + <tester> + <nodes count="1"> + <resources vcpu="8" memory="32Gb" disk="30Gb" /> + </nodes> + </tester> +</staging> +``` + +## endpoints (global) + +In `<deployment>`, without any `<instance>` declared **or** in `<instance>`: This allows *global* endpoints, via one or more [`<endpoint>`](#endpoint-global) elements; and [zone endpoint](#endpoint-zone) and [private endpoint](#endpoint-private) elements for cloud-native private network configuration. + +## endpoints (dev) + +In `<dev>`. This allows [zone endpoint](#endpoint-zone) elements for cloud-native private network configuration for [dev](/en/operations/environments#dev) deployments. Note that [private endpoints](#endpoint-private) are only supported in `prod`. + +## endpoint (global) + +In `<endpoints>` or `<group>`. Specifies a global endpoint for this application. Each endpoint will point to the regions that are declared in the endpoint. If no regions are specified, the endpoint defaults to the regions declared in the `<prod>` element. The following example creates a default endpoint to all regions, and a *us* endpoint pointing only to US regions. + +```xml +<endpoints> + <endpoint container-id="my-container-service"/> + <endpoint id="us" container-id="my-container-service"> + <region>aws-us-east-1c</region> + <region>aws-us-west-2a</region> + </endpoint> +</endpoints> +``` + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| id | No | The identifier for the endpoint. This will be part of the endpoint name that is generated. If not specified, the endpoint will be the default global endpoint for the application. | +| container-id | Yes | The id of the [container cluster](/en/reference/applications/services/container) to which requests to the global endpoint is forwarded. | + +Global endpoints are implemented using Route 53 and healthchecks, to keep active zones in rotation. See [BCP](#bcp) for advanced configurations. 
+ +## endpoint (zone) + +In `<endpoints>` or `<group>`, with `type='zone'`. Used to disable public zone endpoints. *Non-public endpoints cannot be used in global endpoints, which require that all constituent endpoints are public.* The example disables the public zone endpoint for the `my-container` container cluster in all regions, except where it is explicitly enabled, in `region-1`. Changing endpoint visibility will make the service unavailable for a short period of time. + +```xml +<endpoints> + <endpoint type='zone' container-id='my-container' enabled='false' /> + <endpoint type='zone' container-id='my-container' enabled='true'> + <region>region-1</region> + </endpoint> +</endpoints> +``` + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| type | Yes | Zone endpoints are specified with `type='zone'`. | +| container-id | Yes | The id of the [container cluster](/en/reference/applications/services/container) to disable public endpoints for. | +| enabled | No | Whether a public endpoint for this container cluster should be enabled; default `true`. | + +## endpoint (private) + +In `<endpoints>` or `<group>`, with `type='private'`. Specifies a private endpoint service for this application. Each service will be launched in the regions that are declared in the endpoint. If no regions are specified, the service is launched in all regions declared in the `<prod>` element, that support any of the declared [access types](#allow). The following example creates a private endpoint in two specific regions. 
+ +```xml +<endpoints> + <endpoint type='private' container-id='my-container'> + <region>aws-us-east-1c</region> + <allow with='aws-private-link' arn='arn:aws:iam::123123123123:root' /> + </endpoint> + <endpoint type='private' container-id='my-container'> + <region>gcp-us-central1-f</region> + <allow with='gcp-service-connect' project='user-project' /> + </endpoint> +</endpoints> +``` + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| type | Yes | Private endpoints are specified with `type='private'`. | +| container-id | Yes | The id of the [container cluster](/en/reference/applications/services/container) to which requests to the private endpoint service is forwarded. | +| auth-method | No | The authentication method to use with this [private endpoint](/en/operations/private-endpoints). Must be either `mtls` or `token`. Defaults to mTLS if not included. | + +## allow + +In `<endpoint type='private'>`. Allows a principal identified by the URN to set up a connection to the declared private endpoint service. This element must be repeated for each additional URN. An endpoint service will only consider allowed URNs of a compatible type, and will only be created if at least one compatible access type-and-URN is given: + +- For AWS deployments, specify `aws-private-link`, and an *ARN*. +- For GCP deployments, specify `gcp-service-connect`, and a *project ID* + +```xml +<endpoint type='private' container-id="my-container"> + <allow with='aws-private-link' arn='arn:aws:iam::123123123123:root' /> + <allow with='aws-private-link' arn='arn:aws:iam::321321321321:role/my-role' /> + <allow with='aws-private-link' arn='arn:aws:iam::321321321321:user/my-user' /> + <allow with='gcp-service-connect' project='my-project' /> +</endpoint> +``` + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| with | Yes | The private endpoint access type; must be `aws-private-link` or `gcp-service-connect`. | +| arn | Maybe | Must be specified with `aws-private-link`. 
See [AWS documentation](https://docs.aws.amazon.com/vpc/latest/privatelink/configure-endpoint-service.html) for more details. | +| project | Maybe | Must be specified with `gcp-service-connect`. See [GCP documentation](https://cloud.google.com/vpc/docs/configure-private-service-connect-services) for more details. | + +## bcp + +In `<instance>` or `<deployment>`. Defines the BCP (Business Continuity Planning) structure of this instance: Which zones should take over for which others during the outage of a zone and how fast they must have the capacity ready. Autoscaling uses this information to decide the ideal CPU load of a zone. If this element is not defined, it is assumed that all regions cover for an equal share of the traffic of all other regions and must have that capacity ready at all times. + +If a bcp element is specified at the root, and explicit instances are used, that bcp element becomes the default for all instances that do not contain a bcp element themselves. If a BCP element contains no group elements it will implicitly define a single group of all the regions of the instance in which it is used. + +See [BCP test](https://cloud.vespa.ai/en/reference/bcp-test.html) for a procedure to verify that your BCP configuration is correct. + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| deadline | No | The max time after a region becomes unreachable until the other regions in its BCP group must be able to handle the traffic of it, given as a number followed by 'm', 'h' or 'd' (for minutes, hours or days).
The default deadline is 0: Regions must at all times have capacity to handle BCP traffic immediately.<br/><br/> By providing a deadline, autoscaling can avoid the cost of provisioning additional resources for BCP capacity if it predicts that it can grow to handle the traffic faster than the deadline in a given cluster.<br/><br/> This is the default deadline to be used for all groups that don't specify one themselves. | + +Example: + +```xml +<bcp> + <group deadline="15m"> + <endpoint id="foo" container-id="bar"/> + <region>us-east1</region> + <region>us-east2</region> + <region fraction="0.5">us-central1</region> + </group> + <group> + <region>us-west1</region> + <region>us-west2</region> + <region fraction="0.5">us-central1</region> + </group> +</bcp> +``` + +## group + +In `<bcp>`. Defines a bcp group: A set of regions whose members cover for each other during a regional outage. + +Each region in a group will (as allowed, when autoscaling ranges are configured) provision resources sufficient to handle the outage of any other single region in the group. The traffic of the region is assumed to be rerouted in equal amount to the remaining regions in the group. That is, if a group has one member, no resources will be provisioned to handle an outage in that member. If a group has two members, each will aim to provision sufficient resources to handle the actual traffic of the other. If a group has three members, each will provision to handle half of the traffic observed in the region among the two others which receives the most traffic. + +A region may have fractional membership in multiple groups, meaning it will handle just that fraction of the traffic of the remaining members, and vice versa. A region's total membership among groups must always sum to exactly 1. + +A group may also define global endpoints for the region members in the group. This is exactly the same as defining the endpoint separately and repeating the regions of the group under the endpoint.
Endpoints under a group cannot contain explicit region sub-elements. + +| Attribute | Mandatory | Values | +| :--- | :--- | :--- | +| deadline | No | The deadline of this BCP group. See deadline on the BCP element. | diff --git a/mintlify-docs/en/reference/applications/hosts.mdx b/mintlify-docs/en/reference/applications/hosts.mdx new file mode 100644 index 0000000000..ba0c11b38a --- /dev/null +++ b/mintlify-docs/en/reference/applications/hosts.mdx @@ -0,0 +1,37 @@ +--- +title: "hosts.xml" +--- + + +*hosts.xml* is a configuration file in an [application package](/en/reference/applications/application-packages). Elements: + +hosts + [host \[name\]](#host) + [alias](#alias) + +The purpose of *hosts.xml* is to map real hostnames to self-defined aliases. The aliases are used in [services.xml](/en/reference/applications/services/services) to map service instances to hosts. It is only needed when deploying to multiple hosts. + +## host + +Sub-elements: + +- [`alias`](#alias) + +Example: + +```xml +<hosts> + <host name="myserver0.mydomain.com"> + <alias>SEARCH0</alias> + <alias>CONTAINER0</alias> + </host> + <host name="myserver1.mydomain.com"> + <alias>SEARCH1</alias> + <alias>CONTAINER1</alias> + </host> +</hosts> +``` + +## alias + +Alias used in [services.xml](/en/reference/applications/services/services) to refer to the host. diff --git a/mintlify-docs/en/reference/applications/services/admin.mdx b/mintlify-docs/en/reference/applications/services/admin.mdx new file mode 100644 index 0000000000..2effffbaba --- /dev/null +++ b/mintlify-docs/en/reference/applications/services/admin.mdx @@ -0,0 +1,213 @@ +--- +title: "services.xml - 'admin'" +sidebarTitle: "services.xml" +--- + +Reference documentation for `<admin>` in [services.xml](/en/reference/applications/services/services).
Find a working example of this configuration in the sample application *multinode-HA* [services.xml](https://github.com/vespa-engine/sample-apps/blob/master/examples/operations/multinode-HA/services.xml). + +```yaml +admin [version] + adminserver [hostalias] + cluster-controllers + cluster-controller [hostalias, baseport, jvm-options, jvm-gc-options] + configservers + configserver [hostalias, baseport] + logserver [jvm-options, jvm-gc-options] + slobroks + slobrok [hostalias, baseport] + monitoring [systemname] + metrics + consumer [id] + metric-set [id] + metric [id] + cloudwatch [region, namespace] + shared-credentials [file, profile] + logging +``` + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **version** | required | number | | 2.0 | + +## adminserver + +The configured node will be the default administration node in your Vespa system, which means that unless configured otherwise all administrative services - i.e. the log server, the configuration server, the slobrok, and so on - will run on this node. Use [configservers](#configservers), [logserver](#logserver), [slobroks](#slobroks) elements if you need to specify baseport or jvm options for any of these services. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **hostalias** | required | string | | | +| **baseport** | optional | number | | | + +## cluster-controllers + +Container for one or more [cluster-controller](#cluster-controller) elements. When having one or more [content](/en/reference/applications/services/content) clusters, configuring at least one cluster controller is required. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **standalone-zookeeper** | optional | true/false | false | Will by default share the ZooKeeper instance with configserver. 
If configured to true, a separate ZooKeeper instance will be configured and started on the set of nodes where you run the cluster controller. The set of cluster controller nodes cannot overlap with the set of nodes where the config server is running. If this setting is changed from false to true in a running system, all previous cluster state information will be lost as the underlying ZooKeeper changes. Cluster controllers will re-discover the state, but nodes that have been manually set as down will again be considered to be up. | + +## cluster-controller + +Specifies a host on which to run the [Cluster Controller](/en/content/content-nodes#cluster-controller) service. The Cluster Controller manages the state of the cluster in order to provide elasticity and failure detection. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **hostalias** | required | string | | | +| **baseport** | optional | number | | | +| **jvm-options** | optional | string | | | + +## configservers + +Container for one or more `configserver` elements. + +## configserver + +Specifies a host on which to run the [Configuration Server](/en/operations/self-managed/configuration-server) service. If contained directly below `<admin>` you may only have one, so if you need to configure multiple instances of this service, contain them within the [`<configservers>`](#configservers) element. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **hostalias** | required | string | | | +| **baseport** | optional | number | | | + +## logserver + +Specifies a host on which to run the [Vespa Log Server](/en/reference/operations/log-files#log-server) service. If not specified, the logserver is placed on the [adminserver](#adminserver), like in the [example](https://github.com/vespa-engine/sample-apps/blob/master/examples/operations/multinode-HA/services.xml).
+ +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **hostalias** | required | string | | | +| **baseport** | optional | number | | | +| **jvm-options** | optional | string | | | +| **jvm-gc-options** | optional | string | | | + +Example: + +```xml +<logserver hostalias="node1" /> +``` + +## slobroks + +This is a container for one or more `slobrok` elements. + +## slobrok + +Specifies a host on which to run the [Service Location Broker (slobrok)](/en/operations/self-managed/slobrok) service. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **hostalias** | required | string | | | +| **baseport** | optional | number | | | + +## monitoring + +Settings for how to pass metrics to a monitoring service - see [monitoring](/en/operations/self-managed/monitoring). + +```xml +<monitoring systemname="name-in-metrics-system" /> +``` + +||| +| --- | --- | +| systemname | The name of the application in question in the monitoring system, default is "vespa" | + + +## logging + +Used for tuning log levels of Java plug-ins. If you (temporarily) need to enable debug logging from some class or package, or if some third-party component is spamming your log with unnecessary INFO level messages, you can turn levels on or off. Example: + +```xml +<logging> + <class name="org.myorg.MyHelperClass" levels="+debug" /> + <class name="org.thirdparty.TooSpammyClass" levels="-info" /> + <package name="org.myorg.tricky.package" levels="all" /> + <package name="org.anotherorg" levels="all -info -debug -spam" /> +</logging> +``` + +Note that tuning also affects sub-packages, so the above would also affect all packages with `org.anotherorg.` as prefix. And if there is a `org.myorg.tricky.package.foo.InternalClass` you will get even "spam" level logging from it! + +The default for `levels` is `"all -debug -spam"` and as seen above you can add and remove specific levels. 
+ +## metrics + +Used for configuring the forwarding of metrics to graphing applications - add `consumer` child elements. Also see [monitoring](/en/operations/self-managed/monitoring). Example: + +```xml +<metrics> + <consumer id="my-metric-namespace"> + <metric-set id="default" /> + <metric id="my-custom-metric" /> + <cloudwatch region="us-east-1" namespace="my-vespa"> + <shared-credentials file="/path/to/credentials-file" profile="default" /> + </cloudwatch> + </consumer> +</metrics> +``` + +## consumer + +Configure a metrics consumer. The metrics contained in this element will be exported to the consumer with the given id. `consumer` is a request parameter in [/metrics/v1/values](/en/reference/api/metrics-v1), [/metrics/v2/values](/en/reference/api/metrics-v2) and [/prometheus/v1/values](/en/reference/api/prometheus-v1). + +Add `metric` and/or `metric-set` children. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The name of the consumer to export metrics to. | + +## metric-set + +Include a pre-defined set of metrics to the consumer. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The id of the metric set to include. Built-in metric sets are:<br/><br/> • `default`<br/> • `Vespa` | + +## metric + +Configure a metric. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The name of the metric as defined in custom code or in [process metrics api](/en/reference/api/state-v1#state-v1-metrics) | + +Note that metric id needs to include the metric specific suffix, e.g. *.average*. + +In this example, there is one metric added to a custom consumer in addition to the default metric set. Use *&consumer=my-custom-consumer* parameter for the prometheus endpoint. 
Also notice the .count suffix, see [process metrics api](/en/reference/api/state-v1#state-v1-metrics). + +The per process metrics api endpoint */state/v1/metrics* also includes a description of each emitted metric. The */state/v1/metrics* endpoint also includes the metric aggregates (.count, .average, .rate, .max). + +```xml +<metrics> + <consumer id="my-custom-consumer"> + <metric-set id="default" /> + <metric id="vds.idealstate.garbage_collection.documents_removed.count" /> + </consumer> +</metrics> +``` + +## cloudwatch + +Specifies that the metrics from this consumer should be forwarded to CloudWatch. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **region** | required | string | | Your AWS region | +| **namespace** | required | string | | The metrics namespace in CloudWatch | + +Example: + +```xml +<cloudwatch region="us-east-1" namespace="my-vespa"> + <shared-credentials file="/path/to/credentials-file" profile="default" /> +</cloudwatch> +``` + +## shared-credentials + +Specifies that a profile from a shared-credentials file should be used for authentication to CloudWatch. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **file** | required | string | | The path to the shared-credentials file | +| **profile** | optional | string | default | The profile in the shared-credentials file | diff --git a/mintlify-docs/en/reference/applications/services/container.mdx b/mintlify-docs/en/reference/applications/services/container.mdx new file mode 100644 index 0000000000..e04a73654b --- /dev/null +++ b/mintlify-docs/en/reference/applications/services/container.mdx @@ -0,0 +1,448 @@ +--- +title: "services.xml - container" +--- + +This is the reference for the container cluster configuration in [services.xml](/en/reference/applications/services/services). 
+ +```yaml expandable +container [version, id] + http + server [id, port] + filtering + handler [id, class, bundle] + binding + component + server [id, class, bundle] + clients + client [id, permissions] + certificate [file] + token [id] + components + component + search + include [dir] + binding + searcher + federation + provider + chain + renderer + threadpool + significance + document-processing + include [dir] + documentprocessor + chain + threadpool + processing + include [dir] + binding + processor + chain + renderer + document-api + abortondocumenterror + retryenabled + route + maxpendingdocs + maxpendingbytes + retrydelay + timeout + tracelevel + mbusport + ignore-undefined-fields + max-document-size + model-evaluation + onnx + inference + memory + document [type, class, bundle] + accesslog [type, fileNamePattern, symlinkName, rotationInterval, rotationScheme] + request-content [samples-per-second, path-prefix, max-bytes] + config + nodes [count, allocated-memory, jvm-gc-options, jvm-options] + resources (Vespa Cloud) + node [hostalias] (Self managed) + environment-variables + jvm [allocated-memory, options, gc-options] + secrets + secret-store [type] + group [name, environment] + zookeeper + threadpool +``` + +[config](/en/reference/applications/config-files#generic-configuration-in-services-xml) elements are also allowed most places. + +Example: + +```xml + + + + + + + + +``` + +## `<container>` + +Contained in [`<services>`](/en/reference/applications/services/services). Each container tag specifies a separate container cluster. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **version** | required | number | | 1.0 in this version of Vespa | +| **id** | required | string | | the id of this cluster | + +## handler + +The `handler` element holds the configuration of a request handler. For each `binding` tag, the handler will be bound to the pertinent JDisc interfaces using the given binding. 
+ +- `binding` For JDisc request handlers, add this server binding to this handler. +- [`component`](#component) for injecting another component. Must be a declaration of a new component, not a reference. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The component ID | +| **class** | optional | string | | The class of the handler, defaults to id | +| **bundle** | optional | string | | The bundle to load the handler from: The name in `<artifactId>` in pom.xml. Defaults to class or id (if no class is given) | + +Example: + +```xml +<container id="default" version="1.0"> + <handler id="com.yahoo.search.handler.LegacyBridge"> + <binding>http://*/*</binding> + </handler> + <handler bundle="the name in <artifactId> in pom.xml" id="com.mydomain.vespatest.RedirectingHandler"/> + <handler bundle="the name in <artifactId> in pom.xml" id="com.mydomain.vespatest.ExampleHandler"/> + <nodes> + <node hostalias="node1"/> + </nodes> +</container> +``` + +## binding + +The URI to map a Handler to. Multiple elements are allowed. See example above. + +## server + +The `server` element holds the configuration of a JDisc server provider. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The component ID | +| **class** | optional | string | | The class of the server, defaults to id | +| **bundle** | optional | string | | The bundle to load the server from: The name in `<artifactId>` in the pom.xml. Defaults to class or id (if no class is given). | + +Example: + +```xml +<server id="com.mydomain.vespatest.DemoServer"> + <config name="vespatest.demo-server"> + <response>Hello, world! + </response> + <port>16889</port> + </config> +</server> +``` + +## clients + +Vespa Cloud only. The `clients` element is a parent element for [client](#client) security configuration. 
Find details and practical examples in the [security guide](/en/security/guide#configuring-tokens). Example: + +```xml +<container id="query" version="1.0"> + <clients> + <client id="mtls" permissions="read"> + <certificate file="security/clients.pem"/> + </client> + <client id="query-token-client" permissions="read"> + <token id="query-token"/> + </client> + </clients> +</container> +``` + +## client + +Vespa Cloud only. Child element of [clients](#clients). Use to configure security credentials for a container cluster, using [certificate](#certificate) and/or [token](#token). + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The client ID | +| **permissions** | required | string | | Permissions, see the [security guide](/en/security/guide#permissions). One of: <br/><br/> • `read`<br/> • `write`<br/> • `read,write` | + +## certificate + +Vespa Cloud only. Child element of [client](#client). Configure certificates using the *file* attribute. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **file** | required | string | | Path to the certificate file, see the [security guide](/en/security/guide#configuring-mtls). | + +## token + +Vespa Cloud only. Child element of [client](#client). Configure tokens using the *id* attribute. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | Token ID, see the [security guide](/en/security/guide#configuring-tokens). | + +## components + +Contains [component](#component) elements. Can be used in conjunction with [include](#include) for modular config of components. + +## component + +The `component` element holds the configuration of a [generic component](/en/applications/dependency-injection#special-components). Must be a declaration of a new component, not a reference. 
+ +Nested [`component`](#component) child elements can be added for injecting specific component instances. This is useful if there is more than one declared component of the same Java class. Refer to [Injecting components](/en/applications/dependency-injection) for details and examples. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The component ID | +| **class** | optional | string | | The class of the component, defaults to id | +| **bundle** | optional | string | | The bundle to load the component from: The name in `<artifactId>` in the pom.xml. Defaults to class or id (if no class is given). | + +Example: + +```xml +<component id="com.mydomain.demo.DemoComponent" bundle="the name in <artifactId> in pom.xml" /> +``` + +## document-api + +Use to enable [Document API](../../api/api.html) operations to a container cluster. Children elements: + +| Name | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **binding** | optional | string | http://\*/ | The URI to map the document-api handlers to. Multiple bindings are valid. Must end with a '/'. Note that each document-api handler will get its individual binding by adding a suffix, e.g. the feed handler will add 'feed/', the remove handler will add 'remove/' and so on. Example:<br/><br/>`<document-api>`<br/>`<binding>http://*/document-api/</binding>`<br/>`<binding>https://*/document-api/</binding>`<br/>`</document-api>`<br/><br/> With these configured bindings, the feed handler will be available at `http://*/document-api/feed/` and `https://*/document-api/feed/`. For other handlers, just replace 'feed/' with the appropriate suffix, e.g. 'get/', 'remove/' etc. | +| **abortondocumenterror** | optional | true/false | true | Controls whether to abort the entire feed or not if a document-related error occurs, i.e. if a document contains an unknown field. 
Setting this field to `true` will abort the feed on such errors, while setting it to `false` will cause Vespa to simply skip to the next document in the feed. Note that malformed XML in the input will abort the feed regardless of this setting. | +| **maxpendingbytes** | optional | number | | The maximum number of pending bytes. If `<maxpendingdocs>` is 0 and this is set to 0, this defaults to 100 MB. If `<maxpendingdocs>` is more than 0, and this is set to 0, the send-window is only limited by number of messages sent, not the memory footprint. | +| **maxpendingdocs** | optional | number | | The maximum number of pending documents the client can have. By default, the client will dynamically adjust the window size based on the latency of the performed operations. If the parameter is set, dynamic window sizing will be turned off in favor of the configured value. | +| **mbusport** | optional | number | | Set the MessageBus port | +| **retrydelay** | optional | double | 1.0 | Delay in seconds between retries | +| **retryenabled** | optional | true/false | | Enable or disable retrying documents that have failed. | +| **route** | optional | string | default | Set the route to feed documents to | +| **timeout** | optional | double | 180.0 | Set the timeout value in seconds for an operation | +| **tracelevel** | optional | 0-9 | 0 | Configure the level of which to trace messages sent. The higher the level, the more detailed descriptions. | +| **ignore-undefined-fields** | optional | true/false | false | Set to true to ignore undefined fields in document API operations and let such operations complete successfully, rather than fail. A [response header is returned](/en/reference/api/document-v1#x-vespa-ignored-fields) when field operations are ignored. | +| **max-document-size** | optional | string | 100MiB | Specifies the maximum size of a document operation request accepted by the container, measured as the uncompressed size of the request body. 
The limit applies to all document types in the container cluster. A request larger than this limit will be rejected by the container before the operation is forwarded to the content cluster. <br/><br/> Valid values are numbers including a unit (e.g. *10MiB*) and the value must be between 1MiB and 2048MiB (inclusive). Values will be rounded to the nearest MiB, so using MiB as a unit is preferable.<br/><br/> The value should normally not exceed the smallest [max-document-size](/en/reference/applications/services/content#max-document-size) configured in any content cluster that this container feeds to; a deployment warning is emitted otherwise. <br/><br/> Example:<br/><br/>`<document-api>`<br/>`<max-document-size>10MiB</max-document-size>`<br/>`</document-api>` | + +Example: + +```xml +<document-api> + <binding>http://*/document-api/</binding> + <binding>https://*/document-api/</binding> + <abortondocumenterror>false</abortondocumenterror> + <maxpendingbytes>1048576</maxpendingbytes> + <maxpendingdocs>1000</maxpendingdocs> + <mbusport>1234</mbusport> + <retrydelay>5.5</retrydelay> + <retryenabled>false</retryenabled> + <route>default</route> + <timeout>250.5</timeout> + <tracelevel>3</tracelevel> + <max-document-size>10MiB</max-document-size> +</document-api> +``` + +## inference + +Configures resources used for model inference in the container, for example [embedders](/en/rag/embedding), [local LLMs](/en/rag/local-llms), and [stateless model evaluation](/en/ranking/stateless-model-evaluation). + +| Element | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **memory** | optional | string | auto-estimated | Container memory reserved for model inference, covering both model weights and inference requests. This memory is subtracted from the memory available to the JVM heap on the same node.<br/><br/> When not set, Vespa estimates the required inference memory automatically.
The automatic estimate can be inaccurate for some models and workloads, which may lead to out-of-memory errors. Set this element explicitly to override the estimate. | + +Example: + +```xml +<container id="default" version="1.0"> + <inference> + <memory>1500Mb</memory> + </inference> + ... +</container> +``` + +## document + +[Concrete document type](/en/schemas/concrete-documents) bindings for the container. Example: + +```xml +<container id="default" version="1.0"> + <document class="com.mydomain.concretedocs.Vehicle" + bundle="the name in <artifactId> in pom.xml" + type="vehicle"/> + <document class="com.mydomain.concretedocs.Vehicle" + bundle="the name in <artifactId> in pom.xml" + type="ship"/> + <document class="com.mydomain.concretedocs2.Disease" + bundle="the name in <artifactId> in pom.xml" + type="disease"/> + <search/> + <document-processing> + <chain id="default"> + <documentprocessor bundle="the name in <artifactId> in pom.xml" + id="concretedocs.ConcreteDocDocProc"/> + </chain> + </document-processing> + ... +</container> +``` + +## accesslog + +Configures properties of the accesslog. The default type is `json` that will give output in (line-based) [JSON format](/en/operations/access-logging). See [Access logging](/en/operations/access-logging) for configuration details. Setting the type to `vespa` gives a classic Apache CLF-like format. + +Access logging can be disabled by setting the type to `disabled`. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **type** | optional | string | json | The accesslog type: *json*, *vespa* or *disabled* | +| **fileNamePattern** | required\* | string | `JsonAccessLog.<container id>.%Y%m%d%H%M%S` | File name pattern. \* Note: Optional when *type* is *disabled* | +| **symlinkName** | optional | string | `JsonAccessLog.<container id>` | Symlink name | +| **rotationInterval** | optional | string | 0 60 ... 
| Rotation interval | +| **rotationScheme** | optional | string | date | Valid values are *date* or *sequence* | + +### request-content + +The `request-content` element is a child of `accesslog` and configures logging of request content. Multiple `request-content` elements can be specified to log different request paths with different configurations. + +| Element | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **samples-per-second** | required | double | | Probabilistic sample rate per second | +| **path-prefix**| required | string | | URI path prefix to match for logging | +| **max-bytes** | required | integer | | Maximum size in bytes to log, only prefix will be kept for larger requests | + +Example: + +```xml + <accesslog fileNamePattern="$VESPA_HOME/logs/vespa/access/JsonAccessLog.<container id>.%Y%m%d%H%M%S" + symlinkName="JsonAccessLog.<container id>" + rotationInterval="0 1 ..." + type="json" > + <request-content> + <samples-per-second>0.2</samples-per-second> + <path-prefix>/search</path-prefix> + <max-bytes>65536</max-bytes> + </request-content> + </accesslog> +``` + +## include + + +Allows including XML snippets contained in external files. All files from all listed directories will be included. All files must have the same outer tag as they were referred from, i.e. search, document-processing or processing. The path must be relative to the application package root, and must never point outside the package. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **dir** | required | string | | The directory to include files from. File inclusion order is undefined. | + +Example: + +```xml +<include dir="included_configs/search" /> +``` + +## nodes + +See [nodes](/en/reference/applications/services/services#nodes) in the general services.xml documentation. 
+ +Additional container cluster specific attributes: + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **allocated-memory** | optional | percentage | | <Danger>**Deprecated:** See [jvm](#jvm).</Danger> | +| **jvm-options** | optional | string | | <Danger>**Deprecated:** See [jvm](#jvm).</Danger> | +| **jvm-gc-options** | optional | string | | <Danger>**Deprecated:** See [jvm](#jvm).</Danger> | + +## environment-variables + +Add child elements to set environment variables accessible in the container JVM runtime. + +Example: + +```xml +<nodes> + <environment-variables> + <KMP_SETTING>1</KMP_SETTING> + <KMP_AFFINITY>granularity=fine,verbose,compact,1,0</KMP_AFFINITY> + </environment-variables> +</nodes> +``` + +## jvm + +JVM settings for container nodes. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **allocated-memory** | optional | percentage | | Memory to allocate to each JVM instance as a percentage of available memory. Must be an integer percentage followed by *%* | +| **options** | optional | string | | Generic JVM options | +| **gc-options** | optional | string | | JVM GC options. Garbage Collector specific parameters | + +Example where 50% of the node total memory is used as the Max heap size of the JVM: + +```xml +<nodes> + <jvm gc-options="-XX:+UseG1GC -XX:MaxTenuringThreshold=10" + options="-XX:+PrintCommandLineFlags" + allocated-memory="50%" /> +</nodes> +``` + +## secrets + +Use to access secrets configured in Vespa Cloud - refer to the [secret store](/en/security/secret-store). + +## secret-store + +The `secret-store` element holds configuration for custom implementations. Contains one or more `group` elements. 
+ +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **type** | required | string | | Value: "oath-ckms" | + +## group + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **name** | required | string | | Key group name | +| **environment** | required | string | | Value one of: "alpha" "corp" "prod" "aws" "aws\_stage" | + +Example: + +```xml +<secret-store type="my-ckms"> + <group name="[key group]" environment="[environment]"/> +</secret-store> +``` + +## zookeeper + +The *zookeeper* element declares that the container cluster should run ZooKeeper and configure the necessary components. This element has no attributes or children. + +## threadpool + +Available since `Vespa 8.611.13`. + +Specifies configuration for the default thread pool in the container. All parameters are relative to the number of CPU cores—see the [container tuning example](/en/performance/container-tuning#container-worker-threads-example). This thread pool also supports the optional `max` attribute, which lets the pool grow up to `max * vCPU` threads under load before shrinking back after 5 seconds of idleness. Requests are rejected once the allowed number of threads is reached, all are busy, and the queue is full. + +### threads + +The number of permanent threads relative to number of vCPU cores. Default value is `2`. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **max** | optional | number | 100 | The maximum number of threads relative to vCPU cores. Value must be greater than or equal to `<threads>`. | + +### queue + +The size of the request queue relative to effective number of threads. Specify `0` to disable queuing. Queueing is disabled by default. 
diff --git a/mintlify-docs/en/reference/applications/services/content.mdx b/mintlify-docs/en/reference/applications/services/content.mdx new file mode 100644 index 0000000000..8d86c8012d --- /dev/null +++ b/mintlify-docs/en/reference/applications/services/content.mdx @@ -0,0 +1,788 @@ +--- +title: "services.xml - 'content'" +sidebarTitle: "services.xml - content" +--- + + +```yaml expandable +content + documents + document + document-processing + min-redundancy + redundancy + coverage-policy + nodes + node + group + distribution + node + group + engine + proton + searchable-copies + tuning + searchnode + lidspace + max-bloat-factor + requestthreads + search + persearch + summary + flushstrategy + native + total + maxmemorygain + diskbloatfactor + component + maxmemorygain + diskbloatfactor + maxage + transactionlog + maxsize + conservative + memory-limit-factor + disk-limit-factor + initialize + threads + feeding + concurrency + niceness + index + io + search + warmup + time + unpack + removed-db + prune + age + interval + summary + io + read + store + cache + maxsize + maxsize-percent + compression + type + level + logstore + maxfilesize + chunk + maxsize + compression + type + level + sync-transactionlog + flush-on-shutdown + resource-limits + disk + memory + search + query-timeout + visibility-delay + coverage + minimum + min-wait-after-coverage-factor + max-wait-after-coverage-factor + tuning + bucket-splitting + min-node-ratio-per-group + distribution + max-document-size + merges + persistence-threads + resource-limits + visitors + max-concurrent + dispatch + max-hits-per-partition + dispatch-policy + prioritize-availability + min-active-docs-coverage + top-k-probability + cluster-controller + init-progress-time + transition-time + max-premature-crashes + stable-state-period + min-distributor-up-ratio + min-storage-up-ratio + groups-allowed-down-ratio +``` + +## content + +The root element of a Content cluster definition. Creates a content cluster. 
A content cluster stores and/or indexes documents. The xml file may have zero or more such tags. + +Contained in [services](/en/reference/applications/services/services). + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **version** | required | number | | 1.0 in this version of Vespa | +| **id** | required for multiple clusters | string | | Name of the content cluster. If none is supplied, the cluster name will be `content`. Cluster names must be unique within the application. If multiple clusters are configured, the name must be set for all but one at minimum. <br/><br/><Note> **Note:**<br/><br/> Renaming a cluster is the same as dropping the current cluster and adding a new one. This makes data unavailable or lost, depending on hosting model. Deploying with a changed cluster id will therefore fail with a validation override requirement: `Content cluster 'music' is removed. This will cause loss of all data in this cluster. To allow this add <allow until='yyyy-mm-dd'>content-cluster-removal</allow> to validation-overrides.xml, see /en/reference/applications/validation-overrides`. </Note>| + +Subelements: + +- [documents](#documents) (required) +- [min-redundancy](#min-redundancy) +- [redundancy](#redundancy) +- [coverage-policy](#coverage-policy) +- [nodes](/en/reference/applications/services/services#nodes) +- [group](#group) +- [engine](#engine) +- [search](#search) +- [tuning](#tuning) + +## documents + +Contained in [content](#content). Defines which document types should be routed to this content cluster using the default route, and what documents should be kept in the cluster if the garbage collector runs. Read more on [expiring documents](/en/schemas/documents#document-expiry). Also has some backend-specific configuration for whether documents should be searchable or not.
+ +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **selection** | optional | string | | A [document selection](/en/reference/writing/document-selector-language), restricting documents that are routed to this cluster. Defaults to a selection expression matching everything.<br/><br/> This selection can be specified to match document identifier specifics that are *independent* of document types. For restrictions that apply only to a *specific* document type, this must be done within that particular document type's [document](#document) element. Trying to use document type references in this selection makes an error during deployment. The selection given here will be merged with per-document type selections specified within document tags, if any, meaning that any document in the cluster must match *both* selections to be accepted and kept. <br/><br/> This feature is primarily used to [expire documents](/en/schemas/documents#document-expiry). | +| **garbage-collection** | optional | true / false | false | If true, regularly verify the documents stored in the cluster to see if they belong in the cluster, and delete them if not. If false, garbage collection is not run. | +| **garbage-collection-interval** | optional | integer | 3600 | Time (in seconds) between garbage collection cycles. Note that the deletion of documents is spread over this interval, so more resources will be used for deleting a set of documents with a small interval than with a larger interval. | + +Subelements: + +- [document](#document) (required) +- [document-processing](#document-processing) (optional) + +## document + +Contained in [documents](#documents). The document type to be routed to this content cluster. 
+ +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **type** | required | string | | [Document type name](/en/reference/schemas/schemas#document) | +| **mode** | required | index / store-only / streaming | | The mode of storing and indexing. Refer to [streaming search](/en/performance/streaming-search) for *store-only*, as documents are stored the same way for both cases.<br/><br/> Changing mode requires an *indexing-mode-change* [validation override](/en/reference/applications/validation-overrides), and documents must be re-fed. | +| **selection** | optional | string | | A [document selection](/en/reference/writing/document-selector-language), restricting documents that are routed to this cluster. Defaults to a selection expression matching everything.<br/><br/> This selection must apply to fields in *this document type only*. Selection will be merged together with selection for other types and global selection from [documents](#documents) to form a full expression for what documents belong to this cluster. | +| **global** | optional | true / false | false | Set to *true* to distribute all documents of this type to all nodes in the content cluster it is defined.<br/><br/> Fields in global documents can be imported into documents to implement joins - read more in [parent/child](/en/schemas/parent-child). Vespa will detect when a new (or outdated) node is added to the cluster and prevent it from taking part in searches until it has received all global documents.<br/><br/> Changing from *false* to *true* or vice versa requires a *global-document-change* [validation override](/en/reference/applications/validation-overrides). First, [stop services](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart) on all content nodes. Then, deploy with the validation override. 
Finally, [start services](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart) on all content nodes.<br/><br/> <Note> **Note:**<br/><br/> *global* is only supported for *mode="index"*.</Note> | + +## document-processing + +Contained in [documents](#documents). Vespa Search specific configuration for which document processing cluster and chain to run index preprocessing. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **cluster** | optional | string | Container cluster on content node | Name of a [document-processing](/en/reference/applications/services/docproc) container cluster that does index preprocessing. Use cluster to specify an alternative cluster, other than the default cluster on content nodes. | +| **chain** | optional | string | `indexing` chain | A document processing chain in the container cluster specified by *cluster* to use for index preprocessing. The chain must inherit the `indexing` chain. | + +Example - the container cluster enables [document-processing](/en/reference/applications/services/docproc), referred to by the content cluster: + +```xml +<container id="my-indexing-cluster" version="1.0"> + <document-api/> + <document-processing/> +</container> +<content id="music" version="1.0"> + <documents> + <document-processing cluster="my-indexing-cluster"/> + </documents> +</content> +``` + +To add document processors either before or after the indexer, declare a chain (inherit *indexing*) in a *document-processing* container cluster and add document processors. Annotate document processors with `before=indexingStart` or `after=indexingEnd`. 
Configure this cluster and chain as the indexing chain in the content cluster - example: + +```xml +<container id="my-indexing-cluster" version="1.0"> + <document-api/> + <document-processing> + <chain id="my-document-processors" + inherits="indexing"> + <documentprocessor id="MyDocproc"> + <before>indexingStart</before> + </documentprocessor> + <documentprocessor id="MyOtherDocproc"> + <after>indexingEnd</after> + </documentprocessor> + </chain> + </document-processing> +</container> +<content id="music" version="1.0"> + <documents> + <document-processing cluster="my-indexing-cluster" + chain="my-document-processors" /> + </documents> +</content> +``` + +<Warning> +**Important:** + +Note the [document-api](/en/reference/applications/services/container#document-api) configuration. Set up this API on the same nodes as `document-processing` - find details in [indexing](/en/writing/indexing). +</Warning> + +## min-redundancy + +Contained in [content](#content). The minimum total data copies the cluster will maintain. This can be set instead of (or in addition to) redundancy to ensure that a minimum number of copies are always maintained regardless of other configuration. + +`min-redundancy` can be changed without node restart - replicas will be added or removed automatically. + +### min-redundancy and groups + +A group will always have minimum one copy of each document in the cluster. This is also the most commonly used configuration; Increase replica level with more groups to improve query capacity. + +- Example 1: If *min-redundancy* is 2 and there is 1 content group, there will be 2 data copies in the group (2 copies for the cluster). If the number of groups is changed to 2 there will be 1 data copy in each group (still 2 copies for the cluster). +- Example 2: A cluster is configured to [autoscale](/en/operations/autoscaling) using `groups="[2,3]"`. 
Here, configure min-redundancy to 2, as each group will have 1 replica irrespective of number of groups, here 2 or 3 - see [replicas](/en/content/elasticity#replicas). Setting the lower bound ensures correct replica level for 2 groups. + +For self-managed Vespa: Read more about the actual number of replicas when using [groups](#group) in [topology change](/en/content/elasticity#changing-topology). + +## redundancy + +Contained in [content](#content). + +<Note> +**Note:** + +Use [min-redundancy](#min-redundancy) instead of `redundancy`. +</Note> + +Vespa Cloud: The number of data copies *per group*. + +Self-managed: The total data copies the cluster will maintain to avoid data loss. + +Example: with a redundancy of 2, the system tolerates 1 node failure before data becomes unavailable (until the system has managed to create new replicas on other online nodes). + +Redundancy can be changed without node restart - replicas will be added or removed automatically. + +## coverage-policy + +Contained in [content](#content). + +Specifies the coverage policy for the content cluster. Valid values are `group` or `node`. The default value is `group`. + +If the policy is `group` coverage is maintained per group, meaning that when doing maintenance, upgrades etc. one group is allowed to be down at a time. If there is only one group in the cluster, coverage will be the same as policy `node`. + +If the policy is `node` coverage is maintained on a node level, meaning that when doing maintenance, upgrades etc. coverage will be maintained on a node level, so in practice 1 node in the whole cluster is allowed to be down at a time. + +When having several groups the common reason for changing policy away from the default `group` policy is when the load added to the remaining groups will increase too much when a whole group is allowed to go down. In that case it will be better to use the `node` policy, as taking one node at a time will give just a minor increase in load. 
+ +## node + +Contained in [nodes](/en/reference/applications/services/services#nodes) or [group](#group). Configures a content node in the cluster - see [node](/en/reference/applications/services/services#node) in the general services.xml documentation. + +Additional node attributes for content nodes: + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| distribution-key | required | integer | | The unique data distribution id of this node. This **must** remain unchanged for the host's lifetime. Distribution keys of a fresh system should be contiguous and start from zero.<br/><br/> Distribution keys are used to identify nodes and groups for the [distribution algorithm](/en/content/idealstate). If a node changes distribution key, the distribution algorithm regards it as a new node, so buckets are redistributed. | +| capacity | optional | double | 1 |<Danger> **Deprecated:**<br/><br/> Capacity of this node, relative to other nodes. A node with capacity 2 will get double the data and feed requests of a node with capacity 1. This feature is deprecated and expert mode only. Don't use in production, Vespa assumes homogenous cluster capacity.</Danger>| +| baseport | optional | integer | | The first port in the port range allocated by this node. | + +## group + +Contained in [content](#content) or [group](#group) - groups can be nested. Defines the [hierarchical structure](/en/content/elasticity#grouped-distribution) of the cluster. Can not be used in conjunction with the [nodes](/en/reference/applications/services/services#nodes) element. Groups can contain other groups or nodes, but not both. There can only be a single level of leaf groups under the top group. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **distribution-key** | required | integer | | Sets the distribution key of a group. It is not allowed to change this for a given group.
Group distribution keys only need to be unique among groups that share the same parent group. | +| **name** | required | string | | The name of the group, used for access from status pages and the like. | + +<Warning> +**Important:** + +There is no deployment-time verification that the distribution key remains unchanged for any given node or group. Consequently, take great care when modifying the set of nodes in a content cluster. Assigning a new distribution key to an existing node is undefined behavior; Best case, the existing data will be temporarily unavailable until the error has been corrected. Worst case, risk crashes or data loss. +</Warning> + +See [Vespa Serving Scaling Guide](/en/performance/sizing-search) for when to consider using grouped distribution. + +## distribution (in group) + +Contained in [group](#group). Defines the data distribution to subgroups of this group. *distribution* should not be in the lowest level group containing storage nodes, as here the ideal state algorithm is used directly. In higher level groups, *distribution* is mandatory. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **partitions** | required if there are subgroups in the group | string | | String conforming to the partition specification:<br/> <br/>Partition specification    Description <br/><br/>\*   Distribute all copies over 1 of N groups <br/>1\|\*    Distribute all copies over 2 of N groups <br/> 1\|1\|\*   Distribute all copies over 3 of N groups | + +The partition specification is used to evenly distribute content copies across groups. Set a number or `*` per group separated by pipes (e.g. `1|*` for two groups). See [sample deployment configurations](/en/operations/self-managed/sizing-examples). + +## engine + +Contained in [content](#content). Specify the content engine to use, and/or adjust tuning parameters for the engine. Allowed engines are `proton` and `dummy`, the latter being used for debugging purposes. 
If no engine is given, proton is used. Sub-element: [proton](#proton). + +## proton + +Contained in [engine](#engine). If specified, the content cluster will use the Proton content engine. This engine supports storage, indexed search and secondary indices. Optional sub-elements are [searchable-copies](#searchable-copies), [tuning](#tuning-proton), [sync-transactionlog](#sync-transactionlog), [flush-on-shutdown](#flush-on-shutdown), and [resource-limits (in proton)](#resource-limits-proton). + +## searchable-copies + +Contained in [proton](#proton). Default value is 2, or [redundancy](#redundancy), if lower. If set to less than redundancy, only some of the stored copies are ready for searching at any time. This means that node failures cause temporary data unavailability while the alternate copies are being indexed for search. The benefit is using less memory, trading off availability during transitions. Refer to [bucket move](/en/content/proton#bucket-move). + +If updating documents or using [document selection](#documents) for garbage collection, consider setting [fast-access](/en/reference/schemas/schemas#attribute) on the subset of attribute fields used for this to make sure that these attributes are always kept in memory for fast access. Note that this is only useful if `searchable-copies` is less than `redundancy`. Read more in [proton](/en/content/proton). + +`searchable-copies` can be changed without node restart. Note that when reducing `searchable-copies`, resource usage will not be reduced until content nodes are restarted. + +## tuning + +Contained in [proton](#proton), optional. Tune settings for the search nodes in a content cluster - sub-element: + +| Element | Required | Quantity | +| --- | --- | --- | +| [searchnode](#searchnode) | No | Zero or one | + +## searchnode + +Contained in [tuning](#tuning-proton), optional.
Tune settings for search nodes in a content cluster - sub-elements: + +| Element | Required | Quantity | +| --- | --- | --- | +| [lidspace](#lidspace) | No | Zero or one | +| [requestthreads](#requestthreads) | No | Zero or one | +| [flushstrategy](#flushstrategy) | No | Zero or one | +| [initialize](#initialize) | No | Zero or one | +| [feeding](#feeding) | No | Zero or one | +| [index](#index) | No | Zero or one | +| [summary](#summary) | No | Zero or one | + +```xml +<tuning> + <searchnode> + <lidspace></lidspace> + <requestthreads></requestthreads> + <flushstrategy></flushstrategy> + <initialize></initialize> + <feeding></feeding> + <index></index> + <summary></summary> + </searchnode> +</tuning> +``` + +## requestthreads + +Contained in [searchnode](#searchnode), optional. Tune the number of request threads used on a content node, see [thread-configuration](/en/performance/sizing-search#thread-configuration) for details. Sub-elements: + +| Element | Required | Default | Description | +| --- | --- | --- | --- | +| **search** | Optional | **Vespa Cloud:** min(vcpu\*4 + persearch - 1, vcpu\*persearch) **Self-hosted:** 64. | Total size of the match engine thread pool. Together with `persearch`, this determines the maximum number of queries that can execute concurrently: `search / persearch`. See the [Vespa serving scaling guide](/en/performance/sizing-search#thread-configuration) for sizing guidance. | +| **persearch** | Optional | 1 | Maximum number of threads used per search. A higher value reduces the time queries spend in query evaluation, except time spent in ANN which is single-threaded. This number of threads is held for each query for the duration of the query, also when much of the time is spent on single-threaded operations. See the [Vespa serving scaling guide](/en/performance/sizing-search) for an introduction of using multiple threads per search per node to reduce query latency.
Number of threads per search can be adjusted down per *rank-profile* using [num-threads-per-search](/en/reference/schemas/schemas#num-threads-per-search). | +| **summary** | Optional | **Vespa Cloud:** vcpu **Self-hosted:** 16 | Number of summary threads. | + +```xml +<requestthreads> + <search>64</search> + <persearch>1</persearch> + <summary>16</summary> +</requestthreads> +``` + +## flushstrategy + +Contained in [searchnode](#searchnode), optional. Tune the *native*\-strategy for flushing components to disk - a smaller number means more frequent flush: + +- *Memory gain* is how much memory can be freed by flushing a component +- *Disk gain* is how much disk space can be freed by flushing a component (typically by using compaction) + +Refer to [Proton maintenance jobs](/en/content/proton#proton-maintenance-jobs). Optional sub-elements: + +- `native`: + - `total` + - `maxmemorygain`: The total maximum memory gain (in bytes) for *all* components before running flush, default 4294967296 (4 GB) + - `diskbloatfactor`: Trigger flush if the total disk gain (in bytes) for *all* components is larger than the factor times current total disk usage, default 0.25 + - `component` + - `maxmemorygain`: The maximum memory gain (in bytes) by *a single* component before running flush, default 1073741824 (1 GB) + - `diskbloatfactor`: Trigger flush if the disk gain (in bytes) by *a single* component is larger than the given factor times the current disk usage by that component, default 0.25 + - `maxage`: The maximum age (in seconds) of unflushed content for a single component before running flush, default 111600 (31h) + - `transactionlog` + - `maxsize`: The total maximum size (in bytes) of [transaction logs](/en/content/proton#transaction-log) for all document types before running flush, default 21474836480 (20 GB) + - `conservative` + - `memory-limit-factor`: When [resource-limits (in proton)](#resource-limits-proton) for memory is reached, flush more often by downscaling 
`total.maxmemorygain` and `component.maxmemorygain`, default 0.5 + - `disk-limit-factor`: When [resource-limits (in proton)](#resource-limits-proton) for disk is reached, flush more often by downscaling `transactionlog.maxsize`, default 0.5 + +```xml +<flushstrategy> + <native> + <total> + <maxmemorygain>4294967296</maxmemorygain> + <diskbloatfactor>0.2</diskbloatfactor> + </total> + <component> + <maxmemorygain>1073741824</maxmemorygain> + <diskbloatfactor>0.2</diskbloatfactor> + <maxage>111600</maxage> + </component> + <transactionlog> + <maxsize>21474836480</maxsize> + </transactionlog> + <conservative> + <memory-limit-factor>0.5</memory-limit-factor> + <disk-limit-factor>0.5</disk-limit-factor> + </conservative> + </native> +</flushstrategy> +``` + +## initialize + +Contained in [searchnode](#searchnode), optional. Tune settings related to how the search node (proton) is initialized. Optional sub-elements: + +- `threads`: The number of initializer threads used for loading structures from disk at proton startup. The threads are shared between document databases when the value is larger than 0. Default value is the number of document databases + 1. + - When set to larger than 1, document databases are initialized in parallel + - When set to 1, document databases are initialized in sequence + - When set to 0, 1 separate thread is used per document database, and they are initialized in parallel. + +```xml +<initialize> + <threads>2</threads> +</initialize> +``` + +## lidspace + +Contained in [searchnode](#searchnode), optional. Tune settings related to how lidspace is managed. Optional sub-elements: + +- `max-bloat-factor`: Maximum bloat allowed before lidspace compaction is started. Compaction is moving a document from a high lid to a lower lid. Cost is similar to feeding a document and removing it. Also see description in [lidspace compaction maintenance job](/en/content/proton#lid-space-compaction). Default value is 0.01 or 1% of total lidspace. 
Will be increased to target of 0.50 or 50%. + +```xml +<lidspace> + <max-bloat-factor>0.5</max-bloat-factor> +</lidspace> +``` + +## feeding + +Contained in [searchnode](#searchnode), optional. Tune [proton](/en/content/proton) settings for feed operations. Optional sub-elements: + +- `concurrency`: A number between 0.0 and 1.0 that specifies the concurrency when handling feed operations, default 0.5. When set to 1.0, all cores on the cpu can be used for feeding. Changing this value requires a restart of the nodes to take effect. + Feed operations in this context also include data migration operations inside a cluster, e.g., when resizing. Data migrations might therefore limit feeding capacity while running. All such feed operations run with the same priority. +- `niceness`: A number between 0.0 and 1.0 that specifies the niceness of the feeding threads, default 0.0 => not any nicer than anyone else. Increasing this number will reduce the priority of feeding compared to search. The real-world effect is hard to predict as the magic exists in the OS level scheduler. Changing this value requires a restart of the nodes to take effect. + +```xml +<feeding> + <concurrency>0.8</concurrency> + <niceness>0.5</niceness> +</feeding> +``` + +<Note> +**Note:** + +Queries can always use *all* available cores; concurrency caps how many can be used for writes and maintenance, which includes redistribution. +</Note> + +## index + +Contained in [searchnode](#searchnode), optional. Tune various aspects of the handling of disk and memory indexes. Optional sub-elements: + +- `io` + - `search`: Controls io read options used during search, `values={mmap,populate}`, default `mmap`. Using `populate` will eagerly touch all pages when index is loaded (after re-start or after index fusion is complete). +- `warmup` + - `time`: Specifies in seconds how long the index shall be warmed up before being switched in for serving.
During warmup, it will receive queries and posting lists will be iterated, but results ignored as they are duplicates of the live index. This will pull the most important ones into the cache. However, as warming up an index will occupy more memory, do not turn it on unless you suspect you need it. And always benchmark to see if it is worth it.<br/> + It's only potentially relevant for fields with indexing setting [index](/en/basics/schemas#document-fields), which have regular disk based indexes, and where the disk indexes are merged/fused in the background. When switching the index, warmup can be used. Also note that [state-v1-health](/en/reference/api/state-v1#state-v1-health) is independent of `warmup` - the node can be "up" before warmup. + - `unpack`: Controls whether all posting features are pulled in to the cache, or only the most important. `values={true, false}`, default false. + +```xml +<index> + <io> + <search>mmap</search> + </io> + <warmup> + <time>60</time> + <unpack>true</unpack> + </warmup> +</index> +``` + +## removed-db + +Contained in [searchnode](#searchnode), optional. Tune various aspects of the db of removed documents. Optional sub-elements: + +- `prune` + - `age`: Specifies how long (in seconds) we must remember removed documents before we can prune them away. Default is 2 weeks. This sets the upper limit on how long a node can be down and still be accepted back in the system, without having the index wiped. There is no point in having this any higher than the age of the documents. If corpus is re-fed every day, there is no point in having this longer than 24 hours. + - `interval`: Specifies how often (in seconds) to prune old documents. Default is 3.36 hours (prune age / 100). No need to change default. Exposed here for reference and for testing. + +```xml +<removed-db> + <prune> + <age>86400</age> + </prune> +</removed-db> +``` + +## summary + +Contained in [searchnode](#searchnode), optional.
Tune various aspects of the handling of document summaries. Optional sub-elements: + +- `io` + - `read`: Controls io read options used during reading of stored documents. Values are `directio`, `mmap` and `populate`. Default is `mmap`. `populate` will do an eager mmap and touch all pages. +- `store` + - `cache`: Used to tune the cache used by the document store. Enabled by default, using up to 5% of available memory. + - `maxsize`: The maximum size of the cache in bytes. If set, it takes precedence over [maxsize-percent](#summary-store-cache-maxsize-percent). Default is unset. + - `maxsize-percent`: The maximum size of the cache in percent of available memory. Default is 5%. + - `compression` + - `type`: The compression type of the documents while in the cache. Possible values are `none`, `lz4` and `zstd`. Default is `lz4`. + - `level`: The compression level of the documents while in cache. Default is 6. + - `logstore`: Used to tune the actual document store implementation (log-based). + - `maxfilesize`: The maximum size (in bytes) per summary file on disk. Default value is 1GB. See [document-store-compaction](/en/content/proton#document-store-compaction). + - `chunk` + - `maxsize`: Maximum size (in bytes) of a chunk. Default value is 64KB. + - `compression` + - `type`: Compression type for the documents. Possible values are `none`, `lz4` and `zstd`. Default is `zstd`. + - `level`: Compression level for the documents. Default is 3. + +```xml expandable +<summary> + <io> + <read>directio</read> + </io> + <store> + <cache> + <maxsize-percent>5</maxsize-percent> + <compression> + <type>none</type> + </compression> + </cache> + <logstore> + <chunk> + <maxsize>16384</maxsize> + <compression> + <type>zstd</type> + <level>3</level> + </compression> + </chunk> + </logstore> + </store> +</summary> +``` + +## flush-on-shutdown + +Contained in [proton](#proton). Default value is true. If set to true, search nodes will flush a set of components (e.g.
memory index, attributes) to disk before shutting down such that the time it takes to flush these components plus the time it takes to replay the [transaction log](/en/content/proton#transaction-log) after restart is as low as possible. The time it takes to replay the transaction log depends on the amount of data to replay, so by flushing some components before restart, the transaction log will be pruned, and we reduce the replay time significantly. Refer to [Proton maintenance jobs](/en/content/proton#proton-maintenance-jobs). + +## sync-transactionlog + +Contained in [proton](#proton). Default value is true. If true, the transactionlog is synced to disk after every write. This enables the transactionlog to survive power failures and kernel panics. The sync cost is amortized over multiple feed operations. The faster you feed the more operations it is amortized over. So with a local disk this is not known to be a performance issue. However, if using NAS (Network Attached Storage) like EBS on AWS one can see significant feed performance impact. For one particular case, turning off sync-transactionlog for EBS gave a 60x improvement. + +With sync-transactionlog turned off, the risk of losing data depends on the kernel's [sysctl settings.](https://www.kernel.org/doc/html/latest/admin-guide/sysctl/vm.html#dirty-background-bytes) For example, this is a common default: + +```bash +# sysctl -a +... +vm.dirty_expire_centisecs = 3000 +vm.dirty_ratio = 20 +vm.dirty_writeback_centisecs = 500 +... +``` + +With this configuration, the worst-case scenario is to lose 35 seconds worth of transactionlog, but no more than 1/20 of the free memory. This is because kernel flusher threads wake up every 5s (dirty\_writeback\_centisecs) and write data older than 30s (dirty\_expire\_centisecs) from memory to disk. But if un-synced data exceeds 1/20 of the free memory, the Vespa process will sync it (dirty\_ratio). 
+ +The above also assumes that all copies of the data are lost at the same time **and** that kernels on all these nodes flush at the same time: a realistic scenario only with one copy. + +Adjust these [sysctl settings](https://www.kernel.org/doc/html/latest/admin-guide/sysctl/vm.html#dirty-background-bytes) to manage the trade-off between data loss and performance. You'll see more in those kernel docs: for example, thresholds can be expressed in bytes. + +## resource-limits (in proton) + +Contained in [proton](#proton). Specifies resource limits used by proton to reject both external and internal write operations (on this content node) when a limit is reached. + +<Warning> +**Warning:** + +These proton limits should almost never be changed directly. Instead, change [resource-limits](#resource-limits) that controls when external write operations are blocked in the entire content cluster. Be aware of the risks of tuning resource limits as seen in the link. +</Warning> + +The local proton limits are derived from the cluster limits if not specified, using this formula: + +$$ +L_{\text{proton}} = L_{\text{cluster}} + \frac{1 - L_{\text{cluster}}}{2} +$$ + +| Element | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **disk** | optional | float \[0, 1\] | 0.9 | Fraction of total space on the disk partition used before put and update operations are rejected | +| **memory** | optional | float \[0, 1\] | 0.9 | Fraction of physical memory that can be resident memory in anonymous mapping by proton before put and update operations are rejected | + +Example: + +```xml +<proton> + <resource-limits> + <disk>0.83</disk> + <memory>0.82</memory> +``` + +## search + +Contained in [content](#content), optional. Declares search configuration for this content cluster. Optional sub-elements are [query-timeout](#query-timeout), [visibility-delay](#visibility-delay) and [coverage](#coverage). + +## query-timeout + +Contained in [search](#search). 
Specifies the query timeout in seconds for queries against the search interface on the content nodes. The default is 0.5 (500ms), the max is 600.0. For query timeout also see the request parameter [timeout](/en/reference/api/query#timeout). + +**Note:** One cannot override this value using the [timeout](/en/reference/api/query#timeout) request parameter. + +## visibility-delay + +Contained in [search](#search). Default 0, max 1, seconds. + +This setting controls the TTL caching for [parent-child](/en/schemas/parent-child) imported fields. See [feature tuning](/en/performance/feature-tuning#parent-child-and-search-performance). + +## coverage + +Contained in [search](#search). Declares search coverage configuration for this content cluster. Optional sub-elements are [minimum](#minimum), [min-wait-after-coverage-factor](#min-wait-after-coverage-factor) and [max-wait-after-coverage-factor](#max-wait-after-coverage-factor). Search coverage configuration controls how many nodes the query dispatcher process should wait for, trading search coverage versus search performance. + +## minimum + +Contained in [coverage](#coverage). Declares the minimum search coverage required before returning the results of a query. This number is in the range `[0, 1]`, with 0 being no coverage and 1 being full coverage. + +The default is 1; unless configured otherwise a query will not return until all search nodes have responded within the specified timeout. + +## min-wait-after-coverage-factor + +Contained in [coverage](#coverage). Declares the minimum time for a query to wait for full coverage once the declared [minimum](#minimum) has been reached. This number is a factor that is multiplied with the time remaining at the time of reaching minimum coverage. + +The default is 0; unless configured otherwise a query will return as soon as the minimum coverage has been reached, and the remaining search nodes appear to be lagging. 
+ +## max-wait-after-coverage-factor + +Contained in [coverage](#coverage). Declares the maximum time for a query to wait for full coverage once the declared [minimum](#minimum) has been reached. This number is a factor that is multiplied with the time remaining at the time of reaching minimum coverage. + +The default is 1; unless configured otherwise a query is allowed to wait its full timeout for full coverage even after reaching the minimum. + +## tuning + +Contained in [content](#content), optional. Optional tuning parameters are: [bucket-splitting](#bucket-splitting), [min-node-ratio-per-group](#min-node-ratio-per-group), [cluster-controller](#cluster-controller), [dispatch](#dispatch-tuning), [distribution](#distribution_type), [max-document-size](#max-document-size), [merges](#merges), [persistence-threads](#persistence-threads) and [visitors](#visitors). + +## bucket-splitting + +Contained in [tuning](#tuning). The [bucket](/en/content/buckets) is the fundamental unit of distribution and management in a content cluster. Buckets are auto-split, no need to configure for most applications. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **max-documents** | optional | integer | 1024 | Maximum number of documents per content bucket. Buckets are split in two if they have more documents than this. Keep this value below 16K. | +| **max-size** | optional | integer | 32MiB | Maximum size (in bytes) of a bucket. This is the sum of the serialized size of all documents kept in the bucket. Buckets are split in two if they have a larger size than this. Keep this value below 100 MiB. | +| **minimum-bits** | optional | integer | | Override the ideal distribution bit count configured for this cluster. Prefer to use the [distribution type](#distribution_type) setting instead if the default distribution bit count does not fit the cluster. This variable is intended for testing and to work around possible distribution bit issues. 
Most users should not need this option. | + +## min-node-ratio-per-group + +<Warning> +**Important:** + +This is configuration for the cluster controller. Most users are normally looking for [min-active-docs-coverage](#min-active-docs-coverage) which controls how many nodes can be down before query load is routed to other groups. +</Warning> + +Contained in [tuning](#tuning). States a lower bound requirement on the ratio of nodes within *individual* [groups](#group) that must be online and able to accept traffic before the entire group is automatically taken out of service. Groups are automatically brought back into service when the availability of its nodes has been restored to a level equal to or above this limit. + +Elastic content clusters are often configured to use multiple groups for the sake of horizontal traffic scaling and/or data availability. The content distribution system will try to ensure a configured number of replicas is always present within a group in order to maintain data redundancy. If the number of available nodes in a group drops too far, it is possible for the remaining nodes in the group to not have sufficient capacity to take over storage and serving for the replicas they now must assume responsibility for. Such situations are likely to result in increased latencies and/or feed rejections caused by resource exhaustion. Setting this tuning parameter allows the system to instead automatically take down the remaining nodes in the group, allowing feed and query traffic to fail completely over to the remaining groups. + +Valid parameter is a decimal value in the range \[0, 1\]. Default is 0, which means that the automatic group out-of-service functionality will *not* automatically take effect. 
+ +Example: assume a cluster has been configured with *n* groups of 4 nodes each and the following tuning config: + +```xml +<tuning> + <min-node-ratio-per-group>0.75</min-node-ratio-per-group> +</tuning> +``` + +This tuning allows for 1 node in a group to be down. If 2 or more nodes go down, all nodes in the group will be marked as down, letting the *n-1* remaining groups handle all the traffic. + +This configuration can be changed live as the system is running and altered limits will take effect immediately. + +## distribution (in tuning) + +Contained in [tuning](#tuning). Tune the distribution algorithm used in the cluster. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **type** | optional | loose \| strict \| legacy | loose | When the number of nodes configured in a system changes over certain limits, the system will automatically trigger major redistributions of documents. This is to ensure that the number of buckets is appropriate for the number of nodes in the cluster. This enum value specifies how aggressive the system should be in triggering such distribution changes.<br/><br/> The default of `loose` strikes a balance between rarely altering the distribution of the cluster and keeping the skew in document distribution low. It is recommended that you use the default mode unless you have empirically observed that it causes too much skew in load or document distribution.<br/><br/> Note that specifying `minimum-bits` under [bucket-splitting](#bucket-splitting) overrides this setting and effectively "locks" the distribution in place. | + +## max-document-size + +Contained in [tuning](#tuning). Specifies max document size in the content cluster, measured as the uncompressed size of a document operation arriving over the wire by the distributor service. The limit will be used for all document types. A document larger than this limit will be rejected by the distributor. 
Note that some document operations that don't contain the entire document, like [document updates](/en/writing/document-api-guide#document-updates) might increase the size of a document above this limit. See also [max-document-size](/en/reference/applications/services/container#document-api) for document api, which is probably what you want to configure; make sure to keep the value documented here larger. + +Valid values are numbers including a unit (e.g. *10MiB*) and the value must be between 1 MiB and 2048 MiB (inclusive). Values will be rounded to nearest MiB, so using MiB as a unit is preferable. It is strongly recommended to make sure this is not set too high, 10 MiB is a reasonable setting for most use cases, setting it above 100 MiB is not recommended, as allowing large documents might impact operations, e.g. when restarting nodes, moving documents between nodes etc. Default value is 128 MiB. + +Example: + +```xml +<tuning> + <max-document-size>10MiB</max-document-size> +</tuning> +``` + +## merges + +Contained in [tuning](#tuning). Defines throttling parameters for bucket merge operations. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **max-per-node** | optional | number | | Maximum number of parallel active bucket merge operations. | +| **max-queue-size** | optional | number | | Maximum size of the merge bucket queue, before reporting BUSY back to the distributors. | + +## persistence-threads + +Contained in [tuning](#tuning). Defines the number of persistence threads per partition on each content node. A content node executes bucket operations against the persistence engine synchronously in each of these threads. 8 threads are used by default. Override with the **count** attribute. + +## visitors + +Contained in [tuning](#tuning). Tuning parameters for visitor operations. Might contain [max-concurrent](#max-concurrent). 
+ +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **thread-count** | optional | number | | The maximum number of threads in which to execute visitor operations. A higher number of threads may increase performance, but may use more memory. | +| **max-queue-size** | optional | number | | Maximum size of the pending visitor queue, before reporting BUSY back to the distributors. | + +## max-concurrent + +Contained in [visitors](#visitors). Defines how many visitors can be active concurrently on each storage node. The number allowed depends on priority - lower priority visitors should not block higher priority visitors completely. To implement this, specify a fixed and a variable number. The maximum active is calculated by adjusting the variable component using the priority, and adding the fixed component. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **fixed** | optional | number | [16](https://github.com/vespa-engine/vespa/blob/master/storage/src/vespa/storage/visiting/stor-visitor.def) | The fixed component of the maximum active count | +| **variable** | optional | number | [64](https://github.com/vespa-engine/vespa/blob/master/storage/src/vespa/storage/visiting/stor-visitor.def) | The variable component of the maximum active count | + +## resource-limits + +Contained in [tuning](#tuning). Specifies resource limits used to decide whether external write operations should be blocked in the entire content cluster, based on the reported resource usage by content nodes. See [feed block](/en/writing/feed-block) for more details. + +<Warning> +**Warning:** + +The content nodes require resource headroom to handle extra documents as part of re-distribution during node failure, and spikes when running [maintenance jobs](/en/content/proton#proton-maintenance-jobs). 
Tuning these limits should be done with extreme care, and setting them too high might lead to permanent data loss. They are best left untouched, using the defaults, and cannot be set in Vespa Cloud. +</Warning> + +| Element | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **disk** | optional | float \[0, 1\] | 0.8 | Fraction of total space on the disk partition used on a content node before feed is blocked | +| **memory** | optional | float \[0, 1\] | 0.8/0.75 | Fraction of physical memory that can be resident memory in anonymous mapping on a content node before feed is blocked. Total physical memory is sampled as the minimum of `sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE)` and the cgroup (v1 or v2) memory limit. Nodes with 8 GiB or less memory in Vespa Cloud have a limit of 0.75. | + +Example - in the content tag: +```xml +<tuning> + <resource-limits> + <disk>0.78</disk> + <memory>0.77</memory> + </resource-limits> +</tuning> +``` + +## dispatch + +Contained in [tuning](#tuning). Tune the query dispatch behavior - child elements: + +| Element | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **max-hits-per-partition** | optional | Integer | No capping: Return all | Maximum number of hits to return from a content node. By default, a query returns the requested number of hits + offset from every content node to the container. The container orders the hits globally according to the query, then discards all hits beyond the number requested.<br/><br/> In a system with a large fan-out, this consumes network bandwidth and the container nodes easily become network saturated. Containers will also sort and discard more hits than optimal.<br/><br/> When there are sufficiently many search nodes, assuming an even distribution of the hits, it suffices to only return a fraction of the requested number of hits from each node. Note that changing this number will have global ordering impact. 
See *top-k-probability* below for improving performance with fewer hits. | +| **dispatch-policy** | optional | adaptive / best-of-random-2 / round-robin | adaptive | With [grouped distribution](/en/performance/sizing-search#data-distribution): Configure policy for choosing which group shall receive the next query request. Coverage requirements are considered when choosing a group. Note that multiphase requests that require or benefit from hitting the same group in all phases are always hashed. <br/><br/>**adaptive**<br/> Measures latency, preferring lower latency groups, selecting group `i` has a probability proportional to 1 / (latency for group `i`). <br/><br/> **best-of-random-2** <br/>Selects 2 random groups and selects the one with the lowest latency. <br/><br/> **round-robin** <br/>Selects groups in a round-robin manner, giving fair distribution of queries to each group. | +| **prioritize-availability** | optional | Boolean | true | With [grouped distribution](/en/performance/sizing-search#data-distribution): If true, or by default, all groups that are within min-active-docs-coverage of the **median** of the document count of other groups will be used to service queries. If set to false, only groups within min-active-docs-coverage of the **max** document count will be used, with the consequence that full coverage is prioritized over availability when multiple groups are lacking content, since the remaining groups may not be able to service the full query load. | +| **min-active-docs-coverage** | optional | A float percentage | 97 | With [grouped distribution](/en/performance/sizing-search#data-distribution): The percentage of active documents a group must have, relative to the median across all groups in the content cluster, to be considered active for serving queries. Because of measurement timing differences, it is not advisable to tune this above 99 percent. 
| +| **top-k-probability** | optional | Double | 0.9999 | Probability that the top K hits will be the globally best. Based on this probability, the dispatcher will fetch enough hits from each node to achieve this. The only way to guarantee a probability of 1.0 is to fetch K hits from each partition. However, by reducing the probability from 1.0 to 0.99999, one can significantly reduce number of hits fetched and save both bandwidth and latency. The number of hits to fetch from each partition is computed as: <br/><br/> $$ q = \frac{k}{n} + qT(p, 30) \times \sqrt{k \times \frac{1}{n} \times \left(1 - \frac{1}{n}\right)} $$ <br/><br/> where qT is a Student's t-distribution. With n=10 partitions, k=200 hits and p=0.99999, only 45 hits per partition are needed, as opposed to 200 when p=1.0.<br/><br/> Use this option to reduce network and container cpu/memory in clusters with many nodes per group - see [Vespa Serving Scaling Guide](/en/performance/sizing-search). | + +## cluster-controller + +Contained in [tuning](#tuning). Tuning parameters for the cluster controller managing this cluster - child elements: + +| Element | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **init-progress-time** | optional | | | If the initialization progress count has not been altered for this number of seconds, the node is assumed to have deadlocked and is set down. Note that initialization may actually be prioritized lower now, so setting a low value here might cause false positives. However, if it is set down for the wrong reason, it will be set up again once it finishes initialization. 
| +| **transition-time** | optional | | [storage\_transition\_time](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/fleetcontroller.def) [distributor\_transition\_time](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/fleetcontroller.def) | The transition time states how long (in seconds) a node will be in maintenance mode during what looks like a controlled restart. Keeping a node in maintenance mode during a restart allows a restart without the cluster trying to create new copies of all the data immediately. If the node has not started or got back up within the transition time, the node is set down, in which case, new full bucket copies will be created. Note separate defaults for distributor and storage (i.e. search) nodes. | +| **max-premature-crashes** | optional | | [max\_premature\_crashes](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/fleetcontroller.def) | The maximum number of crashes allowed before a content node is permanently set down by the cluster controller. If the node has a stable up or down state for more than the *stable-state-period*, the crash count is reset. However, resetting the count will not re-enable the node again if it has been disabled - restart the cluster controller to reset. | +| **stable-state-period** | optional | | [stable\_state\_time\_period](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/fleetcontroller.def) | If a content node's state doesn't change for this many seconds, its state is considered *stable*, clearing the premature crash count. | +| **min-distributor-up-ratio** | optional | | [min\_distributor\_up\_ratio](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/fleetcontroller.def) | The minimum ratio of distributors that are required to be *up* for the cluster state to be *up*. 
| +| **min-storage-up-ratio** | optional | | [min\_storage\_up\_ratio](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/fleetcontroller.def) | The minimum ratio of content nodes that are required to be *up* for the cluster state to be *up*. | +| **groups-allowed-down-ratio** | optional | | [groups-allowed-down-ratio](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/fleetcontroller.def) | A ratio for the number of content groups that are allowed to be down simultaneously. A value of 0.5 means that 50% of the groups are allowed to be down. The default is to allow only one group to be down at a time. | diff --git a/mintlify-docs/en/reference/applications/services/docproc.mdx b/mintlify-docs/en/reference/applications/services/docproc.mdx new file mode 100644 index 0000000000..7788a531d1 --- /dev/null +++ b/mintlify-docs/en/reference/applications/services/docproc.mdx @@ -0,0 +1,151 @@ +--- +title: "services.xml - document-processing" +sidebarTitle: "services.xml - docproc" +--- + +This is the [document-processing](/en/applications/document-processors) reference in [services.xml](/en/reference/applications/services/services): + +```yaml expandable +container + document-processing [numnodesperclient, preferlocalnode, maxmessagesinqueue, maxqueuebytesize, + maxqueuewait, maxconcurrentfactor, documentexpansionfactor, containercorememory] + include + documentprocessor [class, bundle, id, idref, provides, before, after] + provides + before + after + map + field [doctype, in-document, in-processor] + chain [name, id, idref, inherits, excludes, documentprocessors] + map + field [doctype, in-document, in-processor] + inherits + chain + exclude + documentprocessor [class, bundle, id, idref, provides, before, after] + provides + before + after + map + field [doctype, in-document, in-processor] + phase [id, idref, before, after] + before + after + threadpool +``` + +The root element of the *document-processing* configuration 
model. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **numnodesperclient** | optional | | | <Danger>**Deprecated:** Ignored and deprecated, will be removed in Vespa 9.</Danger><br/><br/> Set to some number below the amount of nodes in the cluster to limit how many nodes a single client can connect to. If you have many clients, this can reduce the memory usage on both document-processing and client nodes. | +| **preferlocalnode** | optional | | false | <Danger>**Deprecated:** Ignored and deprecated, will be removed in Vespa 9.</Danger><br/><br/> Set to always prefer sending to a document-processing node running on the same host as the client. You should use this if you are running a client on each document-processing node. | +| **maxmessagesinqueue** | | | | | +| **maxqueuebytesize** | | | | <Danger>**Deprecated:** Ignored and deprecated, will be removed in Vespa 9.</Danger> | +| **maxqueuewait** | optional | | | The maximum number of seconds a message should wait in queue before being processed. Docproc will adapt its queue size to adhere to this. If the queue is full, new messages will be replied to with SESSION\_BUSY. | +| **maxconcurrentfactor** | | | | | +| **documentexpansionfactor** | optional | | | | +| **containercorememory** | | | | | + +## Document Processor elements + +*documentprocessor* elements are contained in [docproc chain elements](#chain) or in the *document-processing* root. + +A documentprocessor element is either a document processor definition or document processor reference. The rest of this section deals with document processor definitions; document processor references are described in [docproc chain elements](#docproc-chain-elements). + +A documentprocessor definition causes the creation of exactly one document processor instance. This instance is set up according to the content of the documentprocessor element. 
+ +A documentprocessor definition contained in a docproc chain element defines an *inner document processor*. Otherwise, it defines an *outer document processor.* + +For inner documentprocessors, the name must be unique inside the docproc chain. For outer documentprocessors, the component id must be unique. An inner documentprocessor is not permitted to have the same name as an outer documentprocessor. + +Optional sub-elements: + +- provides, a single name that should be added to the provides list +- before, a single name that should be added to the before list +- after, a single name that should be added to the after list +- config (one or more) + +For more information on provides, before and after, see [Chained components](/en/applications/chaining). + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| class | | | | | +| bundle | | | | | +| id | required | | | The component id of the documentprocessor instance. | +| idref | | | | | +| provides | optional | | | A space-separated list of names that represents what this documentprocessor produces. | +| before | optional | | | A space-separated list of phase or provided names. Phases or documentprocessors providing these names will be placed later in the docproc chain than this document processor. | +| after | optional | | | A space-separated list of phase or provided names. Phases or documentprocessors providing these names will be placed earlier in the docproc chain than this document processor. | + +### documentprocessor + +Defines a documentprocessor instance of a user specified class. + +```xml +<documentprocessor id="componentId" + class="className:versionSpecification" + bundle="the name in <artifactId> in pom.xml"> + ... +</documentprocessor> +``` + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| id | required | | | The component id of the documentprocessor instance. 
| +| class | optional | | | A component specification containing the name of the class to instantiate to create the document processor instance. If missing, copied from id. | +| bundle | optional | | | The bundle containing the class: The name in `<artifactId>` in pom.xml. If a bundle is not specified, the bundle containing document processors bundled with Vespa is used. | + +## Docproc chain elements + +Specifies how a docproc chain should be instantiated, and how the contained document processors should be ordered. + +### chain + +Contained in *document-processing*. Refer to the [chain reference](/en/reference/applications/services/processing#chain). Chains can [inherit](/en/reference/applications/services/processing#inherits) document processors from other chains and use [phases](/en/reference/applications/services/processing#phase) for ordering. Optional sub-elements: + +- [documentprocessor element](#documentprocessor) (one or more), either a documentprocessor reference or documentprocessor definition. If the name given for a documentprocessor matches an *outer documentprocessor*, it is a *documentprocessor reference* - otherwise, it is a *documentprocessor definition*. If it is a documentprocessor definition, it is also an implicit documentprocessor reference saying: use *exactly* this documentprocessor. All these documentprocessor elements must have different names. +- [phase](/en/reference/applications/services/processing#phase) (one or more). +- [config](/en/reference/applications/config-files#generic-configuration-in-services-xml) (one or more - will apply to all *inner* documentprocessors in this docproc chain, unless overridden by individual inner documentprocessors). + +## Map + +Set up a field name mapping from the name(s) of field(s) in the input documents to the names used in a deployed docproc. The purpose is to reuse functionality without changing the field names. 
The example below shows the configuration: + +```xml +<chain name="myChain"> + <map> + <field in-document="key" in-processor="id"/> + </map> + <documentprocessor type="CityDocProc"> + <map> + <field in-document="town" in-processor="city" doctype="restaurant"/> + </map> + </documentprocessor> + <documentprocessor type="CarDocProc"> + <map> + <field in-document="engine.cylinders" in-processor="cyl"/> + </map> + </documentprocessor> +</chain> +``` + +In the example, a chain is deployed with 2 docprocs. + +For the chain, a mapping from *key* to *id* is set up. Imagine that some or all of the docprocs in the chain read and write to a field called *id*, but we want this functionality applied to the document field *key*. + +Furthermore, a similar thing is done for the `CityDocProc`: The docproc accesses the field *city*, whereas it's called *town* in the feed. The mapping only applies to the document type *restaurant*. + +The `CarDocProc` accesses a field called *cyl*. In this example this is mapped to the field *cylinders* of a struct *engine* using a dotted notation. + +If you specify mappings on different levels of the config (say both for a cluster and a docproc), the mapping closest to the actual docproc will take precedence. + +## threadpool + +Available since `Vespa 8.601.12` + +Specifies configuration for the thread pool used by document processor chains. All values scale with the number of vCPU—see the [container tuning example](/en/performance/container-tuning#container-worker-threads-example). When all workers are busy, new document processing requests are rejected. + +### threads + +Number of worker threads per vCPU. Default value is `1`. The pool runs with `threads * vCPU` workers. 
diff --git a/mintlify-docs/en/reference/applications/services/http.mdx b/mintlify-docs/en/reference/applications/services/http.mdx new file mode 100644 index 0000000000..28222eee42 --- /dev/null +++ b/mintlify-docs/en/reference/applications/services/http.mdx @@ -0,0 +1,301 @@ +--- +title: "services.xml - http" +--- + +This is the reference for the `http` subelement of [container](/en/reference/applications/services/container) in [services.xml](/en/reference/applications/services/services). The http block is used to configure http servers and filters - when this element is present, the default http server is disabled. + +```yml expandable +http + server [id, port] + ssl + private-key-file + certificate-file + ca-certificates-file + client-authentication + protocols + cipher-suites + ssl-provider [class, bundle] + filtering + filter [id, class, bundle, provides, before, after] + provides + before + after + filter-config + request-/response-chain [id, inherits, excludes] + binding + filter [id, class, bundle, provides, before, after] + provides + before + after + filter-config + inherits + chain + exclude + phase [id, before, after] + before + after +``` + +Most elements take optional [config](/en/reference/applications/config-files#generic-configuration-in-services-xml) elements, see example in [server](#server). + +Note: To bind the search handler port (i.e. the handler for queries), refer to [search bindings](/en/reference/applications/services/search#binding).
+ +Example: + +```xml +<http> + <server id="server1" port="8080" /> + <server id="server2" port="9000" /> + + <filtering> + <filter id="request-filter1" class="com.yahoo.test.RequestFilter1" /> + <filter id="response-filter1" class="com.yahoo.test.ResponseFilter1" /> + + <request-chain id="test-request-chain"> + <binding>http://*/*</binding> + <filter id="request-filter1"/> + <filter id="request-filter2" class="com.yahoo.test.RequestFilter2" /> + </request-chain> + + <response-chain id="test-response-chain"> + <binding>http://*:8080/*</binding> + <binding>http://*:9000/path</binding> + <filter id="response-filter1"/> + <filter id="response-filter2" class="com.yahoo.test.ResponseFilter2" /> + </response-chain> + </filtering> +</http> +``` + +## server + +The definition of a http server. Configure the server using [jdisc.http.connector.def](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/resources/configdefinitions/jdisc.http.jdisc.http.connector.def). + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The component ID | +| **port** | optional | number | The web services port of the [environment variables](/en/operations/self-managed/files-processes-and-ports#environment-variables) | Server port | +| **default-request-chain** | optional | string | | The default request chain to use for unmatched requests | +| **default-response-chain** | optional | string | | The default response chain to use for unmatched requests | + +Example: + +```xml +<server id="server1" port="8080"> + <config name="jdisc.http.connector"> + <idleTimeout>90</idleTimeout> + </config> +</server> +``` + +## ssl + +Setup TLS on HTTP server using credentials provided in PEM format. + +## private-key-file + +Path to private key file in PEM format. + +## certificate-file + +Path to certificate file in PEM format. + +## ca-certificates-file + +Path to CA certificates file in PEM format. 
+ +## client-authentication + +Client authentication. Supported values: *disabled*, *want* or *need*. + +## protocols + +Comma-separated list of TLS protocol versions to enable. Example: *TLSv1.2,TLSv1.3*. + +## cipher-suites + +Comma-separated list of TLS cipher suites to enable. The specified ciphers must be supported by JDK installation. Example: *TLS\_AES\_256\_GCM\_SHA384,TLS\_ECDHE\_ECDSA\_WITH\_AES\_256\_GCM\_SHA384*. + +## ssl-provider + +Setup TLS on the HTTP server through a programmatic Java interface. The specified class must implement the [SslProvider](https://javadoc.io/doc/com.yahoo.vespa/container-disc/latest/com/yahoo/jdisc/http/SslProvider.html) interface. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **class** | required | string | | The class name | +| **bundle** | required | string | | The bundle name | + +## filtering + +`filtering` is for configuring http filter chains. Sub-elements: + +- [filter](#filter) +- [request-chain](#chain) +- [response-chain](#chain) + +Example: + +```xml +<filtering> + <filter id="request-filter1" class="com.yahoo.test.RequestFilter1" /> + <filter id="response-filter1" class="com.yahoo.test.ResponseFilter1" /> + + <request-chain id="test-request-chain"> + <binding>http://*/</binding> + <filter id="request-filter1"/> + <filter id="request-filter2" class="com.yahoo.test.RequestFilter2" /> + </request-chain> + + <response-chain id="test-response-chain"> + <binding>http://*/</binding> + <filter id="response-filter1"/> + <filter id="response-filter2" class="com.yahoo.test.ResponseFilter2" /> + </response-chain> +</filtering> +``` + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **strict-mode** | optional | boolean | true | When set to true, all requests must match a filter. For any requests not matching, an HTTP 403 response is returned. 
| + +## binding + +Specifies that requests/responses matching the given URI pattern should be sent through the [request-chain/response-chain](#chain). + +## filter + +The definition of a single filter, for referencing when defining chains. If a single filter is to be used in different chains, it is cleaner to define it directly under `http` and then refer to it with `id`, than defining it inline separately for each chain. The following filter types are supported: + +- RequestFilter +- ResponseFilter +- SecurityRequestFilter +- SecurityResponseFilter + +Security\[Request/Response\]Filters are automatically wrapped in Security\[Request/Response\]FilterChains. This makes them behave like regular Request/Response filters with respect to chaining. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The component ID | +| **class** | optional | string | id | The class of the component, defaults to id | +| **bundle** | optional | string | id or class | The bundle to load the component from, defaults to class or id (if no class is given) | +| **before** | optional | string | | Space separated list of phases and/or filters which should succeed this filter | +| **after** | optional | string | | Space separated list of phases and/or filters which should precede this filter | + +Sub-elements: + +- [provides](#provides) +- [before](#before) +- [after](#after) +- [filter-config](#filter-config) + +Example: + +```xml +<filter id="filter2" class="com.yahoo.test.Filter2"/> +``` + +## provides + +A name provided by a filter for phases and other filters to use as dependencies. Contained in [filter](#filter) and [filter](#filter) (in chain). + +## before + +The name of a phase or filter which should succeed this phase or filter. `before` tags may be used if it is necessary to define filters or phases which always should succeed this filter or phase in a chain.
In other words, the phase or filter defined is placed *before* the name in the tag. Contained in [filter](#filter), [filter](#filter) (in chain) and [phase](#phase). + +## after + +The name of a phase or filter which should precede this phase or filter. `after` tags may be used if it is necessary to define filters or phases which always should precede this filter or phase in a chain. In other words, the phase or filter defined is placed *after* the name in the tag. Contained in [filter](#filter), [filter](#filter) (in chain) and [phase](#phase). Example: + +```xml +<filter id="filterauth" class="com.yahoo.test.auth"> + <provides>Authorization</provides> + <before>LastFilters</before> + <after>Earlyfilters</after> +</filter> +``` + +## filter-config + +Only used to configure filters that are configured with `com.yahoo.jdisc.http.filter.security.FilterConfig`. This is the case for all filters provided in JDisc bundles. + +## request-chain/response-chain + +Defines a chain of request filters or response filters, respectively. A chain is a set ordered by dependencies. Dependencies are expressed through phases, which may depend upon other phases, or filters. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **inherits** | | string | | A space separated list of chains this chain should include the contents of | +| **excludes** | | string | | A space separated list of filters (contained in an inherited chain) this chain should not include | + +Sub-elements: + +- [binding](#binding) +- [filter](#filter). Refer to or define a filter. *config* or *filter-config* cannot be added to references, only filter definitions.
+- [inherits](#inherits) +- [phase](#phase) + +Examples: + +```xml +<request-chain id="default-request-filters"> + <binding>http://*/*</binding> + <filter id="com.yahoo.test.RequestFilter"/> +</request-chain> +<response-chain id="response-filters"> + <binding>http://*:8080/*</binding> + <binding>http://*:9000/path</binding> + <filter id="com.yahoo.test.ResponseFilter"/> +</response-chain> +``` + +## inherits + +Wrapper element for information about which chains, if any, a chain should inherit, and how. Contained in [request-chain](#chain) and [response-chain](#chain). Sub-elements: + +- (inherited) [chain](#inheritedchain) +- [exclude](#exclude) + +## (inherited) chain + +The ID of a chain which this chain should inherit, i.e. include all filters and phases from. Use multiple `chain` tags if it is necessary to combine the filters from multiple chains. Contained in [inherits](#inherits). + +## exclude + +A filter the chain under definition should exclude from the chain or chains it inherits from. Use multiple `exclude` tags to exclude multiple filters. Contained in [inherits](#inherits). Example: + +```xml +<request-chain id="demo"> + <inherits> + <chain>idOfSomeInheritedChain</chain> + <exclude>idOfUnwantedFilter</exclude> + <exclude>idOfYetAnotherUnwantedFilter</exclude> + </inherits> + <filter id="filter2" class="com.yahoo.test.Filter2"/> +</request-chain> +``` + +## phase + +Defines a phase, which is a checkpoint to help order filters. Filters and other phases may depend on a phase to be able to make assumptions about the order of filters. Contained in [chain](#chain). 
+ +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The ID, or name, which other phases and filters may depend upon as a [successor](#before) or [predecessor](#after) | +| **before** | optional | string | | Space separated list of phases and/or filters which should succeed this phase | +| **after** | optional | string | | Space separated list of phases and/or filters which should precede this phase | + +Sub-elements: + +- [before](#before) +- [after](#after) + +Example: + +```xml +<request-chain id="demo"> + <phase id="CheckpointName"> + <before>Authorization</before> + </phase> + <filter id="filter2" class="com.yahoo.test.Filter2"/> +</request-chain> +``` diff --git a/mintlify-docs/en/reference/applications/services/processing.mdx b/mintlify-docs/en/reference/applications/services/processing.mdx new file mode 100644 index 0000000000..4c06a854e1 --- /dev/null +++ b/mintlify-docs/en/reference/applications/services/processing.mdx @@ -0,0 +1,188 @@ +--- +title: "services.xml - processing" +--- + + +This document explains the syntax and semantics of the `processing` subelement of the [`container`](/en/reference/applications/services/container) part of `services.xml`. `processing` is for configuring a pure [request-response processing](/en/applications/processing) application, with no particular dependency to search or query handling. 
The processing block is used to configure [processors](/en/applications/processing): + +```yaml expandable +processing + binding + processor [id, class, bundle, provides, before, after] + provides + before + after + renderer [id, class, bundle] + chain [id, inherits, excludes] + processor [idref, id, class, bundle, provides, before, after] + provides + before + after + inherits + chain + exclude + phase [id, before, after] + before + after +``` + +Example: + +```xml +<processing> + <processor id="processor1" class="com.yahoo.test.Processor1" /> + <chain id="default"> + <processor idref="processor1"/> + <processor id="processor2" class="com.yahoo.test.Processor2"/> + </chain> + <renderer id="renderer1" class="com.yahoo.test.Renderer1" /> +</processing> +``` + +## binding + +The URI to map the ProcessingHandler to. The default binding is `http://*/processing/*`. Multiple elements are allowed. Example: + +```xml +<binding>http://*/processing/*</binding> +``` + +## processor + +The definition of a single processor, for referencing when defining chains. If a single processor is to be used in multiple chains, it is cleaner to define it directly under `processing` and then refer to it with `idref`, than defining it inline separately for each chain. 
+ +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The component ID | +| **class** | optional | string | | The class of the component, defaults to id | +| **bundle** | optional | string | | The bundle to load the component from, defaults to class or id (if no class is given) | +| **before** | optional | string | | Space separated list of phases and/or processors which should succeed this processor | +| **after** | optional | string | | Space separated list of phases and/or processors which should precede this processor | + +Example: + +```xml +<processor id="processor2" class="com.yahoo.test.Processor2"/> +``` + +## renderer + +The definition of a renderer, for use by a Handler. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The component ID | +| **class** | optional | string | | The class of the component, defaults to id | +| **bundle** | optional | string | | The bundle to load the component from, defaults to class or id (if no class is given) | + +Example: + +```xml +<renderer id="renderer1" class="com.yahoo.test.Renderer1" /> +``` + +## processor (in chain) + +Reference to or inline definition of a processor in a chain. If inlining, same as [processor](#processor) - if referring to, use *idref* attribute: + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **idref** | | string | | Reference to the definition of this processor. | + +Example: + +```xml +<chain id="default"> + <processor idref="processor2" /> +</chain> +``` + +## provides + +A name provided by a processor for phases and other processors to use as dependencies. 
Example: + +```xml +<provides>IntentAnalysis</provides> +``` + +## chain + +An element for defining a chain of [processors](/en/reference/applications/services/processing) / [searchers](/en/reference/applications/services/search) / [document processors](/en/reference/applications/services/docproc) (i.e. components). A chain is a set ordered by dependencies. Dependencies are expressed through phases, which may depend upon other phases, or components. For an incoming request from the network, the chain named *default* will be invoked. Refer to [Chained components](/en/applications/chaining) for a guide. Requires one of *id* or *idref*. + +Searcher, Document processing and Processing chains can be modified at runtime without restarts. Modification includes adding/removing processors in chains and changing names of chains and processors. Make the change and [deploy](/en/basics/applications#deploying-applications). Some changes require a container restart, refer to [reconfiguring document processing](/en/applications/document-processors#reconfiguring-document-processing). + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **idref** | | string | | A reference to a defined chain. Mutually exclusive with *id*. If *idref* is used, no other attributes apply. | +| **id** | required | string | | The chain ID. Required unless *idref* is used | +| **inherits** | optional | string | | A space-separated list of chains this chain should include the contents of - see example below. | +| **excludes** | optional | string | | A space-separated list of processors (contained in an inherited chain) this chain should not include. The exclusion is done before any consolidation of component references when inheriting chains. 
Example:<br/><br/>`<chain id="demo" inherits="idOfInheritedChain"`<br/> `excludes="idOfProc1 idOfProc2">`<br/>`<processor id="proc2" class="com.yahoo.test.Proc2"/>`<br/> `</chain>` | +| **class** | optional | string | | | +| **name** | | | | | +| **documentprocessors** | | | | | + +## inherits + +Inherit from one or more parent chain(s). + +When a search chain inherits from another search chain, it subsumes the phases and the *searcher references* (both implicitly and explicitly defined) from the parent chain. + +If two or more inherited component references have the same name, a new component specification matching those will be used instead. If that is not possible, an error will be signaled (i.e. if the version specifications cannot be consolidated or if they require component definitions from different chains). + +The component references determine which instances are used in the resulting chain instance. + +A component reference is a component specification that says: there shall be exactly one component in this chain with the given name, and this component must match the version specification. + +A component reference *overrides* any inherited component references with the same name (i.e. the inherited ones are ignored). + +If several components match a given component reference, the newest (as determined by the version) is used. + +## exclude + +Exclude components from inherited chains. + +## phase + +Defines a phase, which is a named checkpoint to help order components inside a chain. Components and other phases may depend on a phase to be able to make assumptions about the order of components. Refer to the [Chained components](/en/applications/chaining) guide. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The ID, or name, which other phases and processors may depend upon as a [successor](#before) or [predecessor](#after).
| +| **before** | optional | string | | Space-separated list of phases and/or processors which should succeed this phase | +| **after** | optional | string | | Space-separated list of phases and/or processors which should precede this phase | + +Optional sub-elements: + +- before: same as the attribute +- after: same as the attribute + +These are equivalent: + +```xml +<phase id="name"> + <before>phaseName1</before> + <after>phaseName2</after> +</phase> + +<phase id="name" before="phaseName1" after="phaseName2" /> +``` + +Example: + +```xml +<chain id="demo"> + <phase id="CheckpointName"> + <before>IntentAnalysis</before> + <after>OtherAnalysis</after> + </phase> + <processor id="processor2" class="com.yahoo.test.Processor2"/> +</chain> +``` + +## before + +The name of a phase or component which should succeed this phase or component. Multiple `before` elements can be used to define multiple components or phases which always should succeed this component or phase in a chain. In other words, the phase or component defined is placed *before* the name in the element. + +## after + +The name of a phase or component which should precede this phase or component. Multiple `after` elements can be used to define multiple components or phases which always should precede this component or phase in a chain. In other words, the phase or component defined is placed *after* the name in the element. diff --git a/mintlify-docs/en/reference/applications/services/search.mdx b/mintlify-docs/en/reference/applications/services/search.mdx new file mode 100644 index 0000000000..404951c16e --- /dev/null +++ b/mintlify-docs/en/reference/applications/services/search.mdx @@ -0,0 +1,296 @@ +--- +title: "services.xml - 'search'" +sidebarTitle: "services.xml - search" +--- + +This is the reference for the *search* part of the container config. Related: [Chained components](/en/applications/chaining) and the [federation tutorial](/en/querying/federation).
The root element of the search configuration, declared as a subelement to [container](/en/reference/applications/services/container): + +```yaml +search + binding + searcher [id, class, bundle, provides, before, after] + federation [id] + source [idref] + federationoptions [timeout, requestTimeout, optional] + source-set [inherits] + target-selector + chain [id, inherits, excludes] + searcher [id, class, bundle, provides, before, after] + federation [id] + source [idref] + federationoptions [timeout, requestTimeout, optional] + source-set [inherits] + target-selector + provider [id, type, cluster, excludes] + federationoptions [timeout, requestTimeout, optional] + source [id] + searcher [id, class, bundle, provides, before, after] + renderer [id, class, bundle] + significance + threadpool + threads [ max ] + queue +``` + +[config](/en/reference/applications/config-files#generic-configuration-in-services-xml) applies to all searchers in the JDisc cluster's search chains, unless overridden by individual search chains or searchers. + +## binding + +The URI to map the SearchHandler to. The default binding is `http://*/search/*`. Multiple elements are allowed. Example: + +```xml +<binding>http://*/search/*</binding> +``` + +## searcher + +Searcher elements are contained in [chain](#chain) elements or in the search root. + +A searcher element is either a *definition* (using *id*) or a *reference* (using *idref*). + +A searcher definition causes the creation of exactly one searcher instance. This instance is set up according to the content of the searcher element. A searcher definition contained in a search chain element defines an *inner searcher*. Otherwise, it defines an *outer searcher.* + +Searcher definition: + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The component id of the searcher instance. For inner searchers, the id must be unique inside the search chain. 
For outer searchers, the id must be unique. An inner searcher is not permitted to have the same id as an outer searcher. | +| **class** | optional | | | A component specification containing the name of the class to instantiate to create the searcher instance. If missing, copied from id | +| **bundle** | optional | | | A component specification containing the bundle symbolic name and version used to select the bundle: The name in `<artifactId>` in pom.xml. The class is loaded from this bundle. If no bundle is specified, it defaults to the bundle containing the searchers bundled with Vespa. | +| **provides** | optional | | | A space-separated list of names that represents what this searcher produces. For more information on provides, before and after, see [chained components](/en/applications/chaining) | +| **before** | optional | | | A space-separated list of phase or provided names. Phases or searchers providing these names will be placed later in the search chain than this searcher | +| **after** | optional | | | A space-separated list of phase or provided names. Phases or searchers providing these names will be placed earlier in the search chain than this searcher | + +Example: + +```xml +<searcher id="componentId" + class="className:versionSpecification" + bundle="the name in <artifactId> in pom.xml" /> +``` + +Searcher reference: + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **idref** | required | string | | Reference to a searcher definition | + +Example: + +```xml +<searcher idref="componentId" /> +``` + +## federation + +A federation is a [searcher](/en/applications/searchers#searchers) - see above. This element implements *federation* - it defines a searcher instance that sends each query to a set of search chains in parallel and combines the results. 
Read the [federation guide](/en/querying/federation) to learn more and find examples for use of federation and its children [source](#source), [source-set](#source-set) and [target-selector](#target-selector), as well as [provider](#provider). + +```xml +<federation id="componentId"> + <source idref="componentSpecification" /> + <target-selector /> +</federation> +``` + +## target-selector + +Specifies a component that should be used to select search chains to federate to. This component must inherit from com.yahoo.search.federation.selection.TargetSelector. See [component](/en/reference/applications/services/container#component) for attributes and subelements. + +## source-set + +Used to duplicate the sources of e.g. the built-in federation searcher: + +```xml +<federation id="combinator"> + <source-set inherits="default" /> + … +</federation> +``` + +## source + +Reference to a source that should be used by the enclosing federation searcher. Child element [federationoptions](#federationoptions) is optional. + +```xml +<source idref="componentSpecification"> + <federationoptions/> +</source> +``` + +## federationoptions + +Contained in [source](#source) or [provider](#provider). Specifies *how* a federation searcher should federate to a given search chain. If a federation options A *overrides* another federation options B, the result is a new federation options containing: + +- all the options in B not present in A +- all the options in A + +When federating to a source or provider, the federation searcher per default uses the federation options from the search chain. If a [source reference](#source-reference) contains federation options, it overrides the options of the search chain when used from the enclosing federation searcher. 
+ +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **timeout** | optional | number | | The minimum number of seconds or milliseconds (if ms is present) the federation searcher waits for the federated search chain executing the query | +| **requestTimeout** | optional | number | | The minimum number of seconds or milliseconds (if ms is present) the search chain executing the query should continue execution. In some cases it is useful to set this higher than the timeout, such that a chain can keep waiting for requested data longer than the query is waiting for the chain. This allows queries to populate caches within the search chain even though populating the caches requires waiting longer than the query timeout | +| **optional** | optional | true/false | false | Determines if the federation searcher should wait for this search chain at all. Normally, it only waits for mandatory (i.e. not optional) search chains, and when they are done, cancels the remaining search chains that are not finished. If all the search chains federated to are optional, all of them will be treated as mandatory. All search chains are per default mandatory | + +Example: + +```xml +<federationoptions timeout="2.0" requestTimeout="2500ms" optional="true" /> +``` + +## renderer + +The definition of a [search result renderer](/en/applications/result-renderers). + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | The component ID | +| **class** | optional | string | | The class of the component, defaults to id | +| **bundle** | optional | string | | The bundle to load the component from: The name in `<artifactId>` in your pom.xml. If no bundle is given, the bundle containing renderers provided by Vespa is used. 
| + +Example: + +```xml +<renderer id="componentId" + class="className:versionSpecification" + bundle="the name in <artifactId> in pom.xml" /> +``` + +## significance + +Contained in [searcher](#searcher). Specifies one or more global significance [models](#model). + +```xml +<significance> + <model model-id="significance-en-wikipedia-v1"/> + <model url="https://some/uri/my-model.model.multilingual.json"/> + <model path="models/my-model.no.json.zst"/> +</significance> +``` + +The models are either provided by *Vespa* or generated with [vespa-significance tool](/en/reference/operations/self-managed/tools#vespa-significance). The order determines model precedence - with the last element having the highest priority. To use these models, the schema needs to [enable significance models in the rank-profile](/en/reference/schemas/schemas#significance). + +Sub-elements: + +- [model](#model) (required, one or more) + +## model + +Contained in [significance](#significance). Specifies [global significance model](/en/ranking/significance#global-significance-model). Models are identified by `model-id` or by providing `url` or `path` to a model file in the application package. + +Models with `model-id` are provided by *Vespa* and listed [here](/en/rag/model-hub#significance-models). Example with `model-id`: + +```xml +<model model-id="significance-en-wikipedia-v1"/> +``` + +Models specified with `url` or `path` are JSON files, which can also be compressed with [zstandard](https://facebook.github.io/zstd/). Model files can be generated using [vespa-significance tool](/en/reference/operations/self-managed/tools#vespa-significance). Example with `url`: + +```xml +<model url="https://some/uri/mymodel.multilingual.json"/> +``` + +Models with `path` should be placed in the application package. The path is relative to the application package root.
Example with `path`: + +```xml +<model path="models/mymodel.no.json.zst"/> +``` + +## chain + +Specifies how a search chain should be instantiated, and how the contained searchers should be ordered. Refer to the [chain reference](/en/reference/applications/services/processing#chain) for attributes and child elements. Chains can [inherit](/en/reference/applications/services/processing#inherits) searchers from other chains and use [phases](/en/reference/applications/services/processing#phase) for ordering. Note that [provider](#provider) and [source](#source) elements are also chains. Specify a search chain in a query using [searchChain](/en/reference/api/query#searchchain). + +Example which inherits from the built-in *vespa* chain so that the searcher can dispatch queries to the content clusters: + +```xml +<chain id="common" inherits="vespa"> + <searcher class="com.yahoo.vespatest.ExtraHitSearcher" id="CommonSearcher" bundle="the name in <artifactId> in your pom.xml" > + <config name="vespatest.extra-hit"> + <exampleString>A searcher for ...</exampleString> + </config> + </searcher> +</chain> +``` + +Optional sub-elements: + +- searcher or federation (one or more), either a reference or definition. If the name given for a searcher matches an *outer searcher*, it is a *searcher reference*. Otherwise, it is a *searcher definition*. If it is a searcher definition, it is also an implicit searcher reference saying: use *exactly* this searcher. All these searcher elements must have different name. +- [phase](/en/reference/applications/services/processing#phase) (one or more). +- [config](/en/reference/applications/config-files#generic-configuration-in-services-xml) (one or more - will apply to all *inner* searchers in this search chain, unless overridden by individual inner searchers). + +You can put search config in separate files in a directory under the application package using [include](/en/reference/applications/services/container#include). 
Each file must contain one `<search>` element like above. Vespa behaves as if each chain configured within was "inline" in `services.xml`. This is handy when multiple developers need to deploy individual search chains, say in different packages. + +<Note> +**Note:** + +If using multiple container clusters, the modular search chains will be available in all the clusters. +</Note> + +Each searcher reference must match the *type* of the searcher definition. So for example the searcher reference *federation idref="myId"* must match an outer searcher defined as *federation id="myId"*, not *searcher id="myId"*. + +## provider + +A provider is a search chain responsible for talking to an external service. Everything covered in [chain](#chain) is also valid for providers. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **id** | required | string | | ID | +| **excludes** | optional | | | | +| **type** | optional | local | | Determines which searchers are implicitly added to this search chain to talk to the external service. | + +### local provider + +Local providers are providers with the type set to *local*, accessing a local Vespa cluster (i.e. a content cluster in the same application). + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **cluster** | required | string | | The name of the local cluster. | + +```xml +<provider id="music" cluster="music" type="local" excludes="com.yahoo.prelude.querytransform.StemmingSearcher" /> +``` + +## source + +Defines a source search chain and an associated source. + +```xml +<provider id="providerA"> + <source id="commonSource"> + <searcher id="com.yahoo.example.AddHitSearcher" bundle="the name in <artifactId> in your pom.xml" > + <config name="vespatest.hit-title"> + <hitTitle>providerA</hitTitle> + </config> + </searcher> + </source> +</provider> +``` + +The component id specified is the id of the associated source. 
The associated source consists of all the source search chains with the same source name. + +Only a single source search chain can specify the source name using the "id" attribute. This search chain is called the *leader*. The other source search chains must specify the source name using the "idref" attribute. The latter search chains are called participants. + +A source can be used for *federation*. When federating to a source, the leader search chain is normally used. To use one of the participant search chains, the following query parameter must be set: source.*sourceId*.provider.*providerId*. + +The id of the source search chain is *sourceId@providerId*. This search chain automatically inherits from the enclosing provider. It also automatically inherits the federation options of the enclosing provider. If the source contains federation options, they override the inherited ones. In all other respects, this search chain behaves like any other search chain. + +## threadpool + +Specifies configuration for the thread pool for the jdisc search handler. All parameters are relative to the number of CPU cores—see the [container tuning example](/en/performance/container-tuning#container-worker-threads-example). This thread pool also supports the optional `max` attribute, which lets the pool grow up to `max * vCPU` threads under load before shrinking back after 5 seconds of idleness. Requests are rejected once the allowed number of threads is reached, all are busy, and the queue is full. + +Optional sub-elements: + +- [threads](#threadpool-threads) +- [queue](#threadpool-queue) + +### threads + +The number of permanent threads relative to number of vCPU cores. Default value is `10`. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| **max** | optional | number | equal to `<threads>` | The maximum number of threads relative to vCPU cores. Value must be greater than or equal to `<threads>`. 
| + +### queue + +The size of the request queue relative to effective number of threads. Specify `0` to disable queuing. Default value is `40`. diff --git a/mintlify-docs/en/reference/applications/services/services.mdx b/mintlify-docs/en/reference/applications/services/services.mdx new file mode 100644 index 0000000000..d16fa3c11b --- /dev/null +++ b/mintlify-docs/en/reference/applications/services/services.mdx @@ -0,0 +1,134 @@ +--- +title: "services.xml" +--- + + +*services.xml* specifies the clusters an application should have and their capabilities. It is placed in the root of the [application package](/en/basics/applications). + +Elements: + +<Card> +[services \[version\]](#services)<br/> +   [container   \[version\]](/en/reference/applications/services/container) - specifies a container cluster<br/> +   [content    \[version\]](/en/reference/applications/services/content) - specifies a content cluster<br/> +   [admin      \[version\]](/en/reference/applications/services/admin) - control plane configuration (rarely needed)<br/> +   [routing     \[version\] - how content should be routed (rarely needed)](/en/writing/document-routing#routing-services)<br/> +</Card> + +## `<services>` + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| version | required | number | | 1.0 in this version of Vespa | + +Optional subelements (one or more of *container* or *content* is required): + +- [`<container>`](/en/reference/applications/services/container) +- [`<content>`](/en/reference/applications/services/content) +- [`<admin>`](/en/reference/applications/services/admin) +- [`<routing>`](/en/writing/document-routing#routing-services) + +The rest of this document describes tags that are used within multiple services tags. + +## `<nodes>` + +The *nodes* element configures the hardware resources of a cluster, and so is used in both container and content clusters. 
This tag works differently on Vespa Cloud and self-managed instances: + +- Vespa Cloud: The number of nodes is specified by a *count* attribute, and the resources of each node by a [resources](#resources) child element. +- Self-managed: *nodes* have a [node](#node) child element for each node. A node referred to in *services.xml* must be defined in [hosts.xml](/en/reference/applications/hosts) using *hostalias*. + +It is possible to specify both to make an application package work in both environments, and it is always possible to deploy either type for development on the other: When the nodes tag has Vespa Cloud content it is interpreted as a single-node cluster in a self-hosted environment and vice versa. + +| Attribute | type | Default | Description | +| --- | --- | --- | --- | +| **count** | integer or range | | Vespa Cloud: The number of nodes of the cluster. | +| **exclusive** | boolean | false | Optional. Vespa Cloud: If true these nodes will never be placed on shared hosts even when this would otherwise be allowed (which is only for content nodes in some environments). When nodes are allocated exclusively, the resources must match the resources of the host exactly. | +| **groups** | integer or range | | Vespa Cloud content nodes only, optional: Integer or range. Sets the number of groups into which content nodes should be divided. Each group will have an equal share of the nodes, and one or more complete copies of the corpus and index, and each query will be routed to just one group - see [grouped distribution](/en/content/elasticity#grouped-distribution). This allows scaling to a higher query load than is possible with just a single group. | +| **group-size** | integer or range | | Vespa Cloud content nodes only, optional: Integer or range where either value can be skipped (replaced by an empty string) to create a one-sided limit. 
This can be set as an alternative to explicitly setting `groups`: The group sizes used will always be within these limits (inclusive), for any `count`. | + +If neither *groups* nor *group-size* is set, all nodes belong to a single group. Read more in [topology](/en/performance/topology-and-resizing). + +Ranges are expressed by the syntax `[lower-limit, upper-limit]`; Both limits are inclusive. Any value set as a range will be [autoscaled](/en/operations/autoscaling). + +## `<resources>` + +Under [nodes](#nodes) on Vespa Cloud: Specifies the resources each node in the cluster should have. + +The resources must match a node flavor in [AWS](/en/performance/instance-types/aws-instance-types), [GCP](/en/performance/instance-types/gcp-instance-types), or [Azure](/en/performance/instance-types/azure-instance-types), depending on where you are deploying. Exception: If you use remote disk, you can specify any number lower than the max size. + +**Subelements:** [`<gpu>`](#gpu) + +| Attribute | type | Default | Description | +| --- | --- | --- | --- | +| **vcpu** | float or range | 2 | CPU (virtual threads) | +| **memory** | float or range, each followed by a byte unit, such as "Gb" | 8 Gb in container clusters, 16 Gb in content clusters | Memory | +| **disk** | float or range, each followed by a byte unit, such as "Gb" | 50 in container clusters, 300 in content clusters | Disk space. To fit core dumps/heap dumps, the disk space should be larger than 3 x memory size for content nodes, 2 x memory size for container nodes. If disk size is not explicitly specified, Vespa Cloud chooses a default disk size. The default may be automatically increased to satisfy the minimum disk-to-memory ratio. When both disk and memory are explicitly specified, Vespa Cloud enforces the same minimum ratios. | +| **storage-type** | string (enum) | `any` | The type of storage to use. 
This is useful to specify local storage when network storage provides insufficient io operations or too noisy io performance:<br/><br/> • `local`: Node-local storage is required. <br/>• `remote`: Network storage must be used.<br/> • `any`: Both remote or local storage may be used. | +| **disk-speed** | string (enum) | `fast` | The required disk speed category:<br/><br/> • `fast`: SSD-like disk speed is required<br/> • `slow`: This is sized for spinning disk speed <br/>• `any`: Performance does not depend on disk speed (often suitable for container clusters). | +| **architecture** | string (enum) | `any` | Node CPU architecture:<br/><br/> • `x86_64` <br/>• `arm64`<br/> • `any`: Use any of the available architectures. | + +Ranges are expressed by the syntax `[lower-limit, upper-limit]`; Both limits are inclusive. Any value set as a range will be [autoscaled](/en/operations/autoscaling). + +## `<node>` + +Under [nodes](#nodes) on self-managed systems: Specifies a node that should be a member in the cluster. + +| Attribute | Required | Value | Default | Description | +| --- | --- | --- | --- | --- | +| hostalias | required | string | | a host name which must be mapped to a full hostname in [hosts.xml](/en/reference/applications/hosts) | + +## `<gpu>` + +Under [resources](#resources) on Vespa Cloud: Declares GPU resources to provision. + +Limitations: + +- Available in AWS zones only +- Valid for container clusters only + +| Attribute | type | Description | +| --- | --- | --- | +| **count** | integer | Number of GPUs | +| **memory** | integer, followed by a byte unit, such as "Gb" | Amount of memory per GPU. Total amount of GPU memory available is this number multiplied by `count`. 
| + +Example: + +```xml +<nodes count="2"> + <resources vcpu="4" memory="16Gb" disk="125Gb"> + <gpu count="1" memory="16Gb"/> + </resources> +</nodes> +``` + +## Zone-specific configuration + +Use *deployment variants* to express configuration like: + +```xml +<nodes deploy:environment="prod" + deploy:region="aws-use1-az4" + count="20"> + <resources vcpu="4" memory="16Gb" disk="125Gb"/> +</nodes> +``` + +Refer to [deployment variants](/en/operations/deployment-variants) for details and more options. + +## Generic configuration using `<config>` + +Most elements in *services.xml* accept a sub-element named *config*. *config* elements can be included on different levels in the XML structure and the lower-level ones will override values in the higher-level ones (example below). The *config* element must include the attribute *name*, which gives the full name of the configuration option in question, including the namespace. The name can either refer to configuration definitions that are shipped with Vespa or ones that are part of the [application package](/en/reference/applications/config-files). For a complete example on generic configuration see the [application package](/en/reference/applications/config-files#generic-configuration-in-services-xml) reference. + +```xml +<container id="default" version="1.0"> + <handler id="com.yahoo.vespatest.ConfiguredHandler"> + <config name="vespatest.response"> + <response>configured string</response> + </config> + </handler> +</container> +``` + +## Modular Configuration + +Some features are configurable using XML files in subdirectories of the application package. This means that the configuration found in these XML files will be used as if it was inlined in *services.xml*. This is supported for [search chains](/en/reference/applications/services/search#chain), [docproc chains](/en/reference/applications/services/docproc) and [routing tables](/en/writing/document-routing#routing-services). 
diff --git a/mintlify-docs/en/reference/applications/testing-java.mdx b/mintlify-docs/en/reference/applications/testing-java.mdx new file mode 100644 index 0000000000..643fd6b5f5 --- /dev/null +++ b/mintlify-docs/en/reference/applications/testing-java.mdx @@ -0,0 +1,51 @@ +--- +title: "Testing with Java JUnit tests" +sidebarTitle: "System test (Java)" +--- + + +This is the Vespa Testing reference for [Vespa application system tests](/en/applications/testing) written in Java, as JUnit 5 unit tests. + +These tests verify the behaviour of a Vespa application by using its HTTP interfaces. To write tests without Java, see [basic HTTP test reference](/en/reference/applications/testing). + +See the [testing guide](/en/applications/testing) for examples of how to run the tests. + +## Test suites + +The [testing documentation](/en/applications/testing) defines three test scenarios, comprised of four test code categories. The *system test framework* in [com.yahoo.vespa:tenant-cd-api](https://search.maven.org/artifact/com.yahoo.vespa/tenant-cd-api) uses Java annotations to declare what category a JUnit test class belongs to. To use Maven to run the tests belonging to a specific category, a JUnit 5 test *tag* must be specified: + +```sh +$ mvn test -D test.categories=system -D vespa.test.config=/path-to/test-config.json +``` + +| Category | Annotation | JUnit tag | Description | +| --- | --- | --- | --- | +| System test | @SystemTest | system | Independent, functional tests | +| Staging setup | @StagingSetup | staging-setup | Set state before upgrade | +| Staging test | @StagingTest | staging | Verify state after upgrade | +| Production test | @ProductionTest | production | Verify domain specific metrics | + +For an example including system and staging tests, check out the [sample application test suite](https://github.com/vespa-cloud/examples/tree/main/CI-CD/production-deployment-with-tests-java). 
+ +## TestNG + +Combining Vespa JUnit 5 test suites with unit tests in TestNG is possible. You'll need to explicitly configure Maven's surefire plugin to enable integration for both frameworks. To execute the Vespa test suites specify `-D test.categories=[tag]`, where *\[tag\]* is one of the values listed in [Test suites](#test-suites). + +```xml +<plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-surefire-plugin</artifactId> + <dependencies> + <dependency> + <groupId>org.apache.maven.surefire</groupId> + <artifactId>surefire-junit-platform</artifactId> + <version>${surefire.vespa.tenant.version}</version> + </dependency> + <dependency> + <groupId>org.apache.maven.surefire</groupId> + <artifactId>surefire-testng</artifactId> + <version>${surefire.vespa.tenant.version}</version> + </dependency> + </dependencies> +</plugin> +``` diff --git a/mintlify-docs/en/reference/applications/testing.mdx b/mintlify-docs/en/reference/applications/testing.mdx new file mode 100644 index 0000000000..ef55b355cc --- /dev/null +++ b/mintlify-docs/en/reference/applications/testing.mdx @@ -0,0 +1,152 @@ +--- +title: "Basic HTTP testing" +sidebarTitle: "System test" +--- + +This is the Vespa Testing reference for basic HTTP tests, used to write [Vespa application system tests](/en/applications/testing). + +These tests verify the behaviour of a Vespa application by using its HTTP interfaces. Basic HTTP tests are written in JSON; to write more advanced tests, see the [Java testing reference](/en/reference/applications/testing-java). + +See the [testing guide](/en/applications/testing) for examples of how to run the tests. + +## Test suites + +The [testing documentation](/en/applications/testing) defines three test scenarios, comprised of four test code categories. 
For basic HTTP tests, the category of a test is defined by its placement in the application tests directory: + +| Category | Directory | Description | +| --- | --- | --- | +| System test | tests/system-test/ | Independent, functional tests | +| Staging setup | tests/staging-setup/ | Set state before upgrade | +| Staging test | tests/staging-test/ | Verify state after upgrade | +| Production test | tests/production-test/ | Verify domain specific metrics | + +<Note> +**Note:** + +If the application package has Java code, the `tests` directory is `src/test/application/tests` +</Note> + +Each test is described by a JSON file, and may include other files using relative paths: + +```bash +$ ls -1 tests/*/* + +tests/production-test/metrics-test.json +tests/staging-setup/set-up-old-documents.json +tests/staging-test/verify-search-still-works.json +tests/system-test/data/document.json +tests/system-test/feed-and-search-test.json +tests/system-test/ranking-test.json +``` + +For an example with actual system and staging tests, check out a Vespa Cloud [sample test suite](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/CI-CD). Since production tests are highly application-specific, this suite has none, but such a test could be: + +```json +{ + "steps": [ + { + "request": { + "uri": "https://my.external.service/metrics/?query=customer-engagement" + } + } + ] +} +``` + +## Test file structure + +Each `.json` file directly under any of the directories listed above describes one test. Each test consists of a series of steps, and each step specifies an HTTP request to run and some assertions about the response to obtain. Some additional properties may also be specified on both the test and step levels. 
A full example, with `//` comments: + +```json expandable +{ + "name": "my test", + "defaults": { + "cluster": "default", + "parameters": { + "timeout": "1.618s" + } + }, + "steps": [ + { + "name": "clear existing documents", + "request": { + "method": "DELETE", + "uri": "/document/v1/", + "parameters": { + "cluster": "music", + "selection": "true" + } + } + }, + { + "name": "feed foo", + "request": { + "method": "POST", + // should contain payload as expected by /document/v1/ + "body": "foo/body.json", + // specify only the path and query for Vespa requests + "uri": "/document/v1/test/music/docid/foo?timeout=8s", + // JSON object file; merged with query from "uri" + "parameters": "foo/parameters.json" + } + // no response spec: just assert code 200 + }, + { + "name": "query for foo", + "request": { + // no "uri": defaults to "/search/" + "parameters": { + "query": "artist: foo" + } + }, + "response": { + "body": { + "root": { + "children": [ + // assert "children" has a single element ... + { + // ... which has the field "fields" ... + "fields": { + // ... where the field "artist" is "Foo Fighters" ... + "artist": "Foo Fighters" + }, + // ... and the field "relevance" close to 0.381862383599 + "relevance": 0.381862383599 + } + ] + } + } + } + } + ] +} +``` + +### Test JSON specification + +A full list of fields, with description: + +| Name | Parent | Type | Default | Description | +| --- | --- | --- | --- | --- | +| name | root step | string | file name, step *n* | Name used for display purposes in the test report. The file name is used by default for the test, while the 1-indexed "step n" is used for steps. | +| defaults | root | object | | Default settings for all steps in this test. May be overridden in each step. | +| steps | root | array | | The non-empty list of steps that constitute this test. | +| request | step | object | | A specification of a request to send, to Vespa, or to an external service. 
| +| cluster | defaults request | string | | The name of the Vespa cluster to send a request to, as specified in [services.xml](/en/reference/applications/services/services). If this is not specified, and the application has a single container cluster, this is used. | +| method | request | string | "GET" | The HTTP method to use for a request. | +| uri | request | string | "/search/" | When this is path + (encoded) query, the host is determined by the specified cluster; otherwise, it must be an absolute URI (with scheme), and its host is used. Query parameters specified here override those specified in the defaults. | +| parameters | defaults request | string object | | HTTP request query parameters. The values should not be encoded. These are merged with parameters from the specified URI, and override those specified in the defaults. If the value is a string, it must be a relative file reference to a parameters object. | +| body | request response | string object | | The body for a request, or the partial body (see [matching](#json-matching)) for a response. If the value is a string, it must be a relative file reference to a JSON object to be used in its place. | +| response | step | object | | A specification for assertions to make on the body of the HTTP response obtained by executing the HTTP request in the same step. | +| code | response | number | 200 | The status code the response should have. | + +### JSON matching + +All requests and responses must be in JSON format. The tests allow simple JSON verification, by describing *what should be present* in the actual response. This is done by specifying a JSON structure, a *template*, for each response, and requiring each field present in the template to match fields in the actual response. Unmatched fields result in a test failure, with the following rules: + +- Objects must contain all listed fields, and may also contain unlisted ones. +- Arrays must match element-by-element. 
+- Numbers must match within precision `1e-9`. +- All other values must match exactly. + +Note that the empty object `{ }` matches any other object, and can be used to fill elements of an array that require no further validation. diff --git a/mintlify-docs/en/reference/applications/validation-overrides.mdx b/mintlify-docs/en/reference/applications/validation-overrides.mdx new file mode 100644 index 0000000000..a627fe2b30 --- /dev/null +++ b/mintlify-docs/en/reference/applications/validation-overrides.mdx @@ -0,0 +1,44 @@ +--- +title: "validation-overrides.xml" +sidebarTitle: "" +--- + + +*validation-overrides.xml* is added to the root of an [application package](/en/reference/applications/application-packages) (i.e. next to [services.xml](/en/reference/applications/services/services)) to allow a deployment that otherwise fails to validate to proceed. The validation failure message will explain what the validation concerns and how to add a validation override. + +Validations protect against inadvertently corrupting a production instance. Overriding them may be useful e.g. if the application is not in production yet or if you think the consequences of inconsistencies or loss of the data in a particular field are fine. + +Read more about schema changes in the [schema reference](/en/reference/schemas/schemas#modifying-schemas). + +## Structure + +```xml +<validation-overrides> + <allow until="iso-8601-date" comment="Optional note">validation-id</allow> +</validation-overrides> +``` + +Any number of `allow` tags is permissible. Example: + +```xml +<validation-overrides> + <allow + until="2025-01-31" + comment="Reduce to needed cluster size after benchmarking">resources-reduction</allow> + <allow + until="2025-02-03">field-type-change</allow> +</validation-overrides> +``` + +## allow + +An `allow` tag disables a particular validation for a limited time and contains a single validation id. `allow` tags with unknown ids are ignored. 
+ +| Attribute | Mandatory | Value | +| --- | --- | --- | +| until | Yes | The last day this change is allowed, as an ISO-8601-format date in UTC, e.g. 2016-01-30. Dates may be at most 30 days in the future, but should be as close to now as possible for safety, while allowing time for review and propagation to all deployed zones. `allow`\-tags with dates in the past are ignored. | +| comment | No | Text explaining the reason for the change to humans. | + +## List of validation overrides + +See [ValidationId.java](https://github.com/vespa-engine/vespa/blob/master/config-model-api/src/main/java/com/yahoo/config/application/api/ValidationId.java) for a complete list of validation overrides. diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa.mdx new file mode 100644 index 0000000000..ba43db76bf --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa.mdx @@ -0,0 +1,60 @@ +--- +title: "vespa" +description: "The command-line tool for Vespa.ai" +--- + +### Synopsis + +The command-line tool for Vespa.ai. + +Use it on Vespa instances running locally, remotely or in Vespa Cloud - use a token for Vespa Cloud access: + +```bash +$ export VESPA_CLI_DATA_PLANE_TOKEN='value-of-token' +``` + +To get started, follow [/en/basics/deploy-an-application.html](/en/basics/deploy-an-application) + +The complete Vespa documentation is available at https://docs.vespa.ai. + +For detailed description of flags and configuration, see 'vespa help config'. + + +```bash +vespa [flags] +``` + +### Options + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. 
Must be "auto", "never", or "always" (default "auto") + -h, --help help for vespa + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa activate](/en/reference/clients/vespa-cli/vespa_activate) - Activate (deploy) a previously prepared application package +* [vespa auth](/en/reference/clients/vespa-cli/vespa_auth) - Manage Vespa Cloud credentials +* [vespa clone](/en/reference/clients/vespa-cli/vespa_clone) - Create files and directory structure from a Vespa sample application +* [vespa config](/en/reference/clients/vespa-cli/vespa_config) - Manage persistent values for global flags +* [vespa curl](/en/reference/clients/vespa-cli/vespa_curl) - Access Vespa directly using curl +* [vespa deploy](/en/reference/clients/vespa-cli/vespa_deploy) - Deploy (prepare and activate) an application package +* [vespa destroy](/en/reference/clients/vespa-cli/vespa_destroy) - Remove a deployed Vespa application and its data +* [vespa document](/en/reference/clients/vespa-cli/vespa_document) - Issue a single document operation to Vespa +* [vespa feed](/en/reference/clients/vespa-cli/vespa_feed) - Feed multiple document operations to Vespa +* [vespa fetch](/en/reference/clients/vespa-cli/vespa_fetch) - Download a deployed application package +* [vespa log](/en/reference/clients/vespa-cli/vespa_log) - Show the Vespa log +* [vespa prepare](/en/reference/clients/vespa-cli/vespa_prepare) - Prepare an application package for activation +* [vespa prod](/en/reference/clients/vespa-cli/vespa_prod) - Deploy an application package to production in Vespa Cloud +* [vespa query](/en/reference/clients/vespa-cli/vespa_query) - Issue a query to Vespa +* [vespa status](/en/reference/clients/vespa-cli/vespa_status) - Show Vespa endpoints and 
status +* [vespa test](/en/reference/clients/vespa-cli/vespa_test) - Run a test suite, or a single test +* [vespa version](/en/reference/clients/vespa-cli/vespa_version) - Show current CLI version and check for updates +* [vespa visit](/en/reference/clients/vespa-cli/vespa_visit) - Retrieve and print all documents from Vespa + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_activate.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_activate.mdx new file mode 100644 index 0000000000..a7d2e8cb7e --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_activate.mdx @@ -0,0 +1,32 @@ +--- +title: "vespa activate" +description: "Activate (deploy) a previously prepared application package" +--- + +```bash +vespa activate [flags] +``` + +### Options + +```bash + -h, --help help for activate + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth.mdx new file mode 100644 index 0000000000..774d23948e --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth.mdx @@ -0,0 +1,40 @@ +--- +title: "vespa auth" +description: "Manage Vespa Cloud credentials" +--- + +### Synopsis + +Manage Vespa Cloud credentials. + +```bash +vespa auth [flags] +``` + +### Options + +```bash + -h, --help help for auth +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai +* [vespa auth api-key](/en/reference/clients/vespa-cli/vespa_auth_api-key) - Create a new developer key for headless authentication with Vespa Cloud control plane +* [vespa auth cert](/en/reference/clients/vespa-cli/vespa_auth_cert) - Create a new self-signed certificate for authentication with Vespa Cloud data plane +* [vespa auth login](/en/reference/clients/vespa-cli/vespa_auth_login) - Authenticate Vespa CLI with Vespa Cloud control plane. 
This is preferred over api-key for interactive use +* [vespa auth logout](/en/reference/clients/vespa-cli/vespa_auth_logout) - Sign out of Vespa Cloud +* [vespa auth show](/en/reference/clients/vespa-cli/vespa_auth_show) - Show authenticated user + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_api-key.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_api-key.mdx new file mode 100644 index 0000000000..6dbb1eacaa --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_api-key.mdx @@ -0,0 +1,60 @@ +--- +title: "vespa auth api-key" +description: "Create a new developer key for headless authentication with Vespa Cloud control plane" +--- + +### Synopsis + +Create a new developer key for headless authentication with Vespa Cloud control plane + +A developer key is a tenant-wide secret, intended for headless communication with the Vespa Cloud control plane. For example when deploying from a continuous integration system. + +The developer key will be stored in the Vespa CLI home directory (see 'vespa help config'). Other commands will then automatically load the developer key as necessary. + +It's possible to override the developer key used through environment variables. This can be useful in continuous integration systems. + +Example of setting the key in-line: + + export VESPA_CLI_API_KEY="my api key" + +Example of loading the key from a custom path: + + export VESPA_CLI_API_KEY_FILE=/path/to/api-key + +Note that when overriding the developer key through environment variables, that key will always be used. It's not possible to specify a tenant-specific key through the environment. + +See [/en/security/guide.html](/en/security/guide) for more details about developer keys. 
+ +```bash +vespa auth api-key [flags] +``` + +### Examples + +```bash +$ vespa auth api-key -a my-tenant.my-app.my-instance +``` + +### Options + +```bash + -f, --force Force overwrite of existing developer key + -h, --help help for api-key +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa auth](/en/reference/clients/vespa-cli/vespa_auth) - Manage Vespa Cloud credentials + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_cert.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_cert.mdx new file mode 100644 index 0000000000..8641303e63 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_cert.mdx @@ -0,0 +1,73 @@ +--- +title: "vespa auth cert" +description: "Create a new self-signed certificate for authentication with Vespa Cloud data plane" +--- + +### Synopsis + +Create a new self-signed certificate for authentication with Vespa Cloud data plane. + +The private key and certificate will be stored in the Vespa CLI home directory (see 'vespa help config'). Other commands will then automatically load the certificate as necessary. The certificate will be added to your application package specified as an argument to this command (default '.'). 
+ +It's possible to override the private key and certificate used through environment variables. This can be useful in continuous integration systems. + +It's also possible to override the CA certificate which can be useful when using self-signed certificates with a self-hosted Vespa service. See [/en/security/mtls.html](/en/security/mtls) for more information. + +Example of setting the CA certificate, certificate and key in-line: + + export VESPA_CLI_DATA_PLANE_CA_CERT="my CA cert" + export VESPA_CLI_DATA_PLANE_CERT="my cert" + export VESPA_CLI_DATA_PLANE_KEY="my private key" + +Example of loading CA certificate, certificate and key from custom paths: + + export VESPA_CLI_DATA_PLANE_CA_CERT_FILE=/path/to/cacert + export VESPA_CLI_DATA_PLANE_CERT_FILE=/path/to/cert + export VESPA_CLI_DATA_PLANE_KEY_FILE=/path/to/key + +Example of disabling verification of the server's certificate chain and +hostname: + + export VESPA_CLI_DATA_PLANE_TRUST_ALL=true + +Note that when overriding key pair through environment variables, that key pair will always be used for all applications. It's not possible to specify an application-specific key. + +See [/en/security/guide.html](/en/security/guide) for more details. + +```bash +vespa auth cert [flags] +``` + +### Examples + +```bash +$ vespa auth cert +$ vespa auth cert -a my-tenant.my-app.my-instance +$ vespa auth cert -a my-tenant.my-app.my-instance path/to/application/package +``` + +### Options + +```bash + -f, --force Force overwrite of existing certificate and private key + -h, --help help for cert + -N, --no-add Do not add certificate to the application package +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. 
Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa auth](/en/reference/clients/vespa-cli/vespa_auth) - Manage Vespa Cloud credentials +* [vespa auth cert add](/en/reference/clients/vespa-cli/vespa_auth_cert_add) - Add certificate to application package + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_cert_add.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_cert_add.mdx new file mode 100644 index 0000000000..d3f1547db3 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_cert_add.mdx @@ -0,0 +1,47 @@ +--- +title: "vespa auth cert add" +description: "Add certificate to application package" +--- + +### Synopsis + +Add an existing self-signed certificate for Vespa Cloud deployment to your application package. + +The certificate will be loaded from the Vespa CLI home directory (see 'vespa help config') by default. + +The location of the application package can be specified as an argument to this command (default '.'). + +```bash +vespa auth cert add [flags] +``` + +### Examples + +```bash +$ vespa auth cert add -a my-tenant.my-app.my-instance +$ vespa auth cert add -a my-tenant.my-app.my-instance path/to/application/package +``` + +### Options + +```bash + -f, --force Force overwrite of existing certificate + -h, --help help for add +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. 
This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa auth cert](/en/reference/clients/vespa-cli/vespa_auth_cert) - Create a new self-signed certificate for authentication with Vespa Cloud data plane + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_login.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_login.mdx new file mode 100644 index 0000000000..86a5121cc1 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_login.mdx @@ -0,0 +1,47 @@ +--- +title: "vespa auth login" +description: "Authenticate Vespa CLI with Vespa Cloud control plane. This is preferred over api-key for interactive use" +--- + +### Synopsis + +Authenticate Vespa CLI with Vespa Cloud control plane. This is preferred over api-key for interactive use. + +This command runs a browser-based authentication flow for the Vespa Cloud control plane. + +Use --file-storage flag to store the refresh token in unencrypted files instead of the system keyring. This is useful in SSH/CI/Docker environments where keyring access may not be available. + + +```bash +vespa auth login [flags] +``` + +### Examples + +```bash +$ vespa auth login +``` + +### Options + +```bash + --file-storage Use file storage (unencrypted) instead of keyring for storing refresh token + -h, --help help for login +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. 
Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa auth](/en/reference/clients/vespa-cli/vespa_auth) - Manage Vespa Cloud credentials + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_logout.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_logout.mdx new file mode 100644 index 0000000000..9ac187413a --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_logout.mdx @@ -0,0 +1,37 @@ +--- +title: "vespa auth logout" +description: "Sign out of Vespa Cloud" +--- + +```bash +vespa auth logout [flags] +``` + +### Examples + +```bash +$ vespa auth logout +``` + +### Options + +```bash + -h, --help help for logout +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa auth](/en/reference/clients/vespa-cli/vespa_auth) - Manage Vespa Cloud credentials + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_show.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_show.mdx new file mode 100644 index 0000000000..98af7a06d7 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_auth_show.mdx @@ -0,0 +1,42 @@ +--- +title: "vespa auth show" +description: "Show authenticated user" +--- + +### Synopsis + +Show which user (if any) is authenticated with "auth login" + + +```bash +vespa auth show [flags] +``` + +### Examples + +```bash +$ vespa auth show +``` + +### Options + +```bash + -h, --help help for show +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa auth](/en/reference/clients/vespa-cli/vespa_auth) - Manage Vespa Cloud credentials + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_clone.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_clone.mdx new file mode 100644 index 0000000000..76c6370c26 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_clone.mdx @@ -0,0 +1,47 @@ +--- +title: "vespa clone" +description: "Create files and directory structure from a Vespa sample application" +--- + +### Synopsis + +Create files and directory structure from a Vespa sample application. + +Sample applications are downloaded from https://github.com/vespa-engine/sample-apps. + +By default, sample applications are cached in the user's cache directory. This directory can be overridden by setting the VESPA_CLI_CACHE_DIR environment variable. + +```bash +vespa clone sample-application-path target-directory [flags] +``` + +### Examples + +```bash +$ vespa clone album-recommendation my-app +``` + +### Options + +```bash + -f, --force Ignore cache and force downloading the latest sample application from GitHub + -h, --help help for clone + -l, --list List available sample applications +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion.mdx new file mode 100644 index 0000000000..2a08136484 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion.mdx @@ -0,0 +1,36 @@ +--- +title: "vespa completion" +description: "Generate the autocompletion script for the specified shell" +--- + +### Synopsis + +Generate the autocompletion script for vespa for the specified shell. See each sub-command's help for details on how to use the generated script. + + +### Options + +```bash + -h, --help help for completion +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use (cloud only) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use (cloud only) + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai +* [vespa completion bash](/en/reference/clients/vespa-cli/vespa_completion_bash) - Generate the autocompletion script for bash +* [vespa completion fish](/en/reference/clients/vespa-cli/vespa_completion_fish) - Generate the autocompletion script for fish +* [vespa completion powershell](/en/reference/clients/vespa-cli/vespa_completion_powershell) - Generate the autocompletion script for powershell +* [vespa completion zsh](/en/reference/clients/vespa-cli/vespa_completion_zsh) - Generate the autocompletion script for zsh + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_bash.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_bash.mdx new file mode 100644 index 0000000000..7406fb2510 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_bash.mdx @@ -0,0 +1,57 @@ +--- +title: "vespa completion bash" +description: "Generate the autocompletion script for bash" +--- + +### Synopsis + +Generate the autocompletion script for the bash shell. + +This script depends on the 'bash-completion' package. If it is not installed already, you can install it via your OS's package manager. + +To load completions in your current shell session: + +```bash +source <(vespa completion bash) +``` + +To load completions for every new session, execute once: + +#### Linux: + + vespa completion bash > /etc/bash_completion.d/vespa + +#### macOS: + + vespa completion bash > $(brew --prefix)/etc/bash_completion.d/vespa + +You will need to start a new shell for this setup to take effect. 
+ + +```bash +vespa completion bash +``` + +### Options + +```bash + -h, --help help for bash + --no-descriptions disable completion descriptions +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use (cloud only) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use (cloud only) + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa completion](/en/reference/clients/vespa-cli/vespa_completion) - Generate the autocompletion script for the specified shell + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_fish.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_fish.mdx new file mode 100644 index 0000000000..57491de378 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_fish.mdx @@ -0,0 +1,47 @@ +--- +title: "vespa completion fish" +description: "Generate the autocompletion script for fish" +--- + +### Synopsis + +Generate the autocompletion script for the fish shell. + +To load completions in your current shell session: + + vespa completion fish | source + +To load completions for every new session, execute once: + + vespa completion fish > ~/.config/fish/completions/vespa.fish + +You will need to start a new shell for this setup to take effect. 
+ + +```bash +vespa completion fish [flags] +``` + +### Options + +```bash + -h, --help help for fish + --no-descriptions disable completion descriptions +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use (cloud only) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use (cloud only) + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa completion](/en/reference/clients/vespa-cli/vespa_completion) - Generate the autocompletion script for the specified shell + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_powershell.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_powershell.mdx new file mode 100644 index 0000000000..4c77110f7e --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_powershell.mdx @@ -0,0 +1,43 @@ +--- +title: "vespa completion powershell" +description: "Generate the autocompletion script for powershell" +--- + +### Synopsis + +Generate the autocompletion script for powershell. + +To load completions in your current shell session: + + vespa completion powershell | Out-String | Invoke-Expression + +To load completions for every new session, add the output of the above command to your powershell profile. 
+ + +```bash +vespa completion powershell [flags] +``` + +### Options + +```bash + -h, --help help for powershell + --no-descriptions disable completion descriptions +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use (cloud only) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use (cloud only) + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa completion](/en/reference/clients/vespa-cli/vespa_completion) - Generate the autocompletion script for the specified shell + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_zsh.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_zsh.mdx new file mode 100644 index 0000000000..d3a20d384d --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_completion_zsh.mdx @@ -0,0 +1,59 @@ +--- +title: "vespa completion zsh" +description: "Generate the autocompletion script for zsh" +--- + +### Synopsis + +Generate the autocompletion script for the zsh shell. + +If shell completion is not already enabled in your environment you will need to enable it. 
You can execute the following once: + + echo "autoload -U compinit; compinit" >> ~/.zshrc + +To load completions in your current shell session: + +```bash +source <(vespa completion zsh) +``` + +To load completions for every new session, execute once: + +#### Linux: + + vespa completion zsh > "${fpath[1]}/_vespa" + +#### macOS: + + vespa completion zsh > $(brew --prefix)/share/zsh/site-functions/_vespa + +You will need to start a new shell for this setup to take effect. + + +```bash +vespa completion zsh [flags] +``` + +### Options + +```bash + -h, --help help for zsh + --no-descriptions disable completion descriptions +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use (cloud only) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use (cloud only) + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa completion](/en/reference/clients/vespa-cli/vespa_completion) - Generate the autocompletion script for the specified shell + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_config.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_config.mdx new file mode 100644 index 0000000000..2816228495 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_config.mdx @@ -0,0 +1,88 @@ +--- +title: "vespa config" +description: "Manage persistent values for global flags" +--- + +### Synopsis + +Manage persistent values for global flags. + +This command allows setting persistent values for the global flags found in Vespa CLI. 
On future invocations the flag can then be omitted as it is read from the config file instead. + +Configuration is written to $HOME/.vespa by default. This path can be overridden by setting the VESPA_CLI_HOME environment variable. + +When setting an option locally, the configuration is written to .vespa in the working directory. When reading configuration, Vespa CLI searches for .vespa in the current directory and parent directories, allowing you to run commands from subdirectories of your application. This allows you to have separate configuration options per application. + +Vespa CLI chooses the value for a given option in the following order, from most to least preferred: + +1. Flag value specified on the command line +2. Local config value +3. Global config value +4. Default value + +The following global flags/options can be configured: + +application + +Specifies the application ID to manage. It has three parts, separated by dots, with the third part being optional. If the third part is omitted it defaults to "default". This is only relevant for the "cloud" and "hosted" targets. See [/en/learn/tenant-apps-instances.html](/en/learn/tenant-apps-instances) for more details. This has no default value. Examples: tenant1.app1, tenant1.app1.instance1 + +cluster + +Specifies the container cluster to manage. If left empty (default) and the application has only one container cluster, that cluster is chosen automatically. When an application has multiple clusters, this must specify a valid cluster name, as specified in services.xml. See [/en/reference/applications/services/container.html](/en/reference/applications/services/container) for more details. + +color + +Controls how Vespa CLI uses colors. Setting this to "auto" (default) enables colors if supported by the terminal, "never" completely disables colors and "always" enables colors unilaterally. + +instance + +Specifies the instance of the application to manage. 
When specified, this takes precedence over the instance specified as part of the 'application' option. This has no default value and is only relevant for the "cloud" and "hosted" targets. Example: instance2 + +quiet + +Suppress informational output. Errors are still printed. + +target + +Specifies the target to use for commands that interact with a Vespa platform, e.g. vespa deploy or vespa query. Possible values are: + +- local: (default) Connect to a Vespa platform running at localhost. When using this target, container clusters are automatically discovered and are chosen with the cluster option. This assumes that the configserver is available on port 19071 (the default when using the Vespa container image). +- cloud: Connect to Vespa Cloud. When using this target, container clusters are automatically discovered and can be selected with the cluster option. +- hosted: Connect to hosted Vespa (reserved for internal use) +- *url*: Connect to a platform running at given URL. This instructs the command you're running to target a concrete URL. The cluster option cannot be used with this target. + +Authentication is configured automatically for the cloud and hosted targets. To set a custom private key and certificate, e.g. for use with a self-hosted Vespa installation configured with mTLS, see the documentation of 'vespa auth cert'. + +zone + +Specifies a custom zone to use when connecting to a Vespa Cloud application. This is only relevant for cloud and hosted targets and defaults to a dev zone. See [/en/operations/zones.html](/en/operations/zones) for available zones. Examples: dev.aws-us-east-1c, dev.gcp-us-central1-f + +```bash +vespa config [flags] +``` + +### Options + +```bash + -h, --help help for config +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. 
This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai +* [vespa config get](/en/reference/clients/vespa-cli/vespa_config_get) - Show given configuration option, or all configuration options +* [vespa config set](/en/reference/clients/vespa-cli/vespa_config_set) - Set a configuration option. +* [vespa config unset](/en/reference/clients/vespa-cli/vespa_config_unset) - Unset a configuration option. + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_config_get.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_config_get.mdx new file mode 100644 index 0000000000..46525318d8 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_config_get.mdx @@ -0,0 +1,47 @@ +--- +title: "vespa config get" +description: "Show given configuration option, or all configuration options" +--- + +### Synopsis + +Show given configuration option, or all configuration options. + +By default, this command prints the effective configuration for the current application, i.e. it takes into account any local configuration located in [working-directory]/.vespa. + + +```bash +vespa config get [option-name] [flags] +``` + +### Examples + +```bash +$ vespa config get +$ vespa config get target +$ vespa config get --local +``` + +### Options + +```bash + -h, --help help for get + -l, --local Show only local configuration, if any +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. 
Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa config](/en/reference/clients/vespa-cli/vespa_config) - Manage persistent values for global flags + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_config_set.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_config_set.mdx new file mode 100644 index 0000000000..1031d016a8 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_config_set.mdx @@ -0,0 +1,51 @@ +--- +title: "vespa config set" +description: "Set a configuration option." +--- + +```bash +vespa config set option-name value [flags] +``` + +### Examples + +```bash +# Set the target to Vespa Cloud +$ vespa config set target cloud + +# Set application, without a specific instance. The instance will be named "default" +$ vespa config set application my-tenant.my-application + +# Set application with a specific instance +$ vespa config set application my-tenant.my-application.my-instance + +# Set the instance explicitly. This will take precedence over an instance specified as part of the application option. +$ vespa config set instance other-instance + +# Set an option in local configuration, for the current application only +$ vespa config set --local zone dev.aws-us-east-1c +``` + +### Options + +```bash + -h, --help help for set + -l, --local Write option to local configuration, i.e. 
for the current application +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa config](/en/reference/clients/vespa-cli/vespa_config) - Manage persistent values for global flags + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_config_unset.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_config_unset.mdx new file mode 100644 index 0000000000..beb4240c49 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_config_unset.mdx @@ -0,0 +1,49 @@ +--- +title: "vespa config unset" +description: "Unset a configuration option." +--- + +### Synopsis + +Unset a configuration option. + +Unsetting a configuration option will reset it to its default value, which may be empty. + + +```bash +vespa config unset option-name [flags] +``` + +### Examples + +```bash +# Reset target to its default value +$ vespa config unset target + +# Stop overriding application option in local config +$ vespa config unset --local application +``` + +### Options + +```bash + -h, --help help for unset + -l, --local Unset option in local configuration, i.e. for the current application +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. 
Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa config](/en/reference/clients/vespa-cli/vespa_config) - Manage persistent values for global flags + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_curl.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_curl.mdx new file mode 100644 index 0000000000..30ee2984bd --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_curl.mdx @@ -0,0 +1,50 @@ +--- +title: "vespa curl" +description: "Access Vespa directly using curl" +--- + +### Synopsis + +Access Vespa directly using curl. + +Execute curl with the appropriate URL, certificate and private key for your application. Assumes MTLS authentication. + +For a more high-level interface to query and feeding, see the 'query' and 'document' commands. + + +```bash +vespa curl [curl-options] path [flags] +``` + +### Examples + +```bash +$ vespa curl /ApplicationStatus +$ vespa curl -- -X POST -H "Content-Type:application/json" --data-binary @src/test/resources/A-Head-Full-of-Dreams.json /document/v1/namespace/music/docid/1 +$ vespa curl -- -v --data-urlencode "yql=select * from music where album contains 'head'" /search/\?hits=5 +``` + +### Options + +```text + -n, --dry-run Print the curl command that would be executed + -h, --help help for curl + -w, --wait int Number of seconds to wait for service(s) to become ready. 
0 to disable (default 0) +``` + +### Options inherited from parent commands + +```text + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_deploy.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_deploy.mdx new file mode 100644 index 0000000000..af103fe2b0 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_deploy.mdx @@ -0,0 +1,59 @@ +--- +title: "vespa deploy" +description: "Deploy (prepare and activate) an application package" +--- + +### Synopsis + +Deploy (prepare and activate) an application package. + +An application package defines a deployable Vespa application. See [/en/reference/applications/application-packages.html](/en/reference/applications/application-packages) for details about the files contained in this package. + +To get started, 'vespa clone' can be used to download a sample application. + +This command deploys an application package. When deploy returns successfully the application package has been validated and activated on config servers. The process of applying it on individual nodes has started but may not have completed. + +If application directory is not specified, it defaults to working directory. 
+ +In Vespa Cloud you may override the Vespa runtime version (--version) for your deployment. This option should only be used if you have a reason for using a specific version. By default, Vespa Cloud chooses a suitable version for you. + + +```bash +vespa deploy [application-directory-or-file] [flags] +``` + +### Examples + +```bash +$ vespa deploy . +$ vespa deploy -t cloud +$ vespa deploy -t cloud -z dev.aws-us-east-1c # -z can be omitted here as this zone is the default +$ vespa deploy -t cloud -z dev.gcp-us-central1-f +``` + +### Options + +```bash + -A, --add-cert Copy certificate of the configured application to the current application package + -h, --help help for deploy + -l, --log-level string Log level for Vespa logs. Must be "error", "warning", "info" or "debug" (default "error") + -V, --version string Override the Vespa runtime version to use in Vespa Cloud + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_destroy.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_destroy.mdx new file mode 100644 index 0000000000..bb305e4196 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_destroy.mdx @@ -0,0 +1,52 @@ +--- +title: "vespa destroy" +description: "Remove a deployed Vespa application and its data" +--- + +### Synopsis + +Remove a deployed Vespa application and its data. + +This command removes the currently deployed application and permanently deletes its data. + +When run interactively, the command will prompt for confirmation before removing the application. When run non-interactively, the command will refuse to remove the application unless the --force option is given. + +This command can only be used to remove non-production deployments, in Vespa Cloud. See [/en/operations/deleting-applications.html](/en/operations/deleting-applications) for how to remove production deployments. + +For other systems, destroy the application by removing the containers in use by the application. For example: https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA#clean-up-after-testing + +```bash +vespa destroy [flags] +``` + +### Examples + +```bash +$ vespa destroy +$ vespa destroy -a mytenant.myapp.myinstance +$ vespa destroy --force +``` + +### Options + +```bash + --force Disable confirmation (default false) + -h, --help help for destroy +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. 
Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_document.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_document.mdx new file mode 100644 index 0000000000..27ce312477 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_document.mdx @@ -0,0 +1,56 @@ +--- +title: "vespa document" +description: "Issue a single document operation to Vespa" +--- + +### Synopsis + +Issue a single document operation to Vespa. + +The operation must be on the format documented in [/en/reference/schemas/document-json-format#document-operations](/en/reference/schemas/document-json-format#document-operations) + +When this returns successfully, the document is guaranteed to be visible in any subsequent get or query operation. + +To feed with high throughput, [/en/reference/clients/vespa-cli/vespa_feed.html](/en/reference/clients/vespa-cli/vespa_feed) should be used instead of this. + +```bash +vespa document json-file [flags] +``` + +### Examples + +```bash +$ vespa document src/test/resources/A-Head-Full-of-Dreams.json +``` + +### Options + +```bash + -d, --data string Document data to use instead of reading from file or stdin + --header strings Add a header to the HTTP request, on the format 'Header: Value'. This can be specified multiple times + -h, --help help for document + -T, --timeout int Timeout for the document request in seconds (default 60) + -v, --verbose Print the equivalent curl command for the document operation + -w, --wait int Number of seconds to wait for service(s) to become ready. 
0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai +* [vespa document get](/en/reference/clients/vespa-cli/vespa_document_get) - Gets one or more documents +* [vespa document put](/en/reference/clients/vespa-cli/vespa_document_put) - Writes a document to Vespa +* [vespa document remove](/en/reference/clients/vespa-cli/vespa_document_remove) - Removes a document from Vespa +* [vespa document update](/en/reference/clients/vespa-cli/vespa_document_update) - Modifies some fields of an existing document + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_get.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_get.mdx new file mode 100644 index 0000000000..bd3350872b --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_get.mdx @@ -0,0 +1,45 @@ +--- +title: "vespa document get" +description: "Gets one or more documents" +--- + +```bash +vespa document get id(s) [flags] +``` + +### Examples + +```bash +$ vespa document get id:mynamespace:music::song-1 +$ vespa document get id:mynamespace:music::song-1 id:mynamespace:music::song-2 +``` + +### Options + +```bash + -d, --data string Document data to use instead of reading from 
file or stdin + --field-set string Fields to include when reading document + --header strings Add a header to the HTTP request, on the format 'Header: Value'. This can be specified multiple times + -h, --help help for get + --ignore-missing Do not treat non-existent document as an error + -T, --timeout int Timeout for the document request in seconds (default 60) + -v, --verbose Print the equivalent curl command for the document operation + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa document](/en/reference/clients/vespa-cli/vespa_document) - Issue a single document operation to Vespa + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_put.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_put.mdx new file mode 100644 index 0000000000..6dc0546e6a --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_put.mdx @@ -0,0 +1,51 @@ +--- +title: "vespa document put" +description: "Writes a document to Vespa" +--- + +### Synopsis + +Writes the document in the given file to Vespa. If the document already exists, all its values will be replaced by this document. 
If the document ID is specified both as an argument and in the file the argument takes precedence. + +If json-file is a single dash ('-'), the document will be read from standard input. Alternatively, you can use the --data parameter to provide the document data directly. + + +```bash +vespa document put [id] json-file [flags] +``` + +### Examples + +```bash +$ vespa document put src/test/resources/A-Head-Full-of-Dreams.json +$ vespa document put id:mynamespace:music::a-head-full-of-dreams src/test/resources/A-Head-Full-of-Dreams.json +$ vespa document put id:mynamespace:music::a-head-full-of-dreams --data '{"fields":{"title":"My Title","artist":"My Artist"}}' +``` + +### Options + +```bash + -d, --data string Document data to use instead of reading from file or stdin + --header strings Add a header to the HTTP request, on the format 'Header: Value'. This can be specified multiple times + -h, --help help for put + -T, --timeout int Timeout for the document request in seconds (default 60) + -v, --verbose Print the equivalent curl command for the document operation + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa document](/en/reference/clients/vespa-cli/vespa_document) - Issue a single document operation to Vespa + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_remove.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_remove.mdx new file mode 100644 index 0000000000..6190c9b141 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_remove.mdx @@ -0,0 +1,47 @@ +--- +title: "vespa document remove" +description: "Removes a document from Vespa" +--- + +### Synopsis + +Removes the document specified either as a document ID or given in the json file. If the document ID is specified both as an argument and in the file the argument takes precedence. + +```bash +vespa document remove id | json-file [flags] +``` + +### Examples + +```bash +$ vespa document remove src/test/resources/A-Head-Full-of-Dreams-Remove.json +$ vespa document remove id:mynamespace:music::a-head-full-of-dreams +``` + +### Options + +```bash + -d, --data string Document data to use instead of reading from file or stdin + --header strings Add a header to the HTTP request, on the format 'Header: Value'. This can be specified multiple times + -h, --help help for remove + -T, --timeout int Timeout for the document request in seconds (default 60) + -v, --verbose Print the equivalent curl command for the document operation + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. 
Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa document](/en/reference/clients/vespa-cli/vespa_document) - Issue a single document operation to Vespa + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_update.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_update.mdx new file mode 100644 index 0000000000..2de8237c3e --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_document_update.mdx @@ -0,0 +1,48 @@ +--- +title: "vespa document update" +description: "Modifies some fields of an existing document" +--- + + +### Synopsis + +Updates the values of the fields given in a json file as specified in the file. If the document ID is specified both as an argument and in the file the argument takes precedence. + +```bash +vespa document update [id] json-file [flags] +``` + +### Examples + +```bash +$ vespa document update src/test/resources/A-Head-Full-of-Dreams-Update.json +$ vespa document update id:mynamespace:music::a-head-full-of-dreams src/test/resources/A-Head-Full-of-Dreams.json +``` + +### Options + +```bash + -d, --data string Document data to use instead of reading from file or stdin + --header strings Add a header to the HTTP request, on the format 'Header: Value'. This can be specified multiple times + -h, --help help for update + -T, --timeout int Timeout for the document request in seconds (default 60) + -v, --verbose Print the equivalent curl command for the document operation + -w, --wait int Number of seconds to wait for service(s) to become ready. 
0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa document](/en/reference/clients/vespa-cli/vespa_document) - Issue a single document operation to Vespa + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_feed.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_feed.mdx new file mode 100644 index 0000000000..53b0772840 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_feed.mdx @@ -0,0 +1,84 @@ +--- +title: "vespa feed" +description: "Feed multiple document operations to Vespa" +--- + +### Synopsis + +Feed multiple document operations to Vespa. + +This command can be used to feed large amounts of documents to a Vespa cluster efficiently. + +The contents of json-file must be either a JSON array or JSON objects separated by newline (JSONL). + +If json-file is a single dash ('-'), documents will be read from standard input. + +Once feeding completes, metrics of the feed session are printed to standard out in a JSON format: + +- feeder.operation.count: Number of operations passed to the feeder by the user, not counting retries. +- feeder.seconds: Total time spent feeding. +- feeder.ok.count: Number of successful operations. +- feeder.ok.rate: Number of successful operations per second. 
+- feeder.error.count: Number of network errors (transport layer). +- feeder.inflight.count: Number of operations currently being sent. +- http.request.count: Number of HTTP requests made, including retries. +- http.request.bytes: Number of bytes sent. +- http.request.MBps: Request throughput measured in MB/s. This is the raw + operation throughput, and not the network throughput, i.e. using compression does not affect this number. +- http.exception.count: Same as feeder.error.count. Present for compatibility with vespa-feed-client. +- http.response.count: Number of HTTP responses received. +- http.response.bytes: Number of bytes received. +- http.response.MBps: Response throughput measured in MB/s. +- http.response.error.count: Number of non-OK HTTP responses received. +- http.response.latency.millis.min: Lowest latency of a successful operation. +- http.response.latency.millis.avg: Average latency of successful operations. +- http.response.latency.millis.max: Highest latency of a successful operation. +- http.response.code.counts: Number of responses grouped by their HTTP code. + + +```bash +vespa feed json-file [json-file]... [flags] +``` + +### Examples + +```bash +$ vespa feed docs.jsonl moredocs.json +$ cat docs.jsonl | vespa feed - +``` + +### Options + +```bash + --compression string Whether to compress the document data when sending the HTTP request. Default is "auto", which compresses large documents. Must be "auto", "gzip" or "none" (default "auto") + --connections int The number of connections to use (default 8) + --deadline int Exit if this number of seconds elapse without any successful operations. 0 to disable (default 0) + --header strings Add a header to all HTTP requests, on the format 'Header: Value'. This can be specified multiple times + -h, --help help for feed + --inflight int The target number of inflight requests. 0 to dynamically detect the best value (default 0) + --progress int Print stats summary at given interval, in seconds. 
0 to disable (default 0) + --route string Target Vespa route for feed operations (default "default") + --speedtest int Perform a network speed test using given payload, in bytes. 0 to disable (default 0) + --speedtest-duration int Duration of speedtest, in seconds (default 60) + --timeout int Individual feed operation timeout in seconds. 0 to disable (default 0) + --trace int Network traffic trace level in the range [0,9]. 0 to disable (default 0) + --verbose Verbose mode. Print successful operations in addition to errors + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_fetch.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_fetch.mdx new file mode 100644 index 0000000000..9ff1387c9b --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_fetch.mdx @@ -0,0 +1,46 @@ +--- +title: "vespa fetch" +description: "Download a deployed application package" +--- + +### Synopsis + +Download a deployed application package. + +This command can be used to download an already deployed Vespa application package. 
The package is written as a ZIP file to the given path, or current directory if no path is given. + +```bash +vespa fetch [path] [flags] +``` + +### Examples + +```bash +$ vespa fetch +$ vespa fetch mydir/ +$ vespa fetch -t cloud mycloudapp.zip + +``` + +### Options + +```bash + -h, --help help for fetch +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_inspect.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_inspect.mdx new file mode 100644 index 0000000000..f6889b6933 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_inspect.mdx @@ -0,0 +1,32 @@ +--- +title: "vespa inspect" +description: "Provides insight" +--- + +### Synopsis + +Provides subcommands to inspect various things in more detail + +### Options + +```text + -h, --help help for inspect +``` + +### Options inherited from parent commands + +```text + -a, --application string The application to use (cloud only). Format "tenant.application.instance" - instance is optional + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. 
Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use (cloud only) + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai +* [vespa inspect profile](/en/reference/clients/vespa-cli/vespa_inspect_profile) - Inspect profiling results + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_inspect_profile.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_inspect_profile.mdx new file mode 100644 index 0000000000..ce817130a0 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_inspect_profile.mdx @@ -0,0 +1,43 @@ +--- +title: "vespa inspect profile" +description: "Inspect profiling results" +--- + +### Synopsis + +Inspect profiling results previously obtained by vespa query --profile + +<Note> +**Note:** + +This feature is experimental and currently under development. +</Note> +Profiling results can also be analyzed with vespa-query-analyzer (part of the Vespa installation). + +```bash +vespa inspect profile [flags] +``` + +### Options + +```text + -h, --help help for profile + -f, --profile-file string Name of the profile file to inspect (default "vespa_query_profile_result.json") +``` + +### Options inherited from parent commands + +```text + -a, --application string The application to use (cloud only). Format "tenant.application.instance" - instance is optional + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. 
Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use (cloud only) + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa inspect](/en/reference/clients/vespa-cli/vespa_inspect) - Provides insight + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_log.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_log.mdx new file mode 100644 index 0000000000..2129b15efe --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_log.mdx @@ -0,0 +1,54 @@ +--- +title: "vespa log" +description: "Show the Vespa log" +--- + +### Synopsis + +Show the Vespa log. + +The logs shown can be limited to a relative or fixed period. All timestamps are shown in UTC. + +Logs for the past hour are shown if no arguments are given. + + +```bash +vespa log [relative-period] [flags] +``` + +### Examples + +```bash +$ vespa log 1h +$ vespa log --nldequote=false 10m +$ vespa log --from 2021-08-25T15:00:00Z --to 2021-08-26T02:00:00Z +$ vespa log --follow +``` + +### Options + +```text + -f, --follow Follow logs + -F, --from string Include logs since this timestamp (RFC3339 format) + -h, --help help for log + -l, --level string The maximum log level to show. Must be "error", "warning", "info" or "debug" (default "debug") + -n, --nldequote Dequote LF and TAB characters in log messages (default true) + -T, --to string Include logs until this timestamp (RFC3339 format) +``` + +### Options inherited from parent commands + +```text + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. 
This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_prepare.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_prepare.mdx new file mode 100644 index 0000000000..88ca9da25f --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_prepare.mdx @@ -0,0 +1,32 @@ +--- +title: "vespa prepare" +description: "Prepare an application package for activation" +--- + +```bash +vespa prepare [application-directory-or-file] [flags] +``` + +### Options + +```bash + -h, --help help for prepare + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_prod.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_prod.mdx new file mode 100644 index 0000000000..e0c8b3a0cd --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_prod.mdx @@ -0,0 +1,46 @@ +--- +title: "vespa prod" +description: "Deploy an application package to production in Vespa Cloud" +--- + +### Synopsis + +Deploy an application package to production in Vespa Cloud. + +Configure and deploy your application package to production in Vespa Cloud. + +```bash +vespa prod [flags] +``` + +### Examples + +```bash +$ vespa prod init +$ vespa prod deploy +``` + +### Options + +```text + -h, --help help for prod +``` + +### Options inherited from parent commands + +```text + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai +* [vespa prod deploy](/en/reference/clients/vespa-cli/vespa_prod_deploy) - Deploy an application to production +* [vespa prod init](/en/reference/clients/vespa-cli/vespa_prod_init) - Modify service.xml and deployment.xml for production deployment + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_prod_deploy.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_prod_deploy.mdx new file mode 100644 index 0000000000..bc391e1b1b --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_prod_deploy.mdx @@ -0,0 +1,58 @@ +--- +title: "vespa prod deploy" +description: "Deploy an application to production" +--- + +### Synopsis + +Deploy an application to production. + +This command uploads an application package to Vespa Cloud and deploys it to the production zones specified in deployment.xml. + +Nodes are allocated to the application according to resources specified in services.xml. + +For more information about production deployments in Vespa Cloud see: +[/en/operations/production-deployment.html](/en/operations/production-deployment) +[/en/operations/automated-deployments.html](/en/operations/automated-deployments) + + +```bash +vespa prod deploy [application-directory-or-file] [flags] +``` + +### Examples + +```bash +$ mvn package # when adding custom Java components +$ vespa prod deploy +``` + +### Options + +```bash + -A, --add-cert Copy certificate of the configured application to the current application package (default false) + --author-email string Email of the author of the commit being deployed + --commit string Identifier of the source code being deployed. For example a commit hash + --description string Description of the source code being deployed. For example a git commit message + -h, --help help for deploy + --risk int The risk score of source code being deployed. 
0 to ignore (default 0) + --source-url string URL which points to the source code being deployed. For example the build job running the submission + --wait int Seconds to wait for the build to complete before returning (0 to return immediately) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa prod](/en/reference/clients/vespa-cli/vespa_prod) - Deploy an application package to production in Vespa Cloud + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_prod_init.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_prod_init.mdx new file mode 100644 index 0000000000..3fc319e902 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_prod_init.mdx @@ -0,0 +1,41 @@ +--- +title: "vespa prod init" +description: "Modify service.xml and deployment.xml for production deployment" +--- + +### Synopsis + +Modify service.xml and deployment.xml for production deployment. + +Only basic deployment configuration is available through this command. For advanced configuration see the relevant Vespa Cloud documentation and make changes to deployment.xml and services.xml directly. 
+ +Reference: +[/en/reference/applications/services/services](/en/reference/applications/services/services) +[/en/reference/applications/deployment](/en/reference/applications/deployment) + +```bash +vespa prod init [flags] +``` + +### Options + +```bash + -h, --help help for init +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa prod](/en/reference/clients/vespa-cli/vespa_prod) - Deploy an application package to production in Vespa Cloud + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_query.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_query.mdx new file mode 100644 index 0000000000..0ccde77e26 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_query.mdx @@ -0,0 +1,52 @@ +--- +title: "vespa query" +description: "Issue a query to Vespa" +--- + +### Synopsis + +Issue a query to Vespa. + +Any parameter from [/en/reference/api/query.html](/en/reference/api/query) can be set by the syntax [parameter-name]=[value]. 
+ +```bash +vespa query query-parameters [flags] +``` + +### Examples + +```bash +$ vespa query 'yql=select * from music where album contains "head"' hits=5 +$ vespa query --format=plain 'yql=select * from music where album contains "head"' hits=5 +$ vespa query --file q-vector.json +$ vespa query --header='X-First-Name: Joe' 'yql=select * from music where album contains "head"' hits=5 +``` + +### Options + +```bash + --file string Read query parameters from the given JSON file and send a POST request, with overrides from arguments + --format string Output format. Must be 'human' (human-readable) or 'plain' (no formatting) (default "human") + --header strings Add a header to the HTTP request, on the format 'Header: Value'. This can be specified multiple times + -h, --help help for query + -T, --timeout int Timeout for the query in seconds (default 10) + -v, --verbose Print the equivalent curl command for the query + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_status.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status.mdx new file mode 100644 index 0000000000..efd5d15b83 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status.mdx @@ -0,0 +1,53 @@ +--- +title: "vespa status" +description: "Show Vespa endpoints and status" +--- + +### Synopsis + +Show Vespa endpoints and status. + +This command shows the current endpoints, and their status, of a deployed Vespa application. + +```bash +vespa status [flags] +``` + +### Examples + +```bash +$ vespa status +$ vespa status --cluster mycluster +$ vespa status --cluster mycluster --wait 600 +$ vespa status --format plain --cluster mycluster +$ vespa status --no-verify +``` + +### Options + +```bash + --format string Output format. Must be 'human' (human-readable), 'plain' (cluster URL only), or 'json' (default "human") + -h, --help help for status + --no-verify Skip checking service status (control plane only) + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai +* [vespa status deploy](/en/reference/clients/vespa-cli/vespa_status_deploy) - Show status of the Vespa deploy service +* [vespa status deployment](/en/reference/clients/vespa-cli/vespa_status_deployment) - Show status of a Vespa deployment +* [vespa status endpoint](/en/reference/clients/vespa-cli/vespa_status_endpoint) - Show Vespa endpoints without checking their status + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_deploy.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_deploy.mdx new file mode 100644 index 0000000000..eaf50e64a7 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_deploy.mdx @@ -0,0 +1,39 @@ +--- +title: "vespa status deploy" +description: "Show status of the Vespa deploy service" +--- + +```bash +vespa status deploy [flags] +``` + +### Examples + +```bash +$ vespa status deploy +``` + +### Options + +```bash + --format string Output format. Must be 'human' (human-readable text), 'plain' (cluster URL only), or 'json' (default "human") + -h, --help help for deploy + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa status](/en/reference/clients/vespa-cli/vespa_status) - Show Vespa endpoints and status + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_deployment.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_deployment.mdx new file mode 100644 index 0000000000..4e1d30b92f --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_deployment.mdx @@ -0,0 +1,50 @@ +--- +title: "vespa status deployment" +description: "Show status of a Vespa deployment" +--- + +### Synopsis + +Show status of a Vespa deployment. + +This command shows whether a Vespa deployment has converged on the latest run (Vespa Cloud) or config generation (self-hosted). If an argument is given, show the convergence status of that particular run or generation. + + +```bash +vespa status deployment [flags] +``` + +### Examples + +```bash +$ vespa status deployment +$ vespa status deployment -t cloud [run-id] +$ vespa status deployment -t local [session-id] +$ vespa status deployment -t local [session-id] --wait 600 + +``` + +### Options + +```bash + -h, --help help for deployment + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + --format string Output format. Must be 'human' (human-readable), 'plain' (cluster URL only), or 'json' (default "human") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. 
Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa status](/en/reference/clients/vespa-cli/vespa_status) - Show Vespa endpoints and status + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_document.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_document.mdx new file mode 100644 index 0000000000..23a7cef4bc --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_document.mdx @@ -0,0 +1,38 @@ +--- +title: "vespa status document" +description: "Verify that the document service is ready to use" +--- + +```bash +vespa status document [flags] +``` + +### Examples + +```bash +$ vespa status document +``` + +### Options + +```bash + -h, --help help for document + -w, --wait int Number of seconds to wait for a service to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone +``` + +### SEE ALSO + +* [vespa status](/en/reference/clients/vespa-cli/vespa_status) - Verify that a service is ready to use (query by default) + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_endpoint.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_endpoint.mdx new file mode 100644 index 0000000000..0698337115 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_endpoint.mdx @@ -0,0 +1,49 @@ +--- +title: "vespa status endpoint" +description: "Show Vespa endpoints without checking their status" +--- + +### Synopsis + +Show Vespa endpoints without checking their status. + +This command shows the current endpoints of a deployed Vespa application, discovered from the control plane, without contacting the data plane to check their status. This is useful when you only have control plane credentials. + +This is equivalent to: vespa status --no-verify + +```bash +vespa status endpoint [flags] +``` + +### Examples + +```bash +$ vespa status endpoint +$ vespa status endpoint --cluster mycluster +$ vespa status endpoint --format plain +``` + +### Options + +```bash + --format string Output format. Must be 'human' (human-readable), 'plain' (cluster URL only), or 'json' (default "human") + -h, --help help for endpoint + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. 
Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa status](/en/reference/clients/vespa-cli/vespa_status) - Show Vespa endpoints and status + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_query.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_query.mdx new file mode 100644 index 0000000000..86531956a2 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_status_query.mdx @@ -0,0 +1,38 @@ +--- +title: "vespa status query" +description: "Verify that the query service is ready to use (default)" +--- + +```bash +vespa status query [flags] +``` + +### Examples + +```bash +$ vespa status query +``` + +### Options + +```bash + -h, --help help for query + -w, --wait int Number of seconds to wait for a service to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone +``` + +### SEE ALSO + +* [vespa status](/en/reference/clients/vespa-cli/vespa_status) - Verify that a service is ready to use (query by default) + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_test.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_test.mdx new file mode 100644 index 0000000000..f7fce534b6 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_test.mdx @@ -0,0 +1,47 @@ +--- +title: "vespa test" +description: "Run a test suite, or a single test" +--- + +### Synopsis + +Run a test suite, or a single test + +Runs all JSON test files in the specified directory, or the single JSON test file specified. + +See [/en/reference/applications/testing.html](/en/reference/applications/testing) for details. + +```bash +vespa test test-directory-or-file [flags] +``` + +### Examples + +```bash +$ vespa test src/test/application/tests/system-test +$ vespa test src/test/application/tests/system-test/feed-and-query.json +``` + +### Options + +```bash + -h, --help help for test + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. 
This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_version.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_version.mdx new file mode 100644 index 0000000000..242a01bf73 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_version.mdx @@ -0,0 +1,32 @@ +--- +title: "vespa version" +description: "Show current CLI version and check for updates" +--- + +```bash +vespa version [flags] +``` + +### Options + +```text + -h, --help help for version + -n, --no-check Do not check if a new version is available +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/clients/vespa-cli/vespa_visit.mdx b/mintlify-docs/en/reference/clients/vespa-cli/vespa_visit.mdx new file mode 100644 index 0000000000..0041197d63 --- /dev/null +++ b/mintlify-docs/en/reference/clients/vespa-cli/vespa_visit.mdx @@ -0,0 +1,63 @@ +--- +title: "vespa visit" +description: "Retrieve and print all documents from Vespa" +--- + +### Synopsis + +Retrieve and print all documents from Vespa. 
+ +By default, prints each document received on its own line (JSONL format). + + +```bash +vespa visit [flags] +``` + +### Examples + +```bash +$ vespa visit # get documents from any cluster +$ vespa visit --content-cluster search # get documents from cluster named "search" +$ vespa visit --field-set "[id]" # list document IDs + +``` + +### Options + +```bash + --bucket-space strings The "default" or "global" bucket space (default [global,default]) + --chunk-count int Chunk by count (default 1000) + --content-cluster string Which content cluster to visit documents from (default "*") + --field-set string Which fieldset to ask for + --from string Timestamp to visit from, in seconds + --header strings Add a header to the HTTP request, on the format 'Header: Value'. This can be specified multiple times + -h, --help help for visit + --json-lines Output documents as JSON lines (default true) + --make-feed Output JSON array suitable for vespa-feeder + --pretty-json Format pretty JSON + --selection string Select subset of cluster + --slice-id int The number of the slice this visit invocation should fetch (default -1) + --slices int Split the document corpus into this number of independent slices (default -1) + --stream Stream the HTTP responses + --to string Timestamp to visit up to, in seconds + -v, --verbose Print the equivalent curl command for the visit operation + -w, --wait int Number of seconds to wait for service(s) to become ready. 0 to disable (default 0) +``` + +### Options inherited from parent commands + +```bash + -a, --application string The application to use. Format "tenant.application.instance" - instance is optional (tenant required for cloud targets) + -C, --cluster string The container cluster to use. This is only required for applications with multiple clusters + -c, --color string Whether to use colors in output. 
Must be "auto", "never", or "always" (default "auto") + -i, --instance string The instance of the application to use + -q, --quiet Print only errors + -t, --target string The target platform to use. Must be "local", "cloud", "hosted" or an URL (default "local") + -z, --zone string The zone to use. This defaults to a dev zone (cloud only) +``` + +### SEE ALSO + +* [vespa](/en/reference/clients/vespa-cli/vespa) - The command-line tool for Vespa.ai + diff --git a/mintlify-docs/en/reference/operations/health-checks.mdx b/mintlify-docs/en/reference/operations/health-checks.mdx new file mode 100644 index 0000000000..4df3cdd5c1 --- /dev/null +++ b/mintlify-docs/en/reference/operations/health-checks.mdx @@ -0,0 +1,47 @@ +--- +title: "Health checks reference" +sidebarTitle: "Health checks" +--- + +This is the reference for loadbalancer healthchecks to [containers](/en/applications/containers). + +By default, a container configures an instance of [VipStatusHandler](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/java/com/yahoo/container/handler/VipStatusHandler.java) to serve `/status.html`. This will respond with status code 200 and text *OK* if content clusters are UP. See [VipStatus.java](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/java/com/yahoo/container/handler/VipStatus.java) for details. + +Applications with multiple content clusters should implement custom handlers for healthchecks, if the built-in logic is inadequate for the usage. Also refer to [federation](/en/querying/federation) for how to manage data sources. 
+ +## Override using a status file + +Use `container.core.vip-status` to make `VipStatusHandler` use a file for health status: + +```xml +<container> + <config name="container.core.vip-status"> + <accessdisk>true</accessdisk> + <statusfile>/full-path-to/status-response.html</statusfile> + </config> +``` + +If the file exists, its contents will be served on `/status.html`, otherwise an error message will be generated. To remove a container from service, delete or rename the file to serve. + +## Alternative / multiple paths + +`VipStatusHandler` only looks at a single file path by default. As it is independent of the URI path, it is possible to configure multiple handler instances to serve alternative or custom messages - example: + +```xml +<handler id="vipFreshness" class="com.yahoo.container.handler.VipStatusHandler"> + <binding>http://*:*/docproc/freshness-data.xml</binding> + <config name="container.core.vip-status"> + <accessdisk>true</accessdisk> + <statusfile>/full-path-to/freshness-data.xml</statusfile> + </config> +</handler> +<handler id="vipClustering" class="com.yahoo.container.handler.VipStatusHandler"> + <binding>http://*:*/docproc/ClusteringDocproc.status</binding> + <config name="container.core.vip-status"> + <accessdisk>true</accessdisk> + <statusfile>/full-path-to/ClusteringDocproc.status</statusfile> + </config> +</handler> +``` + +The paths `/docproc/freshness-data.xml` and `/docproc/ClusteringDocproc.status` serve the files located at `/full-path-to/freshness-data.xml` and `/full-path-to/ClusteringDocproc.status`, respectively. As the handler instances are independent, a container can be taken out of one type of rotation without affecting another. 
\ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/log-files.mdx b/mintlify-docs/en/reference/operations/log-files.mdx new file mode 100644 index 0000000000..ff37b5841b --- /dev/null +++ b/mintlify-docs/en/reference/operations/log-files.mdx @@ -0,0 +1,137 @@ +--- +title: "Log file reference" +sidebarTitle: "Log files" +--- + +All Vespa components use a common log module for logging. These log messages are added to a local log file in *`$VESPA_HOME/logs/vespa/`* and filtered, then forwarded, to the log server on the administration node. The log archive and rotation is explained in [log server](#log-server). + +<Note> +**Note:** + +If Vespa is running in a local container (named "vespa"), run `docker exec vespa vespa-logfmt` to quickly dump logs. +</Note> + +## Log file fields + +Log files are in a machine-readable log format, made more human-readable by [vespa-logfmt](/en/reference/operations/self-managed/tools#vespa-logfmt) - it can filter out log messages from specific programs, only show certain log levels, print the time in a more directly understandable format and so on. Each line that is logged consists of the following fields, in order, separated by a TAB (ASCII 9) character: + +```bash +time host pid service component level message +``` + +| Log field | Description | +| --- | --- | +| *time* | Time in seconds since 1970-01-01 UTC, with optional fractional seconds after. E.g. 1102675319.726342 | +| *host* | The hostname of the machine that produced this log entry | +| *pid* | The process id, and an optional thread-id of the process/thread that logged the message | +| *service* | The Vespa service name of the logger | +| *component* | The component name that logged. An application may have multiple subcomponents with their own component names, usually starts with the name of the binary | +| *level* | One of fatal, error, warning, info, config, event, debug, or spam | +| *message* | The log message itself. 
All dangerous characters are escaped (CR, NL, TAB, \\, ASCII &lt; 32 and ASCII 128..159) | + +| Log level | Description | +| --- | --- | +| *fatal* | Fatal error messages. The application must exit immediately, and restarting it will not help | +| *error* | Error messages. These are serious, the application cannot function correctly | +| *warning* | Warnings - the application may be able to continue, but the situation should be looked into | +| *info* | Informational messages that are not reporting error conditions, but should still be useful to the operator | +| *config* | Configuration settings | +| *event* | [Machine-readable events](#log-events). May contain information about processes starting and stopping, and various metrics | +| *debug* | Debug messages - normally suppressed | +| *spam* | Low-level debug messages, normally suppressed. Generates massive amounts of logs when enabled | + +## Controlling log levels + +Use [vespa-logctl](/en/reference/operations/self-managed/tools#vespa-logctl) to change active log levels of Vespa programs running as services, at run-time. When running in the cloud, tuning can be done in services.xml - see [admin element (logging)](/en/reference/applications/services/admin#logging). Standalone programs will instead read the environment variable *VESPA\_LOG\_LEVEL* on startup to determine which log levels should be active. The default setting for *VESPA\_LOG\_LEVEL* is *"all -debug -spam"*, which enables all log levels except debug and spam. + +*vespa-logctl* shows or modifies the active log levels for a program, or for parts of a program, while it is running. This is useful for enabling debug output in parts of a live system for diagnosing problems. It can also be used to silence programs that log too verbosely. + +Programs that can be controlled by *vespa-logctl* put log control files in *`$VESPA_HOME/var/db/vespa/logcontrol/service.logcontrol`*. If this file exists on program startup, it will be used to set the logging levels. 
This means that log level modifications done with *vespa-logctl* are sticky, and can also be performed even if the program is not running. + +## Log events + +Event messages are log messages of the *event* type. Events contain a well-defined payload which makes them suitable for automated processing of various kinds, like alerting. An event is emitted by a component when something of interest happens to it, or when it has some metric data it wants to share with the world. Like all other log messages, events are collected to the admin nodes by the logserver component, where they may be found in the Vespa log or intercepted programmatically by a logserver plugin. + +Metrics are used to report on internal variables detailing the processing performed by a particular component. *VALUES* are numbers with momentary significance, such as queue lengths and latencies. *COUNTERS* are numbers increasing monotonically with each processing step, such as the number of documents processed, or number of queries. Refer to the [metrics API](/en/operations/metrics). + +Each event has an event *type*, a *version* and an optional *payload*. In the log format, event types are expressed as a single word, versions as a simple integer, and the payload as a set of *key=value* pairs. The event payload is backslash-quoted just like log messages are in general. This means that events may be double-quoted during transport. Double-quote delimiters are not supported. + +| Event | Description | +| --- | --- | +| starting | Payload: *`name=<name>`*<br/><br/> This event is sent by processes when they are about to start another process. Typical for, but not limited to, shell scripts. This event is not required to track processes, but is useful in cases where a sub-process may fail during startup. Example:<br/><br/>`starting container for default/container.0` | +| started | Payload: *`name=<name>`* <br/><br/> The *started* event is sent by a service that just started up. 
Example:<br/><br/>`started/1 name="vespa-proton"` | +| stopping | Payload: *`name=<name> why=<why>`*<br/><br/> The *stopping* event is sent by a process that is about to exit. Example:<br/><br/>`stopping/1 name="vespa-proton" why="clean shutdown"` | +| stopped | Payload: *`name=<name> pid=<pid> exitcode=<exitcode>`*<br/><br/> This event is sent by a process monitoring when a sub-process exits. Example:<br/><br/>`stopped/1 name="vespa-proton" pid=14523 exitcode=0` | +| crash | Payload: *`name=<name> pid=<pid> signal=<signal>`* <br/><br/> Submitted by a process monitoring a sub-process when the sub-process crashes (dumps core etc.). Example:<br/><br/>`crash/1 name="vespa-proton" pid=12345 signal=11` | +| count | Payload: *`name=<name> value=<value>`* <br/><br/> General event for counts - for tracking any type of counter metric. The *name* is specific to each library/application. Counters are assumed to increase with time, counting the number of events since the program was started. Example:<br/><br/>`count/1 name="queries" value=10` | +| value | Payload: *`name=<name> value=<value>`* <br/><br/> General event for values - for tracking any type of value metric. *Value is for values that cannot be counts*. Typical values are queue lengths, transaction frequencies and so on. Example:<br/><br/>`value/1 name="peak_qps" value=200` | +| state | Payload: *`name=<name> value=<value>`*<br/><br/> General event for components in a process. *value* contains a string with more detailed information on what has happened. Note that the format and content of such strings can change between releases. Example:<br/><br/>`state/1 name="transactionlog.replay.start" value="{"domain":"test","serialnum":{"first":1,"last":1000}}"` | + +## Logd + +A small program named *logd* is responsible for rotating the `vespa.log` file and also forwarding most log messages (see next section for details) to the log server. The log file is rotated after 24 hours, or if it grows too large. 
Rotated logs are removed by logd after 30 days, or if the total size grows above 1000 MB. + +## Log server + +On the log server on the administration node, the *Archiver* plugin will write the log messages from each node to a log archive. These messages are written to the log file based on the message timestamp. The log files are located in the `$VESPA_HOME/logs/vespa/logarchive` directory. The catalog structure is like: + +```bash +logarchive/<year>/<month>/<day>/<hour>-<serial> +``` + +For instance, a message logged at 2016-07-22 08:05:00 will be found in: + +```bash +logarchive/2016/07/22/08-0 +``` + +All dates and times are in UTC. If the log file exceeds 20 MB, the file will be rotated and the serial number will increase. Rotated log files more than *two hours* old will be compressed to save disk space. Archived log files in the log archive will be deleted for two reasons: + +- Log file is more than 30 days old +- The full size of the log archive exceeds 30GB + +<Note> +**Note:** + +If you need to remove log files more aggressively than this to e.g. prevent running out of storage space, you need to add a way of purging log files no longer needed yourself. +</Note> + +Events and log messages with level *debug* and *spam* are normally filtered out before sending to the log archive. As an example, to forward events and *debug* log messages, add this to *services.xml*: + +```xml +<services> + <config name="cloud.config.log.logd"> + <loglevel> + <event> + <forward>true</forward> + </event> + <debug> + <forward>true</forward> + </debug> + </loglevel> + </config> +``` + +## Access log file content + +The Container logs each request in its access log. The log files are found in *`$VESPA_HOME/logs/vespa/access/`*. See [access logging](/en/operations/access-logging) for details. + +### Time values in the access log compared to metrics and log events + +The timing in the access log will in general be slightly off compared to the timing values in vespa.log. 
The reason is that the "probes" into the system are placed at slightly different levels of abstraction. The explanations here are directed at experienced users and troubleshooting. + +#### Definition of processing time in the access log + +Processing time in the access log starts when the execution is first invoked from the search handler. The end is dependent on whether the response is asynchronous or not. For a synchronous response, the end is after the renderer has been invoked, but before the rendering buffer is flushed. For an asynchronous response, e.g. a normal search response, the end is defined as when the completion handler is created. That means after control flow has returned from the search chains, but before any network traffic or rendering has been done. + +#### Definition of processing time in the vespa.log + +StatisticsSearcher defines the metric *query\_latency* and the log event *mean\_query\_latency*. The data fed into both is the same. The start of the interval is defined as when the control flow enters StatisticsSearcher, the end as when the next searcher after StatisticsSearcher returns from search(). This has the side effect of *not* including fill time if the result was not already filled when passed on from StatisticsSearcher. This may happen if the SearchHandler has to invoke fill() itself, e.g. if no searchers need to access hit fields. + +#### Timing summary + +The access log includes everything happening before rendering, but will exclude expensive rendering logic and slow networks. The query latency event and metrics only cover what happens inside the search chain where StatisticsSearcher is placed, and may exclude summary fetching. + +### ZooKeeper Log + +The ZooKeeper log file is normally not necessary to monitor on a regular basis, but is mentioned here as a possible source of information in case you should ever need to debug the Vespa configuration system. It is located at `$VESPA_HOME/logs/vespa/zookeeper.<servicename>.log`. 
\ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/clustercontroller.mdx b/mintlify-docs/en/reference/operations/metrics/clustercontroller.mdx new file mode 100644 index 0000000000..100611b171 --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/clustercontroller.mdx @@ -0,0 +1,30 @@ +--- +title: "ClusterController Metrics" +sidebarTitle: "Cluster controller metrics" +--- + +| Name | Unit | Description | +| --- | --- | --- | +| cluster-controller.down.count | node | Number of content nodes down | +| cluster-controller.initializing.count | node | Number of content nodes initializing | +| cluster-controller.maintenance.count | node | Number of content nodes in maintenance | +| cluster-controller.retired.count | node | Number of content nodes that are retired | +| cluster-controller.stopping.count | node | Number of content nodes currently stopping | +| cluster-controller.up.count | node | Number of content nodes up | +| cluster-controller.cluster-state-change.count | node | Number of nodes changing state | +| cluster-controller.nodes-not-converged | node | Number of nodes not converging to the latest cluster state version | +| cluster-controller.stored-document-count | document | Total number of unique documents stored in the cluster | +| cluster-controller.stored-document-bytes | byte | Combined byte size of all unique documents stored in the cluster (not including replication) | +| cluster-controller.cluster-buckets-out-of-sync-ratio | fraction | Ratio of buckets in the cluster currently in need of syncing | +| cluster-controller.busy-tick-time-ms | millisecond | Time busy | +| cluster-controller.idle-tick-time-ms | millisecond | Time idle | +| cluster-controller.work-ms | millisecond | Time used for actual work | +| cluster-controller.is-master | binary | 1 if this cluster controller is currently the master, or 0 if not | +| cluster-controller.remote-task-queue.size | operation | Number of remote tasks queued | +| 
cluster-controller.node-event.count | operation | Number of node events | +| cluster-controller.resource\_usage.nodes\_above\_limit | node | The number of content nodes above resource limit, blocking feed | +| cluster-controller.resource\_usage.max\_memory\_utilization | fraction | Current memory utilisation, for content node with the highest value | +| cluster-controller.resource\_usage.max\_disk\_utilization | fraction | Current disk space utilisation, for content node with the highest value | +| cluster-controller.resource\_usage.memory\_limit | fraction | Memory space limit as a fraction of available memory | +| cluster-controller.resource\_usage.disk\_limit | fraction | Disk space limit as a fraction of available disk space | +| reindexing.progress | fraction | Re-indexing progress | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/configserver.mdx b/mintlify-docs/en/reference/operations/metrics/configserver.mdx new file mode 100644 index 0000000000..99806dcd36 --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/configserver.mdx @@ -0,0 +1,146 @@ +--- +title: "ConfigServer Metrics" +sidebarTitle: "Configserver metrics" +--- + +| Name | Unit | Description | +| --- | --- | --- | +| configserver.requests | request | Number of requests processed | +| configserver.failedRequests | request | Number of requests that failed | +| configserver.latency | millisecond | Time to complete requests | +| configserver.cacheConfigElems | item | Number of config elements in the cache | +| configserver.cacheChecksumElems | item | Number of checksum elements in the cache | +| configserver.hosts | node | The number of nodes being served configuration from the config server cluster | +| configserver.tenants | instance | The number of tenants being served configuration from the config server cluster | +| configserver.applications | instance | The number of applications being served configuration from the config server cluster | +| 
configserver.delayedResponses | response | Number of delayed responses | +| configserver.sessionChangeErrors | session | Number of session change errors | +| configserver.unknownHostRequests | request | Config requests from unknown hosts | +| configserver.newSessions | session | New config sessions | +| configserver.preparedSessions | session | Prepared config sessions | +| configserver.activeSessions | session | Active config sessions | +| configserver.inactiveSessions | session | Inactive config sessions | +| configserver.addedSessions | session | Added config sessions | +| configserver.removedSessions | session | Removed config sessions | +| configserver.rpcServerWorkQueueSize | item | Number of elements in the RPC server work queue | +| maintenanceDeployment.transientFailure | operation | Number of maintenance deployments that failed with a transient failure | +| maintenanceDeployment.failure | operation | Number of maintenance deployments that failed with a permanent failure | +| maintenanceDeployment.reason | operation | Reason for maintenance deployment | +| maintenance.successFactorDeviation | fraction | Configserver: Maintenance Success Factor Deviation | +| maintenance.duration | millisecond | Configserver: Maintenance Duration | +| maintenance.congestion | failure | Configserver: Maintenance Congestion | +| configserver.zkConnectionLost | connection | Number of ZooKeeper connections lost | +| configserver.zkReconnected | connection | Number of ZooKeeper reconnections | +| configserver.zkConnected | node | Number of ZooKeeper nodes connected | +| configserver.zkSuspended | node | Number of ZooKeeper nodes suspended | +| configserver.zkZNodes | node | Number of ZooKeeper nodes present | +| configserver.zkAvgLatency | millisecond | Average latency for ZooKeeper requests | +| configserver.zkMaxLatency | millisecond | Max latency for ZooKeeper requests | +| configserver.zkConnections | connection | Number of ZooKeeper connections | +| 
configserver.zkOutstandingRequests | request | Number of ZooKeeper requests in flight | +| orchestrator.lock.acquire-latency | second | Time to acquire zookeeper lock | +| orchestrator.lock.acquire-success | operation | Number of times zookeeper lock has been acquired successfully | +| orchestrator.lock.acquire-timedout | operation | Number of times zookeeper lock couldn't be acquired within timeout | +| orchestrator.lock.acquire | operation | Number of attempts to acquire zookeeper lock | +| orchestrator.lock.acquired | operation | Number of times zookeeper lock was acquired | +| orchestrator.lock.hold-latency | second | Time zookeeper lock was held before it was released | +| nodes.active | node | The number of active nodes in a cluster | +| nodes.nonActive | node | The number of non-active nodes in a cluster | +| nodes.nonActiveFraction | node | The fraction of non-active nodes vs total nodes in a cluster | +| nodes.exclusiveSwitchFraction | fraction | The fraction of nodes in a cluster on exclusive network switches | +| nodes.emptyExclusive | node | The number of exclusive hosts that do not have any nodes allocated to them | +| nodes.expired.deprovisioned | node | The number of deprovisioned nodes that have expired | +| nodes.expired.dirty | node | The number of dirty nodes that have expired | +| nodes.expired.inactive | node | The number of inactive nodes that have expired | +| nodes.expired.provisioned | node | The number of provisioned nodes that have expired | +| nodes.expired.reserved | node | The number of reserved nodes that have expired | +| cluster.cost | dollar\_per\_hour | The cost of the nodes allocated to a certain cluster, in $/hr | +| cluster.load.ideal.cpu | fraction | The ideal cpu load of a certain cluster | +| cluster.load.ideal.memory | fraction | The ideal memory load of a certain cluster | +| cluster.load.ideal.disk | fraction | The ideal disk load of a certain cluster | +| cluster.load.peak.cpu | fraction | The peak cpu load in the period 
considered of a certain cluster | +| cluster.load.peak.memory | fraction | The peak memory load in the period considered of a certain cluster | +| cluster.load.peak.disk | fraction | The peak disk load in the period considered of a certain cluster | +| cluster.backup.age | fraction | Age of the most recent cluster backup as a fraction of the backup interval | +| cluster.snapshot.busySeconds | second | The maximum time a snapshot has been busy (creating or restoring) for a cluster | +| zone.working | binary | The value 1 if zone is considered healthy, 0 if not. This is decided by considering the number of non-active nodes vs the number of active nodes in a zone | +| cache.nodeObject.hitRate | fraction | The fraction of cache hits vs cache lookups for the node object cache | +| cache.nodeObject.evictionCount | item | The number of cache elements evicted from the node object cache | +| cache.nodeObject.size | item | The number of cache elements in the node object cache | +| cache.curator.hitRate | fraction | The fraction of cache hits vs cache lookups for the curator cache | +| cache.curator.evictionCount | item | The number of cache elements evicted from the curator cache | +| cache.curator.size | item | The number of cache elements in the curator cache | +| wantedRestartGeneration | generation | Wanted restart generation for tenant node | +| currentRestartGeneration | generation | Current restart generation for tenant node | +| wantToRestart | binary | One if node wants to restart, zero if not | +| wantedRebootGeneration | generation | Wanted reboot generation for tenant node | +| currentRebootGeneration | generation | Current reboot generation for tenant node | +| wantToReboot | binary | One if node wants to reboot, zero if not | +| retired | binary | One if node is retired, zero if not | +| wantedVespaVersion | version | Wanted vespa version for the node, in the form MINOR.PATCH. 
Major version is not included here | +| currentVespaVersion | version | Current vespa version for the node, in the form MINOR.PATCH. Major version is not included here | +| wantToChangeVespaVersion | binary | One if node wants to change Vespa version, zero if not | +| hasWireguardKey | binary | One if node has a WireGuard key, zero if not | +| wantToRetire | binary | One if node wants to retire, zero if not | +| wantToDeprovision | binary | One if node wants to be deprovisioned, zero if not | +| failReport | binary | One if there is a fail report for the node, zero if not | +| suspended | binary | One if the node is suspended, zero if not | +| suspendedSeconds | second | The number of seconds the node has been suspended | +| activeSeconds | second | The number of seconds the node has been active | +| numberOfServicesUp | instance | The number of services confirmed to be running on a node | +| numberOfServicesNotChecked | instance | The number of services supposed to run on a node, that have not been checked | +| numberOfServicesDown | instance | The number of services confirmed to not be running on a node | +| someServicesDown | binary | One if one or more services have been confirmed to not run on a node, zero if not | +| numberOfServicesUnknown | instance | The number of services the config server does not know is running on a node | +| nodeFailerBadNode | binary | One if the node is failed due to being bad, zero if not | +| downInNodeRepo | binary | One if the node is registered as being down in the node repository, zero if not | +| numberOfServices | instance | Number of services supposed to run on a node | +| lockAttempt.acquireMaxActiveLatency | second | Maximum duration for keeping a lock, ending during the metrics snapshot, or still being kept at the end of this snapshot period | +| lockAttempt.acquireHz | operation\_per\_second | Average number of locks acquired per second during the snapshot period | +| lockAttempt.acquireLoad | operation | Average number of locks held 
concurrently during the snapshot period | +| lockAttempt.lockedLatency | second | Longest lock duration in the snapshot period | +| lockAttempt.lockedLoad | operation | Average number of locks held concurrently during the snapshot period | +| lockAttempt.acquireTimedOut | operation | Number of locking attempts that timed out during the snapshot period | +| lockAttempt.deadlock | operation | Number of lock grab deadlocks detected during the snapshot period | +| lockAttempt.errors | operation | Number of other lock related errors detected during the snapshot period | +| hostedVespa.docker.totalCapacityCpu | vcpu | Total number of VCPUs on tenant hosts managed by hosted Vespa in a zone | +| hostedVespa.docker.totalCapacityMem | gigabyte | Total amount of memory on tenant hosts managed by hosted Vespa in a zone | +| hostedVespa.docker.totalCapacityDisk | gigabyte | Total amount of disk space on tenant hosts managed by hosted Vespa in a zone | +| hostedVespa.docker.freeCapacityCpu | vcpu | Total number of free VCPUs on tenant hosts managed by hosted Vespa in a zone | +| hostedVespa.docker.freeCapacityMem | gigabyte | Total amount of free memory on tenant hosts managed by hosted Vespa in a zone | +| hostedVespa.docker.freeCapacityDisk | gigabyte | Total amount of free disk space on tenant hosts managed by hosted Vespa in a zone | +| hostedVespa.docker.allocatedCapacityCpu | vcpu | Total number of allocated VCPUs on tenant hosts managed by hosted Vespa in a zone | +| hostedVespa.docker.allocatedCapacityMem | gigabyte | Total amount of allocated memory on tenant hosts managed by hosted Vespa in a zone | +| hostedVespa.docker.allocatedCapacityDisk | gigabyte | Total amount of allocated disk space on tenant hosts managed by hosted Vespa in a zone | +| hostedVespa.pendingRedeployments | task | The number of hosted Vespa re-deployments pending | +| hostedVespa.docker.skew | fraction | A number in the range 0..1 indicating how well allocated resources are balanced with 
availability on hosts | +| hostedVespa.activeHosts | host | The number of managed hosts that are in state "active" | +| hostedVespa.breakfixedHosts | host | The number of managed hosts that are in state "breakfixed" | +| hostedVespa.deprovisionedHosts | host | The number of managed hosts that are in state "deprovisioned" | +| hostedVespa.dirtyHosts | host | The number of managed hosts that are in state "dirty" | +| hostedVespa.failedHosts | host | The number of managed hosts that are in state "failed" | +| hostedVespa.inactiveHosts | host | The number of managed hosts that are in state "inactive" | +| hostedVespa.parkedHosts | host | The number of managed hosts that are in state "parked" | +| hostedVespa.provisionedHosts | host | The number of managed hosts that are in state "provisioned" | +| hostedVespa.readyHosts | host | The number of managed hosts that are in state "ready" | +| hostedVespa.reservedHosts | host | The number of managed hosts that are in state "reserved" | +| hostedVespa.activeNodes | host | The number of managed nodes that are in state "active" | +| hostedVespa.breakfixedNodes | host | The number of managed nodes that are in state "breakfixed" | +| hostedVespa.deprovisionedNodes | host | The number of managed nodes that are in state "deprovisioned" | +| hostedVespa.dirtyNodes | host | The number of managed nodes that are in state "dirty" | +| hostedVespa.failedNodes | host | The number of managed nodes that are in state "failed" | +| hostedVespa.inactiveNodes | host | The number of managed nodes that are in state "inactive" | +| hostedVespa.parkedNodes | host | The number of managed nodes that are in state "parked" | +| hostedVespa.provisionedNodes | host | The number of managed nodes that are in state "provisioned" | +| hostedVespa.readyNodes | host | The number of managed nodes that are in state "ready" | +| hostedVespa.reservedNodes | host | The number of managed nodes that are in state "reserved" | +| overcommittedHosts | host | The number 
of hosts with over-committed resources | +| spareHostCapacity | host | The number of spare hosts | +| throttledHostFailures | host | Number of host failures stopped due to throttling | +| throttledNodeFailures | host | Number of node failures stopped due to throttling | +| nodeFailThrottling | binary | Metric indicating when node failure throttling is active. The value 1 means active, 0 means inactive | +| clusterAutoscaled | operation | Number of times a cluster has been rescaled by the autoscaler | +| clusterAutoscaleDuration | second | The currently predicted duration of a rescaling of this cluster | +| deployment.prepareMillis | millisecond | Duration of deployment preparations | +| deployment.activateMillis | millisecond | Duration of deployment activations | +| throttledHostProvisioning | binary | Value 1 if host provisioning is throttled, 0 if not | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/container.mdx b/mintlify-docs/en/reference/operations/metrics/container.mdx new file mode 100644 index 0000000000..82471e658b --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/container.mdx @@ -0,0 +1,206 @@ +--- +title: "Container Metrics" +sidebarTitle: "Container metrics" +--- + +| Name | Unit | Description | +| --- | --- | --- | +| http.status.1xx | response | Number of responses with a 1xx status | +| http.status.2xx | response | Number of responses with a 2xx status | +| http.status.3xx | response | Number of responses with a 3xx status | +| http.status.4xx | response | Number of responses with a 4xx status | +| http.status.5xx | response | Number of responses with a 5xx status | +| application\_generation | version | The currently live application config generation (aka session id) | +| in\_service | binary | This will have the value 1 if the node is in service, 0 if not. 
| +| jdisc.gc.count | operation | Number of JVM garbage collections done | +| jdisc.gc.ms | millisecond | Time spent in JVM garbage collection | +| jdisc.jvm | version | JVM runtime version | +| cpu | thread | Container service CPU pressure | +| jdisc.memory\_mappings | operation | JDISC Memory mappings | +| jdisc.open\_file\_descriptors | item | JDISC Open file descriptors | +| jdisc.thread\_pool.unhandled\_exceptions | thread | Number of exceptions thrown by tasks | +| jdisc.thread\_pool.work\_queue.capacity | thread | Capacity of the task queue | +| jdisc.thread\_pool.work\_queue.size | thread | Size of the task queue | +| jdisc.thread\_pool.rejected\_tasks | thread | Number of tasks rejected by the thread pool | +| jdisc.thread\_pool.size | thread | Size of the thread pool | +| jdisc.thread\_pool.max\_allowed\_size | thread | The maximum allowed number of threads in the pool | +| jdisc.thread\_pool.active\_threads | thread | Number of threads that are active | +| jdisc.deactivated\_containers.total | item | JDISC Deactivated container instances | +| jdisc.deactivated\_containers.with\_retained\_refs.last | item | JDISC Deactivated container nodes with retained refs | +| jdisc.application.failed\_component\_graphs | item | JDISC Application failed component graphs | +| jdisc.application.component\_graph.creation\_time\_millis | millisecond | JDISC Application component graph creation time | +| jdisc.application.component\_graph.reconfigurations | item | JDISC Application component graph reconfigurations | +| jdisc.singleton.is\_active | item | JDISC Singleton is active | +| jdisc.singleton.activation.count | operation | JDISC Singleton activations | +| jdisc.singleton.activation.failure.count | operation | JDISC Singleton activation failures | +| jdisc.singleton.activation.millis | millisecond | JDISC Singleton activation time | +| jdisc.singleton.deactivation.count | operation | JDISC Singleton deactivations | +| jdisc.singleton.deactivation.failure.count | 
operation | JDISC Singleton deactivation failures | +| jdisc.singleton.deactivation.millis | millisecond | JDISC Singleton deactivation time | +| jdisc.http.ssl.handshake.failure.missing\_client\_cert | operation | JDISC HTTP SSL Handshake failures due to missing client certificate | +| jdisc.http.ssl.handshake.failure.expired\_client\_cert | operation | JDISC HTTP SSL Handshake failures due to expired client certificate | +| jdisc.http.ssl.handshake.failure.invalid\_client\_cert | operation | JDISC HTTP SSL Handshake failures due to invalid client certificate | +| jdisc.http.ssl.handshake.failure.incompatible\_protocols | operation | JDISC HTTP SSL Handshake failures due to incompatible protocols | +| jdisc.http.ssl.handshake.failure.incompatible\_chifers | operation | JDISC HTTP SSL Handshake failures due to incompatible ciphers | +| jdisc.http.ssl.handshake.failure.connection\_closed | operation | JDISC HTTP SSL Handshake failures due to connection closed | +| jdisc.http.ssl.handshake.failure.unknown | operation | JDISC HTTP SSL Handshake failures for unknown reason | +| jdisc.http.latency | millisecond | Request latency including the HTTP layer | +| jdisc.http.time\_to\_first\_byte | millisecond | Time from request has been received by the server until the first byte is returned to the client | +| jdisc.http.request.prematurely\_closed | request | HTTP requests prematurely closed | +| jdisc.http.request.requests\_per\_connection | request | HTTP requests per connection | +| jdisc.http.request.uri\_length | byte | HTTP URI length | +| jdisc.http.request.content\_size | byte | HTTP request content size | +| jdisc.http.requests | request | HTTP requests | +| jdisc.http.requests.status | request | Number of requests to the built-in status handler | +| jdisc.http.filter.rule.blocked\_requests | request | Number of requests blocked by filter | +| jdisc.http.filter.rule.allowed\_requests | request | Number of requests allowed by filter | +| 
jdisc.http.filtering.request.handled | request | Number of filtering requests handled | +| jdisc.http.filtering.request.unhandled | request | Number of filtering requests unhandled | +| jdisc.http.filtering.response.handled | request | Number of filtering responses handled | +| jdisc.http.filtering.response.unhandled | request | Number of filtering responses unhandled | +| jdisc.http.handler.unhandled\_exceptions | request | Number of unhandled exceptions in handler | +| jdisc.tls.capability\_checks.succeeded | operation | Number of TLS capability checks succeeded | +| jdisc.tls.capability\_checks.failed | operation | Number of TLS capability checks failed | +| jdisc.http.jetty.threadpool.thread.max | thread | Configured maximum number of threads | +| jdisc.http.jetty.threadpool.thread.min | thread | Configured minimum number of threads | +| jdisc.http.jetty.threadpool.thread.reserved | thread | Configured number of reserved threads or -1 for heuristic | +| jdisc.http.jetty.threadpool.thread.busy | thread | Number of threads executing internal and transient jobs | +| jdisc.http.jetty.threadpool.thread.idle | thread | Number of idle threads | +| jdisc.http.jetty.threadpool.thread.total | thread | Current number of threads | +| jdisc.http.jetty.threadpool.queue.size | thread | Current size of the job queue | +| jdisc.http.jetty.http\_compliance.violation | failure | Number of HTTP compliance violations | +| serverNumOpenConnections | connection | The number of currently open connections | +| serverNumConnections | connection | The total number of connections opened | +| serverBytesReceived | byte | The number of bytes received by the server | +| serverBytesSent | byte | The number of bytes sent from the server | +| handled.requests | operation | The number of requests handled per metrics snapshot | +| handled.latency | millisecond | The time used for handling requests, excluding HTTP layer and rendering | +| httpapi\_latency | millisecond | Duration for requests to 
the HTTP document APIs | +| httpapi\_pending | operation | Document operations pending execution | +| httpapi\_num\_operations | operation | Total number of document operations performed | +| httpapi\_num\_updates | operation | Document update operations performed | +| httpapi\_num\_removes | operation | Document remove operations performed | +| httpapi\_num\_puts | operation | Document put operations performed | +| httpapi\_ops\_per\_sec | operation\_per\_second | Document operations per second | +| httpapi\_succeeded | operation | Document operations that succeeded | +| httpapi\_failed | operation | Document operations that failed | +| httpapi\_parse\_error | operation | Document operations that failed due to document parse errors | +| httpapi\_condition\_not\_met | operation | Document operations not applied due to condition not met | +| httpapi\_not\_found | operation | Document operations not applied due to document not found | +| httpapi\_failed\_unknown | operation | Document operations failed by unknown cause | +| httpapi\_failed\_timeout | operation | Document operations failed by timeout | +| httpapi\_failed\_insufficient\_storage | operation | Document operations failed by insufficient storage | +| httpapi\_queued\_operations | operation | Document operations queued for execution in /document/v1 API handler | +| httpapi\_queued\_bytes | byte | Total operation bytes queued for execution in /document/v1 API handler | +| httpapi\_queued\_age | second | Age in seconds of the oldest operation in the queue for /document/v1 API handler | +| httpapi\_mbus\_window\_size | operation | The window size of Messagebus's dynamic throttle policy for /document/v1 API handler | +| mem.heap.total | byte | Total available heap memory | +| mem.heap.free | byte | Free heap memory | +| mem.heap.used | byte | Currently used heap memory | +| mem.direct.total | byte | Total available direct memory | +| mem.direct.free | byte | Currently free direct memory | +| mem.direct.used | 
byte | Direct memory currently used | +| mem.direct.count | byte | Number of direct memory allocations | +| mem.native.total | byte | Total available native memory | +| mem.native.free | byte | Currently free native memory | +| mem.native.used | byte | Native memory currently used | +| athenz-tenant-cert.expiry.seconds | second | Time remaining until Athenz tenant certificate expires | +| container-iam-role.expiry.seconds | second | Time remaining until IAM role expires | +| peak\_qps | query\_per\_second | The highest number of qps for a second for this metrics snapshot | +| search\_connections | connection | Number of search connections | +| feed.operations | operation | Number of document feed operations | +| feed.latency | millisecond | Feed latency | +| feed.http-requests | operation | Feed HTTP requests | +| queries | operation | Query volume | +| query\_container\_latency | millisecond | The query execution time consumed in the container | +| query\_latency | millisecond | The overall query latency as observed by the container cluster, excluding HTTP layer and rendering | +| query\_timeout | millisecond | The amount of time allowed for query execution, from the client | +| failed\_queries | operation | The number of failed queries | +| degraded\_queries | operation | The number of degraded queries, e.g. 
due to some content nodes not responding in time | +| hits\_per\_query | hit\_per\_query | The number of hits returned | +| query\_hit\_offset | hit | The offset for hits returned | +| documents\_covered | document | The combined number of documents considered during query evaluation | +| documents\_total | document | The number of documents to be evaluated if all requests had been fully executed | +| documents\_target\_total | document | The target number of total documents to be evaluated when all data is in sync | +| jdisc.render.latency | nanosecond | The time used by the container to render responses | +| query\_item\_count | item | The number of query items (terms, phrases, etc.) | +| docproc.proctime | millisecond | Time spent processing document | +| docproc.documents | document | Number of processed documents | +| totalhits\_per\_query | hit\_per\_query | The total number of documents found to match queries | +| empty\_results | operation | Number of queries matching no documents | +| requestsOverQuota | operation | The number of requests rejected due to exceeding quota | +| relevance.at\_1 | score | The relevance of hit number 1 | +| relevance.at\_3 | score | The relevance of hit number 3 | +| relevance.at\_10 | score | The relevance of hit number 10 | +| error.timeout | operation | Requests that timed out | +| error.backends\_oos | operation | Requests that failed due to no available backend nodes | +| error.plugin\_failure | operation | Requests that failed due to plugin failure | +| error.backend\_communication\_error | operation | Requests that failed due to backend communication error | +| error.empty\_document\_summaries | operation | Requests that failed due to missing document summaries | +| error.illegal\_query | operation | Requests that failed due to illegal queries | +| error.invalid\_query\_parameter | operation | Requests that failed due to invalid query parameters | +| error.internal\_server\_error | operation | Requests that failed due to
internal server error | +| error.misconfigured\_server | operation | Requests that failed due to misconfigured server | +| error.invalid\_query\_transformation | operation | Requests that failed due to invalid query transformation | +| error.results\_with\_errors | operation | The number of queries with error payload | +| error.unspecified | operation | Requests that failed for an unspecified reason | +| error.unhandled\_exception | operation | Requests that failed due to an unhandled exception | +| serverRejectedRequests | operation | Deprecated. Use jdisc.thread\_pool.rejected\_tasks instead. | +| serverThreadPoolSize | thread | Deprecated. Use jdisc.thread\_pool.size instead. | +| serverActiveThreads | thread | Deprecated. Use jdisc.thread\_pool.active\_threads instead. | +| jrt.transport.tls-certificate-verification-failures | failure | TLS certificate verification failures | +| jrt.transport.peer-authorization-failures | failure | TLS peer authorization failures | +| jrt.transport.server.tls-connections-established | connection | TLS server connections established | +| jrt.transport.client.tls-connections-established | connection | TLS client connections established | +| jrt.transport.server.unencrypted-connections-established | connection | Unencrypted server connections established | +| jrt.transport.client.unencrypted-connections-established | connection | Unencrypted client connections established | +| max\_query\_latency | millisecond | Deprecated. Use query\_latency.max instead | +| mean\_query\_latency | millisecond | Deprecated. 
Use the expression (query\_latency.sum / query\_latency.count) instead | +| jdisc.http.filter.athenz.accepted\_requests | request | Number of requests accepted by the AthenzAuthorization filter | +| jdisc.http.filter.athenz.rejected\_requests | request | Number of requests rejected by the AthenzAuthorization filter | +| jdisc.http.filter.athenz.grid\_requests | request | Number of grid requests | +| serverConnectionsOpenMax | connection | Maximum number of open connections | +| serverConnectionDurationMax | millisecond | Longest duration a connection is kept open | +| serverConnectionDurationMean | millisecond | Average duration a connection is kept open | +| serverConnectionDurationStdDev | millisecond | Standard deviation of open connection duration | +| serverNumRequests | request | Number of requests | +| serverNumSuccessfulResponses | request | Number of successful responses | +| serverNumFailedResponses | request | Number of failed responses | +| serverNumSuccessfulResponseWrites | request | Number of successful response writes | +| serverNumFailedResponseWrites | request | Number of failed response writes | +| serverStartedMillis | millisecond | Time since the service was started | +| embedder.latency | millisecond | Time spent creating an embedding | +| embedder.sequence\_length | item | Number of tokens in the input sequence | +| embedder.request.count | request | Number of embedder API requests | +| embedder.request.failure.count | request | Number of failed embedder API requests | +| embedder.batch.size | item | Number of items in each dispatched batch | +| embedder.batch.queue\_time | millisecond | Time spent waiting in queue before batch dispatch | +| embedder.batch.count | operation | Number of batch dispatches | +| inference.pending | item | Number of pending inference requests in a queue | +| inference.request.rate | operation\_per\_second | Successful inference requests per second | +| inference.failure.rate | operation\_per\_second | Failed 
inference requests per second | +| inference.request.latency | millisecond | Average inference request latency | +| inference.queue.latency | millisecond | Average inference queue latency | +| inference.compute.latency | millisecond | Average inference compute latency | +| inference.queue\_compute.ratio | ratio | Ratio of inference queue time to compute time | +| jvm.buffer.count | buffer | An estimate of the number of buffers in the pool | +| jvm.buffer.memory.used | byte | An estimate of the memory that the Java virtual machine is using for this buffer pool | +| jvm.buffer.total.capacity | byte | An estimate of the total capacity of the buffers in this pool | +| jvm.classes.loaded | class | The number of classes that are currently loaded in the Java virtual machine | +| jvm.classes.unloaded | class | The total number of classes unloaded since the Java virtual machine has started execution | +| jvm.gc.concurrent.phase.time | second | Time spent in concurrent phase | +| jvm.gc.live.data.size | byte | Size of long-lived heap memory pool after reclamation | +| jvm.gc.max.data.size | byte | Max size of long-lived heap memory pool | +| jvm.gc.memory.allocated | byte | Incremented for an increase in the size of the (young) heap memory pool after one GC to before the next | +| jvm.gc.memory.promoted | byte | Count of positive increases in the size of the old generation memory pool before GC to after GC | +| jvm.gc.overhead | percentage | An approximation of the percent of CPU time used by GC activities | +| jvm.gc.pause | second | Time spent in GC pause | +| jvm.memory.committed | byte | The amount of memory in bytes that is committed for the Java virtual machine to use | +| jvm.memory.max | byte | The maximum amount of memory in bytes that can be used for memory management | +| jvm.memory.usage.after.gc | percentage | The percentage of long-lived heap pool used after the last GC event | +| jvm.memory.used | byte | The amount of used memory | +| jvm.threads.daemon | 
thread | The current number of live daemon threads | +| jvm.threads.live | thread | The current number of live threads including both daemon and non-daemon threads | +| jvm.threads.peak | thread | The peak live thread count since the Java virtual machine started or peak was reset | +| jvm.threads.started | thread | The total number of application threads started in the JVM | +| jvm.threads.states | thread | The current number of threads (in each state) | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/default-metric-set.mdx b/mintlify-docs/en/reference/operations/metrics/default-metric-set.mdx new file mode 100644 index 0000000000..6b1fb53344 --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/default-metric-set.mdx @@ -0,0 +1,102 @@ +--- +title: "Default Metric Set" +sidebarTitle: "Metrics" +--- + +This document provides reference documentation for the Default metric set, including suffixes present per metric. If the suffix column contains "N/A" then the base name of the corresponding metric is used with no suffix. 
+ +## ClusterController Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| cluster-controller.down.count | node | last, max | Number of content nodes down | +| cluster-controller.maintenance.count | node | last, max | Number of content nodes in maintenance | +| cluster-controller.up.count | node | last, max | Number of content nodes up | +| cluster-controller.is-master | binary | last, max | 1 if this cluster controller is currently the master, or 0 if not | +| cluster-controller.resource\_usage.nodes\_above\_limit | node | last, max | The number of content nodes above resource limit, blocking feed | +| cluster-controller.resource\_usage.max\_memory\_utilization | fraction | last, max | Current memory utilisation, for content node with the highest value | +| cluster-controller.resource\_usage.max\_disk\_utilization | fraction | last, max | Current disk space utilisation, for content node with the highest value | + +## Container Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| http.status.1xx | response | rate | Number of responses with a 1xx status | +| http.status.2xx | response | rate | Number of responses with a 2xx status | +| http.status.3xx | response | rate | Number of responses with a 3xx status | +| http.status.4xx | response | rate | Number of responses with a 4xx status | +| http.status.5xx | response | rate | Number of responses with a 5xx status | +| jdisc.gc.ms | millisecond | average, max | Time spent in JVM garbage collection | +| jdisc.thread\_pool.work\_queue.capacity | thread | max | Capacity of the task queue | +| jdisc.thread\_pool.work\_queue.size | thread | count, max, min, sum | Size of the task queue | +| jdisc.thread\_pool.size | thread | max | Size of the thread pool | +| jdisc.thread\_pool.active\_threads | thread | count, max, min, sum | Number of threads that are active | +| jdisc.application.failed\_component\_graphs | item | rate | JDISC Application failed component 
graphs | +| jdisc.singleton.is\_active | item | last, max | JDISC Singleton is active | +| jdisc.http.ssl.handshake.failure.missing\_client\_cert | operation | rate | JDISC HTTP SSL Handshake failures due to missing client certificate | +| jdisc.http.ssl.handshake.failure.incompatible\_protocols | operation | rate | JDISC HTTP SSL Handshake failures due to incompatible protocols | +| jdisc.http.ssl.handshake.failure.incompatible\_chifers | operation | rate | JDISC HTTP SSL Handshake failures due to incompatible ciphers | +| jdisc.http.ssl.handshake.failure.unknown | operation | rate | JDISC HTTP SSL Handshake failures for unknown reason | +| jdisc.http.latency | millisecond | count, max, sum | Request latency including the HTTP layer | +| mem.heap.free | byte | average | Free heap memory | +| athenz-tenant-cert.expiry.seconds | second | last, max, min | Time remaining until Athenz tenant certificate expires | +| feed.operations | operation | rate | Number of document feed operations | +| feed.latency | millisecond | count, sum | Feed latency | +| queries | operation | rate | Query volume | +| query\_latency | millisecond | average, count, max, sum | The overall query latency as observed by the container cluster, excluding HTTP layer and rendering | +| failed\_queries | operation | rate | The number of failed queries | +| degraded\_queries | operation | rate | The number of degraded queries, e.g. due to some content nodes not responding in time | +| hits\_per\_query | hit\_per\_query | average, count, max, sum | The number of hits returned | +| docproc.documents | document | sum | Number of processed documents | +| totalhits\_per\_query | hit\_per\_query | average, count, max, sum | The total number of documents found to match queries | +| serverActiveThreads | thread | average | Deprecated. Use jdisc.thread\_pool.active\_threads instead.
| + +## Distributor Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| vds.distributor.docsstored | document | average | Number of documents stored in all buckets controlled by this distributor | +| vds.bouncer.clock\_skew\_aborts | operation | count | Number of client operations that were aborted due to clock skew between sender and receiver exceeding acceptable range | + +## NodeAdmin Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| endpoint.certificate.expiry.seconds | second | N/A | Time until node endpoint certificate expires | +| node-certificate.expiry.seconds | second | N/A | Time until node certificate expires | + +## SearchNode Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| content.proton.documentdb.documents.total | document | last, max | The total number of documents in this document db (ready + not-ready) | +| content.proton.documentdb.documents.ready | document | last, max | The number of ready documents in this document db | +| content.proton.documentdb.documents.active | document | last, max | The number of active / searchable documents in this document db | +| content.proton.documentdb.disk\_usage | byte | last | The total disk usage (in bytes) for this document db | +| content.proton.documentdb.memory\_usage.allocated\_bytes | byte | last | The number of allocated bytes | +| content.proton.search\_protocol.query.latency | second | average, count, max, sum | Query request latency (seconds) | +| content.proton.search\_protocol.docsum.latency | second | average, count, max, sum | Docsum request latency (seconds) | +| content.proton.search\_protocol.docsum.requested\_documents | document | rate | Total requested document summaries | +| content.proton.resource\_usage.disk | fraction | average | The relative amount of disk used by this content node (transient usage not included, value in the range \[0, 1\]).
Same value as reported to the cluster controller | +| content.proton.resource\_usage.memory | fraction | average | The relative amount of memory used by this content node (transient usage not included, value in the range \[0, 1\]). Same value as reported to the cluster controller | +| content.proton.resource\_usage.feeding\_blocked | binary | last, max | Whether feeding is blocked due to resource limits being reached (value is either 0 or 1) | +| content.proton.transactionlog.disk\_usage | byte | last | The disk usage (in bytes) of the transaction log | +| content.proton.documentdb.matching.docs\_matched | document | rate | Number of documents matched | +| content.proton.documentdb.matching.docs\_reranked | document | rate | Number of documents re-ranked (second phase) | +| content.proton.documentdb.matching.rank\_profile.query\_latency | second | average, count, max, sum | Total average latency (sec) when matching and ranking a query | +| content.proton.documentdb.matching.rank\_profile.query\_setup\_time | second | average, count, max, sum | Average time (sec) spent setting up and tearing down queries | +| content.proton.documentdb.matching.rank\_profile.rerank\_time | second | average, count, max, sum | Average time (sec) spent on 2nd phase ranking | + +## Sentinel Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| sentinel.totalRestarts | restart | last, max, sum | Total number of service restarts done by the sentinel since the sentinel was started | + +## Storage Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| vds.filestor.allthreads.put.count | operation | rate | Number of requests processed. | +| vds.filestor.allthreads.remove.count | operation | rate | Number of requests processed. | +| vds.filestor.allthreads.update.count | request | rate | Number of requests processed. 
| \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/distributor.mdx b/mintlify-docs/en/reference/operations/metrics/distributor.mdx new file mode 100644 index 0000000000..6c43274241 --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/distributor.mdx @@ -0,0 +1,228 @@ +--- +title: "Distributor Metrics" +sidebarTitle: "Distributor metrics" +--- + +| Name | Unit | Description | +| --- | --- | --- | +| vds.idealstate.buckets\_rechecking | bucket | The number of buckets that we are rechecking for ideal state operations | +| vds.idealstate.idealstate\_diff | bucket | A number representing the current difference from the ideal state. This is a number that decreases steadily as the system is getting closer to the ideal state | +| vds.idealstate.buckets\_toofewcopies | bucket | The number of buckets the distributor controls that have less than the desired redundancy | +| vds.idealstate.buckets\_toomanycopies | bucket | The number of buckets the distributor controls that have more than the desired redundancy | +| vds.idealstate.buckets | bucket | The number of buckets the distributor controls | +| vds.idealstate.buckets\_notrusted | bucket | The number of buckets that have no trusted copies. | +| vds.idealstate.bucket\_replicas\_moving\_out | bucket | Bucket replicas that should be moved out, e.g. retirement case or node added to cluster that has higher ideal state priority. | +| vds.idealstate.bucket\_replicas\_copying\_out | bucket | Bucket replicas that should be copied out, e.g. node is in ideal state but might have to provide data to other nodes in a merge | +| vds.idealstate.bucket\_replicas\_copying\_in | bucket | Bucket replicas that should be copied in, e.g.
node does not have a replica for a bucket that it is in ideal state for | +| vds.idealstate.bucket\_replicas\_syncing | bucket | Bucket replicas that need syncing due to mismatching metadata | +| vds.idealstate.max\_observed\_time\_since\_last\_gc\_sec | second | Maximum time (in seconds) since GC was last successfully run for a bucket. Aggregated max value across all buckets on the distributor. | +| vds.idealstate.delete\_bucket.done\_ok | operation | The number of operations successfully performed | +| vds.idealstate.delete\_bucket.done\_failed | operation | The number of operations that failed | +| vds.idealstate.delete\_bucket.pending | operation | The number of operations pending | +| vds.idealstate.delete\_bucket.blocked | operation | The number of operations blocked by blocking operation starter | +| vds.idealstate.delete\_bucket.throttled | operation | The number of operations throttled by throttling operation starter | +| vds.idealstate.merge\_bucket.done\_ok | operation | The number of operations successfully performed | +| vds.idealstate.merge\_bucket.done\_failed | operation | The number of operations that failed | +| vds.idealstate.merge\_bucket.pending | operation | The number of operations pending | +| vds.idealstate.merge\_bucket.blocked | operation | The number of operations blocked by blocking operation starter | +| vds.idealstate.merge\_bucket.throttled | operation | The number of operations throttled by throttling operation starter | +| vds.idealstate.merge\_bucket.source\_only\_copy\_changed | operation | The number of merge operations where source-only copy changed | +| vds.idealstate.merge\_bucket.source\_only\_copy\_delete\_blocked | operation | The number of merge operations where delete of unchanged source-only copies was blocked | +| vds.idealstate.merge\_bucket.source\_only\_copy\_delete\_failed | operation | The number of merge operations where delete of unchanged source-only copies failed | +| vds.idealstate.split\_bucket.done\_ok | 
operation | The number of operations successfully performed | +| vds.idealstate.split\_bucket.done\_failed | operation | The number of operations that failed | +| vds.idealstate.split\_bucket.pending | operation | The number of operations pending | +| vds.idealstate.split\_bucket.blocked | operation | The number of operations blocked by blocking operation starter | +| vds.idealstate.split\_bucket.throttled | operation | The number of operations throttled by throttling operation starter | +| vds.idealstate.join\_bucket.done\_ok | operation | The number of operations successfully performed | +| vds.idealstate.join\_bucket.done\_failed | operation | The number of operations that failed | +| vds.idealstate.join\_bucket.pending | operation | The number of operations pending | +| vds.idealstate.join\_bucket.blocked | operation | The number of operations blocked by blocking operation starter | +| vds.idealstate.join\_bucket.throttled | operation | The number of operations throttled by throttling operation starter | +| vds.idealstate.garbage\_collection.done\_ok | operation | The number of operations successfully performed | +| vds.idealstate.garbage\_collection.done\_failed | operation | The number of operations that failed | +| vds.idealstate.garbage\_collection.pending | operation | The number of operations pending | +| vds.idealstate.garbage\_collection.documents\_removed | document | Number of documents removed by GC operations | +| vds.idealstate.garbage\_collection.blocked | operation | The number of operations blocked by blocking operation starter | +| vds.idealstate.garbage\_collection.throttled | operation | The number of operations throttled by throttling operation starter | +| vds.distributor.puts.latency | millisecond | The latency of put operations | +| vds.distributor.puts.ok | operation | The number of successful put operations performed | +| vds.distributor.puts.failures.total | operation | Sum of all failures | +| vds.distributor.puts.failures.notfound | 
operation | The number of operations that failed because the document did not exist | +| vds.distributor.puts.failures.test\_and\_set\_failed | operation | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.puts.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.puts.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.puts.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.puts.failures.wrongdistributor | operation | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.puts.failures.safe\_time\_not\_reached | operation | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.puts.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.puts.failures.timeout | operation | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.puts.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.puts.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.removes.latency | millisecond | The latency of remove operations | +| vds.distributor.removes.ok | operation | The number of successful removes operations performed | +| vds.distributor.removes.failures.total | operation | Sum of all failures | +| vds.distributor.removes.failures.notfound 
| operation | The number of operations that failed because the document did not exist | +| vds.distributor.removes.failures.test\_and\_set\_failed | operation | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.removes.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.removes.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.removes.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.removes.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.removes.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.removes.failures.safe\_time\_not\_reached | operation | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.removes.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.removes.failures.timeout | operation | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.removes.failures.wrongdistributor | operation | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.updates.latency | millisecond | The latency of update operations | +| vds.distributor.updates.ok | operation | The number of successful updates operations performed | +| vds.distributor.updates.failures.total | operation | Sum of all failures | +| 
vds.distributor.updates.failures.notfound | operation | The number of operations that failed because the document did not exist | +| vds.distributor.updates.failures.test\_and\_set\_failed | operation | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.updates.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.updates.diverging\_timestamp\_updates | operation | Number of updates that report they were performed against divergent version timestamps on different replicas | +| vds.distributor.updates.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.updates.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.updates.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.updates.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.updates.failures.safe\_time\_not\_reached | operation | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.updates.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.updates.failures.timeout | operation | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.updates.failures.wrongdistributor | operation | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.updates.fast\_path\_restarts | 
operation | Number of safe path (write repair) updates that were restarted as fast path updates because all replicas returned documents with the same timestamp in the initial read phase | +| vds.distributor.removelocations.ok | operation | The number of successful removelocations operations performed | +| vds.distributor.removelocations.failures.total | operation | Sum of all failures | +| vds.distributor.removelocations.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.removelocations.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.removelocations.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.removelocations.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.removelocations.failures.notfound | operation | The number of operations that failed because the document did not exist | +| vds.distributor.removelocations.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.removelocations.failures.safe\_time\_not\_reached | operation | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.removelocations.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.removelocations.failures.test\_and\_set\_failed | operation | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.removelocations.failures.timeout | operation | 
The number of operations that failed because the operation timed out towards storage | +| vds.distributor.removelocations.failures.wrongdistributor | operation | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.removelocations.latency | millisecond | The average latency of removelocations operations | +| vds.distributor.gets.latency | millisecond | The average latency of gets operations | +| vds.distributor.gets.ok | operation | The number of successful gets operations performed | +| vds.distributor.gets.failures.total | operation | Sum of all failures | +| vds.distributor.gets.failures.notfound | operation | The number of operations that failed because the document did not exist | +| vds.distributor.gets.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.gets.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.gets.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.gets.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.gets.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.gets.failures.safe\_time\_not\_reached | operation | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.gets.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.gets.failures.test\_and\_set\_failed | operation | The number of mutating operations that failed because they specified a 
test-and-set condition that did not match the existing document | +| vds.distributor.gets.failures.timeout | operation | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.gets.failures.wrongdistributor | operation | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.visitor.latency | millisecond | The average latency of visitor operations | +| vds.distributor.visitor.ok | operation | The number of successful visitor operations performed | +| vds.distributor.visitor.failures.total | operation | Sum of all failures | +| vds.distributor.visitor.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.visitor.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.visitor.failures.wrongdistributor | operation | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.visitor.failures.safe\_time\_not\_reached | operation | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.visitor.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.visitor.failures.timeout | operation | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.visitor.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.visitor.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.visitor.failures.notfound | operation | The number of operations that failed because the document did not exist | +| 
vds.distributor.visitor.bytes\_per\_visitor | operation | The number of bytes visited on content nodes as part of a single client visitor command | +| vds.distributor.visitor.docs\_per\_visitor | operation | The number of documents visited on content nodes as part of a single client visitor command | +| vds.distributor.visitor.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.visitor.failures.test\_and\_set\_failed | operation | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.docsstored | document | Number of documents stored in all buckets controlled by this distributor | +| vds.distributor.bytesstored | byte | Number of bytes stored in all buckets controlled by this distributor | +| metricmanager.periodichooklatency | millisecond | Time in ms used to update a single periodic hook | +| metricmanager.resetlatency | millisecond | Time in ms used to reset all metrics. 
| +| metricmanager.sleeptime | millisecond | Time in ms worker thread is sleeping | +| metricmanager.snapshothooklatency | millisecond | Time in ms used to update a single snapshot hook | +| metricmanager.snapshotlatency | millisecond | Time in ms used to take a snapshot | +| vds.distributor.activate\_cluster\_state\_processing\_time | millisecond | Elapsed time where the distributor thread is blocked on merging pending bucket info into its bucket database upon activating a cluster state | +| vds.distributor.bucket\_db.memory\_usage.allocated\_bytes | byte | The number of allocated bytes | +| vds.distributor.bucket\_db.memory\_usage.dead\_bytes | byte | The number of dead bytes (`<=` used\_bytes) | +| vds.distributor.bucket\_db.memory\_usage.onhold\_bytes | byte | The number of bytes on hold | +| vds.distributor.bucket\_db.memory\_usage.used\_bytes | byte | The number of used bytes (`<=` allocated\_bytes) | +| vds.distributor.getbucketlists.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.getbucketlists.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.getbucketlists.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.getbucketlists.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.getbucketlists.failures.notfound | operation | The number of operations that failed because the document did not exist | +| vds.distributor.getbucketlists.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.getbucketlists.failures.safe\_time\_not\_reached | operation | The number of operations that 
were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.getbucketlists.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.getbucketlists.failures.test\_and\_set\_failed | operation | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.getbucketlists.failures.timeout | operation | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.getbucketlists.failures.total | operation | Total number of failures | +| vds.distributor.getbucketlists.failures.wrongdistributor | operation | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.getbucketlists.latency | millisecond | The average latency of getbucketlists operations | +| vds.distributor.getbucketlists.ok | operation | The number of successful getbucketlists operations performed | +| vds.distributor.recoverymodeschedulingtime | millisecond | Time spent scheduling operations in recovery mode after receiving new cluster state | +| vds.distributor.set\_cluster\_state\_processing\_time | millisecond | Elapsed time where the distributor thread is blocked on processing its bucket database upon receiving a new cluster state | +| vds.distributor.state\_transition\_time | millisecond | Time it takes to complete a cluster state transition. 
If a state transition is preempted before completing, its elapsed time is counted as part of the total time spent for the final, completed state transition | +| vds.distributor.stats.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.stats.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.stats.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.stats.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.stats.failures.notfound | operation | The number of operations that failed because the document did not exist | +| vds.distributor.stats.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.stats.failures.safe\_time\_not\_reached | operation | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.stats.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.stats.failures.test\_and\_set\_failed | operation | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.stats.failures.timeout | operation | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.stats.failures.total | operation | The total number of failures | +| vds.distributor.stats.failures.wrongdistributor | operation | The number of operations discarded because they were sent to the wrong distributor | +| 
vds.distributor.stats.latency | millisecond | The average latency of stats operations | +| vds.distributor.stats.ok | operation | The number of successful stats operations performed | +| vds.distributor.update\_gets.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.update\_gets.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.update\_gets.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.update\_gets.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.update\_gets.failures.notfound | operation | The number of operations that failed because the document did not exist | +| vds.distributor.update\_gets.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.update\_gets.failures.safe\_time\_not\_reached | operation | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.update\_gets.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.update\_gets.failures.test\_and\_set\_failed | operation | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.update\_gets.failures.timeout | operation | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.update\_gets.failures.total | operation | The total number of failures | +| vds.distributor.update\_gets.failures.wrongdistributor 
| operation | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.update\_gets.latency | millisecond | The average latency of update\_gets operations | +| vds.distributor.update\_gets.ok | operation | The number of successful update\_gets operations performed | +| vds.distributor.update\_metadata\_gets.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.update\_metadata\_gets.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.update\_metadata\_gets.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.update\_metadata\_gets.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.update\_metadata\_gets.failures.notfound | operation | The number of operations that failed because the document did not exist | +| vds.distributor.update\_metadata\_gets.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.update\_metadata\_gets.failures.safe\_time\_not\_reached | operation | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.update\_metadata\_gets.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.update\_metadata\_gets.failures.test\_and\_set\_failed | operation | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.update\_metadata\_gets.failures.timeout | operation | The 
number of operations that failed because the operation timed out towards storage | +| vds.distributor.update\_metadata\_gets.failures.total | operation | The total number of failures | +| vds.distributor.update\_metadata\_gets.failures.wrongdistributor | operation | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.update\_metadata\_gets.latency | millisecond | The average latency of update\_metadata\_gets operations | +| vds.distributor.update\_metadata\_gets.ok | operation | The number of successful update\_metadata\_gets operations performed | +| vds.distributor.update\_puts.failures.busy | operation | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.update\_puts.failures.concurrent\_mutations | operation | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.update\_puts.failures.inconsistent\_bucket | operation | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.update\_puts.failures.notconnected | operation | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.update\_puts.failures.notfound | operation | The number of operations that failed because the document did not exist | +| vds.distributor.update\_puts.failures.notready | operation | The number of operations discarded because distributor was not ready | +| vds.distributor.update\_puts.failures.safe\_time\_not\_reached | operation | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.update\_puts.failures.storagefailure | operation | The number of operations that failed in storage | +| vds.distributor.update\_puts.failures.test\_and\_set\_failed | operation | The number of 
mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.update\_puts.failures.timeout | operation | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.update\_puts.failures.total | operation | The total number of put failures | +| vds.distributor.update\_puts.failures.wrongdistributor | operation | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.update\_puts.latency | millisecond | The average latency of update\_puts operations | +| vds.distributor.update\_puts.ok | operation | The number of successful update\_puts operations performed | +| vds.distributor.mutating\_op\_memory\_usage | byte | Estimated amount of memory used by active mutating operations across all distributor stripes, in bytes | +| vds.idealstate.nodes\_per\_merge | node | The number of nodes involved in a single merge operation. 
| +| vds.idealstate.set\_bucket\_state.blocked | operation | The number of operations blocked by blocking operation starter | +| vds.idealstate.set\_bucket\_state.done\_failed | operation | The number of operations that failed | +| vds.idealstate.set\_bucket\_state.done\_ok | operation | The number of operations successfully performed | +| vds.idealstate.set\_bucket\_state.pending | operation | The number of operations pending | +| vds.idealstate.set\_bucket\_state.throttled | operation | The number of operations throttled by throttling operation starter | +| vds.bouncer.clock\_skew\_aborts | operation | Number of client operations that were aborted due to clock skew between sender and receiver exceeding acceptable range | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/logd.mdx b/mintlify-docs/en/reference/operations/metrics/logd.mdx new file mode 100644 index 0000000000..0d44881d57 --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/logd.mdx @@ -0,0 +1,8 @@ +--- +title: "Logd Metrics" +sidebarTitle: "Logd metrics" +--- + +| Name | Unit | Description | +| --- | --- | --- | +| logd.processed.lines | item | Number of log lines processed | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/metric-units.mdx b/mintlify-docs/en/reference/operations/metrics/metric-units.mdx new file mode 100644 index 0000000000..44d87ff7c9 --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/metric-units.mdx @@ -0,0 +1,57 @@ +--- +title: "Metric Units Reference" +sidebarTitle: "Metric units" +--- + +| Unit | Description | +| --- | --- | +| binary | Zero or one. 
Zero typically indicates "false" while one indicates "true" | +| bucket | A chunk of documents managed by a distributor service | +| buffer | A buffer | +| byte | A collection of 8 bits | +| byte/second | Number of bytes per second | +| class | An instance of a Java class | +| connection | A link used for communication between a client and a server | +| context switch | A context switch | +| deployment | A deployment on hosted Vespa | +| distance | A number describing the distance of two tensors | +| document | Vespa document, a collection of fields defined in a schema file | +| documentid | A unique document identifier | +| dollar | US dollar | +| dollar/hour | Total current cost of the cluster in $/hr | +| failure | Failures, typically for requests, operations or nodes | +| file | Data file stored on the disk on a node | +| fraction | A value in the range \[0..1\]. Higher values can occur for some metrics, but would indicate the value is outside the allowed range. | +| ratio | A dimensionless ratio between two values. | +| generation | Typically, generation of configuration or application package | +| gigabyte | One billion bytes | +| graph node | A node in a graph | +| hit | Document that meets the filtering/restriction criteria specified by a given query | +| hit/query | Number of hits per query over a period of time | +| host | Bare metal computer that contains nodes | +| instance | Typically, tenant or application | +| item | Object or unit maintained in e.g. a queue | +| millisecond | Millisecond, 1/1000 of a second | +| nanosecond | Nanosecond, 1/1,000,000,000 of a second | +| node | (Virtual) computer that is part of a Vespa cluster | +| packet | Collection of data transmitted over the network as a single unit | +| operation | A clearly defined task | +| operation/second | Number of operations per second | +| percentage | A number expressed as a fraction of 100, normally in the range \[0..100\]. 
| +| query | A request for matching, grouping and/or scoring documents stored in Vespa | +| query/second | Number of queries per second. | +| record | A collection of information, typically a set of key/value, e.g. stored in a transaction log | +| request | A request sent from a client to a server | +| response | A response from a server to a client, typically as a response to a request | +| restart | A service or node restarts | +| routing rotation | Routing rotation | +| score | Relevance score for a document | +| second | Time span of 1 second | +| seconds since epoch | Seconds since Unix Epoch | +| session | A set of operations taking place during one connection or as part of a higher level operation | +| task | Piece of work executed by a server, e.g. to perform background data maintenance | +| tenant | Tenant that owns zero or more applications in a managed Vespa system | +| thread | Computer thread for executing e.g. tasks, operations or queries | +| vcpu | Virtual CPU | +| version | Software or config version | +| wakeup | Computer thread wake-ups for doing some work | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/metrics.mdx b/mintlify-docs/en/reference/operations/metrics/metrics.mdx new file mode 100644 index 0000000000..f8a3230d02 --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/metrics.mdx @@ -0,0 +1,48 @@ +--- +title: "Metrics Reference" +--- + +## Metrics reference documentation + +### Metric types + +Each metric has one of the types documented [here](/en/reference/operations/metrics/metric-units). + +### Metric aggregator suffixes + +Metrics are collected over a time period so a metric reading must aggregate individual samples over this period. The aggregation is specified by adding the aggregator suffix to the metric name: <code>metric.name.aggregator</code>. 
+ +The following aggregators are available: + +| Aggregator name (metric suffix) | Explanation | +| :--- | :--- | +| 95percentile | The 95th percentile of samples in the period | +| 99percentile | The 99th percentile of samples in the period | +| average | The average of samples in the period | +| count | The count of samples in the period | +| last | The last value sampled in the period | +| max | The max value sampled in the period | +| min | The min value sampled in the period | +| rate | The count of samples divided by the length of the period in seconds | +| sum | The sum of the sampled values in the period | + +### Metric sets defined in Vespa + +A metric set is a collection of metrics which can be referenced for convenience. +The following metric sets are defined in Vespa. + +<CardGroup> + <Card title="Vespa Metric Set Reference" icon="bar-chart" href="/en/reference/operations/metrics/vespa-metric-set" horizontal /> + <Card title="Default Metric Set Reference" icon="list" href="/en/reference/operations/metrics/default-metric-set" horizontal /> + <Card title="Metric Units Reference" icon="ruler" href="/en/reference/operations/metrics/metric-units" horizontal /> + <Card title="Container Metrics Reference" icon="box" href="/en/reference/operations/metrics/container" horizontal /> + <Card title="Distributor Metrics Reference" icon="code-branch" href="/en/reference/operations/metrics/distributor" horizontal /> + <Card title="Searchnode Metrics Reference" icon="search" href="/en/reference/operations/metrics/searchnode" horizontal /> + <Card title="Storage Metrics Reference" icon="database" href="/en/reference/operations/metrics/storage" horizontal /> + <Card title="Configserver Metrics Reference" icon="gear" href="/en/reference/operations/metrics/configserver" horizontal /> + <Card title="Logd Metrics Reference" icon="file-text" href="/en/reference/operations/metrics/logd" horizontal /> + <Card title="Node Admin Metrics Reference" icon="server" 
href="/en/reference/operations/metrics/nodeadmin" horizontal /> + <Card title="Slobrok Metrics Reference" icon="chart-bar" href="/en/reference/operations/metrics/slobrok" horizontal /> + <Card title="Clustercontroller Metrics Reference" icon="layer-group" href="/en/reference/operations/metrics/clustercontroller" horizontal /> + <Card title="Sentinel Metrics Reference" icon="shield" href="/en/reference/operations/metrics/sentinel" horizontal /> +</CardGroup> diff --git a/mintlify-docs/en/reference/operations/metrics/nodeadmin.mdx b/mintlify-docs/en/reference/operations/metrics/nodeadmin.mdx new file mode 100644 index 0000000000..c1332f564a --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/nodeadmin.mdx @@ -0,0 +1,9 @@ +--- +title: "NodeAdmin Metrics" +sidebarTitle: "Node Admin metrics" +--- + +| Name | Unit | Description | +| --- | --- | --- | +| endpoint.certificate.expiry.seconds | second | Time until node endpoint certificate expires | +| node-certificate.expiry.seconds | second | Time until node certificate expires | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/searchnode.mdx b/mintlify-docs/en/reference/operations/metrics/searchnode.mdx new file mode 100644 index 0000000000..8a1a98d66c --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/searchnode.mdx @@ -0,0 +1,263 @@ +--- +title: "SearchNode Metrics" +sidebarTitle: "Search node metrics" +--- + +| Name | Unit | Description | +| --- | --- | --- | +| content.proton.config.generation | version | The oldest config generation used by this search node | +| content.proton.documentdb.documents.total | document | The total number of documents in this document db (ready + not-ready) | +| content.proton.documentdb.documents.ready | document | The number of ready documents in this document db | +| content.proton.documentdb.documents.active | document | The number of active / searchable documents in this document db | +| 
content.proton.documentdb.documents.removed | document | The number of removed documents in this document db | +| content.proton.documentdb.index.docs\_in\_memory | document | Number of documents in memory index | +| content.proton.documentdb.disk\_usage | byte | The total disk usage (in bytes) for this document db | +| content.proton.documentdb.memory\_usage.allocated\_bytes | byte | The number of allocated bytes | +| content.proton.documentdb.memory\_usage.dead\_bytes | byte | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.memory\_usage.onhold\_bytes | byte | The number of bytes on hold | +| content.proton.documentdb.memory\_usage.used\_bytes | byte | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.heart\_beat\_age | second | How long ago (in seconds) heart beat maintenance job was run | +| content.proton.docsum.count | request | Docsum requests handled | +| content.proton.docsum.docs | document | Total docsums returned | +| content.proton.docsum.latency | millisecond | Docsum request latency | +| content.proton.search\_protocol.query.latency | second | Query request latency (seconds) | +| content.proton.search\_protocol.query.request\_size | byte | Query request size (network bytes) | +| content.proton.search\_protocol.query.reply\_size | byte | Query reply size (network bytes) | +| content.proton.search\_protocol.docsum.latency | second | Docsum request latency (seconds) | +| content.proton.search\_protocol.docsum.request\_size | byte | Docsum request size (network bytes) | +| content.proton.search\_protocol.docsum.reply\_size | byte | Docsum reply size (network bytes) | +| content.proton.search\_protocol.docsum.requested\_documents | document | Total requested document summaries | +| content.proton.executor.proton.queuesize | task | Size of executor proton task queue | +| content.proton.executor.proton.accepted | task | Number of executor proton accepted tasks | +| 
content.proton.executor.proton.wakeups | wakeup | Number of times an executor proton worker thread has been woken up | +| content.proton.executor.proton.utilization | fraction | Ratio of time the executor proton worker threads has been active | +| content.proton.executor.proton.rejected | task | Number of rejected tasks | +| content.proton.executor.flush.queuesize | task | Size of executor flush task queue | +| content.proton.executor.flush.accepted | task | Number of accepted executor flush tasks | +| content.proton.executor.flush.wakeups | wakeup | Number of times an executor flush worker thread has been woken up | +| content.proton.executor.flush.utilization | fraction | Ratio of time the executor flush worker threads has been active | +| content.proton.executor.flush.rejected | task | Number of rejected tasks | +| content.proton.executor.match.queuesize | task | Size of executor match task queue | +| content.proton.executor.match.accepted | task | Number of accepted executor match tasks | +| content.proton.executor.match.wakeups | wakeup | Number of times an executor match worker thread has been woken up | +| content.proton.executor.match.utilization | fraction | Ratio of time the executor match worker threads has been active | +| content.proton.executor.match.rejected | task | Number of rejected tasks | +| content.proton.executor.docsum.queuesize | task | Size of executor docsum task queue | +| content.proton.executor.docsum.accepted | task | Number of executor accepted docsum tasks | +| content.proton.executor.docsum.wakeups | wakeup | Number of times an executor docsum worker thread has been woken up | +| content.proton.executor.docsum.utilization | fraction | Ratio of time the executor docsum worker threads has been active | +| content.proton.executor.docsum.rejected | task | Number of rejected tasks | +| content.proton.executor.shared.queuesize | task | Size of executor shared task queue | +| content.proton.executor.shared.accepted | task | Number of 
executor shared accepted tasks | +| content.proton.executor.shared.wakeups | wakeup | Number of times an executor shared worker thread has been woken up | +| content.proton.executor.shared.utilization | fraction | Ratio of time the executor shared worker threads has been active | +| content.proton.executor.shared.rejected | task | Number of rejected tasks | +| content.proton.executor.warmup.queuesize | task | Size of executor warmup task queue | +| content.proton.executor.warmup.accepted | task | Number of accepted executor warmup tasks | +| content.proton.executor.warmup.wakeups | wakeup | Number of times a warmup executor worker thread has been woken up | +| content.proton.executor.warmup.utilization | fraction | Ratio of time the executor warmup worker threads has been active | +| content.proton.executor.warmup.rejected | task | Number of rejected tasks | +| content.proton.executor.field\_writer.queuesize | task | Size of executor field writer task queue | +| content.proton.executor.field\_writer.accepted | task | Number of accepted executor field writer tasks | +| content.proton.executor.field\_writer.wakeups | wakeup | Number of times an executor field writer worker thread has been woken up | +| content.proton.executor.field\_writer.utilization | fraction | Ratio of time the executor fieldwriter worker threads has been active | +| content.proton.executor.field\_writer.saturation | fraction | Ratio indicating the max saturation of underlying worker threads. A higher saturation than utilization indicates a bottleneck in one of the worker threads. 
| +| content.proton.executor.field\_writer.rejected | task | Number of rejected tasks | +| content.proton.documentdb.job.total | fraction | The job load average total of all job metrics | +| content.proton.documentdb.job.attribute\_flush | fraction | Flushing of attribute vector(s) to disk | +| content.proton.documentdb.job.memory\_index\_flush | fraction | Flushing of memory index to disk | +| content.proton.documentdb.job.disk\_index\_fusion | fraction | Fusion of disk indexes | +| content.proton.documentdb.job.document\_store\_flush | fraction | Flushing of document store to disk | +| content.proton.documentdb.job.document\_store\_compact | fraction | Compaction of document store on disk | +| content.proton.documentdb.job.bucket\_move | fraction | Moving of buckets between 'ready' and 'notready' sub databases | +| content.proton.documentdb.job.lid\_space\_compact | fraction | Compaction of lid space in document meta store and attribute vectors | +| content.proton.documentdb.job.removed\_documents\_prune | fraction | Pruning of removed documents in 'removed' sub database | +| content.proton.documentdb.threading\_service.master.queuesize | task | Size of threading service master task queue | +| content.proton.documentdb.threading\_service.master.accepted | task | Number of accepted threading service master tasks | +| content.proton.documentdb.threading\_service.master.wakeups | wakeup | Number of times a threading service master worker thread has been woken up | +| content.proton.documentdb.threading\_service.master.utilization | fraction | Ratio of time the threading service master worker threads has been active | +| content.proton.documentdb.threading\_service.master.rejected | task | Number of rejected tasks | +| content.proton.documentdb.threading\_service.index.queuesize | task | Size of threading service index task queue | +| content.proton.documentdb.threading\_service.index.accepted | task | Number of accepted threading service index tasks | +| 
content.proton.documentdb.threading\_service.index.wakeups | wakeup | Number of times a threading service index worker thread has been woken up | +| content.proton.documentdb.threading\_service.index.utilization | fraction | Ratio of time the threading service index worker threads has been active | +| content.proton.documentdb.threading\_service.index.rejected | task | Number of rejected tasks | +| content.proton.documentdb.threading\_service.summary.queuesize | task | Size of threading service summary task queue | +| content.proton.documentdb.threading\_service.summary.accepted | task | Number of accepted threading service summary tasks | +| content.proton.documentdb.threading\_service.summary.wakeups | wakeup | Number of times a threading service summary worker thread has been woken up | +| content.proton.documentdb.threading\_service.summary.utilization | fraction | Ratio of time the threading service summary worker threads has been active | +| content.proton.documentdb.threading\_service.summary.rejected | task | Number of rejected tasks | +| content.proton.documentdb.threading\_service.attribute\_field\_writer.accepted | task | Number of accepted tasks | +| content.proton.documentdb.threading\_service.attribute\_field\_writer.queuesize | task | Size of task queue | +| content.proton.documentdb.threading\_service.attribute\_field\_writer.rejected | task | Number of rejected tasks | +| content.proton.documentdb.threading\_service.attribute\_field\_writer.utilization | fraction | Ratio of time the worker threads has been active | +| content.proton.documentdb.threading\_service.attribute\_field\_writer.wakeups | wakeup | Number of times a worker thread has been woken up | +| content.proton.documentdb.threading\_service.index\_field\_inverter.accepted | task | Number of accepted tasks | +| content.proton.documentdb.threading\_service.index\_field\_inverter.queuesize | task | Size of task queue | +| 
content.proton.documentdb.threading\_service.index\_field\_inverter.rejected | task | Number of rejected tasks | +| content.proton.documentdb.threading\_service.index\_field\_inverter.utilization | fraction | Ratio of time the worker threads has been active | +| content.proton.documentdb.threading\_service.index\_field\_inverter.wakeups | wakeup | Number of times a worker thread has been woken up | +| content.proton.documentdb.threading\_service.index\_field\_writer.accepted | task | Number of accepted tasks | +| content.proton.documentdb.threading\_service.index\_field\_writer.queuesize | task | Size of task queue | +| content.proton.documentdb.threading\_service.index\_field\_writer.rejected | task | Number of rejected tasks | +| content.proton.documentdb.threading\_service.index\_field\_writer.utilization | fraction | Ratio of time the worker threads has been active | +| content.proton.documentdb.threading\_service.index\_field\_writer.wakeups | wakeup | Number of times a worker thread has been woken up | +| content.proton.documentdb.ready.lid\_space.lid\_bloat\_factor | fraction | The bloat factor of this lid space, indicating the total amount of holes in the allocated lid space ((lid\_limit - used\_lids) / lid\_limit) | +| content.proton.documentdb.ready.lid\_space.lid\_fragmentation\_factor | fraction | The fragmentation factor of this lid space, indicating the amount of holes in the currently used part of the lid space ((highest\_used\_lid - used\_lids) / highest\_used\_lid) | +| content.proton.documentdb.ready.lid\_space.lid\_limit | documentid | The size of the allocated lid space | +| content.proton.documentdb.ready.lid\_space.highest\_used\_lid | documentid | The highest used lid | +| content.proton.documentdb.ready.lid\_space.used\_lids | documentid | The number of lids used | +| content.proton.documentdb.ready.lid\_space.lowest\_free\_lid | documentid | The lowest free local document id | +| 
content.proton.documentdb.notready.lid\_space.lid\_bloat\_factor | fraction | The bloat factor of this lid space, indicating the total amount of holes in the allocated lid space ((lid\_limit - used\_lids) / lid\_limit) | +| content.proton.documentdb.notready.lid\_space.lid\_fragmentation\_factor | fraction | The fragmentation factor of this lid space, indicating the amount of holes in the currently used part of the lid space ((highest\_used\_lid - used\_lids) / highest\_used\_lid) | +| content.proton.documentdb.notready.lid\_space.lid\_limit | documentid | The size of the allocated lid space | +| content.proton.documentdb.notready.lid\_space.highest\_used\_lid | documentid | The highest used lid | +| content.proton.documentdb.notready.lid\_space.used\_lids | documentid | The number of lids used | +| content.proton.documentdb.notready.lid\_space.lowest\_free\_lid | documentid | The lowest free local document id | +| content.proton.documentdb.removed.lid\_space.lid\_bloat\_factor | fraction | The bloat factor of this lid space, indicating the total amount of holes in the allocated lid space ((lid\_limit - used\_lids) / lid\_limit) | +| content.proton.documentdb.removed.lid\_space.lid\_fragmentation\_factor | fraction | The fragmentation factor of this lid space, indicating the amount of holes in the currently used part of the lid space ((highest\_used\_lid - used\_lids) / highest\_used\_lid) | +| content.proton.documentdb.removed.lid\_space.lid\_limit | documentid | The size of the allocated lid space | +| content.proton.documentdb.removed.lid\_space.highest\_used\_lid | documentid | The highest used lid | +| content.proton.documentdb.removed.lid\_space.used\_lids | documentid | The number of lids used | +| content.proton.documentdb.removed.lid\_space.lowest\_free\_lid | documentid | The lowest free local document id | +| content.proton.documentdb.bucket\_move.buckets\_pending | bucket | The number of buckets left to move | +| content.proton.resource\_usage.disk | 
fraction | The relative amount of disk used by this content node (transient usage not included, value in the range \[0, 1\]). Same value as reported to the cluster controller | +| content.proton.resource\_usage.disk\_usage.total | fraction | The total relative amount of disk used by this content node (value in the range \[0, 1\]) | +| content.proton.resource\_usage.disk\_usage.total\_utilization | fraction | The relative amount of disk used compared to the content node disk resource limit | +| content.proton.resource\_usage.disk\_usage.transient | fraction | The relative amount of transient disk used by this content node (value in the range \[0, 1\]) | +| content.proton.resource\_usage.disk\_usage.reserved | fraction | The relative amount of reserved disk space for this content node (value in the range \[0, 1\]) | +| content.proton.resource\_usage.disk\_usage.used\_and\_reserved | fraction | The relative amount of disk used and reserved disk space by this content node (transient usage not included, value in the range \[0, 1\]) | +| content.proton.resource\_usage.memory | fraction | The relative amount of memory used by this content node (transient usage not included, value in the range \[0, 1\]). 
Same value as reported to the cluster controller | +| content.proton.resource\_usage.memory\_usage.total | fraction | The total relative amount of memory used by this content node (value in the range \[0, 1\]) | +| content.proton.resource\_usage.memory\_usage.total\_utilization | fraction | The relative amount of memory used compared to the content node memory resource limit | +| content.proton.resource\_usage.memory\_usage.transient | fraction | The relative amount of transient memory used by this content node (value in the range \[0, 1\]) | +| content.proton.resource\_usage.memory\_mappings | file | The number of memory mapped files | +| content.proton.resource\_usage.open\_file\_descriptors | file | The number of open files | +| content.proton.resource\_usage.feeding\_blocked | binary | Whether feeding is blocked due to resource limits being reached (value is either 0 or 1) | +| content.proton.resource\_usage.malloc\_arena | byte | Size of malloc arena | +| content.proton.documentdb.attribute.resource\_usage.address\_space | fraction | The max relative address space used among components in all attribute vectors in this document db (value in the range \[0, 1\]) | +| content.proton.documentdb.attribute.resource\_usage.feeding\_blocked | binary | Whether feeding is blocked due to attribute resource limits being reached (value is either 0 or 1) | +| content.proton.documentdb.attribute.memory\_usage.allocated\_bytes | byte | The number of allocated bytes | +| content.proton.documentdb.attribute.memory\_usage.dead\_bytes | byte | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.attribute.memory\_usage.onhold\_bytes | byte | The number of bytes on hold | +| content.proton.documentdb.attribute.memory\_usage.used\_bytes | byte | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.resource\_usage.cpu\_util.setup | fraction | cpu used by system init and (re-)configuration | +| content.proton.resource\_usage.cpu\_util.read | 
fraction | cpu used by reading data from the system | +| content.proton.resource\_usage.cpu\_util.write | fraction | cpu used by writing data to the system | +| content.proton.resource\_usage.cpu\_util.compact | fraction | cpu used by internal data re-structuring | +| content.proton.resource\_usage.cpu\_util.other | fraction | cpu used by work not classified as a specific category | +| content.proton.transactionlog.entries | record | The current number of entries in the transaction log | +| content.proton.transactionlog.disk\_usage | byte | The disk usage (in bytes) of the transaction log | +| content.proton.transactionlog.replay\_time | second | The replay time (in seconds) of the transaction log during start-up | +| content.proton.documentdb.ready.document\_store.disk\_usage | byte | Disk space usage in bytes | +| content.proton.documentdb.ready.document\_store.disk\_bloat | byte | Disk space bloat in bytes | +| content.proton.documentdb.ready.document\_store.max\_bucket\_spread | fraction | Max bucket spread in underlying files (sum(unique buckets in each chunk)/unique buckets in file) | +| content.proton.documentdb.ready.document\_store.memory\_usage.allocated\_bytes | byte | The number of allocated bytes | +| content.proton.documentdb.ready.document\_store.memory\_usage.used\_bytes | byte | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.ready.document\_store.memory\_usage.dead\_bytes | byte | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.ready.document\_store.memory\_usage.onhold\_bytes | byte | The number of bytes on hold | +| content.proton.documentdb.notready.document\_store.disk\_usage | byte | Disk space usage in bytes | +| content.proton.documentdb.notready.document\_store.disk\_bloat | byte | Disk space bloat in bytes | +| content.proton.documentdb.notready.document\_store.max\_bucket\_spread | fraction | Max bucket spread in underlying files (sum(unique buckets in each chunk)/unique buckets 
in file) | +| content.proton.documentdb.notready.document\_store.memory\_usage.allocated\_bytes | byte | The number of allocated bytes | +| content.proton.documentdb.notready.document\_store.memory\_usage.used\_bytes | byte | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.notready.document\_store.memory\_usage.dead\_bytes | byte | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.notready.document\_store.memory\_usage.onhold\_bytes | byte | The number of bytes on hold | +| content.proton.documentdb.removed.document\_store.disk\_usage | byte | Disk space usage in bytes | +| content.proton.documentdb.removed.document\_store.disk\_bloat | byte | Disk space bloat in bytes | +| content.proton.documentdb.removed.document\_store.max\_bucket\_spread | fraction | Max bucket spread in underlying files (sum(unique buckets in each chunk)/unique buckets in file) | +| content.proton.documentdb.removed.document\_store.memory\_usage.allocated\_bytes | byte | The number of allocated bytes | +| content.proton.documentdb.removed.document\_store.memory\_usage.used\_bytes | byte | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.removed.document\_store.memory\_usage.dead\_bytes | byte | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.removed.document\_store.memory\_usage.onhold\_bytes | byte | The number of bytes on hold | +| content.proton.documentdb.ready.document\_store.cache.elements | item | Number of elements in the cache | +| content.proton.documentdb.ready.document\_store.cache.memory\_usage | byte | Memory usage of the cache (in bytes) | +| content.proton.documentdb.ready.document\_store.cache.hit\_rate | fraction | Rate of hits in the cache compared to number of lookups | +| content.proton.documentdb.ready.document\_store.cache.lookups | operation | Number of lookups in the cache (hits + misses) | +| 
content.proton.documentdb.ready.document\_store.cache.invalidations | operation | Number of invalidations (erased elements) in the cache. | +| content.proton.documentdb.notready.document\_store.cache.elements | item | Number of elements in the cache | +| content.proton.documentdb.notready.document\_store.cache.memory\_usage | byte | Memory usage of the cache (in bytes) | +| content.proton.documentdb.notready.document\_store.cache.hit\_rate | fraction | Rate of hits in the cache compared to number of lookups | +| content.proton.documentdb.notready.document\_store.cache.lookups | operation | Number of lookups in the cache (hits + misses) | +| content.proton.documentdb.notready.document\_store.cache.invalidations | operation | Number of invalidations (erased elements) in the cache. | +| content.proton.documentdb.removed.document\_store.cache.elements | item | Number of elements in the cache | +| content.proton.documentdb.removed.document\_store.cache.hit\_rate | fraction | Rate of hits in the cache compared to number of lookups | +| content.proton.documentdb.removed.document\_store.cache.invalidations | item | Number of invalidations (erased elements) in the cache. 
| +| content.proton.documentdb.removed.document\_store.cache.lookups | operation | Number of lookups in the cache (hits + misses) | +| content.proton.documentdb.removed.document\_store.cache.memory\_usage | byte | Memory usage of the cache (in bytes) | +| content.proton.documentdb.ready.attribute.memory\_usage.allocated\_bytes | byte | The number of allocated bytes | +| content.proton.documentdb.ready.attribute.memory\_usage.used\_bytes | byte | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.ready.attribute.memory\_usage.dead\_bytes | byte | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.ready.attribute.memory\_usage.onhold\_bytes | byte | The number of bytes on hold | +| content.proton.documentdb.ready.attribute.disk\_usage | byte | Disk space usage (in bytes) of the flushed snapshot of this attribute for this document type | +| content.proton.documentdb.notready.attribute.memory\_usage.allocated\_bytes | byte | The number of allocated bytes | +| content.proton.documentdb.notready.attribute.memory\_usage.used\_bytes | byte | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.notready.attribute.memory\_usage.dead\_bytes | byte | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.notready.attribute.memory\_usage.onhold\_bytes | byte | The number of bytes on hold | +| content.proton.index.cache.postinglist.elements | item | Number of elements in the cache. Contains disk index posting list files across all document types | +| content.proton.index.cache.postinglist.memory\_usage | byte | Memory usage of the cache (in bytes). Contains disk index posting list files across all document types | +| content.proton.index.cache.postinglist.hit\_rate | fraction | Rate of hits in the cache compared to number of lookups. 
Contains disk index posting list files across all document types | +| content.proton.index.cache.postinglist.lookups | operation | Number of lookups in the cache (hits + misses). Contains disk index posting list files across all document types | +| content.proton.index.cache.postinglist.invalidations | operation | Number of invalidations (erased elements) in the cache. Contains disk index posting list files across all document types | +| content.proton.index.cache.bitvector.elements | item | Number of elements in the cache. Contains disk index bitvector files across all document types | +| content.proton.index.cache.bitvector.memory\_usage | byte | Memory usage of the cache (in bytes). Contains disk index bitvector files across all document types | +| content.proton.index.cache.bitvector.hit\_rate | fraction | Rate of hits in the cache compared to number of lookups. Contains disk index bitvector files across all document types | +| content.proton.index.cache.bitvector.lookups | operation | Number of lookups in the cache (hits + misses). Contains disk index bitvector files across all document types | +| content.proton.index.cache.bitvector.invalidations | operation | Number of invalidations (erased elements) in the cache. 
Contains disk index bitvector files across all document types | +| content.proton.documentdb.index.memory\_usage.allocated\_bytes | byte | The number of allocated bytes for the memory index for this document type | +| content.proton.documentdb.index.memory\_usage.used\_bytes | byte | The number of used bytes (`<=` allocated\_bytes) for the memory index for this document type | +| content.proton.documentdb.index.memory\_usage.dead\_bytes | byte | The number of dead bytes (`<=` used\_bytes) for the memory index for this document type | +| content.proton.documentdb.index.memory\_usage.onhold\_bytes | byte | The number of bytes on hold for the memory index for this document type | +| content.proton.documentdb.index.disk\_usage | byte | Disk space usage (in bytes) of all disk indexes for this document type | +| content.proton.documentdb.index.indexes | item | Number of disk or memory indexes | +| content.proton.documentdb.index.io.search.read\_bytes | byte | Bytes read from disk index posting list and bitvector files as part of search for this document type | +| content.proton.documentdb.index.io.search.cached\_read\_bytes | byte | Bytes read from cached disk index posting list and bitvector files as part of search for this document type | +| content.proton.documentdb.ready.index.memory\_usage.allocated\_bytes | byte | The number of allocated bytes for this index field in the memory index for this document type | +| content.proton.documentdb.ready.index.disk\_usage | byte | Disk space usage (in bytes) of this index field in all disk indexes for this document type | +| content.proton.documentdb.matching.queries | query | Number of queries executed | +| content.proton.documentdb.matching.soft\_doomed\_queries | query | Number of queries hitting the soft timeout | +| content.proton.documentdb.matching.query\_latency | second | Total average latency (sec) when matching and ranking a query | +| content.proton.documentdb.matching.query\_setup\_time | second | Average time 
(sec) spent setting up and tearing down queries | +| content.proton.documentdb.matching.docs\_matched | document | Number of documents matched | +| content.proton.documentdb.matching.docs\_ranked | document | Number of documents ranked (first phase) | +| content.proton.documentdb.matching.docs\_reranked | document | Number of documents re-ranked (second phase) | +| content.proton.documentdb.matching.exact\_nns\_distances\_computed | distance | Number of distances computed in exact nearest-neighbor search | +| content.proton.documentdb.matching.approximate\_nns\_distances\_computed | distance | Number of distances computed in approximate nearest-neighbor search | +| content.proton.documentdb.matching.approximate\_nns\_nodes\_visited | graph\_node | Number of nodes visited in approximate nearest-neighbor search | +| content.proton.documentdb.matching.rank\_profile.queries | query | Number of queries executed | +| content.proton.documentdb.matching.rank\_profile.soft\_doomed\_queries | query | Number of queries hitting the soft timeout | +| content.proton.documentdb.matching.rank\_profile.soft\_doom\_factor | fraction | Factor used to compute soft-timeout | +| content.proton.documentdb.matching.rank\_profile.query\_latency | second | Total average latency (sec) when matching and ranking a query | +| content.proton.documentdb.matching.rank\_profile.query\_setup\_time | second | Average time (sec) spent setting up and tearing down queries | +| content.proton.documentdb.matching.rank\_profile.grouping\_time | second | Average time (sec) spent on grouping | +| content.proton.documentdb.matching.rank\_profile.rerank\_time | second | Average time (sec) spent on 2nd phase ranking | +| content.proton.documentdb.matching.rank\_profile.docs\_matched | document | Number of documents matched | +| content.proton.documentdb.matching.rank\_profile.docs\_ranked | document | Number of documents ranked (first phase) | +| content.proton.documentdb.matching.rank\_profile.docs\_reranked | 
document | Number of documents re-ranked (second phase) | +| content.proton.documentdb.matching.rank\_profile.exact\_nns\_distances\_computed | distance | Number of distances computed in exact nearest-neighbor search | +| content.proton.documentdb.matching.rank\_profile.approximate\_nns\_distances\_computed | distance | Number of distances computed in approximate nearest-neighbor search | +| content.proton.documentdb.matching.rank\_profile.approximate\_nns\_nodes\_visited | graph\_node | Number of nodes visited in approximate nearest-neighbor search | +| content.proton.documentdb.matching.rank\_profile.limited\_queries | query | Number of queries limited in match phase | +| content.proton.documentdb.matching.rank\_profile.docid\_partition.active\_time | second | Time (sec) spent doing actual work | +| content.proton.documentdb.matching.rank\_profile.docid\_partition.docs\_matched | document | Number of documents matched | +| content.proton.documentdb.matching.rank\_profile.docid\_partition.docs\_ranked | document | Number of documents ranked (first phase) | +| content.proton.documentdb.matching.rank\_profile.docid\_partition.docs\_reranked | document | Number of documents re-ranked (second phase) | +| content.proton.documentdb.matching.rank\_profile.docid\_partition.wait\_time | second | Time (sec) spent waiting for other external threads and resources | +| content.proton.documentdb.matching.rank\_profile.match\_time | second | Average time (sec) for matching a query (1st phase) | +| content.proton.documentdb.feeding.commit.operations | operation | Number of operations included in a commit | +| content.proton.documentdb.feeding.commit.latency | second | Latency for commit in seconds | +| content.proton.session\_cache.grouping.num\_cached | session | Number of currently cached sessions | +| content.proton.session\_cache.grouping.num\_dropped | session | Number of dropped cached sessions | +| content.proton.session\_cache.grouping.num\_insert | session | Number of 
inserted sessions | +| content.proton.session\_cache.grouping.num\_pick | session | Number of picked sessions | +| content.proton.session\_cache.grouping.num\_timedout | session | Number of timed out sessions | +| content.proton.session\_cache.search.num\_cached | session | Number of currently cached sessions | +| content.proton.session\_cache.search.num\_dropped | session | Number of dropped cached sessions | +| content.proton.session\_cache.search.num\_insert | session | Number of inserted sessions | +| content.proton.session\_cache.search.num\_pick | session | Number of picked sessions | +| content.proton.session\_cache.search.num\_timedout | session | Number of timed out sessions | +| metricmanager.periodichooklatency | millisecond | Time in ms used to update a single periodic hook | +| metricmanager.resetlatency | millisecond | Time in ms used to reset all metrics. | +| metricmanager.sleeptime | millisecond | Time in ms worker thread is sleeping | +| metricmanager.snapshothooklatency | millisecond | Time in ms used to update a single snapshot hook | +| metricmanager.snapshotlatency | millisecond | Time in ms used to take a snapshot | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/sentinel.mdx b/mintlify-docs/en/reference/operations/metrics/sentinel.mdx new file mode 100644 index 0000000000..4c1985740d --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/sentinel.mdx @@ -0,0 +1,11 @@ +--- +title: "Sentinel Metrics" +sidebarTitle: "Sentinel metrics" +--- + +| Name | Unit | Description | +| --- | --- | --- | +| sentinel.restarts | restart | Number of service restarts done by the sentinel | +| sentinel.totalRestarts | restart | Total number of service restarts done by the sentinel since the sentinel was started | +| sentinel.uptime | second | Time the sentinel has been running | +| sentinel.running | instance | Number of services the sentinel has running currently | \ No newline at end of file diff --git
a/mintlify-docs/en/reference/operations/metrics/slobrok.mdx b/mintlify-docs/en/reference/operations/metrics/slobrok.mdx new file mode 100644 index 0000000000..f2c226b470 --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/slobrok.mdx @@ -0,0 +1,12 @@ +--- +title: "Slobrok Metrics" +sidebarTitle: "Slobrok metrics" +--- + +| Name | Unit | Description | +| --- | --- | --- | +| slobrok.heartbeats.failed | request | Number of heartbeat requests failed | +| slobrok.requests.register | request | Number of register requests received | +| slobrok.requests.mirror | request | Number of mirroring requests received | +| slobrok.requests.admin | request | Number of administrative requests received | +| slobrok.missing.consensus | second | Number of seconds without full consensus with all other brokers | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/storage.mdx b/mintlify-docs/en/reference/operations/metrics/storage.mdx new file mode 100644 index 0000000000..8d91e4280a --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/storage.mdx @@ -0,0 +1,215 @@ +--- +title: "Storage Metrics" +sidebarTitle: "Storage metrics" +--- + +| Name | Unit | Description | +| --- | --- | --- | +| vds.datastored.alldisks.buckets | bucket | Number of buckets managed | +| vds.datastored.alldisks.docs | document | Number of documents stored | +| vds.datastored.alldisks.bytes | byte | Number of bytes stored | +| vds.datastored.alldisks.activebuckets | bucket | Number of active buckets on the node | +| vds.datastored.alldisks.readybuckets | bucket | Number of ready buckets on the node | +| vds.visitor.allthreads.averagevisitorlifetime | millisecond | Average lifetime of a visitor | +| vds.visitor.allthreads.averagequeuewait | millisecond | Average time an operation spends in input queue. | +| vds.visitor.allthreads.queuesize | operation | Size of input message queue. 
| +| vds.visitor.allthreads.completed | operation | Number of visitors completed | +| vds.visitor.allthreads.created | operation | Number of visitors created. | +| vds.visitor.allthreads.failed | operation | Number of visitors failed | +| vds.visitor.allthreads.averagemessagesendtime | millisecond | Average time it takes for messages to be sent to their target (and be replied to) | +| vds.visitor.allthreads.averageprocessingtime | millisecond | Average time used to process visitor requests | +| vds.visitor.allthreads.aborted | instance | Number of visitors aborted. | +| vds.visitor.allthreads.averagevisitorcreationtime | millisecond | Average time spent creating a visitor instance | +| vds.visitor.allthreads.destination\_failure\_replies | instance | Number of failure replies received from the visitor destination | +| vds.filestor.queuesize | operation | Size of input message queue. | +| vds.filestor.averagequeuewait | millisecond | Average time an operation spends in input queue. | +| vds.filestor.active\_operations.size | operation | Number of concurrent active operations | +| vds.filestor.active\_operations.latency | millisecond | Latency (in ms) for completed operations | +| vds.filestor.throttle\_window\_size | operation | Current size of async operation throttler window size | +| vds.filestor.throttle\_waiting\_threads | thread | Number of threads waiting to acquire a throttle token | +| vds.filestor.throttle\_active\_tokens | instance | Current number of active throttle tokens | +| vds.filestor.allthreads.mergemetadatareadlatency | millisecond | Time spent in a merge step to check metadata of current node to see what data it has. | +| vds.filestor.allthreads.mergedatareadlatency | millisecond | Time spent in a merge step to read data other nodes need. | +| vds.filestor.allthreads.mergedatawritelatency | millisecond | Time spent in a merge step to write data needed to current node. 
| +| vds.filestor.allthreads.mergeavgdatareceivedneeded | byte | Amount of data transferred from previous node in chain that we needed to apply locally. | +| vds.filestor.allthreads.mergebuckets.count | request | Number of requests processed. | +| vds.filestor.allthreads.mergebuckets.failed | request | Number of failed requests. | +| vds.filestor.allthreads.mergebuckets.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.mergelatencytotal | millisecond | Latency of total merge operation, from master node receives it, until merge is complete and master node replies. | +| vds.filestor.allthreads.merge\_put\_latency | millisecond | Latency of individual puts that are part of merge operations | +| vds.filestor.allthreads.merge\_remove\_latency | millisecond | Latency of individual removes that are part of merge operations | +| vds.filestor.allstripes.throttled\_rpc\_direct\_dispatches | instance | Number of times an RPC thread could not directly dispatch an async operation directly to Proton because it was disallowed by the throttle policy | +| vds.filestor.allstripes.throttled\_persistence\_thread\_polls | instance | Number of times a persistence thread could not immediately dispatch a queued async operation because it was disallowed by the throttle policy | +| vds.filestor.allstripes.timeouts\_waiting\_for\_throttle\_token | instance | Number of times a persistence thread timed out waiting for an available throttle policy token | +| vds.filestor.allstripes.averagequeuewait | millisecond | Average time an operation spends in input queue. | +| vds.filestor.allthreads.put.count | operation | Number of requests processed. | +| vds.filestor.allthreads.put.failed | operation | Number of failed requests. | +| vds.filestor.allthreads.put.test\_and\_set\_failed | operation | Number of operations that were skipped due to a test-and-set condition not met | +| vds.filestor.allthreads.put.latency | millisecond | Latency of successful requests. 
| +| vds.filestor.allthreads.put.request\_size | byte | Size of requests, in bytes | +| vds.filestor.allthreads.remove.count | operation | Number of requests processed. | +| vds.filestor.allthreads.remove.failed | operation | Number of failed requests. | +| vds.filestor.allthreads.remove.test\_and\_set\_failed | operation | Number of operations that were skipped due to a test-and-set condition not met | +| vds.filestor.allthreads.remove.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.remove.request\_size | byte | Size of requests, in bytes | +| vds.filestor.allthreads.remove.not\_found | request | Number of requests that could not be completed due to source document not found. | +| vds.filestor.allthreads.get.count | operation | Number of requests processed. | +| vds.filestor.allthreads.get.failed | operation | Number of failed requests. | +| vds.filestor.allthreads.get.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.get.request\_size | byte | Size of requests, in bytes | +| vds.filestor.allthreads.get.not\_found | request | Number of requests that could not be completed due to source document not found. | +| vds.filestor.allthreads.update.count | request | Number of requests processed. | +| vds.filestor.allthreads.update.failed | request | Number of failed requests. | +| vds.filestor.allthreads.update.test\_and\_set\_failed | request | Number of requests that were skipped due to a test-and-set condition not met | +| vds.filestor.allthreads.update.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.update.request\_size | byte | Size of requests, in bytes | +| vds.filestor.allthreads.update.latency\_read | millisecond | Latency of the source read in the request. | +| vds.filestor.allthreads.update.not\_found | request | Number of requests that could not be completed due to source document not found. 
| +| vds.filestor.allthreads.createiterator.count | request | Number of requests processed. | +| vds.filestor.allthreads.createiterator.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.createiterator.failed | request | Number of failed requests. | +| vds.filestor.allthreads.visit.count | request | Number of requests processed. | +| vds.filestor.allthreads.visit.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.visit.docs | document | Number of entries read per iterate call | +| vds.filestor.allthreads.visit.failed | request | Number of failed requests. | +| vds.filestor.allthreads.remove\_location.count | request | Number of requests processed. | +| vds.filestor.allthreads.remove\_location.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.remove\_location.failed | request | Number of failed requests. | +| vds.filestor.allthreads.splitbuckets.count | request | Number of requests processed. | +| vds.filestor.allthreads.splitbuckets.failed | request | Number of failed requests. | +| vds.filestor.allthreads.splitbuckets.latency | request | Latency of successful requests. | +| vds.filestor.allthreads.joinbuckets.count | request | Number of requests processed. | +| vds.filestor.allthreads.joinbuckets.failed | request | Number of failed requests. | +| vds.filestor.allthreads.joinbuckets.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.deletebuckets.count | request | Number of requests processed. | +| vds.filestor.allthreads.deletebuckets.failed | request | Number of failed requests. | +| vds.filestor.allthreads.deletebuckets.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.remove\_by\_gid.count | request | Number of requests processed. | +| vds.filestor.allthreads.remove\_by\_gid.failed | request | Number of failed requests. 
| +| vds.filestor.allthreads.remove\_by\_gid.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.setbucketstates.count | request | Number of requests processed. | +| vds.filestor.allthreads.setbucketstates.failed | request | Number of failed requests. | +| vds.filestor.allthreads.setbucketstates.latency | millisecond | Latency of successful requests. | +| vds.mergethrottler.averagequeuewaitingtime | millisecond | Time merges spent in the throttler queue | +| vds.mergethrottler.queuesize | instance | Length of merge queue | +| vds.mergethrottler.active\_window\_size | instance | Number of merges active within the pending window size | +| vds.mergethrottler.estimated\_merge\_memory\_usage | byte | An estimated upper bound of the memory usage (in bytes) of the merges currently in the active window | +| vds.mergethrottler.bounced\_due\_to\_back\_pressure | instance | Number of merges bounced due to resource exhaustion back-pressure | +| vds.mergethrottler.locallyexecutedmerges.ok | instance | The number of successful merges for 'locallyexecutedmerges' | +| vds.mergethrottler.locallyexecutedmerges.failures.aborted | operation | The number of merges that failed because the storage node was (most likely) shutting down | +| vds.mergethrottler.locallyexecutedmerges.failures.bucketnotfound | operation | The number of operations that failed because the bucket did not exist | +| vds.mergethrottler.locallyexecutedmerges.failures.busy | operation | The number of merges that failed because the storage node was busy | +| vds.mergethrottler.locallyexecutedmerges.failures.exists | operation | The number of merges that were rejected due to a merge operation for their bucket already being processed | +| vds.mergethrottler.locallyexecutedmerges.failures.notready | operation | The number of merges discarded because distributor was not ready | +| vds.mergethrottler.locallyexecutedmerges.failures.other | operation | The number of other failures | +| 
vds.mergethrottler.locallyexecutedmerges.failures.rejected | operation | The number of merges that were rejected | +| vds.mergethrottler.locallyexecutedmerges.failures.timeout | operation | The number of merges that failed because they timed out towards storage | +| vds.mergethrottler.locallyexecutedmerges.failures.total | operation | Sum of all failures | +| vds.mergethrottler.locallyexecutedmerges.failures.wrongdistribution | operation | The number of merges that were discarded (flushed) because they were initiated at an older cluster state than the current | +| vds.mergethrottler.mergechains.ok | operation | The number of successful merges for 'mergechains' | +| vds.mergethrottler.mergechains.failures.busy | operation | The number of merges that failed because the storage node was busy | +| vds.mergethrottler.mergechains.failures.total | operation | Sum of all failures | +| vds.mergethrottler.mergechains.failures.exists | operation | The number of merges that were rejected due to a merge operation for their bucket already being processed | +| vds.mergethrottler.mergechains.failures.notready | operation | The number of merges discarded because distributor was not ready | +| vds.mergethrottler.mergechains.failures.other | operation | The number of other failures | +| vds.mergethrottler.mergechains.failures.rejected | operation | The number of merges that were rejected | +| vds.mergethrottler.mergechains.failures.timeout | operation | The number of merges that failed because they timed out towards storage | +| vds.mergethrottler.mergechains.failures.wrongdistribution | operation | The number of merges that were discarded (flushed) because they were initiated at an older cluster state than the current | +| vds.server.network.tls-handshakes-failed | operation | Number of client or server connection attempts that failed during TLS handshaking | +| vds.server.network.peer-authorization-failures | failure | Number of TLS connection attempts failed due to bad or missing 
peer certificate credentials | +| vds.server.network.client.tls-connections-established | connection | Number of secure mTLS connections established | +| vds.server.network.server.tls-connections-established | connection | Number of secure mTLS connections established | +| vds.server.network.client.insecure-connections-established | connection | Number of insecure (plaintext) connections established | +| vds.server.network.server.insecure-connections-established | connection | Number of insecure (plaintext) connections established | +| vds.server.network.tls-connections-broken | connection | Number of TLS connections broken due to failures during frame encoding or decoding | +| vds.server.network.failed-tls-config-reloads | failure | Number of times background reloading of TLS config has failed | +| vds.bouncer.unavailable\_node\_aborts | operation | Number of operations that were aborted due to the node (or target bucket space) being unavailable | +| vds.changedbucketownershiphandler.avg\_abort\_processing\_time | millisecond | Average time spent aborting operations for changed buckets | +| vds.changedbucketownershiphandler.external\_load\_ops\_aborted | operation | Number of outdated external load operations aborted | +| vds.changedbucketownershiphandler.ideal\_state\_ops\_aborted | operation | Number of outdated ideal state operations aborted | +| vds.communication.bucket\_space\_mapping\_failures | operation | Number of messages that could not be resolved to a known bucket space | +| vds.communication.convertfailures | operation | Number of messages that failed to get converted to storage API messages | +| vds.communication.exceptionmessageprocesstime | millisecond | Time transport thread uses to process a single message that fails with an exception thrown into communication manager | +| vds.communication.messageprocesstime | millisecond | Time transport thread uses to process a single message | +| vds.communication.messagequeue | item | Size of input message 
queue. | +| vds.communication.sendcommandlatency | millisecond | Average ms used to send commands to MBUS | +| vds.communication.sendreplylatency | millisecond | Average ms used to send replies to MBUS | +| vds.communication.toolittlememory | operation | Number of messages failed due to too little memory available | +| vds.datastored.bucket\_space.active\_buckets | bucket | Number of active buckets in the bucket space | +| vds.datastored.bucket\_space.bucket\_db.memory\_usage.allocated\_bytes | byte | The number of allocated bytes | +| vds.datastored.bucket\_space.bucket\_db.memory\_usage.dead\_bytes | byte | The number of dead bytes (`<=` used\_bytes) | +| vds.datastored.bucket\_space.bucket\_db.memory\_usage.onhold\_bytes | byte | The number of bytes on hold | +| vds.datastored.bucket\_space.bucket\_db.memory\_usage.used\_bytes | byte | The number of used bytes (`<=` allocated\_bytes) | +| vds.datastored.bucket\_space.buckets\_total | bucket | Total number of buckets present in the bucket space (ready + not ready) | +| vds.datastored.bucket\_space.entries | document | Number of entries (documents + tombstones) stored in the bucket space | +| vds.datastored.bucket\_space.bytes | byte | Bytes stored across all documents in the bucket space | +| vds.datastored.bucket\_space.docs | document | Documents stored in the bucket space | +| vds.datastored.bucket\_space.ready\_buckets | bucket | Number of ready buckets in the bucket space | +| vds.datastored.fullbucketinfolatency | millisecond | Amount of time spent to process a full bucket info request | +| vds.datastored.fullbucketinforeqsize | node | Number of distributors answered at once in full bucket info requests. | +| vds.datastored.simplebucketinforeqsize | bucket | Number of buckets returned in simple bucket info requests | +| vds.filestor.allthreads.applybucketdiff.count | request | Number of requests processed. | +| vds.filestor.allthreads.applybucketdiff.failed | request | Number of failed requests. 
| +| vds.filestor.allthreads.applybucketdiff.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.applybucketdiffreply | request | Number of applybucketdiff replies that have been processed. | +| vds.filestor.allthreads.bucketfixed | bucket | Number of times bucket has been fixed because of corruption | +| vds.filestor.allthreads.bucketverified.count | request | Number of requests processed. | +| vds.filestor.allthreads.bucketverified.failed | request | Number of failed requests. | +| vds.filestor.allthreads.bucketverified.latency | request | Latency of successful requests. | +| vds.filestor.allthreads.bytesmerged | byte | Total number of bytes merged into this node. | +| vds.filestor.allthreads.createbuckets.count | request | Number of requests processed. | +| vds.filestor.allthreads.createbuckets.failed | request | Number of failed requests. | +| vds.filestor.allthreads.createbuckets.latency | request | Latency of successful requests. | +| vds.filestor.allthreads.failedoperations | operation | Number of operations throwing exceptions. | +| vds.filestor.allthreads.getbucketdiff.count | request | Number of requests processed. | +| vds.filestor.allthreads.getbucketdiff.failed | request | Number of failed requests. | +| vds.filestor.allthreads.getbucketdiff.latency | request | Latency of successful requests. | +| vds.filestor.allthreads.getbucketdiffreply | request | Number of getbucketdiff replies that have been processed. | +| vds.filestor.allthreads.internaljoin.count | request | Number of requests processed. | +| vds.filestor.allthreads.internaljoin.failed | request | Number of failed requests. | +| vds.filestor.allthreads.internaljoin.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.movedbuckets.count | request | Number of requests processed. | +| vds.filestor.allthreads.movedbuckets.failed | request | Number of failed requests. 
| +| vds.filestor.allthreads.movedbuckets.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.operations | operation | Number of operations processed. | +| vds.filestor.allthreads.readbucketinfo.count | request | Number of requests processed. | +| vds.filestor.allthreads.readbucketinfo.failed | request | Number of failed requests. | +| vds.filestor.allthreads.readbucketinfo.latency | request | Latency of successful requests. | +| vds.filestor.allthreads.readbucketlist.count | request | Number of requests processed. | +| vds.filestor.allthreads.readbucketlist.failed | request | Number of failed requests. | +| vds.filestor.allthreads.readbucketlist.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.recheckbucketinfo.count | request | Number of requests processed. | +| vds.filestor.allthreads.recheckbucketinfo.failed | request | Number of failed requests. | +| vds.filestor.allthreads.recheckbucketinfo.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.revert.count | request | Number of requests processed. | +| vds.filestor.allthreads.revert.failed | request | Number of failed requests. | +| vds.filestor.allthreads.revert.latency | millisecond | Latency of successful requests. | +| vds.filestor.allthreads.revert.not\_found | request | Number of requests that could not be completed due to source document not found. | +| vds.filestor.allthreads.stat\_bucket.count | request | Number of requests processed. | +| vds.filestor.allthreads.stat\_bucket.failed | request | Number of failed requests. | +| vds.filestor.allthreads.stat\_bucket.latency | request | Latency of successful requests. | +| vds.filestor.bucket\_db\_init\_latency | millisecond | Time taken (in ms) to initialize bucket databases with information from the persistence provider | +| vds.filestor.directoryevents | operation | Number of directory events received. 
| +| vds.filestor.diskevents | operation | Number of disk events received. | +| vds.filestor.partitionevents | operation | Number of partition events received. | +| vds.filestor.pendingmerge | bucket | Number of buckets currently being merged. | +| vds.filestor.waitingforlockrate | operation | Number of times a filestor thread has needed to wait for lock to take next message in queue. | +| vds.mergethrottler.mergechains.failures.aborted | operation | The number of merges that failed because the storage node was (most likely) shutting down | +| vds.mergethrottler.mergechains.failures.bucketnotfound | operation | The number of operations that failed because the bucket did not exist | +| vds.server.memoryusage | byte | Amount of memory used by the storage subsystem | +| vds.server.memoryusage\_visiting | byte | Message use from visiting | +| vds.server.message\_memory\_use.highpri | byte | Message use from high priority storage messages | +| vds.server.message\_memory\_use.lowpri | byte | Message use from low priority storage messages | +| vds.server.message\_memory\_use.normalpri | byte | Message use from normal priority storage messages | +| vds.server.message\_memory\_use.total | byte | Message use from storage messages | +| vds.server.message\_memory\_use.veryhighpri | byte | Message use from very high priority storage messages | +| vds.state\_manager.invoke\_state\_listeners\_latency | millisecond | Time spent (in ms) propagating state changes to internal state listeners | +| vds.visitor.cv\_queueevictedwaittime | millisecond | Milliseconds waiting in create visitor queue, for visitors that were evicted from queue due to higher priority visitors coming | +| vds.visitor.cv\_queuefull | operation | Number of create visitor messages failed as queue is full | +| vds.visitor.cv\_queuesize | item | Size of create visitor queue | +| vds.visitor.cv\_queuetimeoutwaittime | millisecond | Milliseconds waiting in create visitor queue, for visitors that timed out while in the 
visitor queue | +| vds.visitor.cv\_queuewaittime | millisecond | Milliseconds waiting in create visitor queue, for visitors that were added to visitor queue but scheduled later | +| vds.visitor.cv\_skipqueue | operation | Number of times we could skip queue as we had free visitor spots | +| vds.server.network.rpc-capability-checks-failed | failure | Number of RPC operations that failed due to one or more missing capabilities | +| vds.server.network.status-capability-checks-failed | failure | Number of status page operations that failed due to one or more missing capabilities | +| vds.server.fnet.num-connections | connection | Total number of connection objects | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/metrics/vespa-metric-set.mdx b/mintlify-docs/en/reference/operations/metrics/vespa-metric-set.mdx new file mode 100644 index 0000000000..9fe82d7e4e --- /dev/null +++ b/mintlify-docs/en/reference/operations/metrics/vespa-metric-set.mdx @@ -0,0 +1,570 @@ +--- +title: "Vespa Metric Set" +sidebarTitle: "Vespa metric set" +--- +This document provides reference documentation for the Vespa metric set, including suffixes present per metric. If the suffix column contains "N/A" then the base name of the corresponding metric is used with no suffix. 
+ +## ClusterController Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| cluster-controller.down.count | node | last, max | Number of content nodes down | +| cluster-controller.initializing.count | node | last, max | Number of content nodes initializing | +| cluster-controller.maintenance.count | node | last, max | Number of content nodes in maintenance | +| cluster-controller.retired.count | node | last, max | Number of content nodes that are retired | +| cluster-controller.stopping.count | node | last | Number of content nodes currently stopping | +| cluster-controller.up.count | node | last, max | Number of content nodes up | +| cluster-controller.nodes-not-converged | node | max | Number of nodes not converging to the latest cluster state version | +| cluster-controller.stored-document-count | document | max | Total number of unique documents stored in the cluster | +| cluster-controller.stored-document-bytes | byte | max | Combined byte size of all unique documents stored in the cluster (not including replication) | +| cluster-controller.cluster-buckets-out-of-sync-ratio | fraction | max | Ratio of buckets in the cluster currently in need of syncing | +| cluster-controller.busy-tick-time-ms | millisecond | count, last, max, sum | Time busy | +| cluster-controller.idle-tick-time-ms | millisecond | count, last, max, sum | Time idle | +| cluster-controller.work-ms | millisecond | count, last, sum | Time used for actual work | +| cluster-controller.is-master | binary | last, max | 1 if this cluster controller is currently the master, or 0 if not | +| cluster-controller.remote-task-queue.size | operation | last | Number of remote tasks queued | +| cluster-controller.resource\_usage.nodes\_above\_limit | node | last, max | The number of content nodes above resource limit, blocking feed | +| cluster-controller.resource\_usage.max\_memory\_utilization | fraction | last, max | Current memory utilisation, for content node with the 
highest value | +| cluster-controller.resource\_usage.max\_disk\_utilization | fraction | last, max | Current disk space utilisation, for content node with the highest value | +| cluster-controller.resource\_usage.memory\_limit | fraction | last, max | Memory space limit as a fraction of available memory | +| cluster-controller.resource\_usage.disk\_limit | fraction | last, max | Disk space limit as a fraction of available disk space | +| reindexing.progress | fraction | last, max | Re-indexing progress | + +## Container Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| http.status.1xx | response | rate | Number of responses with a 1xx status | +| http.status.2xx | response | rate | Number of responses with a 2xx status | +| http.status.3xx | response | rate | Number of responses with a 3xx status | +| http.status.4xx | response | rate | Number of responses with a 4xx status | +| http.status.5xx | response | rate | Number of responses with a 5xx status | +| application\_generation | version | N/A | The currently live application config generation (aka session id) | +| jdisc.gc.count | operation | average, last, max | Number of JVM garbage collections done | +| jdisc.gc.ms | millisecond | average, last, max | Time spent in JVM garbage collection | +| jdisc.jvm | version | last | JVM runtime version | +| jdisc.memory\_mappings | operation | max | JDISC Memory mappings | +| jdisc.open\_file\_descriptors | item | max | JDISC Open file descriptors | +| jdisc.thread\_pool.unhandled\_exceptions | thread | count, last, max, min, sum | Number of exceptions thrown by tasks | +| jdisc.thread\_pool.work\_queue.capacity | thread | count, last, max, min, sum | Capacity of the task queue | +| jdisc.thread\_pool.work\_queue.size | thread | count, last, max, min, sum | Size of the task queue | +| jdisc.thread\_pool.rejected\_tasks | thread | count, last, max, min, sum | Number of tasks rejected by the thread pool | +| jdisc.thread\_pool.size | thread 
| count, last, max, min, sum | Size of the thread pool | +| jdisc.thread\_pool.max\_allowed\_size | thread | count, last, max, min, sum | The maximum allowed number of threads in the pool | +| jdisc.thread\_pool.active\_threads | thread | count, last, max, min, sum | Number of threads that are active | +| jdisc.deactivated\_containers.total | item | last, sum | JDISC Deactivated container instances | +| jdisc.deactivated\_containers.with\_retained\_refs.last | item | last | JDISC Deactivated container nodes with retained refs | +| jdisc.application.failed\_component\_graphs | item | rate | JDISC Application failed component graphs | +| jdisc.application.component\_graph.creation\_time\_millis | millisecond | last | JDISC Application component graph creation time | +| jdisc.application.component\_graph.reconfigurations | item | rate | JDISC Application component graph reconfigurations | +| jdisc.singleton.is\_active | item | last, max, min | JDISC Singleton is active | +| jdisc.singleton.activation.count | operation | last | JDISC Singleton activations | +| jdisc.singleton.activation.failure.count | operation | last | JDISC Singleton activation failures | +| jdisc.singleton.activation.millis | millisecond | last | JDISC Singleton activation time | +| jdisc.singleton.deactivation.count | operation | last | JDISC Singleton deactivations | +| jdisc.singleton.deactivation.failure.count | operation | last | JDISC Singleton deactivation failures | +| jdisc.singleton.deactivation.millis | millisecond | last | JDISC Singleton deactivation time | +| jdisc.http.ssl.handshake.failure.missing\_client\_cert | operation | rate | JDISC HTTP SSL Handshake failures due to missing client certificate | +| jdisc.http.ssl.handshake.failure.expired\_client\_cert | operation | rate | JDISC HTTP SSL Handshake failures due to expired client certificate | +| jdisc.http.ssl.handshake.failure.invalid\_client\_cert | operation | rate | JDISC HTTP SSL Handshake failures due to invalid client 
certificate | +| jdisc.http.ssl.handshake.failure.incompatible\_protocols | operation | rate | JDISC HTTP SSL Handshake failures due to incompatible protocols | +| jdisc.http.ssl.handshake.failure.incompatible\_chifers | operation | rate | JDISC HTTP SSL Handshake failures due to incompatible ciphers | +| jdisc.http.ssl.handshake.failure.connection\_closed | operation | rate | JDISC HTTP SSL Handshake failures due to connection closed | +| jdisc.http.ssl.handshake.failure.unknown | operation | rate | JDISC HTTP SSL Handshake failures for unknown reason | +| jdisc.http.latency | millisecond | count, max, sum | Request latency including the HTTP layer | +| jdisc.http.request.prematurely\_closed | request | rate | HTTP requests prematurely closed | +| jdisc.http.request.requests\_per\_connection | request | average, count, max, min, sum | HTTP requests per connection | +| jdisc.http.request.uri\_length | byte | count, max, sum | HTTP URI length | +| jdisc.http.request.content\_size | byte | count, max, sum | HTTP request content size | +| jdisc.http.requests | request | count, rate | HTTP requests | +| jdisc.http.filter.rule.blocked\_requests | request | rate | Number of requests blocked by filter | +| jdisc.http.filter.rule.allowed\_requests | request | rate | Number of requests allowed by filter | +| jdisc.http.filtering.request.handled | request | rate | Number of filtering requests handled | +| jdisc.http.filtering.request.unhandled | request | rate | Number of filtering requests unhandled | +| jdisc.http.filtering.response.handled | request | rate | Number of filtering responses handled | +| jdisc.http.filtering.response.unhandled | request | rate | Number of filtering responses unhandled | +| jdisc.http.handler.unhandled\_exceptions | request | rate | Number of unhandled exceptions in handler | +| jdisc.tls.capability\_checks.succeeded | operation | rate | Number of TLS capability checks succeeded | +| jdisc.tls.capability\_checks.failed | operation | rate | 
Number of TLS capability checks failed | +| jdisc.http.jetty.threadpool.thread.max | thread | count, last, max, min, sum | Configured maximum number of threads | +| jdisc.http.jetty.threadpool.thread.min | thread | count, last, max, min, sum | Configured minimum number of threads | +| jdisc.http.jetty.threadpool.thread.reserved | thread | count, last, max, min, sum | Configured number of reserved threads or -1 for heuristic | +| jdisc.http.jetty.threadpool.thread.busy | thread | count, last, max, min, sum | Number of threads executing internal and transient jobs | +| jdisc.http.jetty.threadpool.thread.total | thread | count, last, max, min, sum | Current number of threads | +| jdisc.http.jetty.threadpool.queue.size | thread | count, last, max, min, sum | Current size of the job queue | +| jdisc.http.jetty.http\_compliance.violation | failure | rate | Number of HTTP compliance violations | +| serverNumOpenConnections | connection | average, last, max | The number of currently open connections | +| serverNumConnections | connection | average, last, max | The total number of connections opened | +| serverBytesReceived | byte | count, sum | The number of bytes received by the server | +| serverBytesSent | byte | count, sum | The number of bytes sent from the server | +| handled.requests | operation | count | The number of requests handled per metrics snapshot | +| handled.latency | millisecond | count, max, sum | The time used for handling requests, excluding HTTP layer and rendering | +| httpapi\_latency | millisecond | count, max, sum | Duration for requests to the HTTP document APIs | +| httpapi\_pending | operation | count, max, sum | Document operations pending execution | +| httpapi\_num\_operations | operation | rate | Total number of document operations performed | +| httpapi\_num\_updates | operation | rate | Document update operations performed | +| httpapi\_num\_removes | operation | rate | Document remove operations performed | +| httpapi\_num\_puts | 
operation | rate | Document put operations performed | +| httpapi\_succeeded | operation | rate | Document operations that succeeded | +| httpapi\_failed | operation | rate | Document operations that failed | +| httpapi\_parse\_error | operation | rate | Document operations that failed due to document parse errors | +| httpapi\_condition\_not\_met | operation | rate | Document operations not applied due to condition not met | +| httpapi\_not\_found | operation | rate | Document operations not applied due to document not found | +| httpapi\_failed\_unknown | operation | rate | Document operations failed by unknown cause | +| httpapi\_failed\_timeout | operation | rate | Document operations failed by timeout | +| httpapi\_failed\_insufficient\_storage | operation | rate | Document operations failed by insufficient storage | +| httpapi\_queued\_operations | operation | last | Document operations queued for execution in /document/v1 API handler | +| httpapi\_queued\_bytes | byte | last | Total operation bytes queued for execution in /document/v1 API handler | +| httpapi\_queued\_age | second | last | Age in seconds of the oldest operation in the queue for /document/v1 API handler | +| httpapi\_mbus\_window\_size | operation | last | The window size of Messagebus's dynamic throttle policy for /document/v1 API handler | +| mem.heap.total | byte | average | Total available heap memory | +| mem.heap.free | byte | average | Free heap memory | +| mem.heap.used | byte | average, max | Currently used heap memory | +| mem.direct.total | byte | average | Total available direct memory | +| mem.direct.free | byte | average | Currently free direct memory | +| mem.direct.used | byte | average, max | Direct memory currently used | +| mem.direct.count | byte | max | Number of direct memory allocations | +| mem.native.total | byte | average | Total available native memory | +| mem.native.free | byte | average | Currently free native memory | +| mem.native.used | byte | average | Native 
memory currently used | +| athenz-tenant-cert.expiry.seconds | second | last, max, min | Time remaining until Athenz tenant certificate expires | +| container-iam-role.expiry.seconds | second | N/A | Time remaining until IAM role expires | +| peak\_qps | query\_per\_second | max | The highest number of qps for a second for this metrics snapshot | +| search\_connections | connection | count, max, sum | Number of search connections | +| feed.operations | operation | rate | Number of document feed operations | +| feed.latency | millisecond | count, max, sum | Feed latency | +| feed.http-requests | operation | count, rate | Feed HTTP requests | +| queries | operation | rate | Query volume | +| query\_container\_latency | millisecond | count, max, sum | The query execution time consumed in the container | +| query\_latency | millisecond | count, max, sum | The overall query latency as observed by the container cluster, excluding HTTP layer and rendering | +| query\_timeout | millisecond | count, max, min, sum | The amount of time allowed for query execution, from the client | +| failed\_queries | operation | rate | The number of failed queries | +| degraded\_queries | operation | rate | The number of degraded queries, e.g. 
due to some content nodes not responding in time | +| hits\_per\_query | hit\_per\_query | count, max, sum | The number of hits returned | +| query\_hit\_offset | hit | count, max, sum | The offset for hits returned | +| documents\_covered | document | count | The combined number of documents considered during query evaluation | +| documents\_total | document | count | The number of documents to be evaluated if all requests had been fully executed | +| documents\_target\_total | document | count | The target number of total documents to be evaluated when all data is in sync | +| jdisc.render.latency | nanosecond | average, count, last, max, min, sum | The time used by the container to render responses | +| query\_item\_count | item | count, max, sum | The number of query items (terms, phrases, etc.) | +| docproc.proctime | millisecond | count, max, sum | Time spent processing document | +| docproc.documents | document | count, max, min, sum | Number of processed documents | +| totalhits\_per\_query | hit\_per\_query | count, max, sum | The total number of documents found to match queries | +| empty\_results | operation | rate | Number of queries matching no documents | +| requestsOverQuota | operation | count, rate | The number of requests rejected due to exceeding quota | +| relevance.at\_1 | score | count, sum | The relevance of hit number 1 | +| relevance.at\_3 | score | count, sum | The relevance of hit number 3 | +| relevance.at\_10 | score | count, sum | The relevance of hit number 10 | +| error.timeout | operation | rate | Requests that timed out | +| error.backends\_oos | operation | rate | Requests that failed due to no available backend nodes | +| error.plugin\_failure | operation | rate | Requests that failed due to plugin failure | +| error.backend\_communication\_error | operation | rate | Requests that failed due to backend communication error | +| error.empty\_document\_summaries | operation | rate | Requests that failed due to missing document 
summaries | +| error.invalid\_query\_parameter | operation | rate | Requests that failed due to invalid query parameters | +| error.internal\_server\_error | operation | rate | Requests that failed due to internal server error | +| error.misconfigured\_server | operation | rate | Requests that failed due to misconfigured server | +| error.invalid\_query\_transformation | operation | rate | Requests that failed due to invalid query transformation | +| error.results\_with\_errors | operation | rate | The number of queries with error payload | +| error.unspecified | operation | rate | Requests that failed for an unspecified reason | +| error.unhandled\_exception | operation | rate | Requests that failed due to an unhandled exception | +| serverRejectedRequests | operation | count, rate | Deprecated. Use jdisc.thread\_pool.rejected\_tasks instead. | +| serverThreadPoolSize | thread | last, max | Deprecated. Use jdisc.thread\_pool.size instead. | +| serverActiveThreads | thread | count, last, max, min, sum | Deprecated. Use jdisc.thread\_pool.active\_threads instead. 
| +| jrt.transport.tls-certificate-verification-failures | failure | N/A | TLS certificate verification failures | +| jrt.transport.peer-authorization-failures | failure | N/A | TLS peer authorization failures | +| jrt.transport.server.tls-connections-established | connection | N/A | TLS server connections established | +| jrt.transport.client.tls-connections-established | connection | N/A | TLS client connections established | +| jrt.transport.server.unencrypted-connections-established | connection | N/A | Unencrypted server connections established | +| jrt.transport.client.unencrypted-connections-established | connection | N/A | Unencrypted client connections established | +| embedder.latency | millisecond | count, max, sum | Time spent creating an embedding | +| embedder.sequence\_length | item | count, max, sum | Number of tokens in the input sequence | +| embedder.request.count | request | count | Number of embedder API requests | +| embedder.request.failure.count | request | count | Number of failed embedder API requests | +| embedder.batch.size | item | count, max, sum | Number of items in each dispatched batch | +| embedder.batch.queue\_time | millisecond | count, max, sum | Time spent waiting in queue before batch dispatch | +| embedder.batch.count | operation | count | Number of batch dispatches | + +## Distributor Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| vds.idealstate.buckets\_rechecking | bucket | average | The number of buckets that we are rechecking for ideal state operations | +| vds.idealstate.idealstate\_diff | bucket | average | A number representing the current difference from the ideal state. 
This is a number that decreases steadily as the system is getting closer to the ideal state | +| vds.idealstate.buckets\_toofewcopies | bucket | average | The number of buckets the distributor controls that have less than the desired redundancy | +| vds.idealstate.buckets\_toomanycopies | bucket | average | The number of buckets the distributor controls that have more than the desired redundancy | +| vds.idealstate.buckets | bucket | average | The number of buckets the distributor controls | +| vds.idealstate.buckets\_notrusted | bucket | average | The number of buckets that have no trusted copies. | +| vds.idealstate.bucket\_replicas\_moving\_out | bucket | average | Bucket replicas that should be moved out, e.g. retirement case or node added to cluster that has higher ideal state priority. | +| vds.idealstate.bucket\_replicas\_copying\_out | bucket | average | Bucket replicas that should be copied out, e.g. node is in ideal state but might have to provide data to other nodes in a merge | +| vds.idealstate.bucket\_replicas\_copying\_in | bucket | average | Bucket replicas that should be copied in, e.g. node does not have a replica for a bucket that it is in ideal state for | +| vds.idealstate.bucket\_replicas\_syncing | bucket | average | Bucket replicas that need syncing due to mismatching metadata | +| vds.idealstate.max\_observed\_time\_since\_last\_gc\_sec | second | average | Maximum time (in seconds) since GC was last successfully run for a bucket. Aggregated max value across all buckets on the distributor. 
| +| vds.idealstate.delete\_bucket.done\_ok | operation | rate | The number of operations successfully performed | +| vds.idealstate.delete\_bucket.done\_failed | operation | rate | The number of operations that failed | +| vds.idealstate.delete\_bucket.pending | operation | average | The number of operations pending | +| vds.idealstate.merge\_bucket.done\_ok | operation | rate | The number of operations successfully performed | +| vds.idealstate.merge\_bucket.done\_failed | operation | rate | The number of operations that failed | +| vds.idealstate.merge\_bucket.pending | operation | average | The number of operations pending | +| vds.idealstate.merge\_bucket.blocked | operation | rate | The number of operations blocked by blocking operation starter | +| vds.idealstate.merge\_bucket.throttled | operation | rate | The number of operations throttled by throttling operation starter | +| vds.idealstate.merge\_bucket.source\_only\_copy\_changed | operation | rate | The number of merge operations where source-only copy changed | +| vds.idealstate.merge\_bucket.source\_only\_copy\_delete\_blocked | operation | rate | The number of merge operations where delete of unchanged source-only copies was blocked | +| vds.idealstate.merge\_bucket.source\_only\_copy\_delete\_failed | operation | rate | The number of merge operations where delete of unchanged source-only copies failed | +| vds.idealstate.split\_bucket.done\_ok | operation | rate | The number of operations successfully performed | +| vds.idealstate.split\_bucket.done\_failed | operation | rate | The number of operations that failed | +| vds.idealstate.split\_bucket.pending | operation | average | The number of operations pending | +| vds.idealstate.join\_bucket.done\_ok | operation | rate | The number of operations successfully performed | +| vds.idealstate.join\_bucket.done\_failed | operation | rate | The number of operations that failed | +| vds.idealstate.join\_bucket.pending | operation | average | The number of 
operations pending | +| vds.idealstate.garbage\_collection.done\_ok | operation | rate | The number of operations successfully performed | +| vds.idealstate.garbage\_collection.done\_failed | operation | rate | The number of operations that failed | +| vds.idealstate.garbage\_collection.pending | operation | average | The number of operations pending | +| vds.idealstate.garbage\_collection.documents\_removed | document | count, rate | Number of documents removed by GC operations | +| vds.distributor.puts.latency | millisecond | count, max, sum | The latency of put operations | +| vds.distributor.puts.ok | operation | rate | The number of successful put operations performed | +| vds.distributor.puts.failures.total | operation | rate | Sum of all failures | +| vds.distributor.puts.failures.notfound | operation | rate | The number of operations that failed because the document did not exist | +| vds.distributor.puts.failures.test\_and\_set\_failed | operation | rate | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.puts.failures.concurrent\_mutations | operation | rate | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.puts.failures.notconnected | operation | rate | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.puts.failures.notready | operation | rate | The number of operations discarded because distributor was not ready | +| vds.distributor.puts.failures.wrongdistributor | operation | rate | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.puts.failures.safe\_time\_not\_reached | operation | rate | The number of operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | 
+| vds.distributor.puts.failures.storagefailure | operation | rate | The number of operations that failed in storage | +| vds.distributor.puts.failures.timeout | operation | rate | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.puts.failures.busy | operation | rate | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.puts.failures.inconsistent\_bucket | operation | rate | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.removes.latency | millisecond | count, max, sum | The latency of remove operations | +| vds.distributor.removes.ok | operation | rate | The number of successful removes operations performed | +| vds.distributor.removes.failures.total | operation | rate | Sum of all failures | +| vds.distributor.removes.failures.notfound | operation | rate | The number of operations that failed because the document did not exist | +| vds.distributor.removes.failures.test\_and\_set\_failed | operation | rate | The number of mutating operations that failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.removes.failures.concurrent\_mutations | operation | rate | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.updates.latency | millisecond | count, max, sum | The latency of update operations | +| vds.distributor.updates.ok | operation | rate | The number of successful updates operations performed | +| vds.distributor.updates.failures.total | operation | rate | Sum of all failures | +| vds.distributor.updates.failures.notfound | operation | rate | The number of operations that failed because the document did not exist | +| vds.distributor.updates.failures.test\_and\_set\_failed | operation | rate | The number of mutating operations that 
failed because they specified a test-and-set condition that did not match the existing document | +| vds.distributor.updates.failures.concurrent\_mutations | operation | rate | The number of operations that were transiently failed due to a mutating operation already being in progress for its document ID | +| vds.distributor.updates.diverging\_timestamp\_updates | operation | rate | Number of updates that report they were performed against divergent version timestamps on different replicas | +| vds.distributor.removelocations.ok | operation | rate | The number of successful removelocations operations performed | +| vds.distributor.removelocations.failures.total | operation | rate | Sum of all failures | +| vds.distributor.gets.latency | millisecond | count, max, sum | The average latency of gets operations | +| vds.distributor.gets.ok | operation | rate | The number of successful gets operations performed | +| vds.distributor.gets.failures.total | operation | rate | Sum of all failures | +| vds.distributor.gets.failures.notfound | operation | rate | The number of operations that failed because the document did not exist | +| vds.distributor.visitor.latency | millisecond | count, max, sum | The average latency of visitor operations | +| vds.distributor.visitor.ok | operation | rate | The number of successful visitor operations performed | +| vds.distributor.visitor.failures.total | operation | rate | Sum of all failures | +| vds.distributor.visitor.failures.notready | operation | rate | The number of operations discarded because distributor was not ready | +| vds.distributor.visitor.failures.notconnected | operation | rate | The number of operations discarded because there were no available storage nodes to send to | +| vds.distributor.visitor.failures.wrongdistributor | operation | rate | The number of operations discarded because they were sent to the wrong distributor | +| vds.distributor.visitor.failures.safe\_time\_not\_reached | operation | rate | The number of 
operations that were transiently failed due to them arriving before the safe time point for bucket ownership handovers has passed | +| vds.distributor.visitor.failures.storagefailure | operation | rate | The number of operations that failed in storage | +| vds.distributor.visitor.failures.timeout | operation | rate | The number of operations that failed because the operation timed out towards storage | +| vds.distributor.visitor.failures.busy | operation | rate | The number of messages from storage that failed because the storage node was busy | +| vds.distributor.visitor.failures.inconsistent\_bucket | operation | rate | The number of operations failed due to buckets being in an inconsistent state or not found | +| vds.distributor.visitor.failures.notfound | operation | rate | The number of operations that failed because the document did not exist | +| vds.distributor.docsstored | document | average | Number of documents stored in all buckets controlled by this distributor | +| vds.distributor.bytesstored | byte | average | Number of bytes stored in all buckets controlled by this distributor | +| vds.distributor.mutating\_op\_memory\_usage | byte | max | Estimated amount of memory used by active mutating operations across all distributor stripes, in bytes | +| vds.bouncer.clock\_skew\_aborts | operation | count | Number of client operations that were aborted due to clock skew between sender and receiver exceeding acceptable range | + +## Logd Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| logd.processed.lines | item | count | Number of log lines processed | + +## NodeAdmin Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| endpoint.certificate.expiry.seconds | second | N/A | Time until node endpoint certificate expires | +| node-certificate.expiry.seconds | second | N/A | Time until node certificate expires | + +## SearchNode Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | 
--- | +| content.proton.config.generation | version | last | The oldest config generation used by this search node | +| content.proton.documentdb.documents.total | document | last, max | The total number of documents in this document db (ready + not-ready) | +| content.proton.documentdb.documents.ready | document | last, max | The number of ready documents in this document db | +| content.proton.documentdb.documents.active | document | last, max | The number of active / searchable documents in this document db | +| content.proton.documentdb.documents.removed | document | last, max | The number of removed documents in this document db | +| content.proton.documentdb.index.docs\_in\_memory | document | last, max | Number of documents in memory index | +| content.proton.documentdb.disk\_usage | byte | last | The total disk usage (in bytes) for this document db | +| content.proton.documentdb.memory\_usage.allocated\_bytes | byte | max | The number of allocated bytes | +| content.proton.documentdb.heart\_beat\_age | second | last, min | How long ago (in seconds) heart beat maintenance job was run | +| content.proton.docsum.docs | document | rate | Total docsums returned | +| content.proton.docsum.latency | millisecond | count, max, sum | Docsum request latency | +| content.proton.search\_protocol.query.latency | second | count, max, sum | Query request latency (seconds) | +| content.proton.search\_protocol.query.request\_size | byte | count, max, sum | Query request size (network bytes) | +| content.proton.search\_protocol.query.reply\_size | byte | count, max, sum | Query reply size (network bytes) | +| content.proton.search\_protocol.docsum.latency | second | average, count, max, sum | Docsum request latency (seconds) | +| content.proton.search\_protocol.docsum.request\_size | byte | count, max, sum | Docsum request size (network bytes) | +| content.proton.search\_protocol.docsum.reply\_size | byte | count, max, sum | Docsum reply size (network bytes) | +| 
content.proton.search\_protocol.docsum.requested\_documents | document | count, max, sum | Total requested document summaries | +| content.proton.executor.proton.queuesize | task | count, max, sum | Size of executor proton task queue | +| content.proton.executor.proton.accepted | task | rate | Number of executor proton accepted tasks | +| content.proton.executor.proton.wakeups | wakeup | rate | Number of times an executor proton worker thread has been woken up | +| content.proton.executor.proton.utilization | fraction | count, max, sum | Ratio of time the executor proton worker threads has been active | +| content.proton.executor.flush.queuesize | task | count, max, sum | Size of executor flush task queue | +| content.proton.executor.flush.accepted | task | rate | Number of accepted executor flush tasks | +| content.proton.executor.flush.wakeups | wakeup | rate | Number of times an executor flush worker thread has been woken up | +| content.proton.executor.flush.utilization | fraction | count, max, sum | Ratio of time the executor flush worker threads has been active | +| content.proton.executor.match.queuesize | task | count, max, sum | Size of executor match task queue | +| content.proton.executor.match.accepted | task | rate | Number of accepted executor match tasks | +| content.proton.executor.match.wakeups | wakeup | rate | Number of times an executor match worker thread has been woken up | +| content.proton.executor.match.utilization | fraction | count, max, sum | Ratio of time the executor match worker threads has been active | +| content.proton.executor.docsum.queuesize | task | count, max, sum | Size of executor docsum task queue | +| content.proton.executor.docsum.accepted | task | rate | Number of executor accepted docsum tasks | +| content.proton.executor.docsum.wakeups | wakeup | rate | Number of times an executor docsum worker thread has been woken up | +| content.proton.executor.docsum.utilization | fraction | count, max, sum | Ratio of time the 
executor docsum worker threads has been active | +| content.proton.executor.shared.queuesize | task | count, max, sum | Size of executor shared task queue | +| content.proton.executor.shared.accepted | task | rate | Number of executor shared accepted tasks | +| content.proton.executor.shared.wakeups | wakeup | rate | Number of times an executor shared worker thread has been woken up | +| content.proton.executor.shared.utilization | fraction | count, max, sum | Ratio of time the executor shared worker threads has been active | +| content.proton.executor.warmup.queuesize | task | count, max, sum | Size of executor warmup task queue | +| content.proton.executor.warmup.accepted | task | rate | Number of accepted executor warmup tasks | +| content.proton.executor.warmup.wakeups | wakeup | rate | Number of times a warmup executor worker thread has been woken up | +| content.proton.executor.warmup.utilization | fraction | count, max, sum | Ratio of time the executor warmup worker threads has been active | +| content.proton.executor.field\_writer.queuesize | task | count, max, sum | Size of executor field writer task queue | +| content.proton.executor.field\_writer.accepted | task | rate | Number of accepted executor field writer tasks | +| content.proton.executor.field\_writer.wakeups | wakeup | rate | Number of times an executor field writer worker thread has been woken up | +| content.proton.executor.field\_writer.utilization | fraction | count, max, sum | Ratio of time the executor fieldwriter worker threads has been active | +| content.proton.executor.field\_writer.saturation | fraction | count, max, sum | Ratio indicating the max saturation of underlying worker threads. A higher saturation than utilization indicates a bottleneck in one of the worker threads. 
| +| content.proton.documentdb.job.total | fraction | average | The job load average total of all job metrics | +| content.proton.documentdb.job.attribute\_flush | fraction | average | Flushing of attribute vector(s) to disk | +| content.proton.documentdb.job.memory\_index\_flush | fraction | average | Flushing of memory index to disk | +| content.proton.documentdb.job.disk\_index\_fusion | fraction | average | Fusion of disk indexes | +| content.proton.documentdb.job.document\_store\_flush | fraction | average | Flushing of document store to disk | +| content.proton.documentdb.job.document\_store\_compact | fraction | average | Compaction of document store on disk | +| content.proton.documentdb.job.bucket\_move | fraction | average | Moving of buckets between 'ready' and 'notready' sub databases | +| content.proton.documentdb.job.lid\_space\_compact | fraction | average | Compaction of lid space in document meta store and attribute vectors | +| content.proton.documentdb.job.removed\_documents\_prune | fraction | average | Pruning of removed documents in 'removed' sub database | +| content.proton.documentdb.threading\_service.master.queuesize | task | count, max, sum | Size of threading service master task queue | +| content.proton.documentdb.threading\_service.master.accepted | task | rate | Number of accepted threading service master tasks | +| content.proton.documentdb.threading\_service.master.wakeups | wakeup | rate | Number of times a threading service master worker thread has been woken up | +| content.proton.documentdb.threading\_service.master.utilization | fraction | count, max, sum | Ratio of time the threading service master worker threads has been active | +| content.proton.documentdb.threading\_service.index.queuesize | task | count, max, sum | Size of threading service index task queue | +| content.proton.documentdb.threading\_service.index.accepted | task | rate | Number of accepted threading service index tasks | +| 
content.proton.documentdb.threading\_service.index.wakeups | wakeup | rate | Number of times a threading service index worker thread has been woken up | +| content.proton.documentdb.threading\_service.index.utilization | fraction | count, max, sum | Ratio of time the threading service index worker threads has been active | +| content.proton.documentdb.threading\_service.summary.queuesize | task | count, max, sum | Size of threading service summary task queue | +| content.proton.documentdb.threading\_service.summary.accepted | task | rate | Number of accepted threading service summary tasks | +| content.proton.documentdb.threading\_service.summary.wakeups | wakeup | rate | Number of times a threading service summary worker thread has been woken up | +| content.proton.documentdb.threading\_service.summary.utilization | fraction | count, max, sum | Ratio of time the threading service summary worker threads has been active | +| content.proton.documentdb.ready.lid\_space.lid\_bloat\_factor | fraction | average | The bloat factor of this lid space, indicating the total amount of holes in the allocated lid space ((lid\_limit - used\_lids) / lid\_limit) | +| content.proton.documentdb.ready.lid\_space.lid\_fragmentation\_factor | fraction | average | The fragmentation factor of this lid space, indicating the amount of holes in the currently used part of the lid space ((highest\_used\_lid - used\_lids) / highest\_used\_lid) | +| content.proton.documentdb.ready.lid\_space.lid\_limit | documentid | last, max | The size of the allocated lid space | +| content.proton.documentdb.ready.lid\_space.highest\_used\_lid | documentid | last, max | The highest used lid | +| content.proton.documentdb.ready.lid\_space.used\_lids | documentid | last, max | The number of lids used | +| content.proton.documentdb.notready.lid\_space.lid\_bloat\_factor | fraction | average | The bloat factor of this lid space, indicating the total amount of holes in the allocated lid space ((lid\_limit - 
used\_lids) / lid\_limit) | +| content.proton.documentdb.notready.lid\_space.lid\_fragmentation\_factor | fraction | average | The fragmentation factor of this lid space, indicating the amount of holes in the currently used part of the lid space ((highest\_used\_lid - used\_lids) / highest\_used\_lid) | +| content.proton.documentdb.notready.lid\_space.lid\_limit | documentid | last, max | The size of the allocated lid space | +| content.proton.documentdb.notready.lid\_space.highest\_used\_lid | documentid | last, max | The highest used lid | +| content.proton.documentdb.notready.lid\_space.used\_lids | documentid | last, max | The number of lids used | +| content.proton.documentdb.removed.lid\_space.lid\_bloat\_factor | fraction | average | The bloat factor of this lid space, indicating the total amount of holes in the allocated lid space ((lid\_limit - used\_lids) / lid\_limit) | +| content.proton.documentdb.removed.lid\_space.lid\_fragmentation\_factor | fraction | average | The fragmentation factor of this lid space, indicating the amount of holes in the currently used part of the lid space ((highest\_used\_lid - used\_lids) / highest\_used\_lid) | +| content.proton.documentdb.removed.lid\_space.lid\_limit | documentid | last, max | The size of the allocated lid space | +| content.proton.documentdb.removed.lid\_space.highest\_used\_lid | documentid | last, max | The highest used lid | +| content.proton.documentdb.removed.lid\_space.used\_lids | documentid | last, max | The number of lids used | +| content.proton.documentdb.bucket\_move.buckets\_pending | bucket | last, max, sum | The number of buckets left to move | +| content.proton.resource\_usage.disk | fraction | average | The relative amount of disk used by this content node (transient usage not included, value in the range \[0, 1\]). 
Same value as reported to the cluster controller | +| content.proton.resource\_usage.disk\_usage.total | fraction | max | The total relative amount of disk used by this content node (value in the range \[0, 1\]) | +| content.proton.resource\_usage.disk\_usage.total\_utilization | fraction | max | The relative amount of disk used compared to the content node disk resource limit | +| content.proton.resource\_usage.disk\_usage.transient | fraction | max | The relative amount of transient disk used by this content node (value in the range \[0, 1\]) | +| content.proton.resource\_usage.memory | fraction | average | The relative amount of memory used by this content node (transient usage not included, value in the range \[0, 1\]). Same value as reported to the cluster controller | +| content.proton.resource\_usage.memory\_usage.total | fraction | max | The total relative amount of memory used by this content node (value in the range \[0, 1\]) | +| content.proton.resource\_usage.memory\_usage.total\_utilization | fraction | max | The relative amount of memory used compared to the content node memory resource limit | +| content.proton.resource\_usage.memory\_usage.transient | fraction | max | The relative amount of transient memory used by this content node (value in the range \[0, 1\]) | +| content.proton.resource\_usage.memory\_mappings | file | max | The number of memory mapped files | +| content.proton.resource\_usage.open\_file\_descriptors | file | max | The number of open files | +| content.proton.resource\_usage.feeding\_blocked | binary | last, max | Whether feeding is blocked due to resource limits being reached (value is either 0 or 1) | +| content.proton.resource\_usage.malloc\_arena | byte | max | Size of malloc arena | +| content.proton.documentdb.attribute.resource\_usage.address\_space | fraction | max | The max relative address space used among components in all attribute vectors in this document db (value in the range \[0, 1\]) | +| 
content.proton.documentdb.attribute.resource\_usage.feeding\_blocked | binary | max | Whether feeding is blocked due to attribute resource limits being reached (value is either 0 or 1) | +| content.proton.resource\_usage.cpu\_util.setup | fraction | count, max, sum | cpu used by system init and (re-)configuration | +| content.proton.resource\_usage.cpu\_util.read | fraction | count, max, sum | cpu used by reading data from the system | +| content.proton.resource\_usage.cpu\_util.write | fraction | count, max, sum | cpu used by writing data to the system | +| content.proton.resource\_usage.cpu\_util.compact | fraction | count, max, sum | cpu used by internal data re-structuring | +| content.proton.resource\_usage.cpu\_util.other | fraction | count, max, sum | cpu used by work not classified as a specific category | +| content.proton.transactionlog.entries | record | average | The current number of entries in the transaction log | +| content.proton.transactionlog.disk\_usage | byte | average | The disk usage (in bytes) of the transaction log | +| content.proton.transactionlog.replay\_time | second | last, max | The replay time (in seconds) of the transaction log during start-up | +| content.proton.documentdb.ready.document\_store.disk\_usage | byte | average | Disk space usage in bytes | +| content.proton.documentdb.ready.document\_store.disk\_bloat | byte | average | Disk space bloat in bytes | +| content.proton.documentdb.ready.document\_store.max\_bucket\_spread | fraction | average | Max bucket spread in underlying files (sum(unique buckets in each chunk)/unique buckets in file) | +| content.proton.documentdb.ready.document\_store.memory\_usage.allocated\_bytes | byte | average | The number of allocated bytes | +| content.proton.documentdb.ready.document\_store.memory\_usage.used\_bytes | byte | average | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.ready.document\_store.memory\_usage.onhold\_bytes | byte | average | The number 
of bytes on hold | +| content.proton.documentdb.notready.document\_store.disk\_usage | byte | average | Disk space usage in bytes | +| content.proton.documentdb.notready.document\_store.disk\_bloat | byte | average | Disk space bloat in bytes | +| content.proton.documentdb.notready.document\_store.max\_bucket\_spread | fraction | average | Max bucket spread in underlying files (sum(unique buckets in each chunk)/unique buckets in file) | +| content.proton.documentdb.notready.document\_store.memory\_usage.allocated\_bytes | byte | average | The number of allocated bytes | +| content.proton.documentdb.notready.document\_store.memory\_usage.used\_bytes | byte | average | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.notready.document\_store.memory\_usage.dead\_bytes | byte | average | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.notready.document\_store.memory\_usage.onhold\_bytes | byte | average | The number of bytes on hold | +| content.proton.documentdb.removed.document\_store.disk\_usage | byte | average | Disk space usage in bytes | +| content.proton.documentdb.removed.document\_store.disk\_bloat | byte | average | Disk space bloat in bytes | +| content.proton.documentdb.removed.document\_store.max\_bucket\_spread | fraction | average | Max bucket spread in underlying files (sum(unique buckets in each chunk)/unique buckets in file) | +| content.proton.documentdb.removed.document\_store.memory\_usage.allocated\_bytes | byte | average | The number of allocated bytes | +| content.proton.documentdb.removed.document\_store.memory\_usage.used\_bytes | byte | average | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.removed.document\_store.memory\_usage.dead\_bytes | byte | average | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.removed.document\_store.memory\_usage.onhold\_bytes | byte | average | The number of bytes on hold | +| 
content.proton.documentdb.ready.document\_store.cache.memory\_usage | byte | average | Memory usage of the cache (in bytes) | +| content.proton.documentdb.ready.document\_store.cache.hit\_rate | fraction | average | Rate of hits in the cache compared to number of lookups | +| content.proton.documentdb.ready.document\_store.cache.lookups | operation | rate | Number of lookups in the cache (hits + misses) | +| content.proton.documentdb.ready.document\_store.cache.invalidations | operation | rate | Number of invalidations (erased elements) in the cache. | +| content.proton.documentdb.notready.document\_store.cache.memory\_usage | byte | average | Memory usage of the cache (in bytes) | +| content.proton.documentdb.notready.document\_store.cache.hit\_rate | fraction | average | Rate of hits in the cache compared to number of lookups | +| content.proton.documentdb.notready.document\_store.cache.lookups | operation | rate | Number of lookups in the cache (hits + misses) | +| content.proton.documentdb.notready.document\_store.cache.invalidations | operation | rate | Number of invalidations (erased elements) in the cache. 
| +| content.proton.documentdb.ready.attribute.memory\_usage.allocated\_bytes | byte | average | The number of allocated bytes | +| content.proton.documentdb.ready.attribute.memory\_usage.used\_bytes | byte | average | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.ready.attribute.memory\_usage.dead\_bytes | byte | average | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.ready.attribute.memory\_usage.onhold\_bytes | byte | average | The number of bytes on hold | +| content.proton.documentdb.ready.attribute.disk\_usage | byte | average | Disk space usage (in bytes) of the flushed snapshot of this attribute for this document type | +| content.proton.documentdb.notready.attribute.memory\_usage.allocated\_bytes | byte | average | The number of allocated bytes | +| content.proton.documentdb.notready.attribute.memory\_usage.used\_bytes | byte | average | The number of used bytes (`<=` allocated\_bytes) | +| content.proton.documentdb.notready.attribute.memory\_usage.dead\_bytes | byte | average | The number of dead bytes (`<=` used\_bytes) | +| content.proton.documentdb.notready.attribute.memory\_usage.onhold\_bytes | byte | average | The number of bytes on hold | +| content.proton.index.cache.postinglist.memory\_usage | byte | average | Memory usage of the cache (in bytes). Contains disk index posting list files across all document types | +| content.proton.index.cache.postinglist.hit\_rate | fraction | average | Rate of hits in the cache compared to number of lookups. Contains disk index posting list files across all document types | +| content.proton.index.cache.postinglist.lookups | operation | rate | Number of lookups in the cache (hits + misses). Contains disk index posting list files across all document types | +| content.proton.index.cache.postinglist.invalidations | operation | rate | Number of invalidations (erased elements) in the cache. 
Contains disk index posting list files across all document types | +| content.proton.index.cache.bitvector.memory\_usage | byte | average | Memory usage of the cache (in bytes). Contains disk index bitvector files across all document types | +| content.proton.index.cache.bitvector.hit\_rate | fraction | average | Rate of hits in the cache compared to number of lookups. Contains disk index bitvector files across all document types | +| content.proton.index.cache.bitvector.lookups | operation | rate | Number of lookups in the cache (hits + misses). Contains disk index bitvector files across all document types | +| content.proton.index.cache.bitvector.invalidations | operation | rate | Number of invalidations (erased elements) in the cache. Contains disk index bitvector files across all document types | +| content.proton.documentdb.index.memory\_usage.allocated\_bytes | byte | average | The number of allocated bytes for the memory index for this document type | +| content.proton.documentdb.index.memory\_usage.used\_bytes | byte | average | The number of used bytes (`<=` allocated\_bytes) for the memory index for this document type | +| content.proton.documentdb.index.memory\_usage.dead\_bytes | byte | average | The number of dead bytes (`<=` used\_bytes) for the memory index for this document type | +| content.proton.documentdb.index.memory\_usage.onhold\_bytes | byte | average | The number of bytes on hold for the memory index for this document type | +| content.proton.documentdb.index.io.search.read\_bytes | byte | count, sum | Bytes read from disk index posting list and bitvector files as part of search for this document type | +| content.proton.documentdb.index.io.search.cached\_read\_bytes | byte | count, sum | Bytes read from cached disk index posting list and bitvector files as part of search for this document type | +| content.proton.documentdb.ready.index.disk\_usage | byte | average | Disk space usage (in bytes) of this index field in all disk indexes for 
this document type | +| content.proton.documentdb.matching.queries | query | rate | Number of queries executed | +| content.proton.documentdb.matching.soft\_doomed\_queries | query | rate | Number of queries hitting the soft timeout | +| content.proton.documentdb.matching.query\_latency | second | count, max, sum | Total average latency (sec) when matching and ranking a query | +| content.proton.documentdb.matching.query\_setup\_time | second | count, max, sum | Average time (sec) spent setting up and tearing down queries | +| content.proton.documentdb.matching.docs\_matched | document | count, rate | Number of documents matched | +| content.proton.documentdb.matching.exact\_nns\_distances\_computed | distance | rate | Number of distances computed in exact nearest-neighbor search | +| content.proton.documentdb.matching.approximate\_nns\_distances\_computed | distance | rate | Number of distances computed in approximate nearest-neighbor search | +| content.proton.documentdb.matching.approximate\_nns\_nodes\_visited | graph\_node | rate | Number of nodes visited in approximate nearest-neighbor search | +| content.proton.documentdb.matching.rank\_profile.queries | query | rate | Number of queries executed | +| content.proton.documentdb.matching.rank\_profile.soft\_doomed\_queries | query | rate | Number of queries hitting the soft timeout | +| content.proton.documentdb.matching.rank\_profile.soft\_doom\_factor | fraction | count, max, min, sum | Factor used to compute soft-timeout | +| content.proton.documentdb.matching.rank\_profile.query\_latency | second | count, max, sum | Total average latency (sec) when matching and ranking a query | +| content.proton.documentdb.matching.rank\_profile.query\_setup\_time | second | count, max, sum | Average time (sec) spent setting up and tearing down queries | +| content.proton.documentdb.matching.rank\_profile.grouping\_time | second | count, max, sum | Average time (sec) spent on grouping | +| 
content.proton.documentdb.matching.rank\_profile.rerank\_time | second | count, max, sum | Average time (sec) spent on 2nd phase ranking | +| content.proton.documentdb.matching.rank\_profile.docs\_matched | document | count, rate | Number of documents matched | +| content.proton.documentdb.matching.rank\_profile.exact\_nns\_distances\_computed | distance | rate | Number of distances computed in exact nearest-neighbor search | +| content.proton.documentdb.matching.rank\_profile.approximate\_nns\_distances\_computed | distance | rate | Number of distances computed in approximate nearest-neighbor search | +| content.proton.documentdb.matching.rank\_profile.approximate\_nns\_nodes\_visited | graph\_node | rate | Number of nodes visited in approximate nearest-neighbor search | +| content.proton.documentdb.matching.rank\_profile.limited\_queries | query | rate | Number of queries limited in match phase | +| content.proton.documentdb.feeding.commit.operations | operation | count, max, rate, sum | Number of operations included in a commit | +| content.proton.documentdb.feeding.commit.latency | second | count, max, sum | Latency for commit in seconds | + +## Sentinel Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| sentinel.restarts | restart | count | Number of service restarts done by the sentinel | +| sentinel.totalRestarts | restart | last, max, sum | Total number of service restarts done by the sentinel since the sentinel was started | +| sentinel.uptime | second | last | Time the sentinel has been running | +| sentinel.running | instance | count, last | Number of services the sentinel has running currently | + +## Slobrok Metrics + +| Name | Unit | Suffixes | Description | +| --- | --- | --- | --- | +| slobrok.heartbeats.failed | request | count | Number of heartbeat requests failed | +| slobrok.missing.consensus | second | count | Number of seconds without full consensus with all other brokers | + +## Storage Metrics + +| Name | Unit 
| Suffixes | Description | +| --- | --- | --- | --- | +| vds.datastored.alldisks.buckets | bucket | average | Number of buckets managed | +| vds.datastored.alldisks.docs | document | average | Number of documents stored | +| vds.datastored.alldisks.bytes | byte | average | Number of bytes stored | +| vds.visitor.allthreads.averagevisitorlifetime | millisecond | count, max, sum | Average lifetime of a visitor | +| vds.visitor.allthreads.averagequeuewait | millisecond | count, max, sum | Average time an operation spends in input queue. | +| vds.visitor.allthreads.queuesize | operation | count, max, sum | Size of input message queue. | +| vds.visitor.allthreads.completed | operation | rate | Number of visitors completed | +| vds.visitor.allthreads.created | operation | rate | Number of visitors created. | +| vds.visitor.allthreads.failed | operation | rate | Number of visitors failed | +| vds.visitor.allthreads.averagemessagesendtime | millisecond | count, max, sum | Average time it takes for messages to be sent to their target (and be replied to) | +| vds.visitor.allthreads.averageprocessingtime | millisecond | count, max, sum | Average time used to process visitor requests | +| vds.filestor.queuesize | operation | count, max, sum | Size of input message queue. | +| vds.filestor.averagequeuewait | millisecond | count, max, sum | Average time an operation spends in input queue. 
| +| vds.filestor.active\_operations.size | operation | count, max, sum | Number of concurrent active operations | +| vds.filestor.active\_operations.latency | millisecond | count, max, sum | Latency (in ms) for completed operations | +| vds.filestor.throttle\_window\_size | operation | count, max, sum | Current size of async operation throttler window size | +| vds.filestor.throttle\_waiting\_threads | thread | count, max, sum | Number of threads waiting to acquire a throttle token | +| vds.filestor.throttle\_active\_tokens | instance | count, max, sum | Current number of active throttle tokens | +| vds.filestor.allthreads.mergemetadatareadlatency | millisecond | count, max, sum | Time spent in a merge step to check metadata of current node to see what data it has. | +| vds.filestor.allthreads.mergedatareadlatency | millisecond | count, max, sum | Time spent in a merge step to read data other nodes need. | +| vds.filestor.allthreads.mergedatawritelatency | millisecond | count, max, sum | Time spent in a merge step to write data needed to current node. 
| +| vds.filestor.allthreads.merge\_put\_latency | millisecond | count, max, sum | Latency of individual puts that are part of merge operations | +| vds.filestor.allthreads.merge\_remove\_latency | millisecond | count, max, sum | Latency of individual removes that are part of merge operations | +| vds.filestor.allstripes.throttled\_rpc\_direct\_dispatches | instance | rate | Number of times an RPC thread could not directly dispatch an async operation directly to Proton because it was disallowed by the throttle policy | +| vds.filestor.allstripes.throttled\_persistence\_thread\_polls | instance | rate | Number of times a persistence thread could not immediately dispatch a queued async operation because it was disallowed by the throttle policy | +| vds.filestor.allstripes.timeouts\_waiting\_for\_throttle\_token | instance | rate | Number of times a persistence thread timed out waiting for an available throttle policy token | +| vds.filestor.allthreads.put.count | operation | rate | Number of requests processed. | +| vds.filestor.allthreads.put.failed | operation | rate | Number of failed requests. | +| vds.filestor.allthreads.put.test\_and\_set\_failed | operation | rate | Number of operations that were skipped due to a test-and-set condition not met | +| vds.filestor.allthreads.put.latency | millisecond | count, max, sum | Latency of successful requests. | +| vds.filestor.allthreads.put.request\_size | byte | count, max, sum | Size of requests, in bytes | +| vds.filestor.allthreads.remove.count | operation | rate | Number of requests processed. | +| vds.filestor.allthreads.remove.failed | operation | rate | Number of failed requests. | +| vds.filestor.allthreads.remove.test\_and\_set\_failed | operation | rate | Number of operations that were skipped due to a test-and-set condition not met | +| vds.filestor.allthreads.remove.latency | millisecond | count, max, sum | Latency of successful requests. 
| +| vds.filestor.allthreads.remove.request\_size | byte | count, max, sum | Size of requests, in bytes | +| vds.filestor.allthreads.get.count | operation | rate | Number of requests processed. | +| vds.filestor.allthreads.get.failed | operation | rate | Number of failed requests. | +| vds.filestor.allthreads.get.latency | millisecond | count, max, sum | Latency of successful requests. | +| vds.filestor.allthreads.get.request\_size | byte | count, max, sum | Size of requests, in bytes | +| vds.filestor.allthreads.update.count | request | rate | Number of requests processed. | +| vds.filestor.allthreads.update.failed | request | rate | Number of failed requests. | +| vds.filestor.allthreads.update.test\_and\_set\_failed | request | rate | Number of requests that were skipped due to a test-and-set condition not met | +| vds.filestor.allthreads.update.latency | millisecond | count, max, sum | Latency of successful requests. | +| vds.filestor.allthreads.update.request\_size | byte | count, max, sum | Size of requests, in bytes | +| vds.filestor.allthreads.createiterator.count | request | rate | Number of requests processed. | +| vds.filestor.allthreads.createiterator.latency | millisecond | count, max, sum | Latency of successful requests. | +| vds.filestor.allthreads.visit.count | request | rate | Number of requests processed. | +| vds.filestor.allthreads.visit.latency | millisecond | count, max, sum | Latency of successful requests. | +| vds.filestor.allthreads.remove\_location.count | request | rate | Number of requests processed. | +| vds.filestor.allthreads.remove\_location.latency | millisecond | count, max, sum | Latency of successful requests. | +| vds.filestor.allthreads.splitbuckets.count | request | rate | Number of requests processed. | +| vds.filestor.allthreads.joinbuckets.count | request | rate | Number of requests processed. | +| vds.filestor.allthreads.deletebuckets.count | request | rate | Number of requests processed. 
| +| vds.filestor.allthreads.deletebuckets.failed | request | rate | Number of failed requests. | +| vds.filestor.allthreads.deletebuckets.latency | millisecond | count, max, sum | Latency of successful requests. | +| vds.filestor.allthreads.remove\_by\_gid.count | request | rate | Number of requests processed. | +| vds.filestor.allthreads.remove\_by\_gid.failed | request | rate | Number of failed requests. | +| vds.filestor.allthreads.remove\_by\_gid.latency | millisecond | count, max, sum | Latency of successful requests. | +| vds.filestor.allthreads.setbucketstates.count | request | rate | Number of requests processed. | +| vds.mergethrottler.averagequeuewaitingtime | millisecond | count, max, sum | Time merges spent in the throttler queue | +| vds.mergethrottler.queuesize | instance | count, max, sum | Length of merge queue | +| vds.mergethrottler.active\_window\_size | instance | count, max, sum | Number of merges active within the pending window size | +| vds.mergethrottler.estimated\_merge\_memory\_usage | byte | count, max, sum | An estimated upper bound of the memory usage (in bytes) of the merges currently in the active window | +| vds.mergethrottler.bounced\_due\_to\_back\_pressure | instance | rate | Number of merges bounced due to resource exhaustion back-pressure | +| vds.mergethrottler.locallyexecutedmerges.ok | instance | rate | The number of successful merges for 'locallyexecutedmerges' | +| vds.mergethrottler.mergechains.ok | operation | rate | The number of successful merges for 'mergechains' | +| vds.mergethrottler.mergechains.failures.busy | operation | rate | The number of merges that failed because the storage node was busy | +| vds.mergethrottler.mergechains.failures.total | operation | rate | Sum of all failures | +| vds.server.network.tls-handshakes-failed | operation | count | Number of client or server connection attempts that failed during TLS handshaking | +| vds.server.network.peer-authorization-failures | failure | count | Number of 
TLS connection attempts failed due to bad or missing peer certificate credentials | +| vds.server.network.client.tls-connections-established | connection | count | Number of secure mTLS connections established | +| vds.server.network.server.tls-connections-established | connection | count | Number of secure mTLS connections established | +| vds.server.network.client.insecure-connections-established | connection | count | Number of insecure (plaintext) connections established | +| vds.server.network.server.insecure-connections-established | connection | count | Number of insecure (plaintext) connections established | +| vds.server.network.tls-connections-broken | connection | count | Number of TLS connections broken due to failures during frame encoding or decoding | +| vds.server.network.failed-tls-config-reloads | failure | count | Number of times background reloading of TLS config has failed | +| vds.server.network.rpc-capability-checks-failed | failure | count | Number of RPC operations that failed due to one or more missing capabilities | +| vds.server.network.status-capability-checks-failed | failure | count | Number of status page operations that failed due to one or more missing capabilities | +| vds.server.fnet.num-connections | connection | count | Total number of connection objects | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/self-managed/tools.mdx b/mintlify-docs/en/reference/operations/self-managed/tools.mdx new file mode 100644 index 0000000000..7f375bc42c --- /dev/null +++ b/mintlify-docs/en/reference/operations/self-managed/tools.mdx @@ -0,0 +1,976 @@ +--- +title: "Vespa Command-line Tools" +sidebarTitle: "Tools" +description: "This is the reference for the Vespa command-line tools." +--- + +<Note> +**Note:** + +The tools listed on this page are primarily used for operating and debugging a *self-hosted* Vespa instance. 
For most use-cases, we recommend the [Vespa CLI](/en/clients/vespa-cli), which should work against most Vespa applications regardless of how they are deployed. +</Note> + +You can run these tools in the [Vespa Docker image](https://hub.docker.com/r/vespaengine/vespa/tags): + +```bash +docker run --entrypoint bash vespaengine/vespa ./opt/vespa/bin/[tool] [args] +``` + +## vespa-configproxy-cmd + +*vespa-configproxy-cmd* is a status and control tool for the [config proxy](/en/operations/self-managed/config-proxy). The config proxy runs on all nodes as a proxy for one or more [config servers](/en/operations/self-managed/configuration-server). The tool connects to the config proxy on localhost by default. + +Run it without arguments to dump a list of active configIds - format specification: + +`<config definition name>,<config id>,<config generation>,<MD5 checksum>,<xxHash checksum>` + +Synopsis: `vespa-configproxy-cmd [args]` + +Example: + +```bash +$ vespa-configproxy-cmd -m sources +``` + +Find a more comprehensive example in [inspecting config](/en/operations/self-managed/config-proxy#inspecting-config).
+ +| Option | Description | +| --- | --- | +| **\-m** | method, available methods are: <br/><br/> **cache**<br/> Output the config proxy cache content (overview) <br/><br/> **dumpcache**<br/><br/> **statistics**<br/><br/> **heap** <br/><br/> **getConfig** <br/> Get config (see [vespa-get-config](#vespa-get-config)) <br/><br/> **getmode** <br/> Outputs the current mode of the config proxy <br/><br/> **setmode** <br/> Use *default* or *memorycache* - [example](/en/operations/self-managed/config-proxy) <br/><br/> **invalidatecache** <br/> Clears the current cache in the config proxy <br/><br/> **cachefull** <br/> Output the config proxy cache content (including config payload) <br/><br/> **sources** <br/> Output the config proxy's upstream config sources <br/><br/> **updatesources** <br/> Updates the config proxy's upstream config sources to the supplied ones | +| **\-p** | port number, optional | +| **\-s** | hostname, optional | +| **\-h** | help text and usage | + +## vespa-configserver-remove-state + +*vespa-configserver-remove-state* removes the config server state on the host. + +Synopsis: `vespa-configserver-remove-state [-force]` + +| Option | Description | +| --- | --- | +| **\-force** | Do not ask for confirmation before removal | + +## vespa-config-status + +*vespa-config-status* can run on any machine in a Vespa cluster and outputs a list of all running services that are running with an outdated application package. It can be invoked without any arguments or with optional arguments. + +Synopsis: `vespa-config-status [-v] [-c host] [-c host:port] [-f host0,...,hostN]` + +Example: + +```bash +$ vespa-config-status +``` + +| Option | Description | +| --- | --- | +| **\-v** | Verbose - show all services, even if they are up-to-date | +| **\-c arg** | Get the Vespa cluster configuration from the config server specified by host and port.
Use *host* or *host:port* for config server | +| **\-f** | Filter to only query config status for the given comma-separated set of hosts | + +## vespa-deploy + +*vespa-deploy* is a standalone tool to deploy an application package. Prefer the [Vespa CLI](/en/clients/vespa-cli#deployment) instead. Under the hood, deployment uses the [deploy REST API](/en/reference/api/deploy-v2), which you can also use directly. Refer to the [deploy reference](/en/reference/applications/application-packages#deploy) for details. + +Synopsis: `vespa-deploy [-h] [-v] [-n] [-f] [-t timeout] [-c hostname] [-p port] prepare|activate|upload|fetch|help [args]` + +Example: + +```bash +$ vespa-deploy prepare [application-path|zip-file] && vespa-deploy activate +``` + +```bash +$ vespa-deploy prepare app.zip && vespa-deploy activate +``` + +| Command | Description | +| --- | --- | +| **prepare** | *vespa-deploy prepare* combines the [upload](/en/reference/api/deploy-v2#create-session) and [prepare](/en/reference/api/deploy-v2#prepare-session) steps. | +| **activate** | *vespa-deploy activate* invokes the [activate](/en/reference/api/deploy-v2#activate-session) step. | +| **upload** | *vespa-deploy upload* uploads an application package | +| **fetch** | *vespa-deploy fetch* fetches an application package. Useful to get the active configuration for an instance. | +| **help** | Same as **\-h** | + +| Option | Description | +| --- | --- | +| **\-h** | Show help text | +| **\-v** | Verbose | +| **\-n** | Dry-run deployment | +| **\-f** | Force - ignore validation errors | +| **\-t** | Timeout | +| **\-c** | Config server hostname | +| **\-p** | Config server port | + +## vespa-destination + +*vespa-destination* is a simple receiver for messagebus messages. It outputs messages received on stdout. Also see [vespa-visit-target](#vespa-visit-target). 
+ +Synopsis: `vespa-destination [options]` + +Example: + +```bash +$ vespa-destination --name msg_sink +``` + +| Option | Description | +| --- | --- | +| **\--instant** | Reply in message thread | +| **\--name arg** | Slobrok name to register | +| **\--maxqueuetime arg** | Adjust the in-queue size to have a maximum queue wait period of this many ms (default -1 = unlimited) | +| **\--silent #nummsg** | Do not dump anything, but progress every #nummsg | +| **\--sleeptime arg** | The number of milliseconds to sleep per message, to simulate processing time | +| **\--threads arg** | The number of threads to process the incoming data | +| **\--verbose** | Dump the contents of certain messages to stdout | + +## vespa-fbench-filter-file + +```bash +usage: vespa-fbench-filter-file [-a] [-h] [-m maxLineSize] + +Read concatenated logs from stdin and write +extracted query urls to stdout. + + -a : all parameters to the original query urls are preserved. + If the -a switch is not given, only 'query' and 'type' + parameters are kept in the extracted query urls. + -h : print this usage information. + -m <num> : max line size for input/output lines. + Can not be less than the default [10240] +``` + +## vespa-fbench-geturl + +```bash +usage: vespa-fbench-geturl <host> <port> <url> +``` + +## vespa-fbench-split-file + +```bash +usage: vespa-fbench-split-file [-p pattern] [-m maxLineSize] <numparts> [<file>] + +Reads from <file> (stdin if <file> is not given) and +randomly distributes each line between <numpart> output +files. The names of the output files are generated by +combining the <pattern> with sequential numbers using +the sprintf function. + + -p pattern : output name pattern ['query%03d.txt'] + -m <num> : max line size for input/output lines. + Can not be less than the default [10240] + <numparts> : number of output files to generate. 
+``` + +## vespa-feeder + +*vespa-feeder* is a feeding client that parses [JSON](/en/reference/schemas/document-json-format) input as Vespa document operations and sends to a Vespa application. It parses the content of the input sequentially and feeds each operation in order. However, since many operations will be pending at any time, and because the processing time of an operation varies, there is no guarantee as to which order operations will reach the content nodes. As this can be important when it comes to operations that apply to the same document ID, there is logic in place to not send an operation for a document ID to which there is already a pending operation. + +*vespa-feeder* prints a report at the end of the feed. To print this report once a minute, use *\--verbose*: + +```bash +Messages sent to vespa (route default) : +---------------------------------------- +PutDocument: ok: 999997 msgs/sec: 411.38 failed: 0 ignored: 0 latency(min, max, avg): 2, 4360, 99 +``` + +*ignored* reports the number of documents that could not be routed to any [content clusters](/en/reference/applications/services/content) because they did not match any of the [configured document types](/en/reference/applications/services/content#documents) or selections - examples are: + +- A document type is removed from the application and the feed file contains documents of this type +- One or more selection expressions restrict the documents the cluster accepts, and the feed file contains documents that are excluded. 
An example is feeding [expired documents](/en/schemas/documents#document-expiry) - a selection for documents that are less than 30 days old and the feed file contains documents that are 30+ days old + +Synopsis: `vespa-feeder [--abortondataerror true|false] [--abortonsenderror true|false] [--file arg] [--maxpending arg] [--maxpendingsize arg] [--maxfeedrate arg] [--mode standard|benchmark] [--noretry] [--retrydelay arg] [--route arg] [--timeout arg] [--trace arg] [--validate] [--dumpDocuments filename] [--numthreads arg] [--create-if-non-existent] [-v,--verbose] filename` + +Example: + +```bash +$ vespa-feeder file.json +``` + +| Option | Description | +| --- | --- | +| **\--abortondataerror arg** | Abort if the input has errors (true\|false) - default true. Set to *false* in case the input has errors (e.g., invalid characters). *vespa-feeder* notifies on parsing errors at the end of the feed, but it will not abort | +| **\--abortonsenderror arg** | Abort if an error occurred while sending operations to Vespa (true\|false) - default true | +| **\--file arg** | Input files to read. These can also be passed as arguments without the option prefix. If none is given, this tool parses identifiers from stdin | +| **\--maxpending arg** | Maximum number of pending operations. This disables dynamic throttling, use with care | +| **\--maxpendingsize arg** | Maximum size (in bytes) of pending operations | +| **\--maxfeedrate arg** | Limits the feed rate to the given number (operations/second) | +| **\--mode** | The mode to run vespa-feeder in (standard\|benchmark) - default standard | +| **\--noretry** | Disables retries of recoverable failures | +| **\--retrydelay arg** | The time (in seconds) to wait between retries of a failed operation. Default 1 | +| **\--route arg** | The [route](/en/writing/document-routing) to send the data to. Default the *default* route | +| **\--timeout arg** | Time (in seconds) allowed for sending operations.
Default 180 | +| **\--trace arg** | Trace level of network traffic. Default 0 | +| **\--validate** | Run validation tool on input files - do not feed | +| **\--dumpDocuments `<filename>`** | File where documents in the put are serialized | +| **\--numthreads arg** | How many threads to use for sending. Default 1 | +| **\--create-if-non-existent** | Enable setting of create-if-non-existent to true on all document updates in the given feed | +| **\-v, --verbose** | Enable verbose output of progress | + +## vespa-get + +*vespa-get* retrieves documents from a Vespa content cluster, and prints to *stdout*. *vespa-get* retrieves documents identified by the document IDs passed as command line arguments. If no document IDs are passed through the command line interface, ids will be read from *stdin* - separated by line breaks. + +Synopsis: `vespa-get <options> [documentid...]` + +| Option | Description | +| --- | --- | +| **\-a,--trace *tracelevel*** | Trace level to use (default 0) | +| **\-c,--configid *configid*** | Use the specified config id for messagebus configuration | +| **\-f,--fieldset *fieldset*** | Retrieve the specified fields only (see [Document field sets](/en/schemas/documents#fieldsets)). 
Default: `[document]` | +| **\-h,--help** | Show this syntax page | +| **\-i,--printids** | Show only identifiers of retrieved documents | +| **\-j,--jsonoutput** | JSON output (default) | +| **\-l,--loadtype *loadtype*** | Load type (default "") | +| **\-n,--noretry** | Do not retry the operation on transient errors (retrying is the default) | +| **\-r,--route *route*** | Send request to the given messagebus route | +| **\-s,--showdocsize** | Show binary size of document | +| **\--shorttensors** | Output using [tensor short form](/en/reference/schemas/document-json-format#tensor) | +| **\-t,--timeout *timeout*** | Set timeout for the request in seconds (default 0) | +| **\-u,--cluster *cluster*** | Send request to the given content cluster | + +## vespa-get-cluster-state + +Get cluster state - refer to [content nodes](/en/content/content-nodes). + +Synopsis: `vespa-get-cluster-state [options]` + +| Option | Description | +| --- | --- | +| **\-h, --help** | Show help | +| **\-v** | More verbose output | +| **\-s** | Less verbose output | +| **\--show-hidden** | Also show hidden undocumented debug options | +| **\-c, --cluster** | The cluster name of the cluster to query. If unspecified, and vespa is installed on the current node, the tool attempts to auto-extract this information | +| **\-f, --force** | Force execution | +| **\--config-server** | Host name of the config server to query | +| **\--config-server-port** | Port to connect to the config server on | +| **\--config-request-timeout** | Timeout of config request | + +## vespa-get-config + +*vespa-get-config* is a command-line tool to get configuration from a [config server](/en/operations/self-managed/configuration-server) or [config proxy](/en/operations/self-managed/config-proxy). By default, it connects to the config proxy on localhost, fetches config from its cache and prints the config payload on stdout. Configuration is addressed using name and [configId](/en/applications/configapi-dev#config-id).
If configId is omitted, the global and default data for that name is returned. The default port number is 19090, the config proxy's port - use 19070 to access a config server. Also check [ports](/en/operations/self-managed/files-processes-and-ports). + +Synopsis: `vespa-get-config -n defName -i configId <option> [args]` + +Example: + +```bash +$ vespa-get-config -n container.statistics -i search/cluster.search +``` + +Find a more comprehensive example in [inspecting config](/en/operations/self-managed/config-proxy#inspecting-config). + +| Option | Description | +| --- | --- | +| **\-n** | config definition name, including namespace (on the form `<namespace>.<name>`) | +| **\-i** | config id, optional | +| **\-a** | config def schema file, optional (if you want to use another schema than the one known for the config server) | +| **\-m** | defMd5, optional | +| **\-c** | configMd5, optional | +| **\-t** | server timeout, in seconds, default value 3, optional | +| **\-w** | timeout, default value 10, optional | +| **\-s** | server hostname, default localhost, optional | +| **\-p** | port, default 19090, optional | +| **\-d** | debug mode, optional | +| **\-h** | help text and usage | + +## vespa-get-node-state + +Get the state of one or more storage services from the fleet controller - refer to [content nodes](/en/content/content-nodes): + +| State | Description | +| --- | --- | +| **Unit state** | The state of the node seen from the cluster controller. | +| **User state** | The state the administrator wants the node to be in, default "up". Can be set by using [vespa-set-node-state](#vespa-set-node-state) or by the cluster controller | +| **Generated state** | The state of a given node in the current cluster state. This is the state all the other nodes know about. This state is a product of the other two states and cluster controller logic to keep the cluster stable. 
| + +Synopsis: `vespa-get-node-state [options]` + +| Option | Description | +| --- | --- | +| **\-h, --help** | Show help | +| **\-v** | More verbose output | +| **\-s** | Less verbose output | +| **\--show-hidden** | Also show hidden undocumented debug options | +| **\-c, --cluster** | The cluster name of the cluster to query. If unspecified, and vespa is installed on the current node, information will be attempted auto-extracted | +| **\-f, --force** | Force execution | +| **\-t, --type** | Node type - can either be 'storage' or 'distributor'. If not specified, the operation will use state for both types | +| **\-i, --index** | Node index. If not specified, all nodes found running on this host will be used | +| **\--config-server** | Host name of the config server to query | +| **\--config-server-port** | Port to connect to the config server on | +| **\--config-request-timeout** | Timeout of config request | + +## vespa-index-inspect + +Use *vespa-index-inspect* to inspect indexed data on a content node. To troubleshoot [query rewriting](/en/linguistics/query-rewriting) and [linguistic transformations](/en/linguistics/linguistics) use this [guide](/en/querying/text-matching#index-and-attribute) instead. 
+ +It shows posting list information (per token or all/range), or dumps the indexed tokens: + +```js +vespa-index-inspect showpostings [--indexdir indexDir] --field field word + +vespa-index-inspect showpostings [--indexdir indexDir] [--field field] --transpose \ + [--docidlimit docIdLimit] [--mindocid mindocid] + +vespa-index-inspect dumpwords [--indexdir indexDir] --field field \ + [--minnumdocs minnumdocs] [--verbose] [--wordnum] +``` + +Synopsis:`vespa-index-inspect showpostings|dumpwords [--indexdir path] [--field fieldname] [--transpose] [--minnumdocs count] [--docidlimit docIdLimit] [--mindocid mindocid] [--verbose] [--wordnum] [word]` + +Example (make sure to flush the index before using): + +`$` [`vespa-proton-cmd`](#vespa-proton-cmd) `--local triggerFlush && \ vespa-index-inspect dumpwords \ --indexdir /opt/vespa/var/db/vespa/search/cluster.music/n0/documents/music/0.ready/index/index.flush.1 \ --field artist bad 2 so 1` + + +| Option | Description | +| --- | --- | +| **\--indexdir *path*** | Index location | +| **\--field *fieldname*** | Field to analyze | +| **\--transpose** | Dump all tokens | +| **\--minnumdocs *count*** | Minimum number of documents to analyze | +| **\--docidlimit *docid*** | Dump up to this doc id | +| **\--mindocid *docid*** | Start from this docid | +| **\--wordnum** | Also dump token numbers | +| **\--verbose** | Verbose output | + +## vespa-attribute-inspect + +Use *vespa-attribute-inspect* to inspect the content of an attribute field on a content node. To troubleshoot [query rewriting](/en/linguistics/query-rewriting) and [linguistic transformations](/en/linguistics/linguistics) use this [guide](/en/querying/text-matching#index-and-attribute) instead. 
+ +Synopsis: `vespa-attribute-inspect [-p attribute] [-a] [-s attribute] <attribute>` + +Example (make sure to flush the attribute before using): + + +`$` [`vespa-proton-cmd`](#vespa-proton-cmd) `--local triggerFlush && \ vespa-attribute-inspect -p /opt/vespa/var/db/vespa/search/cluster.music/n0/documents/music/0.ready/attribute/year/snapshot-10/year && \ cat /opt/vespa/var/db/vespa/search/cluster.music/n0/documents/music/0.ready/attribute/year/snapshot-10/year.out` + +| Option | Description | +| --- | --- | +| **\-p**| print content to `<attribute>.out` | +| **\-s** | save attribute to `<attribute>.save.dat` | + +## vespa-jvm-dumper + +Dump JVM heap, thread stacks, and other debugging information from a Java-based Vespa service. + +Invoke binary without arguments to print help and list the services running on this node. + +```bash +$ vespa-jvm-dumper +``` + +Produce JVM debugging information by invoking the binary with the config ID of the target service and the output directory. + +```bash +$ vespa-jvm-dumper default/container.1 /opt/vespa/tmp/jvm-dump +``` + +## vespa-logctl + +Print or modify log levels for a VESPA service, stored in *$VESPA\_HOME/var/db/vespa/logcontrol/service.logcontrol*. Refer to [controlling log levels](/en/reference/operations/log-files#controlling-log-levels) for details. *component-specification* specifies which subcomponents of the service should be controlled. 
If empty, all components are controlled: + +- `x.` : Matches only component x +- `x` : Matches component x and all its subcomponents + +Synopsis (show log levels): `vespa-logctl [OPTION] <service>[:component-specification]` + +Synopsis (set log levels): `vespa-logctl [OPTION] <service>[:component-specification] <level-mods>` + +*level-mods* are defined as : `<level>=<on|off>[,<level>=<on|off>]...` + +*level* is one of: `all`, `fatal`, `error`, `warning`, `info`, `event`, `config`, `debug`, `spam` + +Example: For service `container`, set `com.yahoo.search.searchchain` and all subcomponents of `com.yahoo.search.searchchain` to enable all except spam and debug: + +```bash +$ vespa-logctl container:com.yahoo.search.searchchain all=on,spam=off,debug=off +``` + +| Option | Description | +| --- | --- | +| **\-c** | Create the control file if it does not exist (implies -n) | +| **\-a** | Update all .logcontrol files | +| **\-r** | Reset to default levels | +| **\-n** | Create the component entry if it does not exist | +| **\-f *file*** | Use `<file>` as the log control file | +| **\-d *dir*** | Look in `<dir>` for log control files | + +## vespa-logfmt + +`vespa-logfmt` reads Vespa log files, selects messages, and writes a formatted version of these messages to standard output. If no file argument is given, vespa-logfmt will read the last Vespa log file `$VESPA_HOME/logs/vespa/vespa.log` (this also works with the `-f` option). Otherwise, reads only the files given as arguments. To read standard input, supply a single dash ’-’ as a file argument. Refer to the [logs reference](/en/reference/operations/log-files). 
+ +Synopsis: ` vespa-logfmt [-l levellist ] [-s fieldlist ] [-p pid ] [-S service ] [-H host ] [-c regex ] [-m regex ] [-f ] [-N ] [-t ] [-ts ] [file …]` + +Examples: + +Display only messages with log level "event", printing a human-readable time (without any fractional seconds), the service generating the event and the event message: + +```bash +$ vespa-logfmt -l event -s fmttime,service,message +... +[2017-09-05 06:16:16] config-sentinel stopped/1 name="sbin/vespa-config-sentinel -c hosts/vespa-container (pid 1558)" pid=1558 exitcode=1 +[2017-09-05 06:16:16] config-sentinel starting/1 name="sbin/vespa-config-sentinel -c hosts/vespa-container (pid 1564)" +[2017-09-05 06:16:16] config-sentinel started/1 name="config-sentinel" +[2017-09-05 06:17:00] configserver count/1 name=configserver.failedRequests value=0 +[2017-09-05 06:17:00] configserver count/1 name=procTime value=0 +[2017-09-05 06:17:00] configserver count/1 name=configserver.requests value=0 +``` + +Display messages with log levels that are *not* any of *info, debug,* or *event,* printing the time in seconds and microseconds, the log level, the component name, and the message text: + +```bash +$ vespa-logfmt -l all-info,-debug -s level -s time,usecs,component,message -t -l -event +... +1504592294.738000 WARNING : configproxy.com No config found for name=sentinel,namespace=cloud.config,configId=hosts/vespa-container within timeout, will retry +1504592296.388000 WARNING : configproxy.com Request callback failed: APPLICATION_NOT_LOADED. 
Connection spec: tcp/localhost:19070, error message: Failed request (No application exists) from Connection { Socket[addr=/127.0.0.1,port=37806,localport=19070] } +1504592307.949461 WARNING : config-sentinel Connection to tcp/localhost:19090 failed or timed out +1504592307.949587 WARNING : config-sentinel FRT Connection tcp/localhost:19090 suspended until 2017-09-05 06:19:07 GMT +``` + +| Option | Description | +| --- | --- | +| **\-l *levellist* (--level=*levellist*)** | Filter messages by log level. By default, only messages of level *fatal, error, warning*, and *info* will be included, while messages of level *config, event, debug*, and *spam* will be ignored. This option allows you to replace or modify the list of log levels to be included. *levellist* is a comma-separated list of level names.<br/><br/> • The name *all* may be used to add all known levels <br/>• You may use + or - in front of terms to add or remove from the current (or default) list of levels instead of replacing it <br/>• Adding term | +| **\-s *fieldlist*** | Select which fields of log messages to show. The output field order is fixed. When using this option, only the named fields will be printed. The default fields are as \[**\-s fmttime,msecs,level,service,component,message**\]. The fieldlist is a comma-separated list of field names. The name *all* may be used to add all possible fields. Prepending a minus sign will turn off the display of the named field. Starting the list with a plus sign will add and remove fields from the current (or default) list of fields instead of replacing it. Using this option several times works as if the given *fieldlist* arguments had been concatenated into one comma-separated list. Fields:<br/><br/> **time** <br/> Print the time in seconds since the epoch. Ignored if *fmttime* is shown <br/><br/> **fmttime** <br/> Print the time in human-readable \[YYYY-MM-DD HH:mm:ss\] format. Note that the time is printed in the local timezone. 
To get GMT output, use `env TZ=GMT vespa-logfmt` <br/><br/> **msecs** <br/> Add milliseconds after the seconds in *time* and *fmttime* output. Ignored if *usecs* is in effect <br/><br/> **usecs** <br/> Add microseconds after the seconds in *time* and *fmttime* output <br/><br/> **host** <br/> Print the hostname field <br/><br/> **level** <br/><br/> Print the level field (upper-cased) <br/><br/> **pid** <br/> Print the pid field <br/><br/> **service** <br/>Print the service field <br/><br/> **component** <br/> Print the component field<br/><br/> **message** <br/> Print the message text field. You probably always want to add this | +| **\-p *pid*** | Select messages where the pid field matches the *pid* string | +| **\-S *service*** | Select messages where the service field matches the *service* string | +| **\-H *host*** | Select messages where the hostname field matches the *host* string | +| **\-c *regex*** | Select messages where the component field matches the *regex*, using *perlre* regular expression matching | +| **\-m *regex*** | Select messages where the message text field matches the *regex*, using *perlre* regular expression matching | +| **\-f** | Invoke tail -F to follow the input file | +| **\-N** | De-quote quoted newlines in the message text field to an actual newline plus tab | +| **\-t** | Format the component field (if shown) as a fixed-width string, truncating if necessary | +| **\-ts** | Format the service field (if shown) as a fixed-width string, truncating if necessary | +| **\-i, --internal** | Only include log entries emitted by the Vespa platform, i.e., exclude log entries from custom components | + +## vespa-model-inspect + +*vespa-model-inspect* is a tool for inspecting the topology and services of a Vespa system. Hosts, services, clusters, ports, URLs, and config ids can be inspected. It can run on any machine in a Vespa cluster that is running a Vespa configuration server. 
+ +Synopsis: `vespa-model-inspect [-c host | host:port] [-t tag] [-h] [-u] [-v] command` + +| Command | Description | +| --- | --- | +| **hosts** | Show hostnames of all hosts in the Vespa system | +| **services** | Show a list of all service types in the Vespa system | +| **clusters** | Show a list of all named clusters in the Vespa system | +| **configids** | Show a list of all config ids in the Vespa system | +| **filter:ports** | List ports matching filter options | +| **host *hostname*** | Show host details: What services are running, and what ports have they allocated | +| **service *servicetype*** | Show service details: What instances of the service are running, on what hosts, and what ports have they allocated | +| **cluster *clustername*** | Show all services in the cluster, with details on hostname and allocated ports | +| **configid *configid*** | Show all services using this configid | +| **get-index-of *servicetype* *host*** | Show all indexes for instances of the service type on the given host | + +| Option | Description | +| --- | --- | +| **\-c *host* \| *host:port*** | Specify host and port (or just host) to use for getting the config that this tool displays. Default is to use the configserver. 
You might want to use localhost:19090 if you are on a host with a running Vespa system without a config server | +| **\-h** | Show usage | +| **\-t *tag*** | to filter on a port tag | +| **\-u** | Show URLs for services | +| **\-v** | Verbose mode | + +Examples: + +```bash expandable +$ vespa-model-inspect hosts +mynode.mydomain.com + +$ vespa-model-inspect services +config-sentinel +configproxy +configserver +container +container-clustercontroller +distributor +docprocservice +filedistributorservice +logd +logserver +searchnode +slobrok +storagenode + +$ vespa-model-inspect -u service distributor +distributor @ myhost.mydomain.com : content +myapp/distributor/4 + tcp/myhost1.mydomain.com:19112 (MESSAGING) + tcp/myhost1.mydomain.com:19113 (STATUS RPC) + http://myhost1.mydomain.com:19114/ (STATE STATUS HTTP) +distributor @ myhost2.mydomain.com : content +myapp/distributor/5 + tcp/myhost2.mydomain.com:19112 (MESSAGING) + tcp/myhost2.mydomain.com:19113 (STATUS RPC) + http://myhost2.mydomain.com:19114/ (STATE STATUS HTTP) +distributor @ myhost3.mydomain.com : content +``` + +## vespa-print-default + +Internal script used by other scripts to find hostname, config server addresses/ports, version, and more. Not intended for end-user usage. + +## vespa-proton-cmd + +Use *vespa-proton-cmd* to send commands to [proton](/en/content/proton). + +Synopsis: `vespa-proton-cmd HOSTSPEC COMMAND [ARGS]` + +The *hostspec* argument is one of `port|spec|--local|--id=name`. 
Use [vespa-model-inspect](#vespa-model-inspect) to locate the search node ADMIN RPC port: + +```bash +$ vespa-model-inspect service searchnode +searchnode @ /mynode.myhost.com : search +music/search/cluster.music/0 + tcp/mynode.myhost.com:19108 (STATUS ADMIN RTC RPC) + tcp/mynode.myhost.com:19109 (FS4) + tcp/mynode.myhost.com:19110 (TEST HACK SRMP) + tcp/mynode.myhost.com:19111 (ENGINES-PROVIDER RPC) + tcp/mynode.myhost.com:19112 (STATE HEALTH JSON HTTP) +``` + +Example: + +```bash +$ vespa-proton-cmd 19108 triggerFlush + OK: flush trigger enabled +``` + +Unless the **\-h** or **\--help** option is used, one of these commands must be present: + +| Command | Description | +| --- | --- | +| **getProtonStatus** | Get the current proton state and its components. | +| **getState** | Get the current proton state. | +| **triggerFlush** | Trigger [flush](/en/content/proton#proton-maintenance-jobs) as soon as possible for all document types. | +| **prepareRestart** | Estimates the cost of [transaction log](/en/content/proton#transaction-log) replay, and flushes data structures if that will speed up a subsequent start. If this is not called before stopping proton, there is no estimation and no flush. | + +## vespa-remove-index + +*vespa-remove-index* is a command-line tool to remove index data on a Vespa search node, by wiping out selected files and subdirectories found in *`$VESPA_HOME/var/db/vespa/`*. This process is irreversible, and the indexes deleted can not be recovered. + +Stop *services* before running it - example: + +```bash +$ vespa-stop-services && vespa-remove-index -force && vespa-start-services +``` + +Synopsis:`vespa-remove-index [-force] [-cluster name]` + +Example: + +```bash +$ vespa-remove-index +[info] For cluster music distribution key 0 you have: +[info] 156 kilobytes of data in var/db/vespa/search/cluster.music/n0 +Really to remove this vespa index? 
Type "yes" if you are sure ==> yes +[info] removing data: rm -rf var/db/vespa/search/cluster.music/n0 +[info] removed. +``` + +| Option | Description | +| --- | --- | +| **\-force** | Do not require verification from the user before really removing index data | +| **\-cluster *name*** | Only remove data for the given cluster name | + +## vespa-route + +*vespa-route* is a tool to inspect Vespa routing configurations. If file is set, it will be parsed as a feed and the output will look similar to when using [/document/v1/](/en/reference/api/document-v1) with trace enabled. + +Synopsis: `vespa-route [options] [file]` + +Example: + +```bash +$ vespa-route +There are 5 route(s): + 1. default + 2. music + 3. music-direct + 4. music-index + 5. storage/cluster.music + +There are 2 hop(s): + 1. docproc/cluster.music.indexing/chain.indexing + 2. indexing +``` + +| Option | Description | +| --- | --- | +| **\--documentmanagerconfigid `<id>`** | Sets the config id that supplies document configuration | +| **\--dump** | Prints the complete content of the routing table | +| **\--help** | Prints this help | +| **\--hop `<name>`** | Prints detailed information about hop `<name>` | +| **\--hops** | Prints a list of all available hops | +| **\--identity `<id>`** | Sets the identity of message bus | +| **\--listenport `<num>`** | Sets the port message bus will listen to | +| **\--oosserverpattern `<id>`** | Sets the out-of-service server pattern for message bus | +| **\--protocol `<name>`** | Sets the name of the protocol whose routing to inspect | +| **\--route `<name>`** | Prints detailed information about route `<name>` | +| **\--routes** | Prints a list of all available routes | +| **\--routingconfigid `<id>`** | Sets the config id that supplies the routing tables | +| **\--services** | Prints a list of all available services | +| **\--slobrokconfigid `<id>`** | Sets the config id that supplies the slobrok server list | +| **\--trace `<num>`** | Sets the trace level to use when 
visualizing the route | +| **\--verify** | All hops and routes are verified when routing | + +## vespa-sentinel-cmd + +Use *vespa-sentinel-cmd* to list, start and stop services - refer to [config sentinel](/en/operations/self-managed/config-sentinel) for examples. It can also check for connectivity between nodes. + +<Warning> +**Important:** + +See [start / stop / restart](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart) for how to stop all services on a node, optimizing for restart time - use this for tasks like software upgrade. +</Warning> + +Synopsis: `vespa-sentinel-cmd [-h] list|start <service>|restart <service>|stop <service>|connectivity` + +| Option | Description | +| --- | --- | +| **\-h** | Help text | + +| Command | Description | +| --- | --- | +| **list** | Lists the services running on this node and their status: <br/><br/> **service name** <br/><br/>**state**<br/> • RUNNING: Service is running<br/> • FINISHED: Service has been stopped<br/> • FAILED: Service has crashed and failed to restart<br/> • TERMINATING: Service is stopping <br/><br/> **mode** <br/>• MANUAL: Service has to be started and stopped manually <br/>• AUTO: Service will restart automatically if it stops <br/><br/> **pid**<br/> Pid of the process (main thread) <br/><br/> **exitstatus** <br/> Exit code the last time the service stopped. <br/><br/> **id** <br/> [Config ID](/en/applications/configapi-dev#config-id) of the service | +| **restart \[name\]** | Restarts the service with the given name. 
The name is the first string in the service list given by `list` | +| **stop \[name\]** | Stops the service with the given name | +| **start \[name\]** | Starts the service with the given name | +| **connectivity** | Use to troubleshoot startup issues/network configuration/ACLs/iptables:<br/><br/>`$ vespa-sentinel-cmd connectivity`<br/> `vespa-sentinel-cmd 'connectivity' OK.`<br/> `node0.vespanet -> ok`<br/> `node1.vespanet -> ok`<br/> `node2.vespanet -> ok` | + +## vespa-set-node-state + +Set the [user state](/en/reference/api/cluster-v2#state-user) of a node. This will set the generated state to the user state if the user state is "better" than the generated state that would have been created if the user state were up. For instance, a node that is in `up` state can be forced into `down` state, while a node that is currently `down` can not be forced into `retired` state, but can be forced into maintenance state. + +Synopsis: `vespa-set-node-state [options] up|down|maintenance|retired [description]` + +Example: + +```bash +$ vespa-set-node-state -i 0 maintenance "Set to maintenance for software upgrade" +``` + +| Option | Description | +| --- | --- | +| **\-h, --help** | Show help | +| **\-v** | More verbose output | +| **\-s** | Less verbose output | +| **\--show-hidden** | Also show hidden undocumented debug options | +| **\-n, --no-wait** | Do not wait for node state changes to be visible in the cluster before returning | +| **\-c, --cluster** | The cluster name of the cluster to query. If unspecified, and vespa is installed on the current node, information will be attempted auto-extracted | +| **\-f, --force** | Force execution | +| **\-t, --type** | Node type - can either be 'storage' or 'distributor'. If not specified, the operation will use state for both types | +| **\-i, --index** | Node index. 
If not specified, all nodes found running on this host will be used | +| **\--config-server** | Host name of the config server to query | +| **\--config-server-port** | Port to connect to the config server on | +| **\--config-request-timeout** | Timeout of config request | + +## vespa-significance + +Generates a [significance model file](/en/ranking/significance#significance-model-file) for global significance. + +Synopsis: `vespa-significance <command> [options]` + +Commands: + +- `generate` – build a significance model from Vespa documents (JSONL) or Vespa Significance TSV (VSTSV) files. +- `export` – dump term statistics from an index to a VSTSV file. +- `merge` – merge multiple VSTSV files, optionally filtering low-frequency terms. + +### vespa-significance generate + +The command uses the same tokenizer as the default query processor, see [linguistics in Vespa](/en/linguistics/linguistics) for details. Custom tokenizers require a matching extractor. Tokens are lower-cased without stemming to align with query processing. + +Synopsis: `vespa-significance generate [options] --in <FILE>` + +Example: + +```bash +$ vespa-significance generate --in vespa-dump.jsonl --out en_model.json --field text --language en +``` + +This example generates a significance model called `en_model.json` from a collection of Vespa Documents (the jsonl file). This is available in Vespa as of version 8.426.8. 
+ +When running in Docker, mount the data directory to persist input and output files, e.g.: + +```bash +$ podman run -it --entrypoint bash -v $PWD/data:/data -w /data vespaengine/vespa:latest /opt/vespa/bin/vespa-significance generate --in docs.jsonl --out model.zst --field text --language en +``` + +To dump documents for JSONL input, use [vespa-visit](#vespa-visit) with the `--field-set` option: + +```bash +$ vespa visit --field-set mydocument:text_field > ./data/docs.jsonl +``` + +Inspect the resulting model file with `zstdcat` and `jq`: + +```bash +$ zstdcat ./data/model.zst | jq +``` + +Example generating model from VSTSV: + +```bash +$ vespa-significance generate --format vstsv --in dumped_term_df.vstsv --out un_model.json +``` + +In this example the input is generated with `vespa-significance export`. This is available in Vespa as of version 8.597.8. When language is not specified, the default is set to unknown when using VSTSV format. + +| Option | Description | +| --- | --- | +| **\-h, --help** | Show help for the subcommand. | +| **\-i, --in `<FILE>`** | Input file. Default format is JSON Lines where each line is a [Vespa document in JSON](/en/reference/schemas/document-json-format). Use `--format vstsv` to read VSTSV files. | +| **\--format `<FORMAT>`** | Input format. Format can be `jsonl` or `vstsv`. Defaults to `jsonl`. | +| **\-o, --out `<model.json[.zst]>`** | Output [significance model](/en/ranking/significance#significance-model-file). | +| **\-f, --field `<field>`** | JSONL: the name of the text field to analyse. VSTSV: No effect. | +| **\-l, --language `<tag[,tag...]>`** | Comma-separated ISO language tags. The first tag controls tokenization; additional tags are stored in the model. Required for JSONL, optional for VSTSV (defaults to `un`). See supported tags in [linguistics in Vespa](/en/linguistics/linguistics-opennlp#default-languages). | +| **\-zst, --zst-compression `<ENABLED>`** | Enable Zstandard compression of the output. 
Enabled can be `true` or `false`. Default `false`. When `true`, the output file must end with `.zst`. | + +### vespa-significance export + +Synopsis: `vespa-significance export [options] --field <FIELD>` + +Example: + +```bash +$ vespa-significance export --schema product --field title --out text.vstsv +``` + +Locates an index on the content node and exports term document frequencies from that index using `vespa-index-inspect`. The output VSTSV file can be passed to `generate --format vstsv` or merged with other exports. This is available in Vespa as of version 8.597.8. + +This command must be executed on a content node. + +| Option | Description | +| --- | --- | +| **\-h, --help** | Show help for the subcommand. | +| **\--index-dir `<path/to/index>`** | Explicit path to the index directory. If omitted, the tool locates the directory using cluster, schema, and node information. | +| **\--out `<FILE.vstsv[.zst]>`** | Output VSTSV file. Defaults to `export_*FIELD*.vstsv`. Compression adds `.zst` automatically. | +| **\--field `<FIELD>`** | Text field to export. Must correspond to a field directory within the selected index. This is the same as a field in the schema. | +| **\--cluster `<NAME>`** | Specifies the content cluster name for locating the index directory. If multiple clusters exist per node, use --cluster to choose one. | +| **\--schema `<NAME>`** | Specifies the schema (document type) name used when locating the index directory. | +| **\--node-index `<NUMBER>`** | Specifies the content node index for locating the index directory. If there are multiple indexes on a node, use --node-index to choose one. | +| **\-zst, --zst-compression** | Write the VSTSV output compressed with Zstandard. 
| + +### vespa-significance merge + +Synopsis: `vespa-significance merge [options] <input.vstsv[.zst]> [<input2.vstsv[.zst]> ...]` + +Example: + +```bash +$ vespa-significance merge --out merged_title.vstsv export_title_node1.vstsv export_title_node2.vstsv +``` + +Merges multiple VSTSV files and preserves the total document count in the header. Use this to combine exports before generating a model. This is available in Vespa as of version 8.597.8. + +| Option | Description | +| --- | --- | +| **\-h, --help** | Show help for the subcommand. | +| **\--out `<FILE.vstsv[.zst]>`** | Output VSTSV file. Defaults to `merged.vstsv`. Compression adds `.zst`. | +| **\--min-keep `<NUMBER>`** | Filter out terms with document frequency strictly lower than `NUMBER`. | +| **\-zst, --zst-compression** | Write the merged VSTSV output compressed with Zstandard. | + +## vespa-start-configserver + +Start a config server on a node, [details](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart). See [vespa-start-services](#vespa-start-services) for node setup steps, before startup. + +Synopsis: `vespa-start-configserver` + +## vespa-start-services + +Start all services on a node, [details](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart). + +As part of starting Vespa, the startup script calls [rhel-prestart.sh](https://github.com/vespa-engine/vespa/blob/master/vespabase/src/rhel-prestart.sh) to set up directories and system limits - this requires privileges to do so - run this command with sudo, see [example](/en/operations/self-managed/multinode-systems#config-server-cluster-setup). Refer to [vespa-start-services.sh](https://github.com/vespa-engine/vespa/blob/master/vespabase/src/vespa-start-services.sh) and [environment variables](/en/operations/self-managed/files-processes-and-ports#environment-variables). + +Synopsis: `vespa-start-services` + +When debugging a failed start, use [vespa-logfmt](#vespa-logfmt) to inspect the log. 
It is also useful to read up on the [start sequence](/en/operations/self-managed/configuration-server#start-sequence) and make sure the config server is running - Vespa will not start without a running config server. + +Vespa has a *Safe Cluster Startup* mode to only start vespa services after X% of nodes are running - see [cluster startup](/en/operations/self-managed/config-sentinel#cluster-startup). + +## vespa-stat + +*vespa-stat* is a tool to fetch statistics about a specific user, group, bucket, gid or document. + +vespa-stat works in two stages. The first stage is to figure out the actual buckets we want to look at. In the second stage, it can dump the located buckets. For each command line option, only the relevant documents will be dumped (the document for `--document/--gid`, or the user/group's documents for `--user/--group`). This stage can be turned on by adding `--dump`, but is default on for the case of `--document/--gid`. + +Synopsis: `vespa-stat [options]` + +Example: + +```bash +$ vespa-stat --document id:my_namespace:my_search::12345678-4fb7-3797-ae9a-d4d7a4e6e085 +Bucket maps to the following actual files: + BucketInfo(BucketId(0x4000000000004800): [distributor:17] [node(idx=17,crc=0xe5ce35c7,docs=57/57,bytes=478040/478040,trusted=true,active=true,ready=true), node(idx=15,crc=0xe5ce35c7,docs=57/57,bytes=478040/478040,trusted=true,active=true,ready=true)]) + +Details for BucketId(0x4000000000004800): + Bucket information from node 15: +Persistence bucket BucketId(0x4000000000004800), partition 0 + Timestamp: 1452598747000000, Doc(id:my_namespace:my_search::12345678-4fb7-3797-ae9a-d4d7a4e6e085), gid(0x0048e840a48002b12abbb0a0), size: 101 + + Bucket information from node 17: +Persistence bucket BucketId(0x4000000000004800), partition 0 + Timestamp: 1452598747000000, Doc(id:my_namespace:my_search::12345678-4fb7-3797-ae9a-d4d7a4e6e085), gid(0x0048e840a48002b12abbb0a0), size: 101 +``` + +| Option | Description | +| --- | --- | +| **\-b, --bucket 
`<bucketid>`** | Dump list of buckets that are contained in the given bucket, or that contain it | +| **\-d, --dump** | Dump list of documents for all buckets matching the selection command. | +| **\-g, --group `<groupid>`** | Dump list of buckets that can contain the given group | +| **\-h, --help** | Help text | +| **\-l, --gid `<globalid>`** | Dump information about one specific document, as given by the GID (implies --dump) | +| **\-o, --document `<docid>`** | Dump information about one specific document (implies --dump) | +| **\-r, --route `<route>`** | Route to send the messages to, usually the name of the storage cluster | +| **\-s, --bucketspace `<space>`** | [Bucket space](/en/content/buckets#bucket-space) (*default* or *global*). If not specified, *default* is used | +| **\-u, --user `<userid>`** | Dump list of buckets that can contain the given user | + +## vespa-status-filedistribution + +Use *vespa-status-filedistribution* to get status from file distribution. It should be run on a config server, as it connects to the config server on localhost to get status. + +Synopsis: `vespa-status-filedistribution [--application <applicationNameArg>] [--debug] [--environment <environmentArg>] [(-h | --help)] [--instance <instanceNameArg>] [--region <regionArg>] [--tenant <tenantNameArg>] [--timeout <timeoutArg>]` + +| Option | Description | +| --- | --- | +| **\--application `<applicationName>`** | Application name | +| **\--debug** | Print debug log | +| **\--environment `<environment>`** | Environment name | +| **\-h, --help** | Display help information | +| **\--instance `<instanceName>`** | Instance name | +| **\--region `<regionName>`** | Region name | +| **\--tenant `<tenantName>`** | Tenant name | +| **\--timeout `<timeout>`** | timeout (in seconds) | + +## vespa-stop-configserver + +Stop a config server on a node, [details](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart). 
+ +Synopsis: `vespa-stop-configserver` + +## vespa-stop-services + +Stop all services on a node, [details](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart). Running *vespa-stop-services* on a content node will call [prepareRestart](#vespa-proton-cmd) to optimize restart time. + +Synopsis: `vespa-stop-services` + +## vespa-summary-benchmark + +Tool for testing and benchmarking RPC docsum interface. Refer to [VespaSummaryBenchmark.java](https://github.com/vespa-engine/vespa/blob/master/vespaclient-java/src/main/java/com/yahoo/vespasummarybenchmark/VespaSummaryBenchmark.java) + +## vespa-visit + +Used to run a [visit](/en/clients/vespa-cli) operation, with more options than [vespa visit](/en/clients/vespa-cli). It uses the [Vespa Message Bus](/en/writing/document-routing) and must be run inside a Vespa application - it does not use the Vespa HTTP APIs. + +<Note> +**Note:** + +This tool is easily confused with `vespa visit` in the [vespa CLI](/en/clients/vespa-cli). The latter is the common tool for visiting, `vespa-visit` is built for debugging and complex use cases. +</Note> + +By default, vespa-visit gets visited documents and emits them to stdout. However, the tool may specify a [vespa-visit-target](#vespa-visit-target) and be used as a tool to run reprocessing or migration. It supports keeping a progress file on disk, such that you can restart it if it should fail in the middle for some reason. + +To migrate a set of documents from one cluster to another, use *visiting* - as the data is transferred directly, using a compact serialization format, from the source nodes to the targets, this is performance optimal (data is not piped through the visit client). Implement backup this way, or dump to file. + +Search node recovery: Feed the documents directly to a search cluster. 
Example, selecting documents of type *music*: + +```bash +$ vespa-visit --selection music --datahandler indexing +``` + +This feeds from the source into the search cluster in the same application. Note that simultaneous feeding can cause updates to be lost. + +Include [remove-entries](/en/operations/self-managed/admin-procedures#data-retention-vs-size) in the visit operation using *\--visitremoves* - this dumps the tombstones of documents recently removed. + +The [content policy](/en/writing/document-routing#content) can be configured to use a set of configuration servers from another cluster to configure itself. This is specified with the *config* parameter. As an example, the following route routes to the content cluster *mycluster* with a configuration server on *myconfigserver.mydomain.com:12345*: + +```bash +[Content:config=tcp/myconfigserver.mydomain.com:12345;cluster=mycluster] +``` + +The following examples illustrate how to copy all data from a source cluster to another cluster using vespa-visit: + +```bash +# Copies all data in the local cluster, routing it to the remote mycluster +$ vespa-visit --datahandler '[Content:config=tcp/myconfigserver.mydomain.com:12345;cluster=mycluster]' + +# Limit to 'music' documents only +$ vespa-visit --datahandler '[Content:config=tcp/myconfigserver.mydomain.com:19070;cluster=mycluster]' \ + --selection music + +# Limit to all documents for user '1234' +$ vespa-visit --datahandler '[Content:config=tcp/myconfigserver.mydomain.com:12345;cluster=mycluster]' \ + --selection id.user=1234 +``` + +Visitor processor types: + +| Processor Type | Description | +| --- | --- | +| **Dump visitor** | The most commonly used visitor processor type is the dump visitor. All it does is to send the read documents on to some external target specified by the visitor. Using the command line tool *vespa-visit*, the default is to just send the documents back to the client, and have them printed to stdout. 
The dump visitor is used to implement reprocessing. Typically, using a messagebus route, which will send the documents through the document processing cluster and then back to the content cluster. Migration of documents from one cluster to another is also implemented using a dump visitor. | +| **Streaming search visitor** | The [streaming search](/en/performance/streaming-search) visitor runs in the Vespa container, making it transparent whether search results were created from streaming or indexed search - see [indexing mode](/en/reference/applications/services/content#document). | + +Requests sent from the visitor processor are sent to a visitor target - types: + +| Target Type | Description | +| --- | --- | +| **Message bus routes** | You can specify a [message bus route](/en/writing/document-routing) name directly, and this route will be used to send the results. This is typically used when doing reprocessing or migration. Message bus routes are set up in the application package. In addition, some routes may have been auto-generated in simple setups, for instance, a route called *default* is generated if your setup is simple enough for the config model to likely guess where you want to send your data. | +| **Slobrok address** | You can also specify a slobrok address for data to be sent to. A slobrok address is a slash-separated path where you can use an asterisk to mean any element within this path. For instance, if you have a docproc cluster called *mydpcluster*, it will have registered its nodes with slobrok names like *docproc/cluster.mydpcluster/docproc/0/feed\_processor*, where the 0 here indicates the first node in the cluster. You can thus specify to send visit data to this docproc cluster by stating a slobrok address of *docproc/cluster.mydpcluster/docproc/\*/feed\_processor*. Note that this will not send all the data to one or all the nodes. 
The data sent from the visitor will be distributed among the matching nodes, but each message will just be sent to one node. <br/><br/> Slobrok names can be used when using [vespa-visit-target](#vespa-visit-target) to retrieve the data from some location. If you start vespa-visit-target on two nodes, listening to slobrok names *mynode/0/visit-destination* and *mynode/1/visit-destination*, you can send the results to these nodes by specifying *mynode/\*/visit-destination* as the data handler.<br/><br/> [vespa-destination](#vespa-destination) is similar to vespa-visit-target in that it can receive messages from messagebus and print the contents to stdout. It can be useful in situations where you want to debug a route or a docproc, by using vespa-destination as the endpoint of your route. | +| **TCP socket** | TCP sockets can also be specified directly. This requires that the endpoint speaks FNET RPC. This is typically done, either by using the *vespa-visit-target* tool, or by using a visitor destination programmatically by using a utility class in the document API. A socket address looks like the following: tcp/*hostname*:*port*/*servicename*. For instance, an address generated by *vespa-visit-target* might look like: *tcp/myhost.mydomain.com:12345/visit-destination* | + +Also see [vespa-destination](#vespa-destination). + +Synopsis: `vespa-visit [options]` + +| Option | Description | +| --- | --- | +| **\--abortonclusterdown** | Abort if cluster is down | +| **\-b, --maxbuckets `<num>`** | Maximum buckets per visitor | +| **\--bucketspace `<space>`** | [Bucket space](/en/content/buckets#bucket-space) to visit (*default* or *global*). 
If not specified, *default* is used | +| **\-c, --cluster `<cluster>`** | Visit the given cluster | +| **\-d, --datahandler `<target>`** | Send results to the given target - see [vespa-visit-target](#vespa-visit-target) | +| **\-f, --from `<timestamp>`** | Only visit from the given timestamp (microseconds) | +| **\-h, --help** | Show help text | +| **\-i, --printids** | Display only document identifiers | +| **\--jsonoutput** | Output a JSON array of document objects. This is the default output format. | +| **\--jsonl** | Output documents as JSONL (JSON Lines format). Each individual document is output as a single line, with a newline separating each document. Lines are not comma-separated, and there is no top-level array wrapping the document objects. | +| **\-l, --fieldset `<fieldset>`** | Retrieve the specified fields only (see [Document field sets](/en/schemas/documents#fieldsets)). Default: `[document]` | +| **\--libraryparam `<key>` `<val>`** | Send parameter to the visitor library | +| **\-m, --maxpending `<num>`** | Maximum pending messages to data handlers per storage visitor | +| **\--maxpendingsuperbuckets `<num>`** | Maximum pending visitor messages from the vespa-visit client. If set, dynamic throttling of visitors is disabled | +| **\--maxtotalhits `<num>`** | Abort visiting when received this many total documents. This is only an approximate number, all pending work will be completed, and those documents will also be returned | +| **\-o, --timeout `<milliseconds>`** | Time out visitor after given time | +| **\-p, --progress `<file>`** | Use the given file to track progress. `-p progress-file` saves progress, allowing the visitor to resume at next startup. Always remove the progress file to run the visiting operation from the start. | +| **\--processtime `<num>`** | Sleep for this number of milliseconds before processing the message. 
(Debug option for pretending to be a slow client) | +| **\-r, --visitremoves** | Return tombstone entries of documents that have been removed. Tombstones will be output as `remove` objects, which only contain a document ID. When using `--visitremoves`, regular (non-tombstone) documents will also be returned. | +| **\-s, --selection `<selection>`** | [Selection](/en/reference/writing/document-selector-language) string for which documents to visit. E.g., `-s 'id.hash().abs() % 100 == 0'` dumps 1% of the corpus - see [selection](/en/clients/vespa-cli#selection). Note that this expression is evaluated for *every* document in the cluster, so running 100 visits comparing against all values in \[0, 99\] ends up reading all documents 100 times. Prefer using `--slices` and `--sliceid` instead if available. | +| **\--shorttensors** | Output using [tensor short form](/en/reference/schemas/document-json-format#tensor) | +| **\--skipbucketsonfatalerrors** | Skip visiting super buckets with fatal error codes | +| **\--sliceid `<arg>`** | The slice number of the visit represented by this visitor. This number must be non-negative and less than the number of slices specified for the visit. | +| **\--slices `<arg>`** | Split the document corpus into this number of independent slices. This lets multiple, concurrent series of visitors advance the same logical visit independently, by specifying a different `sliceid` for each.<br/><br/> E.g. `--slices 100 --sliceid 0` dumps 1% of the corpus by efficiently iterating over only 1/100th of the data space. For a given number of `--slices`, it's possible to visit the entire corpus (possibly in parallel) with non-overlapping output by visiting with all `--sliceid` values from (and including) 0 up to (and excluding) `--slices`. 
| +| **\-t, --to `<timestamp>`** | Only visit up to the given timestamp (microseconds) | +| **\--tracelevel `<level>`** | Tracelevel (\[0-9\]), for debugging | +| **\-u, --buckettimeout `<milliseconds>`** | Fail visitor if visiting a single bucket takes longer than this (default same as timeout) | +| **\-v, --verbose** | Show progress and info on STDERR | +| **\--visitinconsistentbuckets** | Don't wait for inconsistent buckets to become consistent. See [read-consistency](/en/content/consistency#read-consistency) for details. | +| **\--visitlibrary `<string>`** | Use the given visitor library | + +## vespa-visit-target + +[vespa-visit-target](#vespa-visit-target) is a tool to set up an endpoint for [visiting](/en/clients/vespa-cli) data. It binds to a socket or a slobrok address, which is specified as a target in the visit client. Also see [vespa-destination](#vespa-destination). + +Synopsis: `vespa-visit-target [options]` + +| Option | Description | +| --- | --- | +| **\-c, --visithandler `<classname>`** | Use the given class as a visit handler (defaults to StdOutVisitorHandler) | +| **\-h, --help** | Show help page | +| **\-i, --printids** | Display document IDs only | +| **\-o, --visitoptions `<args>`** | Option arguments to pass through to the visitor handler instance | +| **\-p, --processtime `<msecs>`** | Sleep msecs milliseconds before processing the message. (Debug option for pretending to be a slow client) | +| **\-s, --bindtoslobrok `<address>`** | Bind to slobrok address. One, and only one, of the binding options must be set | +| **\-t, --bindtosocket `<port>`** | Bind to TCP port. 
One, and only one, of the binding options must be set | +| **\-v, --verbose** | Indent output, show progress and info on STDERR | \ No newline at end of file diff --git a/mintlify-docs/en/reference/operations/tools.mdx b/mintlify-docs/en/reference/operations/tools.mdx new file mode 100644 index 0000000000..040b3e00b9 --- /dev/null +++ b/mintlify-docs/en/reference/operations/tools.mdx @@ -0,0 +1,126 @@ +--- +title: "Tools" +description: "This is the command-line tools reference for various Vespa use cases." +--- + +## vespa-analyze-onnx-model + +Loads an ONNX-model to analyze memory usage and infer/probe output types based on input types. + +Synopsis: `vespa-analyze-onnx-model <onnx-model> [options...]` + +Example (refer to [ONNX](/en/ranking/onnx) for more examples): + +```bash +$ vespa-analyze-onnx-model Network.onnx +``` + +| Option | Description | +| --- | --- | +| **\--probe-types** | Use onnx model to infer/probe output types based on input types | + +## vespa-fbench + +*vespa-fbench* is used for benchmarking the capacity of a Vespa system. Refer to [vespa-benchmarking](/en/performance/benchmarking) for usage and examples. + +Multiple hostnames and ports can be used, to distribute load round-robin to clients. + +Synopsis: `vespa-fbench [options] <hostname> <port>` + +Example: + +```bash +$ vespa-fbench -n 10 -q query%03d.txt -s 300 -c 0 -o output%03d.txt -xy test.domain.com 8080 +``` + +| Option | Description | +| --- | --- | +| **\-H *header*** | append extra header to each get request. | +| **\-A *assign authority*** | hostname:port. Overrides Host: header sent. | +| **\-a *str*** | append string to each query | +| **\-n *numClients*** | Run vespa-fbench with *numClients* clients in parallel. If not specified, vespa-fbench will use a default value of *10* clients. 
| +| **\-c *cycleTime*** | each client will make a request each `<num>` milliseconds \[1000\] ('-1' -> cycle time should be twice the response time) | +| **\-l *limit*** | minimum response size for successful requests \[0\] | +| **\-i *ignoreCount*** | do not log the `<num>` first results. -1 means no logging \[0\] | +| **\-s *seconds*** | run the test for `<num>` seconds. -1 means forever \[60\] | +| **\-q *queryFilePattern*** | pattern defining input query files, e.g. *query%03d.txt* (the pattern is used with sprintf to generate filenames). Unless using POST, a query file has one query per line, each line starting with `/search/`:<br/><br/>`/search/?yql=select%20%2A%20from%20sources%20%2A%20where%20true` | +| **\-P** | use POST for requests instead of GET. Two lines per query, format:<br/><br/>`/search/`<br/>`{"yql" : "select * from sources * where true"}` <br/><br/>Any line starting with "/" will be taken as a URL path, with the following lines taken as the content (these lines can NOT start with "/"). The default content type is *"Content-Type: application/json"*; see *\-H*. | +| **\-o *outputFilePattern*** | save query results to output files with the given pattern (default is not saving.) | +| **\-r *restartLimit*** | number of times to re-use each query file. -1 means no limit \[-1\] | +| **\-m *maxLineSize*** | max line size in input query files \[8192\]. Can not be less than the minimum \[1024\]. | +| **\-p *seconds*** | Print summary every `<num>` seconds. Only available when installing vespa-fbench from test branch. | +| **\-k** | Enable HTTP keep-alive. 
| +| **\-d** | Base64 decode POST request content | +| **\-x** | write benchmark data reporting to output file: <br/><br/> **NumHits**<br/> Number of hits returned <br/><br/> **NumFastHits** <br/> Number of actual document hits returned <br/> <br/>**TotalHitCount** <br/> Total number of hits for query <br/> <br/> **QueryHits** <br/>Hits as specified in query <br/> <br/> **QueryOffset**<br/> Offset as specified in query <br/><br/> **NumErrors** <br/> Number of error hits returned <br/> <br/> **NumGroupHits** <br/> Number of grouping hits returned <br/><br/> **SearchTime** <br/> Time used for searching. Entire query time for one phase search, first phase for two-phase search <br/> <br/> **AttributeFetchTime** <br/> Time used for attribute fetching, or 0 for one phase search <br/> <br/> **FillTime** <br/> Time used for summary fetching, or 0 for one phase search | +| **\-y** | write data on coverage to output file (must be used with -x). <br/><br/> **DocsSearched** <br/> Total number of documents in nodes searched <br/><br/> **NodesSearched** <br/> Total number of search nodes which were used <br/> <br/>**FullCoverage** <br/>1 if true, 0 if false | +| **\-z** | Use single query file to be distributed between clients. | +| **\-C *file*** | Client certificate file name | +| **\-K *file*** | Client private key file name | +| **\-D** | Use TLS configuration from environment if T/C/K is not used | + +Default output: + +||| +| --- | --- | +| **connection reuse count** | Indicates how many times HTTP connections were reused to issue another request. Note that this number will only be displayed if the -k switch (enable HTTP keep-alive) is used. | +| **clients** | Echo of the -n parameter. | +| **cycle time** | Echo of the -c parameter. | +| **lower response limit** | Echo of the -l parameter. | +| **skipped requests** | Number of requests that was skipped by vespa-fbench. 
vespa-fbench will typically skip a request if the line containing the query url exceeds a pre-defined limit. Skipped requests will have minimal impact on the statistical results. | +| **failed requests** | The number of failed requests. A request will be marked as failed if an error occurred while reading the result or if the result contained fewer bytes than 'lower response limit'. | +| **successful requests** | Number of successful requests. Each performed request is counted as either successful or failed. Skipped requests (see above) are not performed and therefore not counted. | +| **cycles not held** | Number of cycles not held. The cycle time is specified with the -c parameter. It defines how often a client should perform a new request. However, a client may not perform another request before the result from the previous request has been obtained. Whenever a client is unable to initiate a new request 'on time' due to not being finished with the previous request, this value will be increased. | +| **minimum response time** | The minimum response time. The response time is measured as the time period from just before the request is sent to the server, till the result is obtained from the server. | +| **maximum response time** | The maximum response time. The response time is measured as the time period from just before the request is sent to the server, till the result is obtained from the server. | +| **average response time** | The average response time. The response time is measured as the time period from just before the request is sent to the server, till the result is obtained from the server. | +| **X percentile** | The X percentile of the response time samples; a value selected such that X percent of the response time samples are below this value. In order to calculate percentiles, a histogram of response times is maintained for each client at runtime and merged after the test run ends. 
If a percentile value exceeds the upper bound of this histogram, it will be approximated (and thus less accurate) and marked with '(approx)'. | +| **actual query rate** | The average number of queries per second; QPS. | +| **utilization** | The percentage of time used waiting for the server to complete (successful) requests. Note that if a request fails, the utilization will drop since the client has 'wasted' the time spent on the failed request. | +| **zero hit queries** | The number of queries that gave zero hits in Vespa | + +## vespa-makefsa + +Use *vespa-makefsa* to compile a list of phrases into a *finite state automaton* (FSA) file. FSA files are used in [query phrasing and rewriting](/en/linguistics/query-rewriting). + +If input file is not specified, standard input is used. + +Synopsis: `vespa-makefsa [-h] [-b] [-B] [-e] [-n] [-s bytes] [-z bytes] [-t] [-p] [-i] [-S serial] [-v] [-V] [input_file] output_file` + +| Option | Description | +| --- | --- | +| **\-h** | Help text | +| **\-b** | Use binary input format with Base64 encoded info | +| **\-B** | Use binary input format with raw | +| **\-e** | Use text input format with no info (default) | +| **\-s bytes** | Data size for numerical info: 1,2 or 4(default) | +| **\-z bytes** | Data size for binary info (-B) (0 means NUL terminated) | +| **\-t** | Use text input format | +| **\-p** | Build automaton with perfect hash | +| **\-i** | Ignore info string, regardless of input format | +| **\-S serial** | Serial number | +| **\-v** | Verbose | +| **\-V** | Display version | + +## vespa-query-profile-dump-tool + +Dumps all resolved query profile properties for a set of dimension values + +Synopsis:`vespa-query-profile-dump-tool dump [query-profile] [dir]? 
[parameters]?` + +Examples: + +```bash +dump default # dumps the 'default' profile non-variant values in the current dir + +dump default x=x1&y=y1 # dumps the 'default' profile resolved with dimensions values x=x1 and y=y1 in the current dir + +dump default myapppackage # dumps the 'default' profile non-variant values in myapppackage/search/query-profiles + +dump default dev/myprofiles x=x1&y=y1 # dumps the 'default' profile resolved with dimensions values x=x1 and y=y1 in dev/myprofiles +``` + +| Option | Description | +| --- | --- | +| **query-profile** | Name of the query profile to dump the values of | +| **dir** | Path to an application package or query profile directory. Default: current dir | +| **parameters** | HTTP request encoded dimension keys used during resolving. Default: none | \ No newline at end of file diff --git a/mintlify-docs/en/reference/querying/default-result-format.mdx b/mintlify-docs/en/reference/querying/default-result-format.mdx new file mode 100644 index 0000000000..c7c5f2b972 --- /dev/null +++ b/mintlify-docs/en/reference/querying/default-result-format.mdx @@ -0,0 +1,341 @@ +--- +title: "Default JSON Result Format" +sidebarTitle: "The default result format" +--- +The default Vespa query response format is used when [presentation.format](/en/reference/api/query#presentation.format) is unset or set to `json`. An alternative binary [CBOR](https://cbor.io/) format is available by setting `format=cbor` or using `Accept: application/cbor`. CBOR is a drop-in replacement - when deserialized, the result is identical to JSON. CBOR is both more compact and faster to generate, especially for numeric data such as tensors and embeddings. 
Results are rendered with one or more objects: + +- `root`: mandatory object with the tree of returned data +- `timing`: optional object with query timing information +- `trace`: optional object for metadata about query execution + +Refer to the [query API guide](/en/querying/query-api#result-examples) for result and tracing examples. + +All object names are literal strings, the node `root` is the map key "root" in the return JSON object, in other words, only strings are used as map keys. + +## root + +| Element | Parent | Mandatory | Type | Description | +| :--- | :--- | :--- | :--- | :--- | +| **root** | | yes | Map of string to object | The root of the tree of returned data. | +| **children** | root | no | Array of objects | Array of JSON objects with the same structure as `root`. | +| **fields** | root | no | Map of string to object | | +| **totalCount** | fields | no | Integer | Number of documents matching the query. Not accurate when using *nearestNeighbor*, *wand* or *weakAnd* query operators. The value is the number of hits after [first-phase dropping](/en/reference/schemas/schemas#rank-score-drop-limit). | +| **searchGroup** | fields | no | Integer | The index of the group that produced this result, when informative and unique. Useful for [group pinning](/en/content/elasticity#pinning-groups). | +| **coverage** | root | no | Map of string to string/number | Map of metadata about how much of the total corpus has been searched to return the given documents. | +| **coverage** | coverage | yes | Integer | Percentage of total corpus searched (when lower than 100 this is an approximation and is a lower bound, as no info from nodes down is known) | +| **documents** | coverage | yes | Long | The number of active documents searched. | +| **full** | coverage | yes | Boolean | Whether the full corpus was searched. | +| **nodes** | coverage | yes | Integer | The number of search nodes returning results. 
| +| **results** | coverage | yes | Integer | The number of results merged creating the final rendered result. | +| **resultsFull** | coverage | yes | Integer | The number of full result sets merged, e.g. when there are several sources/clusters for the results. | +| **degraded** | coverage | no | Map of string to object | Map of match-phase degradation elements. | +| **match-phase** | degraded | no | Boolean | Indicator whether [match-phase degradation](/en/reference/schemas/schemas#match-phase) has occurred. | +| **timeout** | degraded | no | Boolean | Indicator whether the query [timed out](/en/reference/api/query#timeout) before completion. | +| **adaptive-timeout** | degraded | no | Boolean | Indicator whether the query timed out with [adaptive timeout](/en/reference/api/query#ranking.softtimeout.enable) before completion. | +| **non-ideal-state** | degraded | no | Boolean | Indicator whether the content cluster is in [ideal state](/en/content/idealstate). | +| **errors** | root | no | Array of objects | Array of error messages with the fields given below. [Example](/en/querying/query-api#error-result). | +| **code** | errors | yes | Integer | Numeric identifier used by the container application. See [error codes](https://github.com/vespa-engine/vespa/blob/master/container-disc/src/main/java/com/yahoo/container/protect/Error.java) and [ErrorMessage.java](https://github.com/vespa-engine/vespa/blob/master/container-search/src/main/java/com/yahoo/search/result/ErrorMessage.java) for a short description. | +| **message** | errors | no | String | Full error message. | +| **source** | errors | no | String | Which [data provider](/en/querying/federation) logged the error condition. | +| **stackTrace** | errors | no | String | Stack trace if an exception was involved. | +| **summary** | errors | yes | String | Short description of error. | +| **transient** | errors | no | Boolean | Whether the system is expected to recover from the faulty state on its own. 
If the flag is not present, this may or may not be the case, or the flag is not applicable. | +| **fields** | root | no | Map of string to object | The named document (schema) [fields](/en/reference/schemas/schemas#field). Fields without value are not rendered.<br/><br/>In addition to the fields defined in the schema, the following might be returned:<br/><br/> <ul><li><b>sddocname</b>: Schema name. Returned in the [default document summary](/en/querying/document-summaries).</li><li><b>documentid</b>: Document ID. Returned in the [default document summary](/en/querying/document-summaries).</li><li><b>summaryfeatures</b>: Refer to [summary-features](/en/reference/schemas/schemas#summary-features) and [observing values used in ranking](/en/ranking/ranking-intro#observing-values-used-in-ranking).</li><li><b>matchfeatures</b>: Refer to [match-features](/en/reference/schemas/schemas#match-features) and [example use](/en/querying/nearest-neighbor-search-guide#strict-filters-and-distant-neighbors).</li></ul>| +| **id** | root | no | String | String identifying the hit, document or other data type. For document hits, this is the full string document ID if the hit is filled with a document summary that includes the `documentid` field. If it is not filled or only filled with a document summary without the `documentid` field, it is an internally generated unique id on the form `index:[source]/[node-index]/[hex-gid]`.<br/><br/>See [Document IDs in search results](/en/schemas/documents#docid-in-results) for how to ensure that the full string document ID (from memory) is returned.<br/><br/>For further information on the internally generated ids, see the [/document/v1/guide](/en/writing/document-v1-api-guide#troubleshooting) and also [receiving-responses-of-different-formats-for-the-same-query-in-vespa](https://stackoverflow.com/questions/74033383/receiving-responses-of-different-formats-for-the-same-query-in-vespa) (outdated regarding document IDs being stored on disk only). 
| +| **label** | root | no | String | The label of a grouping list. | +| **limits** | root | no | Object | Used in grouping, the limits of a bucket in histogram style data. | +| **from** | limits | no | String | Lower bound of a bucket group. | +| **to** | limits | no | String | Upper bound of a bucket group. | +| **relevance** | root | yes | Double | Double value representing the rank score. The rank score is returned from the [rank-profile](/en/ranking/ranking-intro). See the [FAQ](/en/learn/faq#what-could-cause-the-relevance-field-to-be--infinity) for how to handle <code>"-Infinity"</code> (represented as string) values. | +| **source** | root | no | String | Which data provider created this node. | +| **types** | root | no | Array of string | Metadata about what kind of document or other kind of node in the result set this object is. | +| **value** | root | no | String | Used in grouping for value groups, the argument for the grouping data which is in the fields. | + + +## timing + +| Element | Parent | Mandatory | Type | Description | +| :--- | :--- | :--- | :--- | :--- | +| **timing** | | no | Map of string to object| Query timing information, enabled by [presentation.timing](/en/reference/api/query#presentation.timing). The [query performance guide](/en/performance/practical-search-performance-guide#basic-text-search-query-performance) is a useful resource to understand the values in its child elements. | +| **querytime** | timing | no | Double | Time to execute the first protocol phase/matching phase, in seconds. | +| **summaryfetchtime** | timing | no | Double | [Document summary](/en/querying/document-summaries) fetch time, in seconds. This is the time to execute the summary fill protocol phase for the globally ordered top-k hits. | +| **searchtime** | timing | no | Double | Approximately the sum of `querytime` and `summaryfetchtime` and is close to what a client will observe (except network latency). In seconds. 
| + +## trace + +<Info> +**Note:** The tracing elements below are a subset of all elements. Refer to the [search performance guide](/en/performance/practical-search-performance-guide#advanced-query-tracing) for examples. +</Info> + +| Element | Parent | Mandatory | Type | Description | +| :----------------- | :------------------ | :-------- | :-------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| **trace** | | no | Map of string to object | Metadata about query execution.<br/><br/>**Note:** The tracing elements below are a subset of all elements. Refer to the [search performance guide](/en/performance/practical-search-performance-guide#advanced-query-tracing) for examples. | +| **children** | trace | no | Array of object | Array of maps with exactly the same structure as `trace` itself. | +| **timestamp** | children | no | Long | Number of milliseconds since the start of query execution this node was added to the trace. | +| **message** | children | no | String | Descriptive trace text regarding this step of query execution. | +| **message** | children | no | Array of objects | Array of messages. | +| **start_time** | message | no | String | Timestamp, e.g. 2022-07-27 09:51:21.938 UTC | +| **traces** | message or threads | no | Array of traces or objects | | +| **distribution-key** | message | no | Integer | The [distribution key](/en/reference/applications/services/content#node) of the content node creating this span. | +| **duration_ms** | message | no | Float | Duration of span. | +| **timestamp_ms** | traces | no | Float | Time since start of parent, see `start_time`. | +| **event** | traces | no | String | Description of span. | +| **tag** | traces | no | String | Name of span. | +| **threads** | traces | no | Array of objects | Array of objects that again have `traces` elements. 
| + + +## JSON Schema + +Formal schema for the query API default result format: + +```json expandable +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "title": "Result", + "description": "Schema for Vespa results", + "type": "object", + + + "properties": { + "root": { + "type": "document_node", + "required": true + }, + "trace": { + "type": "trace_node", + "required": false + } + }, + + "definitions": { + "document_node": { + "properties": { + "children": { + "type": "array", + "items": { + "type": "document_node" + }, + "required": false + }, + "coverage": { + "type": "coverage", + "required": false + }, + "errors": { + "type": "array", + "items": { + "type": "error" + }, + "required": false + }, + "fields": { + "type": "object", + "additionalProperties": true, + "required": false + }, + "id": { + "type": "string", + "required": false + }, + "relevance": { + "type": "number", + "required": true + }, + "types": { + "type": "array", + "items": { + "type": "string" + }, + "required": false + }, + "source": { + "type": "string", + "required": false + }, + "value": { + "type": "string", + "required": false + }, + "limits": { + "type": "object", + "required": false + }, + "label": { + "type": "string", + "required": false + } + }, + "additionalProperties": true, + }, + "trace_node": { + "properties": { + "children": { + "type": "array", + "items": { + "type": "trace_node" + }, + "required": false + }, + "timestamp": { + "type": "number", + "required": false + }, + "message": { + "type": "string", + "required": false + } + } + }, + "fields": { + "properties": { + "totalCount": { + "type": "number", + "required": true + } + } + }, + "coverage": { + "properties": { + "coverage": { + "type": "number", + "required": true + }, + "documents": { + "type": "number", + "required": true + }, + "full": { + "type": "boolean", + "required": true + }, + "nodes": { + "type": "number", + "required": true + }, + "results": { + "type": "number", + "required": true + }, + 
"resultsFull": { + "type": "number", + "required": true + } + } + }, + "error": { + "properties": { + "code": { + "type": "number", + "required": true + }, + "message": { + "type": "string", + "required": false + }, + "source": { + "type": "string", + "required": false + }, + "stackTrace": { + "type": "string", + "required": false + }, + "summary": { + "type": "string", + "required": true + }, + "transient": { + "type": "boolean", + "required": false + } + } + } + } +} +``` + +## Appendix: Legacy Vespa 7 JSON rendering + +There were some inconsistencies between search results and document rendering in Vespa 7, which are fixed in Vespa 8. This appendix describes the old behavior, what the changes are, and how to configure to select a specific rendering. + +### Inconsistent weightedset rendering + +Fields with various weightedset types has a JSON input representation (for feeding) as a JSON object; for example `{"one":1, "two":2,"three":3}` for the value of a a `weightedset<string>` field. The same format is used when rendering a document (for example when visiting). + +In search results however, there are intermediate processing steps during which the field value is represented as an array of item/weight pairs, so in a search result the field value would render as `[ {"item":"one", "weight":1},{"item":"two", "weight":2},{"item":"three", "weight":3} ]` + +In Vespa 8, the default JSON renderer for search results outputs the same format as document rendering. If you have code that depends on the old format you can turn off this by setting `renderer.json.jsonWsets=false` in the query (usually via a [query profile](/en/querying/query-profiles)). + +### Inconsistent map rendering + +Fields with various map types has a JSON input representation (for feeding) as a JSON object; for example `{"1001":1.0, "1002":2.0, "1003":3.0}` for the value of a a `map<int,float>` field. The same format is used when rendering a document (for example when visiting). 
+ +In search results however, there are intermediate processing steps and the field value is represented as an array of key/value pairs, so in a search result the field value would (in some cases) render as `[ {"key":1001, "value":1.0},{"key":1002, "value":2.0},{"key":1003, "value":3.0} ]` + +In Vespa 8, the default JSON renderer for search results outputs the same format as document rendering. For code that depends on the old format one can turn this off by setting `renderer.json.jsonMaps=false` in the query (usually via a [query profile](/en/querying/query-profiles)). + +### Geo position rendering + +Fields with the type `position` would in Vespa 7 be rendered using the internal fields "x" and "y". These are integers representing microdegrees, aka geographical degrees times 1 million, of longitude (for x) and latitude (for y). Also, any field _foo_ of type `position` would trigger addition of two extra synthetic summary fields _foo.position_ and _foo.distance_ (see below for details). + +In Vespa 8, positions are rendered with two JSON fields "lat" and "lng", both having a floating-point value. The "lat" field is latitude (going from -90.0 at the South Pole to +90.0 at the North Pole). The "lng" field is longitude (going from -180.0 at the dateline seen as extreme west, via 0.0 at the Greenwich meridian, to +180.0 at the dateline again, now as extreme east). The field names are chosen so the format is the same as used in the Google "places" API. + +A closely related change is the removal of two synthetic summary fields which would be returned in search results. For example with this in schema: + +```js +field mainloc type position { + indexing: attribute | summary +} +``` + +Vespa 7 would include the _mainloc_ summary field, but also _mainloc.position_ and _mainloc.distance_; the latter only when the query actually had a position to take the distance from. 
+ +The first of these (_mainloc.position_ in this case) was mainly useful for producing XML output in older Vespa versions, and now contains just the same information as the _mainloc_ summary field. The second (_mainloc.distance_ in this case) would return a distance in internal units, and can be replaced by a summary feature - here `distance(mainloc)` would give the same number, while `distance(mainloc).km` would be the recommended replacement with suitable code changes. + +### Summary-features wrapped in "rankingExpression" + +In Vespa 7, if a rank profile wanted a function `foobar` returned in summary-features (or match-features), it would be rendered as `rankingExpression(foobar)` in the output. + +For programmatic use, the `FeatureData` class has extra checking to allow lookup with `getDouble("foobar")` or `getTensor("foobar")`, but now it's present and rendered with just the original name as specified. + +If an application needs the JSON rendering to look exactly as in Vespa 7, one can specify that in the rank profile. For example, with this in the schema: + +```js +rank-profile whatever { + function lengthScore() { expression: matchCount(title)/fieldLength(title) } + summary-features { + matchCount(title) + lengthScore + ... +``` + +could, in Vespa 7, yield JSON output containing: + +```json +summaryfeatures: { + matchCount(title): 1, + rankingExpression(lengthScore): 0.25, + ... +``` + +In Vespa 8, you instead get the expected: + +```json +summaryfeatures: { + matchCount(title): 1, + lengthScore: 0.25, + ... +``` + +But to get the old behavior one can specify: + +```js +rank-profile whatever { + function lengthScore() { expression: matchCount(title)/fieldLength(title) } + summary-features { + matchCount(title) + rankingExpression(lengthScore) + ... +``` + +which gives you the same output as before. 
diff --git a/mintlify-docs/en/reference/querying/grouping-language.mdx b/mintlify-docs/en/reference/querying/grouping-language.mdx new file mode 100644 index 0000000000..2c2afdcaea --- /dev/null +++ b/mintlify-docs/en/reference/querying/grouping-language.mdx @@ -0,0 +1,535 @@ +--- +title: "Grouping Language Reference" +sidebarTitle: "Grouping" +--- + +Read the [Vespa grouping guide](/en/querying/grouping) first, for examples and an introduction to grouping - this is the Vespa grouping reference. + +Also note that using a [multivalued](/en/querying/searching-multivalue-fields) attribute (such as an array of doubles) in a grouping expression affects performance. Such operations can hit a memory bandwidth bottleneck, particularly if the set of hits to be processed is large, as more data is evaluated. + +## Group + +Group query results using a custom expression (using the `group` clause): + +- A numerical or string constant (e.g., `group(1)` or `group("all")`) which makes one bucket with everything +- A document [attribute](/en/content/attributes) +- A function over another expression (`xorbit`, `md5`, `cat`, `xor`, `and`, `or`, `add`, `sub`, `mul`, `div`, `mod`) or any other [expression](#expressions) +- The datatype of an expression is resolved using best-effort, similarly to how common programming languages do to resolve arithmetic of different data-typed operands +- The results of any expression are either scalar or single-dimensional arrays + - `add(<array>)` adds all elements together to produce a scalar + - `add(<arrayA>, <arrayB>)` adds each element together producing a new array whose size is `max(|<arrayA>|, |<arrayB>|)` + +Groups can contain subgroups (by using `each` and `group` operations), and may be nested to any level. + +Multiple sub-groupings or outputs can be created under the same group level, using multiple parallel `each` or `all` clauses, and each one may be labelled using [as(mylabel)](#labels). 
+ +When grouping results, _groups_ that contain _outputs_, _group lists_ and _hit lists_ are generated. Group lists contain subgroups, and hit lists contain hits that are part of the owning group. + +The identity of a group is held by its _id_. Scalar identities such as long, double, and string, are directly available from the _id_, whereas range identities used for bucket aggregation are separated into the sub-nodes _from_ and _to_. Refer to the [result format reference](/en/reference/querying/default-result-format). + +### Multivalue attributes + +A [multivalue](/en/querying/searching-multivalue-fields) attribute is a [weighted set](/en/reference/schemas/schemas#weightedset), [array](/en/reference/schemas/schemas#array) or [map](/en/reference/schemas/schemas#map). Most grouping functions will just handle the elements of multivalued attributes separately, as if they were all individual values in separate documents. If you are grouping over array of struct or maps, scoping will be used to preserve structure. Each entry in the array/map will be treated as a separate sub-document. The following syntax can be used when grouping on _map_ attribute fields. + +Group on map keys: + +```bash +all( group(mymap.key) each(output(count())) ) +``` + +Group on map keys then on map values: + +```bash +all( group(mymap.key) each( group(mymap.value) each(output(count())) )) +``` + +Group on values for key _my\_key_: + +```bash +all( group(my_map{"my_key"}) each(output(count())) ) +``` + +Group on struct field _my\_field_ referenced in map element _my\_key_: + +```bash +all( group(my_map{"my_key"}.my_field) each(output(count())) ) +``` + +The key can either be specified directly (above) or indirectly via a key source attribute. The key is retrieved from the key source attribute for each document. 
Note that the key source attribute must be single-valued and have the same data type as the key type of the map: + +```bash +all( group(my_map{attribute(my_key_source)}) each(output(count())) ) +``` + +Group on array of integers field: + +```bash +all( group(my_array) each(output(count())) ) +``` + +Group on struct field _my\_field_ in the _my\_array_ array of structs: + +```bash +all( group(my_array.my_field) each(output(count())) ) +``` + +[Tensors](/en/reference/schemas/schemas#tensor) can not be used in grouping. + +## Filtering groups + +When grouping on multivalue attributes, it may be useful to filter the groups so that only some specific values are collected. This can be done by adding a filter. The `filter` clause expects a filter _predicate_: + +- [regex("regular expression", input-expression)](#regex-filter) +- [range(min-limit, max-limit, input-expression)](#range-filter) +- [range(min-limit, max-limit, input-expression, bool, bool)](#range-filter) +- [istrue(input-expression)](#istrue-filter) +- [not _predicate_](#logical-predicates-filter) +- [_predicate_ and _predicate_](#logical-predicates-filter) +- [_predicate_ or _predicate_](#logical-predicates-filter) + +### Regex filter + +Use a regular expression to match the input, and include only documents that match in the grouping. The input will usually be the same -expression as in the "group" clause. Example: + +```bash +all( group(my_array) filter(regex("foo.*", my_array)) ...) +``` + +Here only the values that start with a "foo" prefix in _my\_array_ are collected into groups, all others are ignored. See also [this example](/en/querying/grouping#structured-grouping). + +Regex filtering works on the string representation of any field type. For example, you can also filter on boolean values using regex(true) and regex(false). + +### Range filter + +Use a `range` filter to match documents where a field value is between a lower and an upper bound. 
Example: + +```bash +all( group(some_field) filter(range(1990, 2012, year)) ...) +``` +Here, the lower bound is _inclusive_ (year ≥ 1990) and the upper bound is _exclusive_ (year \< 2012). Use optional bools at the end to control if the lower and upper bounds are inclusive, respectively. The first bool sets the lower bound inclusive, and the second sets the upper bound inclusive. +```bash +all( group(some_field) filter(range(1990, 2012, year, true, true)) ...) +``` +Here, both lower and upper bounds are inclusive. +### IsTrue filter + +Use `istrue` to filter using a boolean expression. The expression must evaluate to a boolean value, e.g. a boolean attribute. Example: + +```bash +all( group(some_field) filter(istrue(my_bool_attribute)) ...) +``` +Here, only documents where _my\_bool\_attribute_ evaluates to true are included in the grouping. +### Logical predicates + +Use `not` to negate another filter expression. It takes a single sub-filter and matches when the sub-filter does not. Example: + +```bash +all( group(my_field) filter( not regex("bar.*", my_other_field)) ...) +``` +Use `or` to perform a logical disjunction across two sub-filters. The combined filter matches if any of the sub-filters evaluate to true. Example: +```bash +all( group(my_field) filter( regex("bar.*", my_field) or regex("baz.*", my_third_field) ) ...) +``` +Use `and` to perform a logical conjunction across two sub-filters. The combined filter matches only if all of the sub-filters evaluate to true. Example: +```bash +all( group(my_field) filter( regex("bar.*", my_other_field) and regex("baz.*", my_third_field) ) ...) +``` +These logical predicates can be nested to create complex filter conditions. Filter expressions follow _conventional precedence_ rules: `not` is evaluated before `and`, and `and` is evaluated before `or`. Operators of the same precedence are evaluated left-to-right. Use parentheses `(...)` to force a different grouping when needed. 
Example: +```bash +all( + group(my_field) + filter( (regex("bar.*", some_field) or regex("baz.*", other_field)) and not regex(".*foo", some_field)) + each(...) +) +``` + +## Order / max + +Each level of grouping may specify how to order its groups (using `order`): + +- Ordering can be done using any of the available aggregates +- Multi-level grouping allows strict ordering where primary aggregates may be equal +- Ordering is either ascending or descending, specified per level of ordering +- Groups are sorted using [locale aware sorting](#uca) + +Limit the number of groups returned for each level using `max`, returning only the first _n_ groups as specified by `order`: + +- `order` changes the ordering of groups after a merge operation for the following aggregators: `count`, `avg` and `sum` +- `order` **will not** change ordering of groups after a merge operation when `max` or `min` is used +- Default order, `-max(relevance())`, **does not** require use of [precision](#precision) + +## Continuations + +Pagination of grouping results is managed by `continuations`. These are opaque objects that can be combined and resubmitted using the `continuations` annotation on the grouping step of the query to move to the previous or next page in a result list. + +All root groups contain a single _this_ continuation per `select`. That continuation represents the current view, and if submitted as the sole continuation, it will reproduce the exact same result as the one that contained it. + +There are zero or one _prev_/_next_ continuation per group- and hit list. Submit any number of these to retrieve the next/previous pages of the corresponding lists. + +Any number of continuations can be combined in a query, but the first must always be the _this_-continuation. E.g., one may simultaneously move both to the next page of one list, and the previous page of another. 
+ +<Info> + **Note:** If more than one continuation object is provided for the same group- or hit-list, the one given last is the one that takes effect. This is because continuations are processed in the order given, and they replace whatever continuations they collide with. +</Info> + +If working programmatically with grouping, find the [Continuation](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/grouping/Continuation.html) objects within [RootGroup](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/grouping/result/RootGroup.html), [GroupList](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/grouping/result/GroupList.html) and [HitList](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/grouping/result/HitList.html) result objects. These can then be added back into the continuation list of the [GroupingRequest](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/search/grouping/GroupingRequest.html) to paginate. + +Refer to the [grouping guide](/en/querying/grouping#pagination) for an example. + +## Labels + +Lists created using the `each` keyword can be assigned a label using the construct `each(...) as(mylabel)`. The outputs created by each clause will be identified by this label. + +## Aliases + +Grouping expressions can be tagged with an _alias_. An alias allows the expression to be reused without having to repeat the expression verbatim. + +```bash +all(group(a) alias(myalias, count()) each(output($myalias))) +``` + is equivalent to +```bash +all(group(a) each(output(count()))) +``` +. +```bash +all(group(a) order($myalias=count()) each(output($myalias))) +``` + is equivalent to +```bash +all(group(a) order(count()) each(output(count()))) +``` +. 
+## Precision + +`precision` sets the number of intermediate groups returned from each content node during expression evaluation, giving the container node more data to consider when selecting the groups that are to be evaluated further. Example: `each(...) precision(1000)`. A higher number costs more bandwidth, but leads to higher accuracy in some cases. + +## Query parameters + +The following _query parameters_ are relevant for grouping. See the [Query API Reference](/en/reference/api/query#parameters) for descriptions. + +- [select](/en/reference/api/query#select) +- [groupingSessionCache](/en/reference/api/query#groupingsessioncache) +- [grouping.defaultMaxGroups](/en/reference/api/query#grouping.defaultmaxgroups) +- [grouping.defaultMaxHits](/en/reference/api/query#grouping.defaultmaxhits) +- [grouping.globalMaxGroups](/en/reference/api/query#grouping.globalmaxgroups) +- [grouping.defaultPrecisionFactor](/en/reference/api/query#grouping.defaultprecisionfactor) + +## Grouping Session Cache + +<Danger> + **Important:** The grouping session cache is **only useful if** the grouping expression uses default ordering. The **drawback** is that when `max` is specified in the grouping expression, it might cause inaccuracies in aggregated values such as `count`. It is recommended to test whether this is an issue or not, and if so, adjust the `precision` parameter to still get correct counts. +</Danger> + +The session cache stores intermediate grouping results in the content nodes when using multi-level grouping expressions, in order to speed up grouping at a potential loss of accuracy. This causes the query and grouping expression to be run only once. + +When having multi-level grouping expressions, the search query is normally re-run for each level. The drawback of this is that, with an expensive ranking function, the query will take more time than strictly necessary. 
+ +## Aggregators + +Each level of grouping specifies a set of aggregates to collect for all documents that belong to that group (using the `output` operation): + +- The documents in a group, retrieved using a specified summary class +- The count of documents in a group +- The sum, average, min, max, xor or standard deviation of an expression +- Multiple quantiles of an expression's value + +When all arguments are numeric, the result type is resolved by looking at the argument types. If all arguments are longs, the result is a long. If at least one argument is a double, the result is a double. + +When using `order`, aggregators can also be used in expressions in order to get increased control over group sorting. This does not work with expressions that take attributes as an argument, unless the expression is enclosed within an aggregator. + +Using sum, max on a multivalued attribute: Doing an operation such as `output(sum(myarray))` will run the sum over each element value in each document. The result is the sum of sums of values. Similarly, `max(myarray)` will yield the maximal element over all elements in all documents, and so on. + +Compute quantiles by listing the desired quantile values (comma-separated) in brackets, followed by a comma and the expression (e.g., a field): + +```bash +all( group(city) each(output(quantiles([0.5], delivery_days) as(median_delivery_days) ) ) ) +``` +to compute the median, or +```bash +all( group(city) each(output(quantiles([0.5, 0.9], delivery_days))) ) +``` +This computes the median (p50) and 90th percentile (p90) time to delivery in days per city. Note that quantiles are computed using [KLL Sketch](https://datasketches.apache.org/docs/KLL/KLLSketch.html), so they are approximate. + +Multivalue fields, such as maps and arrays, can be used for grouping. However, using aggregation functions such as sum() on such fields can give misleading results. 
Assume a map from strings to integers (`map<string, int>`), where the strings are some sort of key to use for grouping. The following expression will provide the sum of the values for all keys: + +```bash +all( group(mymap.key) each(output(sum(mymap.value))) ) +``` + +and not the sum of the values within each key, as one would expect. It is still, however, possible to run the following expression to get the sum of values within a specific key: + +``` +all( group("my_group") each(output(sum(mymap{"foo"}))) ) +``` + +Refer to the system test for [grouping on struct and map types](https://github.com/vespa-engine/system-test/blob/master/tests/search/struct_and_map_types/struct_and_map_grouping.rb) for more examples. + +### Group list aggregators + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| count | Counts the number of unique groups (as produced by `group`). Note that `count` operates independently of `max` and that this count is an estimate using HyperLogLog++ which is an algorithm for the count-distinct problem | None | Long | + +### Group aggregators + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| count | Increments a long counter every time it is invoked | None | Long | +| sum | Sums the argument over all selected documents | Numeric | Numeric | +| avg | Computes the average over all selected documents | Numeric | Numeric | +| min | Keeps the minimum value of selected documents | Numeric | Numeric | +| max | Keeps the maximum value of selected documents | Numeric | Numeric | +| xor | XOR the values (their least significant 64 bits) of all selected documents | Any | Long | +| stddev | Computes the population standard deviation over all selected documents | Numeric | Double | +| quantiles | Computes one or multiple quantiles of the values of an expression. Quantiles must be a number between 0 and 1 inclusive. 
| [Numeric+], Expr | \[\{"quantile":Double,"value":Double}\+] | + + +### Hit aggregators + +| Name | Description | Arguments | Result | +| --- | --- | --- | --- | +|summary| Produces a summary of the requested [summary class](/en/reference/schemas/schemas#document-summary) | Name of summary class | Summary | + +## Expressions + +### Arithmetic expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| add | Add the arguments together | Numeric+ | Numeric | +| + | Add left and right argument | Numeric, Numeric | Numeric | +| mul | Multiply the arguments together | Numeric+ | Numeric | +| \* | Multiply left and right argument | Numeric, Numeric | Numeric | +| sub | Subtract second argument from first, third from result, etc | Numeric+ | Numeric | +| - | Subtract right argument from left | Numeric, Numeric | Numeric | +| div | Divide first argument by second, result by third, etc | Numeric+ | Numeric | +| / | Divide left argument by right | Numeric, Numeric | Numeric | +| mod | Modulo first argument by second, result by third, etc | Numeric+ | Numeric | +| % | Modulo left argument by right | Numeric, Numeric | Numeric | +| neg | Negate argument | Numeric | Numeric | +| - | Negate right argument | Numeric | Numeric | + +### Bitwise expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| and | AND the arguments in order | Long+ | Long | +| or | OR the arguments in order | Long+ | Long | +| xor | XOR the arguments in order | Long+ | Long | + +### String expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| strlen | Count the number of bytes in argument | String | Long | +| strcat | Concatenate arguments in order | String+ | String | + +### Type conversion expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| todouble | Convert argument to double | Any | Double | +| tolong | Convert argument to long | Any | Long | +| tostring 
| Convert argument to string | Any | String | +| toraw | Convert argument to raw | Any | Raw | + +### Raw data expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| cat | Cat the binary representation of the arguments together | Any+ | Raw | +| md5 | Does an MD5 over the binary representation of the argument, and keeps the lowest 'width' bits | Any, Numeric(width) | Raw | +| xorbit | Does an XOR of 'width' bits over the binary representation of the argument. Width is rounded up to a multiple of 8 | Any, Numeric(width) | Raw | + +### Accessor expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| relevance | Return the computed rank of a document | None | Double | +| \<attribute-name\> | Return the value of the named attribute | None | Any | +| array.at | Array element access. The expression `array.at(myarray, idx)` returns one value per document by evaluating the `idx` expression and using it as an index into the array. The expression can then be used to build bigger expressions such as `output(sum(array.at(myarray, 0)))` which will sum the first element in the array of each document. <br/>- The `idx` expression is capped to `[0, size(myarray)-1]`<br/>- If \> array size, the last element is returned<br/>- If \< 0, the first element is returned | Array, Numeric | Any | +| interpolatedlookup | Counts elements in a sorted array that are less than an expression, with linear interpolation if the expression is between element values. The operation `interpolatedlookup(myarray, expr)` is intended for generic graph/function lookup. The data in `myarray` should be numerical values sorted in ascending order. The operation will then scan from the start of the array to find the position where the element values become equal to (or greater than) the value of the `expr` lookup argument, and return the index of that position. 
<br/> When the lookup argument's value is between two consecutive array element values, the returned position will be a linear interpolation between their respective indexes. The return value is always in the range `[0, size(myarray)-1]` of the valid index values for an array.<br/>Assume `myarray` is a sorted array of type `array<double>` in each document: The expression `interpolatedlookup(myarray, 4.2)` is now a per-document expression that first evaluates the lookup argument, here a constant expression 4.2, and then looks at the contents of `myarray` in the document. The scan starts at the first element and proceeds until it hits an element value greater than 4.2 in the array. This means that:<br/>- If the first element in the array is greater than 4.2, the expression returns 0<br/>- If the first element in the array is exactly 4.2, the expression still returns 0<br/>- If the first element in the array is 1.7 while the **second** element value is exactly 4.2, the expression returns 1.0 - the index of the second element<br/>- If **all** the elements in the array are less than 4.2, the last valid array index `size(myarray)-1` is returned<br/>- If the first 5 elements in the array have values smaller than the lookup argument, and the lookup argument is halfway between the fifth and sixth element, a value of 4.5 is returned - halfway between the array indexes of the fifth and sixth elements<br/>- Similarly, if the elements in the array are `{0, 1, 2, 4, 8}` then passing a lookup argument of "5" would return 3.25 (linear interpolation between `indexOf(4)==3` and `indexOf(8)==4`)<br/>| + + + +### Bucket expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| fixedwidth | Maps the value of the first argument into consecutive buckets whose width equals the second argument | Any, Numeric | NumericBucketList | +| predefined | Maps the value of the first argument into the given buckets. 
<br/>- Standard mathematical start and end specifiers may be used to define the width of a `bucket`. The `(` and `)` evaluates to `[` and `>` by default. <br/>- The buckets assume the type of the start/end specifiers (`string`, `long`, `double` or `raw`). Values are converted to this type before being compared with these specifiers (e.g., `double` values are rounded to the nearest integer for buckets of type `long`). <br/>- The end specifier can be skipped. The buckets `bucket(3)`/`bucket[3]` are the same as `bucket[3,4>`. This is allowed for string expressions as well; `bucket("c")` is identical to `bucket["c", "c ">`. | Any, Bucket+ | BucketList | + +### Time expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| time.dayofmonth | Returns the day of month (1-31) for the given timestamp | Long | Long | +| time.dayofweek | Returns the day of week (0-6) for the given timestamp, Monday being 0 | Long | Long | +| time.dayofyear | Returns the day of year (0-365) for the given timestamp | Long | Long | +| time.hourofday | Returns the hour of day (0-23) for the given timestamp | Long | Long | +| time.minuteofhour | Returns the minute of hour (0-59) for the given timestamp | Long | Long | +| time.monthofyear | Returns the month of year (1-12) for the given timestamp | Long | Long | +| time.secondofminute | Returns the second of minute (0-59) for the given timestamp | Long | Long | +| time.year | Returns the full year (e.g. 2009) of the given timestamp | Long | Long | +| time.date | Returns the date (e.g. 2009-01-10) of the given timestamp | Long | Long | + +### List expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| size | Return the number of elements in the argument if it is a list. If not return 1 | Any | Long | +| sort | Sort the elements in the argument in ascending order if the argument is a list. 
If not, it is a NOP | Any | Any | +| reverse | Reverse the elements in the argument if the argument is a list. If not, it is a NOP | Any | Any | + +### Other expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| zcurve.x | Returns the X component of the given [zcurve](https://en.wikipedia.org/wiki/Z-order_curve) encoded 2d point. All fields of type "position" have an accompanying "\<fieldName\>\_zcurve" attribute that can be decoded using this expression, e.g. `zcurve.x(foo_zcurve)` | Long | Long | +| zcurve.y | Returns the Y component of the given zcurve encoded 2d point | Long | Long | +| geo\_distance | Computes the great-circle distance from a [position](/en/reference/schemas/schemas#position) field to a given point. The unit suffix `.km` or `.miles` selects the output unit. Works on both `position` and `array<position>` fields. For arrays, the minimum distance across all positions in the document is returned. <CodeBlock>```all( group(fixedwidth(geo_distance(attribute(location), 63.4, 10.4).km, 10)) each(output(count())) )```</CodeBlock> Available since Vespa 8.664.22 . | Attribute(position), Double(lat), Double(lng) | Double | +| uca | Converts the attribute string using [unicode collation algorithm](https://www.unicode.org/reports/tr10/). Groups are sorted using locale-aware sorting, with the default and primary strength values, respectively: <CodeBlock>```all( group(s) order(max(uca(s, "sv"))) each(output(count())) )```</CodeBlock> <CodeBlock>```all( group(s) order(max(uca(s, "sv", "PRIMARY"))) each(output(count())) )```</CodeBlock> | Any, Locale(String), Strength(String) | Raw | + +### Single argument standard mathematical expressions + These are the standard mathematical functions as found in the Java [Math](https://docs.oracle.com/javase/8/docs/api/java/lang/Math.html) class. 
+| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| math.exp |   | Double | Double | +| math.log |   | Double | Double | +| math.log1p |   | Double | Double | +| math.log10 |   | Double | Double | +| math.sqrt |   | Double | Double | +| math.cbrt |   | Double | Double | +| math.sin |   | Double | Double | +| math.cos |   | Double | Double | +| math.tan |   | Double | Double | +| math.asin |   | Double | Double | +| math.acos |   | Double | Double | +| math.atan |   | Double | Double | +| math.sinh |   | Double | Double | +| math.cosh |   | Double | Double | +| math.tanh |   | Double | Double | +| math.asinh |   | Double | Double | +| math.acosh |   | Double | Double | +| math.atanh |   | Double | Double | + +### Dual argument standard mathematical expressions + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| math.pow | Return X^Y. | Double, Double | Double | +| math.hypot | Return length of hypotenuse given X and Y sqrt(X^2 + Y^2) | Double, Double | Double | + +## Filters + +### String filters + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| regex | Matches a field against a regular expression string. | String, Expression | Bool | + +### Numeric filters + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| range | Matches when a field is between a lower and upper bound. | Numeric, Numeric, Expression, Bool?, Bool? | Bool | + +### Boolean filters + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| istrue | Matches when a boolean expression evaluates to true. | Expression | Bool | + +### Predicate filters + +| Name | Description | Arguments | Result | +| :--- | :--- | :--- | :--- | +| and | Logical `and` between the arguments. | Filter, Filter | Bool | +| not | Logical `not` on the argument. | Filter | Bool | +| or | Logical `or` between the arguments. 
| Filter, Filter | Bool | + +## Grouping language grammar + +```bash expandable +request ::= "all(" operations ")" +group ::= ( "all" | "each") "(" operations ")" ["as" "(" identifier ")"] +operations ::= ["group" "(" exp ")"] + ( ( "alias" "(" identifier "," exp ")" ) | + ( filter ) | + ( "max" "(" ( number | "inf" ) ")" ) | + ( "order" "(" expList | aggrList ")" ) | + ( "output" "(" aggrList ")" ) | + ( "precision" "(" number ")" ) )* + group* +aggrList ::= aggr ( "," aggr )* +aggr ::= ( ( "count" "(" ")" ) | + ( "sum" "(" exp ")" ) | + ( "avg" "(" exp ")" ) | + ( "max" "(" exp ")" ) | + ( "min" "(" exp ")" ) | + ( "xor" "(" exp ")" ) | + ( "stddev" "(" exp ")" ) | + ( "quantiles" "(" "[" number ( "," number )* "]" "," exp ")" ) | + ( "summary" "(" [identifier] ")" ) ) + ["as" "(" identifier ")"] +expList ::= exp ( "," exp )* +exp ::= ( "+" | "-") ( "$" identifier ["=" math] ) | ( math ) | ( aggr ) +filter ::= ( "filter" | "keep" ) "(" filterExp ")" +filterExp ::= filterAnd ( "or" filterAnd )* +filterAnd ::= filterNot ( "and" filterNot )* +filterNot ::= "not" filterNot | filterPrim +filterPrim ::= ( "(" filterExp ")" ) | + ( "regex" "(" string "," exp ")" ) | + ( "range" "(" number "," number "," exp ["," boolean "," boolean] ")" ) | + ( "istrue" "(" exp ")" ) +math ::= value [( "+" | "-" | "*" | "/" | "%" ) value] +value ::= ( "(" exp ")" ) | + ( "add" "(" expList ")" ) | + ( "and" "(" expList ")" ) | + ( "cat" "(" expList ")" ) | + ( "div" "(" expList ")" ) | + ( "docidnsspecific" "(" ")" ) | + ( "fixedwidth" "(" exp "," number ")" ) | + ( "geo_distance" "(" "attribute" "(" attributeName ")" "," number "," number ")" "." ( "km" | "miles" ) ) | + ( "interpolatedlookup" "(" attributeName "," exp ")") | + ( "math" "." 
( + ( + "exp" | "log" | "log1p" | "log10" | "sqrt" | "cbrt" | + "sin" | "cos" | "tan" | "asin" | "acos" | "atan" | + "sinh" | "cosh" | "tanh" | "asinh" | "acosh" | "atanh" + ) "(" exp ")" | + ( "pow" | "hypot" ) "(" exp "," exp ")" + )) | + ( "max" "(" expList ")" ) | + ( "md5" "(" exp "," number "," number ")" ) | + ( "min" "(" expList ")" ) | + ( "mod" "(" expList ")" ) | + ( "mul" "(" expList ")" ) | + ( "or" "(" expList ")" ) | + ( "predefined" "(" exp "," "(" bucket ( "," bucket )* ")" ")" ) | + ( "reverse" "(" exp ")" ) | + ( "relevance" "(" ")" ) | + ( "sort" "(" exp ")" ) | + ( "strcat" "(" expList ")" ) | + ( "strlen" "(" exp ")" ) | + ( "size" "(" exp")" ) | + ( "sub" "(" expList ")" ) | + ( "time" "." ( "date" | "year" | "monthofyear" | "dayofmonth" | "dayofyear" | "dayofweek" | + "hourofday" | "minuteofhour" | "secondofminute" ) "(" exp ")" ) | + ( "todouble" "(" exp ")" ) | + ( "tolong" "(" exp ")" ) | + ( "tostring" "(" exp ")" ) | + ( "toraw" "(" exp ")" ) | + ( "uca" "(" exp "," string ["," string] ")" ) | + ( "xor" "(" expList ")" ) | + ( "xorbit" "(" exp "," number ")" ) | + ( "zcurve" "." ( "x" | "y" ) "(" exp ")" ) | + ( attributeName "." "at" "(" number ")") | + ( attributeName ) +bucket ::= "bucket" ( "(" | "[" | "<" ) + ( "-inf" | rawvalue | number | string ) + ["," ( "inf" | rawvalue | number | string )] + ( ")" | "]" | ">" ) +rawvalue ::= "{" ( ( string | number ) "," )* "}" +``` \ No newline at end of file diff --git a/mintlify-docs/en/reference/querying/json-query-language.mdx b/mintlify-docs/en/reference/querying/json-query-language.mdx new file mode 100644 index 0000000000..065ca6e3c8 --- /dev/null +++ b/mintlify-docs/en/reference/querying/json-query-language.mdx @@ -0,0 +1,602 @@ +--- +title: "JSON Select Query Reference" +sidebarTitle: "Select" +--- + + +This document describes what the `select` parameter is and gives a few examples on how to use it. 
Refer to the [Query API](/en/querying/query-api) for how to execute queries with POST. + +The query has JSON syntax, and can be used with queries that are executed with HTTP POST. The `select` parameter is equivalent with YQL, and can be used instead of, but not together with YQL. Nor can it be used together with the `query` parameter. + +## Structure + + +```json +"select" : { + "where" : {...}, + "grouping" : {...}, + "fields" : [...] +} +``` + +Example query searching for the term 'country' in the field 'title': + +```json +{ + "select": { + "where": { + "contains": ["title", "country"] + } + } +} +``` + +This query can be executed with `curl`: + +```bash +curl -H "Content-Type: application/json" \ + --data "{ 'select': { 'where': { 'contains': ['default', 'country'] } } }" \ + http://localhost:8080/search/ +``` + + +### Where + +Unlike the SQL-like syntax in [YQL](/en/reference/querying/yql), the *where* queries are written in a tree syntax. By combining YQL functions and arguments, queries equivalent with YQL can be written in JSON. + +#### Formal structure + + +Functions are nested like this: + +```json +FUNCTION : { + "children" : [ argument, argument,..], + "attributes" : {annotations} +} +``` + +or like this, by moving the `children`-key up, if attributes are not in use with the function: + +```json +FUNCTION : [ + argument, + argument, + ... +] +``` + +YQL is a regular language and is parsed into a query tree when parsed in Vespa. That tree can also be built +with the `where` parameter in JSON. 
+ +Let's take a look at this yql: `select * from sources * where default contains foo and rank(a contains "A", b contains "B")`, which will create the following query tree: + +<Frame> +<img src="/assets/img/querytree.svg" width="737px" height="auto" alt="Example query tree" /> +</Frame> + +The tree above can be written with the 'where' parameter, like this: + +```json +{ + "and" : [ + { "contains" : ["default", "foo"] }, + { "rank" : [ + { "contains" : ["a", "A"] }, + { "contains" : ["b", "B"] } + ]} + ] +} +``` +which is equivalent with the YQL. + + + +### Fields + +Available since Vespa `8.680.18`. + +The `fields` parameter restricts which +[summary fields](/en/querying/document-summaries#selecting-summary-fields-in-yql) +are included in each hit. It is a JSON array of field names, and is equivalent +to the field list in a YQL `select` clause. + +YQL: `select id, title from sources * where title contains 'madonna'`. + +Equivalent JSON: + +```json +{ + "select": { + "fields": ["id", "title"], + "where": { + "contains": ["title", "madonna"] + } + } +} +``` + +If `fields` is omitted or empty, all fields of the chosen +[document summary class](/en/querying/document-summaries#selecting-summary-fields-in-yql) are returned. + + +### Grouping + +One or more [grouping statements](/en/querying/grouping) can be set as a JSON array in the `grouping` field. +Each array item is a grouping statement represented as JSON where +- Each grouping function is represented by a JSON object where the name of the function is the field + name and the value is the function content. +- Lists of arguments are represented as JSON arrays. 
+ +Examples: + +Grouping statement: + +```json +| all(group(time.year(a)) each(output(count()) + all(group(time.monthofyear(a)) each(output(count()))) +``` +equivalent JSON `grouping`-argument: + +```json +"grouping" : [ + { + "all" : { + "group" : "time.year(a)", + "each" : { "output" : "count()" }, + "all" : { + "group" : "time.monthofyear(a)", + "each" : { "output" : "count()" }, + } + } + } +] +``` + +Grouping statement: + +```bash +all(group(predefined(foo, bucket[1, 2>, bucket[3, 4>))) +``` +equivalent JSON `grouping`-argument: + +```json +"grouping" : [ + { + "all" : { + "group" : { + "predefined" : [ "foo", { "bucket": [1,2]}, { "bucket": [3,4]} ] + } + } + } +] +``` + +### Other query parameters + +[Query parameters](/en/reference/api/query) not specific to the Select or YQL syntax will work as well. For example, to search for everything (`"where": true`) in the `music` document type: + +```json +{ + "select": { + "where": true + }, + "model": { + "restrict": "judgment" + } +} +``` + +### Complete examples + +Create one bucket for all documents (`"group": "\"all\""`) and output overall price statistics (`"avg(price)"` and `"sum(price)"`): + +```json +{ + "select": { + "where": true, + "grouping": [ + { + "all": { + "group": "\"all\"", + "each": { + "output": [ + "avg(price)", + "sum(price)" + ] + } + } + } + ] + } +} +``` + +A more complex example: +```json +{ + "select" : { + "where" : { + "and" : { + "children" : [ + {"title" : "music"}, + {"default" : "festival"} + ] + } + }, + "grouping" : [ { + "all" : { + "group" : "time.year(a)", + "each" : { "output" : "count()" } + } + } ] + }, + "offset" : 5, + "presentation" : { + "bolding" : false, + "format" : "json" + } +} +``` + +### Examples with the different functions + + +##### CONTAINS +YQL: `where title contains 'a'`. + +Format of this in JSON: + +```json +"where" : { + "contains" : [ "title", "a" ] +} +``` + +##### CONTAINS with text() +YQL: `where title contains ({language:'en'}text('hello world'))`. 
+ +Format of this [text() operator with annotations](/en/reference/querying/yql#text) in JSON: + +```json +"where" : { + "contains" : [ + "title", + { + "text" : { + "query" : "hello world", + "attributes" : { + "language" : "en" + } + } + } + ] +} +``` + +##### Numeric Operators +YQL: `where date >= 10`. + +Format of this in JSON: + +*Introducing the range parameter:* + +```json +"range" : [ + "date", + { ">=" : 10} +] +``` + +The range query accepts the following parameters: + + +| Operator | Description | +| :--- | :--- | +| ≥ | Greater-than or equal to | +| `>` | Greater-than | +| `<` | Less-than | +| ≤ | Less-than or equal to | + +YQL: `where range(field, 0, 500)`. + +Format of this in JSON: + +```json +"where" : { + "range" : [ + "field", + { ">=" : 0, "<=" : 500} + ] +} +``` + + +##### OR +YQL: `where title contains 'a' or title contains 'b'`. + +Format of this in JSON: + +```json +"where" : { + "or" : [ + { "contains" : [ "title", "a" ] }, + { "contains" : [ "title", "b" ] } + ] +} +``` + + +##### AND +YQL: `where title contains 'a' and title contains 'b'`. + +Format of this in JSON: + +```json +"where" : { + "and" : [ + {"contains" : [ "title", "a" ] }, + {"contains" : [ "title", "b" ] } + ] +} +``` + + +##### AND NOT +YQL: `where title contains 'a' and !(title contains 'b')`. + +Format of this in JSON: + +```json +"where" : { + "and_not" : [ + {"contains" : [ "title", "a" ] }, + {"contains" : [ "title", "b" ] } + ] +} +``` + +Formal structure: + +```json +"where" : { + "and_not" : [ + <Statement>, + <!Statement>, + .. + ] +} +``` + + + +##### Regular expressions +YQL: `where title matches "madonna"`. + +Format of this in JSON: + +```json +"where" : { + "matches" : [ + "title", + "madonna" + ] +} +``` +Another example: + +YQL: `where title matches "mado[n]+a"` + +```json +"where" : { + "matches" : [ + "title", + "mado[n]+a" + ] +} +``` + + +##### Phrase + +YQL: `where text contains phrase("st", "louis", "blues")`. 
+ +Format of this in JSON: + +```json +"where" : { + "contains" : [ "text", { "phrase" : ["st", "louis", "blues"] } ] +} +``` + + +##### Near and Ordered Near +YQL: `where description contains ([ {"distance": 100} ]onear("a", "b"))`. + +Format of this in JSON: + +```json +"where" : { + "contains" : [ + "description", + { "onear" : { + "children" : ["a", "b"], + "attributes" : {"distance" : 100} + } + } + ] +} +``` + + +##### Equals +YQL: `where my_number = 42`. + +Format of this in JSON: + +```json +"where" : { + "equals" : { "field": "my_number", "value": 42 } +} +``` + +This can also be expressed using the following shorthand form: + +```json +"where" : { + "equals" : ["my_number", 42] +} +``` + +The `equals` operator supports boolean and integer values. + +To match at a specific element index in an array field, add the `index` parameter. +Only a single index is supported: + +YQL: `where my_numbers[2] = 42`. + +```json +"where" : { + "equals" : { "field": "my_numbers", "index": 2, "value": 42 } +} +``` + + +##### Search within same struct element +YQL: `where persons contains sameElement(first_name contains 'Joe', last_name contains 'Smith', year_of_birth < 1940)`. + +Format of this in JSON: + +```json +"where" : { + "contains" : [ + "persons", + { "sameElement" : [ + {"first_name" : "Joe", + "last_name" : "Smith", + "range" : [ + "year_of_birth", + { "<" : 1940} + ] + } + ] + } + ] +} +``` + + +##### Term Equivalence +YQL: `where fieldName contains equiv("A","B")`. + +Format of this in JSON: + +```json +"where" : { + "contains" : [ + "fieldName", + { "equiv" : ["A", "B"] } + ] +} +``` + + +##### Rank +YQL: `where rank(a contains "A", b contains "B")`. + +Format of this in JSON: + +```json +"where" : { + "rank" : [ + { "contains" : [ "a", "A" ] }, + { "contains" : [ "b", "B" ] } + ] +} +``` + + + +##### Advanced functions + +###### Wand +YQL: `where wand(description, {"a":1, "b":2})`. 
+ +Format of this in JSON: + +```json +"where" : { + "wand" : [ "description", {"a" : 1, "b":2} ] +} +``` + +Another example: + +YQL: `where [ {"scoreThreshold": 13, "totalTargetHits": 7} ]wand(description, {"a":1, "b":2})`. + +Format of this in JSON: + +```json +"where" : { + "wand" : { + "children" : [ "description", {"a" : 1, "b":2} ], + "attributes" : {"scoreThreshold": 13, "totalTargetHits": 7} + } +} +``` + +###### dotProduct +YQL: `where dotProduct(description, {"a":1, "b":2})`. + +Format of this in JSON: + +```json +"where" : { + "dotProduct" : [ "description", {"a" : 1, "b":2} ] +} +``` + +###### weightedSet +YQL: `where weightedSet(description, {"a":1, "b":2})`. + +Format of this in JSON: + +```json +"where" : { + "weightedSet" : [ "description", {"a" : 1, "b":2} ] +} +``` + +###### weakAnd +YQL: `where {scoreThreshold: 41, "totalTargetHits": 7}weakAnd(a contains "A", b contains "B")`. + +Format of this in JSON: + +```json +"where" : { + "weakAnd" : { + "children" : [ { "contains" : ["a", "A"] }, { "contains" : ["b", "B"] } ], + "attributes" : {"scoreThreshold": 41, "totalTargetHits": 7} + } +} +``` + + + +##### Predicate +YQL: `where predicate(predicate_field,{"gender":"Female"},{"age":20L})`. + +Format of this in JSON: + +```js +"where" : { + "predicate" : [ + "predicate_field", + {"gender" : "Female"}, + {"age" : 20L} + ] +} +``` + + + diff --git a/mintlify-docs/en/reference/querying/page-result-format.mdx b/mintlify-docs/en/reference/querying/page-result-format.mdx new file mode 100644 index 0000000000..2796758e05 --- /dev/null +++ b/mintlify-docs/en/reference/querying/page-result-format.mdx @@ -0,0 +1,136 @@ +--- +sidebarTitle: "The page result format" +title: "The 'page' result format" +--- + +This document describes the `page` result format returned by Vespa. This result format is used when [presentation.format](/en/reference/api/query#presentation.format) is set to `page`. 
This format is usually used with [page templates](/en/reference/querying/page-templates). + +The tags of the format are described below. Subtags will be rendered in the order listed here. The format is _open_, all parsers must ignore attributes and child tags not mentioned here. + +## \<page\> + +The root tag of a page result: The single top-level section of the page. + +| Attribute | Description | Present | +| :--- | :--- | :--- | +| version | The version of this format - currently 1.0. | Always | +| layout | The name of the top-level layout to use for this page. | If specified in the page template used. | + +For regular permissible subtags, refer to [section](#<section>). + +## \<section\> + +A layout "box" in a page. + +| Attribute | Description | Present | +| :--- | :--- | :--- | +| id | The id of this section. | If specified in the page template used. | +| layout | The name of the top-level layout to use for this page. | If specified in the page template used. | +| region | The id of the region in the layout of the parent section where this should be placed. | If specified in the page template used. | + +| Subtag | Description | Present | +| :--- | :--- | :--- | +| [section](#<section>) | A nested section of this page | Zero or more. | +| [renderer](#<renderer>) | The name of the rendering to use for this section. | Zero or more. | +| [source](#<source>) | Used to specify where to fetch the content of this section if it is not sent with this page in a content tag. | One or zero. | +| [content](#<content>) | Contains some "payload" of this page - a set of [hit](#<hit>) instances | One if this section has inlined content, zero otherwise. | + +## \<renderer\> + +The way this section, or some of its content should be rendered. + +| Attribute | Description | Present | +| :--- | :--- | :--- | +| for | The name of the content source which should use this renderer | If this is not present, the renderer should be used for the entire section. 
| + +| Subtag | Description | Present | +| :--- | :--- | :--- | +| parameter | A parameter to this renderer | Zero or more | + +## \<source\> + +The source to be used to fetch the content of a section, if it is not sent as inline [content](#<content>). + +| Attribute | Description | Present | +| :--- | :--- | :--- | +| url | The url at which the content should be fetched. | Always. | + +| Subtag | Description | Present | +| :--- | :--- | :--- | +| parameter | A parameter to use when fetching this content. | Zero or more | + +## \<content\> + +The content to render in a section. + +| Subtag | Description | Present | +| :--- | :--- | :--- | +| [hit](#<hit>) | A content hit. | Zero or more. | +| [group](#<group>) | A group of content hits. | Zero or more. | + +## \<hit\> + +A single result content item. + +| Attribute | Description | Present | +| :--- | :--- | :--- | +| relevance | The relevance of this item - usually a normalized number between 0 and 1. | Always | +| source | The name of the source producing this hit. | Always | +| type | A space-separated list of type identifiers of this hit. | If a type is set in the hit. | + +Subtags: + +Hits have one subtag for every field they contain, where the field name is the name of the tag and the toString of the field content is the content of the tag. + +## \<group\> + +A [hit](#<hit>) which contains nested hits. Used to organize hits hierarchically. Has the same attributes and subtags as [hit](#<hit>), but may also contain nested [hit](#<hit>) and [group](#<group>) tags. + +## Example + +A page which should be rendered with two columns on top. 
+ +```xml expandable +<page version="1.0"> + + <renderer name="two-column"/> + + <section region="left"> + <source url="http://host:port/resource/[news article id]"/> + <renderer name="articleBodyRenderer"> + <parameter name="color">blue</parameter> + </renderer> + </section> + + <section region="right"> + <renderer name="multi-item-column"> + <parameter name="items">3</parameter> + </renderer> + <section region="1"> + <renderer for="newsImage" name="newsImageRenderer"/> + <renderer for="news" name="articleRenderer"/> + <renderer for="image" name="imageRenderer"/> + <content> + <hit relevance="1.0" source="news"> + <id>news-1</id> + </hit> + <hit relevance="0.5" source="news"> + <id>news-2</id> + </hit> + </content> + </section> + <section region="2"> + <source url="http://host:port/consumption-widget"/> + <renderer name="identityRenderer"/> + </section> + <section region="3"> + <renderer name="htmlRenderer"/> + <content> + <hit relevance="1.0" source="htmlSource"> + <id>htmlSource-1</id> + </hit> + </content> + </section> + </section> +</page> +``` \ No newline at end of file diff --git a/mintlify-docs/en/reference/querying/page-templates.mdx b/mintlify-docs/en/reference/querying/page-templates.mdx new file mode 100644 index 0000000000..7c559a7b32 --- /dev/null +++ b/mintlify-docs/en/reference/querying/page-templates.mdx @@ -0,0 +1,156 @@ +--- +title: "Page templates reference" +sidebarTitle: "Page templates" +--- + +This document is a reference to the elements of the Page Template XML format. Refer to the [Introduction to Page Templates](/en/querying/page-templates). + +A page template describes a particular way or set of ways of organizing data from some sources on a page. 
It has the following structure: + +<Card> +`<`[page](#page) id="[\[id\]](#id)"> *`<!-- The top-level section of this page -->`* +  \[page-element\]\* +`</page>` +</Card> + +...where each `[page-element]` is one of: + +\<[section](#section)\>[page-element]\*\</section\> + +_A nested section (screen area)_ + +\<[source](#source) name="[source-name]"\> [renderer]\* [parameter]\* \</source\> + +_A data source which should be placed in this section_ + +\<[renderer](#renderer) name="[renderer-name]"\> [parameter]\* \</renderer\> + +_The renderer to use for the source or section containing this_ + +\<[choice](#choice)\> [map] or [page-element]/[alternative]\* \</choice\> + +_A choice between alternative page elements resolved at runtime_ + +\<placeholder id="[id]"/\> + +_an element to be replaced by a map item at runtime_ + +\<[include](#include) idref="[page-id]"/\> + +_Include the page elements contained in another page_ + +where the nested elements above are: + +\<parameter name="[name]"\>[value]\</parameter\> + +_A parameter of the owning element. Renderer parameters are sent as-is to the frontend in the result. Source parameters are sent to the source by setting the query parameter `source.[sourceName].[name]`._ + +\<alternative\> [page-element]\* \</alternative\> + +_multiple page elements constituting one choice alternative_ + +\<[map](#map) to="placeholder-id1 placeholder-id2 ... "\> [page-element]/[item]\* \</map\> + +_a mapping of some page elements to placeholders_ + +\<[item](#item)\> [page-element]\* \</item\> + +_multiple page elements which should all map to one placeholder_ + +All tags may also include a `description` attribute to document the use of the tag. Tags and attributes are described in detail in the following. + +## id + +An id has the format: + +```bash +id ::= name(:major(.minor(.micro(.qualifier)?)?)?)? 
+name ::= identifier +major ::= integer +minor ::= integer +micro ::= integer +qualifier ::= identifier +``` + +Any omitted numeric version component is taken to mean 0, while a missing qualifier is taken to mean the empty string. + +## page + +The root tag of a page template. Defines a page, which is also its root section. Attributes and subtags are the same as for [section](#section), with the exception that the `id` attribute is mandatory for a page. + +## section + +A representation of an area of screen real-estate. At runtime a section will contain content from various sources. The final renderer will render the section with its data items and/or subsections in an area of screen real-estate determined by its containing tag. + +| Attribute | Description | Default | +| :--- | :--- | :--- | +| id | A unique identifier of this section used for referring. | _No id_ | +| layout | An identifier. Permissible values are `row`, `column` and any additional layouts supported by the renderer of the returned page. | `column` | +| region | An identifier. The permissible values, and whether this is mandatory, are determined by the particular layout identifier of the containing section (`row` and `column` do not specify any region identifiers). | _None_ | +| source | A space-separated set of sources permissible within this. This is a shorthand for defining sources as subtags. The total source list of this section consists of both the sources listed here and as subtags. | _All sources are permissible if none are specified._ | +| max | The maximum number of items permissible within this section (including any subsections). Regardless of the blending method used, the most relevant items are kept. | _Unrestricted_ | +| min | The minimum number of items desired within this. | _Unrestricted_ | +| order | The method of ordering to use on the items displayed in this container. 
This may be any [sorting specification](/en/reference/querying/sorting-language) over the fields of the hits, plus the source name and relevance score, for example `[source]-[relevance] category` to group by source, sort each group primarily by decreasing relevance and secondarily by the "category" field. The `[source]` identifier will sort sources by the order in which they are listed in the template in use. | | + +## source + +A data source whose data should be placed in the containing section. + +| Attribute | Description | Default | +| :--- | :--- | :--- | +| name | The name of this source. | _Mandatory_ | +| url | The url of this source. If this is set, the data of this source is _not_ fetched, but instead the source tag (with url) will appear in the returned page such that the frontend may fetch it. This is provided primarily as a migration path, as such data can not be inspected and processed to optimize the returned page. | _No url: Fetch this configured source from the container._ | + +## renderer + +A renderer to use to render a section of a data item (hit) of a particular type. + +| Attribute | Description | Default | +| :--- | :--- | :--- | +| name | The name of this renderer. | _Mandatory_ | +| for | The name of a hit type or a source this is the renderer for. | _If in a section: This is the renderer for the whole section. +If in a source: This is the default renderer for hits from this source._ | + +## choice + +A choice between multiple alternative (lists of) page elements. A resolver chooses between the possible alternatives for each request at runtime. The `alternative` tag is used to enclose an alternative. If an alternative consists of just one page element tag, the enclosing alternative tag may be skipped. + +| Attribute | Description | Default | +| :--- | :--- | :--- | +| method | the name of the method for making the choice. Must be supported by the optimizer in use. 
| _Any method_ | + +### Contained tags + +Either: + +| Tag | Description | Default | +| :--- | :--- | :--- | +| [page-element] | An alternative consisting of a single page element. | 0-n | +| alternative | An alternative consisting of multiple page elements. | 0-n | + +or + +| Tag | Description | Default | +| :--- | :--- | :--- | +| [map](#map) | Specify all alternatives as a single mapping function. | 0-1 | + +## map + +Specify all the alternatives of a choice as a mapping function of elements to placeholders. A map is a convenience shorthand of writing many alternatives in the case where a collection of elements should be mapped to a set of placeholders with the constraint that each placeholder should get a unique element. This is useful e.g. in the case where a set of sources are to be mapped to a set of sections. + +| Attribute | Description | Default | +| :--- | :--- | :--- | +| to | A space-separated list of the placeholder id's the map values should be mapped to. There cannot be more placeholder id's than there are values in this map (but fewer is ok). | + +| Contained Tags | Description | Default | +| :--- | :--- | :--- | +| [page-element] | A map item consisting of a single page element to map to a placeholder. | 0-n | +| item | An item containing multiple page elements to be mapped to a single placeholder.| 0-n | + +## include + +Includes the page elements contained directly in the `page` element in the given page template (the page tag itself is not included). Inclusion works exactly as if the `include` tag was literally replaced by the content of the included page. + +| Attribute | Description | Default | +| :--- | :--- | :--- | +| idref | The id specification of the page to include. Portions of the version may be left unspecified to get the latest matching version. 
| _(Mandatory)_ | \ No newline at end of file diff --git a/mintlify-docs/en/reference/querying/query-profiles.mdx b/mintlify-docs/en/reference/querying/query-profiles.mdx new file mode 100644 index 0000000000..ce038d71a8 --- /dev/null +++ b/mintlify-docs/en/reference/querying/query-profiles.mdx @@ -0,0 +1,179 @@ +--- +title: "Query Profile Reference" +sidebarTitle: "Query profiles" +--- + +This is a reference to the full format of [Query Profile](#query-profiles) and [Query Profile Type](#query-profile-types) configuration files. For an introduction to query profiles, please see [query profiles](/en/querying/query-profiles). + +## Query Profiles + +A query profile defines a named set of search request parameters with values - structure: + +```xml +<query-profile id="[[id]]" [[optional attributes]]> + <description></description>? + <dimensions></dimensions>? + + <field name="[[name]]" [[optional attributes]]> + [[value]] + </field>* + + <query-profile for="[[dimension values]]" [[optional attributes]]> + <field name="[[name]]">[[value]]</field>* + </query-profile>* + +</query-profile> +``` + +where `?` means optional and `*` means repeatable tag. These items are described in the following sections. + +### id + +The id has the format: + +<Card> +id ::= name(:major(.minor(.micro(.qualifier)?)?)?)?<br/> +name ::=_[identifier](#identifiers)_<br/> +major ::=_integer_<br/> +minor ::=_integer_<br/> +micro ::=_integer_<br/> +qualifier ::=_[identifier](#identifiers)_ +</Card> + +Any omitted numeric version component missing is taken to mean 0, while a missing qualifier is taken to mean the empty string. If the name is exactly `default`, this profile will be the default. If there are multiple profiles named `default`, the newest version is the default. 
+ +### Optional `query-profile` attributes + +| Name | Default | Description | +| :--- | :--- | :--- | +| type | _No type checking_ | The id of a query profile type which defines the possible content of this query profile | +| inherits | _No inclusion_ | A space-separated list of id's of the query profiles whose fields should be included in this profile. The fields are included exactly as if they were present in this profile. Order matters: If a field is present in multiple inherited profiles, the first one found in a depth first, left to right search will be used. Fields present in this profile always override the same field name in an inherited profile. | + +### Description + +A textual description of the purpose of this query profile. Used for documentation. + +### Query profile `field` + +A field in a query profile defines a key-value pair. + +If the value is a primitive (string, number), then this key value will be available from the Query exactly as if it was submitted with the search request as a parameter (if it is set both ways, the search request takes priority). + +If the value is a reference to another query profile, the key-values of the referenced profile will be available from the Query exactly as if they were submitted with the search request as a parameter, with the key of this value and a dot prepended to each key in the nested profile, i.e. `keyNameInReferringProfile.keyNameInReferencedProfile=value`. + +### `field` name + +The name of the field must be a valid [identifier](#identifiers). + +### Optional `field` attributes + +| Name | Default | Description | +| :--- | :--- | :--- | +| overridable | `true` | `true` or `false`. If this is `true`, this field can be overridden by a parameter of the same name in the search request. If it is `false`, it can not be overridden in the request. This attribute overrides the overridable setting in the field definition for this field (if any). 
If an attempt is made to assign a value to a non-overridable field later, the assignment will _not_ cause an error, but will simply be ignored.| + +### `field` value + +The value of the field may be either: + +- a primitive, encoded as any (XML escaped) string, or +- a reference to another query profile encoded as \<ref\>[query-profile-id]\</ref\> + +If this field is defined in the query profile type referenced by this query profile, then the value must be the valid value type defined by that query type field definition. + +### Dimensions + +A comma-separated list of dimensions over which variants of this profile may be created as [nested query profiles](#query-profile-nested). The names of the dimensions are the names of request parameters which, when received in the request, will trigger the matching profile variants. + +### Query profile (nested) + +A nested query profile defines variants of values returned from the enclosing query profile, which are returned for variable requests where this variant is the most specific match to the request properties named by the [dimensions](#dimensions) as defined by its [for attribute](#query-profile-nested-for-attribute). No other attributes may be set in nested query profiles. + +### Query profile (nested) `for` attribute + +This attribute defines the values of the [dimensions](#dimensions) of the enclosing profile for which this nested profile defines alternative values, as a comma-separated list. The values are defined in the same order as the dimensions are defined. Dimensions for which this should match any value may be denoted by a "\*". One or more trailing "\*" may be omitted - example: + +<Card> +for="a,b,*,c,*,*" +for="a,b,*,c" // equivalent to the above +</Card> + +## Query Profile Types + +A query profile type defines a set of valid, typed values for a query profile - structure: + +```xml +<query-profile-type id="[[id]]" [[optional attributes]]> + + <description></description> ? + + <match path="true"/> ? 
+ + <strict/> ? + + <field name="[[name]]" type="[[type]]" [[optional attributes]]/> * + +</query-profile-type> +``` + +where `?` means optional tag and `*` means repeatable tag. These items are described in the following sections. + +### Optional `query-profile-type` attributes + +| Name | Default | Description | +| :--- | :--- | :--- | +| inherits | _No inclusion_ | A space-separated list of id's of the query profile types whose field definitions should be included in this profile. The fields are included exactly as if they were present in this profile type. Order matters: If a field definition is present in multiple inherited profiles, the first one found in a depth first, left to right search will be used. A field definition in this type always overrides inherited ones. The same rules apply to other elements than fields. | + +### `match` + +If \<match path="true"\> is added to the query profile type, the name of the profile will be understood as a slash separated path name during matching of a query profile name to an actual profile. If the query profile name is a _path component prefix_ of the query profile name reference, the profile matches the reference. The profile having the most specific match is used as the target of the reference. + +If `match` is not specified in the profile type, exact name matching is used. The syntax is as specified for future extensions. + +The match setting is inherited from supertypes to subtypes. + +### `strict` + +If this element is added to a query profile type, then that profile can only contain values explicitly defined in the profile, whether that value is provided by a query profile, the search request or programmatically. + +It is possible to add strict sub-profiles to a non-strict profile and vice-versa, making it possible to create respectively "structs in maps" and "maps in structs". + +A profile which inherits a strict profile will also be strict, i.e `strict` is inherited. 
+ +Some rules to note when using a top-level profile type which is declared as strict: + + - If the top-level profile is of a strict type, that type should usually inherit the `native` type to allow the built-in parameters to be passed in. This profile type and the subtypes it references are always available - refer to the [Query API reference](/en/reference/api/query). + - Non-primitive model objects are permitted to be added to the query profiles even if the top level profile is strict, but primitives (strings, numbers, booleans) are _not_ permitted but must either be declared in the strict profile type, or wrapped in a proper model object + - Feature specific properties like `select` are not automatically permitted, the parameters of the features which should be exposed must be declared explicitly in a strict top level type. + +### Query profile type `field` + +This defines the name and type of a field of query profiles of this type. + +### `field` type + +This defines the type of this field. The type is one of: + +| Type name | Description | +| :--- | :--- | +| string | Any string | +| integer | A signed 32-bit whole number | +| long | A signed 64-bit whole number | +| float | A signed 32-bit float | +| double | A signed 64-bit float | +| boolean | A boolean value, `true` or `false` | +| [[tensor-type-spec]](/en/reference/ranking/tensor#tensor-type-spec) | A tensor type spec | +| query-profile | A reference to a query profile of any type | +| query-profile:[query-profile-type-id] | A reference to a query profile of the given type | + +### Optional `field` definition attributes + +| Name | Default | Description | +| :--- | :--- | :--- | +| mandatory | `false` | `true` or `false`. If this is `true`, this field _must_ be present in either the query profile of this type or explicitly in the request referencing it | +| overridable | `true` | `true` or `false`. 
If this is `true`, instances of this field can be overridden by a parameter of the same name in the search request. If it is `false`, it can not be overridden in the request | +| alias | _None_ | One or more space-separated aliases of the field name. Unlike field names, aliases are case-insensitive | +| description | _None_ | A textual description of the purpose of this field. Used for documentation | + +### Identifiers + +An identifier is a string that matches the pattern `[a-zA-Z_/][a-zA-Z0-9_/]*`. diff --git a/mintlify-docs/en/reference/querying/semantic-rules.mdx b/mintlify-docs/en/reference/querying/semantic-rules.mdx new file mode 100644 index 0000000000..44a273b478 --- /dev/null +++ b/mintlify-docs/en/reference/querying/semantic-rules.mdx @@ -0,0 +1,172 @@ +--- +title: "Semantic Rule Language Reference" +sidebarTitle: "Semantic rules" +--- + + +This is the reference for the semantic rule language in Vespa. For a guide on using this language, see [query rewriting](/en/linguistics/query-rewriting). Refer to the [Query API](/en/reference/api/query#semantic-rules) for how to use in queries. + +## Rule bases + +Semantic rules are collected in files called _rule bases_. These files are named _[rule-base-name].sr_. They must be placed in _[application-package]/rules/_ to be deployed. + +## Basic syntax + +A rule base may contain any number of the following four constructs, explained in the rest of this document: + +- [directives](#directives) +- [production rules](#production-rules) +- [named conditions](#named-conditions) +- comments, starting with # and ending at a newline. + +Production rules and named conditions are _statements_. Statements may span multiple lines and are terminated by `;`. + +## Directives + +A directive is a "meta-level" statement which is not used during rule evaluation, but tells the rule engine how to use the rule base. A directive starts with `@` and ends at a newline. Directives may take parameters. 
These directives exist: + +| Statement | Usage | Location | +| --- | --- | --- | +| @default | Make this rule base the default, to be used with all queries | Anywhere outside other statements | +| @automata(\<automata-filename\>) | Use an automata file with this base | Anywhere outside other statements | +| @include(\<rulebase-name\>) | Include all the statements of another rule base in this | Anywhere outside other statements | +| @super | Include the conditions of the same-named condition from the included rule base | In a condition | +| @stemming(\<true\|false\>) | Whether terms should match after stemming or exactly (true by default) | Before any rule | +| @language(\<[language-code](https://en.wikipedia.org/wiki/ISO_639-1)\>) | The language of the rule base, which should also be the query language. Influences stemming. | Before any rule | + +## Production Rules + +A production rule is of the form: + +<Card> +[\<condition\>](#conditions)\<operator\>[\<production-list\>](#production-list); +</Card> + +This performs the production as defined by the operator if the condition matches. There are two kinds of production rules (and two operators), replacing and adding: + +| Rule kind | Operator | Meaning | +| :--- | :--- | :--- | +| Replacing | -\> | _Replace_ the matched terms by the production | +| Adding | +\> | _Add_ the production to the matched terms | + +## Namespaces + +A namespace is a collection of facts which can be read from conditions and changed by productions. Namespaces may be positional (sequences), or not. A positional namespace will track the current fact and match and insert at the current position, while non-positional namespaces will match any fact against any condition. + +There is a default namespace which does not need an explicit reference. For query rules, the default namespace is the query terms. 
+ +To determine the namespace used to read from conditions or change in productions, use: + +<Card> +\<namespace\>.[\<condition\>](#conditions)<br/> +\<namespace\>.[\<production\>](#production) +</Card> + +There are two namespaces defined during query processing: + +| Namespace | Syntax | Positional | Description | +| :--- | :--- | :--- | :--- | +| Query | | Yes | The default namespace. References the terms of the query. The condition value returned will be the term itself. | +| Parameter | `parameter.` | No | References the parameter of the query. Conditions will be true if the parameter is set in the query. The value returned from conditions is the value of the parameter. Productions will need both a key and value specified to set a parameter value. | + +## Named Conditions + +A named condition is of the form: + +<Card> +[condition-name] :-[\<condition\>](#conditions); +</Card> + +This simply assigns a name to the condition on the right, so it can be referred to from the conditions in rules and other named conditions. + +## Conditions + +A condition is an expression which evaluates to true or false over the _facts_ of a [namespace](#namespaces). If the namespace is _positional_ (a _sequence_), evaluation starts at the _current position_ in the namespace. When evaluated true, conditions will also return a value which can be referenced by comparison conditions. + +Conditions may be preceded by a reference name and a label: + +<Card> +([\<reference-name\>](#reference-name)/)?([\<label\>](#condition-label):)?[\<condition\>](#condition) +</Card> + +### Reference Name + +The reference name allows an explicit name to be set, from which the terms matched by the condition can be referred in a condition. This is useful when multiple conditions of the same type are used in the condition of the same rule. + +If no reference name is given, the text standing between the square brackets of the condition is used as reference name. 
+ +### Label + +If a label is specified, the condition will only match terms having that label (the label is the index in query terms). If a label is not set, the term will match if a label is not set, or if it is `default`. + +### Condition + +These are the supported kinds of conditions: + +| Condition | Syntax | Meaning | Returned value | +| :--- | :--- | :--- | :--- | +| Term | \<term\> | True if this is the term at the current position | Determined by the [namespace](#namespaces) | +| Reference (produce the matched term(s)) | [\<condition-name\>] | Evaluate a named condition | The matched term(s) of the condition | +| Reference (produce all terms in the condition) | [\<condition-name\>\*] | Evaluate a named condition | All the terms in the condition | +| Sequence | \<condition\> \<condition\> | Match both conditions by consecutive terms in the right order in the sequence | The last nested condition value | +| Choice | \<condition\>, \<condition\> | Match any one of the conditions, each one tried at the current position | The last nested condition value | +| Group | (\<condition\>) | Evaluate the condition inside the grouping as a unit | The last nested condition value | +| Ellipsis | … | Matches any sequence to make the overall condition match | The matched sequence | +| Referable ellipsis | […] | An ellipsis where the matched sequence can be referenced from the production | The matched sequence | +| Not | !\<condition\> | Matches if the condition does not match | Nothing | +| And | \<condition\> & \<condition\> | Matches if all the conditions matches at the (same) current position | The last nested condition value | +| Comparison | \<condition\> [\<operator\>](#operator) \<condition\> | True if the comparison is true for the values returned from the conditions | The last nested condition value | +| Literal | '\<literal\>' | Returns a value for comparison. This always evaluates to true. | The literal value | +| Start anchor | . 
\<condition\> | Matches condition only if it matches the query from the start | The matched sequence | +| End anchor | \<condition\> . | Matches condition only if it matches the query to the end | The matched sequence | + +### Comparison Condition Operators + +The possible operators of a comparison condition are: + +| Operator | Meaning | +| :--- | :--- | +| = | Left and right values are equal | +| \<= | Left value is smaller or equal | +| \>= | Left value is larger or equal | +| \< | Left value is smaller | +| \> | Left value is larger | +| =~ | Left value contains right value as a substring | + +## Production List + +A production list consists of a space-separated list of _productions_ which are carried out when the production of a rule is matched. A production can be preceded by the type of term to produce, a label (index in queries), and followed by the weight (importance) of the produced value: + +<Card> +([\<term-type\>](#term-type))?([\<label\>](#production-label):)?[\<production\>](#production)(! [\<weight\>](#weight))? +</Card> + +### Term Type + +The default term type is the term type of the context which the term is added to. The possible explicit term types are: + +| Syntax | Meaning | +| :--- | :--- | +| ? | Insert as an OR term | +| = | Insert as an EQUIV term | +| + | Insert as an AND term | +| $ | Insert as a RANK term | +| - | Insert as a NOT term | + +### Label + +If included, the label decides the label the produced term(s) will have in the namespace. This is the index in the query namespace. + +### Production + +There are three types of productions: + +| Production | Syntax | Meaning | +| :--- | :--- | :--- | +| Literal term | \<term\> | Produce this term literally | +| Literal term with value | \<term\>='\<value\>' | Produce this term and value literally. | +| Reference | [\<condition-reference\>] | Produce the terms matched by the referenced condition. 
The reference name is either the name of a named condition used in the condition, an ellipsis - `...` - or an explicit condition reference name. | + +### Weight + +The weight is a percentage integer denoting the importance of the produced term. The default is 100. In the query namespace the weight becomes the term weight, determining the relevance contribution of the term. \ No newline at end of file diff --git a/mintlify-docs/en/reference/querying/simple-query-language.mdx b/mintlify-docs/en/reference/querying/simple-query-language.mdx new file mode 100644 index 0000000000..bc3c7427f1 --- /dev/null +++ b/mintlify-docs/en/reference/querying/simple-query-language.mdx @@ -0,0 +1,148 @@ +--- +title: "Simple Query Language Reference" +--- + +The _simple query language_ allows application end users to issue more complex queries than a list of tokens. It is a heuristic, non-structured language, which attempts to do something probably-useful with any input given. It is combined with the structured [YQL](/en/reference/querying/yql) by using the [userQuery](/en/reference/querying/yql#userquery) operator. + +### Simple Query Syntax + +```bash expandable +Query ::= Expr ( SPACE Expr )* +Expr ::= Term | Prefix? '(' SimpleTerm+ ')' +Term ::= Prefix? Field? CoreTerm Weight? +SimpleTerm ::= Field? CoreTerm Weight? +Prefix ::= '+' | '-' +Field ::= ID ':' /* A valid field name or alias */ +Weight ::= '!'+ | '!' NUM /* NUM is a percentage. */ +CoreTerm ::= WORD | Phrase | NumTerm | PrefixTerm | SubstringTerm | SuffixTerm | SameElement +Phrase ::= '"' WORD+ '"' +NumTerm ::= NUM | '<' NUM | '>' NUM | '[' NUM? ';' NUM? ';' HITLIMIT? 
']' + /* NUM is any numeric type including floating point */ + /* HITLIMIT is an optional count of how many hits you want as a minimum from this range */ +PrefixTerm ::= WORD '*' +SubstringTerm ::= '*' WORD '*' +SuffixTerm ::= '*' WORD +SameElement ::= '{' Field CoreTerm ( SPACE Field CoreTerm )* '}' +``` + +### Prefix searching + +Prefix matching is only available for attributes. A prefix search term (e.g. 'car\*') behaves like a pattern match on the given field: Documents that have a field value beginning with the given prefix are matched and returned (or not returned if the '-' syntax is used). A prefix search term does not add or change the ranking of the documents in the result set. + +### Term weight + +The weight is either one or more ! characters, or a ! followed by an integer. The integer is a fixed point scaling number with decimal factor 100, i.e. it can be regarded as a percentage. When using repeated ! characters, the weight is increased by 50 (from a default value of 100) for each !. A weight expression may also be applied to a phrase. + +A term weight is used to modify the relative importance of the terms in your query. The term score is only one part of the overall rank calculation, but by adding weight to the most important terms, you can ensure that they contribute more. For more details on rank calculation, see [Ranking guide](/en/basics/ranking). + +### Numerical terms + +`[x;y]` matches any number between _x_ and _y_, including the endpoints _x_ and _y_. Note that `>number` is the same as `[number+1;]` and `<number` is the same as `[;number-1]`. + +A few examples using numerical terms: + +```bash +perl size:<100 +``` +This query will get all documents with the word “perl” and with size less than 100Kb. +```bash +chess kasparov -karpov date:[19990101;19991231] +``` + +This query will get all documents last modified in 1999 containing _chess_ and _kasparov_, but not _karpov_. 
+ +#### Advanced range search + +To quickly fetch the best documents in a simple range, use capped range search. For it to be efficient it requires that you use [fast-search](/en/reference/schemas/schemas#attribute) on the attribute used for range search. + +It is fast because it will only scan enough terms in the dictionary to satisfy the number of documents requested. A positive number will start from the left of your range and work its way right. A negative number will start from right and go left. + +```bash +date:[0;21000101;10] +``` +Will give you at least the first 10 documents since the birth of Jesus. +```bash +date:[0;21000101;-10] +``` + +Will give you at least the last 10 documents since the birth of Jesus. + +### Grouping in the simple query language + +There is only one level of parentheses supported; any use of additional parentheses within the expression will be ignored. In addition, note that the terms within should not be prefixed with + or -. + +When the parentheses are prefixed by a + (can be excluded for `all` type, because expressions are + by default), the search requires that at least one of the included terms is present in the document. This effectively gives you a way of having alternative terms expressing the same intent, while requiring that the concept is covered in the document. + +When the parentheses are prefixed by a -, the search excludes all documents that include all the terms, but allows documents that only use some of the terms in the expression. It is a bit more difficult to find good use for this syntax; it could for instance be used to remove documents that compare two different products, while still allowing documents only discussing one of them. + +## Search in URLs + +Create a URL-field in the index by creating a field of type [uri](/en/reference/schemas/schemas#uri) - refer to this for how to build queries. The indexer will report an ERROR in the log for invalid URLs. 
Notes: + +- Finding documents matching a full URL does not behave like exact matching in e.g. string fields, but more like substring matching. A search for `myurlfield:http://www.mydomain.com/` will match documents where _myurlfield_ is set to any of _http://www.mydomain.com/_, _http://www.mydomain.com/test_, and _http://redirect.com/?goto=http://www.mydomain.com/_ +- Hostname searches have an anchoring mechanism to limit which URLs match. By default, queries are anchored at the end, which means that a search for `mydomain.com` will match `www.mydomain.com`, but not `mydomain.com.au`. Adding a ^ (caret) to the start will turn on anchoring at the start, meaning that the query will only return exact matches. Adding a `*` at the end will turn off anchoring at the end. The query `^mydomain.com*` will match `mydomain.com.au`, but not `www.mydomain.com`. + +## Field Path Syntax + +Streaming search supports the [field path](/en/reference/schemas/document-field-path) syntax of the [document selection language](/en/reference/writing/document-selector-language) when searching structs and maps. Special for the map type is the ability to select a subset of map entries to search using the `mymap{"foo"}` syntax. + +See the [field path](/en/reference/schemas/document-field-path) documentation for use-cases of the map data type. + +In the result output, a map is represented in the same way as in the Document XML: + +```xml +<field name="mymap"> + <item><key>foo</key><value>bar</value></item> + <item><key>fuz</key><value>baz</value></item> +</field> +``` + +## Removing syntax characters from queries + +It will sometimes be more robust to remove characters which are used in the query syntax from a user's search terms. An example could be URLs containing parentheses. Comma ("," ASCII 0x2C) may be used as a safe replacement character in these cases. 
+ +```yaml +(x url:http://site.com/a)b) y +``` + +The URL `http://site.com/a)b` in this example could be quoted as following: + +```yaml +(x url:http://site.com/a,b) y +``` + +## Examples + +The _simple_ query language syntax accepts any input string and makes the most of it. A basic query consists of words separated by spaces (encoded as %20). In addition, + +- A phrase can be searched by enclosing it in quotes, like `"match exactly this"` +- Phrases and words may be preceded by -, meaning documents _must not_ contain this +- Phrases and words may be preceded by +, meaning documents _must_ contain this, currently only in use for subtype `any` +- Groups of words and phrases may be grouped using parenthesis, like `-(do not match if all of these words matches)` +- Each word or phrase may be preceded by an index or attribute name and a colon, like `indexname:word`, to match in that index. If the index name is omitted the index named _default_ is searched. + +Any _noise_ (characters not in indexes or attributes, and with no query language meaning) is ignored, all query strings are valid. The exception is queries which have no meaningful interpretation. An example is `-a`, which one would expect to return all documents _not_ containing _a_. Vespa, however, will return the error message _Null query_. All the following examples are of type _all_. 
+ +Get all documents with the term _word_, having _microsoft_ but not _bug_ in the title: + +```bash +word title:microsoft -title:bug +``` +Search for all documents having the phrase "_to be or not to be_", but excluding those having _shakespeare_ in the title: +```bash +"to be or not to be" -title:shakespeare +``` +Get all documents with the word _Christmas_ in the title that were last modified Christmas Day 2009: +```bash +title:Christmas date:20091225 +``` +Get documents on US Foreign politics, excluding those matching both rival presidential candidates: +```bash +"us foreign politics" -(clinton trump) +``` + +Get documents on US Foreign politics, including only those matching at least one of the rival presidential candidates: + +```bash +"us foreign politics" (clinton trump) +``` \ No newline at end of file diff --git a/mintlify-docs/en/reference/querying/sorting-language.mdx b/mintlify-docs/en/reference/querying/sorting-language.mdx new file mode 100644 index 0000000000..a0fd3fe823 --- /dev/null +++ b/mintlify-docs/en/reference/querying/sorting-language.mdx @@ -0,0 +1,153 @@ +--- +title: "Sorting Language Reference" +sidebarTitle: "Sorting" +--- + +A sorting specification in a query consists of one or more sorting expressions. Each sorting expression is an optional sort order followed by an attribute name or a function over an attribute. Multiple expressions are separated by a single SPACE character. + +Using more than one sort expression will give you multilevel sorting. In this case, the most significant expression is the first, while subsequent expressions detail sorting within groups of equal values for the previous expression. 
+ +<Card> +Sorting ::= SortExpr ( ' ' SortExpr )\* <br/> +SortExpr ::= \[SortOrder\] ( SortObject | MissingExpr ) <br/> +MissingExpr ::= MISSING '(' SortObject ',' MissingPolicy ')' <br/> +MissingPolicy ::= FIRST | <br/> + LAST | <br/> + AS ',' MissingValue <br/> +SortObject ::= SortAttribute | Function <br/> +Function ::= LOWERCASE '(' SortAttribute ')' | <br/> + RAW '(' SortAttribute ')' | <br/> + UCA '(' SortAttribute \[ ',' Locale \[ ',' Strength\] \] ')' <br/> +MISSING ::= 'missing' <br/> +FIRST ::= 'first' <br/> +LAST ::= 'last' <br/> +AS ::= 'as' <br/> +LOWERCASE ::= 'lowercase'<br/> +UCA ::= 'uca'<br/> +RAW ::= 'raw'<br/> +Locale ::= An identifier following [unicode locale identifiers](https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers), e.g. 'en\_US'.<br/> +Strength ::= 'PRIMARY' | 'SECONDARY' | 'TERTIARY' | 'QUATERNARY' | 'IDENTICAL'<br/> +SortOrder ::= '+' | '-' <br/> +SortAttribute ::= ID | ID.key | ID.value /\* A valid attribute name, with key/value appended for maps \*/ <br/> +MissingValue ::= QuotedString | Token <br/> +Token ::= TokenChar\* <br/> +TokenChar ::= any non Delimiter <br/> +QuotedString ::= '"' ( StringChar | '\\' QuotedChar )\* '"' <br/> +StringChar ::= any non QuotedChar <br/> +Delimiter ::= ' ' | ',' | '(' | ')' | QuotedChar <br/> +QuotedChar ::= '\\' | '"' <br/> +</Card> + +See [Geo search](/en/querying/geo-search) for sorting by distance. Refer to [YQL Vespa reference](/en/reference/querying/yql#order-by) for how to set sorting parameters in YQL. + +## Sort order + +`+` denotes ascending sorting order, while `-` gives descending order. Ascending order is defined as lowest values first for numerical attributes. Strings are sorted according to the sort function chosen. Descending order is the reverse of ascending order. + +<Info> +Note: `+` in query URLs must be encoded as %2B - for consistency, `-` can be encoded as %2D. 
+</Info> + +### Default sort order + +If `+`/`-` is omitted, the default is used, either the system-wide default of `+` or any override in [schema](/en/reference/schemas/schemas#sorting). Default sort order is `+` or ascending, except for `[rank]` or the special builtin `[relevance]`, which has `-` or descending. + +## Sort attribute + +The sorting attribute in a sort expression is the name of an attribute in the index. Attribute names will often be the same as field names. In the schema, an attribute is indicated by the indexing language fragment for a field having an [attribute](/en/reference/schemas/schemas#attribute) statement. + +When sorting on attributes, it is recommended to use the built-in *unranked* rank-profile. This allows the search kernel to execute the query significantly faster than executing the ranking framework for many hits, only to finally ignore the score and sort by the specified sorting specification. + +### Multivalue sort attribute + +When sorting on a multivalue attribute ([map](/en/reference/schemas/schemas#map), [array](/en/reference/schemas/schemas#array), or [weightedset](/en/reference/schemas/schemas#weightedset)) one of the values for the document is selected to be used for sorting. Ascending sort order uses the lowest value while descending sort order uses the highest value. See the [missing policies](#missing-policies) section for how a document without values in the attribute is treated. + +## Sort function + +Refer to [function](/en/reference/querying/yql#function). + +## Special sorting attributes + +Three special attributes are available for sorting in addition to the index specific attributes: + +| Attribute | Description | +| :--- | :--- | +| --- | --- | +| **\[relevance\]** | The document's relevance score for this query. This is the same as the default ordering when no sort specification is given (\[rank\] is a legacy alias for the same thing). | +| **\[source\]** | The document's source name. 
This is only relevant when querying multiple sources. | +| **\[docid\]** | The document's identification in the search backend. This will typically give you the documents in indexing order. **Keep in mind that this id is unique only to the backend node**. The same document might have different id on a different node. The same way a different document might have the same id on another node. This is just intended as a cheap way of getting an almost stable sort order. | + +These special attributes are most useful as secondary sort expressions in a multilevel sort. This will allow you to sort groups of equal values for the primary expression in either relevancy or indexing order. Without this additional sort expression, the order within each equal group is not deterministic. + +<Danger> +**Important:** In [YQL, using order by](/en/reference/querying/yql#order-by), the special sorting attributes must be enclosed in quotes. +</Danger> + +## Missing policies + +A document might not have a value in the attribute. One of the following missing policies will then be applied: + +| Policy | Example | Description | +| :--- | :--- | :--- | +| default | `+attr` | If the sort order is ascending and the attribute is single-valued then the document is sorted before any documents with values in the attribute. If the attribute is multi-valued or the sort order is descending then the document is sorted after any documents with values in the attribute. | +| first | `+missing(attr,first)` | The document is sorted before any documents with values in the attribute. | +| last | `+missing(attr,last)` | The document is sorted after any documents with values in the attribute. | +| as | `+missing(attr,as,42)` | The document is sorted as if it had the missing value specified in the [sorting specification](#sortspec). 
If the missing value cannot be converted to the attribute data type then an error is reported (query is aborted for indexed search, parts of the sort spec are ignored for streaming search). | + +Note that missing policies can be combined with other functions, e.g. `+missing(lowercase(attr),as,"nothing here")`. + +## Limitations + +### Attribute only + +It is only possible to sort on [attributes](/en/content/attributes). Trying to sort on an [index or summary field](/en/basics/schemas#document-fields), without an associated attribute, will not work. + +Also note that [match-phase](/en/reference/schemas/schemas#match-phase) is enabled when sorting. + +### Optimization causing incorrect total hit count + +When sorting on a single-value numeric attribute with [fast-search](/en/content/attributes#fast-search) Vespa will by default activate an optimization which makes delivering sorted results much faster, but with inaccurate total-hit count. To disable this optimization, set the query parameter `sorting.degrading` to false (in the request or a [query profile](/en/querying/query-profiles)). See the [reference](/en/reference/api/query#sorting.degrading) for details. + +## Examples + +Sort by surname in ascending order: + +```bash ++surname +``` + +Sort by surname in ascending order after lowercasing the surname: + +```bash ++lowercase(surname) +``` + +Sort by surname in ascending English US locale collation order: + +```bash ++uca(surname,en_US) +``` + +Sort by surname in ascending Norwegian 'Bokmål' locale collation order: + +```bash ++uca(surname,nb_NO) +``` + +Sort by surname in ascending Norwegian 'Bokmål' locale collation order, but more attributes of a character are used to distinguish. 
Now it is case-sensitive and 'aa' and 'å' are different: + +```bash ++uca(surname,nb_NO,TERTIARY) +``` + +Sort by surname, with the youngest ones first when the names are equal: + +```bash ++surname -yearofbirth +``` + +Sort in ascending order yearofbirth groups, and sort by relevancy within each group of equal values with the highest relevance first: + +```bash ++yearofbirth -[relevance] +``` \ No newline at end of file diff --git a/mintlify-docs/en/reference/querying/yql.mdx b/mintlify-docs/en/reference/querying/yql.mdx new file mode 100644 index 0000000000..d8c46d929c --- /dev/null +++ b/mintlify-docs/en/reference/querying/yql.mdx @@ -0,0 +1,295 @@ +--- +title: "YQL Query Language Reference" +sidebarTitle: "The YQL query language" +--- + +Vespa accepts unstructured human input and structured queries for application logic separately, then combines them into a single data structure for executing. Human input is parsed heuristically, while application queries are formulated in YQL. + +<Info> + **Note:** See the [Query Language Guide](/en/querying/query-language) for query examples +</Info> + +A query URL looks like: + +```bash +http://myhost.mydomain.com:8080/search/?yql=select%20%2A%20from%20sources%20%2A%20where%20text%20contains%20%22blues%22 +``` + +In other words, `yql` contains: + +```bash +select * from sources * where text contains "blues" +``` + +This [matches](/en/reference/schemas/schemas#match) all documents where the field named _text_ contains the word _blues_. + +Quote `"` and backslash `\` characters in text values must be escaped by a backslash, also see [how does backslash escapes work](/en/learn/faq#how-does-backslash-escapes-work). + +<Danger> + **Important:** There is no way to query for a field that is not set / equals `null` or `NaN`. Work around using a "magic" value (like MAXINT) that is not normally used in the documents. 
+</Danger> + +## select + +_select_ is the list of [summary fields](/en/querying/document-summaries#selecting-summary-fields-in-yql) requested (a field with the `summary` index attribute). Vespa will hide other fields in the matching documents. + +```bash +select price,isbn from sources * where title contains "madonna" +``` + +The above explicitly requests the fields "price" and "isbn" (from all sources). To request all fields, use an asterisk as field selection: + +```sql +select * from sources * where title contains "madonna" +``` + +## from sources + +_from sources_ specifies which content [sources](/en/reference/api/query#model.sources) to query. Example: + +```sql +select * from music where title contains "madonna" +``` + +queries all document types in the _music_ content cluster or federation source. Query in: + +| | | +|:-----------|:-----------| +| all sources | `select ... from sources * where ...` | +| a set of sources | `select ... from sources source1, source2 where ...` | +| a single source | `select ... from source1 where ...` | + +In other words, _sources_ is used for querying some/all sources. If only a single source is queried, the _sources_ keyword is dropped. To restrict the query to only one schema (aka document type) use the [model.restrict](/en/reference/api/query#model.restrict) URL parameter. Also see [federation](/en/querying/federation). + +## where + +The `where` clause is a tree of operators: + +| | | | | | | | | | | | | | | +| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | +| numeric | The following numeric operators are available: `= < > <= >= range(field, lower bound, upper bound)`. where 500 >= price where range(fieldname, 0, 5000000000L) Numbers must be in the signed 32-bit range. Input 64-bit signed numbers using `L` as suffix. 
For the `range` operator, one can also use the strings `Infinity` or `-Infinity`: where (range(year, 2000, Infinity)) \| Annotation \| Effect \| \| --- \| --- \| \| [bounds](#bounds) \| Range: open or closed interval. \| \| [hitLimit](#hitlimit) \| Used for *capped range search*. The `range()` query operator with `hitLimit` can be used to efficiently implement top-k selection for ranking a subset of the documents in the index. See [example and use cases](/en/performance/practical-search-performance-guide#advanced-range-search-with-hitlimit). \| The [weightedset](/en/reference/schemas/schemas#weightedset) field does not support filtering on weight. Solve this using the [map](/en/reference/schemas/schemas#map) type and [sameElement](#sameelement) query operator - see [example](/en/querying/query-language#map). | +| boolean | The boolean operator is: `=` where alive = true | +| contains | The right-hand side argument of the contains operator is either a string literal, or a function, like `phrase`. `contains` is the basic building block for text matching. The kind of [matching](/en/reference/schemas/schemas#match) to be done depends on the field settings in the schema. where title contains "madonna" \| Annotation \| Effect \| \| --- \| --- \| \| [stem](#stem) \| By default, the string literal is [tokenized](/en/linguistics/linguistics-opennlp#tokenization) to match the field(s) searched. Explicitly control tokenization by using [stem](#stem):where title contains <CodeBlock>``` ({stem: false}"madonna")```</CodeBlock> \| The matched field must be an [indexed field or attribute](/en/basics/schemas#document-fields). Fields inside structs are referenced using dot notation - e.g `mystruct.mystructfield`. 
| +| and | `and` accepts other `and` statements, `or` statements, [userQuery](#userquery), logically inverted statements - and `contains` statements as arguments: where title contains "madonna" and title contains "saint" | +| or | `or` accepts other `or` statements, `and` statements, [userQuery](#userquery) - and `contains` statements as arguments: where title contains "madonna" or title contains "saint" | +| not | Use the `!` operator to match documents that do *not* satisfy some condition: where title contains "madonna" and !(title contains "saint") | +| phrase | Phrases are expressed as a function: where text contains phrase("st", "louis", "blues") | +| near | `near()` matches if all argument terms occur within the specified distance, in any order. Negative terms (prefixed with `!`) exclude matches where those terms appear within the exclusion distance. where field contains near("a", "b", "c") where field contains ```({distance: 5}near("web", "search"))``` where field contains near("sql", "database", !"nosql") \| Annotation \| Default \| Description \| \| --- \| --- \| --- \| \| [distance](#distance) \| 2 \| Maximum position difference for terms to match. \| \| exclusionDistance \| (distance+1)/2 \| Exclusion zone size around negative terms. \| Negative terms must come after all positive terms. For multi-value fields, setting [element-gap](/en/reference/schemas/schemas#rank-element-gap) for the field in the rank profile enables distance calculation between adjacent elements. Features below `near()` and `onear()` are filtered based on the spans for the operator match before they are exposed to the ranking features. Given the query text contains ```({distance:1}near("a","b"))``` and two documents: The text field in the first document is `"a a a a a a b b b b b b"`. The spans for the near match are `[[5,6]]`. Only the last occurrence of `"a"` and the first occurrence of `"b"` are kept. The text field in the second document is `"a b c a b c a b c a b c"`. 
The spans for the near match are `[[0,1],[3,4],[6,7],[9,10]]`. All occurrences of `"a"` and `"b"` are kept. | +| onear | `onear()` (ordered near) is like `near()`, but requires terms to appear in the same order as specified in the query. With distance set to (number of terms - 1), `onear()` is equivalent to `phrase()`. where field contains onear("web", "search", "engine") where field contains ```({distance: 5}onear("neural", "network"))``` where field contains onear("java", "tutorial", !"script") \| Annotation \| Default \| Description \| \| --- \| --- \| --- \| \| [distance](#distance) \| 2 \| Maximum position difference for terms to match. \| \| exclusionDistance \| (distance+1)/2 \| Exclusion zone size around negative terms. \| Negative terms must come after all positive terms. For multi-value fields, setting [element-gap](/en/reference/schemas/schemas#rank-element-gap) for the field in the rank profile enables distance calculation between adjacent elements. | +| sameElement | The `sameElement()` operator lets you denote conditions that must match within the *same* element in multivalue fields containing structs or strings. By default, sameElement uses `AND` to combine the conditions: *All* the conditions must match in the same element to produce a match. For example, given this **struct**: struct person ```{ field first_name type string {} field last\_name type string {} field year\_of\_birth type int {} } field persons type array\<person> { indexing: summary struct-field first\_name { indexing: attribute } struct-field last\_name { indexing: attribute } struct-field year\_of\_birth { indexing: attribute } }``` We can use this query: where persons contains sameElement(first\_name contains 'Joe', last\_name contains 'Smith', year\_of\_birth \< 1940) to return all documents containing a Joe Smith born before 1940 in the `persons` array. Searching a **map** is done by treating it as an array of a struct with the field members `key` and `value`. 
For example, given this map: ```field identities type map\<string, person> { indexing: summary struct-field key { indexing: attribute } struct-field value.first_name { indexing: attribute } struct-field value.last_name { indexing: attribute } struct-field value.year_of_birth { indexing: attribute } }``` We can use this query: where identities contains sameElement(key contains 'father', value.first\_name contains 'Joe', value.last\_name contains 'Smith', value.year\_of\_birth < 1940) to return all documents that have a Joe Smith born before 1940 keyed as a 'father'. `sameElement()` may also be used to search **array of string** fields. Supported query operators inside sameElement() are `and`, `equiv`, `near`, `onear`, `or`, `rank` and `phrase`. `and` can be used with `!`. For example given this field: field chunks type ```array\<string> \{ indexing: index | summary } ``` We can use these queries: where chunks contains sameElement("one" and "two") where chunks contains sameElement("one" and equiv("two","three")) where chunks contains ```sameElement("one" and (\{distance: 5}near("two","three",!"four"))) ```where chunks contains sameElement("one" and phrase("two","three")) where chunks contains sameElement("one" and !"two") where chunks contains sameElement("one" or "two") where chunks contains sameElement(rank("one" and "two", "three")) Features inside sameElement() for indexed fields are filtered based on the matching elements, e.g. [elementwise(bm25(descriptions),x,double)](../ranking//en/reference/ranking/rank-features#elementwise-bm25) will only contain tensor cells based on the matching elements. Use the [`elementFilter`](#elementfilter) annotation to restrict matching to specific element indices. For example, given a field `my_numbers type array<int>`: where```bash my_numbers contains (\{elementFilter:\[2\]}sameElement("42"))``` This only matches the element at index 2 in the array field. Multiple indices can be given: `{elementFilter:[0, 2, 5]}`. 
A shorthand form is also available: where my\_numbers\[2\] = 42 The shorthand form only supports a single index. Use the [`elementFilter`](#elementfilter) annotation to match multiple indices. | +| equiv | For cases where two terms in the same field should produce exactly the same behavior when matched, the `equiv()` operator can be used. This behaves like a special case of `or`. where fieldName contains equiv("A","B") The matching logic of equiv is the same as OR, and an OR does not have the limitations that EQUIV does (below). The difference is in how matches are visible to ranking functions. All words that are children of an OR count for ranking, while with EQUIV, they look like a single word to ranking: - Counts as only +1 for queryTermCount - Counts as 1 word for completeness measures - Proximity will not discriminate different words inside the EQUIV - Connectivity can be set between the entire EQUIV and the word before and after - Items inside the EQUIV are not directly visible to ranking features, so weight and connectivity on those will have no effect Limitations on how `equiv` can be used in a query: - `equiv` may not appear inside a phrase - It may only contain `TermItem` and `PhraseItem` instances. Operators like `and` cannot be placed inside `equiv` - `PhraseItem` instances inside `equiv` will rank as if they have size 1 Learn how to use [equiv](/en/linguistics/query-rewriting#equiv). | +| uri | Used to search for URLs indexed using the [uri field type](/en/reference/schemas/schemas#uri). where myUrlField contains uri("vespa.ai/foo") Various subfields are supported to search components of the URL, see the field type definition. \| Annotation \| Effect \| \| --- \| --- \| \| [startAnchor](#startanchor) \| Anchor uri.hostname at start. \| \| [endAnchor](#endanchor) \| Anchor uri.hostname at end. 
\| | +| fuzzy | [Levenshtein](https://en.wikipedia.org/wiki/Levenshtein_distance) edit distance search within a string or array\<string> [attribute](/en/reference/schemas/schemas#attribute). where myStringAttribute contains (\{prefixLength:1, maxEditDistance:2}fuzzy("parantesis")) Annotations below are configuring `fuzzy`: \| Annotation \| Effect \| \| --- \| --- \| \| [maxEditDistance](#maxeditdistance) \| An inclusive upper bound of edit distance between query and string attribute (default is 2). \| \| [prefixLength](#prefixlength) \| Number of characters that are considered frozen, so the fuzzy match will be performed only with the suffix left. Default is 0 (i.e. `fuzzy` will match across whole query) \| \| [prefix](#prefix) \| If `true`, a string is considered a match when it's possible to transform a *prefix* of the candidate string to the query string using at most `maxEditDistance` edits. See [fuzzy prefix match](/en/querying/text-matching#fuzzy-prefix-match). Default is `false`, which means that the entire string is considered. \| Find an example in [text matching](/en/querying/text-matching#fuzzy-match). **Important:** Only string [attribute](/en/reference/schemas/schemas#attribute) fields in [documents](/en/reference/applications/services/content#document) are supported (single, array or weightedset). Matching is optimized internally when `maxEditDistance` is 1 or 2. Setting [prefixLength](#prefixlength) greater than 0 narrows the match for the [fast-search](/en/reference/schemas/schemas#attribute), greatly reducing the number of terms that must be considered. | +| matches | Regular expression match is supported using [posix extended syntax](https://en.wikibooks.org/wiki/Regular_Expressions/POSIX_Extended_Regular_Expressions), with the limitation that it is **case-insensitive**. 
Example matching `madona`, `madonna`, and any other spelling with one or more `n`s: where attribute\_field matches "mado\[n\]+a" Find more examples in the [text matching](/en/querying/text-matching#regular-expression-match) guide. **Important:** Only [attribute](/en/reference/schemas/schemas#attribute) fields in [documents](/en/reference/applications/services/content#document) are supported. It is not optimized for performance. Having a prefix using the `^` will be faster than not having one. Additionally, fields that serve as both attributes and indexes are not compatible. | +| text | *text()* accepts any text and tokenizes it into a set of tokens to be searched in a given field or fieldSet. By default, the tokens are searched with a [weakAnd](#weakand) operator. You can override the default behavior via annotations: `text()` supports the same annotations as [userInput()](#userinput). Example: where text\_field contains text("some text") With annotations: where text\_field contains (\{language:'en'}text("some text")) The text argument can be given as a reference using "@parameterName": yql=select \* from sources \* where text\_field contains (@text)&text=some text | +| userInput | *userInput()* parses text from end users or models that may contain query syntax for choosing the fields to search, specifying phrases and negative terms etc. Since the query in userInput can specify fields, there is no "contains field" prefix before the userInput operator. 
yql=select \* from sources \* where userInput('some text') The argument can be given as a reference using "@parameterName": yql=select \* from sources \* where userInput(@text)&text=some text Both of these will result in the query select \* from sources \* where weakAnd(default contains "some", default contains "text") The default behavior may be overridden by annotations: yql=select \* from sources \* where (\{grammar.syntax:'none',grammar.tokenization:'linguistics',grammar.composite:'near',distance:3}userInput('some text')) \| Annotation \| Effect \| \| --- \| --- \| \| [grammar](#grammar) \| Sets the query parse type to apply when interpreting the user input text. For any value of `grammar` other than `raw` or `segment`, only the following annotations are applied: - [defaultIndex](#defaultindex) - [totalTargetHits](#totaltargethits) (for weakAnd)- [targetHits](#targethits) (for weakAnd)- [distance](#distance) (for near/oNear)- [ranked](#ranked) - [filter](#filter) - [stem](#stem) - [normalizeCase](#normalizecase) - [accentDrop](#accentdrop) - [usePositionData](#usepositiondata) E.g. if annotating `userInput` with `phrase`, a `filter` annotation will have effect, but not `language`. See [isYqlDefault](/en/reference/api/query#model.type.isYqlDefault) on setting a default grammar in a request/query profile. \| \| [defaultIndex](#defaultindex) \| Same as [model.defaultIndex](/en/reference/api/query#model.defaultindex) in the query API. \| \| [language](#language) \| Language setting for the linguistics treatment of this userInput() call. \| \| [allowEmpty](#allowempty) \| Whether to allow empty input for query parsing and search terms. \| In addition, other annotations, like [stem](#stem) or [ranked](#ranked), will take effect as normal. More examples can be found in the [query API](/en/querying/query-api#input-examples) guide. 
| +| userQuery | *userQuery()* reads from [model.queryString](/en/reference/api/query#model.querystring) and parses the query using [simple query language](/en/reference/querying/simple-query-language). If set, [model.filter](/en/reference/api/query#model.filter) is combined with *model.queryString* before the parsing. The user query is first parsed, then the resulting tree is inserted into the corresponding place in the YQL query tree. Example: $ vespa query 'select \* from sources \* where vendor contains "brick and mortar" AND price < 50 AND userQuery()' \\ query="abc def -ghi" \\ type=all This evaluates to a query where: - the numeric field *price* must be less than 50 - *vendor* must match *brick and mortar* - the default index must contain the two terms *abc* and *def*, *and not* contain *ghi*. Use [model.defaultIndex](/en/reference/api/query#model.defaultindex) to specify a field or fieldset if not using *default* - see [example](/en/querying/query-api#fieldset). | +| rank | The first, and only the first, argument of the *rank()* function determines whether a document is a match, but all arguments are used for calculating rank features. The `rank` operator is useful for boosting documents based on the presence of certain terms without impacting matching or retrieval logic. where rank(a contains "A", b contains "B", c contains "C") It's also useful in hybrid search use cases. See [blog post](https://blog.vespa.ai/redefining-hybrid-search-possibilities-with-vespa/) for usage examples. For example, retrieve using the [nearestNeighbor](#nearestneighbor) query operator as the first argument and have matching features calculated for the other arguments. where rank(nearestNeighbor(field, queryVector), a contains "A", b contains "B", c contains "C") | +| in | The *in* operator is used to match a set of values in an integer or string field. A document is considered a match when at least one of the values matches the content of the field. 
This is an optimized shorthand for multiple OR conditions, and is similar to the IN operator in SQL. Available since Vespa 8.293.15 . Example: where integer\_field in (10, 20, 30) where string\_field in ('germany', 'france', 'norway') Where `string_field` is a field with `match:word`. There is no [linguistic](/en/linguistics/linguistics.html) processing like tokenization or stemming of the string values used in the *in* operator except lowercasing. See string [match](/en/reference/schemas/schemas#match).field string\_field type string \{ indexing: summary \| index # or attribute match: word rank:filter attribute: fast-search # if attribute } Using the *in* operator against string fields with `match:text` will cause recall issues because the field contents will be tokenized during indexing while the *in* operator does not tokenize the values. The argument before *in* is the name of the field or [fieldset](/en/reference/schemas/schemas#fieldset) to search. The argument after *in* is a comma-separated list of values, enclosed in parentheses. String values must be single or double-quoted if passed inline in YQL For multi-value fields (like arrays), the *in* operator works by checking if any element in the array matches any of the values in the set. This is similar to SQL's IN operator but more streamlined for array comparisons. Example: where integer\_array\_field in (10, 20, 30) If integer\_array\_field = \[5, 10, 15\], it will match because 10 is in the array. Similarly, if integer\_array\_field = \[20, 25, 30\], it will match because both 20 and 30 are in the array. For faster query parsing use [parameter substitution](#parameter-substitution) to submit the values as an additional request parameter. Quoting of string values are optional. 
Example: where integer\_field in (@integer\_values)&integer\_values=10,20,30 where string\_field in (@string\_values)&string\_values=germany,france,norway The *in* operator acts as a single term in the query tree, and does not provide any match information for text ranking features. For a discussion of usage and examples refer to: - [multivalue query operators](/en/ranking/multivalue-query-operators#in-example) - [multi-lookup set filtering](/en/performance/feature-tuning#multi-lookup-set-filtering) - [in operator system test](https://github.com/vespa-engine/system-test/tree/master/tests/search/in_operator) \| Field type \| Singlevalue or [multivalue](/en/querying/searching-multivalue-fields) [attribute or index field](/en/basics/schemas#document-fields) with basic type [byte](/en/reference/schemas/schemas#byte), [int](/en/reference/schemas/schemas#int), [long](/en/reference/schemas/schemas#long) or [string](/en/reference/schemas/schemas#string). String fields must have `match:word` or `match:exact`. \| \| --- \| --- \| \| Query model \| A set of values/tokens. \| \| Matching \| Documents where the field contains at least one of the values in the query. \| \| Ranking \| None. \| \| Java Query Item \| [NumericInItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/NumericInItem.html) and [StringInItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/StringInItem.html). \| **Important:** When using the *in* operator with an attribute field, set [fast-search](/en/content/attributes#fast-search) and [rank: filter](/en/reference/schemas/schemas#filter) for best possible performance. Always use `match:word` for string fields. 
| +| dotProduct | *dotProduct* calculates the dot product between the weighted set in the query and a weighted set field in the document as its rank score contribution: where dotProduct(description, \{"a":1, "b":2}) The result is stored as a [raw score](/en/ranking/multivalue-query-operators#raw-scores-and-query-item-labeling). A normal use case is a collection of weighted tokens produced by an algorithm, to match against a corpus containing weighted tokens produced by another algorithm in order to implement personalized content exploration. See example usage of *dotProduct* in the [practical performance guide](/en/performance/practical-search-performance-guide#multi-valued-query-operators). Refer to [multivalue query operators](/en/ranking/multivalue-query-operators) for a discussion of usage and examples. Keys must be single or double-quoted if passed inline in YQL - alternatively, use [parameter substitution](#parameter-substitution) to submit the weighted set with a simple format for faster query parsing - example: `where dotProduct(description, @myterms)`. \| Field type \| Weighted set attribute with fast-search. (Note: Also supported for regular attribute or index fields, but then with much weaker performance). \| \| --- \| --- \| \| Query model \| Weighted set with \{token, weight} pairs. \| \| Matching \| Documents where the weighted set field contains at least one of the tokens in the query. \| \| Ranking \| Dot product score between the weights of the matched query tokens and field tokens. This score is available using [rawScore](/en/reference/ranking/rank-features#rawScore(field)) or [itemRawScore](/en/reference/ranking/rank-features#itemRawScore(label)) rank features. 
\| \| Java Query Item \| [DotProductItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/DotProductItem.html) \| | +| weightedSet | When using *weightedSet* to search a field, all tokens present in the searched field will be matched against the weighted set in the query. This means that using a weighted set to search a single-value attribute field will have similar semantics to using a normal term to search a weighted set field. The low-level matching information resulting from matching a document with a weighted set in the query will contain the weights of all the matched tokens in descending order. Each matched weight will be represented as a standard occurrence on position 0 in element 0. where weightedSet(description, \{"a":1, "b":2}) *weightedSet* has similar semantics to [equiv](#equiv), as it acts as a single term in the query. However, the restriction dictating that it contains a collection of weighted tokens directly enables specific back-end optimizations that improves performance for large sets of tokens compared to using the generic [equiv](#equiv) or [or](#or) operators. Keys must be single or double-quoted if passed inline in YQL - alternatively, use [parameter substitution](#parameter-substitution) to submit the weighted set with a simple format for faster query parsing - example: `where weightedSet(description, @myterms)`. \| Field type \| Singlevalue or [multivalue](/en/querying/searching-multivalue-fields) attribute or index field. (Note: Most use cases operates on a single value field). \| \| --- \| --- \| \| Query model \| Weighted set with \{token, weight} pairs. \| \| Matching \| Documents where the field contains at least one of the tokens in the query. For filtering use cases we recommend using the [in operator](#in) instead, as it is simpler to use and has slightly better performance. \| \| Ranking \| The operator will act as a single term in the back-end. 
The query term weight is the weight assigned to the operator itself and the match weight is the largest weight among matching tokens from the weighted set. This operator does not produce a raw score. Due to better ranking and performance we recommend using [dotProduct](#dotproduct) instead. \| \| Java Query Item \| [WeightedSetItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/WeightedSetItem.html) \| | +| wand | `wand` can be used to search for documents where weighted tokens in a field matches a subset of weighted tokens in the query. At the same time, it internally calculates the dot product between token weights in the query and the field. `wand` is guaranteed to return the top-k hits according to its internal dot product rank score. It is an operator that scales adaptively from [or](#or) to [and](#and). Note that total hit count becomes inaccurate when using wand. `wand` optimizes the performance of using multiple threads per search in the backend, and is also called *Parallel Wand*. `wand` also allows numeric arguments, then the search argument is an array of arrays of length two. In each pair, the first number is the search term, the second its weight: where wand(description, \[\[11,1\], \[37,2\]\]) Keys must be single or double-quoted if passed inline in YQL - alternatively, use [parameter substitution](#parameter-substitution) to submit the weighted set with a simple format for faster query parsing - example: `where wand(description, @myterms)`. \| Annotation \| Effect \| \| --- \| --- \| \| [scoreThreshold](#scorethreshold) \| Minimum rank score for hits to include. \| \| [totalTargetHits](#totaltargethits) \| Wanted number of hits exposed to the first-phase ranking function in total over the content nodes evaluating the query. \| \| [targetHits](#targethits) \| Wanted number of hits exposed to the first-phase ranking function per content node. Prefer using [totalTargetHits](#totaltargethits) over this. 
\| where (\{scoreThreshold: 0.13, totalTargetHits: 7}wand(description, \{"a":1, "b":2})) Refer to [using wand](/en/ranking/wand) for an introduction to the WAND algorithm and example usage of *wand* in the [practical performance guide](/en/performance/practical-search-performance-guide#multi-valued-query-operators). \| Field type \| Weighted set attribute with fast-search. (Note: Also supported for regular attribute or index fields, but then with much weaker performance). \| \| --- \| --- \| \| Query model \| Weighted set with \{token, weight} pairs. \| \| Matching \| Documents where the weighted set field contains at least one of the tokens in the query and where the internal dot product score for this document is larger than the worst among the current top-k best hits. This means that more than top-k documents are matched and returned for ranking. It also means that many documents are skipped, even if they match several tokens in the query, because the dot product score is too low. This skipping makes *wand* faster than [dotProduct](#dotproduct) in some cases. \| \| Ranking \| Dot product score between the weights of the matched query tokens and field tokens. This score is available using [rawScore](/en/reference/ranking/rank-features#rawScore(field)) or [itemRawScore](/en/reference/ranking/rank-features#itemRawScore(label)) rank features. Note that the top-k best hits are only guaranteed to be returned when using this internal score as the final ranking expression. \| \| Java Query Item \| [WandItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/WandItem.html) \| | +| weakAnd | `weakAnd` is sometimes called *Vespa Wand*. Unlike [wand](#wand), it accepts arbitrary word matches (across arbitrary fields) as arguments. Only a limited number of documents are returned for ranking (default is 100), but it does not guarantee to return the best k hits. 
This function can be seen as an optimized [or](#or): where weakAnd(a contains "A", b contains "B") \| Annotation \| Effect \| \| --- \| --- \| \| [totalTargetHits](#totaltargethits) \| Wanted number of hits exposed to the first-phase ranking function in total over the content nodes evaluating the query. \| \| [targetHits](#targethits) \| Wanted number of hits exposed to the first-phase ranking function per content node. Prefer using [totalTargetHits](#totaltargethits) over this. \| where (\{totalTargetHits: 7}weakAnd(a contains "A", b contains "B")) Unlike [wand](#wand), `weakAnd` can be used to search across several fields of various types, but it does NOT guarantee to return the top-k best number of hits. It can however be combined with any ranking expression. Keep in mind that this expression should correlate with its simple internal ranking score that uses query term weight and inverse document frequency for matching terms. Refer to [using wand](/en/ranking/wand) for usage and examples. \| Field type \| Multiple fields of all types (both attribute and index). \| \| --- \| --- \| \| Query model \| Arbitrary number of query items searching across different fields. \| \| Matching \| Documents that match at least one of the tokens in the query and where the internal operator score for this document is larger than the worst among the current top-k best hits. As with [wand](#wand), this means that typically more than top-k documents are matched and a lot of documents are skipped. \| \| Ranking \| Internal ranking score based on query term weight and inverse document frequency for matching terms to find the top-k hits. This score is currently not available to the ranking framework. Matching terms are exposed to the ranking framework (same as when using [and](#and) or [or](#or)), so an arbitrary ranking expression can be used in combination with this operator. Note that the ranking expression used should correlate with this internal ranking score. 
[bm25](/en/reference/ranking/rank-features#bm25), [nativeFieldMatch](/en/reference/ranking/rank-features#nativeFieldMatch) and [nativeDotProduct](/en/reference/ranking/rank-features#nativeDotProduct(field)) rank features are good starting points. \| \| Java Query Item \| [WeakAndItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/WeakAndItem.html) \| | +| geoLocation | `geoLocation` matches a [position](/en/reference/schemas/schemas#position) inside a geographical circle, specified as latitude, longitude, and a maximum distance (radius). See also [geoBoundingBox](#geoboundingbox). Example: where geoLocation(myfieldname, 63.5, 10.5, "200 km") In this example we search for documents near 63.5° north, 10.5° east, and within a 200 km radius. So a document with a "myfieldname" position in Trondheim, Norway at N63°25'47;E10°23'36 would match. The first parameter is the name of the attribute field. The second parameter is the latitude (positive for north, negative for south). The third parameter is the longitude (positive for east, negative for west). The fourth parameter must be a string specifying the radius and its units, where the supported units/suffixes include "km", "m" (abbr. for meters), "miles", "mi" (abbr. for miles), "deg" (abbr. for degrees) and "d" (contextual abbr. for degrees). The "deg" / "d" unit / suffix gives radius the same units as latitude. Any negative number for radius (e.g. "-1 m") is interpreted as an "infinite" radius, letting any geographical position at all match the geoLocation operator. The position attribute in the schema could look like: field myfieldname type position \{ indexing: attribute \| summary } Arrays of positions are also possible: field myfieldname type array\<position> \{ indexing: attribute } \| Annotation \| Effect \| \| --- \| --- \| \| [label](#label) \| Label for referring to this term during ranking. \| Properties: \| Field type \| position attribute (single-valued or array). 
\| \| --- \| --- \| \| Query parameters \| Field name, latitude, longitude, radius. \| \| Matching \| Returns documents inside the given geo circle. \| \| Ranking \| Use `closeness(myfieldname)`, or `distance(myfieldname)` in ranking calculations. See [closeness](/en/reference/ranking/rank-features#closeness(name)) and [distance](/en/reference/ranking/rank-features#distance(name)) documentation. \| \| Java Query Item \| [GeoLocationItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/GeoLocationItem.html) \| | +| geoBoundingBox | `geoBoundingBox` requires a [position](/en/reference/schemas/schemas#position) to be inside a geographical rectangle; specified as 4 numbers (in degrees). The 4 numbers must be in a specific order: south-western corner (minimum latitude, minimum longitude) followed by north-eastern corner (maximum latitude, maximum longitude). Examples: where geoBoundingBox(myfieldname, 63.25, 10.01, 63.45, 10.61) where geoBoundingBox(myfieldname, -23.12, -43.85, -22.59, -42.89) In the first example we search for documents inside a rectangular map view around Trondheim, Norway. So a document with a "myfieldname" position at [63°25'50"N 10°23'42"E](https://www.google.com/maps/place/63%C2%B025'50.0%22N+10%C2%B023'42.0%22E) would match. The second example surrounds [Rio de Janeiro](https://www.google.com/maps/place/22%C2%B059'13.0%22S+43%C2%B012'10.0%22W), Brazil. - The first parameter is the name of the attribute field. - The 2nd parameter is the minimum (southern) latitude (positive for north, negative for south). - The 3rd parameter is the minimum (western) longitude (positive for east, negative for west). - The 4th parameter is the maximum (northern) latitude (positive for north, negative for south). - The 5th parameter is the maximum (eastern) longitude (positive for east, negative for west). See the [geoLocation](#geolocation) operator for more details about positions. 
Note that there is no ranking contribution from this operator; if you want to get the distance to the center of the box, you need an additional `geoLocation` item with that point. Properties: \| Field type \| position attribute (single-valued or array). \| \| --- \| --- \| \| Query parameters \| Field name, southern, western, northern, eastern limits. \| \| Matching \| Returns documents inside the given geo bounding box. \| \| Ranking \| None. \| \| Java Query Item \| [GeoLocationItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/GeoLocationItem.html) \| | +| nearestNeighbor | `nearestNeighbor` matches the top-k nearest neighbors in a multidimensional vector space. Points in the vector space are specified as [tensors](/en/ranking/tensor-user-guide) with one indexed dimension, where the size of that dimension is equal to the dimensionality of the vector space. The document vectors are stored in a [tensor field attribute](/en/reference/schemas/schemas#tensor), and the query vector is sent with the query request. The following tensor field types are supported: - Single vector per document: Tensor type with one indexed dimension. Example: `tensor<float>(x[3])` - Multiple vectors per document: Tensor type with one or more mapped dimensions and one indexed dimension. Examples: `tensor<float>(m{},x[3])`, `tensor<float>(m{},n{},x[3])` Euclidean distance is used as the default [distance metric](/en/reference/schemas/schemas#distance-metric) and the exact nearest neighbors are returned. When storing multiple vectors per document, the vector that is closest to the query vector is used when calculating the distance between the document and the query. If an [HNSW index](/en/reference/schemas/schemas#index-hnsw) is specified on the tensor field, the approximate nearest neighbors are returned. 
Example: where (\{totalTargetHits: 10}nearestNeighbor(doc\_vector, query\_vector))&input.query(query\_vector)=\[3,5,7\]&ranking=semantic In this example we search for the top 10 nearest neighbors in a 3-dimensional vector space. *totalTargetHits* specifies the top-k nearest neighbors to expose to a user defined `semantic` [rank profile](/en/basics/ranking). The [totalTargetHits](#totaltargethits) annotation is required. The first parameter of *nearestNeighbor* is the name of the tensor field attribute containing the document vectors (*doc\_vector*). The second parameter is the name of the tensor sent with the query request (*query\_vector*). Specifying *query\_vector* as the name means the query request must set this tensor as *input.query(query\_vector)* - see the [reference](/en/reference/api/query#ranking.features). The tensor type of the **input query vector must be defined** in the rank profile: rank-profile semantic \{ inputs \{ query(query\_vector) tensor\<float>(x\[3\]) } first-phase: closeness(field, doc\_vector) } Also see [defining query feature types](../../ranking/ranking-expressions-features#query-feature-types). Failure to define the query input tensor in the schema will fail the request: Expected 'query(query\_vector)' to be a tensor, but it is the string '\[3,5,7\]' The document tensor field attribute is defined as follows: field doc\_vector type tensor\<float>(x\[3\]) \{ indexing: attribute \| summary } The example above does not define HNSW `index` and the search for neighbors will be exact. See [Nearest Neighbor Search](/en/querying/nearest-neighbor-search), [Approximate Nearest Neighbor Search using HNSW Index](/en/querying/approximate-nn-hnsw) and [Nearest Neighbor Search Guide](/en/querying/nearest-neighbor-search-guide) for more detailed examples. 
\| Annotation \| Effect \| \| --- \| --- \| \| [totalTargetHits](#totaltargethits) \| Specifies the number of hits nearestNeighbor should expose to [ranking](/en/basics/ranking) in total over the content nodes evaluating the query. Note that more or less hits may actually be produced. Setting target hits is required. \| \| [minTargetHits](#mintargethits) \| Specifies the *minimum* target hits to produce in this nearest neighbor operator. The default value is 100. Exploring too little in a graph leads to bad quality, and this parameter protects against that when totalTargetHits leads to some node with little content otherwise getting a low targetHits. \| \| [targetHits](#targethits) \| Specifies the target hits *per node*. Prefer using [totalTargetHits](#totaltargethits) over this. \| \| [approximate](#approximate) \| The optional `approximate` annotation may be set to `false` to not use an approximate [HNSW index](/en/reference/schemas/schemas#index-hnsw). This is especially useful to compare exact and approximate results in order to perform tuning of HNSW parameters. This annotation is default `true` when an HNSW index is specified, otherwise it is always `false`. Setting this to `false` might trigger [graceful query degradation](../../performance/graceful-degradation.html). Adjust [timeout](#timeout) as needed. \| \| [hnsw.exploreAdditionalHits](#hnsw-exploreadditionalhits) \| Tune how many extra nodes in the HNSW graph (in addition to `totalTargetHits`) that should be explored before selecting the best hits. Default is `0`. Increasing this parameter increases the accuracy of the approximate search, at the cost of more distance computations. \| \| [label](#label) \| Use to mark the query operator with a label that can be referred to from the ranking expression in the rank profile. See the [closeness](/en/reference/ranking/rank-features#closeness\(dimension,name\)) and [distance](/en/reference/ranking/rank-features#distance\(dimension,name\)) rank features. 
Useful when having multiple `nearestNeighbor` operators in the same query, e.g., when the schema has multiple vector fields. See [nearest neighbor search guide](/en/querying/nearest-neighbor-search-guide#multiple-nearest-neighbor-search-operators-in-the-same-query) for usage example. \| \| [distanceThreshold](#distancethreshold) \| Use to filter out hits with a higher distance than a threshold. See [nearest neighbor search guide](/en/querying/nearest-neighbor-search-guide#strict-filters-and-distant-neighbors) for usage example. \| Properties: \| Field type \| Tensor attribute with one indexed dimension of size N or with one or more mapped dimensions and one indexed dimension of size N. \| \| --- \| --- \| \| Query model \| Tensor with one indexed dimension of size N. \| \| Matching \| Returns documents where the distance (according to the [distance metric](/en/reference/schemas/schemas#distance-metric) used) between the document tensor and the query tensor is less than the greatest distance among the current top-k best hits. This means that typically more than top-k documents are matched and returned for ranking. This is similar to the behavior of [wand](#wand). When an [HNSW index](/en/reference/schemas/schemas#index-hnsw) is used, the top-k best hits are calculated before regular matching happens, taking the rest of the query filters into account. \| \| Ranking \| Calculates a closeness score that is defined as `1 / (1 + d)`, where `d` is the distance between the document tensor and query tensor. This score is available using [rawScore](/en/reference/ranking/rank-features#rawScore(field)), [itemRawScore](/en/reference/ranking/rank-features#itemRawScore(label)), or [closeness](/en/reference/ranking/rank-features#closeness\(dimension,name\)) rank features. The raw distance is available using the [distance](/en/reference/ranking/rank-features#distance(dimension,name)) rank feature. 
\| \| Java Query Item \| [NearestNeighborItem](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/NearestNeighborItem.html) \| | +| nonEmpty | *nonEmpty* takes as its only argument an arbitrary search expression. It will then perform a set of checks on that expression. If all the checks pass, the result is the same expression, otherwise the query will fail. The checks are as follows: <br/> 1. No empty search term <br/> 2. No empty operators, like phrases without terms <br/> 3. No null markers (NullItem) from e.g. failed query parsing <CodeBlock>```yql=select * from sources * where bar contains "a" and nonEmpty(bar contains "bar" and foo contains @foo)&foo= ``` </CodeBlock> Note how "foo" is empty in this case, which will force the query to fail. If "foo" contained a searchable term, the query would not have failed. | +| predicate | *predicate()* specifies a predicate query - see [predicate fields](/en/schemas/predicate-fields). It takes three arguments: the predicate field to search, a map of attributes, and a map of range attributes: <CodeBlock>```where predicate(predicate\_field,{"gender":"Female"},{"age":20L})```</CodeBlock> Due to a quirk in YQL-parsing, one cannot specify an empty map, use the number 0 instead. <CodeBlock> ``` where predicate(predicate\_field,0,{"age":20L})```</CodeBlock> | +| true | Matches all documents of any type. Care must be taken when using this since processing all documents as matches is expensive. At minimum, consider restricting to only one schema where you know the corpus isn't too big, see the [model.restrict](/en/reference/api/query#model.restrict) URL parameter. | +| false | Does not match any document at all. Not useful in itself, but could potentially be used as a placeholder in the query tree. | + +## order by + +Sort using `order by`. Add `asc` or `desc` after the name of an [attribute](/en/content/attributes) to set sort order - ascending order is default. 
Add another sorting attribute to get a secondary sort, which will be a tiebreaker for the primary ordering attribute. This is typically used to get a predictable ordering when the primary ordering attribute has the same value for multiple documents. + +where title contains "madonna" order by price asc, releasedate desc + +Sorting function, locale and strength are defined using the annotations "function", "locale" and "strength", as in: + +```bash +where title contains "madonna" order by {function: "uca", locale: "en_US", strength: "IDENTICAL"}other desc, {function: "lowercase"}something +``` + +<Info> +**Note:** [match-phase](/en/reference/schemas/schemas#match-phase) is enabled when sorting - refer to the [sorting reference](/en/reference/querying/sorting-language). +</Info> +The [rank profile](/en/basics/ranking) determines the rank score each document will get. Results are ordered by that value by default, but `order by` overrides that ordering. Vespa does not optimize away the rank score computation in this case, it is still executed, even if the model score is thrown away. Use the built-in rank-profile *unranked* for optimal performance of sorting queries. + +To do a primary ordering on the rank score, and a secondary sort on an attribute, use `'[relevance]'` as the first order by attribute. See [Special sorting attributes](/en/reference/querying/sorting-language#special-sorting-attributes) for more details. + +| Annotation | Effect | +| :--- | :--- | +| [function](#function) | Sort function, default UCA. | +| [locale](#locale) | Locale identifier for the [UCA sort function](#function). | +| [strength](#strength) | Strength setting for the [UCA sort function](#function). | + +## limit / offset + +To specify a slice / limit the number of hits returned / do pagination, use `limit` and/or `offset`. This can also be controlled by using [native execution parameters](/en/reference/api/query#native-execution-parameters). 
+ +<Danger> +**Important:** Values set in YQL take precedence over [hits/offset](/en/reference/api/query#hits). Values for hits/offset in query profiles are also overridden by YQL, e.g., `limit 100` overrides `<field name="hits" overridable="false">50</field>`. +</Danger> + +Limited by [maxHits](/en/reference/api/query#hits) (default 400) and [maxOffset](/en/reference/api/query#offset) (default 1000) - these can be configured in a [queryProfile](/en/querying/query-profiles). + +Example: This returns two hits (if there are sufficiently many hits matching the query), skipping the first 29 documents + +<Card> +where title contains "madonna" limit 31 offset 29 +</Card> +## timeout + +Set query timeout in milliseconds using `timeout`. This can also be controlled by using the native execution parameter [timeout](/en/reference/api/query#timeout). YQL-specified values take precedence: + +<Card> +where title contains "madonna" timeout 70 +</Card> +Only literal numbers are valid, i.e. setting another unit is not supported. + +<Note> +**Note:** The value is in milliseconds, whereas the native execution parameter [timeout](/en/reference/api/query#timeout) is in seconds. +</Note> +## Parameter substitution + +Use parameter substitution to separate the YQL string from user input values. E.g., the [text(value)](#text) query operator supports parameter substitution for the `value` parameter: + +<Card> +... where default contains text(@userinput)&userinput=free+text +</Card> + +The query operators [field in (value)](#in), [dotProduct(field, value)](#dotproduct), [weightedSet(field, value)](#weightedset) and [wand(field, value)](#wand) support parameter substitution for the `value` parameter. + +The `value` string can be passed in one of the following forms (quotes can be skipped unless the keys contain `,` or `:`.): + +- List: `value, ...`. For the [in](#in) operator only. +- Array: `[[key, value], ...]`. 
For [dotproduct](#dotproduct), [weightedset](#weightedset) and [wand](#wand). +- Map: `{key: value, ...}`. For [dotproduct](#dotproduct), [weightedset](#weightedset) and [wand](#wand). + +See the [query API guide](/en/querying/query-api#parameter-substitution) for examples. + +## Annotations + +Terms and phrases can be annotated to manipulate the behavior. Add an annotation using `{}`: + +``` +where text contains ({distance: 5}near("a", "b")) and text contains ({distance:2}near("c", "d")) +``` + +Note that the annotation is enclosed by parentheses to scope the annotation to the operator. + +All annotations are supported by the string arguments to functions like phrase() and near() and also the string argument to the "contains" operator. Some annotations are also supported by the functions which are handled like leaf nodes internally in the query tree: phrase(), near(), onear(), range(), equiv(), dotProduct(), weightedSet(), weakAnd(), wand() and nearestNeighbor(). + +Refer to [SelectTestCase.java](https://github.com/vespa-engine/vespa/blob/master/container-search/src/test/java/com/yahoo/select/SelectTestCase.java) for sample usage. + +| Annotation | Default | Values | Description | +| --- | --- | --- | --- | +| accentDrop | true | boolean | Remove accents from this term if it is the setting for this field. Refer to [linguistics](/en/linguistics/linguistics-opennlp#normalization). | +| allowEmpty | false | boolean | Whether to allow empty input for query parsing and query terms in [text](#text)/[userInput](#userinput). If `true`, a NullItem instance is inserted in the proper place in the query tree. If `false`, the query will fail if the user provided input can not be parsed or is empty. | +| andSegmenting | | true\|false | Force phrase or AND operator if re-segmenting (e.g. in stemming) this term results in multiple terms. Default is choosing from language settings. | +| annotations | | map | Map of `string: string`. Custom annotations. 
No special semantics inside the YQL layer. Example: ``` annotations : {cox: "another"}``` | +| approximate | | boolean | Used in [nearestNeighbor](#nearestneighbor). The optional *approximate* annotation may be set to `false` to disallow usage of an approximate [HNSW index](/en/reference/schemas/schemas#index-hnsw). This is especially useful to compare exact and approximate results in order to perform tuning of other parameters. This annotation is default `true` when an HNSW index is specified, otherwise it is always `false`. | +| ascending | | boolean | Ascending hit order. Used by [hitLimit](#hitlimit). | +| bounds | `closed` | enum | A [numeric](#numeric) interval is by default a closed interval. If the lower bound is exclusive, set to `leftOpen`. If the upper bound is exclusive, set to `rightOpen`. If both bounds are exclusive, set the annotation to `open`. Example: ```where ({bounds:"rightOpen"}range(year, 2000, 2018))``` | +| connectivity | | map | Map of `id: int, weight: double` of explicit connectivity between this item and the item with the given [id](#id) - see [text matching and ranking](/en/ranking/nativerank#weight-significance-and-connectedness). Example: ```connectivity: {id: 4, weight: 0.8}``` | +| descending | | boolean | Descending hit order. Used by [hitLimit](#hitlimit). | +| defaultIndex | `default` | Any searchable field in the schema. | Used by [userInput](#userinput). Same as [model.defaultIndex](/en/reference/api/query#model.defaultindex) in the query API. If [grammar](#grammar) is set to `raw` or `segment`, this will be the field searched. | +| distance | 2 | int | The *distance* annotation sets the maximum position difference to count as a match, see [near](#near) / [onear](#onear). All matching terms must fit within positions \[P, P+distance\] where P is the first term's position. Default is 2. ``` where text contains ({distance: 5}near("a", "b"))``` | +| elementFilter | | list of int | Used with [sameElement](#sameelement). 
Restricts matching in an array field to specific indices. A non-empty list makes sameElement match if and only if there is a match at one or more of the specified indices. where my\_numbers contains ```({elementFilter:\[0, 2, 5\]}sameElement("42")) ```| +| distanceThreshold | +infinity | double | Used in [nearestNeighbor](#nearestneighbor). The `distanceThreshold` annotation may be used to filter away hits with a higher distance than the given threshold from the results. Note that one will never get more hits with `distanceThreshold` than one would get without it - to get more hits, increase [totalTargetHits](#totaltargethits), too. The units for the threshold depend on the [distance metric](/en/reference/schemas/schemas#distance-metric) used. | +| endAnchor | true | boolean | The `hostname` subfield of [uri](#uri) supports anchoring to the start and/or end of the hostname, controlled by the `startAnchor` and `endAnchor` annotations. Anchoring to the end is on by default while anchoring to the start is not. Hence `where myUrlField.hostname contains uri("vespa.ai")` will match *vespa.ai* and *docs.vespa.ai*, while ```where myUrlField.hostname contains ({startAnchor: true}uri("vespa.ai")) ``` will only match vespa.ai. | +| filter | false | boolean | Regard this term as a "filter" term and not a term from the end user. Terms that are annotated with "filter:true" are not bolded. See also [model.filter](/en/reference/api/query#model.filter). Bolding of terms is controlled by [schema:bolding](/en/reference/schemas/schemas#bolding). | +| function | | | Default sort function for strings is `uca`. Field sort specification can be configured in the [schema](/en/reference/schemas/schemas#sorting), values in the query override the schema settings. Numeric fields are numerically sorted. 
\| Function \| Description \| \| --- \| --- \| \| `uca` \| This sorting is based on the [icu](https://icu.unicode.org/) library that follows the [Universal Collation Algorithm](https://unicode.org/reports/tr10/). The specifications of [locale](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4j/com/ibm/icu/util/ULocale.html) and [strength](https://unicode-org.github.io/icu-docs/apidoc/dev/icu4j/com/ibm/icu/text/Collator.html) are identical to how [icu](https://icu.unicode.org/) specifies them. Both [locale](#locale) and [strength](#strength) are optional, however `strength` requires `locale`. The [locale](#locale) query annotation will override locale-setting in the [schema](/en/reference/schemas/schemas#sorting). If `locale` is missing from both, the `lowercase` function will be used by default. \| \| `lowercase` \| This improves the sorting by first lowercasing and normalising the strings before sorting. This is slightly more correct and might be enough for the use case. It is not that much more costly than `raw` sort, and less expensive than `uca`. \| \| `raw` \| Raw byteorder is a simple and fast ordering based on memcmp of utf8 for strings and correct sort order compliant binary rep for other fields is done. However, that is not correct for anything except computers, looking only at the binary representation. \| | +| grammar | `weakAnd` | `raw`, `segment` and all values accepted for the [model.type](/en/reference/api/query#model.type) argument in the query API. | How to parse [userInput](#userinput). `raw` will treat the user input as a string to be matched without any processing, `segment` will do a first pass through the linguistic libraries, while the rest of the values will treat the string as a query to be parsed. The individual model.type settings can also be set, using `grammar.composite`, `grammar.tokenization`, `grammar.syntax`, and `grammar.profile`—refer to the [model.type](/en/reference/api/query#model.type) documentation. 
See also [userInput examples](/en/querying/query-api#input-examples). | +| hitLimit | | int | [Numeric](#numeric) operations support `hitLimit`. This is used for *capped range search*. An alternative to using negative and positive values for hitLimit is always using a positive number of hits (as a negative number of hits does not make much sense) and combine this with either of the [ascending](#ascending) and [descending](#descending) annotations (but not both). Example: `{hitLimit: 38, descending: true}` would be equivalent to setting it to -38, i.e. only populate with 38 hits and start from upper boundary, i.e. descending order. Note that `hitLimit` will limit the number of documents that are considered. This is a powerful optimisation that must be used with care, particularly in combination with other filters. The set of documents to be considered will be limited upfront by only selecting the N best according to the range query and the hitLimit annotation, for further query evaluation. `hitLimit` is not exact, but "at least". In addition, it will only kick in if the attribute has [fast-search](/en/reference/schemas/schemas#attribute). It will look up the upper or lower bound in the range in the dictionary and scan in ascending or descending order and select entries until it has satisfied hitLimit. You will get all documents for all the dictionary entries selected. See the [practical-search-performance-guide](/en/performance/practical-search-performance-guide#advanced-range-search-with-hitlimit) for an example. | +| hnsw.exploreAdditionalHits | | | Used in [nearestNeighbor](#nearestneighbor). When using an [HNSW index](/en/reference/schemas/schemas#index-hnsw), the optional `hnsw.exploreAdditionalHits` annotation can be used to tune how many extra nodes in the graph (in addition to `totalTargetHits`) should be explored before selecting the best hits. Using a greater number here gives better quality, but worse performance. | +| id | | int | Unique ID used for e.g. 
[connectivity](#connectivity). | +| implicitTransforms | true | boolean | Implicit term transformations (field defaults). If `implicitTransforms` is true, the settings for the field in the schema will be honored in term transforms, e.g. if the field has stemming, this term will be stemmed. If `implicitTransforms` is false, the search backend will receive the term exactly as written in the initial YQL expression. This is in other words a top level switch to turn off all other [stemming](/en/linguistics/linguistics-opennlp#stemming), accent removal, Unicode [normalizations](/en/linguistics/linguistics-opennlp#normalization) and so on. | +| label | | string | Used by [geoLocation](#geolocation) and [nearestNeighbor](#nearestneighbor). Label for referring to this term/operator during ranking. | +| language | | RFC 3066 language code | Language setting for the linguistics handling of [text](#text) and [userInput](#userinput), also see [model.language](/en/reference/api/query#model.language) in the query API reference. | +| locale | | | Used by the [UCA sort function](#function). An identifier following [unicode locale identifiers](https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers), e.g. `en_US`. | +| maxEditDistance | 2 | int | Used in [fuzzy](#fuzzy). An inclusive upper bound of edit distance between query and string attribute. | +| nfkc | true | boolean | NFKC [normalization](/en/linguistics/linguistics-opennlp#normalization). | +| normalizeCase | true | boolean | Normalize casing of this term if it is the setting for this field. | +| origin | | map | Map of `original: string, offset: int, length: int`. The (sub-)string which produced this term. Default unset. Example: ``` origin: {original: "abc", offset: 1, length: 2}``` | +| prefix | false | boolean | Do [prefix matching](/en/reference/schemas/schemas#prefix) for this term, e.g. search for "word\*". 
| +| substring | false | boolean | Do substring matching for this word if available in the index. ("Search for "\*word\*".") Only supported for [streaming search](/en/performance/streaming-search). | +| prefixLength | 0 | int | Used in [fuzzy](#fuzzy). Number of characters that are considered frozen, so the fuzzy match will be performed with the suffix left. | +| ranked | true | boolean | Include this term for ranking calculation. Setting ranked to false can speed up query evaluation. Read more about [schema reference](/en/reference/schemas/schemas#rank). [Example](/en/ranking/ranking-expressions-features#dumping-rank-features-for-specific-documents) | +| scoreThreshold | | double | A threshold in [wand](#wand) for the minimum score of hits to include as matches. | +| significance | | double | Significance value for text ranking features - see [text matching and ranking](/en/ranking/nativerank#weight-significance-and-connectedness). | +| startAnchor | false | boolean | See [endAnchor](#endanchor). | +| stem | true | boolean | Stem this term if it is the setting for this field. | +| strength | `PRIMARY` | - `PRIMARY` - `SECONDARY` - `TERTIARY` - `QUATERNARY` - `IDENTICAL` | Used by the [UCA sort function](#function). Default is `PRIMARY`, which only sorts on primary differentiating characteristics; this means that letters in uppercase/lowercase or with differences in accents only are considered equal. | +| suffix | false | boolean | Do *suffix matching* for this term, e.g. search for "\*word". | +| totalTargetHits | 100 | int | Used with [wand](#wand) and [weakAnd](#weakand), where the default is 100, and with [nearestNeighbor](#nearestneighbor), where it has no default. This sets the wanted number of hits exposed to the first-phase ranking function in total over the content nodes evaluating the query (a *group*). 
If additional second phase ranking is used, do not set `totalTargetHits` less than the configured rank-profile's [total-rerank-count](/en/reference/schemas/schemas#secondphase-total-rerank-count). See examples in [nearest neighbor search](/en/querying/nearest-neighbor-search). | +| minTargetHits | 100 | int | Used with [nearestNeighbor](#nearestneighbor). Specifies the *minimum* target hits to produce in this nearest neighbor operator. The default value is 100. Exploring too little in a graph leads to bad quality, and this parameter protects against that when totalTargetHits leads to some node with little content otherwise getting a low targetHits. | +| targetHits | 100 | int | Sets target hits per node. Prefer using [totalTargetHits](#totaltargethits) over this. | +| usePositionData | true | boolean | Use term position data for text ranking features such as [nativeRank](/en/ranking/nativerank). This is *term* position, not to be confused with [geo searches](/en/querying/geo-search). Setting "usePositionData:false" can improve query performance. | +| weight | 100 | int | Term weight, used in some text ranking features - see [text matching and ranking](/en/ranking/nativerank#weight-significance-and-connectedness). ```where title contains ({weight:200}"heads")``` | + +### Annotations of sub-expressions + +Consider the following query: + +```sql +select * from sources * where ({stem: false}(foo contains "a" and bar contains "b")) or foo contains ({stem: false}"c") +``` + +The "stem" annotation controls whether a given term should be stemmed if its field is configured as a stemmed field (default is "true"). The "AND" operator itself has no internal API for whether its operands should be stemmed or not, but we can still annotate as such, because when the value of a given annotation is determined, the expression tree is followed from the term in question and up through its ancestors. Traversing the tree stops when a value is found (or there is nothing more to traverse). 
In other words, none of the terms in this example will be stemmed. + +How annotations behave may be easier to understand when expressing a boolean query in the style of an S-expression: + +```sql +(AND term1 term2 (OR term3 term4) (OR term5 (AND term6 term7))) +``` + +The annotation scopes would then be as follows, i.e. annotations on which elements will be checked when determining the settings for a given term: + +| | | +| :--- | :--- | +| term1 | term1 itself, and the first AND | +| term2 | term2 itself, and the first AND | +| term3 | term3 itself, the first OR and the first AND | +| term4 | term4 itself, the first OR and the first AND | +| term5 | term5 itself, the second OR and the first AND | +| term6 | term6 itself, the second AND, the second OR and the first AND | +| term7 | term7 itself, the second AND, the second OR and the first AND | + +## Query properties + +Use YQL variable syntax to initialize words in phrases and as single terms. This removes the need for caring about quoting a term in YQL, as well as URL quoting. The term will be used _exactly_ as it is in the URL. As an example, look at a query with a YQL argument, and the properties _animal_ and _syntaxExample_: + +```bash +yql=select * from sources * where foo contains @animal and foo contains phrase(@animal, @syntaxExample, @animal)&animal=panda&syntaxExample=syntactic +``` + +This YQL expression will then access the query properties _animal_ and _syntaxExample_ and evaluate to: + +```sql +select * from sources * where (foo contains "panda" AND foo contains phrase("panda", "syntactic", "panda")) +``` + +## YQL in query profiles + +YQL requires quoting to be included in a URL. Since YQL is well suited to application logic, while not being intended for end users, a solution to this is storing the application's YQL queries into different [query profiles](/en/querying/query-profiles). 
To add a default query profile, add _search/query-profiles/default.xml_ to the [application package](/en/reference/applications/application-packages): + +```xml +<query-profile id="default"> + <field name="yql">select * from sources * where default contains "latest" or userQuery()</field> +</query-profile> +``` + +This will add _latest_ as an _OR term_ to all queries not having an explicit query profile parameter. The important thing to note is how it is not necessary to URL-quote anything in the query profiles files. They operate independently of the HTTP parsing as such. + +## Query rewriting in Searchers + +Searchers which modify the textual YQL statement (not recommended) should be annotated with `@Before("ExternalYql")`. Searchers modifying the query tree produced from an input YQL statement should annotate with `@After("ExternalYql")`. + +## Grouping + +Group / aggregate results by adding a grouping expression after a `|` - [read more](/en/querying/grouping). + +```sql +select * from sources * where sddocname contains 'purchase' | all(group(customer) each(output(sum(price)))) +``` diff --git a/mintlify-docs/en/reference/rag/chunking.mdx b/mintlify-docs/en/reference/rag/chunking.mdx new file mode 100644 index 0000000000..03cfdb22c8 --- /dev/null +++ b/mintlify-docs/en/reference/rag/chunking.mdx @@ -0,0 +1,39 @@ +--- +title: "Chunking Reference" +sidebarTitle: "Chunking" +--- + +Reference configuration for *chunkers*: Components that split text into pieces in [chunk indexing expressions](/en/reference/writing/indexing-language#chunk), as in + +```bash +indexing: input myTextField | chunk fixed-length 500 | index +``` + +See also the [guide to working with chunks](/en/rag/working-with-chunks). + +## Built-in chunkers + +Vespa provides these built-in chunkers: + +| Chunker id | Arguments | Description | +| --- | --- | --- | +| sentence | \- | Splits the text into chunks at sentence boundaries. 
| +| fixed-length | target chunk length in characters | Splits the text into chunks with roughly equal length. This will prefer to make chunks of similar length, and to split at reasonable locations over matching the target length exactly. | + +## Chunker components + +Chunkers are [components](/en/applications/components), so you can also add your own: + +```xml +<container version="1.0"> + <component id="myChunker" + class="com.example.MyChunker" + bundle="the name in artifactId in pom.xml"> + <config name='com.example.my-chunker'> + <myValue>foo</myValue> + </config> + </component> +</container> +``` + +You create a chunker component by implementing the [com.yahoo.language.process.Chunker](https://github.com/vespa-engine/vespa/blob/master/linguistics/src/main/java/com/yahoo/language/process/Chunker.java) interface, see [these examples](https://github.com/vespa-engine/vespa/tree/master/linguistics/src/main/java/ai/vespa/language/chunker). \ No newline at end of file diff --git a/mintlify-docs/en/reference/rag/embedding.mdx b/mintlify-docs/en/reference/rag/embedding.mdx new file mode 100644 index 0000000000..0d92b79a15 --- /dev/null +++ b/mintlify-docs/en/reference/rag/embedding.mdx @@ -0,0 +1,367 @@ +--- +title: "Embedding Reference" +sidebarTitle: "Embedding" +--- +Reference configuration for [embedders](/en/rag/embedding). + +## Model config reference + +Embedder models use the [model](/en/reference/applications/config-files#model) type configuration which accepts the attributes `model-id`, `url` or `path`. Multiple of these can be specified as a single config value, where one is used depending on the deployment environment: + +- If a `model-id` is specified and the application is deployed on Vespa Cloud, the `model-id` is used. +- Otherwise, if a `url` is specified, it is used +- Otherwise, `path` is used. + +When using `path`, the model files must be supplied in the application package. 
+ +## Huggingface Embedder + +An embedder using any [Huggingface tokenizer](https://huggingface.co/docs/tokenizers/index), including multilingual tokenizers, to produce tokens which are then input to a supplied transformer model in ONNX model format. + +The Huggingface embedder is configured in [services.xml](/en/reference/applications/services/services), within the `container` tag: + +```xml +<container id="default" version="1.0"> + <component id="hf-embedder" type="hugging-face-embedder"> + <transformer-model path="my-models/model.onnx"/> + <tokenizer-model path="my-models/tokenizer.json"/> + <prepend> + <query>query:</query> + <document>passage:</document> + </prepend> + </component> + ... +</container> +``` + +### Private Model Hub + +You may also use models hosted in a [private Huggingface model hub](https://huggingface.co/docs/hub/en/repositories-settings#private-repositories). + +Retrieve an API key from Huggingface with the appropriate permissions, and add it to the [vespa secret store.](/en/security/secret-store) Add the secret to the container `<secrets>` and refer to it in your Huggingface model configuration: + +```xml +<container id="default" version="1.0"> + <secrets> + <myPrivateHubApiKey vault="my-vault" name="my-secret-name" /> + </secrets> + <component id="hf-embedder" type="hugging-face-embedder"> + <transformer-model url="my-url" secret-ref="myPrivateHubApiKey"/> + <tokenizer-model url="my-url" secret-ref="myPrivateHubApiKey"/> + </component> +</container> +``` + +### Huggingface embedder reference config + +In addition to [embedder ONNX parameters](#embedder-onnx-reference-config): + +| Name | Occurrence | Description | Type | Default | +| --- | --- | --- | --- | --- | +| transformer-model | One | Use to point to the transformer ONNX model file | [model-type](#model-config-reference) | N/A | +| tokenizer-model | One | Use to point to the `tokenizer.json` Huggingface tokenizer configuration file | [model-type](#model-config-reference) | N/A | +| 
max-tokens | One | The maximum number of tokens accepted by the transformer model | numeric | 512 | +| transformer-input-ids | One | The name or identifier for the transformer input IDs | string | input\_ids | +| transformer-attention-mask | One | The name or identifier for the transformer attention mask | string | attention\_mask | +| transformer-token-type-ids | One | The name or identifier for the transformer token type IDs. If the model does not use `token_type_ids` use `<transformer-token-type-ids/>` | string | token\_type\_ids | +| transformer-output | One | The name or identifier for the transformer output | string | last\_hidden\_state | +| pooling-strategy | One | How the output vectors of the ONNX model is pooled to obtain a single vector representation. Valid values are `mean`,`cls` and `none` | string | mean | +| normalize | One | A boolean indicating whether to normalize the output embedding vector to unit length (length 1). Useful for `prenormalized-angular` [distance-metric](/en/reference/schemas/schemas#distance-metric) | boolean | false | +| prepend | Optional | Prepend instructions that are prepended to the text input before tokenization and inference. Useful for models that have been trained with specific prompt instructions. The instructions are prepended to the input text.<br/><br/> • Element `<query>` - Optional query prepend instruction.<br/> • Element `<document>` - Optional document prepend instruction. <br/><br/>`<prepend>`<br/> `<query>query:</query>`<br/> `<document>passage:</document>` <br/>`</prepend>` | Optional `<query> <document>` elements. 
| | + +## Bert embedder + +The Bert embedder is configured in [services.xml](/en/reference/applications/services/services), within the `container` tag: + +```xml +<container version="1.0"> + <component id="myBert" type="bert-embedder"> + <transformer-model path="models/e5-small-v2.onnx"/> + <tokenizer-vocab url="https://huggingface.co/intfloat/e5-small-v2/raw/main/vocab.txt"/> + </component> +</container> +``` + +### Bert embedder reference config + +In addition to [embedder ONNX parameters](#embedder-onnx-reference-config): + +| Name | Occurrence | Description | Type | Default | +| --- | --- | --- | --- | --- | +| transformer-model | One | Use to point to the transformer ONNX model file | [model-type](#model-config-reference) | N/A | +| tokenizer-vocab | One | Use to point to the Huggingface `vocab.txt` tokenizer file with valid wordpiece tokens. Does not support `tokenizer.json` format. | [model-type](#model-config-reference) | N/A | +| max-tokens | One | The maximum number of tokens allowed in the input | integer | 384 | +| transformer-input-ids | One | The name or identifier for the transformer input IDs | string | input\_ids | +| transformer-attention-mask | One | The name or identifier for the transformer attention mask | string | attention\_mask | +| transformer-token-type-ids | One | The name or identifier for the transformer token type IDs. If the model does not use `token_type_ids` use `<transformer-token-type-ids/>` | string | token\_type\_ids | +| transformer-output | One | The name or identifier for the transformer output | string | output\_0 | +| transformer-start-sequence-token | One | The start of sequence token | numeric | 101 | +| transformer-end-sequence-token | One | The end of sequence token | numeric | 102 | +| pooling-strategy | One | How the output vectors of the ONNX model are pooled to obtain a single vector representation. 
Valid values are `mean` and `cls` | string | mean | + +## colbert embedder + +The colbert embedder is configured in [services.xml](/en/reference/applications/services/services), within the `container` tag: + +```xml +<container version="1.0"> + <component id="colbert" type="colbert-embedder"> + <transformer-model path="models/colbertv2.onnx"/> + <tokenizer-model url="https://huggingface.co/colbert-ir/colbertv2.0/raw/main/tokenizer.json"/> + <max-query-tokens>32</max-query-tokens> + <max-document-tokens>256</max-document-tokens> + </component> +</container> +``` + +The Vespa colbert implementation works with default configurations for transformer models that use WordPiece tokenization. + +### colbert embedder reference config + +In addition to [embedder ONNX parameters](#embedder-onnx-reference-config): + +| Name | Occurrence | Description | Type | Default | +| --- | --- | --- | --- | --- | +| transformer-model | One | Use to point to the transformer ColBERT ONNX model file | [model-type](#model-config-reference) | N/A | +| tokenizer-model | One | Use to point to the `tokenizer.json` Huggingface tokenizer configuration file | [model-type](#model-config-reference) | N/A | +| max-tokens | One | Max length of token sequence the transformer-model can handle | numeric | 512 | +| max-query-tokens | One | The maximum number of ColBERT query token embeddings. Queries are padded to this length. Must be lower than max-tokens | numeric | 32 | +| max-document-tokens | One | The maximum number of ColBERT document token embeddings. Documents are not padded. 
Must be lower than max-tokens | numeric | 512 | +| transformer-input-ids | One | The name or identifier for the transformer input IDs | string | input\_ids | +| transformer-attention-mask | One | The name or identifier for the transformer attention mask | string | attention\_mask | +| transformer-mask-token | One | The mask token id used for ColBERT query padding | numeric | 103 | +| transformer-start-sequence-token | One | The start of sequence token id | numeric | 101 | +| transformer-end-sequence-token | One | The end of sequence token id | numeric | 102 | +| transformer-pad-token | One | The pad sequence token id | numeric | 0 | +| query-token-id | One | The colbert query token marker id | numeric | 1 | +| document-token-id | One | The colbert document token marker id | numeric | 2 | +| transformer-output | One | The name or identifier for the transformer output | string | contextual | + +The Vespa colbert-embedder uses `[unused0]`, token id 1, for the `query-token-id` query marker, and `[unused1]`, token id 2, for the `document-token-id` document marker. Document punctuation chars are filtered (not configurable). The following characters are removed ``!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~``. + +### splade embedder reference config + +In addition to [embedder ONNX parameters](#embedder-onnx-reference-config): + +| Name | Occurrence | Description | Type | Default | +| --- | --- | --- | --- | --- | +| transformer-model | One | Use to point to the transformer ONNX model file | [model-type](#model-config-reference) | N/A | +| tokenizer-model | One | Use to point to the `tokenizer.json` Huggingface tokenizer configuration file | [model-type](#model-config-reference) | N/A | +| term-score-threshold | One | An optional threshold to increase sparseness, tokens/terms with a score lower than this are not retained. 
| numeric | N/A | +| max-tokens | One | The maximum number of tokens accepted by the transformer model | numeric | 512 | +| transformer-input-ids | One | The name or identifier for the transformer input IDs | string | input\_ids | +| transformer-attention-mask | One | The name or identifier for the transformer attention mask | string | attention\_mask | +| transformer-token-type-ids | One | The name or identifier for the transformer token type IDs. If the model does not use `token_type_ids` use `<transformer-token-type-ids/>` | string | token\_type\_ids | +| transformer-output | One | The name or identifier for the transformer output | string | logits | + +<Tooltip tip="This content is applicable to Vespa Cloud deployments.">`Vespa Cloud`</Tooltip> + +## VoyageAI Embedder + +An embedder that uses the [VoyageAI](https://www.voyageai.com/) API to generate embeddings. + +The VoyageAI embedder is configured in [services.xml](/en/reference/applications/services/services), within the `container` tag: + +```xml +<container id="default" version="1.0"> + <component id="voyage" type="voyage-ai-embedder"> + <model>voyage-law-2</model> + <api-key-secret-ref>voyage_api_key</api-key-secret-ref> + <dimensions>1024</dimensions> + <endpoint>https://api.voyageai.com/v1/embeddings</endpoint> + <truncate>true</truncate> + <batching max-size="16" max-delay="200ms"/> + </component> +</container> +``` + +### VoyageAI embedder reference config + +| Name | Occurrence | Description | Type | Default | +| --- | --- | --- | --- | --- | +| model | One | **Required**. The VoyageAI model to use. See the [VoyageAI embeddings documentation](https://docs.voyageai.com/docs/embeddings) for the complete list of available models including general-purpose, specialized, [contextualized](https://docs.voyageai.com/docs/contextualized-chunk-embeddings), and [multimodal](https://docs.voyageai.com/docs/multimodal-embeddings) models. | string | N/A | +| dimensions | One | **Required**. 
The number of dimensions for the output embedding vectors. Must match the tensor field definition in your schema. Valid values are `256`, `512`, `1024`, `1536`, or `2048`. See the [VoyageAI embeddings documentation](https://docs.voyageai.com/docs/embeddings) for model-specific dimension support. | integer | N/A | +| api-key-secret-ref | One | **Required**. Reference to the secret in Vespa's [secret store](/en/security/secret-store) containing the VoyageAI API key. | string | N/A | +| endpoint | Optional | VoyageAI API endpoint URL. | string | https://api.voyageai.com/v1/embeddings | +| truncate | Optional | Whether to truncate input text exceeding model limits. When enabled, text is automatically truncated. When disabled, requests with too-long text will fail. | boolean | true | +| quantization | Optional | Output quantization format for embedding vectors. Valid values are `auto`, `float`, `int8`, or `binary`. When set to `auto`, the embedder infers the appropriate quantization from the dimensions and cell type of the destination tensor in your schema. The `float` value also applies to `bfloat16` destination tensors. When using `binary` quantization, the destination tensor field must use `int8` cell type with 1/8 of the dimensions specified in the embedder configuration (e.g., 1024 dimensions → `tensor<int8>(x[128])`). See the [VoyageAI quantization documentation](https://docs.voyageai.com/docs/flexible-dimensions-and-quantization#quantization) for details on quantization options and [binarizing vectors](/en/rag/binarizing-vectors) for more on binary quantization in Vespa. | string | auto | +| batching | Optional | Enables dynamic batching of concurrent embedding requests into single VoyageAI API calls. When enabled, the embedder collects concurrent requests and sends them as a single batch, reducing the number of API calls and improving throughput. <br/><br/>• `max-size` — Maximum number of requests to include in a single batch. 
<br/>• `max-delay` — Maximum time to wait for a full batch before sending a partial one (e.g., `200ms`). | element | disabled | + +## OpenAI Embedder + +Available since `Vespa 8.678` + +An embedder that uses the [OpenAI](https://platform.openai.com/docs/guides/embeddings) embeddings API to generate embeddings. + +The OpenAI embedder is configured in [services.xml](/en/reference/applications/services/services), within the `container` tag: + +```xml +<container id="default" version="1.0"> + <component id="openai" type="openai-embedder"> + <model>text-embedding-3-small</model> + <api-key-secret-ref>openai_api_key</api-key-secret-ref> + <dimensions>1536</dimensions> + <endpoint>https://api.openai.com/v1/embeddings</endpoint> + </component> +</container> +``` + +### OpenAI embedder reference config + +| Name | Occurrence | Description | Type | Default | +| --- | --- | --- | --- | --- | +| model | One | **Required**. The OpenAI model to use, for example `text-embedding-3-small` or `text-embedding-3-large`. See the [OpenAI embeddings documentation](https://platform.openai.com/docs/guides/embeddings) for the complete list of available models. | string | N/A | +| dimensions | One | **Required**. The number of dimensions for the output embedding vectors. Must match the tensor field definition in your schema. The destination tensor field must use `float` or `bfloat16` cell type — the OpenAI API does not support quantization. | integer | N/A | +| api-key-secret-ref | Optional | Reference to the secret in Vespa's [secret store](/en/security/secret-store) containing the OpenAI API key. When unset, requests are sent without an `Authorization` header. | string | "" (no auth) | +| endpoint | Optional | OpenAI API endpoint URL. Set this to target a specific OpenAI-compatible API. | string | https://api.openai.com/v1/embeddings | +| batching | Optional | Enables dynamic batching of concurrent embedding requests into single OpenAI API calls. 
When enabled, the embedder collects concurrent requests and sends them as a single batch, reducing the number of API calls and improving throughput.<br/><br/> • `max-size` — Maximum number of requests to include in a single batch. <br/>• `max-delay` — Maximum time to wait for a full batch before sending a partial one (e.g., `200ms`). | element | disabled | +| prepend | Optional | Strings prepended to the text input before sending the embedding request. Useful for OpenAI-compatible instruction-tuned models that expect a task-specific prefix.<br/><br/> • Element `<query>` - Optional query prepend instruction. <br/>• Element `<document>` - Optional document prepend instruction. <br/><br/>`<prepend>`<br/> `<query>query: </query>`<br/> `<document>passage: </document>`<br/> `</prepend>` | Optional `<query> <document>` elements. | | + +## Mistral Embedder + +Available since `Vespa 8.678` + +An embedder that uses the [Mistral](https://docs.mistral.ai/capabilities/embeddings/overview/) embeddings API to generate embeddings. + +The Mistral embedder is configured in [services.xml](/en/reference/applications/services/services), within the `container` tag: + +```xml +<container id="default" version="1.0"> + <component id="mistral" type="mistral-embedder"> + <model>mistral-embed</model> + <api-key-secret-ref>mistral_api_key</api-key-secret-ref> + <dimensions>1024</dimensions> + <quantization>auto</quantization> + </component> +</container> +``` + +### Mistral embedder reference config + +| Name | Occurrence | Description | Type | Default | +| --- | --- | --- | --- | --- | +| model | One | **Required**. The Mistral model to use, for example `mistral-embed` or `codestral-embed`. See the [Mistral embeddings documentation](https://docs.mistral.ai/capabilities/embeddings/overview/) for the complete list of available models. | string | N/A | +| api-key-secret-ref | One | **Required**. 
Reference to the secret in Vespa's [secret store](/en/security/secret-store) containing the Mistral API key. | string | N/A | +| dimensions | One | **Required**. The number of dimensions for the output embedding vectors. Must match the tensor field definition in your schema. See the [Mistral embeddings documentation](https://docs.mistral.ai/capabilities/embeddings/overview/) for model-specific dimension support. | integer | N/A | +| quantization | Optional | Output quantization format for embedding vectors. Valid values are `auto`, `float`, `int8`, or `binary`. See the `quantization` row of the [VoyageAI embedder reference config](#voyageai-embedder-reference-config) for details on `auto` resolution and the destination tensor layout required for `int8` and `binary`. Note that not all Mistral models support `int8` and `binary` quantization — see the [Mistral embeddings documentation](https://docs.mistral.ai/capabilities/embeddings/overview/) for per-model support. | string | auto | +| batching | Optional | Enables dynamic batching of concurrent embedding requests into single Mistral API calls. When enabled, the embedder collects concurrent requests and sends them as a single batch, reducing the number of API calls and improving throughput.<br/><br/>• `max-size` — Maximum number of requests to include in a single batch. <br/>• `max-delay` — Maximum time to wait for a full batch before sending a partial one (e.g., `200ms`). 
| element | disabled | + +## Huggingface tokenizer embedder + +The Huggingface tokenizer embedder is configured in [services.xml](/en/reference/applications/services/services), within the `container` tag: + +```xml +<container version="1.0"> + <component id="tokenizer" type="hugging-face-tokenizer"> + <model url="https://huggingface.co/bert-base-uncased/raw/main/tokenizer.json"/> + </component> + </container> +``` + +### Huggingface tokenizer reference config + +| Name | Occurrence | Description | Type | Default | +| --- | --- | --- | --- | --- | +| model | One To Many | Use to point to the `tokenizer.json` Huggingface tokenizer configuration file. Also supports `language`, which is only relevant if one wants to tokenize differently based on the document language. Use "unknown" for a model to be used for any language (i.e. by default). | [model-type](#model-config-reference) | N/A | + +## Embedder ONNX reference config + +Vespa uses [ONNX Runtime](https://onnxruntime.ai/) to accelerate inference of embedding models. These parameters are valid for both [Bert embedder](#bert-embedder) and [Huggingface embedder](#huggingface-embedder). + +| Name | Occurrence | Description | Type | Default | +| --- | --- | --- | --- | --- | +| onnx-execution-mode | One | Low level ONNX execution mode. Valid values are `parallel` or `sequential`. Only relevant for inference on CPU. See [ONNX runtime documentation](https://onnxruntime.ai/docs/performance/tune-performance/threading.html) on threading. | string | sequential | +| onnx-interop-threads | One | Low level ONNX setting. Only relevant for inference on CPU. | numeric | 1 | +| onnx-intraop-threads | One | Low level ONNX setting. Only relevant for inference on CPU. | numeric | 4 | +| onnx-gpu-device | One | The GPU device to run the model on. See [configuring GPU for Vespa container image](/en/operations/self-managed/vespa-gpu-container). Use `-1` to not use GPU for the model, even if the instance has available GPUs. 
| numeric | 0 | + +## SentencePiece embedder + +A native Java implementation of [SentencePiece](https://github.com/google/sentencepiece). SentencePiece breaks text into chunks independent of spaces, which is robust to misspellings and works with CJK languages. Prefer the [Huggingface tokenizer embedder](#huggingface-tokenizer-embedder) over this for better compatibility with Huggingface models. + +This is suitable to use in conjunction with [custom components](/en/applications/components), or the resulting tensor can be used in [ranking](/en/basics/ranking). + +To use the [SentencePiece embedder](https://github.com/vespa-engine/vespa/blob/master/linguistics-components/src/main/java/com/yahoo/language/sentencepiece/SentencePieceEmbedder.java), add it to [services.xml](/en/reference/applications/services/services): + +```xml +<container version="1.0"> + <component id="mySentencePiece" + class="com.yahoo.language.sentencepiece.SentencePieceEmbedder" + bundle="linguistics-components"> + <config name="language.sentencepiece.sentence-piece"> + <model> + <item> + <language>unknown</language> + <path>model/en.wiki.bpe.vs10000.model</path> + </item> + </model> + </config> + </component> + </container> +``` + +See the options available for configuring SentencePiece in [the full configuration definition](https://github.com/vespa-engine/vespa/blob/master/linguistics-components/src/main/resources/configdefinitions/language.sentencepiece.sentence-piece.def). + +## WordPiece embedder + +A native Java implementation of [WordPiece](https://github.com/google-research/bert#tokenization), which is commonly used with BERT models. Prefer the [Huggingface tokenizer embedder](#huggingface-tokenizer-embedder) over this for better compatibility with Huggingface models. + +This is suitable to use in conjunction with [custom components](/en/applications/components), or the resulting tensor can be used in [ranking](/en/basics/ranking). 
+ +To use the [WordPiece embedder](https://github.com/vespa-engine/vespa/blob/master/linguistics-components/src/main/java/com/yahoo/language/wordpiece/WordPieceEmbedder.java), add it to [services.xml](/en/reference/applications/services/services) within the `container` tag: + +```xml +<container version="1.0"> + <component id="myWordPiece" + class="com.yahoo.language.wordpiece.WordPieceEmbedder" + bundle="linguistics-components"> + <config name="language.wordpiece.word-piece"> + <model> + <item> + <language>unknown</language> + <path>models/bert-base-uncased-vocab.txt</path> + </item> + </model> + </config> + </component> + </container> +``` + +See the options available for configuring WordPiece in [the full configuration definition](https://github.com/vespa-engine/vespa/blob/master/linguistics-components/src/main/resources/configdefinitions/language.wordpiece.word-piece.def). + +WordPiece is suitable to use in conjunction with custom components, or the resulting tensor can be used in [ranking](/en/basics/ranking). + +## Using an embedder from Java + +When writing custom Java components (such as [Searchers](/en/applications/searchers) or [Document processors](/en/applications/document-processors#document-processors)), use embedders you have configured by [having them injected in the constructor](/en/applications/dependency-injection), just as any other component: + +```java +class MyComponent { + @Inject + public MyComponent(ComponentRegistry<Embedder> embedders) { + // embedders contains all the embedders configured in your services.xml + } +} +``` + +See a concrete example of using an embedder in a custom searcher in [LLMSearcher](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/src/main/java/ai/vespa/cloud/docsearch/LLMSearcher.java). 
+ +## Custom Embedders + +Vespa provides a Java interface for defining components which can provide embeddings of text: [com.yahoo.language.process.Embedder](https://github.com/vespa-engine/vespa/blob/master/linguistics/src/main/java/com/yahoo/language/process/Embedder.java). + +To define a custom embedder in an application and make it usable by Vespa (see [embedding a query text](/en/rag/embedding#embedding-a-query-text)), implement this interface and add it as a [component](/en/applications/developer-guide#developing-components) to [services.xml](/en/reference/applications/services/container): + +```xml +<container version="1.0"> + <component id="myEmbedder" + class="com.example.MyEmbedder" + bundle="the name in artifactId in pom.xml"> + <config name='com.example.my-embedder'> + <model model-id="minilm-l6-v2"/> + <vocab path="files/vocab.txt"/> + <myValue>foo</myValue> + </config> + </component> +</container> +``` \ No newline at end of file diff --git a/mintlify-docs/en/reference/ranking/constant-tensor-json-format.mdx b/mintlify-docs/en/reference/ranking/constant-tensor-json-format.mdx new file mode 100644 index 0000000000..6863593f6c --- /dev/null +++ b/mintlify-docs/en/reference/ranking/constant-tensor-json-format.mdx @@ -0,0 +1,147 @@ +--- +title: "Constant Tensor JSON Format" +sidebarTitle: "constant tensor" +--- + +This document describes with examples the JSON formats accepted when reading tensor constants from a file. For convenience, compactness, and readability there are various formats that can be used depending on the detailed tensor type: + +- [Dense tensors](#dense-tensors): indexed dimensions only +- [Sparse tensors](#sparse-tensors): mapped dimensions only +- [Mixed tensors](#mixed-tensors): both indexed and mapped dimensions + +## Canonical type + +A tensor type can be declared with its dimension in any order, but internally they will always be sorted in alphabetical order. 
So the type "`tensor(category{}, brand{}, a[3], x[768], d0[1])`" has the canonical string representation "`tensor(a[3],brand{},category{},d0[1],x[768])`" and the "x" dimension with size 768 is the innermost. For constants, all indexed dimensions must have a known size. + +## Dense tensors + +Tensors using only indexed dimensions are used for storing a vector, a matrix, and so on and are collectively known as "dense" tensors. These are particularly easy to handle, as they always have a known number of cells in a well-defined order. They can be input as nested arrays of numerical values. Example with vector of size 5: + +```json +{ + "type": "tensor(x[5])", + "values": [13.25, -22, 0.4242, 0, -17.0] + } +``` + +The "type" field is optional, but must match the [canonical form of the tensor type](#canonical-type) if present. This format is similar to "Indexed tensors short form" in the [document JSON format](/en/reference/schemas/document-json-format#tensor-short-form-indexed). + +Example of a 3x4 matrix; note that the dimension names will always be processed in [alphabetical order](#canonical-type) from outermost to innermost. + +```json +{ + "type": "tensor(bar[3],foo[4])", + "values": [ + [2.5, 1.0, 2.0, 3.0], + [1.0, 2.0, 3.0, 2.0], + [2.0, 3.0, 2.0, 1.5] + ] + } +``` + +Note that the arrays must have exactly the declared number of elements for each dimension, and be correctly nested. + +Example of an ONNX model input where we have an extra "batch" dimension which is unused (size 1) for this particular input, but still requires extra brackets: + +```json +{ + "type": "tensor(d0[1],d1[5],d2[2])", + "values": [ [ + [1.1, 1.2], + [2.1, 2.2], + [3.1, 3.2], + [4.1, 4.2], + [5.1, 5.2] + ] ] + } +``` + +## Sparse tensors + +Tensors using only mapped dimensions are collectively known as "sparse" tensors. JSON input for these will list the cells directly. 
Tensors with only one mapped dimension can use a simple JSON object as input: + +```json +{ + "type": "tensor(category{})", + "cells": { + "tag": 2.5, + "another": 2.75 + } + } +``` + +The "type" field is optional. This format is similar to "Short form for tensors with a single mapped dimension" in the [document JSON format](/en/reference/schemas/document-json-format#tensor-short-form-mapped). + +Tensors with multiple mapped dimensions must use an array of objects, where each object has an "address" containing the labels for all dimensions, and a "value" with the cell value: + +```json +{ + "type": "tensor(category{},product{})", + "cells": [ + { + "address": { "category": "foo", "product": "bar" }, + "value": 1.5 + }, + { + "address": { "category": "qux", "product": "zap" }, + "value": 3.5 + }, + { + "address": { "category": "pop", "product": "rip" }, + "value": 6.5 + } + ] + } +``` + +Again, the "type" field is optional, but must match the [canonical form of the tensor type](#canonical-type) if present. + +This format is also known as the [general verbose form](/en/reference/schemas/document-json-format#tensor), and it's possible to use it for any tensor type. + +## Mixed tensors + +Tensors with both mapped and indexed dimensions can use a "blocks" format; this is similar to the "cells" formats for sparse tensors, but instead of a single cell value you get a block of values for each address. With one mapped dimension and two indexed dimensions: + +```json +{ + "type": "tensor(a{},x[3],y[4])", + "blocks": { + "bar": [ + [1.0, 2.0, 0.0, 3.0], + [2.0, 2.5, 2.0, 0.5], + [3.0, 6.0, 9.0, 9.0] + ], + "foo": [ + [1.0, 0.0, 2.0, 3.0], + [2.0, 2.5, 2.0, 0.5], + [3.0, 3.0, 6.0, 9.0] + ] + } + } +``` + +The "type" field is optional, but must match the [canonical form of the tensor type](#canonical-type) if present.
This format is similar to the first variant of "Mixed tensors short form" in the [document JSON format](/en/reference/schemas/document-json-format#tensor-short-form-mixed). + +With two mapped dimensions and one indexed dimensions: + +```json +{ + "type": "tensor(a{},b{},x[3])", + "blocks": [ + { + "address": { "a": "qux", "b": "zap" }, + "values": [2.5, 3.5, 4.5] + }, + { + "address": { "a": "foo", "b": "bar" }, + "values": [1.5, 2.5, 3.5] + }, + { + "address": { "a": "pop", "b": "rip" }, + "values": [3.5, 4.5, 5.5] + } + ] + } +``` + +Again, the "type" field is optional. This format is similar to the second variant of "Mixed tensors short form" in the [document JSON format](/en/reference/schemas/document-json-format#tensor-short-form-mixed). \ No newline at end of file diff --git a/mintlify-docs/en/reference/ranking/model-files.mdx b/mintlify-docs/en/reference/ranking/model-files.mdx new file mode 100644 index 0000000000..c31e3bbc6a --- /dev/null +++ b/mintlify-docs/en/reference/ranking/model-files.mdx @@ -0,0 +1,93 @@ +--- +title: "Stateless model reference" +sidebarTitle: "model files" +--- + +_.model_ files are used in [stateless model evaluation](/en/ranking/stateless-model-evaluation). These are files with [ranking expressions](/en/basics/ranking), located in [models](/en/reference/applications/application-packages) / a subdirectory of _models_, with _.model_ suffix: + +``` +├── models +│   └── my_model.model +└── services.xml +``` + +## .model file format specification + +<Card> +model [name] \{ + + inputs \{ + ([input-name] [[input-type](/en/reference/ranking/tensor#tensor-type-spec)])* + \} + + constants \{[constant](#constant)* + \} + + (function [name](\[argument-name\]*) \{ + expression: [[ranking expression](/en/reference/ranking/ranking-expressions)] + \})* + +\} +</Card> + +The elements can appear in any order (and number). 
+ +### Constant element + +<Card> +[constant-name] [type]?: [scalar, [tensor on literal form](/en/reference/ranking/tensor#tensor-literal-form), or `file:` followed by a file reference] +</Card> + +| Name | Description | +| :--- | :--- | +| name | The name of the constant, written either as the full feature name `constant(myName)`, or just as `name`. | +| type | The type of the constant, either `double` or a [tensor type](/en/reference/ranking/tensor#tensor-type-spec). If omitted, the type is double. | +| value | A number, a [tensor on literal form](/en/reference/ranking/tensor#tensor-literal-form), or `file:` followed by a path relative to the model file to a file containing the constant. The file must be stored in the [tensor JSON Format](/en/reference/schemas/schemas#tensor) and end with `.json`. The file may be lz4 compressed, in which case the ending must be `.json.lz4`. | + +Constant examples: + +```js +constants { + myDouble: 0.5 + constant(myOtherDouble) double: 0.6 + constant(myArray) tensor(x[3]):[1, 2, 3] + constant(myMap) tensor(key{}):{key1: 1.0, key2: 2.0} + constant(myLargeTensor) tensor(x[10000]): file:constants/myTensor.json.lz4 +} +``` + +## Model example + +This file must be saved as `example.model` somewhere in the [models](/en/reference/applications/application-packages) directory tree, and the same directory must also contain `myLargeConstant.json.lz4` containing a tensor as compressed JSON.
+ +```js expandable +model example { + + # All inputs that are not scalar (aka 0-dimensional tensor) must be declared + inputs { + + input1 tensor(name{},x[3]) + input2 tensor(x[3]) + } + + constants { + + constant(constant1) tensor(x[3]):{{x:0}:0.5, {x:1}:1.5, {x:2}:2.5} + constant(constant2): 3.0 + constant(myLargeConstant) tensor(x[10000]): file:myLargeConstant.json.lz4 + } + + function foo1() { + + expression: reduce(sum(input1 * input2, name) * constant1, max, x) * constant2 + } + + function foo2() { + + expression: reduce(sum(input1 * input2, name) * constant(constant1asLarge), max, x) * constant2 + } + +} +``` + +This makes the model _example_ available with the functions _foo1_ and _foo2_. \ No newline at end of file diff --git a/mintlify-docs/en/reference/ranking/nativerank.mdx b/mintlify-docs/en/reference/ranking/nativerank.mdx new file mode 100644 index 0000000000..175f01a511 --- /dev/null +++ b/mintlify-docs/en/reference/ranking/nativerank.mdx @@ -0,0 +1,257 @@ +--- +title: "nativeRank Reference" +sidebarTitle: "nativeRank" +--- + +The _nativeRank_ feature produces a reasonable text ranking score which is computed at an acceptable performance, and is a good candidate for [first phase](/en/ranking/phased-ranking) ranking. The _nativeRank_ feature is a linear combination of the normalized scores computed by the features _nativeFieldMatch_, _nativeProximity_, and _nativeAttributeMatch_. All these features are described in detail below. See the [configuration properties](#configuration-properties) section for how to configure the features. + +## nativeFieldMatch + +The _nativeFieldMatch_ feature captures how well query terms match searched index fields by looking at the number of times a term occurs in a field and how early in the field it occurs. The significance and weight of the terms are also taken into account such that unusual terms give a higher rank contribution than common ones. 
+ +The score for _nativeFieldMatch_ is calculated as follows: + +$$ +\text{nativeFieldMatch} = +\frac{ +\sum_{i=1}^{n} +\left( +\text{termSignificance}_i \times \text{termWeight}_i \times +\sum_{j=1}^{m} +\text{fieldWeight}_j +\left( +\text{firstOccImp}_j \times \text{firstOccBoost}_{ij} ++ +(1 - \text{firstOccImp}_j) \times \text{numOccBoost}_{ij} +\right) +\right) +}{ +\sum_{i=1}^{n} +\left( +\text{termSignificance}_i \times \text{termWeight}_i \times +\sum_{j=1}^{m} +\text{fieldWeight}_j \times \text{fmMaxTable}_j +\right) +} +$$ + +where _n_ is the number of query terms searched in index fields, _m_ is the number of fields searched by query term _i_, \(\text{firstOccImp}_j\) is the _firstOccurrenceImportance_ for field _j_, and \(\text{firstOccBoost}_{ij}\), \(\text{numOccBoost}_{ij}\) and \(\text{fmMaxTable}_j\) are given below. + +$$ +\text{firstOccBoost}_{ij} = \text{firstOccurrenceTable}_j\left[\frac{\text{firstOcc}_{ij} \times \text{tableSize}_j}{\max(6, \text{fieldLength}_j)}\right] +$$ + +where \(\text{firstOccurrenceTable}_j\) is the boost table configured for field _j_, typically an expdecay function (see the [boost tables](#boost-tables) section below), \(\text{firstOcc}_{ij}\) is the first occurrence of query term _i_ in field _j_, and \(\text{tableSize}_j\) is the size of the boost table. + +$$ +\text{numOccBoost}_{ij} = \text{occurrenceCountTable}_j\left[\frac{\text{numOccs}_{ij} \times \text{tableSize}_j}{\max(6, \text{fieldLength}_j)}\right] +$$ + +where \(\text{occurrenceCountTable}_j\) is the boost table configured for field _j_, typically a loggrowth function (see the [boost tables](#boost-tables) section below), \(\text{numOccs}_{ij}\) is the number of occurrences of query term _i_ in field _j_, and \(\text{tableSize}_j\) is the size of the boost table. + +$$ +\text{fmMaxTable}_j = \text{firstOccImp}_j \times \max(\text{firstOccurrenceTable}_j) + (1 - \text{firstOccImp}_j) \times \max(\text{occurrenceCountTable}_j) +$$ + +where \(\max(\text{boostTable}_j)\) is the max value in that table.
\(\text{fmMaxTable}_j\) is 1 if table normalization is turned off (see the property _nativeRank.useTableNormalization_ in the [configuration properties](#configuration-properties) section). + +The default behavior for _nativeFieldMatch_ is to consider all query terms searching in all index fields when calculating the score. The calculation can be limited to a specified set of index fields as follows: + +`nativeFieldMatch(f1, f2)` + +In this case only query terms searching in index fields _f1_ and _f2_ are considered. + +## nativeProximity + +The _nativeProximity_ feature captures how near the matched query terms occur in searched index fields by looking at the word distance between query terms in query term pairs. Two query terms that are close to each other should give a higher score than two terms that are far from each other. + +The score for _nativeProximity_ is calculated as follows: + +$$ +\text{nativeProximity} = +\frac{ +\sum_{j=1}^{m} +\left( +\text{fieldWeight}_j \times +\sum_{ab} +\text{termPairWeight}_{ab} +\left( +\text{proxImp}_j \times \text{proxTable}_j[\text{dist}_{ab} - 1] ++ +(1 - \text{proxImp}_j) \times \text{revProxTable}_j[\text{dist}_{ba} - 1] +\right) +\right) +}{ +\sum_{j=1}^{m} +\left( +\text{fieldWeight}_j \times +\sum_{ab} +\text{termPairWeight}_{ab} \times \text{pMaxTable}_j +\right) +} +$$ + +where _m_ is the number of index fields, _ab_ is a term pair searched for in field _j_, \(\text{proxImp}_j\) is the _proximityImportance_ for field _j_, \(\text{proxTable}_j\) is the forward proximity boost table for field _j_, \(\text{dist}_{ab}\) is the minimum distance between occurrences of query terms _a_ and _b_ in field _j_ (_a_ occurs before _b_), \(\text{revProxTable}_j\) is the reverse proximity boost table for field _j_, \(\text{dist}_{ba}\) is the minimum distance between occurrences of query terms _b_ and _a_ in field _j_ (_b_ occurs before _a_), and \(\text{termPairWeight}_{ab}\) and \(\text{pMaxTable}_j\) are given below.
+ +For each field _j_ we consider all query terms searched in this field and generate a set of term pairs. The _slidingWindowSize_ parameter determines how many pairs are generated. With a sliding window of size 3 over the terms _a b c d_, we first consider the terms _a b c_, then the terms _b c d_, and finally the terms _c d_. The following pairs are generated: _ab_, _ac_, _bc_, _bd_, and _cd_. + +$$ +\text{termPairWeight}_{ab} = \text{connectedness}_{ab} \times (\text{termSignificance}_a \times \text{termWeight}_a + \text{termSignificance}_b \times \text{termWeight}_b) +$$ + +$$ +\text{connectedness}_{ac} = \frac{\min(\text{connectedness}_{ab}, \text{connectedness}_{bc})}{\text{dist}_{ac}} +$$ + +where \(\text{dist}_{ac}\) is the distance between terms _a_ and _c_ in the query. + +$$ +\text{pMaxTable}_j = \text{proxImp}_j \times \max(\text{proxTable}_j) + (1 - \text{proxImp}_j) \times \max(\text{revProxTable}_j) +$$ + +where \(\max(\text{boostTable}_j)\) is the max value in that table. \(\text{pMaxTable}_j\) is 1 if table normalization is turned off (see the property _nativeRank.useTableNormalization_ in the [configuration properties](#configuration-properties) section). + +The default behavior for _nativeProximity_ is to consider all index fields and all query term pairs searching in these fields when calculating the score. The calculation can be limited to a specified set of index fields as follows: + +`nativeProximity(f1, f2)` + +In this case only query term pairs searching in index fields _f1_ and _f2_ are considered. + +For multi-value fields, setting [element-gap](/en/reference/schemas/schemas#rank-element-gap) for the field in the rank profile enables distance calculation between adjacent elements.
+ +## nativeAttributeMatch + +The _nativeAttributeMatch_ feature captures how well query terms match searched attribute fields, and is calculated as follows: + +$$ +\text{nativeAttributeMatch} = +\frac{ +\left( +\sum_{i=1}^{n} +\text{termWeight}_i \times \text{attributeWeight}_j \times \text{sign}(w_{ij}) \times \text{weightTable}_j[abs(w_{ij})] +\right) +}{ +\left( +\sum_{i=1}^{n} +\text{termWeight}_i \times \text{attributeWeight}_j \times \max(\text{weightTable}_j) +\right) +} +$$ + +where _n_ is the number of query terms searched in attribute fields, _weightTablej_ is the boost table for attribute _j_, \(\max(\text{weightTable}_j)\) is the max value in that table (1 if table normalization is turned off), \(\text{sign}(w_{ij})\) is the sign of \(w_{ij}\). \(w_{ij}\) is dependent on the attribute type: + +- **Weighted set**: equals the weight associated with the key (represented by query term _i_) in attribute _j_. +- **Array**: equals the number of occurrences of query term _i_ in attribute _j_. +- **Single**: equals 1. + +The default behavior for _nativeAttributeMatch_ is to consider all query terms searching in all attribute fields when calculating the score. The calculation can be limited to a specified set of attribute fields as follows: + +`nativeAttributeMatch(a1, a2)` + +In this case only query terms searching in attribute fields _a1_ and _a2_ are considered. + +## nativeRank + +The _nativeRank_ feature is just a linear combination of the three other features, and is calculated as follows: + +$$ +\text{nativeRank} = +\frac{ +\text{fmw} \times \text{nativeFieldMatch} + \text{pw} \times \text{nativeProximity} + \text{amw} \times \text{nativeAttributeMatch} +}{ +\text{fmw} + \text{pw} + \text{amw} +} +$$ + +where _fmw_ is the _fieldMatchWeight_, _pw_ is the _proximityWeight_, and _amw_ is the _attributeMatchWeight_. 
+ +The default behavior when calculating the native rank score is to consider all query terms searching in all defined index fields and attribute fields. In many cases though only a subset of these fields are of interest in the rank score calculation. You can set up _nativeRank_ for a subset of fields by specifying the field names in the parameter list as follows: + +```yaml +first-phase { + expression: nativeRank(title,body,tags) +} +``` + +In this case we have two index fields (_title_ and _body_) and one attribute field (_tags_), and the _nativeRank_ feature is calculated based on the features _nativeFieldMatch(title,body)_, _nativeProximity(title,body)_, and _nativeAttributeMatch(tags)_. Note that the CPU cost of calculating the native rank score is also reduced when specifying a subset of the fields. + +## Variables + +This is a list of the common variables used in the formulas above: + +- _attributeWeightj_: The weight of attribute field _j_. See the [schema reference](/en/reference/schemas/schemas#weight) for how to set this weight. The default value is 100. +- _connectednessab_: The connectedness between query terms _a_ and _b_. +- _fieldLengthj_: The length of field _j_ in number of words. +- _fieldWeightj_: The weight of index field _j_. See the [schema reference](/en/reference/schemas/schemas#weight) for how to set this weight. The default value is 100. +- _termSignificancei_: The significance of query term _i_. +- _termWeighti_: The weight of query term _i_. + +## Configuration properties + +This is a comprehensive list of all the configuration properties to all native rank features: + +- `nativeFieldMatch(fieldName)` -> `averageFieldLength`: The actual length of the field in the given document. When set this replaces the true field length in the nativeFieldMatch formula for all documents. +- `nativeFieldMatch(fieldName)` -> `firstOccurrenceTable`: The table used when calculating boost for the first occurrence in the given field. 
+- `nativeFieldMatch(fieldName)` -> `occurrenceCountTable`: The table used when calculating boost for the number of occurrences in the given field. +- `nativeFieldMatch(fieldName)` -> `firstOccurrenceImportance`: The importance value used for the given field. +- `nativeProximity` -> `proximityTable`: The table used when calculating forward proximity boost in a field. +- `nativeProximity(fieldName)` -> `proximityTable`: The table used when calculating forward proximity boost in the given field. +- `nativeProximity` -> `reverseProximityTable`: The table used when calculating reverse proximity boost in a field. +- `nativeProximity(fieldName)` -> `reverseProximityTable`: The table used when calculating reverse proximity boost in the given field. +- `nativeProximity` -> `proximityImportance`: The importance value used for the given field. +- `nativeProximity` -> `slidingWindowSize`: The size of the sliding window used when generating term pairs. + <Warning> + **Deprecated:** The elementGap rank property is deprecated and will be removed in Vespa 9. + </Warning> +- nativeProximity -> `elementGap.fieldName`: The gap between positions in adjacent elements in multi-value fields. Use the [element-gap](/en/reference/schemas/schemas#rank-element-gap) rank setting instead. +- `nativeAttributeMatch` -> `weightTable`: The table used when calculating boost for matching in an attribute field. +- `nativeAttributeMatch` -> `weightTable.attributeName`: The table used when calculating boost for matching in the given attribute. +- `nativeRank` -> `fieldMatchWeight`: How much to weight the score from _nativeFieldMatch_. +- `nativeRank` -> `proximityWeight`: How much to weight the score from _nativeProximity_. If table normalization is turned off the default value is 100.0. +- `nativeRank` -> `attributeMatchWeight`: How much to weight the score from _nativeAttributeMatch_. +- `nativeRank` -> `useTableNormalization`: Whether we should use table normalization for the native rank features. 
Set this property to _false_ to turn off table normalization + +For example, to override the _occurrenceCountTable_ and _reverseProximityTable_ for the index field _content_, add the following to the rank profile in the sd file: + +```bash +rank-properties { + nativeFieldMatch(content).occurrenceCountTable: "linear(0,0)" + nativeProximity(content).reverseProximityTable: "linear(0,0)" +} +``` + +See the [search definitions](/en/reference/schemas/schemas#rank-properties) reference for more information on rank-properties. + +### Boost tables + +The following boost tables are supported by the native rank features: + +| Name | Function | Description | +| :--- | :--- | :--- | +| expdecay(w,t) | `w * exp(-x/t)` | Represents an exponential decay function where _w_ is the weight controlling the amplitude and _t_ is the tune parameter controlling the slope. | +| loggrowth(w,t,s) | `w * log(1 + (x/s)) + t` | Represents a logarithmic growth function where _w_ is the weight controlling the amplitude, _t_ is the tune parameter controlling the offset, and _s_ is a scale parameter controlling the sensitivity to the variable _x_ | +| linear(w,t) | `w * x + t` | Represents a linear function where _w_ controls the slope and _t_ controls the offset. | + +The parameters _w_, _t_, and _s_ are floating point numbers, the same as the content of the tables. The default table size is 256 with x in the interval [0,255]. You can override this default size by specifying an optional last parameter to the table name. For instance, if you use _linear(1.5,0,512)_ you get a table with size 512 populated with the result of evaluating the function \(1.5 \times x + 0\) for all x in the interval [0,511]. + +### Rank types + +Four predefined rank types are supported by _nativeRank_: _about_ (default), _identity_, _tags_, and _empty_. Each type is associated with a set of boost tables that are used by the native rank features. 
See the [rank type](/en/reference/ranking/rank-types) document for detailed information on these types. + +When setting up the sd file, either use one of the predefined rank types for a field, or explicitly specify the boost tables to use for that field as a set of rank-properties. If you don't specify anything you get the boost tables associated with the _about_ type. The _about_ boost tables for _nativeFieldMatch_ and _nativeProximity_ are already optimized for textual match, while the boost table for _nativeAttributeMatch_ is data dependent and must be optimized for each use case. + +## nativeRank limitations + +The nativeRank feature is a pure text match scoring feature. In particular, it does not take the following concepts into account for documents that match a query: + +- Static rank or any other relevancy contribution that is based on a numeric value. Use the _attribute_ feature in a ranking expression to get this concept into the final relevancy score. +- Geographical location of a match correlated to a location associated with the query. Use the _distance_ or _closeness_ feature in a ranking expression to take this into account. +- The age of the matching documents. Use the _freshness_ feature in a ranking expression to take this into account. \ No newline at end of file diff --git a/mintlify-docs/en/reference/ranking/rank-feature-configuration.mdx b/mintlify-docs/en/reference/ranking/rank-feature-configuration.mdx new file mode 100644 index 0000000000..a6f6d57a7e --- /dev/null +++ b/mintlify-docs/en/reference/ranking/rank-feature-configuration.mdx @@ -0,0 +1,85 @@ +--- +title: "Rank Feature Configuration" +--- + +For some [rank features](/en/reference/ranking/rank-features), it is possible to set configuration variables for how the features are calculated. For features that are per field or attribute, the variables are set separately per field/attribute.
+ +## Properties + +Rank feature configuration properties are set by adding the following to the rank profile: + +```js +rank-properties { + <featurename>.<configuration-property>: <value> +} +``` + +Where _\<featurename\>_ is the name of a feature class (feature name up to the first dot), _\<configuration-property\>_ is a property from the list below, appropriate for the feature, and _\<value\>_ is either a number or a quoted string. Example: + +```js +rank-profile my-profile inherits default { + rank-properties { + fieldMatch(title).maxAlternativeSegmentations: 10 + fieldMatch(title).maxOccurrences: 5 + bm25(title).k1: 1.5 + bm25(title).b: 0.85 + bm25(title).averageFieldLength: 200 + elementwise(bm25(content),x,float).k1: 1.4 + elementwise(bm25(content),x,float).b: 0.87 + elementwise(bm25(content),x,float).averageElementLength: 50 + elementSimilarity(tags).output.sumWeightSquared: "sum((0.35*p+0.15*o+0.30*q+0.20*f)*w^2)" + elementSimilarity(tags).output.avgWeightSquared: "avg((0.35*p+0.15*o+0.30*q+0.20*f)*w^2)" + elementSimilarity(tags).output.sumWeight: "sum(w)" + } +} +``` + +For features missing from the list of properties below, a rank property can be set with another syntax, e.g. setting _averageFieldLength_ for _my\_field_ for the _nativeFieldMatch_ feature can be done as: + +```bash +nativeFieldMatch.averageFieldLength.my_field: 512 +``` + +Rank properties are inherited like other content of rank profiles. + +## List of properties + +An incomplete list of rank properties by the feature they apply to.
+ +| Feature | Parameter | Default | Description | +|----------------------------------------------|------------------------------|---------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| term | numTerms | 5 | The number of terms for which this is included in the rank features dump in the summary | +| [bm25(*fieldname*)](/en/reference/ranking/rank-features#bm25) | k1 | 1.2 | Used to limit how much a single query term can affect the score for a document. | +| | b | 0.75 | Used to control the effect of the field length compared to the average field length. | +| | averageFieldLength | Automatically calculated per field per content node for [indexed search](/en/reference/applications/services/content#document), 100 for [streaming search](/en/performance/streaming-search). | Used to set an explicit value for the average field length (in number of words). When using [streaming search](/en/performance/streaming-search#differences-in-streaming-search), no index structures are generated, and the average field length is not automatically calculated. Instead, manually set an average field length for a more precise BM25 score. | +| [elementwise(bm25(*fieldname*,x,*celltype*))](/en/reference/ranking/rank-features#elementwise-bm25) | k1 | 1.2 | Used to limit how much a single query term can affect the score for a document. 
Note that `bm25(fieldname).k1` will be used as a fallback before the default. | +| | b | 0.75 | Used to control the effect of the element length compared to the average element length. Note that `bm25(fieldname).b` will be used as a fallback before the default. | +| | averageElementLength | Automatically calculated per field element per content node for [indexed search](/en/reference/applications/services/content#document), 100 for [streaming search](/en/performance/streaming-search). | Used to set an explicit value for the average element length (in number of words). When using [streaming search](/en/performance/streaming-search#differences-in-streaming-search), no index structures are generated and the average element length is not automatically calculated. Instead, manually set an average element length for a more precise elementwise BM25 score. It should also be manually set for multi-node indexed search to get consistent scoring across the nodes. Note that `bm25(fieldname).averageFieldLength` will be used as a fallback before the default. | +| nativeRank | | | See the [nativeRank configuration](/en/reference/ranking/nativerank#configuration-properties) documentation | +| nativeFieldMatch | | | See the [nativeRank configuration](/en/reference/ranking/nativerank#configuration-properties) documentation | +| nativeProximity | | | See the [nativeRank configuration](/en/reference/ranking/nativerank#configuration-properties) documentation | +| fieldMatch | proximityLimit | 10 | The maximum allowed gap within a segment. | +| | proximityTable | 1/(2^(i/2)/3) for i in 9..0 followed by 1/2^(i/2) for i in 0..10 | The proximity table deciding the importance of separations of various distances. The table must have size proximityLimit\*2+1, where the first half is for reverse direction distances. The table must only contain values between 0 and 1, where 1 is "perfect" and 0 is "worst". 
| +| | maxAlternativeSegmentations | 10000 | The maximum number of *alternative* segmentations allowed in addition to the first one found. This will prefer to not consider iterations on segments that are far out in the field, and which start late in the query. | +| | maxOccurrences | 100 | The number that the occurrence count of each word is normalized against. This should be set to the number above which additional occurrences of the term have no real significance. | +| | proximityCompletenessImportance | 0.9 | The importance of the match having high proximity and being complete, relative to earlinessImportance, segmentProximityImportance and occurrenceImportance in the `match` metric. | +| | relatednessImportance | 0.9 | The normalized importance of relatedness used in the `match` metric. | +| | earlinessImportance | 0.05 | The importance of the match occurring early in the query, relative to segmentProximityImportance, occurrenceImportance and proximityCompletenessImportance in the `match` metric. | +| | segmentProximityImportance | 0.05 | The importance of multiple segments being close to each other, relative to earlinessImportance, occurrenceImportance and proximityCompletenessImportance in the `match` metric. | +| | occurrenceImportance | 0.05 | The importance of having many occurrences of the query terms, relative to earlinessImportance, segmentProximityImportance and proximityCompletenessImportance in the `match` metric. | +| | fieldCompletenessImportance | 0.05 | A number between 0 and 1 that determines the importance of field completeness in relation to query completeness in the `match` and `completeness` metrics. | +| fieldTermMatch | numTerms | 5 | The number of terms for which this is included in the rank features dump in the summary | +| | numTerms.*fieldName* | 5 | The number of terms for which this is included in the rank features dump in the summary for the specified field. Also configurable using `fieldTermMatch(fieldName).numTerms` as the property name.
| +| elementCompleteness | fieldCompletenessImportance | 0.5 | Higher values favor field completeness, lower values favor query completeness. Adjusting this parameter will also affect which element is selected as the best. | +| elementSimilarity | output.default | "max( (0.35*p + 0.15*o + 0.30*q + 0.20*f) * w)" | Describes how the default output should be calculated. The value must be on the form `aggregator(expression)`. The expression is used to combine the low-level similarity measures between the query and individual elements in the field that matched the query. The aggregator will be used to aggregate the output of the expression across matched elements. The available aggregators are `max`, `avg`, and `sum`. The available expression operators are `+`, `-`, `*`, `/`, and `^`. Parentheses may be used to override default operator precedence. Note that you must quote the expression using `"expression"`. <br/><br/>Terminals can be numbers or any of the following symbols:<br/><br/>| Symbol | Meaning |<br/>|--------|----------------------------|<br/>| **p** | normalized proximity measure |<br/>| **o** | normalized ordering measure |<br/>| **q** | normalized query coverage |<br/>| **f** | normalized field coverage |<br/>| **w** | element weight | | +| | output.name | N/A | Define an additional feature output called `name`. The value describes how the output should be calculated and has the same syntax as the `default` output described above. Example create a new output which can be accessed as `elementSimilarity(tags).sumW`:<br/>`elementSimilarity(tags).output.sumW: "sum(w)"` | +| attributeMatch | fieldCompletenessImportance | 0.05 | A number between 0 and 1 that determines the importance of field completeness in relation to query completeness in the `match` and `completeness` metrics. | +| | maxWeight | 256 | The maximal weight when calculating `attributeMatch(name).normalizedWeight`. Weights higher than this will not have any effect on this feature. 
| +| closeness | maxDistance | 9013305.0 | The maximal distance when calculating `closeness(name)`. Distances higher than this will not have any effect on this feature. The default is about 1000 km (1 km is about 9013.305 microdegrees). | +| | scaleDistance | 45066.525 | Basic scale for distances when calculating `closeness(name).logscale`. The default is about 5 km.<br/>**Deprecated:** use `halfResponse` instead | +| | halfResponse | 593861.739 | The distance that should give an output of 0.5 when calculating `closeness(name).logscale`. The default is about 65.89 km (must be in the range [1, maxDistance/2>). Use this parameter to fine-tune the distance range where half of the dynamics of the logscale function will be used. | +| freshness | maxAge | 3*30*24*60*60 | The maximal age in seconds when calculating `freshness(name)`. Ages older than this will not have any effect on this feature. The default is about 3 months. | +| | halfResponse | 7*24*60*60 | The age in seconds that should give an output of 0.5 when calculating `freshness(name).logscale`. The default is 7 days (must be in the range [1, maxAge/2>). Use this parameter to fine-tune the age range where half of the dynamics of the logscale function will be used. | +| random | seed | Current time in microseconds | The random seed. | +| randomNormal | seed | Current time in microseconds | The random seed for randomNormal. | +| foreach | maxTerms | | | \ No newline at end of file diff --git a/mintlify-docs/en/reference/ranking/rank-features.mdx b/mintlify-docs/en/reference/ranking/rank-features.mdx new file mode 100644 index 0000000000..5e7c1aa2d9 --- /dev/null +++ b/mintlify-docs/en/reference/ranking/rank-features.mdx @@ -0,0 +1,904 @@ +--- +title: "Rank Feature Reference" +sidebarTitle: "Rank features" +--- + +This is the list of the rank features in Vespa. 
These features are available during document ranking for combination into a complete rank score by a [ranking expression](/en/reference/ranking/ranking-expressions). The features are a combination of coarse grained features suitable for handwritten expressions, and finer grained features suitable for machine learning. + +See also [the overview of the ranking framework](/en/basics/ranking), and [rank feature configuration parameters](/en/reference/ranking/rank-feature-configuration). Notes: + +- Types: All rank feature values are doubles or tensors. Integers are converted to exact whole value doubles. String values are converted to exact whole value doubles using a hash function. String literals in ranking expressions are converted using the same hash function, to enable equality tests on string values. +- Features which are _normalized_ are between 0 and 1, where 0 is always the minimum and 1 the maximum. Normalized features should normally be preferred because they are more easily combined by [ranking expressions](/en/reference/ranking/ranking-expressions) into a complete normalized score. +- A query may override _any_ rank feature value by submitting that value as a feature with the query. +- Some features have parameters. It is always allowed to quote parameters with _"_. Nested quotes are not allowed and must be escaped using _\_. Parameters that can be parsed as feature names may be left unquoted. Examples: _foo(bar(baz(5.5)))_, _foo("bar(\"baz(\\\"5.5\\\")\")")_, _foo("need quote")_ + +## Feature list +### Query features + +- **query(value)**<br/> + Default: 0 + + An application specific feature submitted with the query, see [using the query feature](/en/ranking/ranking-expressions-features#using-query-variables). +- **term(n).significance**<br/> + Default: 0 + + A normalized number (between 0.0 and 1.0) describing the significance of the term; used as a multiplier or weighting factor by many other text matching rank features. 
+ + This should ideally be set by a searcher in the container for global correctness as each node will estimate the significance values from the local corpus. Use the [Java API for significance](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/TaggableItem.html#setSignificance\(double\)) or [YQL annotation for significance](/en/reference/querying/yql#significance). + + As a fallback, a significance based on Robertson-Sparck-Jones term weighting is used; it is logarithmic from 1.0 for rare terms down to 0.5 for common terms (those occurring in every document seen). + + Note that "rare" is defined as a frequency of 0.000001 or less. This is the term document frequency (how many documents contain the term out of all documents that can be observed), so you cannot get 1.0 as the fallback until you actually have a large number of documents (minimum 1 million) in the same search process. + See [numTerms](/en/reference/ranking/rank-feature-configuration#term) config. +- **term(n).weight**<br/> + Default: 100 + + The importance of matching this query term given in the query +- **term(n).connectedness**<br/> + Default: 0.1 + + The normalized strength with which this term is connected to the previous term in the query. Must be assigned to query terms in a [searcher](/en/applications/searchers) using the [Java API for connectivity](https://javadoc.io/doc/com.yahoo.vespa/container-search/latest/com/yahoo/prelude/query/TaggableItem.html#setConnectivity\(com.yahoo.prelude.query.Item,double\)) or [YQL annotation for connectivity](/en/reference/querying/yql#connectivity). +- **queryTermCount**<br/> + Default: 0 + + The total number of terms in this query, including both user and synthetic terms in all fields. + +### Document features + + - **fieldLength(name)**<br/> + Default: 1000000 + + The number of terms in this field if one or more query term matched the field, 1000000 if no query term matched the field. 
+- **attribute(name)**<br/> + + Default: null + + The value of a [tensor](/en/reference/schemas/schemas#tensor) or single value *numeric* attribute or null/NaN if not set. Use *isNan()* to check if value is not defined. Using undefined values in ranking expressions leads to undefined behavior. +- **attribute(name,n)**<br/> + Default: 0 + + The value at index n (base 0) of a *numeric* array attribute with the given name. Note that the index number must be explicit, it cannot be the output of an [expression function](/en/reference/schemas/schemas#function-rank). The order of the items in an array attribute is the same as the order they have in the input feed. If items are added using partial updates they are added to the end of the existing items list. +- **attribute(name,key).weight**<br/> + Default: 0 + + The weight found at a given key in a weighted set attribute +- **attribute(name,key).contains**<br/> + Default: 0 + + 1 if the given key is present in a weighted set attribute, 0 otherwise +- **attribute(name).count**<br/> + Default: 0 + + The number of elements in the attribute with the given name. +- **tensorFromWeightedSet(source,dimension)**<br/> + Default: empty tensor + + Creates a `tensor<double>` with one mapped dimension from the given integer or string weighted set attribute. The attribute is specified as the full feature name, `attribute(name)`. The *dimension* parameter is optional. If omitted the dimension name will be the attribute name. + Example: Given the weighted set: + + ```bash + {key1:0, key2:1, key3:2.5} + ``` + *tensorFromWeightedSet(attribute(myField), dim)* produces: + ```bash + tensor<double>(dim{}):{ {dim:key1}:0.0, {dim:key2}:1.0, {dim:key3}:2.5} } + ``` + <Info> + **Note:** This creates a temporary tensor, and has build cost and extra memory is touched. Tensor evaluation is most effective when the cell types of all tensors are equal - use [cell\_cast](/en/reference/ranking/ranking-expressions#cell_cast) to enable optimizations. 
Also, duplicating the field in the schema to a native tensor instead of creating from a set can increase performance. + </Info> + +- **tensorFromLabels(attribute,dimension)**<br/> + Default: empty tensor + + Creates a `tensor<double>` with one mapped dimension from the given single value or array attribute. The value(s) must be integers or strings. The attribute is specified as the full feature name, `attribute(name)`. The *dimension* parameter is optional. If omitted the dimension name will be the attribute name. + Example: Given an attribute field `myField` containing the array value: + + ```bash + [v1, v2, v3] + ``` + *tensorFromLabels(attribute(myField), dim)* produces: + ```bash + tensor<double>(dim{}):{ {dim:v1}:1.0, {dim:v2}:1.0, {dim:v3}:1.0} } + ``` + See *tensorFromWeightedSet* for performance notes. + +- **tensorFromStructs(attribute,key,value,type)**<br/> + Default: empty tensor + + Creates a `tensor<type>` with one mapped dimension from the given `array<struct>` attribute. Keys are taken from the struct field *key* and values from the struct field *value*. The resulting tensor will have one mapped dimension named after the *key* field. The *type* parameter is required and must be `float` or `double`. + + Example: Given an `array<struct>` attribute `items` with fields `name` (string) and `price` (float): + + ```bash + tensorFromStructs(attribute(items), name, price, float) + ``` + ```bash + tensor<float>(name{}):{ {name:apple}:1.5, {name:banana}:0.75, {name:cherry}:2.25 } + ``` + + Example: Integer keys and float values: + ```bash + tensorFromStructs(attribute(ids), id, score, double) + ``` + ```bash + tensor(id{}):{ {id:100}:10.5, {id:200}:20.75, {id:300}:30.25 } + ``` + The function takes at least four arguments; it's possible to use several *key* arguments to get multiple mapped dimensions. 
If the `struct` above also contained a `region` field one could use + + ```bash + tensorFromStructs(attribute(items), name, region, price, float) + ``` + to get a `tensor<float>(name{},region{})` making it possible to have different prices of apples in different regions. + + *Details:* Empty or missing arrays yield an empty tensor of the requested type. The first argument must be an `attribute(...)` source. It's an application responsibility to ensure that the key (or combination of keys in the multi-dimensional case) is unique. + + See *tensorFromWeightedSet* for performance notes. + +### Field match features - normalized + +fieldMatch features provide a good measure of the degree to which a query matches the text of a field, but are expensive to calculate and therefore often only suitable for [second-phase](/en/ranking/phased-ranking) ranking expressions. See the [string segment match](/en/reference/ranking/string-segment-match) document for details on the algorithm computing this rank-feature set. Note that even using a fine-grained sub features like fieldMatch(_name_).absoluteOccurrence will have the same complexity and cost as using the general top level fieldMatch(_name_) feature. + +- **fieldMatch(name)**<br/> + Default: 0 + + A normalized measure of the degree to which this query and field matched (default, the long name of this is `match`). Use this if you do not want to create your own combination function of more fine-grained fieldmatch features. + +- **fieldMatch(name).proximity**<br/> + Default: 0 + + Normalized proximity - a value which is close to 1 when matched terms are close *inside each segment*, and close to zero when they are far apart inside segments. Relatively more connected terms influence this value more. This is absoluteProximity/average connectedness for the query terms for this field. + + Note that if all the terms are far apart, the proximity will be 1, but the number of segments will be high. 
Proximity is only concerned with closeness within segments, a total score must also take the number of segments into account. + +- **fieldMatch(name).completeness**<br/> + Default: 0 + + The normalized total completeness, where field completeness is more important: + + queryCompleteness * ( 1 - [fieldCompletenessImportance](/en/reference/ranking/rank-feature-configuration#fieldMatch) ) + [fieldCompletenessImportance](/en/reference/ranking/rank-feature-configuration#fieldMatch) * fieldCompleteness +- **fieldMatch(name).queryCompleteness**<br/> + Default: 0 + + The normalized ratio of query tokens matched in the field: + + `matches/query terms searching this field` +- **fieldMatch(name).fieldCompleteness**<br/> + Default: 0 + + The normalized ratio of query tokens which was matched in the field: + + `matches/fieldLength` +- **fieldMatch(name).orderness**<br/> + Default: 0 + + A normalized metric of how well the order of the terms agrees in the chosen segments: + + `1-outOfOrder/pairs` +- **fieldMatch(name).relatedness**<br/> + Default: 0 + + A normalized measure of the degree to which different terms are related (occurring in the same segment): + + `1-(segments-1)/(matches-1)` +- **fieldMatch(name).earliness**<br/> + Default: 0 + + A normalized measure of how early the first segment occurs in this field. + +- **fieldMatch(name).longestSequenceRatio**<br/> + Default: 0 + + A normalized metric of the relative size of the longest sequence: + + `longestSequence/matches` + +- **fieldMatch(name).segmentProximity**<br/> + Default: 0 + + A normalized metric of the closeness (inverse of spread) of segments in the field: + + `1-segmentDistance/fieldLength` + +- **fieldMatch(name).unweightedProximity**<br/> + Default: 0 + + The normalized proximity of the matched terms, not taking term connectedness into account. This number is close to 1 if all the matched terms are following each other in sequence, and close to 0 if they are far from each other or out of order. 
+ +- **fieldMatch(name).absoluteProximity**<br/> + Default: 0 + + Returns the normalized proximity of the matched terms, weighted by the connectedness of the query terms. This number is 0.1 if all the matched terms are following each other in sequence and have default or lower connectedness, close to 1 if they are following in sequence and have a high connectedness, and close to 0 if they are far from each other in the segments or out of order. + +- **fieldMatch(name).occurrence**<br/> + Default: 0 + + Returns a normalized measure of the number of occurrences of the terms of the query. This is 1 if there are many occurrences of the query terms *in absolute terms, or relative to the total content of the field*, and 0 if there are none. + + This is suitable for occurrence in fields containing regular text. + +- **fieldMatch(name).absoluteOccurrence**<br/> + Default: 0 + + Returns a normalized measure of the number of occurrences of the terms of the query: + + $$ + \frac{\sum_{\text{all query terms}} \min\left(\text{number of occurrences of the term}, \text{maxOccurrences}\right)}{\text{query term count} \times 100} + $$ + + This is 1 if there are many occurrences of the query terms, and 0 if there are none. + + This number is not relative to the field length, so it is suitable for uses of occurrence to denote relative importance between matched terms (i.e. fields containing keywords, not normal text). + +- **fieldMatch(name).weightedOccurrence**<br/> + Default: 0 + + Returns a normalized measure of the number of occurrences of the terms of the query, weighted by term weight. This number is close to 1 if there are many occurrences of highly weighted query terms, in absolute terms, or relative to the total content of the field, and 0 if there are none. 
+ +- **fieldMatch(name).weightedAbsoluteOccurrence**<br/> + Default: 0 + + Returns a normalized measure of the number of occurrence of the terms of the query, taking weights into account so that occurrences of higher weighted query terms has more impact than lower weighted terms. + This is 1 if there are many occurrences of the highly weighted terms, and 0 if there are none. + + This number is not relative to the field length, so it is suitable for uses of occurrence to denote relative importance between matched terms (i.e. fields containing keywords, not normal text). + +- **fieldMatch(name).significantOccurrence**<br/> + Default: 0 + + Returns a normalized measure of the number of occurrence of the terms of the query *in absolute terms, or relative to the total content of the field*, weighted by term significance. + + This number is 1 if there are many occurrences of the highly significant terms, and 0 if there are none + +### Field match features - normalized and relative to the whole query +- **fieldMatch(name).weight**<br/> + Default: 0 + + The normalized weight of this match relative to the whole query: The sum of the weights of all *matched* terms/the sum of the weights of all *query* terms. If all the query terms were matched, this is 1. If no terms were matched, or these matches has weight zero this is 0. + + As the sum of this number over all the terms of the query is always 1, sums over all fields of normalized rank features for each field multiplied by this number for the same field will produce a normalized number. + + Note that this scales with the number of matched query terms in the field. If you want a component which does not, divide by matches. + +- **fieldMatch(name).significance**<br/> + Default: 0 + + Returns the normalized term significance of the terms of this match relative to the whole query: The sum of the significance of all *matched* terms/the sum of the significance of all *query* terms. If all the query terms were matched, this is 1. 
If no terms were matched, or if the significance of all the matched terms is zero, this number is zero. + + This metric has the same properties as weight. + + See the [term(n).significance](/en/reference/ranking/rank-features#term(n).significance) feature for how the significance for a single term is calculated. +- **fieldMatch(name).importance**<br/> + Default: 0 + + Returns the average of significance and weight. This has the same properties as those metrics. +### Field match features - not normalized + +- **fieldMatch(name).segments**<br/> + Default: 0 + + The number of field text segments which are needed to match the query as completely as possible +- **fieldMatch(name).matches**<br/> + Default: 0 + + The total number of query terms which was matched in this field +- **fieldMatch(name).degradedMatches**<br/> + Default: 0 + + The number of degraded query terms which was matched in this field. A degraded term is a term where no occurrence information is available during calculation. The number of degraded matches is less than or equal to the total number of matches. 
+- **fieldMatch(name).outOfOrder**<br/> + Default: 0 + + The total number of out of order token sequences *within* matched field segments +- **fieldMatch(name).gaps**<br/> + Default: 0 + + The total number of position jumps (backward or forward) within field segments +- **fieldMatch(name).gapLength**<br/> + Default: 0 + + The summed length of all gaps within segments +- **fieldMatch(name).longestSequence**<br/> + Default: 0 + + The size of the longest matched continuous, in-order sequence in the field +- **fieldMatch(name).head**<br/> + Default: 0 + + The number of tokens in the field preceding the start of the first matched segment +- **fieldMatch(name).tail**<br/> + Default: 0 + + The number of tokens in the field following the end of the last matched segment +- **fieldMatch(name).segmentDistance**<br/> + Default: 0 + + The sum of the distance between all segments making up a match to the query, measured as the sum of the number of token positions separating the *start* of each *field* adjacent segment. + +### Query and field similarity + - **fieldTermMatch(name,n).firstPosition**<br/> + Default: 1000000 + + The position of the first occurrence of this query term in this index field. [numTerms](/en/reference/ranking/rank-feature-configuration#fieldTermMatch) configuration +- **fieldTermMatch(name,n).occurrences**<br/> + Default: 0 + + The number of occurrences of this query term in this index field + +- **matchCount(name)**<br/> + Default: 0 + + Returns number of times any term in the query matches this index/attribute field. + +- **matches(name)**<br/> + Default: 0 + + Returns 1 if the index/attribute field with the given name is matched by the query. +- **matches(name,n)**<br/> + Default: 0 + + Returns 1 if the index/attribute field with the given name is matched by the query term with position *n*. +- **termDistance(name,x,y).forward**<br/> + Default: 1000000 + + The minimum distance between the occurrences of term *x* and term *y* in this index field. 
Term *x* occurs before term *y*. + +- **termDistance(name,x,y).forwardTermPosition**<br/> + Default: 1000000 + + The position of the occurrence of term *x* in this index field used for the forward distance. +- **termDistance(name,x,y).reverse**<br/> + Default: 1000000 + + The minimum distance between the occurrences of term *y* and term *x* in this index field. Term *y* occurs before term *x*. +- **termDistance(name,x,y).reverseTermPosition**<br/> + Default: 1000000 + + The position of the occurrence of term *y* in this index field used for the reverse distance. + +### Query term and field match features + +- **fieldTermMatch(name,n).firstPosition**<br/> + Default: 1000000 + + The position of the first occurrence of this query term in this index field. [numTerms](/en/reference/ranking/rank-feature-configuration#fieldTermMatch) configuration +- **fieldTermMatch(name,n).occurrences**<br/> + Default: 0 + + The number of occurrences of this query term in this index field +- **matchCount(name)**<br/> + Default: 0 + + Returns number of times any term in the query matches this index/attribute field. +- **matches(name)**<br/> + Default: 0 + + Returns 1 if the index/attribute field with the given name is matched by the query. +- **matches(name,n)**<br/> + Default: 0 + + Returns 1 if the index/attribute field with the given name is matched by the query term with position *n*. +- **termDistance(name,x,y).forward**<br/> + Default: 1000000 + + The minimum distance between the occurrences of term *x* and term *y* in this index field. Term *x* occurs before term *y*. +- **termDistance(name,x,y).forwardTermPosition**<br/> + Default: 1000000 + + The position of the occurrence of term *x* in this index field used for the forward distance. +- **termDistance(name,x,y).reverse**<br/> + Default: 1000000 + + The minimum distance between the occurrences of term *y* and term *x* in this index field. Term *y* occurs before term *x*. 
+- **termDistance(name,x,y).reverseTermPosition**<br/> + Default: 1000000 + + The position of the occurrence of term *y* in this index field used for the reverse distance + +### Features for indexed multivalue string fields +- **elementCompleteness(name).completeness**<br/> + Default: 0 + + A weighted combination of fieldCompleteness and queryCompleteness for the element in the field that produces the highest value for this output after the element's weight is factored in. The weighting can be adjusted using [elementCompleteness(name).fieldCompletenessImportance](/en/reference/ranking/rank-feature-configuration#elementCompleteness). +- **elementCompleteness(name).fieldCompleteness**<br/> + Default: 0 + + The field completeness of the best matching element. This is calculated as: + + `min( (number of query terms matched in the element) / (element size), 1.0)`. +- **elementCompleteness(name).queryCompleteness**<br/> + Default: 0 + + The query completeness of the best matching element. This is calculated as: + + `(sum of weight for query terms matched in the element) / (sum of weight for query terms searching the field)`. +- **elementCompleteness(name).elementWeight**<br/> + Default: 0 + + The weight of the best matching element, starting from the default - i.e., negative weights will return 0. +- **elementSimilarity(name)**<br/> + Default: 0 + + Aggregated similarity between the query and individual field elements. The same sub-scores used by the `textSimilarity` feature are calculated for each individual element in the field. The final output is calculated as the maximum of the combined element similarity measures (similarity measures are combined the same way as the default output of the `textSimilarity` feature) multiplied with the element weight which is 1 for arrays, and the supplied weights for indexed weighted sets. + + This is a flexible feature; how sub-scores are combined for each element and how element scores are aggregated may be configured. 
You may also add additional outputs if you want to capture multiple signals from a single field. Use [elementSimilarity](/en/reference/ranking/rank-feature-configuration#elementSimilarity) to customize this feature. +### Attribute match features - normalized + - **attributeMatch(name)**<br/> + Default: 0 + + A normalized measure of the degree to which this query and field matched. This is currently the same as completeness. Note that depending on what the attribute is used for, this may or may not be a suitable metric. If the attribute is a weighted set representing counts of items (like tags), `normalizedWeight` is probably a better metric. +- **attributeMatch(name).completeness**<br/> + Default: 0 + + The normalized total completeness, where field completeness is more important: + + queryCompleteness * ( 1 - [fieldCompletenessImportance](/en/reference/ranking/rank-feature-configuration#attributeMatch) ) + [fieldCompletenessImportance](/en/reference/ranking/rank-feature-configuration#attributeMatch) * fieldCompleteness +- **attributeMatch(name).queryCompleteness**<br/> + Default: 0 + + The query completeness for this attribute: + `matches/the number of query terms searching this attribute` +- **attributeMatch(name).fieldCompleteness**<br/> + Default: 0 + + The normalized ratio of query tokens which was matched in the field. For arrays: `matches/array length` For weighted sets: `sum of weight of matched terms/sum of weights of entire set`. This is relatively expensive to calculate for large weighted sets. 
+- **attributeMatch(name).normalizedWeight**<br/> + Default: 0 + + A number which is close to 1 if the attribute weights of most matches in a weighted set are high (relative to [maxWeight](/en/reference/ranking/rank-feature-configuration#attributeMatch)), 0 otherwise +- **attributeMatch(name).normalizedWeightedWeight**<br/> + Default: 0 + + A number which is close to 1 if the attribute weights of most matches in a weighted set are high (relative to [maxWeight](/en/reference/ranking/rank-feature-configuration#attributeMatch)), and where highly weighted query terms has more impact, 0 otherwise +- **closeness(dimension,name)**<br/> + Default: 0 + + Used with the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator. A number which is close to 1 when a vector in the document tensor is close to the vector given in the query. The indexed dimension representing a vector in document and query tensors must be identical. + - *dimension*: Specifies the dimension of *name*. This must be either the string `field` or the string `label`. + When using `field`, the name given must be a field with a tensor attribute of appropriate type. Often used when the document type has only one vector field, see [example](/en/querying/nearest-neighbor-search#minimal-example). + When using `label`, queries are assumed to contain a [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query item with a [label](/en/reference/querying/yql#label) that matches the given *name*. This is useful when having multiple vector fields, where `closeness()` then maps to the nearestNeighbor operator with the field configured. [Example](/en/querying/nearest-neighbor-search-guide#using-label). + - *name*: The value of the field name or label. + + <Info> + **Note:** `closeness()` is calculated **only** based on the vectors matched with nearestNeighbor operator. 
This means that the value of `closeness()` is not necessarily calculated based on the same vector returned by `closest()` rank feature if nearestNeighbor search is approximate, as `closest()` will be calculated based on *all* specified document vectors. + </Info> + The output value is + $$ + closeness(dimension, name) = \frac{1.0}{1.0 + distance(dimension, name)} + $$ + When the tensor field stores multiple vectors per document, the minimum distance between the vectors of a document and the query vector is used in the calculation above. +- **freshness(name)**<br/> + Default: 0 + + A number which is close to 1 if the timestamp in attribute *name* is recent compared to the current time, relative to [maxAge](/en/reference/ranking/rank-feature-configuration#freshness): + + `max( 1-age(name)/maxAge , 0 )` + + Scales linearly with age, see [freshness plot](#freshness). +- **freshness(name).logscale**<br/> + Default: 0 + + A logarithmic-shaped freshness; also goes from 1 to 0, but looks like [freshness plot](/en/reference/ranking/rank-features#freshness). The function is based on `-log(age(name) + scale)` and is calculated as: + + $$ + \frac{\log(\text{maxAge} + \text{scale}) - \log(\text{age(name)} + \text{scale})}{\log(\text{maxAge} + \text{scale}) - \log(\text{scale})} + $$ + + where scale is defined using [halfResponse and maxAge](/en/reference/ranking/rank-feature-configuration#freshness): + + $$ + \text{scale} = \frac{-\text{halfResponse}^{2}}{2 \times \text{halfResponse} - \text{maxAge}} + $$ + + When `age(name) == halfResponse` the function output is 0.5. +### Attribute match features - normalized and relative to the whole query + +- **attributeMatch(name).weight**<br/> + Default: 0 + + This has the same semantics as fieldMatch(*name*).weight. 
+- **attributeMatch(name).significance**<br/> + Default: 0 + + This has the same semantics as fieldMatch(*name*).significance. +- **attributeMatch(name).importance**<br/> + Default: 0 + + Returns the average of significance and weight. This has the same properties as those metrics. + +### Attribute match features - not normalized + +- **attributeMatch(name).matches**<br/> + Default: 0 + + The number of query terms which was matched in this attribute +- **attributeMatch(name).totalWeight**<br/> + Default: 0 + + The sum of the weights of the attribute keys matched in a weighted set attribute +- **attributeMatch(name).averageWeight**<br/> + Default: 0 + + totalWeight/matches +- **attributeMatch(name).maxWeight**<br/> + Default: 0 + + The maximum weight of the attribute keys matched in a weighted set attribute +- **closest(name)**<br/> + Default: {} + + Used with the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator and a tensor field attribute *name* storing multiple vectors per document. This feature returns a tensor with one or more mapped dimensions and one point with a value of 1.0, where the label of that point indicates which document vector was closest to the query vector in the nearest neighbor search. + + Given a tensor field with type `tensor<float>(m{},x[3])` used with the *nearestNeighbor* operator, an example output of this feature is: + + ```bash + tensor<float>(m{}):{ 3: 1.0 } + ``` + In this example, the document vector with label *3* in the mapped *m* dimension was closest to the query vector. +- **closest(name,label)**<br/> + Default: {} + + Used with the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator tagged with a [label](/en/ranking/multivalue-query-operators#raw-scores-and-query-item-labeling) *label* and a tensor field attribute *name* storing multiple vectors per document. + See [closest(name)](/en/reference/ranking/rank-features#closest(name)) for details. 
+- **distance(dimension,name)**<br/> + Default: max double value + Used with the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator. A number which is close to 0 when a point vector in the document is close to a matching point vector in the query. The document vectors and the query vector must be the same tensor type, with one indexed dimension of size N, representing a point in an N-dimensional space. + - *dimension*: Specifies the dimension of *name*. This must be either the string `field` or the string `label`. + When using `field`, the name given must be a field with a tensor attribute of appropriate type. Often used when the document type has only one vector field, see [example](/en/querying/nearest-neighbor-search#minimal-example). + When using `label`, queries are assumed to contain a [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query item with a [label](/en/reference/querying/yql#label) that matches the given *name*. This is useful when having multiple vector fields, where `distance()` then maps to the nearestNeighbor operator with the field configured. [Example](/en/querying/nearest-neighbor-search-guide#using-label). + - *name*: The value of the field name or label. + + The output value depends on the [distance metric](/en/reference/schemas/schemas#distance-metric) used. The default is the Euclidean distance between the "n"-dimensional query point "q" and the point "d" in the document tensor field: + + $$ + distance = \sqrt{\sum_{i=1}^{n} (q_{i} - d_{i})^{2}} + $$ + + + When the tensor field stores multiple vectors per document, the minimum distance between the vectors of a document and the query vector is used in the calculation above. 
+- **age(name)**<br/> + Default: 10B + + The document age in seconds relative to the Unix time value stored in the attribute having this name +### Features combining multiple fields and attributes +- **match**<br/> + Default: 0 + + A normalized average of the fieldMatch and attributeMatch scores of all the searched fields and attributes, where the contribution of each field and attribute is weighted by its *weight* setting. +- **match.totalWeight**<br/> + Default: 0 + + The sum of the weight settings of all the fields and attributes searched by the query +- **match.weight.name**<br/> + Default: 100 + + The (schema) weight setting of a field or attribute + +### Rank scores + +- **bm25(field)**<br/> + Default: 0 + + Calculates the [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) ranking function over the given [indexed string field](/en/reference/schemas/schemas#indexing-index). This feature is cheap to compute, about 3-4 times faster than nativeRank, while still providing a good rank score quality wise. This feature is a good candidate for usage in a first phase ranking function when ranking text documents. Note that the field must be enabled to be used with the bm25 feature; set the *enable-bm25* flag in the [index](/en/reference/schemas/schemas#index) section of the field definition. See the [BM25 Reference](/en/ranking/bm25) for more detailed information. +- **elementwise(bm25(field),dimension,cell\_type)**<br/> + Default: tensor(dimension{}):{} + Calculates the [Okapi BM25](https://en.wikipedia.org/wiki/Okapi_BM25) ranking function over each element in the given *multi-valued* [indexed string field](/en/reference/schemas/schemas#indexing-index) and creates a tensor with a single mapped dimension containing the bm25 score for each matching element. The element indexes (starting at 0) are used as dimension labels. 
This feature is more expensive than [bm25](#bm25), does not need the *enable-bm25* flag and can be tuned with [rank properties](/en/reference/ranking/rank-feature-configuration#elementwise-bm25). + + The *cell\_type* parameter can be omitted (the default value is `double`) except when setting rank properties. + + Example: If the `content` field is of type `array<string>` where query terms are found in elements 2 and 5 then the feature `elementwise(bm25(content),x,float)` as a summary feature might be shown as: + + ```bash + "elementwise(bm25(content),x,float)": { + "type": "tensor(x{})", + "cells": { + "2": 0.5112776, + "5": 0.1021805 + } + } + ``` +- **nativeRank**<br/> + Default: 0 + + A reasonably good rank score which is computed cheaply by Vespa. This value only is a good candidate first phase ranking function, and is the default used in the default rank profile. The value computed by this function may change between Vespa versions. See the [native rank reference](/en/reference/ranking/nativerank) for more information. +- **nativeRank(field,...)**<br/> + Default: 0 + + Same as *nativeRank*, but only the given set of fields are used in the calculation. +- **nativeFieldMatch**<br/> + Default: 0 + + Captures how well query terms match in index fields. Used by *nativeRank*. See the [native rank reference](/en/reference/ranking/nativerank) for more information. +- **nativeFieldMatch(field,...)**<br/> + Default: 0 + + Same as *nativeFieldMatch*, but only the given set of index fields are used in the calculation. +- **nativeProximity**<br/> + Default: 0 + + Captures how near matched query terms occur in index fields. Used by *nativeRank*. See the [native rank reference](/en/reference/ranking/nativerank) for more information. +- **nativeProximity(field,...)**<br/> + Default: 0 + + Same as *nativeProximity*, but only the given set of index fields are used in the calculation. 
+- **nativeAttributeMatch**<br/> + Default: 0 + + Captures how well query terms match in attribute fields. Used by *nativeRank*. See the [native rank reference](/en/reference/ranking/nativerank) for more information. +- **nativeAttributeMatch(field,...)**<br/> + Default: 0 + + Same as *nativeAttributeMatch*, but only the given set of attribute fields are used in the calculation. +- **nativeDotProduct(field)**<br/> + Default: 0 + + Calculates the sparse dot product between query term weights and match weights for the given field. Example: A weighted set string field X: + + ```bash + "X": { + "x": 10, + "y": 20, + "z": 30 + } + ``` + For the query (x!2 OR y!4), the nativeDotProduct(X) feature will have the value 100 (10\*2+20\*4) for that document. + <Info> + **Note:** `nativeDotProduct` and `nativeDotProduct(field)` are less optimal for computing the dot product - consider using [dotProduct(name,vector)](/en/reference/ranking/rank-features#dotProduct(name,vector)). + </Info> +- **nativeDotProduct**<br/> + Default: 0 + + Calculates the sparse dot product between query term weights and match weights as above, but for all term/field combinations. +- **firstPhase**<br/> + Default: 0 + + The value of the rank score calculated in the first phase (unavailable in first phase ranking expressions) +- **secondPhase**<br/> + Default: 0 + + The value of the rank score calculated in the second phase (unavailable in first phase and second phase ranking expressions) +- **firstPhaseRank**<br/> + Default: max double value + + The rank of the document after first phase within the content node when selecting which documents to rerank in second phase. The best document after first phase has rank 1, the second best 2, etc. 
The feature returns the default value for documents not selected for second phase ranking and for unsupported cases ([streaming search](/en/performance/streaming-search#differences-in-streaming-search), [summary features](/en/reference/schemas/schemas#summary-features), first phase expressions). Multiple documents can have the same *firstPhaseRank* value in multi-node configurations. +- **firstPhaseMax**<br/> + Default: -infinity + + The maximum first-phase rank score among all hits matched on the content node. Calculated locally on each content node, so may yield different results on different nodes. +- **relevanceScore**<br/> + Default: - + + The value of the rank score calculated either in the first or (when defined) in the second phase (unavailable in first phase and second phase ranking expressions) (since 8.559.30). +### Global features +- **globalSequence**<br/> + Default: n/a + + A global sequence number computed as (1 `<<` 48) - (LocalDocumentId `<<` 16 || [distribution-key](/en/reference/applications/services/content#node)). This will give a global sequence to documents. This is a cheap way of having stable ordering of documents. Note the large range of this value. Also note that if the system is not stable, e.g. if documents move around due to new nodes coming in, or nodes being removed, it will no longer be stable as documents might be found in a different replica. If you need true global ordering we suggest assigning a unique numeric id to your documents as an [attribute](/en/reference/schemas/schemas#attribute) field and use the [attribute(name)](/en/reference/ranking/rank-features#attribute(name)) feature. +- **now**<br/> + Default: n/a + + Time at which the query is executed in unix-time (seconds since epoch) +- **random**<br/> + Default: n/a + + A pseudorandom number in the range `[0,1>` which is drawn once per document during rank evaluation. By default, the current time in microseconds is used as a seed value. 
Users can specify a seed value by setting [random.seed](/en/reference/ranking/rank-feature-configuration#random) in the rank profile. If you need several independent random numbers the feature can be named like this: `random(foo)`, `random(bar)`. +- **random.match**<br/> + Default: n/a + + A pseudorandom number in the range `[0,1>` that is stable for a given hit. This means that a hit will always receive the same random score (on a single node). If it is required that the scores be different between different queries, specify a seed value dependent upon the query. By default, the seed value is 1024. Users can specify a seed value by adding the query parameter [rankproperty.random.match.seed=`<value>`](/en/reference/api/query#ranking.properties). If you need several independent random numbers the feature can be named like this: `random(foo).match`, `random(bar).match`. +- **randomNormal(mean,stddev)**<br/> + Default: 0.0,1.0 + + Same as [random](/en/reference/ranking/rank-features#random), except the random number is drawn from the Gaussian distribution using the supplied mean and stddev parameters. Can be called without parameters; default values are assumed. Seed is set similarly as *random*. If you need several independent random numbers with the same parameters, the feature can be named like this: `randomNormal(0.0,1.0,foo)`, `randomNormal(0.0,1.0,bar)`. If the parameters to *randomNormal* are not the same, you do not need to supply an additional name, e.g. `randomNormal(0.0, 0.1)` and `randomNormal(0.0, 0.5)` results in two independent values. +- **randomNormalStable(mean,stddev)**<br/> + Default: 0.0,1.0 + + Same as [randomNormal](/en/reference/ranking/rank-features#randomNormal(mean,stddev)), except that the generated number is stable for a given hit, similar to [random.match](/en/reference/ranking/rank-features#random.match). +- **constant(name)**<br/> + Default: n/a + + Returns the [constant](/en/reference/schemas/schemas#constant) tensor value. 
+### Match operator scores + +See [Raw scores and query item labeling](/en/ranking/multivalue-query-operators#raw-scores-and-query-item-labeling) + +- **rawScore(field)**<br/> + Default: 0 + + The sum of all raw scores produced by match operators for this field. +- **itemRawScore(label)**<br/> + Default: 0 + + The raw score produced by the query item with the given label. + +### Geo search +These features are for ranking on the distances between geographical coordinates, i.e. points on the surface of the earth defined by latitude/longitude pairs. See the main documentation for [Geo Search](/en/querying/geo-search). + +<Info> +**Note:** Some of these features have the same names as features used with the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator. Take care not to get them mixed up! +</Info> + +- **closeness(name)**<br/> + Default: 0 + + A number which is close to 1 if the position in attribute *name* is close to the query position compared to [maxDistance](/en/reference/ranking/rank-feature-configuration#closeness): + + `max(1-distance(name)/maxDistance , 0)` + + Scales linearly with distance, see [closeness plot](/en/reference/ranking/rank-features#closeness). +- **closeness(name).logscale**<br/> + Default: 0 + + A logarithmic-shaped closeness; like normal closeness it goes from 1 to 0, but looks like [closeness plot](/en/reference/ranking/rank-features#closeness). The function is a logarithmic fall-off based on `log(distance + scale)` and is calculated as: + + $$ + closeness(name).logscale = \frac{\log(maxDistance + scale) - \log(distance(name) + scale)}{\log(maxDistance + scale) - \log(scale)} 
+ $$ + + where scale is defined using [halfResponse and maxDistance](/en/reference/ranking/rank-feature-configuration#closeness): + + $$ + scale = \frac{halfResponse^{2}}{maxDistance - 2 \times halfResponse} + $$ + + When `distance(name) == halfResponse` the function output is 0.5; halfResponse should be less than `maxDistance/2` since that means adding a certain distance when you are close matters more than adding the same distance when you're already far away. +- **distance(name)**<br/> + Default: 6400M + + The Euclidean distance from the query position to the given position attribute in millionths of degrees (about 10 cm). If there are multiple positions in the query, items that actually search in *name* are preferred. Also: if multiple query items search in *name*, or *name* is an array of positions, or both, the closest distance found is returned. +- **distance(name).km**<br/> + Default: 711648.5 + + As above, but scaled, so it uses the kilometer as unit of distance, instead of "micro-degrees". +- **distance(name).index**<br/> + Default: -1 + + The array index of the closest position found. Useful when *name* is of `array<position>` type. +- **distance(name).latitude**<br/> + Default: 90 + + The latitude (geographical north-south coordinate) of the closest position found. In range from -90.0 (South Pole) to +90.0 (North Pole). Useful when *name* is of `array<position>` type. +- **distance(name).longitude**<br/> + Default: -180 + + The longitude (geographical east-west coordinate) of the closest position found. In range from -180.0 (extreme west) to +180.0 (extreme east). Useful when *name* is of `array<position>` type. +- **distanceToPath(name).distance**<br/> + Default: 6400M + + The Euclidean distance from a path through 2d space given in the query to the given position attribute in millionths of degrees. This is useful e.g. 
for finding the closest locations to a given road. The query path is set in the [rankproperty.distanceToPath(*name*).path](/en/reference/api/query#ranking.properties) query parameter, using syntax `"(x1,y1,x2,y2,..)"` also in millionth of degrees, see the [distance to path](/en/querying/geo-search#distance-to-path) example. The closest point along the path is referred to as the *intersection*. + + <Info> + **Note:** For documents with multiple locations, only the closest location is used for ranking purposes. + </Info> +- **distanceToPath(name).traveled**<br/> + Default: 1 + + The normalized distance along the query path traveled before intersection (0.0 indicates start of path, 0.5 is middle, and 1.0 is end of path). +- **distanceToPath(name).product**<br/> + Default: 0 + + The cross-product of the intersected path segment and the intersection-to-document vector. Given that the document was found to lie closest to the path element `A->B`, the intersected path segment vector is `[ B.x - A.x, B.y - A.y ]`. Furthermore, given that the intersection of that path element occurred at point `I` for document location `D`, the intersection-to-document vector is `[ I.x - D.x, I.y - D.y]`. This is useful e.g. for finding what side of a path a document exists by looking at the sign of this value. +### Utility features +- **foreach(dimension, variable, feature, condition, operation)**<br/> + Default: n/a + + *foreach* iterates over a set of feature output values and performs an operation on them. Only the values where the condition evaluates to true are considered for the operation. The result of this operation is returned. + + - *dimension*: Specifies what to iterate over. This can be: + - *terms*: All query term indices, from 0 and up to [maxTerms](/en/reference/ranking/rank-feature-configuration#foreach). + - *fields*: All index field names. + - *attributes*: All attribute field names. 
+ - *variable*: The name of the variable 'storing' each of the items you are iterating over. + - *feature*: The name of the feature you want to use the output value from. Use the *variable* as part of the feature name, and for each item you iterate over this *variable* is replaced with the actual item. Note that the variable replacement is a simple string replace, so you should use a variable name that is not in conflict with the feature name. + - *condition*: The condition used on each feature output value to find out if the value should be considered by the operation. The condition can be: + - *\>a*: Use feature output if greater than number a. + - *`<a`*: Use feature output if less than number a. + - *true*: Use all feature output values. + - *operation*: The operation you want to perform on the feature output values. This can be: + - *sum*: Calculate the sum of the values. + - *product*: Calculate the product of the values. + - *average*: Calculate the average of the values. + - *max*: Find the max of the values. + - *min*: Find the min of the values. + - *count*: Count the number of values. + + Let's say you want to calculate the average score of the *fieldMatch* feature for all index fields, but only consider the scores larger than 0. Then you can use the following setup of the *foreach* feature: + + `foreach(fields,N,fieldMatch(N), ">0", average)`. + + Note that when using the conditions *`>a`* and *`<a`* the arguments must be quoted. + + You can also specify a ranking expression in the *foreach* feature by using the *rankingExpression* feature. The *rankingExpression* feature takes the expression as the first and only parameter and outputs the result of evaluating this expression. Let's say you want to calculate the average score of the squared *fieldMatch* feature score for all index fields. 
Then you can use the following setup of the *foreach* feature: + `foreach(fields, N, rankingExpression("fieldMatch(N)*fieldMatch(N)"), true, average)` + + Note that you must quote the expression passed in to the *rankingExpression* feature. + +- **dotProduct(name,vector)**<br/> + Default: 0 + + <Info> + **Note:** Most dot product use cases are better solved using [tensors](/en/ranking/tensor-user-guide). + </Info> + The sparse dot product of the vector represented by the given weighted set attribute and the vector sent down with the query. + You can also do an ordinary full dotproduct by using arrays instead of weighted sets. This will be a lot faster when you have full vectors in the document with more than 5-10% non-zero values. You are also then not limited to integer weights. All the numeric datatypes can be used with arrays, so you have full floating point support. The 32 bit floating point type yields the fastest execution. + - *name*: The name of the weighted set string/integer or array of numeric attribute. + - *vector*: The name of the vector sent down with the query. + +Each unique string/integer in the weighted set corresponds to a dimension and the belonging weight is the vector component for that dimension. The query vector is set in the [rankproperty.dotProduct.*vector*](/en/reference/api/query#ranking.properties) query parameter, using syntax `{d1:c1,d2:c2,…}` where *d1* and *d2* are dimensions matching the strings/integers in the weighted set and *c1* and *c2* are the vector components (floating point numbers). The number of dimensions in the weighted set and the query vector do not need to be the same. When calculating the dot product we only use the dimensions present in both the weighted set and the query vector. + +When using an array, each dimension is a non-negative integer index starting at 0. If the query vector is sparse, all dimensions not given are zero. The same goes for dimensions that are outside of the array size in each document. 
+ +Assume a weighted set string attribute X with: + + ```bash + "X": { + "x": 10, + "y": 20 + } + ``` + +for a particular document. The result of using the feature dotProduct(X,Y) with the query vector rankproperty.dotProduct.Y=`{x:2,y:4}` will then be 100 (10\*2+20\*4) for this document. + +Arrays can be passed down as `[w1 w2 w3 …]` or on sparse form `{d1:c1,d2:c2,…}` as is already supported for weighted sets. + + <Info> + **Note:** When the query vector ends up being the same as the query, it is better to annotate the query terms with weights (see [term weight](/en/reference/querying/simple-query-language#term-weight)) and use the nativeDotProduct feature instead. This will run more efficiently and improve the correlation between results produced by the WAND operator and the final relevance score. + </Info> + <Info> + **Note:** When using the dotProduct feature, [fast-search](/en/content/attributes#fast-search) is not needed, unless also used for searching. When using the dotProduct query operator, use fast-search. + </Info> + +- **tokenInputIds(length, input\_1, input\_2, ...)**<br/> + Default: n/a + + Convenience function for generating token sequence input to Transformer models. Creates a tensor with dimensions `d0[1], d1[length]`, where `d0` is the batch dimension and `d1` is the maximum length of the token sequence. Assumes the inputs are zero-padded tensors representing token sequences. The result is the token sequence: + + `CLS + input_1 + SEP + input_2 + SEP + ... + 0's` + - *length*: The maximum length of the token sequence + - *input\_N*: Where to retrieve input from. At least one is required. + + The inputs are typically retrieved from the query, document attributes or constants. 
For instance, `tokenInputIds(128, query(my_input), attribute(my_field))` where input types are: + + - `query(my_input): tensor(d0[32])` + - `attribute(my_field): tensor(d0[128])` + + will create a tensor of type `d0[1],d1[128]` consisting of the CLS token `101`, the tokens from the query, the SEP token `102`, the tokens from the document field, the SEP token `102`, and 0's for the rest of the tensor. + +- **customTokenInputIds(start\_sequence\_id, sep\_sequence\_id, length, input\_1, input\_2, ...)**<br/> + Default: n/a + + Convenience function for generating token sequence input to Transformer models. Creates a tensor with dimensions `d0[1], d1[length]`, where `d0` is the batch dimension and `d1` is the maximum length of the token sequence. Assumes the inputs are zero-padded tensors representing token sequences. The result is the token sequence: + `start_sequence_id + input_1 + sep_sequence_id + input_2 + sep_sequence_id + ... + 0's` + - *start\_sequence\_id*: The start sequence id, typically *1* + - *sep\_sequence\_id*: The separator sequence id, typically *2* + - *length*: The maximum length of the token sequence + - *input\_N*: Where to retrieve input from. At least one is required. + +The inputs are typically retrieved from the query, document attributes or constants. For instance, `customTokenInputIds(1,2,128, query(my_input), attribute(my_field))` where input types are: + - `query(my_input): tensor(d0[32])` + - `attribute(my_field): tensor(d0[128])` +- **tokenTypeIds(length, input\_1, input\_2, ...)**<br/> + Default: n/a + + Convenience function for generating token sequence input to Transformer models. Similar to the `tokenInputIds`, creates a tensor of type `d0[1],d1[length]` which represents a mask with zeros for the first input including CLS and SEP token, ones for the rest of the inputs (up to and including the final SEP token), and 0's for the rest of the tensor. 
+- **tokenAttentionMask(length, input\_1, input\_2, ...)**<br/> + Default: n/a + + Convenience function for generating token sequence input to Transformer models. Similar to the `tokenInputIds`, creates a tensor of type `d0[1],d1[length]` which represents a mask with ones for all tokens that are set (CLS and SEP and all inputs), and zeros for the rest. + +## Graphs for selected ranking functions + +### closeness + +<Frame> + ![Closeness logscale plot](/assets/img/relevance/closeness-logscale.png) +</Frame> + +The plot above shows the possible outputs from the closeness distance rank feature using the default maxDistance of 1000 km. The _linear(x)_ graph shows the default closeness output while the other graphs are logscale output for various values of the scaleDistance parameter: 9013.305 (1 km), 45066.525 (5 km - the default value), and 901330.5 (100 km). These values correspond to the following values of the halfResponse parameter: 276154.903 (30.64 km), 593861.739 (65.89 km), and 2088044.581 (231.66 km). + +### freshness + +<Frame> + ![Freshness logscale plot](/assets/img/relevance/freshness-logscale.png) +</Frame> + +The plot above shows the possible outputs from the freshness rank feature using the default maxAge of 7776000s (90 days). The _linear(x)_ graph shows the default freshness output while the other graphs are logscale output for various values of the halfResponse parameter: 172800s (2 days), 604800s (7 days - the default value), 1209600s (14 days). \ No newline at end of file diff --git a/mintlify-docs/en/reference/ranking/rank-types.mdx b/mintlify-docs/en/reference/ranking/rank-types.mdx new file mode 100644 index 0000000000..5b93f0c35e --- /dev/null +++ b/mintlify-docs/en/reference/ranking/rank-types.mdx @@ -0,0 +1,164 @@ +--- +title: "Rank Types" +--- + +This document presents the [rank-types](/en/reference/schemas/schemas#rank-type) that are supported by nativeRank and the main ranking features that are controlled through the use of them. 
See the [native rank reference](/en/reference/ranking/nativerank) for detailed information on the nativeRank feature. First, the generic mechanisms for ranking control through rank types are explained. Then, each ranking type will be described, with details of how it uses the different ranking controls. + +## Using rank-type + +The [rank-type](/en/reference/schemas/schemas#rank-type) of a field is set either in the [rank-profile](/en/reference/schemas/schemas#rank-profile) or per field. Setting it per rank-profile gives more flexible control: + +```js +rank-profile product inherits default { + rank-type title: identity + rank-type body: about + first-phase { + expression: nativeRank + } +} +``` + +## Ranking controls + +This section describes the different generic ranking controls that are influenced by the ranking type selection. + +### FirstOcc boost + +The position of the first occurrence of a term in a document field is called the `FirstOcc`. A FirstOcc boost table maps a particular value _**x**_ of FirstOcc to a rank contribution. For FirstOcc equal to or above the size of the table (default 256), the value for the last table element is used. + +Equation 1. Example FirstOcc boost table function: + +$$ +firstocc(x) = expdecay(x) = w \times e^{-\frac{x}{t}} +$$ + +where _**w**_ is the weight (controls the amplitude) and _**t**_ is the tune parameter (controls the slope). The curve shapes are illustrated in Figure 1-2. + +<Frame caption="Figure 1. FirstOcc Plot weight: increasing w lifts amplitude (x-axis is first occurrence position, and y-axis is firstocc boost)"> +![FirstOcc Plot weight](/assets/img/relevance/plot-firstocc-weight.png) +</Frame> +<br/> +<Frame caption="Figure 2. 
FirstOcc Plot tune: Increasing t reduces exponential falloff (x-axis is first occurrence position, and y-axis is firstocc boost)"> +![FirstOcc Plot tune](/assets/img/relevance/plot-firstocc-tune.png) +</Frame> +### NumOcc boost + +The number of occurrences of a term in a document field is called the `NumOcc`. A NumOcc boost table maps a given number of occurrences _**x**_ to a particular rank contribution. For terms whose number of occurrences is greater than or equal to the size of the table (default 256), the value for the last table element is used. + +Equation 2. Example NumOcc boost table function: + +$$ +numocc(x) = loggrowth(x) = w \times log(1+\frac{x}{s}) + t +$$ + +where _**w**_ is the weight (controls the amplitude) and _**t**_ is the tune parameter (controls the offset). _**s**_ is a scale parameter (controls the sensitivity to the numocc variable x). + +<Frame caption="Figure 3. NumOcc Plot weight: increasing w scales log amplitude/shape (x-axis is number of occurrences in the field and y-axis is numocc boost)"> +![NumOcc Plot weight](/assets/img/relevance/plot-numocc-weight.png) +</Frame> +<br/> +<Frame caption="Figure 4. NumOcc Plot tune: increasing t increases boost by offset value (x-axis is number of occurrences in the field and y-axis is numocc boost)"> +![NumOcc Plot tune](/assets/img/relevance/plot-numocc-tune.png) +</Frame> + +### Proximity boost + +When a query with more than one query term is executed, pairwise proximity is calculated between pairs of query terms. For a given pair, the actual ranking value will be based on lookup in a proximity boost table. + +For a given pair of query-terms _`a`_ _`b`_, the distance in the document field between the positions of the terms is calculated such that if the terms occur in the same order in both the query and the document, the distance will be positive. If the order is reversed from the query to the document, the value will be negative. 
The absolute value is in both cases the absolute difference in word positions in the document. + +For proximity calculations in the forward direction, the rank boost will be selected from the forward proximity table. For reversed proximity, a similar reverse proximity table is used. + +Typically, the rank score of the forward proximity table will be higher than the reverse proximity table, giving an overall asymmetrical ranking function. + +Equation 3. Example Proximity boost table function: + +$$ +prox(x) = expdecay(x) = w \times e^{-\frac{x}{t}} +$$ + +where _**x**_ is the absolute difference between the query term distance and document term distance, _**w**_ is the weight (controls the amplitude) and _**t**_ is the tune parameter (controls the slope). Refer to Figures 5 and 6: The same formula as in the FirstOcc examples is used, but with different parameters. + +<Frame caption="Figure 5. Proximity Plot weight: w scales amplitude (x-axis is proximity term pair distance and y-axis is proximity boost)"> +![Proximity Plot weight](/assets/img/relevance/plot-proximity-weight.png) +</Frame> +<br/> +<Frame caption="Figure 6. Proximity Plot tune: Increasing t reduces curve fall-off (x-axis is proximity term pair distance and y-axis is proximity boost)"> +![Proximity Plot tune](/assets/img/relevance/plot-proximity-tune.png) +</Frame> + +### Weight boost + +The weightboost table is for calculating the rank boost contribution from attributes. Equation 4 shows how attribute weight values are used for table look-ups in the weightboost table. + +Equation 4. Example Weightboost boost table: + +$$ +weight(x) = sign(x) \times weightboost[abs(x)] +$$ + +where \(\text{sign}(x)\) is the sign of \(x\), and \(\text{abs}(x)\) is the absolute value of \(x\). Hence, attributes can have negative rank contributions. The argument \(x\) used as input in this boost table is dependent on the attribute type: + +- **Weighted set:** \(x\) equals the attribute weight. 
+- **Array:** \(x\) equals the number of match occurrences in the attribute array. +- **Single value:** \(x\) equals 1. + +## Rank types + +This section describes each individual ranking type, and details how it uses the different generic ranking controls. + +### "identity" rank type + +This ranking type is for fields that contain the identity of the document. A title of a book, or product name of a product are examples of this. + +The title will usually appear early in a document. So the FirstOcc table starts high and descends sharply. + +The title is usually not repeated, so the number of occurrences is not that important. It starts out at a medium height, and ascends slowly. + +Identity terms are well-connected, and fields are fairly small. So the proximity table has a sharp and high peak, with the importance falling rapidly. + +Supported by nativeRank using these tables: + +- nativeFieldMatch.firstOccurrenceTable: "expdecay(100,12.50)" +- nativeFieldMatch.occurrenceCountTable: "loggrowth(1500,4000,19)" +- nativeProximity.proximityTable: "expdecay(5000,3)" +- nativeProximity.reverseProximityTable: "expdecay(3000,3)" +- nativeAttributeMatch.weightTable: "linear(1,0)" + +### "about" rank type + +This is for fields that contain information directly relating to the document, describing what it is. A description field of a catalog, or a feature list of a product specification are typical examples. + +The order of occurrences is somewhat important, as usually the most important features will be put up first. The FirstOcc boost table starts out at a medium height, and falls fairly slowly. + +The about-ness of the description is much related to how many times a descriptive term is used. So the NumOcc boost table will start out fairly low, but climb high, with the maximum gain (curvature point) around 5 occurrences. + +Proximity is fairly important for about-ness. 
The proximity boost will be somewhat similar to the `identity` type, although it will be somewhat lower at the peak, and have a slightly shallower falloff. + +`about` is **the default ranking type** used when it is not explicitly specified which type to use. + +Supported by nativeRank using these tables: + +- nativeFieldMatch.firstOccurrenceTable: "expdecay(8000,12.50)" +- nativeFieldMatch.occurrenceCountTable: "loggrowth(1500,4000,19)" +- nativeProximity.proximityTable: "expdecay(500,3)" +- nativeProximity.reverseProximityTable: "expdecay(400,3)" +- nativeAttributeMatch.weightTable: "linear(1,0)" + +### "tags" rank type + +This is for search in attributes when they are used as tags, for example name tags for images that may be updated frequently. The tags rank type uses a logarithmic table to give more relative boost in the low range: As tags are added they should have significant impact on rank score, but as more and more tags are added, each new tag should contribute less. + +The `tags` rank type is based on the `about` rank type, i.e. only the weight boost table is different. For the other rank types a 1-to-1 linear table is used, except for the empty rank type, which has a table with zeros. + +Supported by nativeRank using this table: + +- nativeAttributeMatch.weightTable: "loggrowth(38,50,1)" + +The other tables are the same as the `about` rank type. + +### "empty" rank type + +This is used for fields where you do not want matches to have any impact on relevancy. Use this for instance when the field contains keywords used to partition the dataset. + +The empty rank type is also supported by nativeRank. 
diff --git a/mintlify-docs/en/reference/ranking/ranking-expressions.mdx b/mintlify-docs/en/reference/ranking/ranking-expressions.mdx new file mode 100644 index 0000000000..d5773a420a --- /dev/null +++ b/mintlify-docs/en/reference/ranking/ranking-expressions.mdx @@ -0,0 +1,190 @@ +--- +title: "Ranking Expressions" +--- + + +This is a complete reference to the _ranking expressions_ used to configure application specific ranking functions. For examples and an overview of how to use ranking expressions, see the [ranking overview](/en/basics/ranking). + +Ranking expressions are written in a simple language similar to ordinary functional notation. The atoms in ranking expressions are _rank features_ and _constants_. These atoms can be combined by _arithmetic operations_ and other _built-in functions_ over scalars and tensors. + +| Rank Features | A rank feature is a named value calculated or looked up by Vespa for each query/document combination. See the [rank feature reference](/en/reference/ranking/rank-features) for a list of all the rank features available to ranking expressions.| +| Constants | A constant is either a floating point number, a boolean (true/false) or a quoted string. Since ranking expressions can only work on scalars and tensors, strings and booleans are immediately converted to scalars - true becomes 1.0, false 0.0 and a string its hash value. This means that **strings can only be used for equality comparisons**, other purposes such as parametrizing the key to slice out of a tensor will not work correctly.| + +## Arithmetic operations + +Basic mathematical operations are expressed in in-fix notation: + +```bash +a + b * c +``` + +Arithmetic operations work on any tensor in addition to scalars, and are a short form of joining the tensors with the arithmetic operation used to join the cells. For example `tensorA * tensorB` is the same as `join(tensorA, tensorB, f(a,b)(a * b))`. 
+ +All arithmetic operators in order of decreasing precedence: + +| Arithmetic operator | Description | +| :--- | :--- | +| ^ | Power | +| % | Modulo | +| / | Division | +| \* | Multiplication | +| - | Subtraction | +| + | Addition | +| && | And: 1 if both arguments are non-zero, 0 otherwise. | +| \|\| | Or: 1 if either argument is non-zero, 0 otherwise. | + +## Mathematical scalar functions + +| Function | Description | +| :--- | :--- | +| acos(*x*) | Inverse cosine of *x* | +| asin(*x*) | Inverse sine of *x* | +| atan(*x*) | Inverse tangent of *x* | +| atan2(*y*, *x*) | Inverse tangent of *y / x*, using signs of both arguments to determine correct quadrant. | +| bit(*x*, *y*) | Returns value of bit *y* in value *x* (for int8 values) | +| ceil(*x*) | Lowest integral value not less than *x* | +| cos(*x*) | Cosine of *x* | +| cosh(*x*) | Hyperbolic cosine of *x* | +| elu(*x*) | The Exponential Linear Unit activation function for value *x* | +| erf(*x*) | The Gauss error function for value *x* | +| exp(*x*) | Base-e exponential function. 
| +| fabs(*x*) | Absolute value of (floating-point) number *x* | +| floor(*x*) | Largest integral value not greater than *x* | +| fmod(*x*, *y*) | Remainder of *x / y* | +| isNan(*x*) | Returns 1.0 if *x* is NaN, 0.0 otherwise | +| ldexp(*x*, *exp*) | Multiply *x* by 2 to the power of *exp* | +| log(*x*) | Base-e logarithm of *x* | +| log10(*x*) | Base-10 logarithm of *x* | +| max(*x*, *y*) | Larger of *x* and *y* | +| min(*x*, *y*) | Smaller of *x* and *y* | +| pow(*x*, *y*) | Return *x* raised to the power of *y* | +| relu(*x*) | The Rectified Linear Unit activation function for value *x* | +| sigmoid(*x*) | The sigmoid (logistic) activation function for value *x* | +| sin(*x*) | Sine of *x* | +| sinh(*x*) | Hyperbolic sine of *x* | +| sqrt(*x*) | Square root of *x* | +| tan(*x*) | Tangent of *x* | +| tanh(*x*) | Hyperbolic tangent of *x* | +| hamming(*x*, *y*) | Hamming (bit-wise) distance between *x* and *y* (considered as 8-bit integers). | + +`x` and `y` may be any ranking expression. + +## The if function + +The `if` function chooses between two sub-expressions based on the truth value of a condition. + +```bash +if (expression1 operator expression2, trueExpression, falseExpression) +``` + +If the condition given in the first argument is true, the expression in argument 2 is used, otherwise argument 3. The four expressions may be any ranking expression. Conditional operators in ranking expression if functions: + +| Boolean operator | Description | +| :--- | :--- | +| \<= | Less than or equal | +| \< | Less than | +| == | Equal | +| ~= | Approximately equal | +| \>= | Greater than or equal | +| \> | Greater than | + +The `in` membership operator uses a slightly modified if-syntax: + +```bash +if (expression1 in [expression2, expression3, ..., expressionN], trueExpression, falseExpression) +``` + +If expression1 is equal to any of expression2 through expressionN, then trueExpression is used, otherwise falseExpression. 
+ +## The switch function + +<Info> + **Note:** Available from Vespa 8.626.55 +</Info> + +The `switch` function chooses between multiple sub-expressions based on matching a value. It provides a more readable alternative to chained `if` statements when selecting from several options. + +```bash +switch (discriminant) { + case value1: result1, + case value2: result2, + ... + default: defaultResult +} +``` + +The `discriminant` expression is compared for equality against each `case` value expression in order. When a match is found, the corresponding result expression is evaluated and returned. If no case matches, the `default` result is returned. All expressions may be any ranking expression. At least one `case` must be specified. The `default` must be specified. + +## The foreach function + +The foreach function is not really part of the expression language but implemented as a [rank feature](/en/reference/ranking/rank-features#foreachdimensionvariablefeatureconditionoperation). + +## Tensor functions + +The following set of tensor functions is available to use in ranking expressions. The functions are grouped in primitive functions and convenience functions that can be implemented in terms of the primitive ones. + +### Primitive functions + +| Function | Description | +| :--- | :--- | +| **map( tensor, f(x)(expr) )** | Returns a new tensor with the lambda function defined in `f(x)(expr)` applied to each cell. Arguments: <br/><br/> - `tensor`: a tensor expression. For example `attribute(tensor_field)` <br/> - `f(x)(expr)`: a [lambda function](#lambda-functions-in-primitive-functions) with one argument. <br/><br/> Returns a new tensor where the expression in the lambda function is evaluated in each cell in `tensor`. 
<br/><br/> Examples: ```bash map(t, f(x)(x\*x)) ``` <br/><br/> [playground example map](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEcBXAgJwE8AKAFwEoSZGpCIJIPArQDOWNgB5oAGywBDHgD4uAD0QAWALp84iAAzEAjMQBMxAMz7IQiAF8hz0hmrlcDIh+GUaE50DAC2KjgA+gRaKqE4in7BECJhEVws7Nz8xGDQ2nzaAHpWfALBrhiVYPogzkA) | +| **map\_subspaces( tensor, f(x)(expr) )** | Returns a new tensor with the lambda function defined in `f(x)(expr)` applied to each dense subspace. <br/> Arguments: <br/><br/> - `tensor`: a tensor expression. For example `attribute(tensor_field)` <br/> - `f(x)(expr)`: a [lambda function](#lambda-functions-in-primitive-functions) with one argument. Returns a new tensor where the lambda function is evaluated for each dense subspace in `tensor`. This is an advanced feature that enables using dense [tensor generator](#tensor) expressions to transform mixed tensors. <br/> Example: <br/> ``` map\_subspaces(tensor(x{},y\[3\]):{a:\[1,2,3\]},f(d)(tensor(z\[2\])(d{y:(z)}+d{y:(z+1)})))``` <br/> [playground example for map\_subspaces](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEcBXAgJwE8AKAFwEoSZGpCIJIPArQDOWNgB4AlrR4AOAHxcAHsAC+xDogBMAXT5xgAQziIAjAFZiAWhsA2YzshCIOoXqHVyXAYiUhooSjQvOgZmWhwLAGMAawB9ACMFHikUgHdMgAsUgFsLHBSpZjSpeISCKUEwiBEGErKKqpq6rhZ2bn5iaC4AEz5eSRl5JVUNA1dTLgyeYeAOOC4OAHoVPj0AdkcOAFJtvlPPMJ8MS7BjEB0gA) | +| **filter\_subspaces( tensor, f(x)(expr) )** | Returns a new tensor containing only the subspaces for which the lambda function defined in `f(x)(expr)` returns true. <br/> Arguments: <br/> - `tensor`: a tensor expression. Must have at least one mapped dimension. <br/> - `f(x)(expr)`: a [lambda function](#lambda-functions-in-primitive-functions) with one argument. <br/> Returns a new tensor containing only the subspaces for which the lambda function defined in `f(x)(expr)` returns true. Typically used to get rid of unneeded values in sparse tensors. 
Example: ``` filter\_subspaces(tensor(x{}):{a:1,b:2,c:3,d:4},f(value)(value>2)) # tensor(x{}):{c:3,d:4} ``` <br/><br/> [playground example for filter\_subspaces](https://docs.vespa.ai/playground/#N4IgZiBcDaoPYAcogMYgDQiZUAXZABLgBYCmBAjgK4CGANgJa4CeBcYB9dBKxVAdgGsAziAC+Y9PGwhSGLFFD9kuAIzy5kELlL9hcAE4AeMHTg1cAPgAUvAYOBiAlDgAMkVQGZ0qyAHZ0ACZIAFZ0Tw8wgBZIT1d0EMhAsXFJaWQ0TGw8QgA5AgZhAgATZn4aAFsGNAkpEERkOSzFEGUtXI1kT1S6hq1MhRxtQjAGfmK2A2LSAzGAczYOFFI6OlFa9K0mwaUVQM7+lboAfUNpg2s1dAIKmgAPJx7N1Hls4a0bmkFyGk-hQQIYEMRDIBAARqRhLgCB0NvUZNs3m1tN1MJptIEjLC0vCMq8WvgPsUDIhOKsCMIqGDhAgaMsimASRUQeRbv8QRZOAZyGB6MI5HC+rJ8UNkbgogdwAw6DoDMdKdTafTLt5AdZhE51U5HoKZAM3oSQDw4BUwWMedLZaQJmyAQB3JjESYMOZjehEXT6AyA4Hcykyp64rYi3ZaXAhSXigBUalSAF0xEA) | +| **reduce( tensor, aggregator, dim1, dim2, ... )** | Returns a new tensor with the `aggregator` applied across dimensions dim1, dim2, etc. If no dimensions are specified, reduce over all dimensions. <br/> Arguments: <br/> - `tensor`: a tensor expression. <br/> - `aggregator`: the aggregator to use. See below. <br/> - `dim1, dim2, ...`: the dimensions to reduce over. Optional. <br/> Returns a new tensor with the aggregator applied across dimensions `dim1`, `dim2`, etc. If no dimensions are specified, reduce over all dimensions. 
<br/> Available aggregators are: <br/> - `avg`: arithmetic mean <br/> - `count`: number of elements <br/> - `max`: maximum value <br/> - `median`: median value <br/> - `min`: minimum value <br/> - `prod`: product of all values <br/> - `sum`: sum of all values <br/> Examples: <br/> ``` reduce(t, sum) # Sum all values in tensor reduce(t, count, x) # Count number of cells along dimension x ``` <br/> [playground example reduce](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEcBXAgJwE8AKAFwEoSZGpCIJIPArQDOWNgB5oAGywBDHgD4uAD0QAWALp84iAAzEAjMQBMxAMz7IQiAF8hz0hmrlcDIh+GUaE50DGwEACbMAMYEAPpSzAC2sQRaKok4in7BECKhEdEEXCzs3PzEYAmJAsGuGHVg+iDOQA) | +| **join( tensor1, tensor2, f(x,y)(expr) )** | Returns a new tensor constructed from the *natural join* between `tensor1` and `tensor2`, with the resulting cells having the value as calculated from `f(x,y)(expr)`, where `x` is the cell value from `tensor1` and `y` from `tensor2`. <br/> Arguments: <br/> - `tensor1`: a tensor expression. <br/> - `tensor2`: a tensor expression. <br/> - `f(x,y)(expr)`: a [lambda function](#lambda-functions-in-primitive-functions) with two arguments. Returns a new tensor constructed from the *natural join* between `tensor1` and `tensor2`, with the resulting cells having the value as calculated from `f(x,y)(expr)`, where `x` is the cell value from `tensor1` and `y` from `tensor2`. <br/> Formally, the result of the `join` is a new tensor with dimensions the union of dimension between `tensor1` and `tensor2`. The cells are the set of all combinations of cells that have equal values on their common dimensions. 
Examples: <br/> ``` join(t1, t2, f(x,y)(x \* y)) ``` <br/> [playground example join](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEcBXAgJwE8AKAFwEoSZGpCIJIPArQDOWNgB5oAGywBDHgD4uAD2ABfPnGDAtcAAy6EARgB0p4mhOWLYAEy3dkIRF1DdpDNTkuAxE-sKUaF50DGo8bACWAEbMErwCYTSEDBLSsgrKapo6fhx6BkYmdhxmzgDMtvbGZsTVTggALA0OcJYtNQgArF1Nva3OAGzunpk+GH5CgZjBYqFRFPiL5PRiAFZY8fQZwqJQewdcLOzc-PaxCcmpN2DQ2i182mAAVGAcfAJRs1QgIAuiBdEA) | +| **merge( tensor1, tensor2, f(x,y)(expr) )** | Returns a new tensor consisting of all cells from both the arguments, where the lambda function is used to produce a single value in the cases where both arguments provide a value for a cell. <br/> Arguments: <br/> - `tensor1`: a tensor expression. <br/> - `tensor2`: a tensor expression. <br/> - `f(x,y)(expr)`: a [lambda function](#lambda-functions-in-primitive-functions) with two arguments. <br/> Returns a new tensor having all the cells of both arguments, where the lambda is invoked to produce a single value only when both arguments have a value for the same cell. <br/> The argument tensors must have the same type, and that will be the type of the resulting tensor. Example: ``` merge(t1, t2, f(left,right)(right)) ``` <br/> [playground example merge](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gEcBXAgJwE8AKAFwEoSZGpCIJIPArQDOWNlwDWBDsAC+xAB6IATAF0+cYAEM4iAIzFdxAEYmAzMQAsOlZCEQVQtUOrlcDIqQ0UJRobnQMhjw8bACWVswSvAKBQYQMEtKyCkqqGtp6BjaIAKzEAGw6xADGJgDsxAAczq5BHhheGD6YfmIBYRT4XeT0YgC27ADmBAD6BOqGozgANn2paWOTBFws7Nz8xGCR0XEJW-tg0Fwr0DzEsRMAFvxc9098AmFtqF86ICpAA) | +| **tensor( tensor-type-spec )(expr)** | Generates new tensors according to type specification and expression `expr`. Arguments: - `tensor-type-spec`: an [indexed tensor type specification.](/en/reference/ranking/tensor#tensor-type-spec) - `(expression)`: a [lambda function](#lambda-functions-in-primitive-functions) expressing how to generate the tensor. 
Generates new tensors according to the type specification and expression `expr`. The tensor type must be an indexed tensor (e.g. `tensor<float>(x[10])`). The expression in `expr` will be evaluated for each cell. The arguments in the expression is implicitly the names of the dimensions defined in the type spec. <br/> Useful for creating transformation tensors. <br/> Examples: <br/> ``` tensor<float>(x\[3\])(x) ``` <br/> [playground generate examples](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gHMDaCAnAQwBcCB9VgO4kyNSEQSQetAM5Y2ACgAeiAMwBdAJRKNkERAC+I-aQzVyuBkROjKaPXQbNWnHvwIDeAJmE1M4qFKybAA80AA2WNwAfEqInmrEAJ5xmkpgALzpYIk69oYYxiJmmBYSVvYU+MXk9BJcPr6EDIFysQAsmnCIAIwAdAAMxGCeA0Mqo2BtA2q6vvmohaYVpU3W5LbVDhJO7Nx8grzQbFgAtrwctFhcABbsvC1sDb5izSxB8tApWlzAinDQAGpuvpcnMjCg1CB9EA) | +| **rename( tensor, dim-to-rename, new-names )** | Renames one or more dimensions in the tensor. <br/> Arguments: <br/> - `tensor`: a tensor expression. <br/> - `dim-to-rename`: a dimension, or list of dimensions, to rename. <br/> - `new-names`: new names for the dimensions listed above. Returns a new tensor with one or more dimension renamed. Examples: <br/> ``` rename(t1,x,z)``` <br/> [playground rename examples](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gBcSybIiEmDaBnLAJwAUAD2ABfYgE9xASjjBgwuAAYpKsQgCMAOlVolqyXE0awy3cX3G1y0+b2LrRk1t1jIrCGNYTW1crgMRKQ0UJRonnQM-NwAhgC2BAD6BMIJOAA2wZEQ7NFxiYKMxMLEAF4yHqHeGL4Y-piBnNmhFPgN5PScMbQJyanpWUmMAO5YLKG5HFA9fUXEIlIyCwCWxABWMpWRNai7ALogYkA) | +| **concat( tensor1, tensor2, dim )** | Concatenates two tensors along dimension `dim`. <br/> Arguments: <br/> - `tensor1`: a tensor expression. <br/> - `tensor2`: a tensor expression. <br/> - `dim`: the dimension to concatenate along. <br/> Returns a new tensor with the two tensors `tensor1` and `tensor2` concatenated along dimension `dim`. 
<br/> Examples: <br/> ``` concat(t,t2,x) ``` <br/> [playground concat examples](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gBcSybIiEmDaBnLAJwAUAD0QBmALoBKOIgAsxAKzEAbBMisIAX1ZbSGauVwMi+tpTSa6DRgCYWNTByiNufIaNvTZY4nPVWOhh6rIaYxpymVhT4YeT0nADGWLSJAIbMZo7sDMmpGYKMxHbEwlIajkGoIQbREYQO5rFWEJAJUHnpjAD6WIwAFgT83QDuaQCejdnOkJ0FJUVlFTRV2igSIFpAA) | +| **(tensor)partial-address** | Slice - returns a new tensor containing the cells matching the partial address.<br/> Arguments: <br/> - `tensor`: a tensor expression. <br/> - `partial-address`: Can be given in the form of a tensor address `{dimension:label,..}`, or for tensors referenced directly and having a single mapped or indexed type respectively as just a label in curly brackets `{label}` or just an index in square brackets, `[index]`. Index labels may be specified by a lambda expression enclosed in parentheses. <br/> Returns a new tensor containing the cells matching the partial address. A common special case is producing a single value by specifying a full address. The type of the resulting tensor is the dimensions of the argument tensor not specified by the partial address.<br/> Examples: <br/> ``` # a_tensor is of type tensor(key{},x\[2\]) a_tensor{key:key1,x:1} ``` <br/> [playground slice examples](https://docs.vespa.ai/playground/#N4KABGBEBmkFxgNrgmUrWQPYAd5QFNIAaFDSPBdDTAO30gBcSybIiEmDaBnLAJwAUAD0QBmALoBKOIgAsxAKzEAbBMisIAX1ZbSGauVwMi+tpTSa6DHgBsAlgGNTViOwaNgwuAAYtGmjAdDD1WQ0xjThdAinxw8npOOycCAH0AJhZAtw4oT290-ytg1FCDK2wLdzNyC3jrTkZMmrZcrl4BQQBrAgBPYD1RdOk4YDAe3oBGWUmAOh9iMHT5iUWJ9NkxecW5FaCAmhLtFvrKkyzzONcoRKhk51SAWwBDHBwCABNU+1oPgmFPhcYm0msAJnAJpNiN5JkVAkcgicKpFCEDMHVrpBbpB7mkXm9Pt9fv9Cc1MSD0mC+hC+lDvIIxABadJSOGHXQoCQgLRAA) | +| **tensor-literal-form** | Returns a new tensor having the type and cell values given explicitly. Each cell value may be supplied by a lambda which can access other features. 
<br/> Returns a new tensor from the [literal form](/en/reference/ranking/tensor#tensor-literal-form), where the type must be specified explicitly. Each value may be supplied by a lambda, which - in contrast to all other lambdas - *may refer to features and expressions from the context*. <br/> Examples: ``` # Declare an indexed tensor tensor(x[2]):[1.0, 2.0] # Declare an mapped tensor tensor(x{}):{x1:3, x2:4} ``` | +| **cell_cast( tensor, cell_type )** | Returns a new tensor that is the same as the argument, except that all cell values are converted to the given [cell type](/en/reference/ranking/tensor#tensor-type-spec). <br/> Arguments: <br/> - `tensor`: a tensor expression. <br/> - `cell_type`: wanted cell type. <br/> Example, casting from `bfloat16` to `float`: <br/> ``` # With a tensor t of the type tensor<bfloat16>(x\[5\])(x+1) cell_cast(t, float)``` | +| **cell_order( tensor, order )** | Returns a new tensor with the rank of the original cells based on the given order. <br/> Arguments: <br/> - `tensor`: a tensor expression. <br/> - `order`: `max` or `min` <br/> Returns a new tensor with the rank of the original cells based on the given order. With `max` the largest value gets rank 0. With `min` the smallest value gets rank 0. 
<br/> Examples: <br/> ``` cell_order(tensor(x\[3\]):\[2,3,1\],max) # tensor(x\[3\]):\[1,0,2\] cell_order(tensor(x\[3\]):\[2,3,1\],min) # tensor(x\[3\]):\[1,2,0\] ``` <br/> [playground example for cell_order](https://docs.vespa.ai/playground/#N4IgZiBcDaoPYAcogMYgDQiZUAXZABLgBYCmBAjgK4CGANgJa4CeBcYB9dBKxVAdgGsAziAC+Y9PGwhSGLFFD9kuAIzy5kELlL9hcAE4AeMHTg1cAPgAUvAYOBiAlDgAMkVQGZ0qyAHZ0ACZIAFZ0Tw8wgBZIT1d0EMhAsXFJaWQ0TGw8QgA5AgZhAgATZn4aAFsGNAkpEERkOSzFEGUtXI1kT1S6hq1MhRxtQjAGfmK2A2LSAzGAczYOFFI6OlFa9K0mwaUVQM7+lboAfUNpg2s1dAIKmgAPJx7N1Hls4a0bmkFyGk-hQQIYEMRDIBAARqRhLgCB0NvUZNs3m1tN1MJptIEjLC0vCMq8WvgPsUDIhOKsCMIqGDhAgaMsimASRUQeRbv8QRZOAZyGB6MI5HC+rJ8UNkbgogdwAw6DoDMdKdTafTLt5AdZhE51U5HoKZAM3oSQDw4BUwWMedLZaQJmyAQB3JjESYMOZjehEXT6AyA4Hcykyp64rYi3ZaXAhSXigBUalSAF0xEA) | + +### Lambda functions in primitive functions + +Some of the primitive functions accept lambda functions that are evaluated and applied to a set of tensor cells. The functions contain a single expression that have the same format and built-in functions as [general ranking expressions](/en/reference/ranking/ranking-expressions). However, the atoms are the arguments defined in the argument list of the lambda. + +The expression cannot access variables or data structures outside the lambda, i.e. they are not closures. + +Examples: + +```bash +f(x)(log(x)) +f(x,y)(if(x < y, 0, 1)) +``` + +### Non-primitive functions + +Non-primitive functions can be implemented by primitive functions, but are not necessarily so for performance reasons. Note that all the arithmetic operators, comparison operators, and scalar operations can also be applied to tensors directly, those are not repeated below here. + +| Function | Description | +| :--- | :--- | +| **argmax(t, dim)** | `join(t, reduce(t, max, dim), f(x,y)(if (x == y, 1, 0)))` <br/> Returns a tensor with cell(s) of the highest value(s) in the tensor set to 1. The dimension argument follows the same format as reduce as multiple dimensions can be given and is optional. 
| +| **argmin(t, dim)** | `join(t, reduce(t, min, dim), f(x,y)(if (x == y, 1, 0)))`<br/> Returns a tensor with cell(s) of the lowest value(s) in the tensor set to 1. The dimension argument follows the same format as reduce as multiple dimensions can be given and is optional. | +| **avg(t, dim)** | `reduce(t, avg, dim)` <br/> Reduce the tensor with the `average` aggregator along dimension `dim`. If the dimension argument is omitted, this reduces over all dimensions. | +| **count(t, dim)** | `reduce(t, count, dim)` <br/> Reduce the tensor with the `count` aggregator along dimension `dim`. If the dimension argument is omitted, this reduces over all dimensions. | +| **cosine_similarity(t1, t2, dim)** | `reduce(t1*t2, sum, dim) / sqrt(reduce(t1*t1, sum, dim) * reduce(t2*t2, sum, dim))` <br/> The cosine similarity between the two vectors in the given dimension. | +| **diag(n1, n2)** | `tensor(i[n1],j[n2])(if (i==j, 1.0, 0.0))` <br/> Returns a tensor with the diagonal set to 1.0. | +| **elu(t)** | `map(t, f(x)(if(x < 0, exp(x)-1, x)))` <br/> [Exponential linear unit](https://arxiv.org/abs/1511.07289). | +| **euclidean_distance(t1, t2, dim)** | `map(reduce(map(join(t1, t2, f(x,y)(x-y)), f(x)(x * x)), sum, dim), f(x)(sqrt(x)))` <br/> euclidean_distance: `sqrt(sum((t1-t2)^2, dim))`. | +| **expand(t, dim)** | `t * tensor(dim[1])(1)` <br/> Adds an indexed dimension with name `dim` to the tensor `t`. | +| **hamming(t1, t2)** | `join(t1, t2, f(x,y)(hamming(x,y)))` <br/> Join and return the Hamming distance between matching cells of `t1` and `t2`. 
This function is mostly useful when the input contains vectors with binary data and the Hamming distance is summed over the vector dimension, e.g.: <br/> type of input *t1* → `tensor<int8>(dimone{},z[32])` <br/> type of input *t2* → `tensor<int8>(dimtwo{},z[32])` <br/> expression → `reduce(join(t1, t2, f(a,b)(hamming(a,b))), sum, z)` <br/> output type → `tensor<float>(dimone{},dimtwo{})` <br/> Note that the cell values are always treated as if they were both 8-bit integers in the range \[-128,127\], and only then counting the number of bits that are different. See also the corresponding [distance metric](/en/reference/schemas/schemas#distance-metric). Arguments can be scalars. | +| **l1_normalize(t, dim)** | `join(t, reduce(t, sum, dim), f(x,y) (x / y))` <br/> L1 normalization: `t / sum(t, dim)`. | +| **l2_normalize(t, dim)** | `join(t, map(reduce(map(t, f(x)(x * x)), sum, dim), f(x)(sqrt(x))), f(x,y)(x / y))` <br/> L2 normalization: `t / sqrt(sum(t^2, dim))`. | +| **matmul(t1, t2, dim)** | `reduce(join(t1, t2, f(x,y)(x * y)), sum, dim)` <br/> Matrix multiplication of two tensors. This is the product of the two tensors summed along a shared dimension. | +| **max(t, dim)** | `reduce(t, max, dim)` <br/> Reduce the tensor with the `max` aggregator along dimension `dim`. | +| **median(t, dim)** | `reduce(t, median, dim)` <br/> Reduce the tensor with the `median` aggregator along dimension `dim`. If the dimension argument is omitted, this reduces over all dimensions. | +| **min(t, dim)** | `reduce(t, min, dim)` <br/> Reduce the tensor with the `min` aggregator along dimension `dim`. | +| **prod(t, dim)** | `reduce(t, prod, dim)` <br/> Reduce the tensor with the `product` aggregator along dimension `dim`. If the dimension argument is omitted, this reduces over all dimensions. | +| **random(n1, n2, ...)** | `tensor(i1[n1],i2[n2],...)(random(1.0))` <br/> Returns a tensor with random values between 0.0 and 1.0, uniform distribution. 
| +| **range(n)** | `tensor(i[n])(i)` <br/> Returns a tensor with increasing values. | +| **relu(t)** | `map(t, f(x)(max(0,x)))` <br/> Rectified linear unit. | +| **sigmoid(t)** | `map(t, f(x)(1.0 / (1.0 + exp(0.0-x))))` <br/> Returns the sigmoid of each element. | +| **softmax(t, dim)** | `join(map(t, f(x)(exp(x))), reduce(map(t, f(x)(exp(x))), sum, dim), f(x,y)(x / y))` <br/> The softmax of the tensor, i.e. `e^x / sum(e^x)`. | +| **sum(t, dim)** | `reduce(t, sum, dim)` <br/> Reduce the tensor with the `summation` aggregator along dimension `dim`. If the dimension argument is omitted, this reduces over all dimensions. | +| **top(n, t)** | `t * filter_subspaces(cell_order(t, max) < n, f(s)(s))` <br/> top N function: Picks top N cells in a simple mapped tensor. | +| **unpack_bits(t)** | unpacks bits from int8 input to 8 times as many floats <br/> The innermost indexed dimension will expand to have 8 times as many cells, each with a float value of either 0.0 or 1.0 determined by one bit in the 8-bit input value. Comparable to `numpy.unpackbits` which gives the same basic functionality. A minimal input such as `tensor<int8>(x[1]):[9]` would give output `tensor<float>(x[8]):[0,0,0,0,1,0,0,1]` (default bit-order is big-endian). As a very complex example, an input with type `tensor<int8>(foo{},x[3],y[11],z{})` will produce output with type `tensor<float>(foo{},x[3],y[88],z{})` where "foo", "x" and "z" are unchanged, as "y" is the innermost indexed dimension. | +| **unpack_bits(t, cell_type)** | unpacks bits from int8 input to 8 times as many values <br/> Same as above, but with optionally different cell\_type (could be `double` for example, if you will combine the output with other tensors using double). | +| **unpack\_bits(t, cell\_type, endian)** | unpacks bits from int8 input to 8 times as many values <br/> Same as above, but also optionally different endian for the bits; must be either `big` (default) or `little`. 
| +| **xw\_plus\_b(x, w, b, dim)** | `join(reduce(join(x, w, f(x,y)(x * y)), sum, dim), b, f(x,y)(x+y))` <br/> Matrix multiplication of `x` (usually a vector) and `w` (weights), with `b` added (bias). A typical operation for activations in a neural network layer, e.g. `sigmoid(xw_plus_b(x,w,b))`. | \ No newline at end of file diff --git a/mintlify-docs/en/reference/ranking/string-segment-match.mdx b/mintlify-docs/en/reference/ranking/string-segment-match.mdx new file mode 100644 index 0000000000..d13981554b --- /dev/null +++ b/mintlify-docs/en/reference/ranking/string-segment-match.mdx @@ -0,0 +1,163 @@ +--- +title: "String Segment Match" +--- + +The **string segment match** algorithm computes a set of metrics - the **string segment match metrics** - intended to capture all the information about how well a _query_ string matches a _field_ string, which is useful for document ranking in search, from the limited information usually available during matching in search engines. + +The algorithm works by locating _segments_, which are local regions in the field which contain one or more adjacent terms of the query. All segment start points in the field are explored, and the ones which produce the best overall segmentation are chosen. Informally, a segmentation is good if it contains few segments with the query matches close together. Example: + +<Frame> +![String segment match example](/assets/img/relevance/segment-example.png) +</Frame> + +Here two segments are found to cover the query. An alternative second segment is also found, but is discarded because it has inferior query term proximity. The other lone "Bush" instance is never considered because there is no segmentation causing "Bush" to be a segment start token (i.e. there is no lone "George"). + +A subset of the metrics is computed from the tokens _within_ located segments, while another subset of metrics characterizes the number and placement of the segments themselves. 
This allows the metrics to reflect the property of natural language that tokens which are close are often part of the same meaning (typically as parts of the same sentence) while somewhat more distant tokens are only weakly related. + +Queries typically consist of multiple intended segments, where each segment is continuous and in order, while the ordering between the segments is of little significance (although the more important segments tend to come first). + +By matching the query in term order to the best and fewest segments of the field, this algorithm makes use of the available field data to discover the likely query segmentation from the evidence. Explicit segmentation information can also be used in the form of connectivity scores which influences the segment scores and thus the chosen segments. + +## Source information + +The information used by this algorithm to calculate these metrics is (the last three are optional): + +- The position of each occurrence of a _matched_ term in the query and field +- The number of terms in the query and field +- A number per query term indicating the weight (importance) of each query term +- A number per query term indicating the relative frequency of the term +- A number per adjacent query term pair indicating the linguistic connectivity between the terms + +## Algorithm + +The algorithm locates segments from a given start query term as follows: + +```python expandable +i = the position of the first query term +j = the position of the first match of the query term at i +while (i < query.length) { + nextJ = the first location of query term i+1 at most proximityLimit steps to the right of j + if (nextJ not found) + nextJ = the first location of query term i+1 at most proximityLimit steps to the left backwards of j + + if (nextJ is found) { // Find next token in this segment + i = i+1 + j = nextJ + } + else { + nextJ = the first location of query term i+1 at any location to the right forwards from j + if (nextJ not found) + 
nextJ = the first location of query term i+1 at any location to the left backwards from j + if (nextJ not found) { // Skip a non-existing query term + i = i+1 + } + else { // End of segment + return i,j as the segment end + } + } +} +``` + +So a segment is a set of terms in the field which corresponds to an adjacent subsection of query terms where the gap between any adjacent query terms in the field is at most `proximityLimit` forwards or backwards, and where query terms not present in the field are ignored. + +Let's call the field term search order used above the _semantic distance_ between two field positions. So, for example + +- the semantic distance between j and nextJ is n if nextJ is located n places after j and n\<`proximityLimit` +- the semantic distance between j and nextJ is `proximityLimit`+n if nextJ is located n places to the _left_ of j and n\<`proximityLimit` + +The algorithm explores tokens and segments in the semantic distance space. The algorithm works with any definition of semantic distance. 
The algorithm will record for each segment start point: + +- metrics - The current best known metrics of the combined segments up to this point +- previousJ - The end j of the previous segment in the field (if any) +- i - the query term i which is the start of this segment +- semanticDistanceExplored - the distance from previousJ explored so far +- open - whether there are possibly more j's to find beyond semanticDistanceExplored + +With this, we can list the high level pseudocode of the algorithm: + +```js expandable +currentSegment=a segment start point starting at i=0 (the start of the query) +while (there are open segment start points) { + newSegment=find the next segment, at currentSegment.i with semanticDistance > currentSegment.semanticDistance + if (no newSegment) { + currentSegment.open = false + continue; + } + SegmentStartPoint existingStartPoint=find stored segment at start point newSegment.i+1 + if (no existingStartPoint) { + create and store a new (empty open) segment start point at newSegment.i+1 + } + else { + if (newSegment.score > existingStartPoint.score) { + existingStartPoint.metrics.score = newSegment.metrics.score + existingStartPoint.previousJ = newSegment.endJ + existingStartPoint.semanticDistanceExplored = newSegment.semanticDistance+1 + } + } + currentSegment=the next open start point (in the order they are found) +} +finalMetrics=metrics of segmentStartPoint at query.length +``` + +The `metric.score` deciding which of two segmentation paths is best is `absoluteProximity/segments^2`. Any combination of metrics which can be calculated for a partial segmentation may be used. + +Browse the [code](https://github.com/vespa-engine/vespa/tree/master/searchlib/src/main/java/com/yahoo/searchlib/ranking/features) for details. + +## Complexity + +The algorithm uses a dynamic programming technique to avoid recomputing earlier segments. A constant amount of data is stored per possible segment start point. 
Since there are at most as many start points as there are query terms, the memory complexity is `O(query.length)`. As the algorithm will try all possible segment starting points (up to a limit), and there is at most one starting point per query term, the time complexity is `O(query.length*total number of term occurrences)`. The average time complexity is `O(average segment length/average number of term occurrences)`. + +## Metric set + +The complete string segment match metrics set, computed by this algorithm, is: + +- match +- proximity +- completeness +- queryCompleteness +- fieldCompleteness +- orderness +- relatedness +- earliness +- longestSequenceRatio +- segmentProximity +- unweightedProximity +- absoluteProximity +- occurrence +- absoluteOccurrence +- weightedOccurrence +- weightedAbsoluteOccurrence +- significantOccurrence +- weight +- significance +- importance +- segments +- matches +- outOfOrder +- gaps +- gapLength +- longestSequence +- head +- tail +- segmentDistance + +These are documented as the features prefixed by `fieldMatch(name)`, see the [rank features reference](/en/reference/ranking/rank-feature-configuration). + +The metric set contains both low level, un-normalized metrics corresponding directly to a concept in the string segment match algorithm (e.g. `segments`, `gaps`), normalized basic features (e.g. `proximity`, `queryCompleteness`), normalized metrics combining lower level metrics into some useful part of the truth (e.g. `completeness`, `orderness`) as well as a metric combining most of the others into one normalized value (`match`). Applications will choose the subset of the metrics which captures the properties they determine are important, at the granularity which is convenient. + +## Configuration parameters + +The algorithm has the following configuration parameters, where the first three are fundamental parameters of the algorithm, and the others are used to normalize or combine certain features. 
Configure using [rank feature configuration](/en/reference/ranking/rank-feature-configuration): + +| Parameter | Default | Description | +| --- | --- | --- | +| `proximityLimit` | 10 | The maximum allowed gap within a segment. | +| `proximityTable` | 1/(2^(i/2)/3) for i in 9..0 followed by 1/2^(i/2) for i in 0..10 | The proximity table deciding the importance of separations of various distances. The table must have size proximityLimit\*2+1, where the first half is for reverse direction distances. The table must only contain values between 0 and 1, where 1 is "perfect" and 0 is "worst". | +| `maxAlternativeSegmentations` | 10000 | The maximum number of _alternative_ segmentations allowed in addition to the first one found. This will prefer to not consider iterations on segments that are far out in the field, and which start late in the query. | +| `maxOccurrences` | 100 | The number of occurrences the number of occurrences of each word is normalized against. This should be set as the number above which additional occurrences of the term have no real significance. | +| `proximityCompletenessImportance` | 0.9 | A number between 0 and 1 which determines the importance of field completeness in relation to query completeness in the `match` and `completeness` metrics. | +| `relatednessImportance` | 0.9 | The normalized importance of relatedness used in the `match` metric. | +| `earlinessImportance` | 0.05 | The importance of the match occurring early in the query, relative to segmentProximityImportance, occurrenceImportance and proximityCompletenessImportance in the `match` metric. | +| `segmentProximityImportance` | 0.05 | The importance of multiple segments being close to each other, relative to earlinessImportance, occurrenceImportance and proximityCompletenessImportance in the `match` metric. 
| +| `occurrenceImportance` | 0.05 | The importance of having many occurrences of the query terms, relative to earlinessImportance, segmentProximityImportance and proximityCompletenessImportance in the `match` metric. | +| `fieldCompletenessImportance` | 0.05 | A number between 0 and 1 which determines the importance of field completeness in relation to query completeness in the `match` and `completeness` metrics. | \ No newline at end of file diff --git a/mintlify-docs/en/reference/ranking/tensor.mdx b/mintlify-docs/en/reference/ranking/tensor.mdx new file mode 100644 index 0000000000..d7c499d07c --- /dev/null +++ b/mintlify-docs/en/reference/ranking/tensor.mdx @@ -0,0 +1,260 @@ +--- +title: "Tensor Reference" +--- + + +A tensor is a set of named _dimensions_ defining its _order_ and a set of values located in the space of those dimensions: + +- _Cell_: A value located in the dimension space. Consists of a cell address and the value at that address. +- _Address_: A set of key-values where each key is a _dimension_ from the set of dimensions of the tensor, and each value is a _label_ (integer or string) determining the cell's location in that dimension. + +The set of dimensions, cell values and cell address key-values can be of any size including zero. A dimension can be either mapped or indexed. Mapped dimensions use string identifiers as labels in the cell addresses (like a map), while indexed dimensions use integers in the range `[0,N>` (like an array), where N is the size of the dimension. + +## Tensor type spec + +Contained in [constant](/en/reference/schemas/schemas#constant) or [tensor field type](/en/reference/schemas/schemas#tensor). The dimensions of a tensor and the cell type defines its type. 
A tensor type contains a list of dimensions on the format: + +```bash +tensor<value-type>(dimension-1,dimension-2,...,dimension-N) +``` + +The value-type is one of: + +| Type | Description | +| :--- | :--- | +| float | 32-bit IEEE 754 floating point | +| double | 64-bit IEEE 754 floating point | +| int8 | signed 8-bit integer - see [performance considerations](/en/performance/feature-tuning#cell-value-types) | +| bfloat16 | first 16 bits of 32-bit IEEE 754 floating point - see [performance considerations](/en/performance/feature-tuning#cell-value-types) | + +A dimension is specified as follows: + +- `dimension-name{}` - a mapped dimension +- `dimension-name[size]` - an indexed dimension + +The tensor type for a tensor\<float\> with two mapped dimensions _x_ and _y_ looks like: + +```bash +tensor<float>(x{},y{}) +``` + +Example tensor with this type: + +```bash +{{x:a,y:b}:10.0, {x:c,y:d}:20.1} +``` + +The tensor type for a tensor\<float\> with two indexed dimensions _x_ and _y_ with sizes 3 and 2 respectively looks like: + +```bash +tensor<float>(x[3],y[2]) +``` + +Example tensor with this type (representing a matrix): + +```bash +{{x:0,y:0}:1, {x:0,y:1}:2.1, + {x:1,y:0}:3, {x:1,y:1}:5, + {x:2,y:0}:7, {x:2,y:1}:11} +``` + +Note that the labels are indexes in the range _[0,dimension-size\>_ + +A tensor\<double\> with both mapped and indexed dimensions is _mixed_: + +```bash +tensor<double>(key{},x[2]) +``` + +Example: + +```bash +{{key:a,x:0}:10, {key:b,x:0}:2.7, + {key:a,x:1}:5.3, {key:b,x:1}:-7 } +``` + +## Tensor literal form + +The tensor literal form is used in: + +- Tensors in queries, see [defining query feature types](/en/ranking/ranking-expressions-features#query-feature-types) and [tensor user guide](/en/ranking/tensor-user-guide#querying-with-tensors) +- Constant tensors in [stateless model evaluation](/en/reference/ranking/model-files) +- Building tensors using the [Java Tensor 
API](https://javadoc.io/doc/com.yahoo.vespa/vespajlib/latest/com/yahoo/tensor/Tensor.html) + +The tensor literal form is _not_ a JSON format. When sent inside a JSON format (like when you POST a query), it should be passed as a string. + +### General literal form + +The general literal form is verbose and explicit, can represent any tensor and is as follows (EBNF): + +```bash +literal tensor = ( tensor-type-spec ":" )? "{" cells "}" ; +cells = | cell , { "," cell } ; +cell = "{" address "}:" scalar ; +address = | element, { "," element } ; +element = dimension ":" label ; +dimension = integer | identifier ; +label = integer | identifier | 'string' | "string" ; +identifier = ["A"-"Z","a"-"z","0"-"9","_","@"](["A"-"Z","a"-"z","0"-"9","_","@","$"])* +``` + +For query inputs, the type should be declared as an input in the ranking profile, so the type spec is usually skipped. + +#### General literal form examples: + +An empty tensor: + +```bash +{} +``` + +A single value tensor with a single mapped dimension _x_: + +```bash +{ {x:foo}:5.0 } +``` + +A tensor with multiple values and mapped dimensions _x_ and _y_: + +```bash +{ {x:foo, y:bar}:5.0, {x:foo, y:baz}:7.0 } +``` + +A tensor where type is specified explicitly with a single indexed dimension _x_ representing a vector: + +```bash +tensor<float>(x[3]):{ {x:0}:3.0, {x:1}:5.0, {x:2}:7.0 } +``` + +A tensor with a type using the default value type (double) and quoted labels: + +```bash +tensor(key{}):{ {key:'key.1'}:3.0, {key:'key 2'}:5.0, {key:"key's"}:7.0 } +``` + +### Indexed short form + +Tensors where all dimensions are indexed can be written as numbers wrapped in square brackets in _right dimension adjacent_ order. If the type isn't declared already, this form requires an explicit tensor type. Note: Dimensions should be alphabetically ordered. + +Brackets must be nested according to the structure of the type, where values in dimensions to the right are closer than dimensions on the left. 
For backwards compatibility (not supported in expressions), cell values may also be given in the same order as a flat array. + +#### Indexed short form examples: + +A float 1d tensor in indexed form: + +```bash +tensor<float>(x[3]):[3.0, 5.0, 7.0] +``` + +A matrix in indexed form. Since the values for the right-most dimension (y) are adjacent, the value 3 is here assigned to the cell \{x:0,y:2}: + +```bash +tensor<float>(x[2],y[3]):[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]] +``` + +Deprecated: Since inner brackets can be omitted, the above is equivalent to + +```bash +tensor<float>(x[2],y[3]):[1.0, 2.0, 3.0, 4.0, 5.0, 6.0] +``` + +### Binary hex format + +Tensors representing binary data (indexed tensors with `int8` cell value type) may be represented using a hexadecimal representation, similar to the corresponding [JSON tensor feed format](/en/reference/schemas/document-json-format#tensor-hex-dump). If the tensor type is declared as `tensor<int8>(x[2],y[3])` input could be just + +```bash +0B22038405FF +``` + +which would be equivalent to: + +```bash +[[11, 34, 3], [-124, 5, -1]] +``` + +### Mapped short form + +Tensors with a single mapped dimension can be written by specifying just the label in that implicit dimension instead of a full address map. This form requires a type to be declared or explicitly specified. + +#### Map short form example: + +```bash +tensor<float>(key{}):{ key1:1.0, key2:2.0 } +``` + +### Mixed short form + +Tensors with a single mapped dimension and one or more indexed dimensions can be written by specifying the mapped dimension in the map short form and the values of each dense subspace on the indexed short form. This form requires a type to be known (declared) or specified. 
+ +#### Mixed short form example: + +A map of matrices: + +```bash +tensor<float>(key{},x[2],y[3]):{ key1:[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], + key2:[[1.1, 2.1, 3.1], [4.1, 5.1, 6.1]] } +``` + +This may even be combined with a hexadecimal format for the dense subspace: + +```bash +tensor<int8>(key{},x[5]):{ key1: 0102030405, key2: fffefdfcfb } +``` + +Tensors with a multiple mapped dimensions may use an extended variant of the mixed short form, where labels are nested. Again note that the type should be declared with dimensions in alphabetic order, so the nesting will follow a consistent ordering. + +```bash +tensor(category{},key{},x[2],y[3]):{ + cat1:{key1:[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], + key2:[[1.1, 2.1, 3.1], [4.1, 5.1, 6.1]]}, + cat2:{key1:[[7.3, 8.3, 9.3], [7.0, 8.0, 9.0]], + key3:[[7.5, 8.5, 9.5], [7.9, 8.9, 9.9]]} +} +``` +the equivalent fully-specified literal form would be +```bash +tensor(category{},key{},x[2],y[3]):{{category:cat1, key:key1, x:0, y:0}: 1.0, + {category:cat1, key:key1, x:0, y:1}: 2.0, + {category:cat1, key:key1, x:0, y:2}: 3.0, + {category:cat1, key:key1, x:1, y:0}: 4.0, + {category:cat1, key:key1, x:1, y:1}: 5.0, + {category:cat1, key:key1, x:1, y:2}: 6.0, + {category:cat1, key:key2, x:0, y:0}: 1.1, + {category:cat1, key:key2, x:0, y:1}: 2.1, + {category:cat1, key:key2, x:0, y:2}: 3.1, + {category:cat1, key:key2, x:1, y:0}: 4.1, + {category:cat1, key:key2, x:1, y:1}: 5.1, + {category:cat1, key:key2, x:1, y:2}: 6.1, + {category:cat2, key:key1, x:0, y:0}: 7.3, + {category:cat2, key:key1, x:0, y:1}: 8.3, + {category:cat2, key:key1, x:0, y:2}: 9.3, + {category:cat2, key:key1, x:1, y:0}: 7.0, + {category:cat2, key:key1, x:1, y:1}: 8.0, + {category:cat2, key:key1, x:1, y:2}: 9.0, + {category:cat2, key:key3, x:0, y:0}: 7.5, + {category:cat2, key:key3, x:0, y:1}: 8.5, + {category:cat2, key:key3, x:0, y:2}: 9.5, + {category:cat2, key:key3, x:1, y:0}: 7.9, + {category:cat2, key:key3, x:1, y:1}: 8.9, + {category:cat2, key:key3, x:1, y:2}: 
9.9} +``` + +## Tensor functions + +Tensor functions are listed in the [expressions](/en/reference/ranking/ranking-expressions#tensor-functions) documentation. + +## Tensor rank features + +The following rank features can be used to refer to or create tensors in ranking expressions. The tensors can come from the document, the query or a constant in the application package: + +- [attribute(tensor\_attribute)](/en/reference/ranking/rank-features#attribute(name)) +- [query(tensor\_feature)](/en/reference/ranking/rank-features#query(value)) +- [constant(tensor\_constant)](/en/reference/ranking/rank-features#constant(name)) +- [tensorFromWeightedSet(source, dimension)](/en/reference/ranking/rank-features#tensorFromWeightedSet(source,dimension)) +- [tensorFromLabels(source, dimension)](/en/reference/ranking/rank-features#tensorFromLabels(source,dimension)) +- [tensorFromStructs(attribute,key,value,type)](/en/reference/ranking/rank-features#tensorFromStructs(attribute,key,value,type)) + +Use the following reference documentation on how to use tensors in documents: + +- [Tensor field in schema](/en/reference/schemas/schemas#tensor) +- [Document JSON Format](/en/reference/schemas/document-json-format) \ No newline at end of file diff --git a/mintlify-docs/en/reference/release-notes/vespa7.mdx b/mintlify-docs/en/reference/release-notes/vespa7.mdx new file mode 100644 index 0000000000..ca0d679589 --- /dev/null +++ b/mintlify-docs/en/reference/release-notes/vespa7.mdx @@ -0,0 +1,231 @@ +--- +title: "Vespa 7 Release Notes" +sidebarTitle: "Vespa 7" +--- + +These notes document the changes between Vespa major versions 6 and 7. As documented in [Vespa versions](/en/learn/releases#versions), major versions are used to mark breaking compatibility, not to release significant new functionality (which instead happens on minor versions). 
However, even on major versions, compatibility is broken only in two specific ways: **Changes to defaults** means that applications may need to set some option explicitly to preserve earlier behavior, and **removal of deprecated functionality** means that applications *which use functionality that has earlier been deprecated* need to change to keep working. + +Most deprecated functionality causes warnings during compilation (Java API deprecations) or deployment (application package deprecations), however with web service APIs there is no way to emit deprecation warnings, and we have to rely on marking these as deprecated in the documentation. + +Given this, application owners need to do 3 tasks to be compatible with Vespa 7: + +- Review whether [changes to defaults](#changed-defaults) require additional settings in the application (**note that this is likely** on changing from 6 to 7 due to the changes to tokenization and stemming). +- Make sure there are no deprecation warnings on compilation and deployment on Vespa 6. +- Review the list of removed web service APIs and API parameters and make sure these are not used by clients of the application. + +As Vespa 7 does not introduce new functionality, it is as safe and mature as the versions of Vespa 6 preceding it. Upon release of Vespa 7, no further releases will be made of Vespa 6 for any reason. + +## Changes + +The following sections list the changes on moving from Vespa 6 to Vespa 7 which must be reviewed by applications. + +### Changed defaults + +The following defaults have changed: + +| Change | Configuration required to avoid change on Vespa 7 | +| --- | --- | +| `stemming: shortest` changed to `stemming: best` | Add [stemming: shortest](/en/reference/schemas/schemas#stemming) to the `schema` block of all schemas. | +| Default linguistics component changed from SimpleLinguistics to OpenNlpLinguistics, including language detection using Optimaize turned on by default. 
| Configure `com.yahoo.language.simple.SimpleLinguistics` as a component in services.xml as described in [linguistics in Vespa](/en/linguistics/linguistics) | +| The default format accepted by the Java HTTP client is changed from XML to [JSON](/en/reference/schemas/document-json-format) | To keep using XML: <br/><br/>• **Java API**: When calling `FeedClientFactory.create(sessionParams, ...)`, pass a `SessionParams` instance which has a `FeedParams` instance which has `dataFormat` set to `FeedParams.DataFormat.XML_UTF8` <br/>• **Command line**: Pass the `--xmloutput` option. | +| Query timeout changed from 5000 ms to 500 ms. | Set the [timeout](/en/reference/api/query#timeout) parameter explicitly in requests or query profiles. | +| [ranking.softtimeout.enable](/en/reference/api/query#ranking.softtimeout.enable) changed to default true | Set to `false` in requests or a query profile. | +| The default access log format is changed to [JSON](/en/operations/access-logging). | To keep the old proprietary format, set accesslog type=vespa in services.xml as described in [the accesslog reference](/en/reference/applications/services/container#accesslog). | +| Default return format in vespa-visit and vespa-get is changed to JSON | To get XML output specify the --xmloutput option | + +### JDK version + +Java components must be rebuilt with JDK 11 for the Vespa bundle-plugin to generate the correct set of imported packages for your OSGi bundles. + +### Removed Java APIs + +Classes and methods that were marked as deprecated in Vespa 6 are removed. If you get deprecation warnings for Vespa APIs when building your application, they must be fixed before migrating to Vespa 7. 
+ +### Container Runtime Environment + +The following maven artifacts are no longer provided at runtime: + +- commons-codec:commons-codec +- org.apache.httpcomponents:httpclient +- org.apache.httpcomponents:httpcore + +If you need any of these dependencies, they must be embedded in your bundle by adding them in scope 'compile' in pom.xml. + +### Removed HTTP APIs + +The following HTTP APIs are removed: + +| Name | Replacement | +| --- | --- | +| Legacy HTTP apis for document feeding:<br/>• /feed<br/>• /remove<br/>• /removelocation<br/>• /get<br/>• /visit<br/>• /document | The [/document/v1/](/en/reference/api/document-v1) web service API, or (for high throughput) the vespa-http-client. | + +### Removed HTTP API parameters + +The following HTTP API parameters are removed: + +| Name | Replacement | +| --- | --- | +| The `defidx` parameter in the search API | Use a custom searcher if this functionality is needed. | + +### Removed command line tools + +The following command line tools are removed: + +| Name | Replacement | +| --- | --- | +| Vespa spooler | Custom client using the Java HTTP client | + +### Removed settings from schemas + +The following settings are removed from [schemas](/en/reference/schemas/schemas): + +| Name | Replacement | +| --- | --- | +| header | None. This setting doesn't have any effect | +| body | None. 
This setting doesn't have any effect | + +### Removed constructs from services.xml + +The following tags and attributes are removed from services.xml: + +| Name | Replacement | +| --- | --- | +| ‘rotationScheme’ attribute in `<container><accesslog>` | None, rotation scheme ‘date’ will always be used | +| `<container><filter>` tag | `<container><http><filtering>` | + +### Removed metrics + +The following metrics are removed: + +| Name | Replacement | +| --- | --- | +| free/used/totalMemoryBytes | mem.heap.free/used/total | +| http.in.bytes | serverBytesReceived | +| http.out.bytes | serverBytesSent | +| http.requests | serverNumRequests | +| http.latency | serverTotalSuccessfulResponseLatency | +| http.out.firstbytetime | serverTimeToFirstByte | +| proc.uptime | serverStartedMillis | +| proton.\* | content.proton.\* (note that metrics might have different structure and names in new namespace) | +| vds.filestor.spi.\* | vds.filestor.alldisks.allthreads.\* | + +### Empty fields + +Fields containing no value will not be included in responses on Vespa 7. + +### Allowed characters in request URIs + +Vespa 6 allowed some special characters in raw form in the query component of request URIs. Vespa 7 requires these characters to be properly percent-encoded (RFC 2396). + +| Character | Percent-encoding | +| --- | --- | +| `\` | `%5C` | +| `^` | `%5E` | +| `` ` `` | `%60` | +| `{` | `%7B` | +| `\|` | `%7C` | +| `}` | `%7D` | + +### Changes to the default JSON result format + +The content of fields of type **position** in the default JSON query result format was rendered as XML on Vespa 6 but is rendered as JSON on Vespa 7. 
+ +Specifically, the content of a position field was rendered as a string like + +` <position x="-121996000" y="37401000" latlong="N37.401000;W121.996000"/> ` + +but is now instead rendered as a JSON map: + +```json +{ + "y": 37401000, + "x": -121996000, + "latlong": "N37.401000;W121.996000" +} +``` + +### Renamed metrics + +The following metrics are renamed: + +| Old Name | New Name | +| --- | --- | +| 95p\_query\_latency | query\_latency.95percentile | +| 99p\_query\_latency | query\_latency.99percentile | +| active\_queries | active\_queries.average | +| athenz-tenant-cert.expiry.seconds | athenz-tenant-cert.expiry.seconds.last | +| bytes | vds.datastored.alldisks.bytes.average | +| configserver.cacheChecksumElems | configserver.cacheChecksumElems.last | +| configserver.cacheConfigElems | configserver.cacheConfigElems.last | +| configserver.delayedResponses | configserver.delayedResponses.count | +| configserver.failedRequests | configserver.failedRequests.count | +| configserver.hosts | configserver.hosts.last | +| configserver.latency | configserver.latency.average | +| configserver.requests | configserver.requests.count | +| configserver.sessionChangeErrors | configserver.sessionChangeErrors.count | +| configserver.zkAvgLatency | configserver.zkAvgLatency.last | +| configserver.zkConnections | configserver.zkConnections.last | +| configserver.zkMaxLatency | configserver.zkMaxLatency.last | +| configserver.zkOutstandingRequests | configserver.zkOutstandingRequests.last | +| configserver.zkZNodes | configserver.zkZNodes.last | +| content.cluster-controller.cluster-state-change.count | cluster-controller.cluster-state-change.count | +| content.proton.memoryusage.max | content.proton.documentdb.memory\_usage.allocated\_bytes.max | +| content.proton.transport.docsum.latency.average | content.proton.docsum.latency.average | +| degraded\_queries | degraded\_queries.rate | +| deletefailed | vds.idealstate.delete\_bucket.done\_failed.rate | +| deleteok | 
vds.idealstate.delete\_bucket.done\_ok.rate | +| deletepending | vds.idealstate.delete\_bucket.pending.average | +| diskqueuesize | vds.filestor.alldisks.queuesize.average | +| diskqueuewait | vds.filestor.alldisks.averagequeuewait.sum.average | +| diskusage | content.proton.documentdb.disk\_usage.last | +| docs | vds.datastored.alldisks.docs.average | +| document\_requests | content.proton.docsum.docs.rate | +| documents\_active | content.proton.documentdb.documents.active.last | +| documents\_inmemory | content.proton.documentdb.index.docs\_in\_memory.last | +| documents\_processed | documents\_processed.rate | +| documents\_ready | content.proton.documentdb.documents.ready.last | +| documents\_removed | content.proton.documentdb.documents.removed.last | +| documents\_total | content.proton.documentdb.documents.total.last | +| empty\_results | empty\_results.rate | +| error.backend\_communication\_error | error.backend\_communication\_error.rate | +| error.backends\_oos | error.backends\_oos.rate | +| error.empty\_document\_summaries | error.empty\_document\_summaries.rate | +| error.internal\_server\_error | error.internal\_server\_error.rate | +| error.invalid\_query\_parameter | error.invalid\_query\_parameter.rate | +| error.invalid\_query\_transformation | error.invalid\_query\_transformation.rate | +| error.misconfigured\_server | error.misconfigured\_server.rate | +| error.plugin\_failure | error.plugin\_failure.rate | +| error.result\_with\_errors | error.result\_with\_errors.rate | +| error.timeout | error.timeout.rate | +| error.unhandled\_exception | error.unhandled\_exception.rate | +| error.unspecified | error.unspecified.rate | +| failed\_queries | failed\_queries.rate | +| handled.requests | handled.requests.count | +| hits\_per\_query | hits\_per\_query.average | +| joinfailed | vds.idealstate.join\_bucket.done\_failed.rate | +| joinok | vds.idealstate.join\_bucket.done\_ok.rate | +| joinpending | vds.idealstate.join\_bucket.pending.average | +| 
logd.processed.lines | logd.processed.lines.count | +| max\_query\_latency | query\_latency.max | +| mean\_query\_latency | query\_latency.average | +| mergefailed | vds.idealstate.merge\_bucket.done\_failed.rate | +| mergeok | vds.idealstate.merge\_bucket.done\_ok.rate | +| mergepending | vds.idealstate.merge\_bucket.pending.average | +| peak\_qps | peak\_qps.max | +| queries | queries.rate | +| query\_latency | content.proton.transport.query.latency.average | +| query\_requests | content.proton.transport.query.count.rate | +| search\_connections | search\_connections.average | +| sentinel.uptime | sentinel.uptime.last | +| slobrok.heartbeats.failed | slobrok.heartbeats.failed.count | +| splitfailed | vds.idealstate.split\_bucket.done\_failed.rate | +| splitok | vds.idealstate.split\_bucket.done\_ok.rate | +| splitpending | vds.idealstate.split\_bucket.pending.average | +| totalhits\_per\_query | totalhits\_per\_query.average | +| visit | vds.visitor.allthreads.created.sum.rate | +| visitorlifetime | vds.visitor.allthreads.averagevisitorlifetime.sum.average | +| visitorqueuewait | vds.visitor.allthreads.averagequeuewait.sum.average | + +### Other changes + +Vespa will no longer implicitly load the "search" components in containers which load the "document-api" components. If your application depends on "search" functionality in a container specifying the `<document-api>` tag in services.xml, make sure this container also specifies the `<search>` tag. 
\ No newline at end of file diff --git a/mintlify-docs/en/reference/release-notes/vespa8-geo-migration-guide.mdx b/mintlify-docs/en/reference/release-notes/vespa8-geo-migration-guide.mdx new file mode 100644 index 0000000000..d9a3ac0b2b --- /dev/null +++ b/mintlify-docs/en/reference/release-notes/vespa8-geo-migration-guide.mdx @@ -0,0 +1,117 @@ +--- +title: "Position fields - Vespa 8 migration" +--- + +Refer to [Vespa 8 release notes](/en/reference/release-notes/vespa8) - this is a guide on how to migrate from Vespa 7 to Vespa 8 when using position fields. The guide is relevant for applications having a `position` field in a schema. + +For the rest of this document, we assume a schema containing: + +```bash +field myfield type position {...} +``` + +## Step 1: Upgrade to Vespa 8 in geo legacy mode + +Add the following to *services.xml* under the root `services` tag, see [legacy-v7-json-rendering](/en/reference/querying/default-result-format#geo-position-rendering): + +```xml +<services> + <legacy> + <v7-geo-positions>true</v7-geo-positions> + </legacy> +``` + +## Step 2: Result rendering + +If the position field is only used as a filter, and not returned in result sets, skip this section. In Vespa 7, a position field could be rendered as: + +```json +"myfield.position": { + "y": 63453700, + "x": 10460800, + "latlong": "N63.453700;E10.460800" +} +``` + +and optionally: + +```json +"myfield": { + "y": 63453700, + "x": 10460800 +} +``` + +With Vespa 8, the result format is changed to: + +```json +"myfield": { + "lat": 63.4537, + "lng": 10.4608 +} +``` + +Note that this is also the Vespa 8 feeding format. + +<Warning> +**Important:** + +Change all code that parses query results to expect the new format. This includes programs that parse the result JSON and [Searchers](/en/applications/searchers). 
+</Warning> + +On Vespa 7, the `distance` rank feature is output as: + +```bash +"myfield.distance": 14352, +``` + +On Vespa 8, use the summary feature instead: + +```bash +distance(myfield).km +``` + +## Step 3: Query API + +Change from using the `pos.ll` / `pos.radius` / `pos.bb` / `pos.attribute` parameters, e.g.: + +```bash +pos.ll=63.4225N+10.3637E&pos.radius=5km +``` + +to using [YQL](../../querying/query-language.html): + +```bash +where geoLocation(myfieldname, 63.5, 10.5, "5 km") +``` + +## Step 4: Feeding format and /document/v1/ API + +The Vespa 7 feeding format can be used on Vespa 8, it is however recommended changing to: + +```json +"myfield": { + "lat": 63.4537, + "lng": 10.4608 +} +``` + +This is the same format as in query results. + +The result format when using GET / VISIT in [document/v1/](/en/reference/api/document-v1) is changed from: + +```json +"myfield": { + "y": 63453700, + "x": 10460800 +} +``` + +to: + +```json +"myfield": { + "lat": 63.4537, + "lng": 10.4608 +} +``` \ No newline at end of file diff --git a/mintlify-docs/en/reference/release-notes/vespa8.mdx b/mintlify-docs/en/reference/release-notes/vespa8.mdx new file mode 100644 index 0000000000..94a576f2ec --- /dev/null +++ b/mintlify-docs/en/reference/release-notes/vespa8.mdx @@ -0,0 +1,423 @@ +--- +title: "Vespa 8 Release Notes" +sidebarTitle: "Vespa 8" +--- + +This document lists the changes between Vespa major versions 7 and 8. As documented in [Vespa versions](/en/learn/releases#versions), new functionality in Vespa is introduced in minor versions, while major versions are used to mark releases breaking compatibility. As Vespa 8 does not introduce any new functionality, it is as safe and mature as the versions of Vespa 7 preceding it. No further releases will be made of Vespa 7, except possible critical security fixes. 
+ +## Overview + +The compatibility breaking changes in Vespa 8 fall into these categories: + +- [Changes to default behaviour](#changed-defaults) +- [Application package structure and settings](#application-package-changes) - deprecated settings and constructs in e.g. *schemas* and *services.xml* are removed. +- [Java APIs](#java-api-changes) - deprecated APIs are removed or revoked from Vespa's [public API](https://javadoc.io/doc/com.yahoo.vespa/annotations/latest/com/yahoo/api/annotations/PublicApi.html) surface. +- [Container runtime environment](#container-runtime) - incompatible changes to the Java build and runtime environments. +- [HTTP API changes](#removed-http-api-parameters) +- [Removed command line tools](#removed-command-line-tools) +- [Removed or renamed metrics](#removed-or-renamed-metrics) +- [Changes to the document selection language](#document-selection-exact-type-matching) +- [Security related changes](#security) +- [Operating system support](#operating-system) +- [Other changes](#other-changes), not covered by any of the above categories. + +To ensure their applications are compatible with Vespa 8, application owners must: + +- Review the list of [changes to defaults](#changed-defaults) and add the necessary options if you want to preserve behavior from Vespa 7. +- Make sure there are no deprecation warnings when compiling against Vespa 7. +- Review the [application package changes](#application-package-changes) and make sure there are no deployment warnings when deploying on Vespa 7. +- Review the list of [HTTP API changes](#removed-http-api-parameters) and update any clients of the application. +- Review the remaining sections of this document, and update the application and its environment accordingly. + +Usage of deprecated Java APIs produces warnings during compilation, while *deployment warnings* are produced for application package deprecations and most changes to the container runtime environment. 
In hosted Vespa or Vespa Cloud, deployment warnings are shown in the application's console view. However, for other types of changes, there is no way to emit deprecation warnings, so these are only described in this document and other Vespa documentation. + +The following sections list all the changes from Vespa 7 to Vespa 8 in detail. + +## Changed defaults + +These changes may break clients, and impact both performance and user experience. Applications that are in production and rely on these defaults should make configuration changes to keep the existing behavior when upgrading to Vespa 8. This can be done on Vespa 7, *before* upgrading - using [bucket tests](/en/applications/testing#feature-switches-and-bucket-tests) can be useful. + +The following defaults have changed: + +| Change | Configuration required to avoid change on Vespa 8 | +| --- | --- | +| The default [simple query language](/en/reference/querying/simple-query-language) (for end users) is changed from `all` to [weakAnd](/en/ranking/wand#weakand).<br/> <Note>**Note:**<br/><br/> This might increase recall, and increase latency significantly if the document corpus is large.</Note> | Explicitly pass [model.type](/en/reference/api/query#model.type)\=all in queries or set this parameter in the relevant [query profiles](/en/querying/query-profiles): `<field name="model.type">all</field>`. | +| The default grammar in [YQL userInput](/en/reference/querying/yql#userinput) is changed from `all` to [weakAnd](/en/ranking/wand#weakand). <br/> <Note>**Note:**<br/><br/> This might increase recall, and increase latency significantly if the document corpus is large.</Note> | Prefix `userInput` in YQL by `{grammar: "all"}`. | +| The value of the services.xml [legacy flag v7-geo-positions](/en/reference/querying/default-result-format#geo-position-rendering) changes from true to false. See the [Vespa 8 geo migration guide](/en/reference/release-notes/vespa8-geo-migration-guide). 
| Add to services.xml: ` <legacy> <v7-geo-positions>true</v7-geo-positions> </legacy> ` | +| Fields of type `map` [change JSON rendering](/en/reference/querying/default-result-format#inconsistent-map-rendering) in search results. | Add overrides in your query profile(s) for the `renderer.json.jsonMaps` parameter. | +| Fields of type `weightedset` [change JSON rendering](/en/reference/querying/default-result-format#inconsistent-weightedset-rendering) in search results. | Add overrides in your query profile(s) for the `renderer.json.jsonWsets` parameter. | +| Expressions used as summary features are no longer rendered wrapped in `rankingExpression()`. | Specify configuration in your rank profile as shown in [this example](/en/reference/querying/default-result-format#summary-features-wrapped-in-rankingexpression). | +| Fields of type `raw` are now presented as a base64 encoded string in summary, the same way as in json feed format. Earlier, you needed to add `raw-as-base64-in-summary` in your schema file to get this behavior. | If you have fields of type "raw" and you must have the old summary behavior for them in search results, add the line `raw-as-base64-in-summary : false` in your schema definition. | +| The default tensor format in responses has changed from 'long' to 'short': Tensors in query results, document API responses, and stateless model evaluation are rendered in the short form appropriate for their type (if any), documented [here](/en/reference/schemas/document-json-format#tensor). | **Queries**: Pass [presentation.format.tensors](/en/reference/api/query#presentation.format.tensors)\=long in queries, or set this parameter in the relevant [query profiles](/en/querying/query-profiles). <br/><br/> **Document/v1**: Pass the parameter `format.tensors=long` in requests. <br/><br/> **Stateless model evaluation**: Pass the parameter `format.tensors=long` in requests. 
| +| The default fieldset when getting or visiting documents is now `[document]` in all cases, meaning you only get those fields that are declared in the "document" block of the schema (generated fields are not included). This was already the default for the `/document/v1` API when fetching or visiting documents of a single known document type. Now it is also the default when visiting at the root level, for the command line tools `vespa-visit` and `vespa-get`, and if you use the programmatic `documentapi` from java to fetch documents. | In most cases there is no difference between `[all]` and `[document]` fieldsets - so no action is needed. If the old behavior is needed you can:<br/><br/> • For the command line tools, specify the fieldset as `-l "[all]"` to include generated fields.<br/> • For `/document/v1` specify `[all]` as the value for the `fieldSet` parameter.<br/>• If using `documentapi` from java, add the line `params.setFieldSet("[all]");` to modify your `VisitorParameters` value , or `params = params.withFieldSet("[all]");` to modify your `DocumentOperationParameters` value. <br/><br/>If you run document processors to generate fields and want those returned, it may be more useful to declare a fieldset with just those fields you actually want as output instead. | +| Vespa will now limit the number of groups and hits in [grouping query results](/en/querying/grouping) when `max` is not specified explicitly in grouping expressions. The default value is determined by [grouping.defaultMaxGroups](/en/reference/api/query#grouping.defaultmaxgroups)/ [grouping.defaultMaxHits](/en/reference/api/query#grouping.defaultmaxhits). The parameter [grouping.globalMaxGroups](/en/reference/api/query#grouping.globalmaxgroups) must now be overridden in query profiles to allow grouping expressions that may return unbounded or large results. | • [grouping.defaultMaxGroups](/en/reference/api/query#grouping.defaultmaxgroups) changed from `-1` to `10`. 
<br/>• [grouping.defaultMaxHits](/en/reference/api/query#grouping.defaultmaxhits) changed from `-1` to `10`. <br/>• [grouping.globalMaxGroups](/en/reference/api/query#grouping.globalmaxgroups) changed from `-1` to `10000`. <br/>• [grouping.defaultPrecisionFactor](/en/reference/api/query#grouping.defaultprecisionfactor) changed from `1.0` to `2.0`. | +| Vespa [access logs](/en/operations/access-logging) are compressed with [zstd](https://github.com/facebook/zstd). | Add a config override under `<container>` in `services.xml`: <br/><br/> `<config name="container.core.access-log">`<br/> `<fileHandler>` <br/>`<compressionFormat>GZIP</compressionFormat>` | + +## Application package changes + +### Removed settings from schemas + +The following settings are removed from [schema](/en/reference/schemas/schemas): + +| Name | Replacement | +| --- | --- | +| attribute: huge | None. Setting *huge* on an attribute doesn't have any effect, the code is rewritten to support it by default. | +| [compression](/en/reference/schemas/schemas#compression) | None. Document compression is not needed, as compression is always enabled. | +| body (inside a field definition) | None. Deprecated since before Vespa 7, had no effect in Vespa 7. | +| header (inside a field definition) | None. Deprecated since before Vespa 7, had no effect in Vespa 7. | +| field type weightedset`<float>` | Because floating-point types are inherently imprecise they are badly suited as keys in maps and sets. If you feel the need for such data consider using something like:<br/><br/> `struct weightedfloat {`<br/> `field value type float {}`<br/> `field weight type int {}`<br/> `}` <br/> `field myfield type array<weightedfloat> {` <br/> `...` | +| field type map`<float,anything>` | Using "float" as the key in a map is no longer supported, see `weightedset<float>` above. | +| field type weightedset`<double>` | Using "double" as the key in a set is no longer supported, see `weightedset<float>` above. 
| +| field type map`<double,anything>` | Using "double" as the key in a map is no longer supported, see `weightedset<float>` above. | +| field type weightedset`<uri>` | Using complex types as the key in a set is no longer supported, see `weightedset<float>` above. | +| field type map`<uri,anything>` | Using complex types as the key in a map is no longer supported, see `weightedset<float>` above. | +| Old syntax for array types like "string\[\]" | Write as `array<string>` instead. | +| Rank functions must have different names in a rank-profile | Only the last of two functions with the same name would be used. Remove or rename the first one. | +| Conflicting sorting settings are now rejected | Only keep the last of the conflicting settings. | +| A summary-field may only be added once in a document-summary block | Remove duplicates. | +| Schema and document should have the same name | Change name of the schema, so it is equal to the contained document. | + +### TensorFlow import + +Vespa 8 removes support for direct import of [TensorFlow models](/en/ranking/tensorflow). [ONNX](https://onnx.ai/) is now the preferred ML model format, and works both for [ranking](/en/ranking/onnx) and [stateless model evaluation](/en/ranking/stateless-model-evaluation). ONNX contains tools to convert models from TensorFlow to ONNX, but Vespa will no longer provide this. + +### Changed semantics in services.xml + +The following elements and attributes in services.xml have new semantics: + +| Name | Description | +| --- | --- | +| `<nodes><redundancy>` | It is now an error to configure a number of nodes (per group) that is smaller than the configured redundancy. It used to generate an application-level warning, with the redundancy implicitly reduced. Remove any `<nodes>` override in the non-prod environments, as the node count is automatically adjusted. 
| + +### Removed constructs from services.xml + +The following elements and attributes are removed from services.xml: + +| Parent element | Removed construct | Description | +|---|---|---| +| `<admin>` | `<filedistribution>` | Configuring up/download rates is not supported | +| | `<configserver>` | Use [`configservers`](/en/reference/applications/services/admin#configservers) element instead | +| `<config>` | *namespace* attribute | The namespace must be included in the *name* attribute. | +| | `<myArray operation="append">` syntax | Previously used to append items to config arrays. Use [`item`](/en/reference/applications/config-files#configuring-arrays) instead. | +| `<container>` | *jetty* attribute | Removed, had no effect on Vespa 7. | +| | `<nodes>` jvm attributes | JVM attributes *jvmargs, allocated-memory, jvm-options, jvm-gc-options* renamed and moved to [`JVM`](/en/reference/applications/services/container#jvm) subelement | +| | `<client>` | Previously used for setting up client providers. Use a [`request handler`](/en/applications/request-handlers) instead. | +| | `<handler><clientBinding>` | Client bindings are no longer supported. | +| `<content>` | `<dispatch>` | Removed due to removal of *vespa-dispatch-bin*, [details.](#vespa-dispatch-bin-process-is-removed) | +| | `<tuning><dispatch><min-group-coverage>` | Use [`min-active-docs-coverage`](/en/reference/applications/services/content#min-active-docs-coverage) instead. | +| | `<tuning><dispatch><use-local-node>` | Ignored, the local node will automatically be preferred when appropriate. | +| | `<engine><proton><tuning><searchnode><flushstrategy><native><transactionlog><maxentries>` | Use [`maxsize`](/en/reference/applications/services/content#flushstrategy-native-transactionlog-maxsize) instead. Vespa 7 documentation: The maximum number of entries in the [`transaction log`](/en/content/proton#transaction-log) for a document type before running flush, default 1000000 (1 M). 
| +| | `<engine><proton><tuning><searchnode><summary><store><logstore><chunk><maxentries>` | Use [`maxsize`](/en/reference/applications/services/content#summary-store-logstore-chunk-maxsize) instead. Vespa 7 documentation: Maximum number of documents in a chunk. See *summary.log.chunk.maxentries*. | +| `<services>` (root) | `<jdisc>` | Use [`container`](/en/reference/applications/services/container) instead. | +| | `<service>` | Running generic services is no longer supported. | +| | `<clients>` | Client load types are deprecated and ignored. | + +### *application/* folder support removed + +Application package content (*services.xml*, the *schemas/* folder, etc.) is supposed to be put at the root level in the application zip, such that the unzipped application package has *./services.xml*, etc. + +But it used to be that the application package content could be placed inside an *application* directory. This support is removed on Vespa 8. + +### *searchdefinitions/* folder is deprecated + +Search definition schemas should now be placed in the *schemas/* folder. The old folder will still work on Vespa 8, but causes a deprecation warning upon deployment. + +## Java API changes + +### Removed Java packages + +| Package | Description | +| --- | --- | +| *com.yahoo.docproc.util* | Removed | +| *com.yahoo.jdisc.test* | No longer [public API](https://javadoc.io/doc/com.yahoo.vespa/annotations/latest/com/yahoo/api/annotations/PublicApi.html) | +| *com.yahoo.log.event* | No longer [public API](https://javadoc.io/doc/com.yahoo.vespa/annotations/latest/com/yahoo/api/annotations/PublicApi.html) | +| *com.yahoo.statistics* | Removed | +| *com.yahoo.vespa.curator* | No longer [public API](https://javadoc.io/doc/com.yahoo.vespa/annotations/latest/com/yahoo/api/annotations/PublicApi.html) | +| *com.yahoo.documentapi.messagebus.loadtypes* | Load types are no longer supported. Use corresponding method overloads without *LoadType* or *LoadTypeSet* parameters instead. 
| + +### Removed Java Classes and methods + +Classes and methods that were marked as deprecated in Vespa 7 are removed. If deprecation warnings are emitted for Vespa APIs when building the application, these must be fixed before migrating to Vespa 8. The sections below contain only the most notable changes. + +The following classes are no longer public API and have been moved to Vespa internal packages: + +| Package | Class | Migration advice | +|---|---|---| +| *com.yahoo.config.subscription* | All classes, except [`ConfigGetter`](https://javadoc.io/doc/com.yahoo.vespa/config/latest/com/yahoo/config/subscription/ConfigGetter.html) | Config should be [`injected`](/en/applications/configuring-components#use-config-in-code) to your component class constructor. | +| *com.yahoo.docproc* | *DocprocExecutor* | For unit tests, follow the steps in the [`document-processing`](https://github.com/vespa-engine/sample-apps/blob/master/examples/document-processing/src/test/java/ai/vespa/example/album/ProductTypeRefinerDocProcTest.java) sample app. If you need a *DocumentTypeManager* in production code, it can be directly [`injected`](/en/applications/dependency-injection) to your component class constructor. | +| | *DocprocService* | For unit tests, follow the steps in the [`document-processing`](https://github.com/vespa-engine/sample-apps/blob/master/examples/document-processing/src/test/java/ai/vespa/example/album/ProductTypeRefinerDocProcTest.java) sample app. If you need a *DocumentTypeManager* in production code, it can be directly [`injected`](/en/applications/dependency-injection) to your component class constructor. | +| | *DocumentOperationWrapper* | No replacement - if needed, contact the Vespa team for advice. | +| | *HandledProcessingException* | | +| | *ProcessingEndpoint* | | +| | *TransientFailureException* | | +| *com.yahoo.log* | *VespaFormatter* | No replacement. 
| + +The following methods are removed: + +| Method | Migration advice | +| --- | --- | +| *com.yahoo.documentapi.DocumentAccess.createDefault()* | Container components can have a *DocumentAccess* injected via their constructor. For use outside the container, e.g. in a custom command line tool, use the new method *createForNonContainer()*. | +| *com.yahoo.log.LogSetup.getLogHandler()* | No replacement. | + +### Breaking changes to Java APIs + +The Javadoc of the deprecated types/members should document the replacement API. The below list is not exhaustive - some smaller and trivial changes are not listed. + +| Type(s) | Description | +| --- | --- | +| *com.yahoo.processing* | Removed use of Guava's *ListenableFuture* in type signatures. Replacement uses *CompletableFuture*. | +| *com.yahoo.search.handler.HttpSearchResponse.waitableRender()* | Removed use of Guava's *ListenableFuture* in type signature. The method is replaced with *asyncRender()*. | +| *com.yahoo.jdisc.handler* | Removed use of Guava's *ListenableFuture* in type signatures. Replacement uses *CompletableFuture* | +| *com.yahoo.searchlib.rankingexpression.rule* | Removed use of Guava collection types in type signatures. | +| *com.yahoo.search.rendering.JsonRenderer* | Removed use of Jackson types from class signature. | +| *com.yahoo.jdisc.Container* | Removed use of Guice types from class signature. | +| *com.yahoo.vdslib.VisitorStatistics* | Removed all *set/getSecondPass*\-related methods. | +| *com.yahoo.documentapi* | Removed all methods taking in a *com.yahoo.documentapi.messagebus.DocumentProtocol.Priority* argument. Explicit operation priorities are deprecated and should not be set by the client. 
| + +### Removed support for built-in XML factories + +The Jdisc container has historically supported injection of built-in providers for the following XML factories: + +- *javax.xml.datatype.DatatypeFactory* +- *javax.xml.parsers.DocumentBuilderFactory* and *SAXParserFactory* +- *javax.xml.stream.XMLEventFactory*, *XMLInputFactory* and *XMLOutputFactory* +- *javax.xml.transform.TransformerFactory* +- *javax.xml.validation.SchemaFactory* +- *javax.xml.xpath.XPathFactory* + +These are now removed. Please check for more recent alternatives if you need this type of XML processing. + +### Deprecated Java APIs + +A few redundant APIs have been deprecated because they have replacements that provide the same, or better, functionality. We advise you switch to the replacement to reduce future maintenance cost. + +| Type(s) | Replacement | +| --- | --- | +| *com.yahoo.container.jdisc.LoggingRequestHandler* | Use *com.yahoo.container.jdisc.ThreadedHttpRequestHandler* instead. | +| *com.yahoo.log.LogLevel* | Use *java.util.logging.Level* instead. | + +## Container Runtime Environment + +### JDK version + +Vespa 8 upgrades the JDK version from 11 to 17. To ensure full compatibility, all container components should be rebuilt with JDK 17 before being deployed on Vespa 8. + +### Changes to provided maven artifacts + +[Guava](https://search.maven.org/artifact/com.google.guava/guava) has been upgraded from version 20.0 to 27.1. If you are using APIs that have been removed from the library since version 20, your code must be updated. In most cases, it should be trivial to find replacement APIs in Java's standard library. + +The following Maven artifacts are no longer provided runtime to user application plugins by the Jdisc container: + +| Artifact | Notes | +|---|---| +| [`*com.fasterxml.jackson.jaxrs:jackson-jaxrs-base*`](https://search.maven.org/artifact/com.fasterxml.jackson.jaxrs/jackson-jaxrs-base) | JSON input/output handling for JAX-RS implementations, e.g. 
Jersey | +| [`*com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider*`](https://search.maven.org/artifact/com.fasterxml.jackson.jaxrs/jackson-jaxrs-json-provider) | JSON input/output handling for JAX-RS implementations, e.g. Jersey | +| [`*com.fasterxml.jackson.module:jackson-module-jaxb-annotations*`](https://search.maven.org/artifact/com.fasterxml.jackson.module/jackson-module-jaxb-annotations) | Jackson data binding with JAXB annotations. | +| [`*com.google.code.findbugs:jsr305*`](https://search.maven.org/artifact/com.google.code.findbugs/jsr305) | Annotations in package *javax.annotation[.\*]*, e.g. *Nullable* and *Nonnull*. | +| [`*com.google.inject.extensions:guice-assistedinject*`](https://search.maven.org/artifact/com.google.inject.extensions/guice-assistedinject) | Guice extensions.<br />For component injection see [Depending on another component](/en/applications/dependency-injection#depending-on-another-component) | +| [`*com.google.inject.extensions:guice-multibindings*`](https://search.maven.org/artifact/com.google.inject.extensions/guice-multibindings) | Guice extensions. | +| [`*javax.annotation:javax.annotation-api*`](https://search.maven.org/artifact/javax.annotation/javax.annotation-api) | Annotations in package *javax.annotation[.\*]*, e.g. *ManagedBean* and *Resource*. | +| [`*javax.validation:validation-api*`](https://search.maven.org/artifact/javax.validation/validation-api) | Javax bean validation, used by Jersey 2. | +| [`*org.eclipse.jetty:*`](https://search.maven.org/search?q=g:org.eclipse.jetty) | The Eclipse Jetty Project. | +| [`*org.apache.felix:org.apache.felix.framework*`](https://search.maven.org/artifact/org.apache.felix/org.apache.felix.framework) | Felix OSGi framework. | +| *org.apache.felix:org.apache.felix.log* | Felix OSGi framework. | +| [`*org.apache.felix:org.apache.felix.main*`](https://search.maven.org/artifact/org.apache.felix/org.apache.felix.main) | Felix OSGi framework. 
| +| [`*org.bouncycastle:bcpkix-jdk15on*`](https://search.maven.org/artifact/org.bouncycastle/bcpkix-jdk15on) | Bouncy Castle crypto API. | +| [`*org.bouncycastle:bcprov-jdk15on*`](https://search.maven.org/artifact/org.bouncycastle/bcprov-jdk15on) | Bouncy Castle crypto provider. | +| *org.glassfish.*:* | Jersey 2. All related artifacts are removed. | +| [`*org.json:json*`](https://search.maven.org/artifact/org.json/json) | See [vespa-engine/vespa#14762](https://github.com/vespa-engine/vespa/issues/14762) | +| [`*org.javassist:javassist*`](https://search.maven.org/artifact/org.javassist/javassist) | Bytecode manipulation, used by Jersey 2. | +| [`*org.jvnet.mimepull:mimepull*`](https://search.maven.org/artifact/org.jvnet.mimepull/mimepull) | MIME Streaming Extension, used by Jersey 2. | +| [`*org.lz4:lz4-java*`](https://search.maven.org/artifact/org.lz4/lz4-java) | Compression library. | + +Make sure your application OSGi bundle embeds the required artifacts from the above list. An artifact can be embedded by adding it in scope *compile* to the *dependencies* section in pom.xml. Typically, these artifacts have until now been used in scope *provided*. Use `mvn dependency:tree` to check whether any of the listed artifacts are directly or transitively included as dependencies. + +As always, remove any dependencies that are not required by your project. Consult the Maven documentation on [Dependency Exclusions](https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html#dependency-exclusions) for how to remove a transitively included dependency. + +An example adding *org.json:json* as a compile scoped dependency: + +```xml +<dependencies> + ... + <dependency> + <groupId>org.json</groupId> + <artifactId>json</artifactId> + <version>20211205</version> + <scope>compile</scope> + </dependency> + ... 
+</dependencies> +``` + +## Removed HTTP API parameters + +The following HTTP API parameters are removed from the [query API](/en/reference/api/query): + +| Standard API path | Parameter name | Replacement | +| --- | --- | --- | +| /search/ | *pos.ll* | add a [geoLocation](/en/reference/querying/yql#geolocation) item to the query | +| /search/ | *pos.radius* | add a [geoLocation](/en/reference/querying/yql#geolocation) item to the query | +| /search/ | *pos.attribute* | add a [geoLocation](/en/reference/querying/yql#geolocation) item to the query | +| /search/ | *pos.bb* | Support for restricting search by a bounding box, using the `pos.bb` query parameter, has been removed - add a [geoLocation](/en/reference/querying/yql#geolocation) item to the query | + +## Removed command line tools + +### vespa-http-client + +The `vespa-http-client` command line tool is removed on Vespa 8 and is replaced by the new [vespa-feed-client](/en/clients/vespa-feed-client). The new client uses [HTTP/2](/en/performance/http2) and the [Document v1 API](/en/writing/document-v1-api-guide). + +The underlying rest API used by the vespa-http-client will still be available and supported on Vespa 8. You can therefore continue to use an old client distributed with Vespa 7 to feed to a Vespa 8 installation. Note that there will not be released any updates for vespa-http-client after the initial Vespa 8 release, while fixes and security updates to the rest API implementation will continue as part of Vespa 8. We strongly recommend that you migrate away from vespa-http-client in a timely manner. + +## Removed or renamed metrics + +The following metrics are renamed: + +| Old Name | New name | Description | +| --- | --- | --- | +| *vds.filestor.alldisks.\** | vds.filestor.\* | *alldisks* has been removed from the metric name. | +| *vds.visitor.\*.sum.\** | vds.visitor.\*.\* | *sum* has been removed from the metric name. 
| +| *vds.filestor.\*.sum.\** | vds.filestor.\*.\* | *sum* has been removed from the metric name. | +| *vds.distributor.\*.sum.\** | vds.distributor.\*.\* | *sum* has been removed from the metric name. | + +The following metrics are removed: + +| Name | Description | +| --- | --- | +| *http.status.401.rate* | Use *http.status.4xx.rate* with dimension *statusCode*\==401 | +| *http.status.403.rate* | Use *http.status.4xx.rate* with dimension *statusCode*\==403 | +| *content.proton.documentdb.matching.query\_collateral\_time.\** | Use *content.proton.documentdb.matching.query\_setup\_time.\** instead | +| *content.proton.documentdb.matching.rank\_profile.query\_collateral\_time.\** | Use *content.proton.documentdb.matching.rank\_profile.query\_setup\_time.\** instead | +| *vds.visitor.allthreads.averagevisitorlifetime~~.sum~~.average* | Use .sum/.count instead | +| *vds.visitor.allthreads.averagequeuewait~~.sum~~.average* | Use .sum/.count instead | +| *vds.visitor.allthreads.queuesize~~.sum~~.average* | Use .sum/.count instead | +| *vds.visitor.allthreads.completed~~.sum~~.average* | Use .sum/.count instead | +| *vds.visitor.allthreads.averagemessagesendtime~~.sum~~.average* | Use .sum/.count instead | +| *vds.visitor.allthreads.averageprocessingtime~~.sum~~.average* | Use .sum/.count instead | +| *vds.filestor~~.alldisks~~.queuesize.average* | Use .sum/.count instead | +| *vds.filestor~~.alldisks~~.averagequeuewait.average* | Use .sum/.count instead | +| *vds.filestor~~.alldisks~~.allthreads.put~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.filestor~~.alldisks~~.allthreads.remove~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.filestor~~.alldisks~~.allthreads.get~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.filestor~~.alldisks~~.allthreads.update~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.filestor~~.alldisks~~.allthreads.createiterator~~.sum~~.latency.average* | Use .sum/.count instead | +| 
*vds.filestor~~.alldisks~~.allthreads.visit~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.filestor~~.alldisks~~.allthreads.remove\_location~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.filestor~~.alldisks~~.allthreads.deletebuckets~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.distributor.puts~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.distributor.removes~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.distributor.updates~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.distributor.gets~~.sum~~.latency.average* | Use .sum/.count instead | +| *vds.distributor.visitor~~.sum~~.latency.average* | Use .sum/.count instead | + +## Exact matching of document types in selection language + +The [document selection language](/en/reference/writing/document-selector-language) now uses *exact* matching for document types rather than *inheritance* ("is-a") matching. + +Example with two minimal document schemas: + +- `document my_doc_type {}` +- `document my_extended_doc_type inherits my_doc_type {}` + +Previously, the selection expression `my_doc_type` would match both a document instance of type `my_doc_type` *and* `my_extended_doc_type`. It will now *only* match a document of type `my_doc_type`. + +## Security + +### Strict mode enabled by default + +*Strict mode* for request filtering in the jdisc container is enabled by default in Vespa 8. See documentation on the [strict-mode](/en/reference/applications/services/http#filtering) attribute in services.xml for details. + +### Request headers controlling remote host/port in access log + + +The jdisc container will use the *X-Forwarded-For* and *X-Forwarded-Port* request headers to set the remote host and port respectively in the access log. 
The following request headers will no longer be handled by default: + +- y-ra +- yahooremoteip +- client-ip +- y-rp + +## Operating system support for Vespa artifacts + +### RPMs + +The supported OS for Vespa RPMs changes from [CentOS Linux 7](https://www.centos.org/centos-linux/) to [CentOS Stream 8](https://www.centos.org/centos-stream/) for Vespa 8. RPMs will still be built and distributed on [Fedora Copr](https://copr.fedorainfracloud.org/coprs/g/vespa/vespa/). If you install Vespa RPMs you will have to upgrade your OS to [CentOS Stream 8](https://www.centos.org/centos-stream/). + +### OCI containers (Docker containers) + +The base image used in our OCI containers changes from [docker.io/centos:7](https://hub.docker.com/_/centos) to [quay.io/centos/centos:stream8](https://quay.io/repository/centos/centos?tab=tags) for Vespa 8. This means that the container image is built and tested on systems running kernel version *4.18.0* (current kernel for CentOS Stream 8). If you use Vespa's container image, you should upgrade the hosts running the containers to the same or a newer kernel version. + +## Other changes + +### Unknown rank profiles + +Queries that specify a rank profile which does not exist in all the schemas being queried will now fail instead of falling back to using the `default` profile. Queries to multiple schemas must use a rank profile that exists in all of them, which can be ensured by [inheriting](/en/schemas/inheritance-in-schemas) a common schema. + +### Unknown summary classes + +Queries that specify a non-existent [summary class](/en/querying/document-summaries) will now fail, instead of being rendered empty. Queries to multiple schemas must use a summary class that exists in all of them, which can be ensured by [inheriting](/en/schemas/inheritance-in-schemas) a common schema. + +### The "qrserver" service name + +Vespa containers are in general using "container" as their service name. 
However, a container cluster that has declared neither [document-processing](/en/reference/applications/services/docproc) nor [document-api](/en/reference/applications/services/container#document-api) used to be named "qrserver". On Vespa 8 all container clusters use the service name "container". This affects the output of all metrics APIs, as well as the Vespa log output. + +### Container access logs + +The folder for container access logs has been moved from `$VESPA_HOME/logs/vespa/qrs/` to `$VESPA_HOME/logs/vespa/access/`. + +The default compression format has changed from gzip to zstd, see [changed defaults](#changed-defaults). + +### ONNX output in summary features + +When defining an ONNX model output in summary features, Vespa 8 ensures that the summary feature name is `onnx` rather than `onnxModel` as in previous versions. + +### Changes in rankfeatures + +Vespa can calculate and return all [rank-features](/en/reference/api/query#ranking.listfeatures) in the `rankfeatures` summary field. Vespa 8 contains some changes to this list: + +- `now` is removed +- `bm25(field)` is added +- `matches(field)` is added + +### The "storage" message bus routing policy is removed + +The "storage" routing policy was removed in early Vespa 7, and clients specifying it have been forwarded to the content routing policy for backwards compatibility. The forwarding is removed on Vespa 8 and clients need to be updated. + +Replace all usages of the "storage" policy with "content", which behaves identically. + +### vespa-dispatch-bin process is removed + +The dispatch functionality is moved into the Vespa Container and the *vespa-dispatch-bin* process is removed. As this is not a public interface, the default was switched to **not** using vespa-dispatch-bin in Vespa-7.109.10. 
The process was removed in subsequent Vespa releases: + +|||||| +| --- | --- | --- | --- | --- | +| [Dispatch](/en/querying/query-api) | Content cluster | dynamically allocated in 19100 - 19899 range | `$VESPA_HOME/sbin/vespa-dispatch-bin` | Dispatcher, communicates between container and content nodes. Can be multi-level in a hierarchy | + + +Rolling upgrade note: A rolling upgrade over Vespa-7.109.10 should work with no extra steps. + +### YQL format + +- When Vespa outputs a YQL statement, it no longer ends the string with a semicolon. Terminating statements with a semicolon continues to be optional and legal input. +- [Annotations](/en/reference/querying/yql#annotations) are not enclosed in `[]` brackets (still valid input). +- The annotation name is not quoted (still valid input). + +Example Vespa 7 / Vespa 8: + +```js +where text contains ([{"distance": 5}]near("a", "b")); +where text contains ({distance: 5}near("a", "b")) +``` + +### Upgrade procedure + +The [upgrade procedure](/en/operations/self-managed/live-upgrade) has been simplified in Vespa 8 (when upgrading from Vespa 7 to Vespa 8 or between Vespa 8 versions). When upgrading from Vespa 7.x to Vespa 7.y replace item 3 [Upgrade config servers](/en/operations/self-managed/live-upgrade#upgrade-config-server) in the [upgrade procedure](/en/operations/self-managed/live-upgrade) with this procedure: +- When upgrading the config servers, the nodes of the application cannot receive config until they are upgraded themselves. We need to set all of them in standalone mode before continuing by running this command on each node: + + ```bash + $ vespa-configproxy-cmd -m setmode memorycache + ``` + Each node will automatically reattach itself when it is upgraded. +- Install the new Vespa version on the config servers and [restart](/en/operations/self-managed/admin-procedures#vespa-start-stop-restart) them one by one. 
Wait until it is up again, look in vespa log for "Changing health status code from 'initializing' to 'up'" or use [health checks](/en/operations/self-managed/configuration-server#troubleshooting). +- Redeploy and activate the application: + + `$` [`vespa deploy`](/en/clients/vespa-cli#deployment) \ No newline at end of file diff --git a/mintlify-docs/en/reference/release-notes/vespa9.mdx b/mintlify-docs/en/reference/release-notes/vespa9.mdx new file mode 100644 index 0000000000..38b6e52def --- /dev/null +++ b/mintlify-docs/en/reference/release-notes/vespa9.mdx @@ -0,0 +1,170 @@ +--- +title: "Vespa 9 Release Notes" +sidebarTitle: "Vespa 9 (upcoming)" +--- + +This document lists the changes between Vespa major versions 8 and 9. As documented in [Vespa versions](https://vespa.ai/releases#versions), new functionality in Vespa is introduced in minor versions, while major versions are used to mark releases breaking compatibility. As Vespa 9 does not introduce any new functionality, it is as safe and mature as the versions of Vespa 8 preceding it. + +Note: This is work in progress, Vespa 9 is tentatively planned for release in Q1 2026. + +## Overview + +The compatibility breaking changes in Vespa 9 fall into these categories: + +- [Changes to default behaviour](#changed-defaults) +- [Application package structure and settings](#application-package-changes) - deprecated settings and constructs in e.g. *schemas* and *services.xml* are removed. +- [Java APIs](#java-api-changes) - deprecated APIs are removed or revoked from Vespa's [public API](https://javadoc.io/doc/com.yahoo.vespa/annotations/latest/com/yahoo/api/annotations/PublicApi.html) surface. +- [Container runtime environment](#container-runtime) - incompatible changes to the Java build and runtime environments. 
+- [HTTP API changes](#removed-http-api-parameters) +- [Removed command line tools](#removed-command-line-tools) +- [Removed or renamed metrics](#removed-or-renamed-metrics) +- [Security related changes](#security) +- [Operating system support](#operating-system-support-for-vespa-artifacts) +- [Other changes](#other-changes), not covered by any of the above categories. + +To ensure their applications are compatible with Vespa 9, application owners must: + +- Review the list of [changes to defaults](#changed-defaults) and add the necessary options if you want to preserve behavior from Vespa 8. +- Make sure there are no deprecation warnings when compiling against Vespa 8. +- Review the [application package changes](#application-package-changes) and make sure there are no deployment warnings when deploying on Vespa 8. +- Review the list of [HTTP API changes](#removed-http-api-parameters) and update any clients of the application. +- Review the remaining sections of this document, and update the application and its environment accordingly. + +Usage of deprecated Java APIs produces warnings during compilation, while *deployment warnings* are produced for application package deprecations and most changes to the container runtime environment. In hosted Vespa or Vespa Cloud, deployment warnings are shown in the application's console view. However, for other types of changes, there is no way to emit deprecation warnings, so these are only described in this document and other Vespa documentation. + +The following sections list all the changes from Vespa 8 to Vespa 9 in detail. + +## Changed defaults + +These changes may break clients, and impact both performance and user experience. Applications that are in production and rely on these defaults should make configuration changes to keep the existing behavior when upgrading to Vespa 9. This can be done on Vespa 8, *before* upgrading - using [bucket tests](/en/applications/testing#feature-switches-and-bucket-tests) can be useful. 
+ +The following defaults have changed: + +| Change | Configuration required to avoid change on Vespa 9 | +| --- | --- | + +## Application package changes + +### Removed settings from schemas + +The following settings are removed from [schema](/en/reference/schemas/schemas): + +| Name | Replacement | +| --- | --- | + +### Changed semantics in services.xml + +The following elements and attributes in services.xml have new semantics: + +| Name | Description | +| --- | --- | + +### Removed constructs from services.xml + +The following elements and attributes are removed from services.xml: + +| Parent element | Removed construct | Description | +| --- | --- | --- | + +### *searchdefinitions/* folder support removed + +Schemas should now be placed in the *schemas/* folder. + +## Java API changes + +### Removed Java packages + +| Package | Description | +| --- | --- | + +### Removed Java Classes and methods + +Classes and methods that were marked as deprecated in Vespa 8 are removed. If deprecation warnings are emitted for Vespa APIs when building the application, these must be fixed before migrating to Vespa 9. The sections below contain only the most notable changes. + +The following classes are no longer public API and have been moved to Vespa internal packages: + +| Package | Class | Migration advice | +| --- | --- | --- | +| com.yahoo.search.predicate | *PredicateIndex* + related classes | The Predicate Search Java Library is removed (*com.yahoo.vespa:predicate-search*). Use [predicate fields](/en/schemas/predicate-fields.) in Vespa instead. | + +The following methods are removed: + +| Method | Migration advice | +| --- | --- | + +### Breaking changes to Java APIs + +The Javadoc of the deprecated types/members should document the replacement API. The below list is not exhaustive - some smaller and trivial changes are not listed. 
+ +| Type(s) | Description | +| --- | --- | + +### Deprecated Java APIs + +A few redundant APIs have been deprecated because they have replacements that provide the same, or better, functionality. We advise you switch to the replacement to reduce future maintenance cost. + +| Type(s) | Replacement | +| --- | --- | + +## Container Runtime Environment + +### JDK version + +Vespa 9 upgrades the JDK version from 17 to 25. Java artifacts built against older JDK versions will still be compatible with Vespa 9 (JDK 25). The opposite will not be possible - Vespa 8 (JDK 21) is not compatible with newer JVM byte code. It's possible though to use the *\--release* option for *javac* to target an older JDK version. + +### Changes to provided maven artifacts + +The following Maven artifacts are no longer provided runtime to user application plugins by the Jdisc container: + +| Artifact | Notes | +| --- | --- | + +Make sure your application OSGi bundle embeds the required artifacts from the above list. An artifact can be embedded by adding it in scope *compile* to the *dependencies* section in pom.xml. Typically, these artifacts have until now been used in scope *provided*. Use `mvn dependency:tree` to check whether any of the listed artifacts are directly or transitively included as dependencies. + +As always, remove any dependencies that are not required by your project. Consult the Maven documentation on [Dependency Exclusions](https://maven.apache.org/guides/introduction/introduction-to-optional-and-excludes-dependencies.html#dependency-exclusions) for how to remove a transitively included dependency. + +An example adding *org.json:json* as a compile scoped dependency: + +```xml +<dependencies> + ... + <dependency> + <groupId>org.json</groupId> + <artifactId>json</artifactId> + <version>20211205</version> + <scope>compile</scope> + </dependency> + ... 
+</dependencies> +``` + +## Removed HTTP API parameters + +The following HTTP API parameters are removed from the [query API](/en/reference/api/query): + +| Standard API path | Parameter name | Replacement | +| --- | --- | --- | + +## Removed command line tools + +## Removed or renamed metrics + +The following metrics are renamed: + +The following metrics are removed: + +## Security + +## Operating system support for Vespa artifacts + +### OCI containers (Docker containers) + +## Other changes + +### Changes in rankfeatures + +Vespa can calculate and return all [rank-features](/en/reference/api/query#ranking.listfeatures) in the `rankfeatures` summary field. Vespa 9 contains some changes to this list: + +### Upgrade procedure + +See [upgrade procedure](/en/operations/self-managed/live-upgrade) for how to upgrade. \ No newline at end of file diff --git a/mintlify-docs/en/reference/schemas/document-field-path.mdx b/mintlify-docs/en/reference/schemas/document-field-path.mdx new file mode 100644 index 0000000000..e2fdcd0757 --- /dev/null +++ b/mintlify-docs/en/reference/schemas/document-field-path.mdx @@ -0,0 +1,70 @@ +--- +title: "Document field path reference" +--- + +The field path syntax is used several places in Vespa to traverse documents through arrays, structs, maps and sets and generate a set of values matching the expression. 
Examples - If the document contains the field `mymap`, and it has a key `mykey`, the expression returns the value of the map for that key: + +```bash +mymap{mykey} +``` + +Returns the value at index 3 of the `myarray` field, if set: + +```bash +myarray[3] +``` + +Returns the value of the `value1` field in the struct field `mystruct`, if set: + +```bash +mystruct.value1 +``` + +If `mystructarray` is an array field containing structs, returns the values of `value1` for each of those structs: + +```bash +mystructarray.value1 +``` + +The following syntax can be used for the different field types, and can be combined recursively as required: + +## Maps/weighted Sets + +| | | +| :--- | :--- | +| \<mapfield\>\{\<keyvalue\>\} | Retrieve the value of a specific key | +| \<mapfield\>\{$\<variablename\>\} | Retrieve all values, setting the [variable](#variables) to the key value for each | +| \<mapfield\>.key | Retrieve all key values | +| \<mapfield\>.value | Retrieve all values | +| \<mapfield\> | Retrieve all keys | + +In the case of weighted sets, the value referenced above is the weight of the item. + +## Array + +| | | +| :--- | :--- | +| \<arrayfield\>[\<index\>] | Retrieve the value at a specific index | +| \<arrayfield\>[$\<variablename\>] | Retrieve all values in the array, setting the [variable](#variables) to the index of each | +| \<arrayfield\> | Retrieve all values in the array | + +## Struct + +| | | +| :--- | :--- | +| \<structfield\>.\<subfield\> | Return the value of the struct field | +| \<structfield\> | Return the value of all subfields | + +<Info> +Note that when specifying values of subscripts of maps, weighted sets and arrays, only numbers and strings may be used. +</Info> + +## Variables + +It can be useful to reference several field paths using a common variable. For instance, if you have an array of structs, you may want to use document selection on fields within the same array index together. 
This could be done by an expression like: + +```bash +mydoctype.mystructarray{$x}.field1=="foo" AND mydoctype.mystructarray{$x}.field2=="bar" +``` + +Variables either have a `key` value (for maps and weighted sets), or an `index` value (for arrays). Variables cannot be used across such contexts (that is, a map key cannot be used to index into an array). \ No newline at end of file diff --git a/mintlify-docs/en/reference/schemas/document-json-format.mdx b/mintlify-docs/en/reference/schemas/document-json-format.mdx new file mode 100644 index 0000000000..cfa4df686e --- /dev/null +++ b/mintlify-docs/en/reference/schemas/document-json-format.mdx @@ -0,0 +1,972 @@ +--- +title: "Document JSON format reference" +sidebarTitle: "document JSON format" +--- + +This document describes the JSON format used for sending document operations to Vespa. Field types are defined in the [schema reference](/en/reference/schemas/schemas#field). This is a reference for: + +- JSON representation of [document operations](#document-operations) (put, get, remove, update) +- JSON representation of [field types](#field-types) in Vespa documents +- JSON representation of addressing fields for update, and [update operations](#update-operations) + +Also refer to [encoding troubleshooting](/en/linguistics/troubleshooting-encoding). 
+ +```bash expandable +Document operations + Put + Get + Remove + Update + Test and set + Create +Field types + string + int + long + bool + byte + float + double + position + predicate + raw + uri + array + weightedset + Tensors + Indexed tensors short form + Short form for tensors with a single mapped dimension + Mixed tensors short form + Cell values as binary data (hex dump format) + Tensor verbose form + struct + map + reference + Empty fields +Update operations + assign + Single value field + Assign tensor + Assign struct field + Replacing entire struct + Replace individual struct fields + Assign map field + Map to primitive value + Map to struct + Arrays + Array of primitive values + Array of struct + Weighted set field + Clearing a field + add + Add array elements + Add to weighted set + Add to tensor + Remove elements in composites + Remove from weighted set field + Remove from map field + Remove from tensor + Arithmetic + match + Tensor modify + Fieldpath +``` + +## Field types + +Unless otherwise noted, these formats are used both for returned values in read operations, and as input in write operations (put operations and field assign update operations). + + +| | | +| :--- | :--- | +| string | <CodeBlock>```json "name": "Polly" ```</CodeBlock> Feeding in an empty string ("") for a field will have the same effect as not feeding a value for that field, and the field will not be rendered in the document API and in document summaries. 
| +| int | <CodeBlock>```json "age": 42 ```</CodeBlock> | +| long | <CodeBlock>```json "current_time_ms": 1742837807000 ```</CodeBlock> | +| bool | *true* or *false*: <CodeBlock>```json "alive": false ```</CodeBlock> | +| byte | <CodeBlock>```json "tinynumber": 128 ```</CodeBlock> | +| float | <CodeBlock>```json "weight": 123.4567 ```</CodeBlock> | +| double | <CodeBlock>```json "weight": 123.4567 ```</CodeBlock> | +| position | A position is encoded as a lat/lng object: <CodeBlock>```json "mypos": { "lat": 37.4181488, "lng": -122.0256157 } ```</CodeBlock> See [Geo Search](/en/querying/geo-search) for details. | +| predicate | A [predicate](/en/reference/schemas/schemas#predicate) is represented with a string: <CodeBlock>```json "predicate_field": "gender in [Female] and age in [20..30] and pos in [1..4]" ```</CodeBlock> | +| raw | The content of a [raw](/en/reference/schemas/schemas#raw) field is represented as a base64-encoded string: <CodeBlock>```json "raw_field": "VW5rbm93biBhcnRpc3QgZnJvbSB0aGUgbW9vbg==" ```</CodeBlock> When used as *summary* field it will be rendered as a base64-encoded string. | +| uri | A URI is a string: <CodeBlock>```json "url": "https://www.yahoo.com/" ```</CodeBlock> | +| array | Arrays are represented as JSON arrays. <CodeBlock>```json "int_array_field": [ 123, 456, 789 ] "string_array_field": [ "item 1", "item 2", "item 3" ] ```</CodeBlock> An array of struct is represented as a JSON array of JSON objects matching the defined struct field: <CodeBlock>```json "array_of_struct_field": [ { "first_name": "Chris", "last_name": "Martin" }, { "first_name": "James", "last_name": "Hetfield" }, { "first_name": "Diana", "last_name": "Krall" } ] ```</CodeBlock> Feeding in an empty array (\[\]) for a field will have the same effect as not feeding a value for that field, and the field will not be rendered in the document API and in document summaries. | +| weightedset | Weighted sets are represented as maps where the value is the weight. 
Note, even if the key is not a string as such, it will be represented as a string in the JSON format. <CodeBlock> ```json "int_weighted_set": { "123": 2, "456": 78 } "string_weighted_set": { "item 1": 143, "item 2": 6 } ``` </CodeBlock> Feeding in an empty weightedset ({}) for a field will have the same effect as not feeding a value for that field, and the field will not be rendered in the document API and in document summaries. | +| tensor | **Indexed tensors short form:** An array where the values are ordered in the standard value order, where indexes of dimensions to the right are incremented before indexes to the left, where dimensions are ordered alphabetically (such that, e.g. with a tensor with dimensions x,y the "y" values for each value of "x" are adjacent): <CodeBlock> ```json "tensorfield": [ 2.0, 3.0, 5.0, 7.0 ] ``` </CodeBlock> The cells array can optionally be nested in an object under the key "values". This is how tensor values are returned [by default](/en/reference/api/document-v1#format.tensors), along with another key "type" containing the tensor type. <br/><br/> **Short form for tensors with a single mapped dimension**: A map with the dimension key as key and the value as value.<CodeBlock> ```json "tensorfield": { "a": 2.0, "b": 3.0 } ``` </CodeBlock> The cells object can optionally be nested in an object under the key "cells". This is how tensor values are returned [by default](/en/reference/api/document-v1#format.tensors), along with another key "type" containing the tensor type.<br/> **Mixed tensors short form:** If the tensor has a single sparse dimension: A map where the key is the value of that dimension and the value is a nested array containing the values of the dense subspace within that key. 
If the tensor has multiple sparse dimensions: An array nested in a "blocks" element where the elements consist of a map with the keys "address" and "values", where "address" is a map with the sparse dimensions and their values (as in cells), and "values" is a nested array containing the values of the dense subspace within that address.<br/><br/> Example - single sparse dimension: <br/><br/> <CodeBlock> ```json "tensorfield": { "x1":[2.0,3.0], "x2":[4.0,5.0] } ``` </CodeBlock> Example - multiple sparse dimensions: <CodeBlock> ```json "tensorfield": { "blocks": [ {"address":{"x":"x1","y":"y2"},"values":[2.0,3.0]}, {"address":{"x":"x2","y":"y2"},"values":[4.0,5.0]} ] } ``` </CodeBlock> This is how tensor values are returned [by default](/en/reference/api/document-v1#format.tensors), along with another key "type" containing the tensor type. <br/><br/> **Cell values as binary data** For dense and mixed tensors it's possible to fill the cell values directly from binary data sent in as a string of hexadecimal digits. The simplest possible case is if you have a vector with `int8` cell value type: <CodeBlock> ```json "tensorfield": { "values": "FF00118022FE" } ``` </CodeBlock> This can be used to represent the value `tensor<int8>(x[6]):[-1,0,17,-128,34,-2]`. <br/><br/> For other cell types, it's possible to take the bits of the floating-point value, interpreted directly as an unsigned integer of appropriate width (16, 32, or 64 bits) and use the hex dump (respectively 4, 8, or 16 hex digits per cell) in a string. 
For "float" cells (32-bit IEE754 floating-point) a simple snippet for converting a cell could look like this: <CodeBlock> ```python import struct def float_to_hex(f: float): return format(struct.unpack('=I', struct.pack('=f', f))[0], '08X') ``` </CodeBlock> As an advanced combination example, if you have a tensor with type `tensor<float>(tag{},x[3])` this input could be used, shown with corresponding output: <CodeBlock> ```json "mixedtensor": { "foo": "3DE38E393E638E393EAAAAAB", "bar": "3EE38E393F0E38E43F2AAAAB", "baz": "3F471C723F638E393F800000" } "mixedtensor":{ "type":"tensor<float>(tag{},x[3])", "blocks":{ "foo":[0.1111111119389534,0.2222222238779068,0.3333333432674408], "bar":[0.4444444477558136,0.5555555820465088,0.6666666865348816], "baz":[0.7777777910232544,0.8888888955116272,1.0] } } ```</CodeBlock> **Verbose:** [Tensor](/en/ranking/tensor-user-guide) fields may be represented as an array of cells: <CodeBlock> ```json "tensorfield": [ { "address": { "x": "a", "y": "0" }, "value": 2.0 }, { "address": { "x": "a", "y": "1" }, "value": 3.0 }, { "address": { "x": "b", "y": "0" }, "value": 4.0 }, { "address": { "x": "b", "y": "1" }, "value": 5.0 } ] ```</CodeBlock> This works for any tensor but is verbose, so shorter forms specific to various tensor types are also supported. Use the shortest form applicable to your tensor type for the best possible performance. <br/> The cells array can optionally be nested in an object under the key "cells". This is how tensor values are returned [by default](/en/reference/api/document-v1#format.tensors), along with another key "type" containing the tensor type. 
| +| struct | <CodeBlock>```json "mystruct": { "intfield": 123, "stringfield": "foo" } ```</CodeBlock> | +| map | The JSON dictionary key must be a string, even if the map key type in the schema is not a string: <CodeBlock> ```json "int_to_string_map": { "123": "foo", "456": "bar", "789": "foobar" } ```</CodeBlock> Feeding in an empty map (`{}`) for a field will have the same effect as not feeding a value for that field, and the field will not be rendered in the document API and in document summaries. | +| reference | String with document ID referring to a [parent document](/en/schemas/parent-child): <CodeBlock> ```json "artist_ref": "id:mynamespace:artists::artist-1" ```</CodeBlock> | + + +## Empty fields + +In general, fields that have not received a value during feeding will be ignored when rendering the document. They are considered empty fields. However, certain field types have some values that cause them to be considered empty. For instance, the empty string (`""`) is considered empty, as well as the empty array (`[]`). See the above table for more information for each type. + +## Document operations + +Refer to [reads and writes](/en/writing/reads-and-writes) for details - alternatives: + +- Use the [Vespa CLI](/en/clients/vespa-cli#documents). +- [/document/v1/](/en/reference/api/document-v1): This API accepts one operation per request, with the document ID encoded in the URL. +- [Vespa feed client](/en/clients/vespa-feed-client): Java APIs / command line tool to feed document operations asynchronously to Vespa, over HTTP. 
+ +### Put + +The "put" payload has a "put" operation and ["fields"](#field-types) containing field values; ([/document/v1/ example](/en/writing/document-v1-api-guide#post)): + +```json +{ + "put": "id:mynamespace:music::123", + "fields": { + "title": "Best of Bob Dylan" + } +} +``` + +### Get + +"get" does not have a payload - the response has the same "fields" object as in "put", and also "id" and "pathId" fields ([/document/v1/ example](/en/writing/document-v1-api-guide#get)): + +```json +{ + "pathId": "/document/v1/mynamespace/music/docid/123", + "id": "id:mynamespace:music::123", + "fields": { + "title": "Best of Bob Dylan" + } +} +``` + +### Remove + +The "remove" payload only has a "remove" operation ([/document/v1/ example](/en/writing/document-v1-api-guide#delete)): + +```json +{ + "remove": "id:mynamespace:music::123" +} +``` + +### Update + +The "update" payload has an "update" operation and "fields". Note: Each field must contain an [update operation](#update-operations), not just the field value directly; ([/document/v1/ example](/en/writing/document-v1-api-guide#put)): + +```json +{ + "update": "id:mynamespace:music::123", + "fields": { + "title": { + "assign": "The best of Bob Dylan" + } + } +} +``` + +Flags can be set to add a [test and set](#test-and-set) condition, or to allow the update to [create](#create) a new document (a so-called "upsert" operation). + +#### Test and set + +An optional _condition_ can be added to operations to specify a _test and set_ condition - see [conditional writes](/en/writing/document-v1-api-guide#conditional-writes). The value of the _condition_ is a [document selection](/en/reference/writing/document-selector-language), encoded as a string. 
Example: Increment the _sales_ field only if it is already equal to 999 ([/document/v1/ example](/en/writing/document-v1-api-guide#conditional-writes)): + +```json +{ + "update": "id:mynamespace:music::bob/BestOf", + "condition": "music.sales==999", + "fields": { + "sales": { + "increment": 1 + } + } +} +``` + +<Info> + **Note:** Use _documenttype.fieldname_ in the condition, not only _fieldname_. +</Info> + +If the condition is not met, a 412 response code is returned. + +#### create (create if nonexistent) + +**Updates** to nonexistent documents are supported using _create_; ([/document/v1/ example](/en/writing/document-v1-api-guide#create-if-nonexistent)): + +```json +{ + "update": "id:mynamespace:music::bob/BestOf", + "create": true, + "fields": { + "title": { + "assign": "The best of Bob Dylan" + } + } +} +``` + +Since Vespa 8.178, _create_ can also be used together with conditional **Put** operations ([/document/v1/ example](/en/writing/document-v1-api-guide#conditional-updates-and-puts-with-create) - review notes there before using): + +```json +{ + "put": "id:mynamespace:music::123", + "condition": "music.sales==999", + "create": true, + "fields": { + "title": "Best of Bob Dylan" + } +} +``` + +## Update operations + +The update operations are: [`assign`](#assign), [`add`](#add), [`remove`](#composite-remove), [arithmetics](#arithmetic) (`increment` `decrement` `multiply` `divide`), [`match`](#match), [`modify`](#tensor-modify) + +## assign + +`assign` is used to replace the value of a field (or an element of a collection) with a new value. When assigning, one can generally use the same syntax and structure as when feeding that field's value in a `put` operation. 
+ +### Single value field + +```js +field title type string { + indexing: summary +} +``` + +```json +{ + "update": "id:mynamespace:music::example", + "fields": { + "title": { + "assign": "The best of Bob Dylan" + } + } +} +``` + +### Tensor field + +```js +field tensorfield type tensor(x{},y{}) { + indexing: attribute | summary +} +``` + +```json +{ + "update": "id:mynamespace:tensordoctype::example", + "fields": { + "tensorfield": { + "assign": { + "cells": [ + { "address": { "x": "a", "y": "b" }, "value": 2.0 }, + { "address": { "x": "c", "y": "d" }, "value": 3.0 } + ] + } + } + } +} +``` + +This will fully replace the entire tensor stored in this field. + +### Struct field + +#### Replacing all fields in a struct + +A full struct is replaced by assigning an object of struct key/value pairs. + +```js +struct person { + field first_name type string {} + field last_name type string {} +} +field contact type person { + indexing: summary +} +``` + +```json +{ + "update": "id:mynamespace:workers::example", + "fields": { + "contact": { + "assign": { + "first_name": "Bob", + "last_name": "The Plumber" + } + } + } +} +``` + +#### Individual struct fields + +Individual struct fields are updated using [field path](#fieldpath) syntax. Refer to the [reference](/en/reference/schemas/schemas#struct-name) for restrictions using structs. + +```json +{ + "update": "id:mynamespace:workers::example", + "fields": { + "contact.first_name": { + "assign": "Bob" + }, + "contact.last_name": { + "assign": "The Plumber" + } + } +} +``` + +### Map field + +Individual map entries can be updated using [field path](/en/reference/schemas/document-field-path) syntax. The following declaration defines a `map` where the `key` is an Integer and the value is a `person` struct. 
+ +```js +struct person { + field first_name type string {} + field last_name type string {} +} +field contact type map<int, person> { + indexing: summary +} +``` + +Example updating part of an entry in the `contact` map: + +- `contact` is the name of the map field to be updated +- `{0}` is the key that is going to be updated +- `first_name` is the struct field to be updated inside the `person` struct + +```json +{ + "update": "id:mynamespace:workers::example", + "fields": { + "contact{0}.first_name": { + "assign": "John" + } + } +} +``` + +Assigning an element to a key in a map will insert the key/value mapping if it does not already exist, or overwrite it with the new value if it does exist. Refer to the [reference](/en/reference/schemas/schemas#map) for restrictions using maps. + +#### Map to primitive value + +```js +field my_food_scores type map<string, string> { + indexing: summary +} +``` + +```json +{ + "update": "id:mynamespace:food::example", + "fields": { + "my_food_scores{Strawberries}": { + "assign": "Delicious!" 
+ } + } +} +``` + +#### Map to struct + +```js +struct contact_info { + field phone_number type string {} + field email type string {} +} +field contacts type map<string, contact_info> { + indexing: summary +} +``` + +```json +{ + "update": "id:mynamespace:people::d_duck", + "fields": { + "contacts{\"Uncle Scrooge\"}": { + "assign": { + "phone_number": "555-123-4567", + "email": "number_one_dime_luvr1877@example.com" + } + } + } +} +``` + +### Array field + +#### Array of primitive values + +```js +field ingredients type array<string> { + indexing: summary +} +``` + +Assign full array: + +```json +{ + "update": "id:mynamespace:cakes:tasty_chocolate_cake", + "fields": { + "ingredients": { + "assign": ["sugar", "butter", "vanilla", "flour"] + } + } +} +``` + +Assign existing elements in array: + +```json +{ + "update": "id:mynamespace:cakes:tasty_chocolate_cake", + "fields": { + "ingredients[3]": { + "assign": "2 cups of flour (editor's update: NOT asbestos!)" + } + } +} +``` + +Note that the index element 3 needs to exist. Alternative using match: + +```json +{ + "update": "id:mynamespace:cakes:tasty_chocolate_cake", + "fields": { + "ingredients": { + "match": { + "element": 3, + "assign": "2 cups of flour (editor's update: NOT asbestos!)" + } + } + } +} +``` + +Individual array elements may be updated using [field path](/en/reference/schemas/document-field-path) or [match](#match) syntax. + +#### Array of struct + +Refer to the reference for restrictions using [array of structs](/en/reference/schemas/schemas#array). + +```js +struct person { + field first_name type string {} + field last_name type string {} +} +field people type array<person> { + indexing: summary +} +``` + +```json +{ + "update": "id:mynamespace:students:example", + "fields": { + "people[34]": { + "assign": { + "first_name": "Bobby", + "last_name": "Tables" + } + } + } +} +``` + +Note that the element index needs to exist. Use [add](#add-array-elements) to add a new element. 
Alternative syntax using match: + +```json +{ + "update": "id:mynamespace:students:example", + "fields": { + "people": { + "match": { + "element": 34, + "assign": { + "first_name": "Bobby", + "last_name": "Tables" + } + } + } + } +} +``` + + +### Weighted set field + +Adding new elements to a weighted set can be done using [add](#add-weighted-set), or by assigning with `field{key}` syntax. Example of the latter: + +```js +field int_weighted_set type weightedset<int> { + indexing: summary +} +field string_weighted_set type weightedset<string> { + indexing: summary +} +``` + +```json +{ + "update":"id:mynamespace:weightedsetdoctype::example1", + "fields": { + "int_weighted_set{123}": { + "assign": 123 + }, + "int_weighted_set{456}": { + "assign": 100 + }, + "string_weighted_set{\"item 1\"}": { + "assign": 144 + }, + "string_weighted_set{\"item 2\"}": { + "assign": 7 + } + } +} +``` + +Note that using the `field{key}` syntax for weighted sets _may_ be less efficient than using [add](#add-weighted-set). + +### Clearing a field + +To clear a field, assign a `null` value to it. + +```json +{ + "update": "id:mynamespace:music::example", + "fields": { + "title": { + "assign": null + } + } +} +``` + +## add + +`add` is used to add entries to arrays, weighted sets or to the mapped dimensions of tensors. + +### Adding array elements + +The added entries are appended to the end of the array in the order specified. + +```js +field tracks type array<string> { + indexing: summary +} +``` + +```json +{ + "update": "id:mynamespace:music::https://music.yahoo.com/bobdylan/BestOf", + "fields": { + "tracks": { + "add": [ + "Lay Lady Lay", + "Every Grain of Sand" + ] + } + } +} +``` + +### Add weighted set entries + +Add weighted set elements by using a JSON key/value syntax, where the value is the weight of the element. + +Adding a key/weight mapping that already exists will overwrite the existing weight with the new one. 
+ +```js +field int_weighted_set type weightedset<int> { + indexing: summary +} +field string_weighted_set type weightedset<string> { + indexing: summary +} +``` + +```json +{ + "update":"id:mynamespace:weightedsetdoctype::example1", + "fields": { + "int_weighted_set": { + "add": { + "123": 123, + "456": 100 + } + }, + "string_weighted_set": { + "add": { + "item 1": 144, + "item 2": 7 + } + } + } +} +``` + +### Add tensor cells + +Add cells to mapped or mixed tensors. Invalid for tensors with only indexed dimensions. Adding a cell that already exists will overwrite the cell value with the new value. The address must be fully specified, but cells with bound indexed dimensions not specified will receive the default value of `0.0`. See system test [tensor add update](https://github.com/vespa-engine/system-test/tree/master/tests/search/tensor_feed/tensor_add_remove_update) for more examples. + +```js +field tensorfield type tensor(x{},y[3]) { + indexing: attribute | summary +} +``` + +```json +{ + "update": "id:mynamespace:tensordoctype::example", + "fields": { + "tensorfield": { + "add": { + "cells": [ + { "address": { "x": "b", "y": "0" }, "value": 2.0 }, + { "address": { "x": "b", "y": "1" }, "value": 3.0 } + ] + } + } + } +} +``` + +In this example, cell `{"x":"b","y":"2"}` will implicitly be set to 0.0. + +So if you started with the following tensor: + +```json +{ + {"x": "a", "y": "0"}: 0.2, + {"x": "a", "y": "1"}: 0.3, + {"x": "a", "y": "2"}: 0.5, +} +``` + +You now end up with this tensor after the above add operation was applied: + +```json +{ + {"x": "a", "y": "0"}: 0.2, + {"x": "a", "y": "1"}: 0.3, + {"x": "a", "y": "2"}: 0.5, + {"x": "b", "y": "0"}: 2.0, + {"x": "b", "y": "1"}: 3.0, + {"x": "b", "y": "2"}: 0.0, +} +``` + +Prefer the _block short form_ for mixed tensors instead. 
This also avoids the problem where cells with indexed dimensions are not specified: + +```json +{ + "update": "id:mynamespace:tensordoctype::example", + "fields": { + "tensorfield": { + "add": { + "blocks": [ + { "address": { "x": "b" }, "values": [2.0, 3.0, 5.0] } + ] + } + } + } +} +``` + +## remove + +Remove elements from arrays, weighted sets, maps and tensors with `remove`. + +### Array field + +Array fields support removal by value using "remove" and the array of values to remove. + +For example, given this field: + +```js +field string_array type array<string> { + indexing: summary +} +``` + +This update will remove all the array elements that are "hello" or "1": + +```json +{ + "update":"id:mynamespace:weightedsetdoctype::example1", + "fields": { + "string_array": { + "remove": ["hello", 1] + } + } +} +``` + +### Weighted set field + +```js +field string_weighted_set type weightedset<string> { + indexing: summary +} +``` + +```json +{ + "update":"id:mynamespace:weightedsetdoctype::example1", + "fields": { + "string_weighted_set": { + "remove": { + "item 2": 0 + } + } + } +} +``` + +### Map field + +``` +field string_map type map<string, string> { + indexing: summary +} +``` + +```json +{ + "update":"id:mynamespace:mapdoctype::example1", + "fields": { + "string_map{item 2}": { + "remove": 0 + } + } +} +``` + +### Tensor field + +Removes cells from mapped or mixed tensors. Invalid for tensors with only indexed dimensions. Only mapped dimensions should be specified for tensors with both mapped and indexed dimensions, as all indexed cells the mapped dimensions point to will be removed implicitly. See system test [tensor remove update](https://github.com/vespa-engine/system-test/tree/master/tests/search/tensor_feed/tensor_add_remove_update) for more examples. 
+ +```js +field tensorfield type tensor(x{},y[2]) { + indexing: attribute | summary +} +``` + +```json +{ + "update": "id:mynamespace:tensordoctype::example", + "fields": { + "tensorfield": { + "remove": { + "addresses": [ + {"x": "b"}, + {"x": "c"} + ] + } + } + } +} +``` + +In this example, cells `{x:b,y:0},{x:b,y:1},{x:c,y:0},{x:c,y:1}` will be removed. + +It is also supported to specify only a subset of the mapped dimensions in the addresses. In that case, all cells that match the label values of the specified dimensions are removed. In the given example, all cells having label `b` for dimension `x` are removed. + +```js +field tensorfield type tensor(x{},y{},z[2]) { + indexing: attribute | summary +} +``` + +```json +{ + "update": "id:mynamespace:tensordoctype::example", + "fields": { + "tensorfield": { + "remove": { + "addresses": [ + {"x": "b"} + ] + } + } + } +} +``` + +## Arithmetic + +The four arithmetic operators `increment`, `decrement`, `multiply` and `divide` are used to modify _single value_ numeric values without having to look up the current value before applying the update. Example: + +```js +field sales type int { + indexing: summary | attribute +} +``` + +```json +{ + "update": "id:mynamespace:music::https://music.yahoo.com/bobdylan/BestOf", + "fields": { + "sales": { + "increment": 1 + } + } +} +``` + +## match + +If an arithmetic operation is to be done for a specific key in a _weighted set or array_, use the `match` operation: + +```js +field track_popularity type weightedset<string> { + indexing: summary | attribute +} +``` + +```json +{ + "update": "id:mynamespace:music::https://music.yahoo.com/bobdylan/BestOf", + "fields": { + "track_popularity": { + "match": { + "element": "Lay Lady Lay", + "increment": 1 + } + } + } +} +``` + +In other words, for the weighted set "track\_popularity", `match` the element "Lay Lady Lay", then `increment` its weight by 1. 
See the [weightedset properties](/en/reference/schemas/schemas#weightedset-properties) reference for how to make incrementing a non-existing key trigger auto-create of the key. + +If the updated field is an array, the `element` value would be a positive integer. + +<Info> + **Note:** Only one element can be matched per operation. +</Info> + +## Modify tensors + +Individual cells in tensors can be modified using the `modify` update. The cells are modified according to the given operation: + +- `replace` - replaces a single cell value +- `add` - adds a value to the existing cell value +- `multiply` - multiplies the existing cell value by a value + +The addresses of cells must be fully specified. If the cell does not exist, the update for that cell will be ignored. Use `"create": true` (see example below) to create non-existing cells before the modify update is applied. See system test [tensor modify update](https://github.com/vespa-engine/system-test/tree/master/tests/search/tensor_feed/tensor_modify_update) for more examples. + +```js +field tensorfield type tensor(x[3]) { + indexing: attribute | summary +} +``` + +```json +{ + "update": "id:mynamespace:tensordoctype::example", + "fields": { + "tensorfield": { + "modify": { + "operation": "replace", + "addresses": [ + { "address": { "x": "1" }, "value": 7.0 }, + { "address": { "x": "2" }, "value": 8.0 } + ] + } + } + } +} +``` + +In this example, cell `{"x":"1"}` is replaced with value 7.0 and `{"x":"2"}` with value 8.0. If operation `add` or `multiply` was used instead, 7.0 and 8.0 would be added or multiplied to the current values of cells `{"x":"1"}` and `{"x":"2"}`. 
+ +For tensors with a single mapped dimension the _cells short form_ can also be used: + +```js +field tensorfield type tensor(x{}) { + indexing: attribute | summary +} +``` + +```json +{ + "update": "id:mynamespace:tensordoctype::example", + "fields": { + "tensorfield": { + "modify": { + "operation": "add", + "create": true, + "cells": { + "b": 5.0, + "c": 6.0 + } + } + } + } +} +``` + +In this example, 5.0 is added to cell `{"x":"b"}` and 6.0 is added to cell `{"x":"c"}`. With `"create": true` non-existing cells in the input tensor are created before applying the modify update. The default cell value is 0.0 for `replace` and `add`, and 1.0 for `multiply`. This means a non-existing cell ends up with the value specified in the operation. + +For mixed tensors the _block short form_ can also be used to modify entire dense subspaces: + +```js +field tensorfield type tensor(x{},y[3]) { + indexing: attribute | summary +} +``` + +```json +{ + "update": "id:mynamespace:tensordoctype::example", + "fields": { + "tensorfield": { + "modify": { + "operation": "replace", + "blocks": { + "a": [1,2,3], + "b": [4,5,6] + } + } + } + } +} +``` + +## Fieldpath + +Fieldpath is for accessing fields within composite structures - for structures that are not part of index or attribute, it is possible to access elements directly using fieldpaths. This is done by adding more information to the field value. For map structures, specify the key (see [example](#assign)). + +```js +mymap{mykey} +``` + +and then do operation on the element which is keyed by "mykey". Arrays can be accessed as well (see [details](#assign)). + +```js +myarray[3] +``` + +And this is also true for structs (see [details](#assign)). **Note:** Struct updates do not work for [index](/en/reference/applications/services/content#document) mode: + +```json +mystruct.value1 +``` + +This also works for nested structures, e.g. 
a `map` of `map` to `array` of `struct`: + +```json +{ + "update": "id:mynamespace:complexdoctype::foo", + "fields": { + "nested_structure{firstMapKey}{secondMapKey}[4].title": { + "assign": "Look at me, mom! I'm hiding deep in a nested type!" + } + } +} +``` \ No newline at end of file diff --git a/mintlify-docs/en/reference/schemas/schemas.mdx b/mintlify-docs/en/reference/schemas/schemas.mdx new file mode 100644 index 0000000000..ace4091eee --- /dev/null +++ b/mintlify-docs/en/reference/schemas/schemas.mdx @@ -0,0 +1,2188 @@ +--- +title: "Schema reference" +--- + +This reference documents the syntax and content of schemas, document types, and fields. This is a reference, see [schemas](/en/basics/schemas) for an overview and examples. + +## Syntax + +Throughout this document, a string in square brackets represents some argument. The whole string, including the brackets, is replaced by a concrete string in a schema. + +Constructs in schemas have a regular syntax. Each element starts with the element _identifier_, possibly followed by the _name_ of this particular occurrence of the element, possibly followed by a space-separated list of interleaved _attribute names_ and _attribute values_, possibly followed by the _element body_. Thus, one will find elements of these varieties: + +```bash +[element-identifier] : [element-body] +[element-identifier] [element-name] : [element-body] +[element-identifier] [element-name] [attribute-name] [attribute-value] +[element-identifier] [element-name] [attribute-name] [attribute-value] { + [element-body] +} +``` + +One-line element values start with a colon and end with a newline. + +Multiline values (for fields supporting them) are any block of text enclosed in curly brackets. + +Comments may be inserted anywhere and start with a hash (#). + +Names are _identifiers_, they must match `["a"-"z","A"-"Z", "_"]["a"-"z","A"-"Z","0"-"9","_"]*`. + +A schema file is not sensitive to indentation. 
+ +## Elements + +Elements and structure of a schema file: + +```yaml expandable +schema + document + struct + field + match + field + alias + attribute + distance-metric + bolding + dictionary + id + index + hnsw + indexing + linguistics + match + normalizing + query-command + rank + rank-type + sorting + stemming + struct-field + indexing + match + query-command + struct-field + … + summary + summary-to DEPRECATED + summary + summary-to DEPRECATED + weight + compression + index + field + fieldset + rank-profile + diversity + attribute + min-groups + match-phase + attribute + order + total-max-hits + max-hits + first-phase + total-keep-rank-count + keep-rank-count + rank-score-drop-limit + expression + second-phase + expression + rank-score-drop-limit + total-rerank-count + rerank-count + global-phase + expression + rerank-count + rank-score-drop-limit + function [name] + inputs + constants + onnx-model + significance + rank-properties + match-features + mutate + on-match + on-first-phase + on-second-phase + on-summary + summary-features + rank-features + ignore-default-rank-features + num-threads-per-search + num-search-partitions + min-hits-per-thread + termwise-limit + post-filter-threshold + approximate-threshold + filter-first-threshold + filter-first-exploration + exploration-slack + target-hits-max-adjustment-factor + filter-threshold + rank + filter-threshold + element-gap + rank-type + weakand + stopword-limit + adjust-target + allow-drop-all + rank-profile (inner) + constant + onnx-model + stemming + documentid + document-summary + summary + import field + raw-as-base64-in-summary +schema +``` + +## schema + +The root element of schemas. A schema describes a type of data and what we should compute over it. A schema must be defined in a file named `[schema-name].sd`. + +```bash +schema [name] inherits [name] { + [body] +} +``` + +The `inherits` attribute is optional. 
If a schema is inherited, this schema will include all the constructs of it as if they were defined in this schema (except the parent document type). The document type in this schema must declare that it inherits the document type of the parent schema. + +The body is mandatory and may contain: + +| Name | Occurrence | Description | +| :--- | :--- | :--- | +| [document](#document) | One | A document type defined in this schema | +| [field](#field) | Zero to many | A field not contained in the document. Use _synthetic fields_ (outside [document](#document)) to derive new field values to be placed in the indexing structure from document fields. Find examples in [reindexing](/en/operations/reindexing#use-cases). | +| [fieldset](#fieldset) | Zero to many | Group document fields together for searching | +| [rank-profile](#rank-profile) | Zero to many | A bundle of ranking functions and settings, selectable in a query. | +| [constant](#constant) | Zero to many | A constant tensor located in a file used for ranking | +| [onnx-model](#onnx-model) | Zero to many | An ONNX model located in the application package used for ranking | +| [stemming](#stemming) | Zero or one | The default stemming setting. | +| [raw-as-base64-in-summary](#raw-as-base64-in-summary) | Zero or one | Base64 encode raw fields in summary rather than using an escaped string. The default is true. | +| [documentid](#documentid) | Zero or one | Whether document IDs are stored on disk only or made an attribute. | +| [document-summary](#document-summary) | Zero to many | An explicitly defined document summary | +| [import field](#import-field) | Zero to many | Import a field value from a global document | + +## document + +Contained in [schema](#schema) and describes a document type. This can also be the root of the schema, if the document is not to be queried directly. 
+ +```bash +document [name] inherits [name-list] { + [body] +} +``` + +The document name is optional; it defaults to the containing `schema` element's name. If there is no containing `schema` element, the document name is required. If the document with a name is defined inside a schema, the document name must match the `schema` element's name. The reference to _document type_ in the documentation refers to the document name defined here. + +The `inherits` attribute is optional and has as value a comma-separated list of names of other document types. A document type may inherit the fields of one or more other document types, see [document inheritance](/en/schemas/inheritance-in-schemas) for examples. If no document types are explicitly inherited, the document inherits the generic `document` type. + +The body of a document type is optional and may contain: + +| Name | Occurrence | Description | +| :--- | :--- | :--- | +| [struct](#struct) | Zero to many | A struct type definition for this document. | +| [field](#field) | Zero to many | A field of this document. | +| [compression](#compression) | Zero to one | Specifies compression options for documents of this document type in storage. | + +## struct + +Contained in [document](#document). Defines a composite type. A struct consists of zero or more fields that the user can access together as one. The struct has to be defined before it is used as a type in a field specification. + +```bash +struct [name] { + [body] +} +``` + +The body of a struct is optional and may contain: + +| Name | Occurrence | Description | +| :--- | :--- | :--- | +| [field](#field) | Zero to many | A field of this struct. | + +## field + +Contained in [schema](#schema), [document](#document), or [struct](#struct). Defines a named value with a type and (optionally) how this field should be stored, indexed, searched, presented, and how it should influence ranking. 
+ +```bash +field [name] type [type-name] { + [body] +} +``` +Do not use names that are used for other purposes in the indexing language or other places in the schema file. Reserved names are: +- attribute +- body +- case +- context +- documentid +- else +- header +- hit +- host +- if +- index +- position +- reference +- relevancy +- sddocname +- summary +- switch +- tokenize + +Other names not to use include any words that start with a number or include special characters. + +The _type_ attribute is mandatory - supported types: + +| Field type | Description | +| --- | --- | +| array\<type\> | + +For single-value (primitive) types, use array\<type\> to create an array field of the element type: + +| Index | Each element is indexed separately | +| Attribute | Added as an array attribute | +| Summary | Added as an array summary field | + +Also used to create an array field of the given [struct type](#struct). The struct type must be defined separately. Example: + +```js +struct person { + field first_name type string {} + field last_name type string {} +} + +field people type array<person> { + indexing: summary + summary: matched-elements-only + struct-field first_name { + indexing: attribute + attribute: fast-search + } +} +``` + +The entire _people_ field is part of the document summary. The [struct field](#struct-field) _first\_name_ is defined as an _attribute_ for searching, with [fast-search](/en/content/attributes#fast-search). A subset, or all, of the struct fields can be defined as attributes. + +Use the [sameElement](/en/reference/querying/yql#sameelement) operator to ensure matches in the same struct field instance. + +Use [matched-elements-only](#matched-elements-only) to reduce the amount of data that is returned in the document summary. + +<Warning> + **Important:** + +`key` and `value` are reserved words in an array\<struct\>, as these are used to implement [map](#map). Do not use these as struct-field names. 
+</Warning> + +Restrictions: + +- Array of struct types does not support [ranking features](/en/basics/ranking) and can only be used for matching and filtering. +- All struct arrays can be fed, retrieved, and used in document summaries. +- Some parts of struct arrays can be searched in [indexed search mode](/en/reference/applications/services/content#document), while all parts of struct arrays can be searched in [streaming search](/en/performance/streaming-search). See below for supported cases. + +| Index | Only supported in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). Set this on the top-level struct array field to make all parts searchable. | +| Attribute | Only supported for [struct fields](#struct-field) that have primitive types (bool, string, int, long, byte, float, double). Any struct field must be defined as an attribute to be used for searching. The struct type can still contain fields of non-primitive types, as long as these are not defined as attributes. | +| Summary | Added as an array summary field | + + | +| bool | + +Use for boolean values. + +``` +field alive type bool { + indexing: summary | attribute +} +``` + +| Index | Not supported | +| Attribute | Added as a boolean | +| Summary | Added as a boolean value (`true` or `false`) | + +<Warning>**Important:** Defaults to `false` if not specified. </Warning> + | +| byte | + +Use for single 8-bit numbers. + +```js +field smallnumber type byte { + indexing: summary | attribute +} +``` + +| Index | Not supported. An attribute will automatically be used instead | +| Attribute | Added as a byte which supports range searches | +| Summary | Added as a byte | + + | +| double | + +Use for high precision floating point numbers (64-bit IEEE 754 double). + +``` +field mydouble type double { + indexing: summary | attribute +} +``` + +| Index | Not supported. 
An attribute will automatically be used instead | +| Attribute | Added as a 64-bit IEEE 754 double which supports range searches | +| Summary | Added as a 64-bit IEEE 754 double | + + | +| float | + +Use for floating point numbers (32-bit IEEE 754 float). + +``` +field myfloat type float { + indexing: summary | attribute +} +``` + +| Index | Not supported. An attribute will automatically be used instead | +| Attribute | Added as a 32-bit IEEE 754 float which supports range searches | +| Summary | Added as a 32-bit IEEE 754 float | + + | +| int | + +Use for single 32-bit integers. + +``` +field release_year type int { + indexing: summary | attribute +} +``` + +| Index | Not supported. An attribute will automatically be used instead | +| Attribute | Becomes integer attributes, which supports range grouping and range searches | +| Summary | Added as a 32-bit integer | + + | +| long | + +Use for single 64-bit integers. + +``` +field bignumber type long { + indexing: summary | attribute +} +``` + +| Index | Not supported. An attribute will automatically be used instead | +| Attribute | Becomes a 64-bit integer attribute, which supports range grouping and range searches | +| Summary | Added as a 64-bit integer | + + | +| map\<key-type,value-type\> | + +Use to create a map where each unique key is mapped to a single value. Any primitive type can be used as _key-type_ and any primitive type or Vespa struct type as _value-type_. Example of a map of primitive types, where the _key_ and _value_ fields are specified as _attributes_: + +``` +field my_map type map<string, int> { + indexing: summary + struct-field key { indexing: attribute } + struct-field value { indexing: attribute } +} +``` + +Note that a Vespa map entry is handled as a _struct_ with a _key_ and _value_ field with _key-type_ and _value-type_ as types. This explains the _struct-field_ syntax above. The full _my\_map_ field is configured into the document summary. 
+ +A more complex example is a map of struct: + +``` +struct person { + field first_name type string {} + field last_name type string {} + field age type int {} +} + +field identities type map<string, person> { + indexing: summary + summary: matched-elements-only + struct-field key { + indexing: attribute + attribute: fast-search + } + struct-field value.first_name { + indexing: attribute + attribute: fast-search + } + struct-field value.last_name { + indexing: attribute + attribute: fast-search + } +} +``` + +This example illustrates that the struct elements are configured individually - there is no field configuration for _age_ - one can define a subset of the struct fields as attributes. + +Here, _key_, _value.first\_name_ and _value.last\_name_ are defined as attributes. This makes them available for searching and [grouping](/en/querying/grouping#grouping-over-a-map-field). A common use case is requiring matches in the same map entry (e.g., match both first and last name for the same person), see the [sameElement](/en/reference/querying/yql#sameelement) operator for how to implement this. Use [matched-elements-only](#matched-elements-only) to reduce the amount of data in the document summary. + +[fast-search](/en/performance/feature-tuning#when-to-use-fast-search-for-attribute-fields) is used to make query access faster by creating an index structure for lookups. + +As an alternative to a map, an [array\<struct\>](#array) can contain the same element multiple times and maintains order. + +Restrictions: + +- Map of struct or primitive types do not support [ranking features](/en/basics/ranking) and can only be used for matching and filtering. +- All map types can be fed, retrieved, and used in document summaries. +- Some map types can be searched in [indexed search mode](/en/reference/applications/services/content#document), while all map types can be searched in [streaming search](/en/performance/streaming-search). 
See below for supported cases: + +| Index | Only supported in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). Set this on the top-level map field to make all struct fields in the map field searchable. | +| Attribute | Only supported for [struct fields](#struct-field) where _value-type_ is either a primitive type (bool, string, int, long, byte, float, double) or a [struct type](#struct) with fields of primitive types. Any struct field must be defined as an attribute to be used for searching. The _value-type_ struct can still contain fields of non-primitive types, as long as these are not defined as attributes. | +| Summary | Added as a map. | + + | +| position | + +Used to filter and/or rank documents by distance to a position in the query, see [Geo search](/en/querying/geo-search). + +``` +field location type position { + indexing: attribute +} +``` + +| Index | Not supported | +| Attribute | Added as an interleaved 64-bit integer (see [Z-order curve](https://en.wikipedia.org/wiki/Z-order_curve)) - queries are implemented by doing a set of range searches in the attribute. This attribute has [fast-search](/en/content/attributes#fast-search) set implicitly | +| Summary | Refer to the [reference](/en/reference/schemas/document-json-format#position) | + + | +| predicate | + +Use to match queries to a set of boolean constraints. See [querying predicate fields](/en/schemas/predicate-fields#queries). Predicate fields are not supported in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). 
+ +``` +field predicate_field type predicate { + indexing: attribute + index { + arity: 2 # mandatory + lower-bound: 3 + upper-bound: 200 + dense-posting-list-threshold: 0.25 + } +} +``` + +| Index | Not supported | +| Attribute | Indexed in-memory in a variable-size binary format that is optimized for application during query evaluation | +| Summary | Added as-is | + + | +| raw | + +Use for binary data + +``` +field rawfield type raw { + indexing: summary | attribute +} +``` + +| Index | Not supported | +| Attribute | Added as raw data. Not searchable. | +| Summary | Added as raw data. Outputted as a base64-encoded string. See [JSON feed format](/en/reference/schemas/document-json-format#raw) for details. | + + | +| reference\<document-type\> | + +A _reference\<document-type\>_ field is a reference to an instance of a document-type - i.e., a foreign key. Reference fields are not supported in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). + +``` +field artist_ref type reference<artist> { + indexing: attribute +} +``` + The reference is the [document ID](/en/schemas/documents) of the document-type instance. References are used to join documents in a [parent-child relationship](/en/schemas/parent-child). A reference can only be made to [global](/en/reference/applications/services/content#document) documents. The following types of references are not supported: +- Self-reference +- Cyclic reference: If document type _foo_ has a reference to _bar_, then _bar_ cannot have a reference to _foo_ + + A reference attribute field can be searched using the document ID of the parent document-type instance as query term. Note that this will be a linear scan as [fast-search](#attribute) is not supported. + +| Index | Invalid - deployment will fail | +| Attribute | As [string](#string) - a reference must be an attribute. Can be an empty string or point to a non-existent document. Memory usage is about 33 bytes per parent document. 
This is composed of 24 bytes used in a reference store, with a btree structure on top of that which requires 5 bytes on average (depends on lid compaction). In addition 4 bytes on average for a reference from child document to the parent document (depends on lid compaction). In total about 33 bytes. | +| Summary | As [string](#string) | + + | +| string | + +Use for a text field of any length. String fields may only contain _text characters_, as defined by `isTextCharacter` in [com.yahoo.text.Text](https://github.com/vespa-engine/vespa/blob/master/vespajlib/src/main/java/com/yahoo/text/Text.java) + +``` +field surname type string { + indexing: summary | index +} +``` + +| Index | Refer to [linguistics](/en/linguistics/linguistics) for details on normalization, tokenization and stemming. | +| Attribute | Added as-is. Exact and prefix [match](#match) are the supported types of searches in string attributes. Searches are, however, case-insensitive. A query for `BritneY.spears` will match a document containing `BrItNeY.SpEars` | +| Summary | Added as-is | + + | +| struct | + +Use to define a field with a struct datatype. Create a [struct type](#struct) inside the document definition and declare the struct field in a document or struct using the struct type name as the field type: + +``` +struct person { + field first_name type string {} + field last_name type string {} +} +field my_person type person { + indexing: summary +} +``` + Restrictions: +- Struct fields can **not** be searched in indexed search mode (but [array of struct](#array) and [map type](#map) are searchable, with some restrictions). +- Struct fields can be fed, retrieved, and used in document summaries. + +| Index | Only supported in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). Set this on the top-level field to make all parts searchable. | +| Attribute | Not supported. | +| Summary | Added as a struct. 
| + + | +| tensor(dimension-1,...,dimension-N) | + +Use to create a tensor field with the given [tensor type spec](/en/reference/ranking/tensor#tensor-type-spec) that can be used for [ranking](/en/basics/ranking) and [nearest neighbor search](/en/querying/nearest-neighbor-search). A tensor field is otherwise not searchable. + +See [tensor evaluation reference](/en/reference/ranking/tensor) for definition, the [tensor user guide](/en/ranking/tensor-user-guide) and the [JSON feed format](/en/reference/schemas/document-json-format#tensor). + +``` +field tensorfield type tensor<float>(x{},y{}) { + indexing: attribute | summary +} + +field tensorfield type tensor<float>(x[2],y[2]) { + indexing: attribute | summary +} +``` + +| Index | Supported for tensor types with: +- One indexed dimension - single vector per document +- One or more mapped dimensions and one indexed dimension - multiple vectors per document + + See [approximate nearest neighbor search](/en/querying/approximate-nn-hnsw). | +| Attribute | Added as-is in an attribute to be used for ranking and nearest neighbor search. | +| Summary | Added as-is. | + + | +| uri | + +Use for URL type matching. URI fields are not supported in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). + +| Index | + +The URL is split into its different components, which are indexed separately. Note that only URLs can be indexed this way, not other URIs. The different components are as defined by the HTTP standard: Scheme, hostname, port, path, query, and fragment. 
Example: + +``` +http://mysite.mydomain.com:8080/path/shop?d=hab&id=1804905709&cat=100#frag1 +``` + +| scheme | http | +| hostname | mysite.mydomain.com (indexed as "mysite", "mydomain" and "com") | +| port | 8080 (note that port numbers 80 and 443 are not indexed, as they are the normal port numbers) | +| path | /path/shop (indexed as "path" and "shop") | +| query | d=hab&id=1804905709&cat=100 (indexed as "d", "hab", "id", "1804905709", "cat" and "100") | +| fragment | frag1 | + + The syntax for searching these different components is: +``` +[field-name].[component-name]:term +``` + Example: In a URI field `sourceurl`, search for documents from slashdot: +``` +query=sourceurl.hostname:slashdot +``` + +URL hostnames also support _anchored searching_, see [search in URL fields](/en/reference/querying/yql#uri). + +It is not possible to index uri-typed fields into a common index, i.e., it has to be indexed separately from other fields. If you need to combine URLs with other fields, you could store it in a string-field instead, but then you can not search in the different parts of the URL (scheme, hostname, port, path, query, and fragment). + +**Aliasing** also works differently for URL fields - you are allowed to create aliases both to the index (as usual) and to the components of it. Use + +``` +alias [component]: [alias] +``` + to create an alias for a component. For example, given this field: +``` +field surl type uri { + indexing: summary | index + alias: url + alias hostname: site +} +``` + +a search in "surl" and "url" will search in the entire url, while "surl.hostname" or "site" will search the hostname. + + | +| Attribute | Not allowed | +| Summary | Added as-is as a string | + + | +| weightedset\<element-type\> | + +Use to create a multivalue field of the element type, where each element is assigned a signed 32-bit integer weight. 
+ +``` +field tag type weightedset<string> { + indexing: attribute | summary +} +``` + +The element type can be one of the following primitive types: byte, int, long, and string. + +To access a weighted set in ranking when using `attribute`, see [the attribute match features](/en/reference/ranking/rank-features#attribute-match-features-not-normalized), or convert the weighted set to a tensor using the tensorFromWeightedSet(field, dimensionName) feature. + +To access a weighted set in ranking when using `index`, see [ranking features for indexed multivalued fields](/en/reference/ranking/rank-features#features-for-indexed-multivalue-string-fields). Note that when using `index` with weightedset, queries are matching across elements in the set. + +It is possible to specify that a new key should be created if it does not exist before the update, and that it should be removed if the weight is set to zero - see the [reference](#weightedset-properties). + +The weightedset field does not support filtering on weight. If you need that, use the [map](#map) type and [sameElement](/en/reference/querying/yql#sameelement) query operator - see [this example](/en/querying/query-language#map). + +| Index | Each token present in the field is indexed separately. Information indexed includes element number, element weight, and a list of token occurrence positions for each element in which the token is present | +| Attribute | Added as a multivalue weighted attribute | +| Summary | Added as a multivalue summary field if this is an attribute | + + | + +The body of a field is optional for [schema](#schema), [document](#document) and [struct](#struct). It may contain the following elements: + +| Name | Occurrence | Description | +| :--- | :--- | :--- | +| alias | Zero to many | Make an index or attribute available in queries under an additional name. This has minimal performance impact and can safely be added to running applications. 
Example: <br/><CodeBlock> field artist type string \{ alias: artist_name }</CodeBlock> `alias` works for primitive types, i.e, types you can set `indexing` on like `string`. Composite types are not supported, the below will throw an error when deploying: <br/><CodeBlock> document ppl \{ struct person \{ field first_name type string {} field last_name type string {} } field p type array\<person> \{ struct-field first_name \{ indexing: attribute } struct-field last_name \{ indexing: attribute } alias: members } } </CodeBlock> | +| [attribute](#attribute) | Zero to many | Specify an attribute setting. | +| [bolding](#bolding) | Zero to one | Specifies whether the content of this field should be bolded. Only supported for [index](#indexing-index) fields of type string or array\<string>. | +| [id](#id) | Zero to one | Explicitly decide the numerical id of this field. Is normally not necessary, but can be used to save some disk space. | +| [index](#index) | Zero to many | Specify a parameter of an index. | +| [indexing](#indexing) | Zero to one | The indexing statements used to create index structure additions from this field. | +| [match](#match) | Zero to one | Set the matching type to use for this field. | +| [normalizing](#normalizing) | Zero or one | Specifies the kind of text normalizing to do on a string field. | +| [query-command](#query-command) | Zero to many | Specifies a command which can be received by a plugin searcher in the Search Container. | +| [rank](#rank) | Zero or one | Specify if the field is used for ranking. | +| [rank-type](#rank-type) | Zero to one | Selects the set of low-level rank settings to be used for this field when using default `nativeRank`. | +| [sorting](#sorting) | Zero or one | The sort specification for this field. | +| [stemming](#stemming) | Zero or one | Specifies stemming options to use for this field. | +| [struct-field](#struct-field) | Zero to many | A subfield of a field of type struct. 
The struct must have been defined to contain this subfield in the struct definition. If you want the subfield to be handled differently from the rest of the struct, you may specify it within the body of the struct-field. | +| [summary](#summary) | Zero to many | Sets a summary setting of this field, set to `dynamic` to make a dynamic summary. | +| [summary-to](#summary-to) | Zero to one | <Warning>**Deprecated:** Use [document-summary](#document-summary) instead.</Warning> The list of document summary names this should be included in. | +| [weight](#weight) | Zero to one | The importance of a field when searching multiple fields and using `nativeRank`. | +| [weightedset](#weightedset-properties) | Zero to one | Properties of a weightedset [weightedset\<element-type>](#weightedset) | + +Fields can not have default values. See the [document guide](/en/schemas/documents#fields) for how to auto-set field values. + +It is not possible to query for fields without value (i.e. query for NULL) - see the [query language reference](/en/reference/querying/yql). Fields without value are not returned in [query results](/en/reference/querying/default-result-format). + +Fields can be declared outside the document block in the schema. These fields are not part of the document type but behave like regular fields for queries. Since they are not part of the document, they cannot be written directly, but instead take their values from document fields, using the `input` expression: `indexing: input my_document_field | embed | summary | index` + +This is useful e.g., to index a field in multiple ways, or to change the field value, something which is not allowed with document fields. When the document field(s) used as input are updated, these fields are updated with them. + +## struct-field + +Contained in [field](#field) or [struct-field](#struct-field). 
Defines how this struct field (a subfield of a struct) should be stored, indexed, searched, presented, and how it should influence ranking. The field in which this struct field is contained must be of type struct or a collection of type struct: + +```bash +struct-field [name] { + [body] +} +``` + +The body of a struct field is optional and may contain the following elements: + +| Name | Occurrence | Description | +| :--- | :--- | :--- | +| [indexing](#indexing) | Zero to one | The indexing statements used to create index structure additions from this field. For indexed search only `attribute` is supported, which makes the struct field a searchable in-memory attribute that can also be used for e.g. grouping and ranking. For [streaming search](/en/performance/streaming-search), `index` and `summary` are supported in addition. | +| [attribute](#attribute) | Zero to many | Specifies an attribute setting. For example `attribute:fast-search`. | +| [rank](#rank) | Zero to one | Specifies [rank](#rank) settings | +| [match](#match) | Zero to one | Specifies [match](#match) settings | + +If this struct field is of type struct (i.e., a nested struct), only [indexing:summary](#indexing) may be specified. See [array\<type\>](#array) for example use. + +## fieldset + +Contained in [schema](#schema). + +<Info> + **Note:** this is not related to the [Document fieldset](/en/schemas/documents#fieldsets). Also see the [FAQ](/en/learn/faq#must-all-fields-in-a-fieldset-have-compatible-type-and-matching-settings) for a discussion of what happens when using different types/match settings. +</Info> + +A fieldset groups fields together for searching: + +```js +fieldset myfieldset { + fields: a,b,c +} +``` + +Create a fieldset named `default` to be used as the default (i.e., when not specified in the query): + +```js +fieldset default { + fields: a,b,c +} +``` + +See [example queries](/en/querying/query-api#fieldset). 
+ +The fields in the fieldset should be as similar as possible in terms of indexing clause and [match mode](#match). If they are not, test the application thoroughly. Having different match modes for the fields in the fieldset generates a warning during application deployment. If specific match settings for the fieldset are needed, such as _exact_, specify it using _match_: + +```js +fieldset myfieldset { + fields: a,b,c + match { + exact + } +} +``` + +Use [query-commands](#query-command) in the field set to set search settings. Example: + +```js +fieldset myfieldset { + fields: a,b,c + query-command:"exact @@" +} +``` + +Adding a fieldset will not create extra index structures in memory / on disk; it is just a mapping. + +Note that document frequency is aggregated across all member fields when matching a fieldset, which affects [BM25](/en/ranking/bm25) and significance values, and can cause [weakAnd](/en/ranking/wand) to prune matches for common terms like _"the"_ when they are frequent in one member field but rare in another. See the [FAQ](/en/learn/faq#why-can-common-words-like-the-hurt-recall-and-collapse-significance-across-a-fieldset) for details and the per-field workaround. + +## compression + +<Warning> + **Deprecated:** see [deprecations](/en/reference/release-notes/vespa8#compression). +</Warning> + +Contained in [document](#document). If a compression level is set within this element, **lz4** compression is enabled for whole documents. + +```js +compression { + [body] +} +``` +The body of a compression specification is optional and may contain: + +| Name | Occurrence | Description | +| :--- | :--- | :--- | +| type | Zero to one | **LZ4** is the only valid compression method.| +| level | Zero to one | Enable compression. LZ4 is linear and 9 means HC(high compression).| + +| threshold | Zero to one | A percentage (multiplied by 100) giving the maximum size that compressed data can have to keep the compressed value. 
If the resulting compressed data is higher than this, the document will be stored uncompressed. The default value is 95.| + + +## rank-profile + +Contained in [schema](#schema) or equivalently in separate files in the [application package](/en/reference/applications/application-packages), named `[profile-name].profile` in any directory below `schemas/[schema-name]/`. A [rank profile](/en/basics/ranking#rank-profiles) is a named set of ranking expression functions and settings which can be [selected in the query](/en/reference/api/query#ranking.profile). + +Whether defined inline in the schema or in a separate .profile file, the syntax of a rank profile is + +```bash +rank-profile [name] inherits [rank-profile1], [rank-profile2], ... { + [body] +} +``` + +The `inherits` list is optional and may contain the name of other rank profiles in this schema or one it inherits. Elements not defined in this rank profile will then be inherited from those profiles. Inheriting multiple profiles that define the same elements leads to an error at deployment. + +The body of a rank-profile may contain: + +| Name | Occurrence | Description | +| :--- | :--- | :--- | +| [diversity](#diversity) | Zero or one | Specification of required diversity between the different phases. | +| [strict](#strict) | Zero or one | true/false: Whether to use strict or loose type checking. | +| [match-phase](#match-phase) | Zero or one | Ranking configuration to be used for hit limitation during matching. | +| [first-phase](#firstphase-rank) | Zero or one | The ranking config to be used for first-phase ranking. | +| [second-phase](#secondphase-rank) | Zero or one | The ranking config to be used for second-phase ranking. | +| [global-phase](#globalphase-rank) | Zero or one | The ranking config to be used for global-phase ranking. 
| +| [function \[name\]](#function-rank) | Zero or more | Defines a named function that can be referenced during ranking phase(s) and (if without arguments) as part of match- and summary-features. | +| [inputs](#inputs) | Zero or many | List of query features used in ranking expressions. | +| [constants](#constants) | Zero or many | List of constant features available in ranking expressions. | +| [mutate](#mutate) | Zero or many | Specification of mutations you can apply after different phases of a query. | +| [onnx-model](#onnx-model) | Zero or many | An onnx model to make available in this profile. | +| [significance](#significance) | Zero or one | To enable the use of significance models defined in the service.xml config. | +| [rank-properties](#rank-properties) | Zero or one | List of any rank property key-values to be used by rank features. | +| [match-features](#match-features) | Zero or more | The [rank features](/en/reference/ranking/rank-features) to be returned with each hit, computed in the *match* phase. | +| [summary-features](#summary-features) | Zero or more | The [rank features](/en/reference/ranking/rank-features) to be returned with each hit, computed in the *fill* phase. | +| [rank-features](#rank-features) | Zero or more | The [rank features](/en/reference/ranking/rank-features) to be dumped when using the query-argument [rankfeatures](/en/reference/api/query#ranking.listfeatures). | +| ignore-default-rank-features | Zero or one | Do not dump the default set of rank features, only those explicitly specified with the [rank-features](#rank-features) command. | +| num-threads-per-search | Zero or one | Overrides the global [persearch](/en/reference/applications/services/content#requestthreads-persearch) threads to a **lower** value. 
| +| min-hits-per-thread | Zero or one | After estimating the number of hits for a query prior to query evaluation, this number is used to decide how many threads to use for the query.<br/> `num_threads = min([num-threads-per-search](#num-threads-per-search), estimated_hits / min-hits-per-thread)` <br/>The current default is 1. If you suspect the fixed cost per thread is too high, increasing this number might be a good idea, especially if most of your queries are cheap, but you have increased the [num-threads-per-search](#num-threads-per-search) in order to reduce latency for your costly queries covering a lot of documents. The default might change, or the optimal value might become adaptive, rendering overrides ignored or counterproductive. | +| num-search-partitions | Zero or one | The number of logical partitions in which the corpus is divided on a search node. By default, this is the same as [num-threads-per-search](#num-threads-per-search). A partition is the smallest unit a search thread will handle. If you have a locality in time when searching and feeding documents, you might want to split it into more, smaller partitions. That way, you avoid that one costly partition leaves some threads idle while others are working hard. <br/> If you have 8 threads per search, you might have 10x as many partitions (80), reducing max skew by a similar factor. Note that a value of zero turns on adaptive partitioning which tries to solve this optimally. <Info> **Note:** If `num-search-partitions` is set to 0 (work sharing is enabled), make sure `termwise-limit` is set to 1.0 (termwise evaluation is disabled). 
This is to avoid redoing termwise evaluation when work is passed from one thread to another.</Info> | +| termwise-limit | Zero or one | If estimated number of hits > corpus \* termwise-limit, it will prune candidates with a CPU cache-friendly [TAAT](/en/performance/feature-tuning#hybrid-taat-daat) with the terms not needed for ranking, prior to doing [DAAT](/en/performance/feature-tuning#hybrid-taat-daat). Current default is 1.0 which turns it off. A value between 0.05 and 0.20 can be a good starting point. This is particularly useful if you have many weak filters. Note that this is a manual override. The default might change, or the optimal value might be adaptive rendering overrides ignored or counterproductive. | +| post-filter-threshold | Zero or one | Threshold value (in the range \[0.0, 1.0\]) deciding if a query with an approximate [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) operator combined with filters is evaluated using post-filtering instead of the default filtering. Post-filtering is chosen when the estimated filter hit ratio of the query is *larger* than this threshold. The default value is 1.0, which disables post-filtering. See [Controlling the filtering behavior with approximate nearest neighbor search](https://blog.vespa.ai/constrained-approximate-nearest-neighbor-search/#controlling-the-filtering-behavior-with-approximate-nearest-neighbor-search) for more details. <br/><br/> With post-filtering the [totalTargetHits](/en/reference/querying/yql#totaltargethits) value used when searching the HNSW index is auto-adjusted in an effort to expose the node's share of *totalTargetHits* hits to first-phase ranking after post-filtering has been applied. The following formula is used: <CodeBlock>adjustedTargetHits = min(targetHits / estimatedFilterHitRatio, targetHits \* targetHitsMaxAdjustmentFactor)</CodeBlock>. 
Use [target-hits-max-adjustment-factor](#target-hits-max-adjustment-factor) to control the upper bound of the adjusted *targetHits*. This parameter has no effect in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). | +| approximate-threshold | Zero or one | Threshold value (in the range \[0.0, 1.0\]) deciding if a query with an approximate [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) operator combined with filters is evaluated by searching the [HNSW](#index-hnsw) graph for approximate neighbors with filtering, or performing an [exact nearest neighbor search](/en/querying/nearest-neighbor-search) with pre-filtering. The fallback to exact search is chosen when the estimated filter hit ratio of the query is *less* than this threshold. The default value is 0.02. See [Controlling the filtering behavior with approximate nearest neighbor search](https://blog.vespa.ai/constrained-approximate-nearest-neighbor-search/#controlling-the-filtering-behavior-with-approximate-nearest-neighbor-search) for more details. This parameter has no effect in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). | +| filter-first-threshold | Zero or one | Threshold value (in the range \[0.0, 1.0\]) deciding if the filter is checked before computing a distance (*filter-first heuristic*) while searching the [HNSW](#index-hnsw) graph for approximate neighbors with filtering. This improves the response time at low hit ratios but causes a dip in recall. The heuristic is used when the estimated filter hit ratio of the query is *less* than this threshold. The default value is 0.2. This parameter has no effect in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). 
| +| filter-first-exploration | Zero or one | Value (in the range \[0.0, 1.0\]) specifying how aggressively the filter-first heuristic explores the graph when searching the [HNSW](#index-hnsw) graph for approximate neighbors with filtering. A higher value means that the graph is explored more aggressively and improves the recall at the cost of the response time. The default value is 0.01. This parameter has no effect in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). | +| exploration-slack | Zero or one | Value (in the range \[0.0, 1.0\]) specifying slack to delay the termination of the search of the [HNSW](#index-hnsw) graph for approximate neighbors with or without filtering. A higher value means that more of the graph is explored and improves the recall at the cost of the response time. The default value is 0.0. This parameter has no effect in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). | +| target-hits-max-adjustment-factor | Zero or one | Value (in the range \[1.0, inf\]) used to control the auto-adjustment of [totalTargetHits](/en/reference/querying/yql#totaltargethits) used when evaluating an approximate [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) operator with post-filtering. The default value is 20.0. Setting this value to 1.0 disables auto-adjustment of *targetHits*. See [post-filter-threshold](#post-filter-threshold) for more details. This parameter has no effect in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). | +| filter-threshold | Zero or one | The threshold value (in the range \[0.0, 1.0\]) deciding when matching in *index* fields should be treated as filters. This happens for query terms with [estimated hit ratios](/en/learn/glossary#estimated-hit-ratio) (in the range \[0.0, 1.0\]) that are above the *filter-threshold*. 
Use this to optimize query performance when searching large text [index](/en/basics/schemas#document-fields) fields, by allowing a per query combination of [rank: filter](#filter) and [rank: normal](#normal) behavior. This parameter can be overridden per *index* field, see [field-level filter-threshold](#rank-filter-threshold) for a more detailed description with tradeoffs. <br/><br/> In testing with various text datasets (e.g., Wikipedia), a *filter-threshold* setting of 0.05 has been shown to be a good starting point. See [Tuning query performance for lexical search](/en/performance/feature-tuning#tuning-query-performance-for-lexical-search) for more details. This parameter has no effect in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). Use the [ranking.matching.filterThreshold](/en/reference/api/query#ranking.matching.filterThreshold) query parameter to override this value. | +| [rank](#rank) | Zero or more | Specify rank settings of a field in this profile. | +| [rank-type](#rank-type) | Zero or more | The rank-type of a field in this profile. | +| [weakand](#weakand) | Zero or one | Tunes the [weakAnd](/en/ranking/wand#weakand) algorithm to automatically exclude terms and documents with expected low query significance based on [document frequency](/en/learn/glossary#document-frequency-normalized) statistics present in the document corpus. This makes matching faster at the cost of potentially reduced recall. | +| [rank-profile (inner)](#rank-profile) | Zero or more | An inner rank profile, useful for grouping related profiles, especially when defined in separate .profile files. This behaves just like a top level rank profile, except that: <br/> - The full name of the profile to use in queries will be `containing-profile-name.inner-profile-name`. <br/> - The profile must explicitly inherit the containing profile. | + +## match-phase + +Contained in [rank-profile](#rank-profile). 
The match-phase feature lets you increase performance by limiting hits exposed to first-phase ranking to the highest (lowest) values of some attribute. The performance gain may be substantial, especially with an expensive first-phase function. The quality loss depends on how well the chosen attribute correlates with the first-phase score. + +Documents that have no value of the chosen attribute will be taken as having the value 0. + +See also [graceful degradation](/en/performance/graceful-degradation#match-phase-degradation) and [result diversity](/en/querying/result-diversity#match-phase-diversity). + +```js +match-phase { + attribute: [numeric single value attribute] + order: [ascending | descending] + total-max-hits: [integer] +} +``` + +| Name | Description | +| :--- | :--- | +| attribute | The quality attribute that decides which documents are a match if the match phase estimates that there will be more than the node's share of [total-max-hits](#match-phase-total-max-hits) hits. The attribute must be single-value numeric with [fast-search](#attribute) enabled. It should correlate with the order which would be produced by a full query evaluation. No default. | +| order | Whether the attribute should be used in `descending` order (prefer documents with a high value) or `ascending` order (prefer documents with a low value). Usually, it is not necessary to specify this, as the default value `descending` is by far the most common. | +| total-max-hits | The total max hits that should be produced in the match phase across all nodes in the group evaluating the query. This number should be large, and larger the worse the correlation between the match-phase attribute and the first-phase function. | +| max-hits | The max hits each content node should attempt to produce in the match phase. Prefer using [total-max-hits](#match-phase-total-max-hits) over this. | + +## strict + +Contained in [rank-profile](#rank-profile). True or false. 
By default, Vespa uses loose type checking, where any query feature used but not defined in a query profile type is assumed to be a float. Set true to cause a deploy failure on missing query property type definitions instead. + +```bash +strict: true +``` + +## diversity + +Contained in [rank-profile](#rank-profile). Diversity is used to guarantee diversity in the different query phases. If you have [match-phase](#match-phase), it will provide diverse results from match-phase to first-phase. If you have [second-phase](#secondphase-rank), it will provide diverse results from first-phase to second-phase. + +Read more about this in [result diversity](/en/querying/result-diversity). + +Specify the name of an attribute that will be used to provide diversity. Result sets are guaranteed to get at least [min-groups](#diversity-min-groups) unique values from the [diversity attribute](#diversity-min-groups) from this phase, but no more than max-hits. For [match-phase](#match-phase) max-hits = the node's share of [match-phase total-max-hits](#match-phase-max-hits). For [second-phase](#secondphase-rank) max-hits = the node's share of [total-rerank-count](#secondphase-total-rerank-count). A document is considered a candidate if: + +- The query has not yet reached the _max-hits_ number produced from this phase. +- The query has not yet reached the max number of candidates in one group. This is computed by the _max-hits_ of the phase divided by [min-groups](#diversity-min-groups) + +```js +diversity { + attribute: [attribute name] + min-groups: [integer] +} +``` + +| Name | Description | +| :--- | :--- | +| attribute | Which attribute to use when deciding diversity. The attribute must be a single-valued numeric, string or [reference](#reference) type.| +| min-groups | Specifies the minimum number of groups returned from the phase. Using this with [match-phase](#match-phase) often means one can reduce [total-max-hits](#match-phase-total-max-hits). 
In [second-phase](#secondphase-rank) you might reduce [total-rerank-count](#secondphase-total-rerank-count) and still get good and diverse results.| + +## first-phase + +Contained in [rank-profile](#rank-profile). The config specifying the first phase of ranking. See [phased ranking with Vespa](/en/ranking/phased-ranking). This is the initial ranking performed on all matching documents; you should therefore avoid doing computationally expensive relevancy calculations here. By default, this will use the ranking feature `nativeRank`. + +```js +first-phase { + [body] +} +``` +The body of a first-phase ranking statement consists of: + +| Name | Description | +| --- | --- | +| [expression](#expression) | Specify the ranking expression to be used for the first phase of ranking - see [ranking expressions](/en/reference/ranking/ranking-expressions).| +| total-keep-rank-count | How many documents to keep the first phase top rank values for in total over the nodes evaluating the query. The default value is 10000 per node.| +| keep-rank-count | How many documents to keep the first phase top rank values for per node. Prefer [total-keep-rank-count](#total-keep-rank-count) over this.| +| rank-score-drop-limit | Drop all hits with a first-phase rank score less than or equal to this floating-point number. Use this to implement a rank cutoff. Default is `-Double.MAX_VALUE`.| + +## expression + +Contained in [first-phase](#firstphase-rank) or [second-phase](#secondphase-rank) or [global-phase](#globalphase-rank). Specify a [ranking expression](/en/reference/ranking/ranking-expressions). The expression can either be written directly or loaded from a file. When writing it directly, the syntax is: + +```bash +expression: [ranking expression] +``` +or +```js +expression { + [ranking expression] + [ranking expression] + [ranking expression] +} +``` + +The second format is primarily a convenience feature when using long expressions, enabling them to be split over multiple lines. 
+ +Expressions can also be loaded from a separate file. This is useful when dealing with the long expressions generated by e.g. MLR. The syntax is: + +```js +expression: file:[path-to-expressionfile] +``` + +The path is relative to the location of the schema definition file. The file itself must end with `.expression`. This suffix is optional in the schema. Therefore `expression: file:mlrranking.expression` and `expression: file:mlrranking` are identical. Both refer to a file called `mlrranking.expression` in the _schemas_ directory. + +<Info> + **Note:** Directories are not allowed in the path. +</Info> + +## rank-features + +Contained in [rank-profile](#rank-profile). List of extra [rank features](/en/reference/ranking/rank-features) to be dumped when using the query-argument [rankfeatures](/en/reference/api/query#ranking.listfeatures). + +```bash +rank-features: [feature] [feature] +``` +or +```js +rank-features { + [feature] + [feature] +} +``` + +Any number of ranking features can be listed on each line, separated by space. + +## inputs + +Contained in [rank-profile](#rank-profile). List of inputs: Query features consumed by ranking expressions in this profile. + +Query features are set either as a [request property](/en/reference/api/query#ranking.features), or equivalently from a [Searcher](/en/applications/searchers), by calling `query.getRanking().getFeatures().put("query(myInput)", myValue)`. + +Query feature types can also be declared in [query profile types](/en/querying/query-profiles#query-profile-types), but declaring inputs in the profile needing them is usually preferable. + +Inputs are inherited from inherited profiles. + +```js +inputs { + name [type]? (: value)? +} +``` + +| Name | Description | +| :--- | :--- | +| name | The name of the input, written either as the full feature name `query(myName)`, or just as `name`. | +| type | The type of the input, either `double` or a [tensor type](/en/reference/ranking/tensor#tensor-type-spec). 
If omitted, the type is double. | +| value | An optional default value, used if this input is not set in the query. A number, or a [tensor on literal form](/en/reference/ranking/tensor#tensor-literal-form). | + +Input examples: + +```bash +inputs { + myDouble: 0.5 + query(myOtherDouble) double + query(myArray) tensor(x[3]) + query(myMap) tensor(key{}):{key1: 1.0, key2: 2.0} +} +``` + +## constants + +Contained in [rank-profile](#rank-profile). List of constants available in ranking expressions, resolved and optimized at configuration time. + +Constants are inherited from inherited profiles, and from the schema itself. + +```js +constants { + name [type]?: value|file:[path] +} +``` + +| Name | Description | +| :--- | :--- | +| name | The name of the constant, written either as the full feature name `constant(myName)`, or just as `name`. | +| type | The type of the constant, either `double` or a [tensor type](/en/reference/ranking/tensor#tensor-type-spec). If omitted, the type is double. | +| value | A number, a [tensor on literal form](/en/reference/ranking/tensor#tensor-literal-form), or `file:` followed by a path from the application package root to a file containing the constant. The file must be stored in a valid [tensor JSON Format](/en/reference/ranking/constant-tensor-json-format) and end with `.json`. The file may be lz4 compressed, in which case the ending must be `.json.lz4`. | + +Constant examples: + +```js +constants { + myDouble: 0.5 + constant(myOtherDouble) double: 0.6 + constant(myArray) tensor(x[3]):[1, 2, 3] + constant(myMap) tensor(key{}):{key1: 1.0, key2: 2.0} + constant(myLargeTensor) tensor(x[10000]): file:constants/myTensor.json.lz4 +} +``` + +## rank-properties + +Contained in [rank-profile](#rank-profile). List of generic properties, in the form of key/value pairs to be used by ranking features. [Examples](/en/reference/ranking/rank-feature-configuration). 
+ +```js +rank-properties { + key: value +} +``` + +| Name | Description | +| :--- | :--- | +| key | Name of the property. | +| value | A number or any string. Must be quoted if it contains spacing. | + +## function (inline)? [name] + +Contained in [rank-profile](#rank-profile). Define a named function that can be referenced as a part of the ranking expression, or (if having no arguments) as a feature. A function accepts any number of arguments. + +```js +function [name]([arg1], [arg2], [arg3]) { + expression: … +} +``` +or +```js +function [name] ([arg1], [arg2], [arg3]) { + expression { + [ranking expression] + [ranking expression] + … + } +} +``` +Note that the parentheses are required after the name. A rank-profile example is shown below: +```js +rank-profile default inherits default { + function myfeature() { + expression: fieldMatch(title) + freshness(timestamp) + } + function otherfeature(foo) { + expression{ nativeRank(foo, body) } + } + + first-phase { + expression: myfeature * 10 + } + second-phase { + expression: otherfeature(title) * myfeature + } + summary-features: myfeature +} +``` + +You cannot include functions that accept arguments in summary features. + +Adding the `inline` modifier will inline this function in the calling expression if it also has no arguments. This is faster for small and cheap functions (and more expensive for others). + +## second-phase + +Contained in [rank-profile](#rank-profile). The config specifying the second phase of ranking. See [phased ranking with Vespa](/en/ranking/phased-ranking). This is the optional re-ranking phase performed on the top-ranking hits from the `first-phase`, and where you should put any advanced relevancy calculations. For example Machine Learned Ranking (MLR) models. By default, no second-phase ranking is performed. 
+ +```js +second-phase { + [body] +} +``` +The body of a second-phase ranking statement consists of: + +| Name | Description | +| --- | --- | +| [expression](#expression) | Specify the ranking expression to be used for the second phase of ranking. For a description, see the [ranking expression](/en/reference/ranking/ranking-expressions) documentation. Hits not reranked might be re-scored using a linear function to avoid a greater rank score than the worst reranked hit. This linear function will normally attempt to map the first phase rank score range of reranked hits to the reranked rank score range. | +| rank-score-drop-limit | When set, drop all hits with a second phase rank score (possibly a [re-scored](#secondphase-rescoring) rank score) less than or equal to this floating point number. Use this to implement a second-phase rank cutoff. By default, this value is not set. This can also be [set in the query](/en/reference/api/query#ranking.secondphase.rankscoredroplimit). | +| total-rerank-count | Optional argument. Specifies the number of hits to be re-ranked in the second phase in total over the content nodes that participate in evaluating a query (a *group*). The default value is 100 per node. This can also be [set in the query](/en/reference/api/query#ranking.secondphase.totalrerankcount). Hits not reranked might be [re-scored](#secondphase-rescoring). | +| rerank-count | Optional argument. Specifies the number of hits to be re-ranked in the second phase on each content node. This can also be [set in the query](/en/reference/api/query#ranking.secondphase.rerankcount). Prefer using [total-rerank-count](#secondphase-total-rerank-count) over this. | + +## global-phase + +Contained in [rank-profile](#rank-profile). The config specifying the global phase of ranking. See [phased ranking with Vespa](/en/ranking/phased-ranking). 
This is an optional re-ranking phase performed on the top-ranking hits in the stateless container after merging hits from all the content nodes. The "top ranking" here means as scored by the first-phase ranking expression or (if specified) second-phase ranking expression. Typically used for computing large ONNX models, which would be expensive to compute on all content nodes. By default, no global-phase ranking is performed. + +```js +global-phase { + [body] +} +``` +The body of a global-phase ranking statement consists of: + +| Name | Description | +| :--- | :--- | +| [expression](#expression) | Specify the ranking expression to be used for the global phase of ranking. For a description, see the [ranking expression](/en/reference/ranking/ranking-expressions) documentation. | +| rerank-count | Optional argument. Specifies the number of hits to be re-ranked in the global phase. The default value is 100. Note for complex setups: Applied to hits from one schema at a time, so if a query searches in multiple schemas simultaneously, global-phase may run for 100 hits per schema as default. | +| rank-score-drop-limit | When set, drop all hits with a global phase rank score (possibly a [re-scored](#globalphase-rank) rank score) less than or equal to this floating point number. Use this to implement a global phase rank cutoff. By default, this value is not set. This can also be [set in the query](/en/reference/api/query#ranking.globalphase.rankscoredroplimit). | + +## summary-features + +Contained in [rank-profile](#rank-profile). List of [rank features](/en/reference/ranking/rank-features) to be included with each result hit, in the [summaryfeatures](/en/reference/querying/default-result-format#summaryfeatures) field. Also see [feature values in results](/en/ranking/ranking-expressions-features#accessing-feature-function-values-in-results). + +If not specified, the features are as specified in the parent profile (if any). 
To inherit the features from the parent profile _and_ specify additional features, specify explicitly that the features should be inherited from the parent, as shown below. Refer to [schema inheritance](/en/schemas/inheritance-in-schemas) for examples. + +The rank features specified here are computed in the [fill phase](/en/applications/searchers#multiphase-searching) of multiphased queries. + +<Info> + **Note:** Rank-features references in _summary-features_ are **re-calculated** during the _fill protocol phase_ for the hits which made it into the global top ranking hits (from all nodes). See [match-features](#match-features) for an alternative. +</Info> + +```bash +summary-features: [feature] [feature]… +``` + +or + +```bash +summary-features [inherits parent-profile (, other-parent-profile)*]? { + [feature] + [feature] +} +``` + +Any number of rank features separated by space can be listed on each line. + +<Info> + **Note:** Note that compound expressions must be wrapped in a [function](/en/ranking/ranking-expressions-features#accessing-feature-function-values-in-results) to be accessible by match/summary-features. +</Info> + +## match-features + +Contained in [rank-profile](#rank-profile). List of [rank features](/en/reference/ranking/rank-features) to be included with each result hit, in the [matchfeatures](/en/reference/querying/default-result-format#matchfeatures) field. Also see [feature values in results](/en/ranking/ranking-expressions-features#accessing-feature-function-values-in-results). + +If not specified, the features are as specified in the parent profile (if any). To inherit the features from the parent profile _and_ specify additional features, specify explicitly that the features should be inherited from the parent as shown below, also see [schema inheritance](/en/schemas/inheritance-in-schemas). + +To disable match-features from parent rank profiles, use `match-features {}`. 
+ +_match-features_ is similar to [summary-features](#summary-features), but the rank features specified here are computed in the _first protocol phase_ of [multiprotocol query execution](/en/applications/searchers#multiphase-searching), also called the _match_ protocol phase. This gives a different performance trade-off; for details, see [feature values in results](/en/ranking/ranking-expressions-features#accessing-feature-function-values-in-results). + +```bash +match-features: [feature] [feature]… +``` + +or + +```bash +match-features [inherits parent-profile (, other-parent-profile)*]? { + [feature] + [feature] +} +``` + +Any number of ranking features separated by space can be listed on each line. + +<Info> + **Note:** Compound expressions must be wrapped in a [function](/en/ranking/ranking-expressions-features#accessing-feature-function-values-in-results) to be accessible by match/summary-features. +</Info> + +## mutate + +Contained in [rank-profile](#rank-profile). Specifies mutating operations you can do to each of the documents that make it through the 4 query phases, _on-match_, _on-first-phase_, _on-second-phase_ and _on-summary_. + +```js +mutate { + [phase name] { [attribute name] [operation] [numeric_value] } +} +``` +The phases are: + +| Name | Description | +| :--- | :--- | +| on-match | All documents that satisfy the query. | +| on-first-phase | All documents from [on-match](#on-match) that are not dropped due to the optional [rank-score-drop-limit](#rank-score-drop-limit). | +| on-second-phase | All documents from [on-first-phase](#on-first-phase) that make it onto the [second-phase](#secondphase-rank) heap. | +| on-summary | All documents for which a summary is requested. | + +The attribute must be a single value numeric attribute, enabled as [mutable](#mutable). It must also be defined outside the [document](#document) clause. + +| Operation | Description | +| :--- | :--- | +| \= | Set the value of the attribute to the given value. 
| +| += | Add the given value to the attribute | +| \-= | Subtract the given value from the attribute | + + +Find examples and use cases in [rank phase statistics](/en/ranking/phased-ranking#rank-phase-statistics). + +## constant + +_Prefer to define constants in the rank profiles that need them, with rank profile inheritance to avoid repetition. See [constants](#constants)._ + +Contained in [schema](#schema). This defines a named constant tensor located in a file with a given type that can be used in ranking expressions using the rank feature [constant(name)](/en/reference/ranking/rank-features#constant(name)): + +```bash +constant [name] { + [body] +} +``` +The body of a constant must contain: + +| Name | Description | Occurrence | +| :--- | :--- | :--- | +| file | Path to the file containing this constant, relative to the application package root. The file must be stored in a valid [tensor JSON Format](/en/reference/ranking/constant-tensor-json-format) and end with `.json`. The file may be lz4 compressed, in which case the ending must be `.json.lz4`. | One | +| type | The type of the constant tensor, refer to [tensor-type-spec](/en/reference/ranking/tensor#tensor-type-spec) for reference. | One | + +Constant tensor example: + +```js +constant my_constant_tensor { + file: constants/my_constant_tensor_file.json + type: tensor<float>(x{},y{}) +} +``` +This example has a constant tensor with two mapped dimensions, `x` and `y`: +```json +{ + "cells": [ + { "address": { "x": "a", "y": "b"}, "value": 2.0 }, + { "address": { "x": "c", "y": "d"}, "value": 3.0 } + ] +} +``` + +When an application with tensor constants is deployed, the files are distributed to the content nodes before the new configuration is used by the search nodes. Incremental changes to constant tensors are not supported. When changed, replace the old file with a new one and redeploy the application, or create a new constant with a new name in a new file. 
+ +## raw-as-base64-in-summary + +Contained in [schema](#schema). Whether raw fields should be rendered as a base64 encoded string in summary, the same way as in [json feed format](/en/reference/schemas/document-json-format#raw), rather than an escaped string. This is by default true. + +## onnx-model + +Contained in [rank-profile](#rank-profile) or [schema](#schema). This defines a named ONNX model located in a file that can be used in ranking expressions using the "onnx" rank feature. + +Prefer to define onnx models in the rank profiles using them. Onnx models are inherited from parent profiles and from the schema. + +```bash +onnx-model [name] { + [body] +} +``` + +The body of an ONNX model must contain: + +| Name | Occurrence | Description | +| :--- | :--- | :--- | +| file | One | Path to the location of the file containing the ONNX model. The path is relative to the root of the application package containing this schema. | +| input | Zero to many | An input to the ONNX model. The ONNX name, as given in the model, as well as the source for the input, is specified. | +| output | Zero to many | An output of the ONNX model. The ONNX name, as given in the model, as well as the name for use in Vespa, is specified. If no output is defined and is not referred to from the rank feature, the first output defined in the model is used. | +| gpu-device | Zero or one | Set the GPU device number to use for computation, starting at 0, i.e. if your GPU is `/dev/nvidia0` set this to 0. This must be an Nvidia CUDA-enabled GPU. Currently only models used in [global-phase](#globalphase-rank) can make use of GPU-acceleration. | +| intraop-threads | Zero or one | The number of threads available for running operations with multithreaded implementations. | +| interop-threads | Zero or one | The number of threads available for running multiple operations in parallel. This is only applicable for `parallel` execution mode. 
| +| execution-mode | Zero or one | Controls how the operators of a graph are executed, either `sequential` or `parallel`. | + +For more details including examples, see [ranking with ONNX models.](/en/ranking/onnx) + +## significance + +Contained in [rank-profile](#rank-profile). Configures a [significance model](/en/ranking/significance). + +```js +significance { + use-model: true +} +``` + +The body must contain: + +| name | occurrence | description | +| :--- | :--- | :--- | +| use-model | One | Enable or disable the use of significance models specified in [service.xml](/en/reference/applications/services/search#significance). | + +For more details see [Significance Model.](/en/ranking/significance) + +## document-summary + +Contained in [schema](#schema). An explicitly defined document summary. By default, a document summary named `default` is created. Using this element, other document summaries containing a different set of fields can be created. + +```bash +document-summary [name] inherits [document-summary1], [document-summary2], ... { + [body] +} +``` + +The `inherits` attribute is optional. If defined, it contains the name of other document summaries in the same schema (or a parent) which this summary should inherit the fields of. Refer to [schema inheritance](/en/schemas/inheritance-in-schemas) for examples. + +The body of a document summary consists of: + +| Name | Occurrence | Description | +| :--- | :--- | :--- | +| from-disk | Zero or one | Mark this summary as accessing fields on disk. This will silence the warnings that this summary reads from disk; in the console for prod deployments, on the command line for manual deployments. Read more in [Document Summaries](/en/querying/document-summaries#performance) on how to avoid disk access to speed up queries. | +| [summary](#summary) | Zero to many | A summary field in this document summary. 
| +| omit-summary-features | Zero or one | Specifies that [summary-features](#summary-features) should be omitted from this document summary. Use this to reduce CPU cost in [multiphase searching](/en/applications/searchers#multiphase-searching) when using multiple document summaries to fill hits, and only some of them need the summary features that are specified in the [rank-profile](#rank-profile). | + +Use the [summary](/en/reference/api/query#presentation.summary) query parameter to choose a document summary in searches or in [grouping](/en/reference/querying/grouping-language#summary). See also [document summaries](/en/querying/document-summaries). + +## documentid + +Available since `Vespa 8.691.19` . + +Contained in [schema](#schema). Sets whether document IDs are stored on disk only or made an attribute by also storing them in memory. Changing this setting will only take effect on the next restart of the `searchnode` service. + +Making the document IDs an attribute allows to return [Document IDs in search results](/en/schemas/documents#docid-in-results) and to visit Document IDs without disk access, cf. [Export documents](/en/operations/data-management#export-documents). + +```bash +documentid: [setting] +``` +The settings are: + +| Setting | Description | +| :--- | :--- | +| from-disk | Store document IDs on disk only. This is the default setting. | +| attribute | Make the document IDs an attribute by also storing them in memory. | + +## stemming + +Contained in [field](#field), [schema](#schema) or [index](#index). Sets how to stem a field or an index, or how to stem by default. Typically used with [OpenNLP linguistics](/en/linguistics/linguistics-opennlp). Read more on [stemming](/en/linguistics/linguistics-opennlp#stemming). 
+ +```bash +stemming: [stemming-type] +``` +The stemming types are: + +| Type | Description | +| --- | --- | +| none | No stemming: Keep words unchanged | +| best | Use the 'best' stem of each word according to some heuristic scoring. This is the default setting | +| shortest | Use the shortest stem of each word | +| multiple | Use multiple stems. Retains all stems returned from the linguistics library | + +<Info> + **Note:** When combining multiple fields in a [fieldset](#fieldset), all fields should use the same stemming type. +</Info> + +## normalizing + +Contained in [field](#field). Sets [normalizing](/en/linguistics/linguistics-opennlp#normalization) to be done on this field. The default is to normalize. + +```js +normalizing: [normalizing-type] +``` + +| Type | Description | +| :--- | :--- | +| none | No normalizing. | + +## dictionary + +Contained in [field](#field), and specifies details of the dictionary used in the inverted index of the field. Applies only to [attributes](#attribute) annotated with `fast-search`. You can specify either `btree` or `hash`, or both. If both are specified, btree is used for range/prefix and hash for exact lookups. + +### Dictionary Types + +**btree** (Default): Provides good performance for exact, prefix, and range lookups. Recommended for most use cases. Find more details in [attribute index structures](/en/content/attributes#index-structures). + +**hash**: Optimized for fields with high cardinality (many unique values), such as unique ID fields where each posting list contains only one item. + +<Info> + **Note:** When using `hash`, prefix searches for strings and range searches for numeric fields will fall back to a full scan. This is primarily beneficial when you have many unique terms with few occurrences each, where dictionary lookup costs would otherwise be significant. 
+</Info> + +### Case Handling for String Fields + +For string attributes, you can specify how character case is handled: + +- **uncased** (Default): Case-insensitive - 'bear', 'Bear', and 'BEAR' are treated as identical +- **cased**: Case-sensitive - 'bear', 'Bear', and 'BEAR' are treated as different terms + +This setting is automatically checked against the field's [match:casing](#match) setting. + +### Important Rules for String Fields with Dictionaries + +**For `btree` dictionaries**: Both `cased` and `uncased` options are supported. + +**For `hash` dictionaries**: Only `cased` is supported for string fields. When using `hash` dictionaries: + + - You **must** set `match: cased` on the field + - You **must** include `cased` in the dictionary block + +```js +dictionary { + hash + cased +} +``` + +### Example: Case-Sensitive Hash Dictionary + +```js +field id_str type string { + indexing: summary | attribute + attribute: fast-search + match: cased + rank: filter + dictionary { + hash + cased + } +} +``` + +## attribute + +Contained in [field](#field) or [struct-field](#struct-field). Specifies a property of an index structure attribute: + +```bash +attribute [attribute-name]: [property] +``` +or +```bash +attribute [attribute-name] { + [property] + [property] + … +} +``` +Read the [introduction to attributes](/en/content/attributes). If the attribute name is specified, it will be used instead of the field name as the name of the attribute. +<Warning> + **Deprecated:** Deprecated, use a field with the wanted name outside the document instead. +</Warning> +Actions required when [adding or modifying attributes](#modifying-schemas). Properties: + +| Property | Description | +| :--- | :--- | +| fast-search | Create a dictionary/index structure to speed up search in the attribute. [Read more](/en/content/attributes#index-structures). 
| +| fast-access | If [searchable-copies](/en/reference/applications/services/content#searchable-copies) \< [redundancy](/en/reference/applications/services/content#redundancy), use _fast-access_ to load the attribute in memory on all nodes with a document replica. Use this for fast access when doing [partial updates](/en/writing/reads-and-writes) and when used in a [selection expression](/en/reference/applications/services/content#documents) for garbage collection. If [searchable-copies](/en/reference/applications/services/content#searchable-copies) == [redundancy](/en/reference/applications/services/content#redundancy) (default), this property is a no-op. [Read more](/en/performance/sizing-feeding#redundancy-settings). | +| fast-rank | Only supported for [tensor](/en/ranking/tensor-user-guide) field types with at least one mapped dimension. Ensures that the per-document tensors are stored in-memory using a format that is more optimal for [ranking expression](/en/reference/ranking/ranking-expressions) evaluation. This comes at the cost of using more memory. Without this setting, these tensors are serialized in-memory, which requires deserialization as part of ranking expression evaluation. See [tensor performance](/en/performance/feature-tuning#tensor-ranking). | +| paged | This can reduce the memory footprint by allowing paging the attribute data out of memory to disk. Not supported for [tensor](#tensor) with fast-rank and [predicate](#predicate) types. See [paged attributes](/en/content/attributes#paged-attributes) for details. Do not enable _paged_ before fully understanding the consequences. | +| [sorting](#sorting) | The sort specification for this attribute. | +| [distance-metric](#distance-metric) | Specifies the distance metric to use with the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator. Only relevant for tensor attribute fields. 
| +| mutable | Marks the attribute as a special mutable attribute that can be updated by a [mutate](#mutate) operation during query evaluation.| + +An attribute is [multivalued](/en/querying/searching-multivalue-fields) if assigning it multiple values during indexing, by using a multivalued field type like array or map, or by using e.g. [split](/en/reference/writing/indexing-language#split) / [for\_each](/en/reference/writing/indexing-language#for_each) or by letting multiple fields write their value to the attribute field. + +Note that [normalizing](#normalizing) and [tokenization](/en/linguistics/linguistics-opennlp#tokenization) is not supported for attribute fields. + +Queries in attribute fields are not normalized, nor stemmed. Use [index](#index) on fields to enable. Both _index_ and _attribute_ can be set on a field. + +## sorting + +Contained in [attribute](#attribute) or [field](#field). Specifies how sorting should be done. + +```bash +sorting : [property] +``` +or +```js +sorting { + [property] + … +} +``` + +| Property | Description | +| :--- | :--- | +| order | `ascending` (default) or `descending`. Used unless overridden using [order by](/en/reference/querying/yql#function) in query. | +| function | [Sort function](/en/reference/querying/yql#function): `uca` (default), `lowercase` or `raw`. Note that if no language or locale is specified in the query, the field, or generally for the query, `lowercase` will be used instead of `uca`. See [order by](/en/reference/querying/yql#order-by) for details. | +| strength | [UCA sort strength](/en/reference/querying/yql#strength), default `primary` - see [strength](/en/reference/querying/yql#strength) for values. Values set in the query override the schema definition. | +| locale | [UCA locale](/en/reference/querying/yql#locale), default none, indicating that it is inferred from the query. It should only be set here if the attribute is filled with data in one language only. 
See [locale](/en/reference/querying/yql#locale) for details. Values set in the query override the schema definition. | + +## distance-metric + +Specifies the distance metric to use with the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator to calculate the distance between document positions and the query position. Only relevant for tensor attribute fields, where each tensor holds one or multiple vectors. + +Which distance metric to use depends on the model used to produce the vectors; it must match the distance metric used during representation learning (model learning). If you are using an "off-the-shelf" model to vectorize your data, please ensure that the distance metric matches the distance metric suggested for use with the model. Different types of vectorization models use different types of distance metrics. + +<Danger> + **Important:** When changing the `distance-metric` or `max-links-per-node`, the content nodes must be restarted to rebuild the HNSW index - see [changes that require restart but not re-feed](#changes-that-require-restart-but-not-re-feed) +</Danger> + +The calculated distance will be used to select the closest hits for _nearestNeighbor_ query operator, but also to build the [HNSW](/en/querying/approximate-nn-hnsw) index (if specified) and to produce the [distance](/en/reference/ranking/rank-features#distance(dimension,name)) and [closeness](/en/reference/ranking/rank-features#closeness(dimension,name)) ranking features. + +```bash +distance-metric: [metric] +``` +These are the available metrics; the expressions given for _distance_ and _closeness_ assume a query vector _qv = [x0, x1, ...]_ and an attribute vector _av = [y0, y1, ...]_ with same dimension of size _n_ for all vectors. + +| METRIC | DESCRIPTION | DISTANCE | CLOSENESS | +| :--- | :--- | :--- | :--- | +| euclidean | The normal [euclidean](#euclidean) (aka L2) distance. 
| $d = \sqrt{(x_1 - y_1)^2 + (x_2 - y_2)^2 + \cdots + (x_n - y_n)^2}$ <br/>range: $[0, \infty)$ | $\frac{1}{1 + d}$ | +| angular | The [angle](#angular) between the $\vec{q}$ and $\vec{a}$ vectors. | $d = \cos^{-1}\left(\frac{\vec{q} \cdot \vec{a}}{\lVert \vec{q} \rVert \, \lVert \vec{a} \rVert}\right)$ <br/>range: $[0, \pi]$ | $\frac{1}{1 + d}$ | +| dotproduct | Used for [maximal inner product search](#dotproduct). | $d = -(\vec{q} \cdot \vec{a})$ <br/>range: $(-\infty, +\infty)$ | $-d = \vec{q} \cdot \vec{a}$ | +| prenormalized-angular | Assumes normalized vectors, see [note](#prenormalized-angular) below. | $d = 1.0 - \frac{\vec{q} \cdot \vec{a}}{\lVert \vec{q} \rVert \, \lVert \vec{a} \rVert}$ <br/>range: $[0,2]$ | $\frac{1}{1 + d}$ | +| geodegrees | Assumes geographical coordinates, see [note](#geodegrees) below. | $d =$ great-circle (km) <br/>range: $[0, 20015]$ | $\frac{1}{1 + d}$ | +| hamming | Only useful for binary tensors using int8 precision, see [note](#hamming) below. | $d = \sum_{i=1}^{n} \mathrm{popcount}(x_i \oplus y_i)$ <br/>range: $[0, 8n]$ | $\frac{1}{1 + d}$ | + +### euclidean + +The default metric is [Euclidean distance](https://en.wikipedia.org/wiki/Euclidean_distance) which is just the length of a line segment between the two points. To compute the Euclidean distance directly in a ranking expression instead of fetching one already computed in a nearestNeighbor query operator, use the [Euclidean\_distance helper function](/en/reference/ranking/ranking-expressions#euclidean-distance-t): + +```js +function mydistance() { + expression: euclidean_distance(attribute(myembedding), query(myqueryvector), mydim) + } +``` + +### angular + +The _angular_ distance metric computes the _angle_ between the vectors. Its range is `[0,pi]`, which is the angular distance. This is also known as ordering by [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) where the score function is just the cosine of the angle. 
To compute the angular distance directly in a ranking expression, use the [cosine\_similarity helper function](/en/reference/ranking/ranking-expressions#cosine-similarity-t): + +```js +function angle() { + expression: acos(cosine_similarity(attribute(myembedding), query(myqueryvector), mydim)) + } +``` + Conversely, the cosine similarity can be recovered from the [distance rank-feature](/en/reference/ranking/rank-features#distance(dimension,name)) when using a nearestNeighbor query operator: + +```js +rank-profile cosine { + first-phase { + expression: cos(distance(field, myembedding)) + } + } +``` +If possible, it's slightly better for performance to normalize both query and document vectors to the same L2 norm and use the `prenormalized-angular` metric instead; but note that returned distance and closeness will be different. + +### dotproduct + +The _dotproduct_ distance metric is used to _mathematically transform_ a "maximum inner product" search into a form where it can be solved by nearest neighbor search, where the dotproduct is used as a score directly (large positive dotproducts are considered "nearby"). Internally, an extra dimension is added (ensuring that all vectors are normalized to the same length) and a distance similar to _prenormalized-angular_ is used to build the HNSW index. For details, see [this high level guide](https://towardsdatascience.com/maximum-inner-product-search-using-nearest-neighbor-search-algorithms-c125d24777ef) based on [section 3.1 Order Preserving Transformations in this paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2016/02/XboxInnerProduct.pdf). + +Note that the _distance_ and _closeness_ rank-features will not have the usual semantic meanings when using the _dotproduct_ distance metric. In particular, _closeness_ will just return the dot product ∑n(xi\*yi) which may have any negative or positive value, and _distance_ is just the negative dot product. 
If a normalized closeness in range `[0,1]` is needed, an appropriate [sigmoid function](https://en.wikipedia.org/wiki/Sigmoid_function) must be applied. For example, if your attribute is named "foobar", and the maximum dotproduct seen is around 4000, the expression `sigmoid(0.001*closeness(field,foobar))` could be a possible choice. + +The _dotproduct_ distance metric is useful for some vectorization models, including matrix factorization, that use "maximum inner product" (MIP), with vectors that aren't normalized. These models use both direction and magnitude. + +### prenormalized-angular + +The _prenormalized-angular_ distance metric **must only be used** when **both** query and document vectors are normalized. This metric was previously named "innerproduct" and required unit-length vectors. The new version computes the length of the query vector once and assumes all other vectors are of the same length. + +Using _prenormalized-angular_ with vectors that are not normalized causes unpredictable nearest neighbor search, and is observed to give bad results both for performance and quality. + +The length, magnitude, or norm of a vector _x_ is calculated as `length = sqrt(sum(pow(xi,2)))`. The unit length normalized vector is then given by `[xi/length]`. Zero vectors may not be used at all. + +The Vespa _prenormalized-angular_ computes the [cosine similarity](https://en.wikipedia.org/wiki/Cosine_similarity) and uses `1.0 - cos(angle)` as the distance metric. It gives exactly the same ordering as `angular` distance, but with a distance in the range [0,2], since cosine similarity has range [1,-1], so the end result is 0.0 for same direction vectors, 1.0 for a right angle, and 2.0 for vectors with exactly opposite directions. 
Getting the cosine score (or angle) is therefore easy: + +```js +rank-profile cosine { + first-phase { + expression: 1.0 - distance(field, embedding) + } + function angle() { + expression: acos(1.0 - distance(field, embedding)) + } + } +``` + To compute the cosine similarity directly in a ranking expression instead of fetching one already computed in a nearestNeighbor query operator, use the [cosine\_similarity helper function](/en/reference/ranking/ranking-expressions#cosine-similarity-t): + +```js +function mysimilarity() { + expression: cosine_similarity(attribute(myembedding), query(myqueryvector), mydim) + } +``` + +### geodegrees + +The _geodegrees_ distance metric is only valid for geographical coordinates (two-dimensional vectors containing latitude and longitude on Earth, in degrees). It computes the great-circle distance (in kilometers) between two geographical points using the [Haversine formula](https://en.wikipedia.org/wiki/Haversine_formula). See [geodegrees system test](https://github.com/vespa-engine/system-test/blob/master/tests/search/nearest_neighbor/geo.sd) for an example. + +### hamming + +The [Hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) metric counts the number of dimensions where the vectors have different coordinates. This isn't useful for floating-point data since it means you only get 1 bit of information from each floating-point number. Instead, it should be used for binary data, where each bit is considered a separate coordinate. 
Practically, this means you should use the `int8` [cell value type](/en/performance/feature-tuning#cell-value-types) for your tensor, with the usual encoding from bit pattern to numerical value, for example: + +- `00000000` → `0` (hex `00`) +- `00010001` → `17` (hex `11`) +- `00101010` → `42` (hex `2A`) +- `01111111` → `127` (hex `7F`) +- `10000000` → `-128` (hex `80`) +- `10000001` → `-127` (hex `81`) +- `11111110` → `-2` (hex `FE`) +- `11111111` → `-1` (hex `FF`) + +Feeding data for this use case may be done with ["hex dump"](/en/reference/schemas/document-json-format#tensor-hex-dump) format instead of numbers in range `[-128,127]` both to have a more natural format for representing binary data, and to avoid the overhead of parsing a large JSON array of numbers. + +## bolding + +Contained in [field](#field) or [summary](#summary). Highlight matching query terms in the [summary](#summary): + +```bash +bolding: on +``` + +The default is no bolding, set `bolding: on` to enable it. Note that this command is overridden by `summary: dynamic`. If both are specified, bolding will be ignored. The difference between using bolding instead of `summary: dynamic` is the latter will provide a dynamic abstract in addition to highlighting query terms, while the first only highlights. Bolding is only supported for [index](#indexing-index) fields of type string or array\<string\>. + +The default XML element used to highlight the search terms is \<hi\> - to override, set _container.qr-searchers_ configuration. Example using `<strong>`: + +```xml +<container> + <search> + <config name="container.qr-searchers"> + <tag> + <bold> + <open><strong></open> + <close></strong></close> + </bold> + <separator>...</separator> + </tag> + </config> + </search> +</container> +``` + +Maximum field byte length for bolding is 64Mb - field values larger than this will be represented as a snippet as in `summary: dynamic`. + +## id + +Contained in [field](#field). Sets the numerical ID of this field. 
All fields have a document-internal ID internally for transfer and storage. IDs are usually determined programmatically as a 31-bit number. Some storage and transfer space can be saved by instead explicitly setting IDs to a 7-bit number. + +```js +id: [positive integer] +``` + +An ID must satisfy these requirements: + +- Must be a positive integer +- Must be less than 100 or larger than 127 +- Must be unique within the document and all documents this document inherits + +## index + +Contained in [field](#field) or [schema](#schema). Sets index parameters. + +Content in [string](#string)-fields with _index_ is [normalized](#normalizing) and [tokenized](/en/linguistics/linguistics-opennlp#tokenization) by default. The field can be single- or multivalued (e.g. `array<string>`). + +For [tensor](#tensor)-typed fields, _index_ creates an [HNSW](#index-hnsw) index for [Approximate Nearest Neighbor](/en/querying/nearest-neighbor-search-guide) queries, with a default [euclidean](#euclidean) distance metric. The index is built after a [content node restart](#changes-that-require-restart-but-not-re-feed) (automated on Vespa Cloud). + +Examples: + +```js +index [index-name]: [property] +``` +or +```js +index [index-name] { + [property] + [property] + … +} +``` + +<Warning> + **Deprecated:** If `index-name` is specified, it will be used instead of the field name as the name of the index. This use is deprecated, use a synthetic field with the wanted name outside the `document` block instead - see an [example](/en/writing/indexing#date-indexing). +</Warning> +Parameters: + +| Property | Occurrence | Description | +| :--- | :--- | :--- | +| [stemming](#stemming) | Zero to one | Set the stemming of this index. Indexes without a stemming setting get their stemming setting from the fields added to the index. Setting this explicitly is useful if fields with conflicting stemming settings are added to this index. | +| arity | One (mandatory for predicate fields), else zero. 
| Set the [arity value for a predicate field](/en/schemas/predicate-fields#index-size). The data type for the containing field must be `predicate`. | +| lower-bound | Zero to one | Set the [lower bound value for a predicate field](/en/schemas/predicate-fields#upper-and-lower-bounds). The data type for the containing field must be `predicate`. | +| upper-bound | Zero to one | Set the [upper bound value for predicate fields](/en/schemas/predicate-fields#upper-and-lower-bounds). The data type for the containing field must be `predicate`. | +| dense-posting-list-threshold | Zero to one | Set the [dense posting list threshold value for predicate fields](/en/schemas/predicate-fields#dense-posting-list-threshold). The data type for the containing field must be `predicate`. | +| enable-bm25 | Zero to one | Enable this index field to be used with the [bm25 rank feature](/en/reference/ranking/rank-features#bm25). This creates posting lists for the [indexes](/en/content/proton#index) for this field that has interleaved features in the document ID streams. This makes it fast to compute the _bm25_ score. See the [BM25 reference](/en/ranking/bm25) for details and example use. | +| [hnsw](#index-hnsw) | Zero to one | Specifies optional parameters for an HNSW index to enable faster, approximate nearest neighbor search. Only supported for tensor attribute fields with tensor types with: +- One indexed dimension - single vector per document +- One or more mapped dimensions and one indexed dimension - multiple vectors per document + + Used in combination with the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator. | + +## hnsw + +Contained in [index](#index). Specifies optional parameters for an HNSW index to enable faster, approximate nearest neighbor search using the [nearestNeighbor](/en/reference/querying/yql#nearestneighbor) query operator. 
+ +<Info> + **Note:** Specifying the `index` keyword in the [indexing](#indexing) statement of a tensor creates an HNSW index with default settings, even if this block is not specified! +This implements a modified version of the Hierarchical Navigable Small World (HNSW) graphs algorithm ([paper](https://arxiv.org/abs/1603.09320)). +</Info> + +Only supported for the following tensor attribute field types: + +- Single vector per document: Tensor type with one indexed dimension. Example: `tensor<float>(x[3])` +- Multiple vectors per document: Tensor type with one or more mapped dimensions and one indexed dimension. Examples: `tensor<float>(m{},x[3])`, `tensor<float>(m{},n{},x[3])` + +HNSW indexes are not supported in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). + +```js +hnsw { + [parameter]: [value] + [parameter]: [value] + ... +} +``` + +The following parameters are used when building the index graph: + +| Parameter | Description | +| :--- | :--- | +| max-links-per-node | Specifies how many links per HNSW node to select when building the graph. The default value is 16. In [HNSWlib](https://github.com/nmslib/hnswlib/blob/master/ALGO_PARAMS.md) (implementation based on the paper) this parameter is known as _M_. | +| neighbors-to-explore-at-insert | Specifies how many neighbors to explore when inserting a document in the HNSW graph. The default value is 200. In HNSWlib, this parameter is known as _ef\_construction_. | + +The [distance metric](#distance-metric) specified on the _attribute_ is used when building and searching the graph. 
Example: + +```js +field text_embedding type tensor<float>(x[384]) { + indexing: summary | attribute | index + attribute { + distance-metric: prenormalized-angular + } + index { + hnsw { + max-links-per-node: 24 + neighbors-to-explore-at-insert: 200 + } + } +} +``` + +See [Approximate Nearest Neighbor Search using HNSW Index](/en/querying/approximate-nn-hnsw) for examples of use, and see [Approximate Nearest Neighbor Search in Vespa - Part 1](https://blog.vespa.ai/approximate-nearest-neighbor-search-in-vespa-part-1/) blog post for how the Vespa team selected HNSW as the baseline algorithm for extension and integration in Vespa. + +## indexing + +Contained in [field](#field) or [struct-field](#struct-field). One or more Indexing Language instructions used to produce index, attribute and summary data from this field. Indexing instructions have pipeline semantics similar to Unix shell commands. The value of the field enters the pipeline during indexing, and the pipeline puts the value into the desired index structures, possibly doing transformations and pulling in other values along the way. + +```bash +indexing: [indexing-statement] +``` +or +```js +indexing { + [indexing-statement]; + [indexing-statement]; + … +} +``` + +If the field containing this is defined outside the document, it must start with an indexing statement which outputs a value (either "input [fieldname]" to fetch a field value, or a literal, e.g., "some-value"). Fields in documents will use the value of the enclosing field as input (input [fieldname]) if one isn't explicitly provided. + +Specify the operations separated by the pipe (`|`) character. For advanced processing needs, use the [indexing language](/en/reference/writing/indexing-language), or write a [document processor](/en/applications/document-processors). 
Supported expressions for fields are: + +| expression | description | +| :--- | :--- | +| attribute | [Attribute](/en/content/attributes) is used to make a field available for sorting, grouping, ranking and searching using [match](#match) mode `word`. | +| index | Creates a searchable [index](/en/content/proton#index) for the values of this field using [match](#match) mode `text`. By default, the index name will be the same as the name of the schema field. Use a [fieldset](#fieldset) to combine fields in the same set for searching. | +| set\_language | Sets document language - [details](/en/reference/writing/indexing-language#set_language). | +| summary | Includes the value of this field in a [summary](/en/reference/writing/indexing-language#summary) field. Modify summary output by using [summary:](#summary) (e.g., to generate dynamic teasers). | + +When combining both `index` and `attribute` in the indexing statement for a field, e.g `indexing: summary | attribute | index`, the [match](#match) mode becomes `text` for the field. So searches in this field will not search the contents in the [attribute](#attribute) but the index. + +Find examples and more details in the [Text Matching](/en/querying/text-matching) guide. + +## linguistics + +Sets a linguistics 'profile' deciding how content and searches against this field should be linguistically processed. The profile value is propagated to the [linguistics component](/en/linguistics/linguistics) which can be configured to do profile-specific processing. + +```js +linguistics { + profile: myIndexAndSearchProfile +} +``` +or +```js +linguistics { + profile { + index: myIndexProfile + search: mySearchProfile + } +} +``` + +## match + +Contained in [field](#field), [fieldset](#fieldset) or [struct-field](#struct-field). Sets the matching method to use for this field to something other than the default token matching. 
+ +```bash +match: [property] +``` +or +```js +match { + [property] + [property] + … +} +``` + +Whether the match type is `text`, `word` or `exact`, all term matching will be done after [normalization](/en/linguistics/linguistics-opennlp#normalization) and locale-independent lowercasing (in that order). + +Find examples and more details in the [Text Matching](/en/querying/text-matching) guide. Also see search using [regular expressions](/en/reference/querying/yql#matches). + +| Property | Valid with | Description | +| :--- | :--- | :--- | +| text | index | The default for string fields with `index`. Can not be combined with exact matching. The field is matched per [token](/en/linguistics/linguistics-opennlp#tokenization). | +| exact | index, attribute | Can not be combined with *text* matching. The field is matched *exactly*: Strings containing any characters whatsoever will be indexed and matched as-is. Lowercasing is still performed unless `match: cased` is also used. In queries, the exact match string ends at the exact match terminator (below). <br/><br/> A field with `match: exact` is considered to be a [filter field](#filter), just as if `rank: filter` was specified. This is because there is only one word per field (or per item in the case of multivalued types such as `array<string>`), so there is little ranking information. Turn off the implicit `rank: filter` by adding `rank: normal`. | +| exact-terminator | index, attribute | Only valid for `match: exact`. Default is `@@`. Specify terminator in [query strings](/en/reference/api/query#model.querystring). If the query strings can contain `@@`, set a different terminator, or use `match: word`, see below. 
Example - use: <CodeBlock>```match { exact exact-terminator: "@%" }``` </CodeBlock>on a field called `tag` to make query `tag:a b c!@%` match documents with the string *a b c!* <br/><br/> Example using the default terminator: If `tag` is an exact match field, the query: <CodeBlock> someword AND (tag:!\*!@@ OR tag:(kanoo)@@)</CodeBlock> matches documents with `someword` and either `!*!` or `(kanoo)` as a tag. Note that without the `@@` terminating the second tag string, the second tag value would be `(kanoo))`. | +| word | index, attribute | This is the default matching mode for [string attributes](/en/content/attributes). It cannot be combined with *text* matching. Match word means that the entire content of the field is indexed as a single word. Word matching is like exact matching, but with more advanced query parsing. The query terms are heuristically parsed, taking into account some usual query syntax characters. One can also use double quotes to include spaces, stars, or exclamation marks. Example: If `artist` is a string attribute, the query: foo AND (artist:"'N Sync" OR artist:"\*NSYNC" OR artist:A\*teens OR artist:"Wham!") matches documents with `foo` and at least one of `'N Sync` or `*NSYNC` or `A*teens` or `Wham!` in the artist field Note that without the quotes, the space in `'N Sync` would end that word and would result in a search for just `'N`, similarly the `!` would mean to increase the weight of a `Wham` term if not quoted. | +| prefix | attribute | Has no effect, as [attributes](/en/content/attributes) always support prefix searches. Prefix matching must be [specified in the query](/en/reference/querying/yql#prefix). See also [regular expressions](/en/reference/querying/yql#matches). | +| substring | [Streaming mode](/en/performance/streaming-search#differences-in-streaming-search) only | Set default match mode to *substring* for the field. Only available in streaming search. 
As the data structures in streaming search support substring searches, one can always set substring matching in the query, without setting the field to the substring default. Also see [regular expressions](/en/reference/querying/yql#matches). | +| suffix | [Streaming mode](/en/performance/streaming-search) only | Like substring above. | +| uncased | index, attribute | Use case-insensitive matching (the default). | +| cased | index, attribute | Use case-sensitive matching. Usually only used together with `match: exact` or `match: word` modes. When using `match: text`, note that if you are using a custom [linguistics implementation](/en/linguistics/linguistics-custom), this will only have effect for string index fields if that implementation produces cased tokens. | +| max-length | index | Limit the length of the field that will be used for matching. By default, only the first 1M characters are indexed. When adjusting this limit, it might also be needed to adjust [max-occurrences](#max-occurrences). | +| max-occurrences | index | Configure the maximum number of occurrences that will be indexed for each unique token/term in the field for a given document. If this limit is reached, consecutive occurrences of the same token/term are ignored for that document. The default value is 10000. <br/><br/>Adjusting this limit might be needed when using the [phrase](/en/reference/querying/yql#phrase), [near](/en/reference/querying/yql#near), or [onear](/en/reference/querying/yql#onear) query operators to query documents with large field values (see [max-length](#max-length)) that contain more than 10000 occurrences of common tokens/terms. When using these operators, it is only possible to match among the first *max-occurrences* of a token/term in a document. | +| max-token-length | index | Configure the max length of tokens that will be indexed for the field. Longer tokens are silently ignored. The unit is characters (cf. java.lang.String.length()). The default value is 1000. 
| +| gram | index | This field is matched using n-grams. For example, with the default gram size 2, the string "hi blue" is tokenized to "hi bl lu ue" both in the index and in queries to the index. <br/><br/> N-gram matching is useful mainly as an alternative to [segmentation](/en/linguistics/linguistics-opennlp#tokenization) in CJK languages. Typically, it results in increased recall and lower precision. However, as Vespa usually uses proximity in ranking, the precision offset may not be of much importance. Grams consume more resources than other matching methods because both indexes and queries will have more terms, and the terms contain repetition of the same letters. On the other hand, CPU-intensive CJK segmentation is avoided. It may also be used for substring matching in general. | +| gram-size | index | A positive, nonzero number, default 2. Sets the gram size when gram matching is used. Example: <CodeBlock>```match { gram gram-size: 3 }```</CodeBlock> | + +## rank + +Contained in [field](#field), [struct-field](#struct-field) or [rank-profile](#rank-profile). Set the kind of ranking calculations that will be done for the field. Even though the actual ranking expressions decide the ranking, this setting tells Vespa which preparatory calculations and which data structures are needed for the field. + +```bash +rank [field-name]: [ranking settings] +``` +or +```js +rank { + [ranking setting] +} +``` +The field name should only be specified when used inside a rank-profile. The following ranking settings are supported in addition to the default: + +| Ranking setting | Description | +| :--- | :--- | +| filter | Indicates that matching in this field should use fast bit vector data structures only. This saves CPU during matching, but only a few simple ranking features will be available for the field. This setting is appropriate for fields typically used for filtering or simple boosting purposes, like filtering or boosting on the language of the document. 
<br/><br/> - For *index* fields, this setting does not change index formats but helps choose the most compact representation when matching against the field. <br/><br/> - For *attribute* fields with *fast-search* this setting builds additional posting list representations (bit vectors) that can significantly speed up query evaluation. See [feature tuning](/en/performance/feature-tuning#when-to-use-fast-search-for-attribute-fields) and [the practical search performance guide](/en/performance/practical-search-performance-guide). | +| normal | The reverse of `filter`. Matching in this field will use normal data structures and give normal match information for ranking. Used to turn off implicit `rank: filter` when using [match: exact](#exact). If both `filter` and `normal` are set somehow, the effect is as if only `normal` was specified. | + +Related: See the [filter](/en/reference/querying/yql#filter) query annotation for how to annotate query terms as filters. + +### filter-threshold + +Contained in a [rank-profile](#rank-profile). Used to optimize query performance when searching large text [index](/en/basics/schemas#document-fields) fields, by allowing a per-query combination of [rank: filter](#filter) and [rank: normal](#normal) behavior. See [profile-level filter-threshold](#filter-threshold) for how to use the same value for all _index_ fields. + +```bash +rank [field-name] { + filter-threshold: 0.05 +} +``` + +| Setting | Description | +| --- | --- | +| filter-threshold | Threshold value (in the range [0.0, 1.0]) deciding when matching in this _index_ field should be treated as a filter. This happens for query terms with [estimated hit ratios](/en/learn/glossary#estimated-hit-ratio) (in the range [0.0, 1.0]) that are above the _filter-threshold_. Then, fast bitvector data structures are used, similar to when the field is set to [rank: filter](#filter). 
This saves CPU and Disk I/O during matching and typically results in faster query evaluation, with the downside being that only a boolean signal is available for ranking (the document being a match or not). [BM25](/en/ranking/bm25) handles this by assuming one occurrence of the query term in the document, and the field length being equal to the average field length.<br/><br/>Use this to optimize query performance when searching large text _index_ fields with e.g. the [weakAnd](/en/ranking/wand#weakand) query operator and [BM25](/en/ranking/bm25) ranking. Query terms that are common in the corpus (e.g., stopwords) are treated as filters with faster matching and simplified ranking, while other query terms are handled as usual with full ranking.<br/><br/>In testing with various text datasets (e.g., Wikipedia), a _filter-threshold_ setting of 0.05 has been shown to be a good starting point. [Read more](/en/performance/feature-tuning#posting-lists).<br/><br/>This setting is only relevant for [index](/en/basics/schemas#document-fields) fields, and cannot be used in combination with [rank: filter](#filter). Has no effect in [streaming search](/en/performance/streaming-search#differences-in-streaming-search).| + +### element-gap + +Contained in a [rank-profile](#rank-profile). Used to specify the gap between positions in adjacent elements in multi-value fields. The default value is `infinity`. It should be specified as `infinity` or as a positive integer number. + +Consider an `array<string>` field with three elements `["a c", "x b x a x x x x x x b", "x x d"]`. Normally, distance calculation is only performed within an element, thus the minimum forward distance between occurrences for terms _a_ and _b_ is 7. By setting the `element-gap` for the field to 0, adjacent elements are considered and the minimum forward distance between occurrences for terms _a_ and _b_ becomes 3. 
The minimum distance between occurrences for terms _c_ and _d_ is not calculated since they are not in adjacent elements. + +The `element-gap` setting affects the [nativeProximity](/en/reference/ranking/rank-features#nativeProximity) rank feature, the [near](/en/reference/querying/yql#near) query operator and the [onear](/en/reference/querying/yql#onear) query operator. + +```bash +rank [field-name] { + element-gap: 0 +} +``` + +No restart or reindexing is required when changing this setting, it is immediately effective. + +## query-command + +Contained in [fieldset](#fieldset), [field](#field) or [struct-field](#struct-field). Specifies a function to be performed on query terms to the indexes of this field when searching. The Search Container server has support for writing Vespa Searcher plugins that process these commands. + +```js +query-command: [an identifier or quoted string] +``` + +If you write a plugin searcher that needs some index-specific configuration parameter, that parameter can be set here. + +There is one built-in query-command available: `phrase-segmenting`. If this is set, terms connected by non-word characters in user queries (such as "a.b") will be parsed to a phrase item, instead of by default, an AND item where these terms have connectivity set to 1. + +## rank-type + +Contained in [field](#field) or [rank-profile](#rank-profile). Selects the low-level rank settings to be used for this field when using `nativeRank`. + +```bash +rank-type [field-name]: [rank-type-name] +``` +The field name can be skipped inside fields. Defined rank types are: + +| Type | Description | +| :--- | :--- | +| identity | Used for fields that contains only what this document _is_, e.g., "Title". Complete identity hits will get a high rank. | +| about | Some text which is (only) about this document, e.g. "Description". About hits get high rank on partial matches and higher for matches early in the text and repetitive matches. This is the default rank type. 
| +| tags | Used for simple tag fields of type tag. The tags rank type uses a logarithmic table to give more relative boost in the low range: As tags are added, they should have a significant impact on the rank score, but as more and more tags are added, each new tag should contribute less. | +| empty | Gives no relevancy effect on matches. Used for fields you just want to treat as filters. | + +For `nativeRank`, one can specify a rank type per field. If the supported rank types do not meet requirements, one can explicitly configure the native rank features using rank-properties. See the [native rank reference](/en/reference/ranking/nativerank) for more information. + +## weakand + +Contained in [rank-profile](#rank-profile). + +Tunes the [weakAnd](/en/ranking/wand#weakand) algorithm to automatically exclude terms and documents with expected low query significance based on document frequency statistics present in the document corpus. This makes matching faster at the cost of potentially reduced recall. + +```js +weakand { + [body] +} +``` + +Note that all document frequency calculations are done using _content node-local_ document statistics (i.e. [global significance](/en/ranking/significance#global-significance-model) does not have an effect). This means results may differ across different content nodes and/or content node groups. + +The body of a `weakand` statement consists of: + +| Property | Occurrence | Description | +| :--- | :--- | :--- | +| stopword-limit | Zero to one | A number in the range \[0, 1\]. Represents the maximum [normalized document frequency](/en/learn/glossary#document-frequency-normalized) a query term can have in the corpus (i.e. the ratio of all documents where the term occurs at least once) before it's considered a stopword and dropped entirely from being a part of the `weakAnd` evaluation. This makes matching faster at the cost of potentially producing more hits. Dropped terms are not exposed as part of ranking. 
<br/> Example: <br/> stopword-limit: 0.60 This will drop all query terms that occur in at least 60% of the documents.<br/> Using `stopword-limit` is similar to explicitly removing stopwords from the query up front, but has the benefit of dynamically adapting to the actual document corpus and not having to know—or specify—a set of stopwords. [Read more](/en/performance/feature-tuning#posting-lists). | +| adjust-target | Zero to one | A number in the range \[0, 1\] representing [normalized document frequency](/en/learn/glossary#document-frequency-normalized). Used to derive a per-query document score threshold, where documents scoring lower than the threshold will not be considered as potential hits from the `weakAnd` operator. The score threshold is selected to be equal to that of the query term whose document frequency is *closest* to the configured `adjust-target` value. <br/> This can be used to efficiently *exclude* documents that only match terms that occur very frequently in the document corpus. Such terms are likely to be stopwords that have low semantic value for the query, and excluding documents only containing them is likely to have only a minor impact on recall. <br/> This makes overall matching faster by reducing the number of hits produced by the `weakAnd` operator. <br/> Example: <CodeBlock> adjust-target: 0.01 </CodeBlock> This excludes documents that only have terms that occur in more than approximately 1% of the document corpus. The actual threshold is query-specific and based on the query term score whose document frequency is closest to 1%. <br/>`adjust-target` can be used together with [stopword-limit](#weakand-stopword-limit) to efficiently prune both terms and documents with low significance when processing queries. [Read more](/en/performance/feature-tuning#posting-lists). | +| allow-drop-all | Zero to one | A boolean value. 
The default behavior of `weakAnd` is to always keep at least one term (the least common one) even though it is considered a stopword. This is to avoid dropping all query terms in order to make sure that some hits are produced.<br/> If set to `true`, the `weakAnd` operator will allow removal of all query terms if they are all considered stopwords (i.e., if `stopword-limit` is set and all query terms are above the limit).<br/> This may be desired (and significantly improve query performance) if `weakAnd` is used together with another query operator, e.g. the [nearestNeighbor](/en/querying/nearest-neighbor-search#querying-using-nearestneighbor-query-operator) operator. <br/> Be aware that if this is set to `true` and all query terms are considered stopwords, the `weakAnd` operator will not produce *any* hits. And by extension, if `weakAnd` is used by itself, the query may return no hits. <br/> Example: <br/> <CodeBlock>allow-drop-all: true</CodeBlock> This overrides the default behavior of `weakAnd` and allows all query terms to be dropped if they are all considered stopwords. <Danger>**Important:** Defaults to `false` if not specified.</Danger> | + +## summary-to + +<Warning> + **Deprecated:** Use [document-summary](#document-summary) instead. +</Warning> + +Contained in [field](#field) or [struct-field](#struct-field). Specifies the name of the document summaries that should contain this field. +```bash +summary-to: [summary-name], [summary-name], … +``` + +Fields with summary will always be part of the default summary regardless of this setting. Use explicit [document-summary](#document-summary) instead. See also [document summaries](/en/querying/document-summaries). + +## summary + +Contained in [field](#field) or [document-summary](#document-summary) or [struct-field](#struct-field). Declares a summary field. + +```bash +summary: [property] +``` +or +```bash +summary [name] { + [body] +} +``` +The summary _name_ can be skipped if this is set inside a field. 
The name will then be the same as the name of the source field. _full_ summary is the default. Long field values (like document content fields) should be made _dynamic_. The body of a summary may contain: + +| Name | Occurrence | Description | +| :--- | :--- | :--- | +| full | Zero to one | Returns the full field value in the summary (the default). | +| bolding: on | Zero to one | Specifies whether the content of this field should be [bolded](#bolding). Only supported for [index](#indexing-index) fields of type string or array\<string>. | +| dynamic | Zero to one | Make the value returned in results from this summary field a *dynamic abstract* of the source field by extracting fragments of text around matching query terms. Matching query terms will also be highlighted, in similarity with the bolding feature. This highlighting is not affected by the query-argument bolding. The default XML element used to highlight query terms is `<hi>` - refer to [bolding](#bolding) for how to configure. *dynamic* is only supported for [index](#indexing-index) fields of type string or array\<string>. For array\<string> fields, a dynamic abstract is created per string item in the array. | +| source | Zero to one | Specifies the name of the field or fields from which the value of this summary field should be fetched. If multiple fields are specified, the value will be taken from the first field if that has a value, from the second if the first one is empty, and so on. <CodeBlock> ```source: [field-name], [field-name], …``` </CodeBlock> When this is not specified, the source field is assumed to be the field with the same name as the summary field. <br/><br/> Refer to [attribute](#add-or-remove-an-existing-document-field-from-document-summary) and [non-attribute](#add-or-remove-a-new-non-attribute-document-field-from-document-summary) fields for modifying a schema. 
| +| to | Zero to one | Specifies the name of the document summaries that this should be included in.<br/><br/> <CodeBlock> ```to: [document-summary-name], [document-summary-name], …``` </CodeBlock> This can only be specified in fields, not in the explicit document summaries. When this is not specified, the field will go to the `default` document summary. | +| matched-elements-only | Zero to one | Specifies that only the matched elements in a searchable [array of primitive](#array), [weightedset](#weightedset), [array of struct](#array) or [map type](#map) field are returned as part of document summary. For array of struct or map type fields, this is typically used in accordance with the [sameElement](/en/reference/querying/yql#sameelement) operator, but it can also be used when searching directly on a sub-struct field. It is also supported when the field is [imported](#import-field).<br/><br/> See [example use](#map) and example schemas: <br/><br/> - [matched elements only](https://github.com/vespa-engine/system-test/blob/master/tests/search/matched_elements_only/indexed/test.sd) <br/><br/> - [array of struct and map type](https://github.com/vespa-engine/system-test/blob/master/tests/search/struct_and_map_types/attribute_fields/test.sd) | +| select-elements-by | Zero to one | Use a summary feature to control which elements in an [array of primitive](#array) or [array of struct](#array) field are returned as part of document summary. <CodeBlock>``` select-elements-by: <summary-feature-name>``` </CodeBlock> The summary feature used must be a tensor with a single mapped dimension. An element will be returned if its id is a label along the mapped dimension of this tensor. <br/><br/> - [schema example](https://github.com/vespa-engine/system-test/blob/master/tests/search/chunk_selection/test.sd) | +| tokens | Zero to one | Make the value returned in results from this summary field be an array of the tokens indexed in the source field. 
Multiple tokens at the same location are put into a nested array. The source field must be specified, and it must be an [index](#indexing-index) or [attribute](#indexing-attribute) field of type string, array\<string\> or weightedset\<string\>. If the source field is of type weightedset\<string\> then the summary field is rendered as if the source field was of type array\<string\>, weights are not shown. This is mainly useful for [linguistics transformations debugging](/en/querying/text-matching#tokens-example), to correlate query trace with the tokens indexed. | + +Read more about [document summaries](/en/querying/document-summaries). + +## weight + +Contained in [field](#field). The weight of a field - the default is 100. The field weight is used when calculating the [rank scores](/en/basics/ranking). + +```js +weight: [positive integer] +``` + +## weightedset + +Contained in [field](#field) of type weightedset. Properties of a weighted set. + +```bash +weightedset: [property] +``` +or +```js +weightedset { + [property] + [property] + … +} +``` + +| Property | Occurrence | Description | +| :--- | :--- | :--- | +| create-if-nonexistent | Zero to one | If the weight of a key is adjusted in a document using a partial update increment or decrement command, but the key is currently not present, the command will be ignored by default. Set this to have keys created in this case instead. This is useful when the weight is used to represent the count of the key. <br/><br/> <CodeBlock> ``` field tag type weightedset<string> { indexing: attribute \| summary weightedset { create-if-nonexistent remove-if-zero } } ``` </CodeBlock> | +| remove-if-zero | Zero to one | This is the companion of `create-if-nonexistent` for the converse case: By default, keys may have zero as weight. With this turned on, keys whose weight is adjusted (or set) to zero will be removed. | + + +## import field + +Contained in [schema](#schema).
Using a [reference](#reference) to a document type, import a field from that document type into this schema to be used for matching, ranking, grouping, and sorting. Only attribute fields can be imported. Importing fields is not supported in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). + +The imported field inherits all but the following properties from the parent field: + +- [attribute: fast-access](#attribute) + +Refer to [parent/child](/en/schemas/parent-child) for a complete example. Note that the imported field is put outside the document type: + +```js +schema myschema { + document myschema { + field parentschema_ref type reference<parentschema> { + indexing: attribute + } + } + import field parentschema_ref.name as parent_name {} +} +``` + +Extra restrictions apply for some of the field types: + +| Field type | Restriction | +| :--- | :--- | +| array of struct | Can be imported if at least one of the struct fields has an attribute. All struct fields with attributes must have primitive types. Only the struct fields with attributes will be visible. | +| map of struct | Can be imported if the key field has an attribute, and at least one of the struct fields has an attribute. All struct fields with attributes must have primitive types. Only the key field and the struct fields with attributes will be visible. | +| map | Can be imported if both key and value fields have primitive types and have attributes. | +| position | Can be imported if it has an attribute. | +| array of position | Can be imported if it has an attribute. | + +To use an imported field in summary, create an explicit [document summary](#document-summary) containing the field. + +Imported fields can be used to expire documents, but [read this first](/en/schemas/documents#document-expiry).
+ +## Document and search field types + +Note that it is possible to make a document field of one type into one or more instances of another search field, by declaring a field outside the document, which uses other fields as input. For example, to create an integer attribute for a string containing a comma-separated list of integers in the document, do like this: + +```js +schema example { + document example { + field yearlist type string { # Comma-separated years + } + } + + field year type array<int> { # Search field using the yearlist value + indexing: input yearlist | split "," | attribute + } +} +``` + +## Modifying schemas + +This section describes how a schema in a live application can be modified—categories: + +1. [Valid changes without restart or re-feed](#valid-changes-without-restart-or-re-feed) +2. [Changes that require restart but not re-feed](#changes-that-require-restart-but-not-re-feed) +3. [Changes that require reindexing](#changes-that-require-reindexing) +4. [Changes that require re-feed](#changes-that-require-re-feed) + +When running `vespa prepare` on a new application package, the changes in the schema files are compared with the files in the current active package. If some of the changes require restart or re-feed, the output from `vespa prepare` specifies which actions are needed. + +<Danger> + **Important:** For changes that are not covered below, and no output is returned from `vespa prepare`, the impact is undefined and in no way guaranteed to allow a system to stay live until re-feeding. Changes not related to the schema are discussed in [admin procedures](/en/operations/self-managed/admin-procedures). +</Danger> + +### Valid changes without restart or re-feed + +Procedure: + +1. Run `vespa prepare` on the changed application +2. Run `vespa activate`. 
The changes will take effect immediately + +Changes: + +| Change | Description | +| :--- | :--- | +| Add a new document field | Add a new document field as index, attribute, summary or any combination of these. Existing documents will implicitly get the new field with no content. Documents fed after the change can specify the new field. If the field has existed with the same type earlier, then old content *may or may not* reappear | +| Remove a document field | Existing documents will no longer see the removed field, but the field data is not completely removed from the search node | +| Add or remove an existing document field from document summary | Add an existing field to summary or any number of summary classes, and remove an existing field from summary or any number of summary classes. Example: <br/><br/> <CodeBlock> ``` document-summary short-summary { summary artist {} } ``` </CodeBlock> <br/><br/> A change adding an [attribute](/en/content/attributes) field with a new name to a summary class using [source](#source) does not require restart or re-feed: <br/><br/> <CodeBlock> ``` field artist type string { indexing: summary \| attribute } document-summary rename-summary { summary artist_name { source: artist } } ``` </CodeBlock> <br/><br/> Also see [non-attribute](#add-or-remove-a-new-non-attribute-document-field-from-document-summary) fields. 
| +| Remove the attribute aspect from a field that is also an index field | This is the only scenario of changing the attribute aspect of a document field that is allowed without restart | +| Add, change or remove field sets | Change [fieldsets](#fieldset) used to group fields together for searching | +| Change the alias or sorting attribute settings for an attribute field | | +| Add, change or remove rank profiles | | +| Change document field weights | | +| Add, change or remove field aliases | | +| Add, change or remove rank settings for a field | Exception: Changing `rank: filter` on an attribute field in mode *index* requires restart. See details in [next section](#changes-that-require-restart-but-not-re-feed) | +| Add or remove a schema | Removing a schema definition file will make [proton](/en/content/proton) drop all documents of that type, subsequently releasing memory and disk. | + + +### Changes that require restart but not re-feed + +Procedure: + +1. Run `vespa prepare` on the changed application. Output specifies which restart actions are needed +2. Run `vespa activate` +3. Restart `services` on the services specified in the `prepare` output + +Changes: + +| Change | Description | +| :--- | :--- | +| Change the attribute aspect of a document field | Add or remove a field as attribute. When adding, the attribute is populated based on the field value in stored documents during restart. When removing, the field value in stored documents is updated based on the content in the attribute during restart. | +| Change the attribute settings for an attribute field | Change the following attribute settings: `fast-search`, `fast-access`, `fast-rank`, `paged`. | +| Change the rank filter setting for an attribute field | Add or remove `rank: filter` on an attribute field. 
| +| Change the hnsw index settings for a tensor attribute field | Adding or removing the [hnsw index](#index-hnsw) on a tensor attribute field, or changing the `distance-metric` or `max-links-per-node` requires a restart to rebuild the index. Changing `neighbors-to-explore-at-insert` requires a restart, but does not rebuild the index. | +| Change the distance metric for a tensor attribute field | Change, add, or remove the [distance metric](#distance-metric) on a tensor attribute field. If no distance metric is specified, _euclidean_ is used as the default. | + +Example: Given a content cluster _mycluster_ with mode _index_: + +```js +schema test { + document test { + field f1 type string { indexing: summary } + } +} +``` +Then add field `f1` as an attribute: +```js +schema test { + document test { + field f1 type string { indexing: attribute | summary } + } +} +``` +The following is output from `vespa prepare` - which restart actions are needed: +```js +WARNING: Change(s) between active and new application that require restart: +In cluster 'mycluster' of type 'search': + Restart services of type 'searchnode' because: + 1) Document type 'test': Field 'f1' changed: add attribute aspect +``` + +### Changes that require reindexing + +All the changes listed below require [reindexing](/en/operations/reindexing) of all documents. Unlike re-feed, which requires an external source of data, reindexing is done using documents stored in Vespa, and is automatic (once triggered). It can also run concurrently with feed and serving, but until reindexing is complete, affected fields will be empty or have potentially wrong annotations not matching the query processing. Procedure: + +1. Run `vespa prepare` on the changed application. Output specifies which reindexing actions are needed +2. Run `vespa activate` +3. [Enable reindexing](/en/reference/api/deploy-v2#reindex) for the indicated document types and clusters +4. 
Run `vespa prepare` and `vespa activate` again to start reindexing process + +Changes: + +| Change | Description | +| :--- | :--- | +| Change index aspect of a document field | This changes the document processing pipeline before documents arrive in the backend. Only documents fed after index aspect was added will have annotations and be present in the reverse index. Only documents fed after index aspect was removed will avoid disk bloat due to unneeded annotations. | +| Switch stemming/normalizing on or off | This changes the document processing pipeline before documents arrive in the backend, and what annotations are made for an indexed field. <Danger>**Important:** If not re-feeding after such a change, serving works, but recall is undefined as the index has been produced using a different setting than the one used when doing stemming/normalizing of the query terms.</Danger> | +| Add, change, or remove match settings for a field | Example: Adding `match: word` to a field. This changes the document processing pipeline before documents arrive in the backend, and what annotations are made for an indexed field. <Danger>**Important:** If not reindexing after such a change, serving works, but recall is undefined as the index has been produced using one match mode while run-time is using a different match mode.</Danger> | +| Add or remove a new non-attribute document field from document summary | A change adding an [index or summary](#document-fields) field (without [attribute](/en/content/attributes)) with a new name to a summary class using [source](#source) requires re-index: <CodeBlock>```field artist type string { indexing: summary \| index } document-summary rename-summary { summary artist_name { source: artist } } ``` </CodeBlock> Also see [attribute](#add-or-remove-an-existing-document-field-from-document-summary) fields. 
| + +Example: Given a content cluster *mycluster* with mode *index*: + +```js +schema test { + document test { + field f1 type string { indexing: summary } + } +} +``` + +Then add field `f1` as an index: + +```js +schema test { + document test { + field f1 type string { indexing: index | summary } + } +} +``` + +The following is output from `vespa prepare` - which reindex actions are needed: + +```js +WARNING: Change(s) between active and new application that require re-index: +Reindex document type 'test' in cluster 'mycluster' because: + 1) Document type 'test': Field 'f1' changed: add index aspect, indexing script: '{ input f1 | summary f1; }' -> '{ input f1 | tokenize normalize stem:"SHORTEST" | index f1 | summary f1; }' +``` + +### Changes that require re-feed + +All the changes listed below require re-feeding of all documents. Unless a change is listed in the above sections, treat it as if it were listed here. Until re-feed is complete, affected fields will be empty or have potentially wrong annotations not matching the query processing. Procedure: + +1. Run `vespa prepare` on the changed application. Output specifies which re-feed actions are needed +2. Stop feeding, wait until done +3. Run `vespa activate` +4. Re-feed all documents + +Changes: + +| Change | Description | +| --- | --- | +| **Change a document field's data type or collection type** | Existing documents will no longer have any content for this field. To populate the field, re-feed the existing documents using the new type for this field. There will be no automatic conversion from old to new field type.
<Danger>**Important:** If not re-feeding after such a change, serving works, but searching this field will not give any results.</Danger> | +| Change a tensor attribute's tensor type | | + +Example: Given a content cluster *mycluster* with mode *index*: + +```js +schema test { + document test { + field f1 type string { indexing: summary } + } +} +``` +Then change field `f1` to hold an int: +```js +schema test { + document test { + field f1 type int { indexing: summary } + } +} +``` +The following is output from `vespa prepare` - which re-feed actions are needed: +```js +WARNING: Change(s) between active and new application that require re-feed: +Re-feed document type 'test' in cluster 'mycluster' because: + 1) Document type 'test': Field 'f1' changed: data type: 'string' -> 'int' +``` \ No newline at end of file diff --git a/mintlify-docs/en/reference/security/mtls.mdx b/mintlify-docs/en/reference/security/mtls.mdx new file mode 100644 index 0000000000..55536997d1 --- /dev/null +++ b/mintlify-docs/en/reference/security/mtls.mdx @@ -0,0 +1,175 @@ +--- +# Copyright Vespa.ai. All rights reserved. +title: "Mutually authenticated TLS (mTLS) reference" +sidebarTitle: "Mtls" +--- + + +<Note> +**Note:** + +See [Securing a self-hosted Vespa application with mutually authenticated TLS (mTLS)](/en/security/mtls) for a practical guide. +</Note> + + +## Environment variables + + +| Name | Description | +| :--- | :--- | +| VESPA_TLS_CONFIG_FILE | Absolute path JSON configuration file with TLS configuration. | +| VESPA_TLS_INSECURE_MIXED_MODE |Enables TLS mixed mode. See [TLS Mixed mode](#tls-mixed-mode) for possible values.| + + +### TLS mixed mode + +Possible TLS mixed mode settings for `VESPA_TLS_INSECURE_MIXED_MODE`: + + +| Name | Description | +| :--- | :--- | +| plaintext_client_mixed_server | Clients do not use TLS, servers accept both TLS and plaintext clients. | +| tls_client_mixed_server | Clients use TLS, servers accept both TLS and plaintext clients. 
| +| tls_client_tls_server | All clients and servers use TLS only. | + + +### Configuration file + +The TLS configuration file contains a single top-level JSON object. + +#### Top-level elements + +| Name | Required | Description | +| :--- | :--- | :--- | +| [files](#the-files-element)| Yes | JSON object containing file system paths crypto material. | +| authorized-peers | No | JSON array of [authorized-peer](#the-authorized-peer-element) objects. Authorization engine is disabled if not specified. See dedicated [section](#peer-authorization-rules) on how to create peer authorization rules. | +| accepted-ciphers | No | JSON array of accepted TLS cipher suites. See [here](#cipher-suites) for cipher suites enabled by default. You can only specify a *subset* of the default cipher suites. *This is an expert option*—use the default unless you have good reasons not to. | +| accepted-protocols | No | JSON array of accepted TLS protocol versions. See [here](#protocol-versions) for TLS versions enabled by default. You can only specify a *subset* of the default protocol versions. *This is an expert option*—use the default unless you have good reasons not to. | +| disable-hostname-validation | No |Disables TLS/HTTPS hostname validation. Enabled by default (default value false).| + + + +#### The *files* element + +| Name | Required | Description | +| :--- | :--- | :--- | +| private-key | Yes | Absolute path to file containing the private key in PKCS#8 PEM format. | +| certificate | Yes | Absolute path to file containing X.509 certificate chain (including any intermediate certificates). Certificates must be encoded in PEM format separated by newlines. | +| ca-certificates | Yes | Absolute path to file containing all trusted X.509 Certificate Authorities. Certificates must be encoded in PEM format separated by newlines. 
| + + +#### The *authorized-peer* element + +| Name | Required | Description | +| :--- | :--- | :--- | +| required-credentials | Yes | A JSON array specifying each [credential requirement](#the-required-credential-element) for this particular rule. | +| name | Yes | Name of the rule. | +| description | No | Description of the rule. | + + +#### The *required-credential* element + +| Name | Required | Description | +| :--- | :--- | :--- | +| field | Yes | Certificate field. Possible values: *CN*, *SAN\_DNS*, *SAN\_URI*. | +| must-match | Yes | String containing a "glob"-style pattern. | + + + +#### Peer authorization rules + +The `authorized-peers` member is an array of credential rule-set objects. For a peer to be considered authorized its certificate MUST match at least one rule set completely. + +Each rule set must contain a `required-credentials` array of credential matchers. For a certificate to match a rule set it MUST match all its credential matchers. + +A credential is matched by checking a pattern given in `must-match` against a specified certificate `field`. The following fields are currently supported: +* *CN* - the Common Name part of the certificate's Distinguished Name information. If multiple CN entries are present, the last one will be considered. +* *SAN_DNS* - a Subject Alternate Name with type DNS. A certificate may contain many SAN entries. If so, all entries are checked and the credential is considered a match if at least one entry matches. +* *SAN_URI* - a Subject Alternate Name with type URI. It is similar to *SAN_DNS* but with slightly different pattern matching semantics. + +For *CN* and *SAN_DNS* fields, the `must-match` pattern is a "glob"-style pattern with the following semantics: +* `*` matches 0-n non-dot characters within a single dot-separated hostname part. This is similar to the wildcards used by certificates for HTTPS hostname validation. Examples + * `*.baz` matches `bar.baz` but not `foo.bar.baz` or `foo.baz.bar`. 
+ * `*.*.baz` matches `foo.bar.baz` but not `bar.baz`. + * `*-myservice` matches `foo-myservice` but not `bar.foo-myservice`. +* `?` matches exactly 1 non-dot character within a single dot-separated hostname part. Examples: + * `?.bar` matches `x.bar` but not `bar`, `.bar` or `yx.bar`. + * `?.?.baz` matches `x.y.baz` but not `x.baz` or `xx.yy.baz`. + +For *SAN_URI* fields, `must-match` is also a "glob"-style pattern, with some deviation compared to *CN*/*SAN_DNS*: +* The `*` wildcard matches 0-n non-slash characters. A `/` is used as separator between the host and path components, as well as separator between path segments. Examples: + * `vespa://myapp/content/*` matches `vespa://myapp/content/node1` but not `vespa://myapp/container/node1` or `vespa://myapp/content/node1/myservice`. + * `vespa://*/*/*` matches `vespa://myapp/content/node1` but not `vespa://myapp/content` or `vespa://myapp/content/node1/myservice`. +* `?` in a pattern has no special behaviour - it only matches the `?` literal. URIs use `?` as separator between the path and query components. + +The description field is optional and is useful for e.g. documenting why a particular ruleset is present. It has no semantic meaning to the authorization engine. + +Not providing the `authorized-peers` field means only certificate validity is used for authorization. If the `authorized-peers` field is provided, it must contain at least one entry. 
+ +#### Example + +```json expandable +{ + "files": { + "ca-certificates": "/absolute/path/to/ca-certs.pem", + "certificates": "/absolute/path/to/host-certs.pem", + "private-key": "/absolute/path/to/private-key.pem", + "disable-hostname-validation": false + }, + "authorized-peers": [ + { + "required-credentials": [ + { "field": "CN", "must-match": "vespa-monitoring.example.com" }, + { "field": "SAN_DNS", "must-match": "*.us-east-*.monitor.example.com" } + ], + "description": "Backend monitoring service access", + "name": "monitoring" + }, + { + "required-credentials": [ + { "field": "SAN_DNS", "must-match": "*.mycluster.vespa.example.com" } + ], + "description": "Cluster-internal node P2P access", + "name": "cluster" + } + ] +} +``` +## TLS features supported by Vespa + +Vespa is built with modern, high-performance cryptography libraries. For security reasons, the Vespa TLS stack has some additional constraints that are always present: +* `TLSv1.2` is the oldest TLS version that can be negotiated. +* Only cipher suites supporting [forward secrecy](https://en.wikipedia.org/wiki/Forward_secrecy) can be negotiated (i.e. cipher suites using ECDHE as part of their key exchange). +* Only modern, symmetric ciphers with [AEAD](https://en.wikipedia.org/wiki/Authenticated_encryption#Authenticated_encryption_with_associated_data) properties are supported. In practice this means [AES-GCM](https://en.wikipedia.org/wiki/AES-GCM) or [ChaCha20-Poly1305](https://en.wikipedia.org/wiki/ChaCha20-Poly1305). Supported cipher suites are listed [here](#cipher-suites). +* TLS compression is explicitly disabled to mitigate [CRIME](https://en.wikipedia.org/wiki/CRIME)/BREACH-style compression oracle attacks. +* TLS renegotiation is explicitly disabled. +* TLS session resumption is explicitly disabled, as this opens up some potential vulnerabilities related to replay attacks. 
Note that the Vespa application container edge does support session resumption, due to needing to support many frequent, short-lived connections from proxies and clients. + + +<Note> +**Note:** + +The above assumes you are using a Vespa version built with our `vespa_openssl` package (which is the case for all Open-Source RPMs and Docker images). If you are doing a custom build, it is highly recommended to build against OpenSSL 1.1.1 or newer. Older versions may have performance regressions or reduced crypto functionality. OpenSSL versions prior to 1.0.1 are not supported. +</Note> + + +### Default TLS protocol settings + +Vespa will by default use the following TLS configuration (unless overridden by `accepted-ciphers` / `accepted-protocols`). + +#### Protocol versions + +* `TLSv1.3` - _note:_ due to certain limitations in the current Java runtime, TLSv1.3 is only supported by the C++ backends for now. We will revisit this in the near future to ensure Java defaults to TLSv1.3 as well. +* `TLSv1.2` + +#### Cipher suites + +* `TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384` +* `TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384` +* `TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256` (JDK 12+) +* `TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256` +* `TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256` +* `TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256` +* `TLS_AES_128_GCM_SHA256` (TLSv1.3) +* `TLS_AES_256_GCM_SHA384` (TLSv1.3) +* `TLS_CHACHA20_POLY1305_SHA256` (TLSv1.3, JDK 12+) diff --git a/mintlify-docs/en/reference/writing/document-selector-language.mdx b/mintlify-docs/en/reference/writing/document-selector-language.mdx new file mode 100644 index 0000000000..3c23a0b2e0 --- /dev/null +++ b/mintlify-docs/en/reference/writing/document-selector-language.mdx @@ -0,0 +1,301 @@ +--- +title: "Document selector language reference" +sidebarTitle: "document selector language" +--- + +This document describes the _document selector language_, used to select a subset of documents when feeding, dumping and garbage collecting
data. It defines a text string format that can be parsed to build a parse tree, which in turn can answer whether a given document is contained within the subset or not. + +## Examples + +Match all documents in the `music` schema: + +`music` + +As applications can have multiple schemas, match document type (schema) and then a specific value in the `artistname` field: + +`music and music.artistname == "Coldplay"` + +Below, the first condition states that the documents should be of type music, and the author field must exist. The second states that the field length must be set, and be less than or equal to 1000: + +`music.author and music.length <= 1000` + +The next expression selects all documents where either of the subexpressions are true. The first one states that the author field should include the name John Doe, with anything in between or in front. The `\n` escape is converted to a newline before the field comparison is done. Thus requiring the field to end with Doe and a newline for a match to be true. The second expression selects all books where no author is defined: + +`book.author = "*John*Doe\n" or not book.author` + +Here is an example of how parentheses are used to group expressions. Also, a constant value false has been used. Note that the `(false or music.test)` sub-expression could be exchanged with just `music.test` without altering the result of the selection. The sub-expression within the `not` clause selects all documents where the size field is above 1000 and the test field is defined. 
The `not` clause inverts the selection, thus selecting all documents with size less than or equal to 1000 or the test field undefined: + +`not (music.length > 1000) and (false or music.test)` + +Other examples: + + - `music.version() == 3 and (music.givenname + " " + music.surname).lowercase() = "bruce spring*"` + - `id.user.hash().abs() % 300 % 7 = 1` + - `music.wavstream.hash() == music.checksum` + - `music.size / music.length > 10` + - `music.expire > now() - 7200` + +## Case sensitiveness + +The identifiers used in this language (`and or not true false null id scheme namespace specific user group`) are not case-sensitive. It is recommended to use lower cased identifiers for consistency with the documentation. + +## Branch operators / precedence + +The branch operators are used to combine other nodes in the parse tree generated from the text format. The existing branch nodes are listed in the table below, in order of precedence: + +| Operator | Description | +| :--- | :--- | +| NOT | Unary prefix operator inverting the selection of the child node | +| AND | Binary infix operator, which is true if all its children are | +| OR | Binary infix operator, which is true if any of its children are | + +Use parentheses to define your own precedence. `a and b or c and d` is equivalent to `(a and b) or (c and d)` since `and` has higher precedence than `or`. The expression `a and (b or c) and d` is not equivalent to the previous two, since parentheses have been used to force the or-expression to be evaluated first. + +Parentheses can also be used in value calculations, where modulo `%` has the highest precedence, multiplication `*` and division `/` come next, and addition `+` and subtraction `-` have the lowest precedence.
+ +## Primitives + +| Primitive | Description | +| :--- | :--- | +| Boolean constant | The boolean constants `true` and `false` can be used to match all/nothing | +| Null constant | Referencing a field that is not present in a document returns a special `null` value. The expression `music.title` is shorthand for `music.title != null`. There are potentially subtle interactions with null values when used with comparisons, see [comparisons with missing fields (null values)](#comparisons-with-missing-fields-null-values). | +| Document type | A document type can be used as a primitive to select a given type of documents - [example](/en/writing/visiting#analyzing-field-values). | +| Document field specification | A document field specification (`doctype.field`) can be used as a primitive to select all documents that have the field set - a shorter form of `doctype.field != null` | +| Comparison | The comparison is a primitive used to compare two values | + +## Comparison + +A comparison compares two values using an operator. All the operators are infix and take two arguments. + +| Operator | Description | +| --- | --- | +| \> | This is true if the left argument is greater than the right one. Operators using greater than or less than notations only make sense where both arguments are either numbers or strings. In case of strings, they are ordered by their binary (byte-wise) representation, with the first character being the most significant and the last character the least significant. If the arguments are of mixed types, or one of the arguments is not a number or a string, the comparison will be invalid and not match. | +| \< | Matches if left argument is less than the right one | +| \<= | Matches if the left argument is less than or equal to the right one | +| \>= | Matches if the left argument is greater than or equal to the right one | +| == | Matches if both arguments are exactly the same. 
Both arguments must be of the same type for a match | +| != | Matches if both arguments are not the same | +| = | String matching using a glob pattern. Matches only if the pattern given as the right argument matches the whole string given by the left argument. Asterisk `*` can be used to match zero or more of any character. Question mark `?` can be used to match any one character. The pattern matching operators, regex `=~` and glob `=`, only makes sense if both arguments are strings. The regex operator will never match anything else. The glob operator will revert to the behaviour of `==` if both arguments are not strings. | +| =~ | String matching using a regular expression. Matches if the regular expression given as the right argument matches the string given as the left argument. Regex notation is like perl. Use '^' to indicate start of value, '$' to indicate end of value | + +### Comparisons with missing fields (null values) + +The only comparison operators that are well-defined when one or both operands may be `null` (i.e. field is not present) are `==` and `!=`. Using any other comparison operators on a `null` value will yield a special _invalid_ value. + +Invalid values may "poison" any logical expression they are part of: + +- `AND` returns invalid if none of its operands are false and at least one is invalid +- `OR` returns invalid if none of its operands are true and at least one is invalid +- `NOT` returns invalid if the operand is invalid + +If an invalid value is propagated as the root result of a selection expression, the document is not considered a match. This is usually the behavior you want; if a field does not exist, any selection requiring it should not match either. However, in garbage collection, documents which results in an invalid selection are _not_ removed as that could be dangerous. + +One example where this may have _unexpected_ behavior: + +1. You have many documents of type `foo` already fed into a cluster. +2. 
You add a new field `expires_at_time` to the document type and update a subset of the documents that you wish to keep. +3. You add a garbage collection selection to the `foo` document declaration to only keep non-expired documents: `foo.expires_at_time > now()` + +At this point, the old documents that _do not_ contain an `expires_at_time` field will _not_ be removed, as the expression will evaluate to invalid instead of `false`. + +To work around this issue, "short-circuiting" using a field presence check may be used: `(foo.expires_at_time != null) and (foo.expires_at_time > now())`. + +## Null behavior with imported fields + +If your selection references imported fields, `null` will be returned for any imported field when the selection is evaluated in a context where the referenced document can't be retrieved. For GC expressions this will happen in the client as part of the feed routing logic, and it may also happen on backend nodes whose parent document set is incomplete (in case of node failures etc.). It is therefore important that you have this in mind when writing GC selections using imported fields. + +When you specify a selection criteria in a `<document>` tag, you're stating what a document must satisfy in order to be fed into the content cluster and to be kept there. + +As an example, imagine a document type `music_recording` with an imported field `artist_is_cool` that points to a boolean field `is_cool` in a parent `artist` document. If you only want your cluster to retain recordings from artists that are certifiably cool, you might be tempted to write a selection like the following: + +```js +<document type="music_recording" + mode="index" + selection="music_recording.artist_is_cool == true"> +``` + +**This won't work as expected**, because this expression is evaluated as part of the feeding pipeline to figure out if a cluster should accept a given document. At that point in time, there is no access to the parent document. 
Consequently, the field will return `null` and the document won't be routed to the cluster. + +Instead, write your expressions to handle the case where the parent document _may not exist_: + +```js +<document type="music_recording" + mode="index" + selection="(music_recording.artist_is_cool == null) or (music_recording.artist_is_cool == true)"> +``` + +With this selection, we explicitly let a document be accepted into the cluster if its imported field is _not_ available. However, if it _is_ available, we allow it to be used for GC. + +## Locale / Character sets + +The language currently does not support character sets other than ASCII. Glob and regex matching of single characters are not guaranteed to match exactly one character, but might match a part of a character represented by multiple byte values. + +## Values + +The comparison operator compares two values. A value can be any of the following: + +| | | +| :--- | :--- | +|Document field specification | Syntax: `<doctype>.<fieldpath>` <br/> Documents have a set of fields defined, depending on the document type. The field name is the identifier used for the field. This expression returns the value of the field, which can be an integer, a floating point number, a string, an array, or a map of these types. <br/> For multivalues, we support only the *equals* operator for comparison. The semantics is that the array returned by the fieldvalue must *contain* at least one element that matches the other side of the comparison. For maps, there must exist a key matching the comparison. <br/> The simplest use of the fieldpath is to specify a field, but for complex types please refer to [the field path syntax documentation](/en/reference/schemas/document-field-path). | +| Id | Syntax: ` id.[scheme\|namespace\|type\|specific\|user\|group] ` <br/> Each document has a document ID, uniquely identifying that document within a Vespa installation. 
The id operator returns the string identifier, or if an optional argument is given, a part of the id. <br/> - scheme (id) <br/> - namespace (to separate different users' data) <br/> - type (specified in the id scheme) <br/> - specific (User specified part to distinguish documents within a namespace) <br/> - user (The number specified in document IDs using the n= modifier) <br/> - group (The string group specified in document IDs using the g= modifier) | +| null | The value null can be given to specify nothingness. For instance, a field specification for a document not containing the field will evaluate to null, so the comparison 'music.artist == null' will select all documents that don't have the artist field set. 'id.user == null' will match all documents that don't use the `n=` [document ID scheme](/en/schemas/documents#id-scheme). Tensor fields can *only* be compared against null. It's not possible to write a document selection that uses the *contents* of tensor fields—only their presence can be checked. | +| Number | A value can be a number, either an integer or a floating point number. Type of number is insignificant. You don't have to use the same type of number on both sides of a comparison. For instance '3.0 < 4' will match, and '3.0 == 3' will probably match (operator == is generally not advised for floating point numbers due to rounding issues). Numbers can be written in multiple ways - examples: <CodeBlock>1234 -234 +53 +534.34 543.34e4 -534E-3 0.2343e-8</CodeBlock> | +| Strings| A string value is given quoted with double quotes (i.e. "mystring"). The string is interpreted as an ASCII string. Only ASCII values 32 to 126 can be used unescaped, except for the characters `\` and `"` which must be escaped. 
<ul><li>Newline: `\n`</li><li>Carriage return: `\r`</li><li>Tab: `\t`</li><li>Form feed: `\f`</li><li>Quotation mark (`"`): `\"`</li><li>Any other character: `\x##` (where `##` is a two-digit hexadecimal number specifying the ASCII value)</li></ul>| + +### Value arithmetics + +You can do arithmetics on values. The common arithmetics operators addition `+`, subtraction `-`, multiplication `*`, division `/` and modulo `%` are supported. + +### Functions + +Functions are called on something and returns a value that can be used in comparison expressions: + +| | | +| :---| :--- | +| Value functions | A value function takes a value, does something with it and returns a value which can be of any type. <br/> - *abs()* Called on a numeric type, returns the absolute value of that numeric type. That is -3 returns 3 and -4.3 returns 4.3. <br/> - *hash()* Calculates an MD5 hash of whatever value it is called on. The result is a signed 64-bit integer. (Use abs() after if you want to only get positive hash values). <br/> - *lowercase()* Called on a string value to turn upper case characters into lower case ones. <Note>**NOTE:** This only works for the characters 'a' through 'z', no locale support.</Note> | +| Document type functions | Some functions can take a document type instead of a value, and return a value based on the type. <br/> - *version()* The `version()` function returns the version number of a document type. | + +#### Now function + +Document selection provides a _now()_ function, which returns the current date timestamp. Use this to filter documents by age, typically for [garbage collection](/en/reference/applications/services/content#documents). 
+ +**Example**: If you have a long field _inserttimestamp_ in your `music` schema, this expression will only match documents from the last two hours: + +`music.inserttimestamp > now() - 7200` + +## Using imported fields in selections + +When using [parent-child](/en/schemas/parent-child) you can refer to simple imported fields (i.e. top-level primitive fields) in selections as if they were regular fields in the child document type. Complex fields (collections, structures etc.) are not supported. + +<Danger> + **Important:** special care needs to be taken when using document selections referencing imported fields, especially if using these are part of garbage collection expressions. If an imported field references a document that cannot be accessed at evaluation time, the imported field behaves as if it had been a regular, non-present field in the child document. In other words, it will return the special `null` value. +</Danger> + +See [comparisons with missing fields (null values)](#comparisons-with-missing-fields-null-values) for a more detailed discussion of null-semantics and how to write selections that handle these in a well-defined manner. In particular, read [null behavior with imported fields](#null-behavior-with-imported-fields) if you're writing GC selections. + +### Example + +The following is an example of a 3-level parent-child hierarchy. 
+ +Grandparent schema: + +```js +schema grandparent { + document grandparent { + field a1 type int { + indexing: attribute | summary + } + } +} +``` + +Parent schema, with reference to grandparent: + +```js +schema parent { + document parent { + field a2 type int { + indexing: attribute | summary + } + field ref type reference<grandparent> { + indexing: attribute | summary + } + } + import field ref.a1 as a1 {} +} +``` + +Child schema, with reference to parent and (transitively) grandparent: + +```js +schema child { + document child { + field a3 type int { + indexing: attribute | summary + } + field ref type reference<parent> { + indexing: attribute | summary + } + } + import field ref.a1 as a1 {} + import field ref.a2 as a2 {} +} +``` + +Using these in document selection expressions is easy: + +Find all child docs whose grandparents have an `a1` greater than 5: + +`child.a1 > 5` + +Find all child docs whose parents have an `a2` of 10 and grandparents have `a1` of 4: + +`child.a1 == 4 and child.a2 == 10` + +Find all child docs where the parent document cannot be found (or where the referenced field is not set in the parent): + +`child.a2 == null` + +Note that when visiting `child` documents we only ever access imported fields via the **child** document type itself. + +A much more complete list of usage examples for the above document schemas and reference relations can be found in the [imported fields in selections](https://github.com/vespa-engine/system-test/blob/master/tests/search/parent_child/imported_fields_in_selections.rb) system test. This test covers both the visiting and GC cases. + +## Constraints + +Language identifiers restrict what can be used as document type names. The following values are not valid document type names: _true, false, and, or, not, id, null_ + +## Grammar - EBNF of the language + +To simplify, double casing of strings has not been included. The identifiers "null", "true", "false" etc. can be written in any case, including mixed case. 
+ +```js expandable +nil = "null" ; +bool = "true" | "false" ; +posdigit = '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ; +digit = '0' | posdigit ; +hexdigit = digit | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' + | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' ; +integer = ['-' | '+'], posdigit, { digit } ; +float = ['-' | '+'], digit, { digit }, + ['.' , { digit }, [ ('e' | 'E'), posdigit, { digit }] ] ; +number = float | integer ; +stdchars = ? All ASCII chars except '\\', '"', 0 - 31 and 127 - 255 ? ; +alpha = ? ASCII characters in the range a-z and A-Z ? ; +alphanum = alpha | digit ; +space = ( ' ' | '\t' | '\f' | '\r' | '\n' ) ; +string = '"', { stdchars | ( '\\', ( 't' | 'n' | 'f' | 'r' | '"' ) ) + | ( "\\x", hexdigit, hexdigit ) }, '"' ; +doctype = alpha, { alphanum } ; +fieldname = { alphanum '{' |'}' | '[' | ']' '.' } ; +function = alpha, { alphanum } ; +idarg = "scheme" | "namespace" | "type" | "specific" | "user" | "group" ; +searchcolumnarg = integer ; +operator = ">=" | ">" | "==" | "=~" | "=" | "<=" | "<" | "!=" ; +idspec = "id", ['.', idarg] ; +searchcolumnspec = "searchcolumn", ['.', searchcolumnarg] ; +fieldspec = doctype, ( function | ('.', fieldname) ) ; +value = ( valuegroup | nil | number | string | idspec | searchcolumnspec | fieldspec ), + { function } ; +valuefuncmod = ( valuegroup | value ), '%', + ( valuefuncmod | valuegroup | value ) ; +valuefuncmul = ( valuefuncmod | valuegroup | value ), ( '*' | '/' ), + ( valuefuncmul | valuefuncmod | valuegroup | value ) ; +valuefuncadd = ( valuefuncmul | valuefuncmod | valuegroup | value ), + ( '+' | '-' ), + ( valuefuncadd | valuefuncmul | valuefuncmod | valuegroup + | value ) ; +valuegroup = '(', arithmvalue, ')' ; +arithmvalue = ( valuefuncadd | valuefuncmul | valuefuncmod | valuegroup + | value ) ; +comparison = arithmvalue, { space }, operator, { space }, + arithmvalue ; +leaf = bool | comparison | fieldspec | doctype ; +not = "not", { space }, ( group | leaf ) ; +and = ( not | group | leaf ), { space }, 
"and", { space }, + ( and | not | group | leaf ) ; +or = ( and | not | group | leaf ), { space }, "or", { space }, + ( or | and | not | group | leaf ) ; +group = '(', { space }, ( or | and | not | group | leaf ), + { space }, ')' ; +expression = ( or | and | not | group | leaf ) ; +``` \ No newline at end of file diff --git a/mintlify-docs/en/reference/writing/indexing-language.mdx b/mintlify-docs/en/reference/writing/indexing-language.mdx new file mode 100644 index 0000000000..e625f324c5 --- /dev/null +++ b/mintlify-docs/en/reference/writing/indexing-language.mdx @@ -0,0 +1,138 @@ +--- +title: "Indexing language reference" +sidebarTitle: "indexing language" +--- + +This reference documents the full Vespa _indexing language_. If more complex processing of input data is required, implement a [document processor](/en/applications/document-processors). + +The indexing language is analogous to UNIX pipes, in that statements consists of expressions separated by the _pipe_ symbol where the output of each expression is the input of the next. Statements are terminated by semicolon and are independent of each other (except when using variables). + +Find examples in the [indexing](/en/writing/indexing) guide. + +## Indexing script + +An indexing script is a sequence of [indexing statements](#indexing-statement) separated by a semicolon (`;`). A script is executed statement-by-statement, in order, one document at a time. + +Vespa derives one indexing script per search cluster based on the search definitions assigned to that cluster. As a document is fed to a search cluster, it passes through the corresponding [indexing cluster](/en/reference/applications/services/content#document-processing), which runs the document through its indexing script. Note that this also happens whenever the document is [reindexed](/en/operations/reindexing), so expressions such as [now](#now) must be thought of as the time the document was (last) _indexed_, not when it was _fed_. 
+ +You can examine the indexing script generated for a specific search cluster by retrieving the configuration of the indexing document processor. + +```sh +$ vespa-get-config -i search/cluster.<cluster-name> -n vespa.configdefinition.ilscripts +``` + +The current _execution value_ is set to `null` prior to executing a statement. + +## Indexing statement + +An indexing statement is a sequence of [indexing expressions](#indexing-expression) separated by a pipe (`|`). A statement is executed expression-by-expression, in order. + +Within a statement, the execution value is passed from one expression to the next. + +The simplest of statements passes the value of an input field into an attribute: + +```sh +input year | attribute year; +``` + +The above statement consists of 2 expressions; `input year` and `attribute year`. The former sets the execution value to the value of the "year" field of the input document. The latter writes the current execution value into the attribute "year". + +## Indexing expression + +### Primitives + +A string, numeric literal and true/false can be used as an expression to explicitly set the execution value. Examples: `"foo"`, `69`, `true`. + +### Outputs + +An output expression is an expression that writes the current execution value to a document field. These expressions also double as the indicator for the type of field to construct (i.e. attribute, index or summary). It is important to note that you cannot assign different values to the same field in a single document (e.g. `attribute | lowercase | +index` is **illegal** and will not deploy). + +| Expression | Description | +| :--- | :--- | +| `attribute` | Writes the execution value to the current field. During deployment, this indicates that the field should be stored as an attribute. | +| `index` | Writes the execution value to the current field. During deployment, this indicates that the field should be stored as an index field. 
| +| `summary` | Writes the execution value to the current field. During deployment, this indicates that the field should be included in the document summary. | + +### Arithmetics + +Indexing statements can contain any combination of arithmetic operations, as long as the operands are numeric values. In case you need to convert from string to numeric, or convert from one numeric type to another, use the applicable [converter](#converters) expression. The supported arithmetic operators are: + +| Operator | Description | +| :--- | :--- | +| `<lhs> + <rhs>` | Sets the execution value to the result of adding of the execution value of the `lhs` expression with that of the `rhs` expression. | +| `<lhs> - <rhs>` | Sets the execution value to the result of subtracting of the execution value of the `lhs` expression with that of the `rhs` expression. | +| `<lhs> * <rhs>` | Sets the execution value to the result of multiplying of the execution value of the `lhs` expression with that of the `rhs` expression. | +| `<lhs> / <rhs>` | Sets the execution value to the result of dividing of the execution value of the `lhs` expression with that of the `rhs` expression. | +| `<lhs> % <rhs>` | Sets the execution value to the remainder of dividing the execution value of the `lhs` expression with that of the `rhs` expression. | +| `<lhs> . <rhs>` | Sets the execution value to the concatenation of the execution value of the `lhs` expression with that of the `rhs` expression. If _both_ `lhs` and `rhs` are collection types, this operator will append `rhs` to `lhs` (if any operand is null, it is treated as an empty collection). If not, this operator concatenates the string representations of `lhs` and `rhs` (if any operand is null, the result is null). | + +You may use parenthesis to declare precedence of execution (e.g. `(1 + + 2) * 3`). This also works for more advanced array concatenation statements such as `(input str_a | split ',') . (input str_b | split + ',') | index arr`. 
+ +### Converters + +These expressions let you convert from one data type to another. + +| Converter | Input | Output | Description | +| :--- | :--- | :--- | :--- | +| `binarize [threshold]` | Any tensor | Any tensor | Replaces all values in a tensor by 0 or 1. This takes an optional argument specifying the threshold a value needs to be larger than to be replaced by 1 instead of 0. The default threshold is 0. This is useful to create a suitable input to [pack\_bits](#pack_bits). | +| `embed [id] [args]` | String | A tensor | Invokes an [embedder](/en/reference/rag/embedding) to convert a text to one or more vector embeddings. The type of the output tensor is what is required by the following expression (as supported by the specific embedder). Arguments are given space separated, as in `embed colbert chunk`. The first argument is the embedder id, and can be omitted when only a single embedder is configured. Any additional arguments are passed to the embedder implementation. If the same embed expression with the same input occurs multiple times in a schema, its value will only be computed once. | +| `chunk id [args]` | String | Array\<String> | Invokes a chunker which converts a string into an array of strings. Arguments are given space separated, as in `chunk fixed-length 512`. The id of the chunker to use is required and can be a chunker bundled with Vespa, or any chunker component added in the services.xml, see the [chunking reference](/en/reference/rag/chunking). Any additional arguments are passed to the chunker implementation. If the same chunk expression with the same input occurs multiple times in a schema, its value will only be computed once. | +| `hash` | String | int or long | Converts the input to a hash value (using SipHash). The hash will be int or long depending on the target field. | +| `pack_bits` | A tensor | A tensor | Packs the values of a binary tensor into bytes with 1 bit per value in big-endian order. The input tensor must have a single dense dimension. 
It can have any value type and any number of sparse dimensions. Values that are not 0 or 1 will be binarized with 0 as the threshold. <br/> The output tensor will have: <br/> - `int8` as the value type. <br/> - The dense dimension size divided by 8 (rounded upwards to integer). <br/> - The same sparse dimensions as before. <br/> The resulting tensor can be unpacked during ranking using [unpack\_bits](/en/reference/ranking/ranking-expressions#unpack-bits). A tensor can be converted to binary form suitable as input to this by the [binarize function](#binarize). | +| `to_array` | Any | Array\<inputType> | Converts the execution value to a single-element array. | +| `to_byte` | Any | Byte | Converts the execution value to a byte. This will throw a NumberFormatException if the string representation of the execution value does not contain a parseable number. | +| `to_double` | Any | Double | Converts the execution value to a double. This will throw a NumberFormatException if the string representation of the execution value does not contain a parseable number. | +| `to_float` | Any | Float | Converts the execution value to a float. This will throw a NumberFormatException if the string representation of the execution value does not contain a parseable number. | +| `to_int` | Any | Integer | Converts the execution value to an int. This will throw a NumberFormatException if the string representation of the execution value does not contain a parseable number. | +| `to_long` | Any | Long | Converts the execution value to a long. This will throw a NumberFormatException if the string representation of the execution value does not contain a parseable number. | +| `to_bool` | Any | Bool | Converts the execution value to a boolean type. If the input is a string it will become true if it is not empty. If the input is a number it will become true if it is != 0. | +| `to_pos` | String | Position | Converts the execution value to a position struct. 
The input format must be either a) `[N\|S]<val>;[E\|W]<val>`, or b) `x;y`. | +| `to_string` | Any | String | Converts the execution value to a string. | +| `to_uri` | String | Uri | Converts the execution value to a URI struct | +| `to_wset` | Any | WeightedSet\<inputType> | Converts the execution value to a single-element weighted set with default weight. | +| `to_epoch_second` | String | Long | Converts an ISO-8601 instant formatted String to Unix epoch (or Unix time or POSIX time or Unix timestamp) which is the number of seconds elapsed since January 1, 1970, UTC. The converter uses [java.time.Instant.parse](https://docs.oracle.com/en/java/javase/20/docs/api/java.base/java/time/Instant.html#parse\(java.lang.CharSequence\)) to parse the input string value. This will throw a DateTimeParseException if the input cannot be parsed. Examples: <br/> - `2023-12-24T17:00:43.000Z` is converted to `1703437243L` <br/> - `2023-12-24T17:00:43Z` is converted to `1703437243L` <br/> - `2023-12-24T17:00:43.431Z` is converted to `1703437243L` <br/> - `2023-12-24T17:00:43.431+00:00` is converted to `1703437243L` | + +### Other expressions + +The following are the unclassified expressions available: + +| Expression | Description | +| --- | --- | +| `_` | Returns the current execution value. This is useful, e.g., to prepend some other value to the current execution value, see [this example](/en/writing/indexing#execution-value-example). | +| `attribute <fieldName>` | Writes the execution value to the named attribute field. | +| `base64decode` | If the execution value is a string, it is base-64 decoded to a long integer. If it is not a string, the execution value is set to `Long.MIN_VALUE`. | +| `base64encode` | If the execution value is a long integer, it is base-64 encoded to a string. If it is not a long integer, the execution value is set to `null`. | +| `echo` | Prints the execution value to standard output, for debug purposes. 
| +| `flatten` | <Warning>**Deprecated:** Use [tokens](/en/reference/schemas/schemas#tokens) in the schema instead.</Warning> | +| `for_each { <script> }` | Executes the given indexing script for each element in the execution value. Here, element refers to each element in a collection, or each field value in a struct. | +| `generate [id] [args]` | Invokes a [field generator](/en/rag/document-enrichment) to generate a field value from an input string. The argument is the id of the `FieldGenerator` component as described in [Document enrichment with LLMs](/en/rag/document-enrichment). If the same generate expression with the same input occurs multiple times in a schema, its value will only be computed once. | +| `get_field <fieldName>` | Retrieves the value of the named field from the execution value (which needs to be either a document or a struct), and sets it as the new execution value. | +| `get_language` | Retrieves the code of the last assigned or detected language, or "un" for "unknown" if no language has been assigned or detected. Language is detected when a string is tokenized or embedded, so this can be used to retrieve the language detected by a previous field executing one such operation, e.g. by indexing. | +| `get_value <key>` | Retrieves a value from an input Map. The key can be a number, identifier or quoted string. If the given key does not have a value, the output is empty. | +| `get_var <varName>` | Retrieves the value of the named variable from the execution context and sets it as the execution value. Note that variables are scoped to the indexing script of the current field. | +| `hex_decode` | If the execution value is a string, it is parsed as a long integer in base-16. If it is not a string, the execution value is set to `Long.MIN_VALUE`. | +| `hex_encode` | If the execution value is a long integer, it is converted to a string representation of an unsigned integer in base-16. If it is not a long integer, the execution value is set to `null`. 
| +| `hostname` | Sets the execution value to the name of the host computer. | +| ` if (<left> <cmp> <right>) {     <trueScript> } [ else { <falseScript> } ] ` | Executes the `trueScript` if the conditional evaluates to true, or the `falseScript` if it evaluates to false. If either `left` or `right` is null, no expression is executed. The value produced is the value returned from the chosen branch, and these must produce values of compatible types (or none). | +| `index <fieldName>` | Writes the execution value to the named index field. | +| `input <fieldName>` | Retrieves the value of the named field from the document and sets it as the execution value. The field name may contain '.' characters to retrieve nested struct fields. | +| `join "<delim>"` | Creates a single string by concatenating the string representation of each array element of the execution value. This function is useful for indexing data from a [multivalue](/en/querying/searching-multivalue-fields) field into a singlevalue field. | +| `lowercase` | Lowercases all the strings in the execution value. | +| `ngram <size>` | Adds ngram annotations to all strings in the execution value. | +| `normalize` | [Normalizes](/en/linguistics/linguistics-opennlp#normalization) the input data. The corresponding query command for this function is `normalize`. | +| `now` | Outputs the current system clock time as a UNIX timestamp, i.e. seconds since 0 hours, 0 minutes, 0 seconds, January 1, 1970, Coordinated Universal Time (Epoch). | +| `random [ <max> ]` | Returns a random integer value. Lowest value is 0 and the highest value is determined either by the argument or, if no argument is given, the execution value. | +| ` sub-expression1 \|\| sub-expression2 \|\| ... ` | Returns the value of the first alternative sub-expression which returns a non-null value. See [this example](/en/writing/indexing#choice-example). 
| +| ` select_input { ( case <fieldName>: <statement>; )* } ` | Performs the statement that corresponds to the **first** named field that is not empty (see [example](/en/writing/indexing#select-input-example)). | +| `set_language` | Sets the language of this document to the string representation of the execution value. Parses the input value as an RFC 3066 language tag, and sets that language for the current document. This affects the behavior of the [tokenizer](/en/linguistics/linguistics-opennlp#tokenization). The recommended use is to have one field in the document containing the language code, and that field should be the **first** field in the document, as it will only affect the fields defined **after** it in the schema. Read [linguistics](/en/linguistics/linguistics#language-handling) for more information on how language settings are applied. | +| `set_var <varName>` | Writes the execution value to the named variable. Note that variables are scoped to the indexing script of the current field. | +| `substring <from> <to>` | Replaces all strings in the execution value by a substring of the respective value. The arguments are inclusive-from and exclusive-to. Both arguments are clamped during execution to avoid going out of bounds. | +| `split <regex>` | Splits the string representation of the execution value into a string array using the given regex pattern. This function is useful for creating [multivalue](/en/querying/searching-multivalue-fields) fields such as an integer array out of a string of comma-separated numbers. | +| `summary <fieldName>` | Writes the execution value to the named summary field. During deployment, this indicates that the field should be included in the document summary. | +| ` switch { ( case '<value>': <caseStatement>; )* [ default: <defaultStatement>; ] } ` | Performs the statement of the case whose value matches the string representation of the execution value (see [example](/en/writing/indexing#switch-example)). 
| +| `tokenize [ normalize ] [ stem ]` | Adds linguistic annotations to all strings in the execution value. Read [linguistics](/en/linguistics/linguistics) for more information. | +| `trim` | Removes leading and trailing whitespace from all strings in the execution value. | +| `uri` | Converts all strings in the execution value to a URI struct. If a string could not be converted, it is removed. | \ No newline at end of file diff --git a/mintlify-docs/en/schemas/concrete-documents.mdx b/mintlify-docs/en/schemas/concrete-documents.mdx new file mode 100644 index 0000000000..cc4f2ee2eb --- /dev/null +++ b/mintlify-docs/en/schemas/concrete-documents.mdx @@ -0,0 +1,151 @@ +--- +title: "Concrete documents" +--- + +In [document processing](/en/applications/document-processors), `setFieldValue()` and `getFieldValue()` are used to access fields in a `Document`. The data for each of the fields in the document instance is wrapped in field values. If the documents use structs, they are handled the same way. Example: + + +```java +book.setFieldValue("title", new StringFieldValue("Moby Dick")); +``` + +Alternatively, use code generation to get a *concrete document type*, a `Document` subclass that represents the exact document type (defined for example in the file `book.sd`). To generate, include it in the build, plugins section in `pom.xml`: + +```xml +<plugin> + <groupId>com.yahoo.vespa</groupId> + <artifactId>vespa-documentgen-plugin</artifactId> + <!-- Find latest version at search.maven.org/search?q=g:com.yahoo.vespa%20a:vespa-documentgen-plugin --> + <version>8.689.26</version> + <configuration> + <schemasDirectory>etc/schemas</schemasDirectory> + </configuration> + <executions> + <execution> + <id>document-gen</id> + <goals> + <goal>document-gen</goal> + </goals> + </execution> + </executions> +</plugin> +``` + +`schemasDirectory` contains the [schemas](/en/reference/schemas/schemas). Generated classes will be in `target/generated-sources`.
The document type `book` will be represented as the Java class `Book`, and it will have native methods for data access, so the code example above becomes: + +```java +book.setTitle("Moby Dick"); +``` + +<table> +<tr className='border-b border-gray-200'> + <th className='text-left'>Configuration</th> + <th className='text-left'>Description</th> +</tr> +<tr> + <td>**Java package**</td> + <td> + Specify the Java package of the generated types: + +```xml +<configuration> + <packageName>com.yahoo.mypackage</packageName> +</configuration> +``` + </td> +</tr> +<tr> + <td>**User provided annotation types**</td> + <td> + To provide the Java implementation of a given annotation type, yielding *behaviour of annotations* (implementing additional interfaces may be one scenario): + +```xml +<configuration> + <schemasDirectory>etc/schemas</schemasDirectory> + <provided> + <annotation> + <type>NodeImpl</type> + <clazz>com.yahoo.vespa.document.NodeImpl</clazz> + </annotation> + <annotation> + <type>DocumentImpl</type> + <clazz>com.yahoo.vespa.document.DocumentImpl</clazz> + </annotation> + </provided> +</configuration> +``` + +Here, the plugin will not generate a type for `NodeImpl` and `DocumentImpl`, but the `ConcreteDocumentFactory` will support them, so that code depending on this will work. + </td> +</tr> +<tr> + <td>**Abstract annotation types**</td> + <td> + Make a generated annotation type abstract: + +```xml +<configuration> + <abztract> + <annotation> + <type>myabstractannotationtype</type> + </annotation> + </abztract> +</configuration> +``` + </td> +</tr> +</table> + +## Inheritance + +If input document types use single inheritance, the generated Java types will inherit accordingly. However, if a document type inherits from more than one type (example: `document myDoc inherits base1, base2`), the Java type for `myDoc` will just inherit from `Document`, since Java has single inheritance. Refer to [schema inheritance](/en/schemas/inheritance-in-schemas) for examples. 
+ +## Feeding + +Concrete types are often used in a docproc, used for feeding data into stateful clusters. To make Vespa use the correct type during feeding and serialization, include in `<container>` in [services.xml](/en/reference/applications/services/services): + +```xml highlight={2-4} +<container id="default" version="1.0"> + <document type="book" + bundle="the name in <artifactId> in your pom.xml" + class="com.yahoo.mypackage.Book"/> +``` + +Vespa will make the type `Book` and all other concrete document, annotation and struct types from the bundle available to the docproc(s) in the container. The specified bundle must be the `Bundle-SymbolicName`. It will also use the given Java type when feeding through a docproc chain. If the class is not in the specified bundle, the container will emit an error message about not being able to load `ConcreteDocumentFactory` as a component, and not start. There is no need to `Export-Package` the concrete document types from the bundle, a `package-info.java` is generated that does that. + +## Factory and copy constructor + +Along with the actual types, the Maven plugin will also generate a class `ConcreteDocumentFactory`, which holds information about the actual concrete types present. It can be used to initialize an object given the document type: + +```java +Book b = (Book) ConcreteDocumentFactory.getDocument("book", new DocumentId("id:book:book::0")); +``` + +This can be done for example during deserialization, when a document is created. The concrete types also have copy constructors that can take a generic `Document` object of the same type. The contents will be deep-copied: + +```java +Document bookGeneric; +// … +Book book = new Book(bookGeneric, bookGeneric.getId()); +``` + +All the accessor and mutator methods on `Document` will work as expected on concrete types. Note that `getFieldValue()` will *generate* an ad-hoc `FieldValue` *every time*, since concrete types don't use them to store data. 
`setFieldValue()` will pack the data into the native Java field of the type. + +## Document processing + +In a document processor, cast the incoming document base into the concrete document type before accessing it. + +Example: + +```java +public class ConcreteDocDocProc extends DocumentProcessor { + public Progress process(Processing processing) { + DocumentPut put = (DocumentPut) processing.getDocumentOperations().get(0); + Book b = (Book) (put.getDocument()); + b.setTitle("The Title"); + return Progress.DONE; + } +} +``` + +Concrete document types are not supported for document updates or removes. diff --git a/mintlify-docs/en/schemas/documents.mdx b/mintlify-docs/en/schemas/documents.mdx new file mode 100644 index 0000000000..4a9ad5e956 --- /dev/null +++ b/mintlify-docs/en/schemas/documents.mdx @@ -0,0 +1,210 @@ +--- +title: "Documents" +--- + +Vespa models data as *documents*. A document has a string identifier, set by the application, unique across all documents. A document is a set of [key-value pairs](/en/writing/document-api-guide). A document has a schema (i.e. type), defined in the [schema](/en/basics/schemas). + +When configuring clusters, a [documents](/en/reference/applications/services/content#documents) element sets what document types a cluster is to store. This configuration is used to configure the garbage collector if it is enabled. Additionally, it is used to define default routes for documents sent into the application. By default, a document will be sent to all clusters having the document type defined. Refer to [routing](/en/writing/document-routing) for details. + +Vespa uses the document ID to distribute documents to nodes. From the document identifier, the content layer calculates a numeric location. A bucket contains all the documents, where a given amount of least-significant bits of the location are all equal. 
This property is used to enable co-localized storage of documents - read more in [buckets](/en/content/buckets) and [content cluster elasticity](/en/content/elasticity). + +Documents can be [global](/en/reference/applications/services/content#document), see [parent/child](/en/schemas/parent-child). + +## Document IDs + +The document identifiers are URIs, represented by a string, which must conform to a defined URI scheme for document identifiers. The document identifier string may only contain *text characters*, as defined by `isTextCharacter` in [com.yahoo.text.Text](https://github.com/vespa-engine/vespa/blob/master/vespajlib/src/main/java/com/yahoo/text/Text.java). + +### id scheme + +Vespa currently has only one defined scheme, the *id scheme*: `id:<namespace>:<document-type>:<key/value-pair>:<user-specified>` + +<Note> +**Note:** + +An example mapping from ID to the URL used in [/document/v1/](/en/writing/document-v1-api-guide) is from `id:mynamespace:mydoctype::user-defined-id` to `/document/v1/mynamespace/mydoctype/docid/user-defined-id`. Find examples and tools in [troubleshooting](/en/writing/document-v1-api-guide#document-not-found). +</Note> + +Find examples in the [/document/v1/](/en/writing/document-v1-api-guide) guide. + +| Part | Required | Description | +| :--- | :--- | :--- | +| namespace | Yes | Not used by Vespa, see [below](/en/schemas/documents#namespace). | +| document-type | Yes | Document type as defined in [services.xml](/en/reference/applications/services/content#document) and the [schema](/en/reference/schemas/schemas). | +| key/value-pair | Optional | Modifiers to the id scheme, used to configure document distribution to [buckets](/en/content/buckets#document-to-bucket-distribution). With no modifiers, the id scheme distributes all documents uniformly. 
The key/value-pair field contains one of two possible key/value pairs; **n** and **g** are mutually exclusive: <br/> `n=*<number>*`<br/> Number in the range `[0,2^63-1]` - only for testing of abnormal bucket distributions <br/><br/> `g=<groupname>`<br/> The *groupname* string is hashed and used to select the storage location<br/><br/><Warning> **Important:** <br/>This is only useful for document types with [mode=streaming or mode=store-only](/en/reference/applications/services/content#document). Do not use modifiers for regular indexed document types.</Warning><br/><br/> See [streaming search](/en/performance/streaming-search). Using modifiers for regular indexed document will cause unpredictable feeding performance, in addition, search dispatch does not have support to limit the search to modifiers/buckets. | +| user-specified | Yes | A unique ID string. | + + +### Document IDs in search results + +The full document ID (as a string) will often contain redundant information and be quite long; a typical value may look like "id:mynamespace:mydoctype::user-specified-identifier", where only the last part is useful outside Vespa. The document ID is therefore not stored in memory by default and **not always present** in the [search results](/en/reference/querying/default-result-format#id). + +To enable storing the document IDs in memory, set [`documentid`](/en/reference/schemas/schemas#documentid) to `attribute` in the schema. This is possible since Vespa 8.691.19 . To return the document IDs from memory in the search results, configure a [document summary](/en/querying/document-summaries) like this: + +```js +schema music { + documentid: attribute + document music { + field ... + } + document-summary empty-summary { + summary documentid { + source: documentid + } + } + ... +``` + +Then, use `presentation.summary=empty-summary` in the query API. 
+ +With the default `from-disk` setting for [`documentid`](/en/reference/schemas/schemas#documentid) in the schema, the document ID is stored on disk only. To return the value in the search results anyway, configure a [document summary](/en/querying/document-summaries) like this: + +```js +schema music { + document music { + field ... + } + document-summary empty-summary { + summary documentid { + source: documentid + } + from-disk + } + ... +``` + +The `from-disk` setting mutes a warning for document-summary disk access; use a higher query timeout when requesting many IDs like this. + +A more memory-efficient, but also more complicated alternative is to put your own unique identifier (usually the "user-specified-identifier" above) in a document field, typically named "myid" or "shortid" or similar: + +```js +field shortid type string { + indexing: attribute | summary +} +``` + +This enables using a [document summary](/en/querying/document-summaries) with only in-memory fields while still getting the identifier you actually care about. If the "user-specified-identifier" is just a simple number you could even use "type int" for this field for minimal memory overhead. + +## Namespace + +The namespace in document IDs is useful when you have multiple document collections that you want to be sure never end up with the same document ID. It has no function in Vespa beyond this, and can just be set to any short constant value like for example "doc". Consider also letting synthetic documents used for testing use namespace "test" so it's easy to detect and remove them if they are present outside the test by mistake. + +Example - if feeding + +- document A by `curl -X POST https:.../document/v1/first_namespace/my_doc_type/docid/shakespeare` +- document B by `curl -X POST https:.../document/v1/second_namespace/my_doc_type/docid/shakespeare` + +then those will be separate documents, both searchable, with different document IDs. 
The document ID differs not in the user specified part (this is `shakespeare` for both documents), but in the namespace part (`first_namespace` vs `second_namespace`). The full document ID for document A is `id:first_namespace:my_doc_type::shakespeare`. + +The namespace has no relation to other configuration elsewhere, like in *services.xml* or in schemas. It is just like the user specified part of each document ID in that sense. Namespace can not be used in queries, other than as part of the full document ID. However, it can be used for [document selection](/en/reference/writing/document-selector-language), where `id.namespace` can be accessed and compared to a given string, for instance. An example use case is [visiting](/en/writing/visiting) a subset of documents. + +## Fields + +Documents can have fields, see the [schema reference](/en/reference/schemas/schemas#field). + +A field can not be defined with a default value. Use a [choice ('||') indexing statement](/en/writing/indexing#choice-example) or a [document processor](/en/applications/document-processors) to assign a default to document put/update operations. + +## Fieldsets + +Use *fieldset* to limit the fields that are returned from a read operation, like *get* or *visit* - see [examples](/en/clients/vespa-cli#documents). Vespa may return more fields than specified if this does not impact performance. + +<Note> +**Note:** + +Document field sets are different from [searchable fieldsets](/en/reference/schemas/schemas#fieldset). +</Note> + +There are two options for specifying a fieldset: + +- Built-in fieldset +- Name of a document type, then a colon ":", followed by a comma-separated list of fields (for example `music:artist,song` to fetch two fields declared in `music.sd`) + +Built-in fieldsets: + +| Fieldset | Description | +| :--- | :--- | +| [all] | Returns all fields in the schema (generated fields included) and the document ID.
| +| [document] | Returns original fields in the document, including the document ID. | +| [none] | Returns no fields at all, not even the document ID. *Internal, do not use* | +| [id] | Returns only the document ID | +| `<document type>:[document]` | <Danger>**Deprecated:**<br/> Use `[document]`</Danger><br/> Same as `[document]` fieldset above: Returns only the original document fields (generated fields not included) together with the document ID. | + +If a built-in field set is not used, a list of fields can be specified. Syntax: + +```bash +<document type>:field1,field2,… +``` + +Example: + +```bash +music:title,artist +``` + +## Document expiry + +To auto-expire documents, use a [selection](/en/reference/applications/services/content#documents.selection) with [now](/en/reference/writing/indexing-language#now). Example, set time-to-live (TTL) for *music*\-documents to one day, using a field called *timestamp*: + +```xml +<documents garbage-collection="true"> + <document type="music" + mode="index" + selection="music.timestamp > now() - 86400" /> +</documents> +``` + +<Note> +**Note:** + +The `selection` expression says which documents to *keep*, not which ones to delete. The *timestamp* field must have a value in seconds since EPOCH: +</Note> + +```js +field timestamp type long { + indexing: attribute + attribute { + fast-access + } +} +``` + +When `garbage-collection="true"`, Vespa iterates over the document space to purge expired documents. Vespa will invoke the configured GC selection for each stored document once every [garbage-collection-interval](/en/reference/applications/services/content#documents.selection) seconds. It is unspecified when a particular document will be processed within the configured interval. + +<Warning> +**Important:** + +This is a best-effort garbage collection feature to conserve CPU and space. Use query filters if it is important to exclude documents based on a criterion.
+</Warning> + +- Using a *selection* with *now* can have side effects when re-feeding or re-processing documents, as timestamps can be stale. A common problem is feeding with too old timestamps, resulting in no documents being indexed. +- Normally, documents that are already expired at write time are not persisted. When using [create](/en/writing/document-v1-api-guide#create-if-nonexistent) (Create if nonexistent), it is possible to create documents that are expired and will be removed in next cycle. +- Deploying a configuration where the selection string selects no documents will cause all documents to be garbage collected. Use [visit](/en/writing/visiting) to test the selection string. Garbage collected documents are not to be expected to be recoverable. +- The fields that are referenced in the selection expression should be attributes. Also, either the fields should be set with *"fast-access"* or the number of [searchable copies](/en/reference/applications/services/content#searchable-copies) in the content cluster should be the same as the [redundancy](/en/reference/applications/services/content#redundancy). Otherwise, the document selection maintenance will be slow and have a major performance impact on the system. +- [Imported fields](/en/reference/schemas/schemas#import-field) can be used in the selection string to expire documents, but special care needs to be taken when using these. See [using imported fields in selections](/en/reference/writing/document-selector-language#using-imported-fields-in-selections) for more information and restrictions. +- Document garbage collection is a low priority background operation that runs continuously unless preempted by higher priority operations. If the cluster is too heavily loaded by client feed operations, there's a risk of starving GC from running. 
To verify that garbage collection is not starved, check the [vds.idealstate.max\_observed\_time\_since\_last\_gc\_sec.average](/en/operations/metrics) distributor metric. If it significantly exceeds `garbage-collection-interval` it is an indication that GC is starved. + +To batch remove, set a selection that matches no documents, like *"not music"* + +Use [vespa visit](/en/writing/visiting) to test the selection. Dump the IDs of all documents that would be *preserved*: + +```sh +$ vespa visit --selection 'music.timestamp > now() - 86400' --field-set "music.timestamp" +``` + +Negate the expression by wrapping it in a `not` to dump the IDs of all the documents that would be *removed* during GC: + +```sh +$ vespa visit --selection 'not (music.timestamp > now() - 86400)' --field-set "music.timestamp" +``` + +## Processing documents + +To process documents, use [Document processing](/en/applications/document-processors). Examples are enriching documents (look up data from other sources), transform content (like linguistic transformations, tokenization), filter data and trigger other events based on the input data. + +See the sample app [album-recommendation-docproc](https://github.com/vespa-engine/sample-apps/tree/master/examples/document-processing) for use of Vespa APIs like: + +- [Document API](/en/writing/document-api-guide) - work on documents and fields in documents, and create unit tests using the Application framework +- [Document Processing](/en/applications/document-processors) - chain independent processors with ordering constraints + +The sample app [vespa-documentation-search](https://github.com/vespa-cloud/vespa-documentation-search) has examples of processing PUTs or UPDATEs (using [create-if-nonexistent](/en/writing/document-v1-api-guide#create-if-nonexistent)) of documents in [OutLinksDocumentProcessor](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/src/main/java/ai/vespa/cloud/docsearch/OutLinksDocumentProcessor.java). 
It is also an introduction to using [multivalued fields](/en/querying/searching-multivalue-fields) like arrays, maps and tensors. Use the [VespaDocSystemTest](https://github.com/vespa-cloud/vespa-documentation-search/blob/main/src/test/java/ai/vespa/cloud/docsearch/VespaDocSystemTest.java) to build code that feeds and tests an instance in the Vespa Developer Cloud / local Docker instance. + +Both sample apps also use the Document API to GET/PUT/UPDATE other documents as part of processing, using asynchronous [DocumentAccess](https://github.com/vespa-engine/vespa/blob/master/documentapi/src/main/java/com/yahoo/documentapi/DocumentAccess.java). Use this as a starting point for applications that enrich data when writing. diff --git a/mintlify-docs/en/schemas/exposing-schema-information.mdx b/mintlify-docs/en/schemas/exposing-schema-information.mdx new file mode 100644 index 0000000000..d3a773c4db --- /dev/null +++ b/mintlify-docs/en/schemas/exposing-schema-information.mdx @@ -0,0 +1,117 @@ +--- +title: "Exposing schema information" +description: "Some applications need to expose information about schemas to data plane clients. This document explains how to add an API for that to your application." +--- + +You need to know two things: + +- Your application can expose any custom API by implementing a [handler](/en/applications/request-handlers). +- Information about the deployed schemas is available in the component `com.yahoo.search.schema.SchemaInfo`. + +With this information, we can add an API exposing schema information through the following steps. + +<Steps> +<Step title="Make sure your application package can contain Java components"> + +Application packages containing Java components must follow Maven layout.
If your application package root contains a `pom.xml` and `src/main` you're good, otherwise convert it to this layout by copying the pom.xml from [the album-recommendation.java](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation-java) sample app and moving the files to follow this layout before moving on. + +</Step> +<Step title="Add a handler exposing schema info"> + +Add the following handler (to a package of your choosing): + +```java expandable +package ai.vespa.example; + +import com.yahoo.container.jdisc.HttpRequest; +import com.yahoo.container.jdisc.HttpResponse; +import com.yahoo.container.jdisc.ThreadedHttpRequestHandler; +import com.yahoo.jdisc.Metric; +import com.yahoo.search.schema.SchemaInfo; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.Charset; +import java.util.concurrent.Executor; + +public class SchemaInfoHandler extends ThreadedHttpRequestHandler { + + private final SchemaInfo schemaInfo; + + public SchemaInfoHandler(Executor executor, Metric metric, SchemaInfo schemaInfo) { + super(executor, metric); + this.schemaInfo = schemaInfo; + } + + @Override + public HttpResponse handle(HttpRequest httpRequest) { + // Creating JSON, handling different paths etc. 
left as an exercise for the reader + StringBuilder response = new StringBuilder(); + for (var schema : schemaInfo.schemas().values()) { + response.append("schema: " + schema.name() + "\n"); + for (var field : schema.fields().values()) + response.append(" field: " + field.name() + "\n"); + } + return new Response(200, response.toString()); + } + + private static class Response extends HttpResponse { + + private final byte[] data; + + Response(int code, byte[] data) { + super(code); + this.data = data; + } + + Response(int code, String data) { + this(code, data.getBytes(Charset.forName(DEFAULT_CHARACTER_ENCODING))); + } + + @Override + public String getContentType() { + return "application/json"; + } + + @Override + public void render(OutputStream outputStream) throws IOException { + outputStream.write(data); + } + + } + + private static class ErrorResponse extends Response { + ErrorResponse(int code, String message) { + super(code, "{\"error\":\"" + message + "\"}"); + } + } + +} +``` + +</Step> +<Step title="Add the new API handler to your container cluster"> + +In your `services.xml` file, under `<container>`, add: + +```xml +<handler id="ai.vespa.example.SchemaInfoHandler" bundle="[your pom.xml artifactId]"> + <binding>http://*/schema/v1/*</binding> +</handler> +``` + +</Step> +<Step title="Deploy the modified application"> +```sh +$ mvn install +$ vespa deploy +``` + +</Step> +<Step title="Verify that it works"> +```sh +$ vespa curl "schema/v1/" +``` + +</Step> +</Steps> diff --git a/mintlify-docs/en/schemas/inheritance-in-schemas.mdx b/mintlify-docs/en/schemas/inheritance-in-schemas.mdx new file mode 100644 index 0000000000..5915a2f039 --- /dev/null +++ b/mintlify-docs/en/schemas/inheritance-in-schemas.mdx @@ -0,0 +1,240 @@ +--- +title: "Inheritance in schemas" +description: "Both document types and full schemas can be inherited to make it easy to design a structured application package with little duplication. 
Document type inheritance defines a type hierarchy which is also useful for applications that [federate queries](/en/querying/federation) as queries can be written to the common supertype. This guide covers the different elements in the schema that support inheritance:" +--- + +1. Schemas +2. Document types +3. Rank profiles +4. Document summaries + +<Frame>![Schema elements that support inheritance](/assets/img/inheritance-overview.svg)</Frame> + +<Note> +**Note:** + +Inheritance is not to be confused with [parent/child](/en/schemas/parent-child), which is a feature to import field values at query time. +</Note> + +## Schema inheritance + +A schema that inherits another gets all the content of the parent schema as if it was defined inside the inheriting schema. A schema that inherits another must also (explicitly) inherit its document type: + +```js +schema books inherits items { + document books inherits items { + field author type string { + indexing: summary | index + } + } +} +``` + +## Document type inheritance + +A document type can inherit another document type. This will include all fields, including fields declared outside the document block in the schema. Rank profiles defined in the super-schema can then be inherited in the schema of this document, see [Rank profile inheritance](#rank-profile-inheritance) below.
+ +Both schemas *music* and *books* have the *title* field through inheritance: + +`my-app/schemas/items.sd`: + +```js +document items { + field title type string { + indexing: summary | index + } +} +``` + +`my-app/schemas/books.sd`: + +```js +schema books { + document books inherits items { + field author type string { + indexing: summary | index + } + } +} +``` + +`my-app/schemas/music.sd`: + +```js +schema music { + document music inherits items { + field artist type string { + indexing: summary | index + } + } +} +``` + +This is equivalent to: + +`my-app/schemas/books.sd`: + +```js +schema books { + document books { + field title type string { + indexing: summary | index + } + field author type string { + indexing: summary | index + } + } +} +``` + +`my-app/schemas/music.sd`: + +```js +schema music { + document music { + field title type string { + indexing: summary | index + } + field artist type string { + indexing: summary | index + } + } +} +``` + +Notes: + +- Multiple inheritance and multiple levels of inheritance are supported. +- Inheriting a document type defined in another content cluster is allowed. +- Overriding fields defined in supertypes is not allowed. +- [Imported fields](/en/reference/schemas/schemas#import-field) defined in supertypes are not inherited. + +## Rank profile inheritance + +Where fields define the document types, rank profiles define the computations over the documents.
Rank profiles can be inherited from rank-profiles defined in the same schema, or defined in another schema when this document inherits the document defined in the schema where the rank profile is defined: + +`my-app/schemas/items.sd`: + +```js +schema items { + document items { + field title type string { + indexing: summary | index + } + } + + rank-profile items_ranking_base { + function title_score() { + expression: fieldLength(title) + } + first-phase { + expression: title_score + } + summary-features { + title_score + } + } +} +``` + +`my-app/schemas/books.sd`: + +```js +schema books { + document books inherits items { + field author type string { + indexing: summary | index + } + } + + rank-profile items_ranking inherits items_ranking_base {} + + rank-profile items_subschema_ranking inherits items_ranking_base { + first-phase { + expression: title_score + fieldMatch(author) + } + summary-features inherits items_ranking_base { + fieldMatch(author) + } + } +} +``` + +`my-app/schemas/music.sd`: + +```js +schema music { + document music inherits items { + field artist type string { + indexing: summary | index + } + } + + rank-profile items_ranking inherits items_ranking_base {} + + rank-profile items_subschema_ranking inherits items_ranking_base { + first-phase { + expression: title_score + fieldMatch(artist) + } + summary-features inherits items_ranking_base { + fieldMatch(artist) + } + } +} +``` + +*items_ranking* can be considered the "base" ranking. Pro-tip: Set this as the *default* rank profile by modifying the default [query profile](/en/querying/query-profiles): + +`my-app/search/query-profiles/default.xml`: + +```xml +<query-profile id="default"> + <field name="ranking.profile">items_ranking</field> +</query-profile> +``` + +Queries using *ranking.profile=default* will then use the first-phase ranking defined in *items.sd*. 
+ +Another way to inherit behavior is to override the first-phase ranking in the sub-schemas, still using functions defined in the super-schema (e.g. *title_score*). + +### Summary features + +[Summary-features](/en/reference/schemas/schemas#summary-features) and [match-features](/en/reference/schemas/schemas#match-features) are rank features computed during ranking, to be included in [results](/en/reference/querying/default-result-format). These features can be inherited from parent rank profiles - the above example uses `inherits` to include scores from features in super- and sub-schema - example result: + +```json +"summaryfeatures": { + "fieldMatch(author)": 0, + "rankingExpression(title_score)": 4 +} +``` + +In the examples above, both *books* and *music* schemas implement rank profiles with the same names (e.g. *items_subschema_ranking*), so they can be used in queries spanning both. If a query's rank profile cannot be found in a given schema, Vespa's default rank profile [nativerank](/en/ranking/nativerank) is used. + +[Inputs](/en/reference/schemas/schemas#inputs) to a rank profile are automatically inherited from the parent rank profile. If a new inputs block is defined in a child rank profile, those inputs will be added cumulatively to those defined in the parent. + +## Document summary inheritance + +[Document summaries](/en/querying/document-summaries) can inherit others defined in the same or an inherited schema.
+ +`my-app/schemas/books.sd`: + +```js +schema books { + document books { + field title type string { + indexing: summary | index + } + field author type string { + indexing: summary | index + } + } + + document-summary items_summary_tiny { + summary title {} + } + + document-summary items_summary_full inherits items_summary_tiny { + summary author {} + } +} +``` diff --git a/mintlify-docs/en/schemas/parent-child.mdx b/mintlify-docs/en/schemas/parent-child.mdx new file mode 100644 index 0000000000..738c4929d0 --- /dev/null +++ b/mintlify-docs/en/schemas/parent-child.mdx @@ -0,0 +1,187 @@ +--- +title: "Parent/Child" +sidebarTitle: "Parent-child relationships" +--- + +Using [document references](/en/reference/schemas/schemas#reference), documents can have parent/child relationships. Use this to join data by [importing](/en/reference/schemas/schemas#import-field) fields from parent documents. Features: + +- simplify document operations - one write to update one value +- no de-normalization needed - simplifies data updates and atomic update into all children +- search child documents based on properties from parent documents +- search parent documents only +- use imported fields as part of [visiting](/en/writing/visiting) and [garbage collection](/en/schemas/documents#document-expiry) with [document selection](/en/reference/writing/document-selector-language#using-imported-fields-in-selections) expressions + +Parent/child relationships are not supported in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). + +An alternative to parent documents is using arrays or maps of struct fields - this guide covers both. + +Common use cases are applications with structured data like commerce (e.g. products with multiple sellers), advertising (advertisers with campaigns with ads, that have budgets that need realtime updates). 
+ +High-level overview of documents, imported fields and array fields: + +<Frame>![Parent/child and global documents](/assets/img/parent-child.svg)</Frame> + +## Parent documents + +Model parent-child relationships by using [references](/en/reference/schemas/schemas#reference) to [global documents](/en/reference/applications/services/content#document). This is like foreign keys in a relational database. Parents can have parents. A document can have references to multiple parents - the parents can be of same or different types. + +Using a *reference*, [attribute](/en/content/attributes) fields can be [imported](/en/reference/schemas/schemas#import-field) from parent types into the child's [schema](/en/basics/schemas) and used for matching, ranking, grouping and sorting. A reference is a special attribute with the parent's [document ID](/en/schemas/documents#document-ids) as value. References are hence weak: + +- no cascade delete +- a referenced document can be non-existent - imported fields do not have values in this case + +When using parent-child relationships, data does not have to be denormalized as fields from parents are imported into children. Use this to update parent fields to limit number of updates if a field's value is shared between many documents. This also limits the resources (memory / disk) required to store and handle documents on content nodes. + +At cluster changes, global documents are merged to new nodes before regular documents. For consistency, a content node is not serving queries before all global documents are synchronized - refer to [content cluster elasticity](/en/content/elasticity). + +<Warning> +**Important:** + +Cyclic or self [references](/en/reference/schemas/schemas#reference) are not allowed. 
+</Warning> + +Performance notes: + +- As parent documents are global, a write executes on all content nodes - see [examples](/en/performance/sizing-feeding#parent-child) +- Node capacity will limit the number of parent documents - there should normally be an order of magnitude fewer parent documents than child documents +- Memory usage grows accordingly. A global document is otherwise equal to a regular document, but each content node must be sized to hold all global documents plus its share of regular documents +- Reference fields add a memory indirection and do not impact query performance much +- [Search performance notes](/en/performance/feature-tuning#parent-child-and-search-performance) +- [Partial updates](/en/writing/partial-updates) to a reference field require a read-modify-write to the document store and limit throughput. + +## Multivalue fields + +A document can have [fields](/en/basics/schemas#document-fields) that are arrays or maps of structs. Structs and documents are similar - a set of field name/value pairs. One-to-many mappings can therefore be implemented this way, as an alternative to using parent/child, when each document has a set of properties that belongs to that document. + +Refer to [multivalue fields](/en/querying/searching-multivalue-fields) for more information. + +## Parent or multivalue? + +As a rule of thumb, model the items *searched for* as the document - for example, products for sale. Shared properties, like vendor, can be modeled using a parent document, importing a vendor name field - assuming a vendor has many products, and the vendor list is limited. Use arrays or maps of structs for properties documents might have, like shoe size or screen resolution - one can then have a struct field for property name and another for property value, giving a flexible structure for products with an unlimited set of possible properties.
+ +## Parent/child example + +In `services.xml`: + +```xml +<content id="mycluster" version="1.0"> + <documents> + <document type="advertiser" mode="index" global="true" /> + <document type="campaign" mode="index" global="true" /> + <document type="salesperson" mode="index" global="true" /> + <document type="ad" mode="index" /> + </documents> +</content> +``` + +Schemas and data: + +```js +schema advertiser { + document advertiser { + field name type string { + indexing : attribute + } + } +} +``` + +```json +{ + "put": "id:test:advertiser::cool", + "fields": { + "name": "cool" + } +} +``` + +```js +schema campaign { + document campaign { + field advertiser_ref type reference<advertiser> { + indexing: attribute + } + field budget type int { + indexing : attribute + } + } + import field advertiser_ref.name as advertiser_name {} +} +``` + +```json +[{ + "put": "id:test:campaign::thebest", + "fields": { + "advertiser_ref": "id:test:advertiser::cool", + "budget": 20 + } +}, +{ + "put": "id:test:campaign::nextbest", + "fields": { + "advertiser_ref": "id:test:advertiser::cool", + "budget": 10 + } +}] +``` + +```js +schema salesperson { + document salesperson { + field name type string { + indexing: attribute + } + } +} +``` + +```json +{ + "put": "id:test:salesperson::johndoe", + "fields": { + "name": "John Doe" + } +} +``` + +```js +schema ad { + document ad { + field campaign_ref type reference<campaign> { + indexing: attribute + } + field other_campaign_ref type reference<campaign> { + indexing: attribute + } + field salesperson_ref type reference<salesperson> { + indexing: attribute + } + } + + import field campaign_ref.budget as budget {} + import field salesperson_ref.name as salesperson_name {} + import field campaign_ref.advertiser_name as advertiser_name {} + + document-summary my_summary { + summary budget {} + summary salesperson_name {} + summary advertiser_name {} + } +} +``` + +```json +{ + "put": "id:test:ad::1", + "fields": { + "campaign_ref": 
"id:test:campaign::thebest", + "other_campaign_ref": "id:test:campaign::nextbest", + "salesperson_ref": "id:test:salesperson::johndoe" + } +} +``` + +Document type *ad* has two references to *campaign* (via *campaign_ref* and *other_campaign_ref*) and one reference to *salesperson* (via *salesperson_ref*). The *budget* field from *campaign* is imported into the *ad* schema (via *campaign_ref*) and given the name *budget*. Similarly, the *name* of *salesperson* is imported as *salesperson_name*. + +Document type *campaign* has a reference to *advertiser* and imports the field *name* as *advertiser_name*. This is also imported into *ad* via *campaign_ref* from its grandparent *advertiser*. To use the imported fields in summary, define a document summary *my_summary* containing these fields. diff --git a/mintlify-docs/en/schemas/predicate-fields.mdx b/mintlify-docs/en/schemas/predicate-fields.mdx new file mode 100644 index 0000000000..9a80dc1775 --- /dev/null +++ b/mintlify-docs/en/schemas/predicate-fields.mdx @@ -0,0 +1,329 @@ +--- +title: "Predicate Fields" +--- + +[Predicate](/en/reference/schemas/schemas#predicate) fields provides a way to match queries to a set of *boolean constraints* in a document. Example use cases: + +- Boolean constraints in advertisements, specifying their target groups. Query with a set of impressions, i.e., specific values for a given user, to find out which ads can be shown to this user. +- Saved searches for e-commerce. Users save searches in the form of predicates, and new items are matched to the saved searches. 
+ +Example document and query, with a `target` [predicate field](/en/reference/schemas/schemas#predicate): + +```json +{ + "put": "id:ns:user::12345", + "fields": { + "target": "gender in ['male'] and age in [30..40] and income in [200..50000]", + "name": "John Doe" + } +} +``` + +```js +select name from user where predicate(target, {"gender":"male"}, {"income":30000L}) +``` + +Read more in the [Predicate Search sample application](https://github.com/vespa-engine/sample-apps/blob/master/examples/predicate-fields/README.md). + +There are some trade-offs between index size and query performance when configuring predicate fields, see [configuration](#configuration). + +Predicate fields are good for solving problems where practitioners have used Percolator Queries. + +<Note> +**Note:** + +Predicate fields are not supported in [streaming search](/en/performance/streaming-search#differences-in-streaming-search). +</Note> + +## Boolean Constraints + +A boolean constraint (predicate) specifies a target area for queries to land in. Its attributes may be simple true/false criteria, subsets of sets to match, or ranges of values. + +### Predicates + +A predicate is a specification of a boolean constraint in the form of a boolean expression. For example, the predicate `gender in [Female] and age in [20..30] and pos in [1..4]` can specify that an ad requires target users to be women between 20 and 30 years of age, and that the ad must be placed in one of the top four positions. + +See [grammar](#grammar) for details. + +### Attributes + +The variables in predicates are known as *attributes*. There are two types of attributes: + +- **Regular attributes**. Regular attributes take string values. Specify in the predicate that a regular attribute must have one value of multiple alternatives. E.g. `hobby in [Music, Hiking]` evaluates true if hobby is assigned to either `Music` or `Hiking` (or both). +- **Range attributes**. 
Range attributes take integer values and may only be used in range expressions. A range expression specifies either a lower bound, an upper bound or both: + 1. `age in [10..]` - age must be 10 or higher + 2. `age in [..10]` - age must be 10 or lower + 3. `age in [10..15]` - age must be between 10 and 15, inclusive + +### Predicate Samples + +The subset expression evaluates to true if the regular attribute is assigned to any of the values listed in the brackets: + +```js +hobby in [Music, Hiking, Biking] +``` + +The range expression evaluates to true if the range attribute is in the specified range (boundaries are inclusive): + +```js +age in [20..29] +``` + +It's also possible to specify only the lower or upper bound for a range expression: + +```js +age in [..29] +``` + +Use the `or` operator to create disjunctions: + +```js +age in [..29] or hobby in [Music, Biking] +``` + +Similarly, use the `and` operator to create conjunctions: + +```js +age in [20..29] and hobby in [Music] +``` + +Parentheses can be used to create more complex predicates: + +```js +(age in [20..29] and gender in [Male]) or (age in [30..39] and gender in [Female]) +``` + +The subset and range expressions can be negated using the `not` operator: + +```js +age not in [20..29] and hobby not in [Music] +``` + +```js +not age in [20..29] and not hobby in [Music] +``` + +The `not` operator can also be combined with parentheses: + +```js +not (age in [20..29] or hobby in [Music]) +``` + +Attributes and values containing non-alphanumeric characters must be surrounded with quotes: + +```js +"profile.gender" in ['Male', "Female"] +``` + +If a string surrounded with double-quotes contains a double-quote, escape it with a backslash. The same rule applies to single quotes in single-quoted strings. Double quotes in single-quoted strings and single quotes in double-quoted strings shall not be escaped.
+ +```js +"single'quote" in ["double\"quote", 'double"quote', 'single\'quote'] +``` + +Set the predicate to the value true to make it always a match. Setting the predicate to false will ensure that it's never a match. + +```js +true +``` + +```js +false +``` + +## Queries + +A boolean query represents a set of concrete values for attributes, which may fall within the target area drawn up by one or more sets of boolean constraints. Queries are specified by two lists of attributes with values. One list holds regular attributes, each with one or more discrete values, while the other list holds range attributes with a single value each. + +### Search Using YQL+ + +Boolean queries are made using the `predicate` function of YQL+. The predicate function takes three parameters: The predicate field, a map of regular attribute key/value pairs, and a map of range attribute key/value pairs. + +```js +select * from sources * where predicate(predicate_field, {"gender":"Female", "gender":"Male", hobby:"Hiking"}, {"age":20L, "pos":2L}) +``` + +One can use empty maps when specifying attributes: + +```js +select * from sources * where predicate(predicate_field, {}, {"age":20L}) +``` + +When specifying multiple values for the same key, it is possible to use an array as the value: + +```js +select * from sources * where predicate(predicate_field,{"gender":["Female","Male"], "hobby":"Hiking"}, {"age":20L}) +``` + +### Subqueries + +For efficiency reasons it is possible to specify multiple queries at once. This is done by providing a bitmap with each term, where the bitmap represents which (out of 64) subqueries the term is a part of. A typical use case for this is when we want to find ads for multiple positions on a page. Then the user profile information will be part of every subquery while the ad placement varies. Remember that all subqueries are used every time, which means that empty subqueries also can get matches. 
+ +#### Specifying Subqueries in YQL + +Subqueries are specified as maps where the key is a string representation of either a hex number or a list of bit numbers, and the value is a map of attribute key/value pairs. The two queries below demonstrate the two different methods of mapping attributes to subqueries. + +```js +select * from sources * where predicate(predicate_field, {"0x3":{"gender":"Female"}, "0x1":{"hobby":["music","hiking"]}}, {"0x2":{"age":23L}}) +``` + +```js +select * from sources * where predicate(predicate_field, {"[0,1]":{"gender":"Female"}, "[0]":{"hobby":["music","hiking"]}}, {"[1]":{"age":23L}}) +``` + +The queries above are constructed from the following two queries: + +```js +select * from sources * where predicate(predicate_field, {"gender":"Female", "hobby":["music","hiking"]},{}) +select * from sources * where predicate(predicate_field, {"gender":"Female"}, {"age":23L}) +``` + +Note that the subquery bit numbers use zero-based numbering, i.e. the first subquery has index `0`. The highest valid subquery has index `63`. + +Any value `0x1`-`0xFFFFFFFFFFFFFFFF` is a valid subquery bitmap. + +<Note> +**Note:** + +When no subquery mapping is specified, the attribute is applied to all subqueries. +</Note> + +#### Identifying Subqueries in Results + +When using subqueries you need to add the `subqueries` summary feature to your schema. For each hit, the subqueries are reported in two different summary features, one for the lower 32 bits, named `lsb`, and one for the upper 32 bits, named `msb`. + +See the [predicate search example](https://github.com/vespa-engine/sample-apps/tree/master/examples/predicate-fields) for how to configure a custom *searcher*, *services.xml* and the *schema* required to retrieve the subquery bitmap of each hit. + +#### Predicate Example + +A typical use case for the subquery feature is when we want to find ads for multiple positions on a page.
The user profile information will be identical for every subquery while the ad placement varies. The following example uses 3 different attributes: `age`, `gender` and `pos`. The first two attributes represent the user profile, while the `pos` attribute determines the ad placement. Assume the following 3 documents are indexed: + +```json +[ + { + "fields" : { + "target" : "age in [20..30] and gender in [Female, Male] and pos in [1]" + }, + "put" : "id:test:ad::1" + }, + { + "fields" : { + "target" : "gender in [Male] and pos in [1, 2]" + }, + "put" : "id:test:ad::2" + }, + { + "fields" : { + "target" : "age in [20..] and gender in [Female, Male] and pos in [2]" + }, + "put" : "id:test:ad::3" + } +] +``` + +Find all ads that target males at age 25 for ad placement 1 and 2. To do that, create a query consisting of two subqueries, one for placement 1 and the other for placement 2: + +```js +select * from sources * where predicate(target, {"[0,1]":{"gender":"Male"}, "[0]":{"pos": "1"}, "[1]":{"pos": "2"}}, {"[0,1]":{"age":25L}}) +``` + +Note that each subquery has a separate value for `pos`, while the `gender` and `age` values are common for both subqueries. + +The query will return 3 hits, one for each document. Each document will have a summary feature with the subquery bitmap (64-bit). This is assuming that the `SubqueriesSearcher` from the sample app is used. If not, each document will have two summary features, one for the lower 32 bits and one for the upper 32 bits of the subquery bitmap. + +- The document with id `id:test:ad::1` will have a subquery bitmap of `0x1`; the lowest bit is set to 1 as the document is a hit for the first subquery (index `0`). +- The document with id `id:test:ad::2` is a hit for both subqueries and has the two lowest bits set to 1, giving `0x3` as subquery bitmap. +- Following the same principle, the subquery bitmap of `id:test:ad::3` is `0x2`.
+ +## Configuration + +<Note> +**Note:** + +Using predicate fields is complex and tuning the configuration for performance requires insight into the underlying algorithms. +</Note> + +A field of type predicate requires an index definition with a mandatory parameter, `arity`, a value which trades index size for query complexity. See [Index Size](#index-size) for more details. Fields of type predicate also accept three optional parameters: `lower-bound`, `upper-bound` and `dense-posting-list-threshold`. These properties are helpful in optimizing query performance and index size. The first two parameters set the lower and upper bounds on values of range attributes. The last parameter determines how the boolean index is structured, trading index size for potentially better query performance. + +To feed a predicate, put it in a field of type [predicate](/en/reference/schemas/schemas#predicate) as a string - refer to the [JSON reference](/en/reference/schemas/document-json-format#predicate). + +### Schema + +The following schema example sets up an attribute predicate field including the mandatory arity parameter. + +```js +schema example { + document example { + + field predicate_field type predicate { + indexing: attribute + index { + arity: 2 # mandatory + lower-bound: 3 + upper-bound: 200 + dense-posting-list-threshold: 0.25 + } + } + + } + + # For subquery reporting: + rank-profile default { + summary-features: subqueries(predicate_field).lsb subqueries(predicate_field).msb + } +} +``` + +### Upper and Lower Bounds + +The `upper-bound` and `lower-bound` parameters specify the range of values that the boolean expressions are expected to operate on. Queries with values outside this range are rejected. The index is optimized based on the bounds, so if the bounds are changed, the index needs to be rebuilt. + +### Dense Posting List Threshold + +The `dense-posting-list-threshold` parameter is a threshold that impacts how the boolean index is structured in memory.
The boolean index consists of several sparse data structures (B-tree based posting lists). The largest posting lists are also stored in a dense vector based structure. The dense posting lists are faster for searching, but may increase the overall index size significantly. Only posting lists with relative size above the threshold are stored in the dense format (for a corpus of 1 million documents and threshold=0.5, all posting lists of size >500k will be stored as vectors). The optimal value depends on corpus characteristics and will lie somewhere between 0.15 and 0.50. A threshold that is too low will have a large negative impact on both query performance and index size, while a threshold that is too high may slightly decrease the query performance. + +The default value is 0.40. Valid range is (0, 1]. + +### Index Size + +When using range attributes, the attributes are expanded to a set of attributes for sub-ranges that together cover the entire range. The granularity of the sub-ranges is controlled by the parameter `arity`. A low arity will make smaller indexes, but require more terms in the queries. Conversely, a high arity makes for large indexes but fewer query terms. + +Also impacting index size is the size of intervals that are accepted in the boolean constraints. A typical case is intervals with infinite endpoints, i.e. match every number greater than *x*. Using 2^63 as infinity makes the intervals large, and impacts index size. A lower max-value reduces the index size. The max-values can be easily controlled with the `upper-bound` and `lower-bound` parameters. + +The `dense-posting-list-threshold` parameter has an inverse impact on the index size. Increasing the threshold is beneficial if a smaller index size is preferred over query performance.
+ +The following figure shows how the number of terms for a single document grows with increasing arity and range limit: + +<Frame> +![](/assets/graph-image.png) +</Frame> + +## Grammar + +```js +predicate = disjunction <EOF> ; +disjunction = conjunction [ 'or' disjunction ] ; +conjunction = ( leaf | [ 'not' ], '(', disjunction, ')' ) [ 'and' conjunction ] ; +leaf = value, [ 'not' ], 'in', ( value | multivalue | range ) + | 'true' + | 'false' ; + +value = alphanum { alphanum } | string ; +multivalue = '[' value, { ',', value } ']' ; +range = '[' [ integer ] '..' [ integer ] ']' ; + +alphanum = alpha | digit | '_'; +string = '\'', { stdchars_1 | escape_1 }, '\'' + | '"', { stdchars_2 | escape_2 }, '"' ; + +integer = [ '-' | '+' ], ( posdigit, { digit } | '0' ); + +alpha = ? ASCII characters in the range a-z and A-Z ? ; +digit = '0' | posdigit ; +posdigit = '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' ; +stdchars_1 = ? All unicode chars except '\\' and '\'' ? ; +stdchars_2 = ? All unicode chars except '\\' and '"' ? ; +escape_1 = '\\', ( '\\' | 't' | 'n' | 'f' | 'r' | '\'' | 'x', hexdigit, hexdigit ) +escape_2 = '\\', ( '\\' | 't' | 'n' | 'f' | 'r' | '"' | 'x', hexdigit, hexdigit ) +hexdigit = digit | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'A' | 'B' | 'C' | 'D' | 'E' | 'F' ; +``` diff --git a/mintlify-docs/en/schemas/structs.mdx b/mintlify-docs/en/schemas/structs.mdx new file mode 100644 index 0000000000..7b9e6ea488 --- /dev/null +++ b/mintlify-docs/en/schemas/structs.mdx @@ -0,0 +1,60 @@ +--- +title: "Structs" +description: "This document explains how to use structs in Vespa documents." +--- + +## Structs + +A [struct](/en/reference/schemas/schemas#struct) is contained in a document and groups one or more fields into a composite type that can be accessed like a single field. 
+ +Example: + +```js +struct email { + field sender type string {} + field recipient type string {} + field subject type string {} + field content type string {} +} + +field emails type array<email> {} +``` + +In this example the struct is part of an [array](/en/reference/schemas/schemas#array). A struct can also be used in a [map](/en/reference/schemas/schemas#map). + +## Struct fields + +A [struct-field](/en/reference/schemas/schemas#struct-field) defines how a given field in a struct should be indexed and searched. + +Note that though a struct-field refers to a field in a struct, the struct-field itself is defined inside a field. + +Using the *email* struct defined previously (see [struct](/en/reference/schemas/schemas#struct)), we can define indexing for a specific field, like *content*: + +```js +field emails type array<email> { + indexing: summary + struct-field content { + indexing: attribute + attribute: fast-search + } +} +``` + +The equivalent code (including the struct definition) in Pyvespa is as follows: + +```python +email_struct = Struct(name="email", fields=[ + Field(name="sender", type="string"), + Field(name="recipient", type="string"), + Field(name="subject", type="string"), + Field(name="content", type="string"), +]) +emails_field = Field(name="emails", + type="array<email>", + indexing=["summary"], + struct_fields=[StructField(name="content", indexing=["attribute"], attribute=["fast-search"])] +) +schema = Schema(name="schema", document=Document()) +schema.add_fields(emails_field) +schema.document.add_structs(email_struct) +``` diff --git a/mintlify-docs/en/security/cloudflare-workers.mdx b/mintlify-docs/en/security/cloudflare-workers.mdx new file mode 100644 index 0000000000..8cefc79d61 --- /dev/null +++ b/mintlify-docs/en/security/cloudflare-workers.mdx @@ -0,0 +1,143 @@ +--- +title: "Using Cloudflare Workers with Vespa Cloud" +sidebarTitle: "Cloudflare Workers" +--- +This guide describes how you can access mutual TLS protected Vespa Cloud 
endpoints using [Cloudflare Workers](https://workers.cloudflare.com/). + +## Writing and reading from Vespa Cloud Endpoints + +Vespa Cloud's endpoints are protected using mutual TLS. This means the client must present a TLS certificate that the Vespa application trusts. The application knows which certificate to trust because the certificate is included in the Vespa application package. + +### mTLS Configuration + +Mutual TLS certificates can be created using the [Vespa CLI](/en/clients/vespa-cli): + +For example, for tenant `samples` with application `vsearch` and instance `default`: + +```bash +$ vespa auth cert --application samples.vsearch.default +Success: Certificate written to security/clients.pem +Success: Certificate written to $HOME/.vespa/samples.vsearch.default/data-plane-public-cert.pem +Success: Private key written to $HOME/.vespa/samples.vsearch.default/data-plane-private-key.pem +``` + +Refer to the [security guide](/en/security/guide) for details. + +### Creating a Cloudflare Worker to interact with mTLS Vespa Cloud endpoints + +In March 2023, Cloudflare announced [Mutual TLS available for Workers](https://blog.cloudflare.com/mtls-workers/), see also [Workers Runtime API mTLS](https://developers.cloudflare.com/workers/runtime-apis/mtls/). + +Install wrangler and create a worker project. Wrangler is the Cloudflare command line interface (CLI), refer to [Workers:Get started guide](https://developers.cloudflare.com/workers/get-started/guide/). Once configured and authenticated, one can upload the Vespa Cloud data plane certificates to Cloudflare. + +Upload Vespa Cloud mTLS certificates to Cloudflare: + +```bash +$ npx wrangler mtls-certificate upload \ + --cert $HOME/.vespa/samples.vsearch.default/data-plane-public-cert.pem \ + --key $HOME/.vespa/samples.vsearch.default/data-plane-private-key.pem \ + --name vector-search-dev +``` + +The output will look something like this: +```bash +Uploading mTLS Certificate vector-search-dev... +Success! 
Uploaded mTLS Certificate vector-search-dev +ID: 63316464-1404-4462-baf7-9e9f81114d81 +Issuer: CN=cloud.vespa.example +Expires on 3/11/2033 +``` + +Notice the `ID` in the output; This is the `certificate_id` of the uploaded mTLS certificate. To use the certificate in the worker code, add an `mtls_certificates` variable to the `wrangler.toml` file in the project to bind a name to the certificate id. In this case, bind to `VESPA_CERT`: + +```toml +mtls_certificates = [ + { binding = "VESPA_CERT", certificate_id = "63316464-1404-4462-baf7-9e9f81114d81" } +] +``` + +With the above binding in place, you can access the `VESPA_CERT` in Worker code like this: + +```javascript +export default { + async fetch(request, env) { + return await env.VESPA_CERT.fetch("https://vespa-cloud-endpoint"); + } +} +``` + +Notice that `env` is a variable passed by the Cloudflare worker infrastructure. + +### Worker example + +The following worker example forwards POST and GET HTTP requests to the `/search/` path of the Vespa cloud endpoint. It rejects other paths or other HTTP methods. + +```js expandable +/** + * Simple Vespa proxy that forwards read (POST and GET) requests to the + * /search/ endpoint + * Learn more at https://developers.cloudflare.com/workers/ + */ + +export default { + async fetch(request, env, ctx) { + //Change to your endpoint url, obtained from the Vespa Cloud Console. 
+ //Use global endpoint if you have global routing with multiple Vespa regions + const vespaEndpoint = "https://vsearch.samples.aws-us-east-1c.dev.z.vespa-app.cloud"; + async function MethodNotAllowed(request) { + return new Response(`Method ${request.method} not allowed.`, { + status: 405, + headers: { + Allow: 'GET,POST', + } + }); + } + async function NotAcceptable(request) { + return new Response(`Path not Acceptable.`, { + status: 406, + }); + } + + if (request.method !== 'GET' && request.method !== 'POST') { + return MethodNotAllowed(request); + } + let url = new URL(request.url) + const { pathname, search } = url; + if (!pathname.startsWith("/search/")) { + return NotAcceptable(request); + } + const destinationURL = `${vespaEndpoint}${pathname}${search}`; + let new_request = new Request(destinationURL, request); + return await env.VESPA_CERT.fetch(new_request) + }, +}; +``` + +To deploy the above to the worldwide global edge network of Cloudflare, use: + +```bash +$ npx wrangler publish +``` + +To start a local instance, use: + +```bash +$ npx wrangler dev +``` + +Test using `curl`: +```bash +$ curl --json '{"yql": "select * from sources * where true"}' http://127.0.0.1:8787/search/ +``` + +After publishing to Cloudflare production: + +```bash +$ curl --json '{"yql": "select * from sources * where true"}' https://your-worker-name.workers.dev/search/ +``` + +## Data plane access control permissions + +Vespa Cloud supports having multiple certificates to separate `read` and `write` access. +This way, one can upload the read-only certificate to a Cloudflare worker to limit write access. + +See [Data plane access control permissions](/en/security/guide#permissions). 
diff --git a/mintlify-docs/en/security/guide.mdx b/mintlify-docs/en/security/guide.mdx new file mode 100644 index 0000000000..3b7d6b4191 --- /dev/null +++ b/mintlify-docs/en/security/guide.mdx @@ -0,0 +1,410 @@ +--- +title: "Security Guide" +--- + +Vespa Cloud has several security mechanisms it is important for developers to understand. Vespa Cloud has two different interaction paths, *Data Plane* and *Control Plane*. Communication with the Vespa application goes through the *Data Plane*, while the *Control Plane* is used to manage Vespa tenants and applications. + +The *Control Plane* and the *Data Plane* have different security mechanisms, described in this guide. + +## SOC 2 + +Vespa.ai has a SOC 2 attestation - read more in the [Trust Center](https://trust.vespa.ai/). + +## Data Plane + +Data plane requests are protected using mutual TLS, or optionally tokens. + +### Configuring mTLS + +Certificates can be created using the [Vespa CLI](/en/clients/vespa-cli): + +```bash +$ vespa auth cert --application <tenant>.<app>.<instance> +``` + +```bash +$ vespa auth cert --application scoober.albums.default +Success: Certificate written to security/clients.pem +Success: Certificate written to $HOME/.vespa/scoober.albums.default/data-plane-public-cert.pem +Success: Private key written to $HOME/.vespa/scoober.albums.default/data-plane-private-key.pem +``` + +The certificates can be created regardless of the application existence in Vespa Cloud. One can use this command to generate `security/clients.pem` for an application package: + +```bash +$ cp $HOME/.vespa/scoober.albums.default/data-plane-public-cert.pem security/clients.pem +``` + +Certificates can also be created using OpenSSL: + +```bash +$ openssl req -x509 -sha256 -days 1825 -newkey rsa:2048 -keyout key.pem -out security/clients.pem +``` + +The certificate is placed inside the application package in [security/clients.pem](/en/reference/applications/application-packages). 
Make sure `clients.pem` is placed correctly if the certificate is created with OpenSSL, while the Vespa CLI will handle this automatically. + +`security/clients.pem` files can contain multiple PEM encoded certificates by concatenating them. This allows you to have multiple clients with separate private keys, making it possible to rotate to a new certificate without any downtime. + +### Permissions + +To support different permissions for clients, it is possible to limit the permissions of a client. Only `read` or `write` permissions are supported. + +#### Request mapping + +The request actions are mapped from HTTP method. The default mapping rule is: + +- GET → `read` +- PUT, POST, DELETE → `write` + +For `/search/` this is replaced by: + +- GET, POST → `read` + +#### Example + +Create 3 different certificates, for three different use cases: + +- Serving - `read` +- Ingest - `write` +- Full access - `read, write` + +```bash +$ openssl req -x509 -sha256 -days 1825 -newkey rsa:2048 -keyout key.pem -out security/serve.pem +$ openssl req -x509 -sha256 -days 1825 -newkey rsa:2048 -keyout key.pem -out security/ingest.pem +$ openssl req -x509 -sha256 -days 1825 -newkey rsa:2048 -keyout key.pem -out security/full_access.pem +``` + +<Note> +Notes: + +- Files must be placed in the *security* folder inside the application package +- Certificates must be unique +- Certificate chains are currently not supported +- Files must be written using PEM encoding +</Note> + +Reference the certificate files from services xml using the `clients` element: + +```xml expandable +<container version='1.0'> + ... + <clients> + <client id="serve" permissions="read"> + <certificate file="security/serve.pem"/> + </client> + <client id="ingest" permissions="write"> + <certificate file="security/ingest.pem"/> + </client> + <client id="full_access" permissions="read,write"> + <certificate file="security/full_access.pem"/> + </client> + </clients> + ... 
+</container> +``` + +#### Custom request mapping + +The default mapping can be changed by overriding `requestHandlerSpec()`: + +```java expandable +/** + * Example overriding acl mapping of POST requests to read + */ +public class CustomAclHandler extends ThreadedHttpRequestHandler { + + private final static RequestHandlerSpec REQUEST_HANDLER_SPEC = + RequestHandlerSpec.builder().withAclMapping( + HttpMethodAclMapping.standard() + .override(Method.POST, AclMapping.Action.READ) + .build()) + .build(); + + @Override + public RequestHandlerSpec requestHandlerSpec() { + return REQUEST_HANDLER_SPEC; + } +``` + +### Configuring tokens + +Application endpoints can also be configured with token based authentication. Note that it is still required to define at least one client for mTLS. + +<Note> +**Note:** + +Token authentication must be explicitly enabled when used in combination with [Private Endpoints](/en/operations/private-endpoints). +</Note> + +#### Creating tokens using the console + +Tokens are identified by a name, and can contain multiple versions to easily support token rotation. + +To create a new token: + +<Steps> +<Step> +In the [console](https://console.vespa.ai) tenant view, open [**Account > Tokens**](https://console.vespa-cloud.com/link/tenant/account/tokens) +</Step> +<Step> +Click **Add token** +</Step> +<Step> +Enter a name you'll reference in the application later and click **Add**. Remember to copy the token value and store it securely. +</Step> +</Steps> + +To add a new token *version*: + +<Steps> +<Step> +Find the existing token, click **Add version** +</Step> +<Step> +Select expiration and click **Add**. Copy the token value and store securely. 
+</Step> +</Steps> + +To revoke a version: + +<Steps> +<Step> +Find the existing token version, click **Revoke** +</Step> +</Steps> + +To manually rotate a token: + +<Steps> +<Step> +Add a new token *version* following the above steps +</Step> +<Step> +Revoke the old version when no clients use the old version +</Step> +</Steps> + +#### Application configuration with token endpoints + +After creating a token, it must be configured in your application's services.xml by adding the [clients](/en/reference/applications/services/container#clients) element to your container cluster(s). + +Here is an example with multiple container clusters and tokens (you may only have one): + +```xml expandable +<container id="documentapi" version="1.0"> + ... + <clients> + <client id="mtls" permissions="read,write"> + <certificate file="security/clients.pem"/> + </client> + <client id="feed-token-client" permissions="read,write"> + <token id="feed-token"/> + </client> + </clients> + ... +</container> +<container id="query" version="1.0"> + ... + <clients> + <client id="mtls" permissions="read"> + <certificate file="security/clients.pem"/> + </client> + <client id="query-token-client" permissions="read"> + <token id="query-token"/> + </client> + </clients> + ... +</container> +``` + +#### Security recommendations + +The cryptographic properties of token authentication vs mTLS are comparable. There are however a few key differences in how they are used: + +- tokens are sent as a header with every request +- since they are part of the request they are also more easily leaked in log outputs or source code (e.g. curl commands). + +It is therefore recommended to + +- create tokens with a short expiry (keeping the default of 30 days). +- keep tokens in a secret provider, and remember to hide output. +- never commit secret tokens into source code repositories! 
+ +### Use endpoints + +#### Using mTLS + +Once the application is configured and deployed with a certificate in the application package, requests can be sent to the application. Again, the Vespa CLI can help to use the correct certificate. + +```bash +$ vespa curl --application <tenant>.<app>.<instance> /ApplicationStatus +``` + +```bash +$ curl --key $HOME/.vespa/scoober.albums.default/data-plane-private-key.pem \ + --cert $HOME/.vespa/scoober.albums.default/data-plane-public-cert.pem \ + $ENDPOINT +``` + +#### Using tokens + +The token endpoint must be used when using tokens. After deployment is complete, the token endpoint will be available in the token endpoint list (marked “Token”). To use the token endpoint, the token should be sent as a bearer authorization header: + +```bash +$ vespa query \ + --header="Authorization: Bearer $TOKEN" \ + 'yql=select * from music where album contains "head"' +``` + +```bash +curl -H "Authorization: Bearer $TOKEN" $ENDPOINT +``` + +#### Using a browser + +In Vespa guides, curl is used in examples, like: + +```bash +$ curl --cert ./data-plane-public-cert.pem --key ./data-plane-private-key.pem $ENDPOINT +``` + +To use a browser, install key/cert pair into Keychain Access (macOS Sonoma), assuming Certificate Common Name is "cloud.vespa.example" (as in the guides): + +<Steps> +<Step> +Install key/cert pair: + +```bash +$ cat data-plane-public-cert.pem data-plane-private-key.pem > pkcs12.pem +$ openssl pkcs12 -export -out pkcs12.p12 -in pkcs12.pem +``` + +</Step> +<Step> +New password will be requested, and it will be used in the next steps. +</Step> +<Step> +In Keychain Access: With login keychain + - Click "File" -> Import Items. + - Choose pkcs12.p12 file created before and type the password. + - Double-click the imported certificate, open "Trust" and set "When using this certificate" to "Always Trust". + - Right-click and "New Certificate Preference...", then add the $ENDPOINT. 
+</Step> +<Step> +Open the same URL in Chrome, choose the cloud.vespa.example certificate and allow Chrome to read the private key. +</Step> +</Steps> + +#### Using Postman + +Many developers prefer interactive tools like [Postman](https://postman.com/). The Vespa blog has an article on [how to use Postman with Vespa](https://blog.vespa.ai/interface-with-vespa-apis-using-postman/). + +#### Using Cloudflare Workers + +See [Using Cloudflare Workers with Vespa Cloud](/en/security/cloudflare-workers). + +### Different credentials per instance + +To use different credentials per [instance](/en/learn/tenant-apps-instances), use [services.xml variants](/en/operations/deployment-variants#servicesxml-variants). As an example, use this to have a separate mTLS keypair for production instances (use the same pattern if using tokens): + +```xml expandable +<container id="default" version="1.0"> + <clients> + <client id="mtls" permissions="read" deploy:instance="default"> + <certificate file="security/clients_dev.pem"/> + </client> + <client id="mtls" permissions="read" deploy:instance="qa"> + <certificate file="security/clients_qa.pem"/> + </client> + <client id="mtls" permissions="read" deploy:instance="prod"> + <certificate file="security/clients_prod.pem"/> + </client> + </clients> + <search/> + <document-api/> +</container> +``` + +Depending on the [instance](/en/operations/automated-deployments) deployed to, a different keypair will be used for dataplane access. Use the same mechanism to have a dedicated credential for the [dev](/en/operations/environments#dev) environment, using `deploy:environment="dev"`. + +## Control Plane + +The control plane is used to manage the Vespa applications. + +There are two different ways to access the Control Plane: using `vespa auth login` to log in as a regular user and using Application Keys. 
`vespa auth login` is intended for developers deploying manually to dev, while Application Keys are intended for deploying applications to production, typically by a continuous build tool. See more about these two methods below. + +### Managing users + +Tenant administrators manage user access through the Vespa Console. + +<Frame>![Vespa Console user management](/assets/img/manage-users.png)</Frame> + +Users have two different privilege levels: + +- **Admin:** Can administer the tenant's metadata and the users of the tenant. +- **Developer:** Can administer the applications deployed in the tenant. +### User access to Control Plane + +Apart from using the Vespa Console, communicating with the Control Plane is easiest with the [Vespa CLI](/en/clients/vespa-cli). + +```bash +$ vespa auth login +Your Device Confirmation code is: ****-**** + +If you prefer, you can open the URL directly for verification +Your Verification URL: https://vespa.auth0.com/activate?user_code=****-**** + +Press Enter to open the browser to log in or ^C to quit... + +Waiting for login to complete in browser ... done + +Successfully logged in. +``` + +After logging in with the Vespa CLI, the CLI can be used to deploy applications. Users are logged in with the same privilege as the user described in the Vespa Console. + +### Application Key + +If programmatic access to the Control Plane is needed, for example from a CI/CD system like GitHub Actions, the Application Key can be used - see example [deploy-vector-search.yaml](https://github.com/vespa-cloud/vector-search/blob/main/.github/workflows/deploy-vector-search.yaml). + +#### Configuration + +The Application Key can be generated in the Console from the Deployment Screen. The key is generated in the browser but the private key appears as a download in the browser. The public key can be downloaded separately from the Deployment Screen. The private key is never persisted in Vespa Cloud, so it is important that the private key is kept securely. 
If lost, the private key is unrecoverable. + +<Frame>![Vespa Console application key management](/assets/img/application-key.png)</Frame> + +The Application Key can also be generated using the Vespa CLI. + +```bash +$ vespa auth api-key -a <tenant>.<app>.<instance> +``` + +```bash expandable +$ vespa auth api-key -a scoober.albums.default +Success: API private key written to $HOME/.vespa/scoober.api-key.pem + +This is your public key: +-----BEGIN PUBLIC KEY----- +MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5fQUq12J/IlQQdE8pWC5596S7x9f +HpPcyxCX2dXBS4aqKxnfN5HEyTkLCNGCo9HQljgLziqW1VFzshAdm3hHQg== +-----END PUBLIC KEY----- + +Its fingerprint is: +91:1f:de:e3:9f:d3:21:28:1b:1b:05:40:52:72:81:4f + +To use this key in Vespa Cloud click 'Add custom key' at +https://console.vespa-cloud.com/tenant/scoober/keys +and paste the entire public key including the BEGIN and END lines. +``` + +#### Using the application key + +The Application Key can be used from the Vespa CLI to run requests against the Control Plane, such as deploying applications to Vespa Cloud. + +```bash +$ vespa deploy -z dev.aws-us-east-1c +``` + +## Dataplane access + +Vespa Cloud users on paid plans have access to Vespa Cloud Support. For cases where the Vespa Team needs access to the application's data to provide support, the Vespa support personnel can request access after an explicit approval from the customer in the open support case. diff --git a/mintlify-docs/en/security/mtls.mdx b/mintlify-docs/en/security/mtls.mdx new file mode 100644 index 0000000000..7e29bbf4a1 --- /dev/null +++ b/mintlify-docs/en/security/mtls.mdx @@ -0,0 +1,456 @@ +--- +title: "Securing Vespa with mutually authenticated TLS (mTLS)" +sidebarTitle: "mTLS" +--- + +<Note> +**Note:** + +This document is relevant for **self-hosted Vespa** only. +</Note> + +[Transport Layer Security (TLS)](https://datatracker.ietf.org/doc/html/rfc5246) is a protocol that uses cryptography to enable secure, tamper-proof communication over the network. 
This document describes the TLS functionality in Vespa and how to configure it. When properly configured, TLS ensures only trusted Vespa services can talk to each other. See the accompanying [reference](/en/reference/security/mtls) for details on configuration syntax. + +By default, all communication between self-hosted Vespa nodes is *unauthenticated* and *unencrypted*. This means anyone with network access can read and write data and potentially execute commands on the system. *Enabling TLS is therefore a fundamental part of a secure Vespa installation*. + +You should configure TLS even if you already have a firewall set up to prevent outside connections to your system. TLS helps protect your system even in the case where an attacker has managed to get a foothold inside your private network. Vespa will in some future version require TLS for all internal communication. To ensure you are ready for this, secure your systems as soon as possible. + +Vespa offers two separate planes of TLS connectivity: + +- **HTTP(S) application containers.** This is the edge of your cluster where search queries and feed requests are handled. Authentication and authorization for this plane are handled separately from Vespa-internal traffic; this is covered in [Configuring Http Servers and Filters](/en/applications/http-servers-and-filters#tls). See also [Securing the application container](/en/security/securing-your-vespa-installation#securing-the-application-container). +- **Vespa-internal communication.** This is all communication between processes running on the nodes in your cluster. This includes clients connecting directly to the backends instead of going through the application container APIs. Only mutually authenticated TLS (mTLS) may be configured for this traffic. + +This document only covers **Vespa-internal communication**. + +Enabling TLS in Vespa means that all internal endpoints are mTLS protected, even HTTP servers for status pages and metrics. 
Be especially aware of this if you have custom solutions in place for collecting and aggregating low level metrics or status pages from the Vespa backends. Though the terms *TLS* and *mTLS* may be used interchangeably in this document, *TLS* implies *mTLS* for all Vespa-internal traffic. + +Refer to the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) example application for a working example. + +## Prerequisites + +This section assumes you have some experience with generating and using certificates and private keys. For an introduction, see [Appendix A: setting up with a self-signed Certificate Authority](#appendix-a-setting-up-with-a-self-signed-certificate-authority) which gives step-by-step instructions on setting up certificates that can be used internally for a single Vespa application. + +In order to enable TLS, some extra files must be present on every node in your Vespa application: + +- A file containing the X.509 certificates of all trusted Certificate Authorities in PEM format. +- A file containing the X.509 certificate chain that will be used by Vespa processes on the node. This is in standard PEM format. The host's own certificate should be the first certificate listed in the file, followed by intermediate certificates (if any), separated by newlines. +- A file containing the private key corresponding to the certificate in the above chain, in PKCS#8 PEM format. Note that Vespa does not currently support encrypted private key files. +- A JSON configuration file telling Vespa which certificate/key files to use, and to provide further options for authorization. See [Writing a TLS configuration file](#writing-a-tls-configuration-file) for how to write these. + +How certificate and key material is distributed to the nodes is outside the scope of this article. See [dedicated section](#automatic-reloading-of-crypto-material) for Vespa's support of automatic and live reloading of TLS credentials. 
+ +<Warning> +**Warning:** + +You should never include public Certificate Authorities as part of the list of Certificate Authorities you trust. Only use CAs that you (or your organization) directly control or trust. This is to minimize the risk of malicious actors exploiting bugs or authentication flaws in public CAs to pose as your organization and acquire certificates that would allow them to directly access your system. +</Warning> + +<Warning> +**Warning:** + +Private keys must be kept secret and protected against unauthorized access. Make sure only the user running the Vespa processes can read the private key file. Key and certificate files should only be writable by administrator users. +</Warning> + +## Configuring Vespa TLS + +On any node running Vespa software, TLS is controlled via a single environment variable. This variable contains an absolute path pointing to a JSON configuration file: + +```bash +VESPA_TLS_CONFIG_FILE=/absolute/path/to/my-tls-config.json +``` + +This environment variable must be set to a valid file path before any Vespa services are started on the node. All nodes in your Vespa application must have a TLS config file pointing to the certificates that are trusted by the other nodes. + +See [Vespa environment variables](/en/operations/self-managed/files-processes-and-ports#environment-variables) for information on configuring environment variables for Vespa. + +Setting `VESPA_TLS_CONFIG_FILE` automatically enables TLS for all Vespa processes on the node. Vespa command-line tools will automatically pick up the required configuration and work transparently. + +<Warning> +**Important:** + +If this variable is not set, Vespa starts up in insecure mode without any TLS! +</Warning> + +### Writing a TLS configuration file + +The simplest possible configuration file only needs to know the certificates to trust and the certificate/key pair that identifies the node itself. 
Example: + +```json +{ + "files": { + "ca-certificates": "/absolute/path/to/ca-certs.pem", + "certificates": "/absolute/path/to/host-certs.pem", + "private-key": "/absolute/path/to/private-key.pem" + } +} +``` + +Set the environment variable, for example by appending to [conf/vespa/default-env.txt](/en/operations/self-managed/files-processes-and-ports#environment-variables): + +```bash +override VESPA_TLS_CONFIG_FILE /absolute/path/to/my-tls-config.json +``` + +All file paths must be absolute. If a Vespa process cannot load one or more files, it will fail to start up. + +### Configuring TLS peer authorization rules + +For many simpler deployments, a dedicated self-signed Certificate Authority will be used for the Vespa cluster alone. In that case simply being in possession of a valid certificate is enough to be authorized to access the cluster nodes; no one except the Vespa nodes is expected to have such a certificate. More complex deployments may instead use a shared CA, e.g. a corporate CA issuing certificates to nodes across many services and departments. In that case simply having a valid certificate is not sufficient to be used as an authorization mechanism. + +You can constrain which certificates may access the internal Vespa service by using *authorization rules*. These are consulted as part of every TLS handshake and must pass before any connection can be established. + +Authorization rules are specified as part of the JSON configuration file using the top-level [`authorized-peers`](/en/reference/security/mtls#top-level-elements) member. + +#### Example + +Let's assume our Vespa cluster consists of many nodes, each with their own certificate signed by a shared Certificate Authority. Each certificate contains a Subject Alternate Name (SAN) DNS name entry of the form `<unique-node-id>.mycluster.vespa.example.com`, where *unique-node-id* is unique per cluster node. 
These nodes will be running the actual Vespa services and must all be able to talk to each other. + +Let's also assume there is a monitoring service that requires low-level access to the services. Certificates presented by nodes belonging to this service will always have a Common Name (CN) value of `vespa-monitoring.example.com` and a DNS SAN entry of the form `<instance>.<region>.monitor.example.com`. Any monitoring instance in any us-east region must be able to access our cluster, but no others. + +Our TLS config file implementing these rules may look like this: + +```json +{ + "files": { + "ca-certificates": "/absolute/path/to/ca-certs.pem", + "certificates": "/absolute/path/to/host-certs.pem", + "private-key": "/absolute/path/to/private-key.pem" + }, + "authorized-peers": [ + { + "required-credentials": [ + { "field": "CN", "must-match": "vespa-monitoring.example.com" }, + { "field": "SAN_DNS", "must-match": "*.us-east-*.monitor.example.com" } + ], + "description": "Backend monitoring service access" + }, + { + "required-credentials": [ + { "field": "SAN_DNS", "must-match": "*.mycluster.vespa.example.com" } + ], + "description": "Cluster-internal node P2P access" + } + ] +} +``` + +See the [reference documentation](/en/reference/security/mtls#peer-authorization-rules) for details on syntax and semantics. + +### Automatic reloading of crypto material + +Vespa performs periodic reloading of the specified TLS configuration file. Currently, this happens every 60 minutes. This reloading happens live and does not impact service availability. Both certificates, the private key and authorization rules are reloaded. Vespa currently does not watch the configuration file for changes, so altering the config file or any of its dependencies does not trigger a reload by itself. + +If live reloading fails, the old configuration continues to be used and a warning is emitted to the local Vespa log. + +Vespa does not currently lock files before reading them. 
To avoid race conditions where files are reloaded by Vespa while they are being written, consider splitting file refreshing into multiple phases: + +<Steps> +<Step> +Instead of overwriting existing key/cert files, write *new* files with different file names. +</Step> +<Step> +Create a temporary TLS config JSON file pointing to these files. +</Step> +<Step> +Atomically rename the new TLS config file to the name specified by `VESPA_TLS_CONFIG_FILE`. +</Step> +<Step> +Garbage-collect the old files at a later point (for example at the next refresh time). +</Step> +</Steps> + +## Setting up TLS for a new Vespa application or upgrading with downtime + +With no Vespa services running on any nodes, ensure the `VESPA_TLS_CONFIG_FILE` environment variable is set to a valid configuration file path on every node, and [is visible to any Vespa start scripts](/en/operations/self-managed/files-processes-and-ports#environment-variables). Start Vespa services as you normally would. Check cluster health with [vespa-get-cluster-state](/en/reference/operations/self-managed/tools#vespa-get-cluster-state) and check [vespa-logfmt](/en/reference/operations/self-managed/tools#vespa-logfmt) for any TLS-related error messages that indicate a misconfiguration (such as certificate rejections etc.)—see the Troubleshooting section. The cluster should quickly converge to an available state. + +This is the simplest and fastest way to enable TLS, and is highly recommended if downtime is acceptable. + +## Upgrading an existing non-TLS Vespa application to TLS without downtime + +If you already have a Vespa application serving live traffic that you don't want to take down completely in order to enable TLS, it's possible to perform a gradual, rolling upgrade. Doing this requires insecure and TLS connections to be used alongside each other for some time, moving more and more nodes onto TLS. Finally, once all nodes are speaking only TLS, the support for insecure connections must be removed entirely. 
+ +To achieve this, Vespa supports a feature called *insecure mixed mode*. Enabling mixed mode lets all servers handle both TLS and insecure traffic at the same time. + +Mixed mode is controlled via the value set in the environment variable `VESPA_TLS_INSECURE_MIXED_MODE`. + +TLS rollout happens in 3 phases: + +**Phase 1:** clients do not use TLS, servers accept both TLS and plaintext clients + +<Steps> +<Step> +Set `VESPA_TLS_INSECURE_MIXED_MODE=plaintext_client_mixed_server`. +</Step> +<Step> +Set `VESPA_TLS_CONFIG_FILE` as documented in [Configuring Vespa TLS](#configuring-vespa-tls). +</Step> +<Step> +Rolling restart of all Vespa services to make mixed mode take effect. +</Step> +</Steps> + +**Phase 2:** clients use TLS, servers accept both TLS and plaintext clients + +<Steps> +<Step> +Set `VESPA_TLS_INSECURE_MIXED_MODE=tls_client_mixed_server`. +</Step> +<Step> +Rolling restart of all Vespa services to make mixed mode take effect. +</Step> +</Steps> + +**Phase 3:** all clients and servers use TLS only + +<Steps> +<Step> +Remove the `VESPA_TLS_INSECURE_MIXED_MODE` environment variable. +</Step> +<Step> +Rolling restart of all Vespa services to make enforced TLS take effect. +</Step> +</Steps> + +<Warning> +**Warning:** + +The insecure mixed mode environment variable MUST be removed from all nodes (and all services subsequently restarted) before a cluster can be considered secure. Even a single service left with insecure mixed mode enabled could be used by a determined attacker as a jumpgate into other (believed secure) services. +</Warning> + +## Verify configuration of TLS + +Successful configuration should be verified at runtime once TLS is enabled on all nodes. The [openssl s_client](https://www.openssl.org/docs/man1.1.1/man1/openssl-s_client.html) tool is suitable for this. Connect to a Vespa service, e.g. a configserver on port 19071 or a container on port 8080, and verify that `openssl s_client` successfully completes the TLS handshake. 
+ +```bash +$ openssl s_client -connect <hostname>:<port> \ + -CAfile /absolute/path/to/ca-certs.pem \ + -key /absolute/path/to/private-key.pem \ + -cert /absolute/path/to/host-certs.pem +``` + +Further, you should verify that servers require clients to authenticate by omitting `-key`/`-cert` from the above command. The `s_client` tool should print an error during handshake and exit immediately. + +```bash +$ openssl s_client -connect <hostname>:<port> \ + -CAfile /absolute/path/to/ca-certs.pem +``` + +## FAQ + +<AccordionGroup> +<Accordion title="Should TLS be used even if I have a latency-sensitive real-time search application?"> +Yes. The Vespa cloud team has run many such applications in production for a long time and the overhead imposed by TLS is negligible. Significant effort has been spent tuning Vespa's TLS integrations to keep overhead to a minimum. +</Accordion> +<Accordion title="How much overhead does TLS impose in practice?"> +With modern CPUs, expect somewhere around 1-2% extra CPU usage for symmetric encryption (i.e. active connections). Connection handshakes have an expected extra latency of 2-4 ms of CPU time (network latency not included) due to more expensive cryptographic operations. Vespa performs handshake operations in separate threads to avoid stalling other network traffic. Vespa also uses long-lived connections internally to reduce the number of handshakes. +</Accordion> +</AccordionGroup> + +## Troubleshooting + +### Certificate validation fails due to mismatching hostnames + +Vespa enables the [HTTPS endpoint identification algorithm](https://datatracker.ietf.org/doc/html/rfc2818#section-3) by default. This extra verification can only be used if all certificates have their respective host's IP addresses and hostnames in the Subject / Subject Alternative Names extensions. [Disable hostname validation](/en/reference/security/mtls#top-level-elements) if this is not the case. 
+ +## Appendix A: setting up with a self-signed Certificate Authority + +Our goal is to create cryptographic keys and certificates that can be used by Vespa for secure mTLS communication within a single Vespa installation. + +This requires the following steps, which we'll go through below: + +<Steps> +<Step> +[Creating a root Certificate Authority](/en/security/mtls#creating-a-root-certificate-authority-ca). This is only done once, regardless of how many Vespa hosts you want to secure. +</Step> +<Step> +[Creating a private key and Certificate Signing Request (CSR) for each Vespa host](/en/security/mtls#creating-a-private-key-and-certificate-for-a-vespa-host). +</Step> +<Step> +[Signing the CSR using the CA, creating a certificate for each Vespa host](#sign-host-certificate). +</Step> +</Steps> + +We'll be using the [OpenSSL command-line tool](https://www.openssl.org/docs/man1.1.1/man1/) to generate all our crypto keys and certificates. + +<Note> +**Note:** + +If you are setting up Vespa in an organization that already has procedures for provisioning keys and certificates, you should first reach out to the team responsible for this to make sure you're following best practices. +</Note> + +### Creating a root Certificate Authority (CA) + + +When a server (or client) presents a certificate as part of proving its identity to us, we must have a way to determine if this information is trustworthy. We do this by verifying if the certificate is *cryptographically signed* by a [Certificate Authority (CA)](https://en.wikipedia.org/wiki/Certificate_authority) that we already know we can trust. It is possible that the certificate is in fact signed by a CA that we don't directly trust, but that in turn is signed by a CA that we *do* trust. These are known as *intermediate* Certificate Authorities and are part of what's known as the *certificate chain*. There may be more than one intermediate CA in a chain. In our simple setup we will not be using any intermediate CAs. 
+ +At the top of the chain sits a *root* Certificate Authority. Since we trust the root CA, we also implicitly trust any intermediate CA it has signed and in turn any leaf certificates such an intermediate CA has signed. + +A root Certificate Authority is special in that it has no CA above it to sign it. It is *self-signed*. + +To create our own root CA for our Vespa installation we'll first create its [*private key*](https://en.wikipedia.org/wiki/Public-key_cryptography). + +We have two choices of what kind of key to create; either based on [RSA](https://en.wikipedia.org/wiki/RSA_\(cryptosystem\)) or [Elliptic Curve (EC)](https://en.wikipedia.org/wiki/Elliptic-curve_cryptography) cryptography. EC keys are faster to process than RSA-based keys and take up less space, but older OS versions or cryptographic libraries may not support these. In the latter case, RSA keys offer the highest level of backwards compatibility. + +(Recommended) either create an Elliptic Curve private key: + +```bash +$ openssl ecparam -name prime256v1 -genkey -noout -out root-ca.key +``` + +**OR:** create an RSA private key: + +```bash +$ openssl genrsa -out root-ca.key 2048 +``` + +The root CA private key is stored in `root-ca.key`. This key is used to sign certificates and the file MUST therefore be kept secret! If it is compromised, an attacker can create any number of valid certificates that impersonate your Vespa hosts. + +We'll now create our CA X.509 certificate, self-signed with the private key. Substitute the information given in `-subj` with whatever is appropriate for you; it's not really important for our simple usage. + +```bash +$ openssl req -new -x509 -nodes \ + -key root-ca.key \ + -out root-ca.pem \ + -subj '/C=US/L=California/O=ACME/OU=ACME test root CA' \ + -sha256 \ + -days 3650 +``` + +Copy the resulting `root-ca.pem` file to your Vespa node(s) and point the `"ca-certificates"` field in the TLS config file to its absolute file path on the node. 
+ +With both the CA key and certificate, we have what we need to start signing certificates for the hosts Vespa will be running on. + +### Creating a private key and certificate for a Vespa host + +<Note> +***Note:*** + +*This section can be repeated for each Vespa host in your application. See [Alternatives to having a unique certificate per individual host](/en/security/mtls#alternatives-to-having-a-unique-certificate-per-individual-host) for (possibly less secure) options that do not require doing this step per host.* +</Note> + +Just like our CA our host needs its own private cryptographic key. + +If we're using Elliptic Curve keys: + +```bash +$ openssl ecparam -name prime256v1 -genkey -noout -out host.key +``` + +**OR:** if we're using RSA keys: + +```bash +$ openssl genrsa -out host.key 2048 +``` + +As part of creating the certificate we'll first create a [Certificate Signing Request (CSR)](https://en.wikipedia.org/wiki/Certificate_signing_request). Again, you can substitute the information in `-subj` with something more appropriate for you. + +```bash +$ openssl req -new \ + -key host.key -out host.csr \ + -subj '/C=US/L=California/OU=ACME/O=My Vespa App' \ + -sha256 +``` + +#### Sign host certificate + +By default, Vespa runs with TLS hostname validation enabled, which requires the server's certificate to contain a hostname matching what the client is connecting to. This is fundamental to the security of protocols such as HTTP, but often sees less use with mTLS. Vespa supports it as an added layer of security. Using certificates containing hostnames has the added benefit that you can run tools such as `curl` against Vespa HTTPS status pages without having to explicitly disable certificate verification. + +Certificates can contain many entries known as ["Subject Alternate Names" (SANs)](https://en.wikipedia.org/wiki/Subject_Alternative_Name) that list what DNS names and IP addresses the certificate is issued for. 
We'll add a single such DNS SAN entry with the hostname of our node. We'll also use the opportunity to add certain X.509 extensions to the certificate that specify exactly what the certificate can be used for. + +Below, substitute `myhost.example.com` with the hostname of your Vespa node. + +```bash +$ cat > cert-exts.cnf << EOF +[host_cert_extensions] +basicConstraints = critical, CA:FALSE +keyUsage = critical, digitalSignature, keyAgreement, keyEncipherment +extendedKeyUsage = serverAuth, clientAuth +subjectKeyIdentifier = hash +authorityKeyIdentifier = keyid,issuer +subjectAltName = @host_sans +[host_sans] +DNS.1 = myhost.example.com +EOF +``` + +We can now use our existing CA key and certificate to sign the host's CSR, additionally providing the above file of certificate extensions to OpenSSL. + +```bash +$ openssl x509 -req \ + -in host.csr \ + -CA root-ca.pem \ + -CAkey root-ca.key \ + -CAcreateserial \ + -out host.pem \ + -extfile cert-exts.cnf \ + -extensions host_cert_extensions \ + -days 3650 \ + -sha256 +``` + +This creates an X.509 certificate in PEM format for the host, valid for 3650 days from the time of signing. + +We can inspect the certificate using the `openssl x509` command. Here's some example output for a certificate using EC keys. Your output will look different since the serial number, dates and key information etc. will differ. 
+ +```bash expandable +$ openssl x509 -in host.pem -text -noout +Certificate: + Data: + Version: 3 (0x2) + Serial Number: 13516182920561857512 (0xbb9320c1234a93e8) + Signature Algorithm: ecdsa-with-SHA256 + Issuer: C=US, L=California, O=ACME, OU=ACME test root CA + Validity + Not Before: Aug 19 13:09:37 2021 GMT + Not After : Aug 17 13:09:37 2031 GMT + Subject: C=US, L=California, OU=ACME, O=My Vespa App + Subject Public Key Info: + Public Key Algorithm: id-ecPublicKey + Public-Key: (256 bit) + pub: + 04:ed:01:0e:1e:c5:05:17:99:41:74:68:a0:c5:32: + 52:4f:45:d5:04:f8:a0:9c:35:26:ae:66:0c:e5:89: + 34:5c:21:09:b8:a9:ed:81:22:06:bb:d1:1c:9e:13: + 80:0a:9a:9e:0c:a0:78:ac:7c:c4:6f:1c:ec:e6:df: + c1:59:2d:71:8e + ASN1 OID: prime256v1 + NIST CURVE: P-256 + X509v3 extensions: + X509v3 Basic Constraints: critical + CA:FALSE + X509v3 Key Usage: critical + Digital Signature, Key Encipherment, Key Agreement + X509v3 Extended Key Usage: + TLS Web Server Authentication, TLS Web Client Authentication + X509v3 Subject Key Identifier: + 08:EF:C7:B4:95:36:64:EC:2A:2F:9F:5A:C3:EA:F0:98:2C:E5:78:EC + X509v3 Authority Key Identifier: + DirName:/C=US/L=California/O=ACME/OU=ACME test root CA + serial:94:77:40:20:69:50:87:45 + + X509v3 Subject Alternative Name: + DNS:myhost.example.com + Signature Algorithm: ecdsa-with-SHA256 + 30:45:02:21:00:91:58:bb:7f:47:75:60:c3:49:09:b3:d2:54: + ad:d2:47:58:1c:17:c7:5a:5f:f0:f4:9c:67:e9:6a:44:21:8e: + 08:02:20:23:9c:99:42:1b:91:29:26:f7:83:58:d1:09:65:38: + c1:18:e8:0d:55:3a:57:f6:e0:c6:5b:72:57:e4:d9:6a:d8 +``` + +Copy `host.key` and `host.pem` to your Vespa host and point the `"private-key"` and `"certificates"` TLS config fields to their respective absolute paths. The CSR and extension config files can be safely discarded. 
+ +<Warning> +**Warning:** + +Ensure that `host.key` is only readable by the Vespa user on your host(s) +</Warning> + +### Alternatives to having a unique certificate per individual host + +It's possible to avoid having to create a separate certificate per host in favor of a single certificate shared between all hosts. + +- Hostname SANs do not have to be added (or match) if hostname validation is explicitly disabled in the TLS config file. **Caveat:** this makes it impossible for clients to verify that they're talking to the host they expected. +- Many SAN entries can be added to the extension file, one per host. **Caveat:** new certificates must be generated if new hosts are added to the Vespa application that aren't already in the SAN list. +- If all hosts share a common pattern (e.g. `foo.vespa.example.com` and `bar.vespa.example.com`) it's possible to use a wildcard DNS SAN entry (`*.vespa.example.com`) instead of listing all hosts. + +However, for production deployments we recommend using a distinct certificate per host to help mitigate the impact of a host being compromised. diff --git a/mintlify-docs/en/security/secret-store.mdx b/mintlify-docs/en/security/secret-store.mdx new file mode 100644 index 0000000000..008911276e --- /dev/null +++ b/mintlify-docs/en/security/secret-store.mdx @@ -0,0 +1,129 @@ +--- +title: "Secret Store" +--- + +Vespa Cloud supports secure storage and management of secrets for use in your application. A secret is a text-based value such as an API key, a token or other private configuration value required by your application. + +By organizing secrets into vaults, setting application-specific access controls, and integrating secrets cleanly into your application code, Vespa Cloud ensures that sensitive data like API keys and tokens are kept safe and are easily updatable. + +This guide takes you through secret management for your tenant and how to use them in your application. 
+ +Use the [Retrieval Augmented Generation (RAG) in Vespa](https://github.com/vespa-engine/sample-apps/tree/master/retrieval-augmented-generation#deploying-to-the-vespa-cloud-using-gpu) sample application for a practical example getting started using the Secret Store. This example uses the Secret Store to store an OpenAI API key. + +## Secret management + +In the Vespa Cloud console, the "Account" section of your tenant contains a "Secret store" tab. This is where you configure all secrets for your tenant. + +### Vaults + +Secrets are organized into vaults, where each vault can contain a number of secrets. The vault also contains rules for which applications can use the secrets in the vault. You can have any number of vaults. + +To create a new vault, click the "+ New vault" button. The vault name must match the rule `[.a-zA-Z0-9_-]` meaning only alphanumeric characters and `.`, `_`, and `-` are allowed. Spaces are not allowed. + +<Frame>![Secret store overview](/assets/img/secret-store.png)</Frame> + +After creation, you can delete the entire vault by clicking the red trash bin button on the top right. + +### Access control + +Each vault has an "Access control" section which determines which application has access to the secrets in the vault. For each application, you can set up which environment - [dev](/en/operations/environments#dev) or [prod](/en/operations/environments#prod) (including test and staging) - the application should have access within. + +Note that the application must have been created before you can set access control to it. Use the steps at [Retrieval Augmented Generation (RAG) in Vespa](https://github.com/vespa-engine/sample-apps/tree/master/retrieval-augmented-generation#deploying-to-the-vespa-cloud-using-gpu) to create an application and grant access. + +### Secrets + +To add a new secret, click the "+ New secret" button. The same naming rules apply for secrets. You can give any value to the secret. 
Note that once this is saved the secret will never be visible again. You can update the secret to a new value, but never retrieve the actual value. Maximum length for a secret is 64K characters. + +Each tenant has a limit of 15 secrets. + +<Frame>![Creating new secret](/assets/img/secret-store-secret.png)</Frame> + +After the secret has been created, you can update the secret to a new value or delete it. + +Note that when a secret is updated, applications using it will start using this new value within 60 seconds. + +Also note that your application will not deploy successfully if it requests a secret that is not available, either because the secret is not defined or because the application does not have access to it. + +## Example: Using an OpenAI API key for RAG + +Set up a RAG search chain that uses an OpenAI API key as secret: + +```xml expandable +<services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties"> + <container id="default" version="1.0"> + + <!-- configure the OpenAI API key secret --> + <secrets> + <apiKey vault="my-vault" name="openai-api-key" /> + </secrets> + + <!-- configure the OpenAI client to use the secret --> + <component id="openai" class="ai.vespa.llm.clients.OpenAI"> + <config name="ai.vespa.llm.clients.llm-client"> + <apiKeySecretRef>apiKey</apiKeySecretRef> + </config> + </component> + + <!-- configure a search chain to use the OpenAI client --> + <search> + <chain id="rag" inherits="vespa"> + <searcher id="ai.vespa.search.llm.RAGSearcher"> + <config name="ai.vespa.search.llm.llm-searcher"> + <providerId>openai</providerId> + </config> + </searcher> + </chain> + </search> + + </container> +</services> +``` + +Try [Retrieval Augmented Generation (RAG) in Vespa](https://github.com/vespa-engine/sample-apps/tree/master/retrieval-augmented-generation#deploying-to-the-vespa-cloud-using-gpu) for a practical example. 
+ +## Using secrets + +To use the secret in an application, add `secrets` to `services.xml`: + +```xml +<services version="1.0" xmlns:deploy="vespa" xmlns:preprocess="properties"> + <container id="default" version="1.0"> + + <secrets> + <myApiKey vault="my-vault" name="my-api-key" /> + </secrets> + + </container> +</services> +``` + +In this example, we refer to a secret named `my-api-key` in the vault `my-vault` with the name `myApiKey` in the application. + +To access this secret in a custom component, inject the `Secrets` as a constructor parameter in the component, like a Searcher: + +```java expandable +import ai.vespa.secret.Secret; +import ai.vespa.secret.Secrets; +... + +public class MySearcher extends Searcher { + + private final Secret apiKeySecret; + + public MySearcher(Secrets secrets) { + apiKeySecret = secrets.get("myApiKey"); + } + + @Override + public Result search(Query query, Execution execution) { + String apiKey = apiKeySecret.current(); + // ... do something with the current value of secret ... + return execution.search(query); + } + +} +``` + +Typically, store the `Secret` in your class, and when you want to use the secret value itself, you call `Secret.current();`. This ensures that you will use the current secret value if it is updated. Note that it can take up to 60 seconds for the current secret value to be updated for your container code. + +Ensure that you do not store the `current` value itself - then the secret value will not be updated when the configuration is changed. diff --git a/mintlify-docs/en/security/securing-your-vespa-installation.mdx b/mintlify-docs/en/security/securing-your-vespa-installation.mdx new file mode 100644 index 0000000000..62a1b95721 --- /dev/null +++ b/mintlify-docs/en/security/securing-your-vespa-installation.mdx @@ -0,0 +1,54 @@ +--- +title: "Securing a Vespa Installation" +--- + +<Note> +**Note:** + +This document is only relevant for **self-hosting Vespa** as Vespa Cloud does this for you. 
+</Note> + +It is critical to understand the security requirements and limitations of any networked system. Vespa is no exception. This document gives the most important information related to security at the network and physical host levels. + +To keep the self-hosted Vespa installation safe, follow the guidelines outlined below: +1. Isolate the Vespa hosts +2. Secure the application container with access control filters and TLS +3. Lockdown directory permissions +4. Securing Vespa with mutually authenticated TLS (mTLS) + + +## Isolating the Vespa hosts + +**When running self-hosted Vespa instances, hosts running Vespa MUST NOT be directly exposed to the public internet or to untrusted networks. Failure to ensure this may lead to data exfiltration/infiltration or host compromise.** + +Vespa's internal protocols are not authenticated by default and are therefore not safe in the face of untrusted network actors. When running Vespa in your own organization, or on the public cloud in particular, this is something you must take into account. + +Connections to *any* hosts running Vespa services should *only* be allowed from a controlled set of trusted hosts. All Vespa hosts must be able to connect to, and receive connections from, all other Vespa hosts that are part of the same installation. For added security, consider limiting Vespa hosts to only be able to talk to other Vespa hosts. If you are contacting external services as part of [federation](/en/querying/federation) in the application container, your container hosts must be able to connect to these services. + +This may be implemented by e.g. iptables, AWS Security Groups or similar technologies. + +The entry point into your Vespa installation is port 8080 on hosts running the application container. This port is used for feed, document retrieval and search queries. It should only be exposed to an untrusted network if you have properly [secured the application container](#securing-the-application-container). 
It should never be exposed directly to external traffic. All traffic to the containers should be sent by your frontends or backends. + +Internode communication inside a Vespa installation is not encrypted by default and will need to be configured explicitly as documented in [Securing Vespa with mutually authenticated TLS](/en/security/mtls). + +## Securing the application container + +By default, the container allows unauthenticated writes to, and reads from, the Vespa installation. For a production deployment, this must be locked down. + +Connections to the HTTP containers may be [protected with TLS](/en/applications/http-servers-and-filters#tls). Mutual TLS is supported and should be configured. + +Access to the container API endpoints can be controlled using [request filters](/en/applications/http-servers-and-filters#set-up-filter-chains). These filters can implement the required authentication and authorization logic for your specific use case. + +If you do not set up TLS with restrictive filter logic, you should restrict the container port in the same way as you would the rest of the Vespa hosts. + +## Locking down directory permissions + +All Vespa processes run under the Linux user given by `$VESPA_USER` and store their data under `$VESPA_HOME`. You should ensure the files and directories under `$VESPA_HOME` are not accessible by other users if you store sensitive data in your application. + +Note also that private keys used by the container to set up TLS must be protected to be readable by the container process only. + +Vespa does not have application-level support for encryption of on-disk document stores or indexes. This would typically be implemented through file-system or storage-level (e.g. remote cloud storage) encryption. + +## Securing Vespa with mutually authenticated TLS (mTLS) + +Protect all internal endpoints and protocols in Vespa with mutually authenticated Transport Layer Security (mTLS). 
See the [dedicated documentation](/en/security/mtls) on how to get started, and try the [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) example. diff --git a/mintlify-docs/en/security/security.mdx b/mintlify-docs/en/security/security.mdx new file mode 100644 index 0000000000..0105f3df7a --- /dev/null +++ b/mintlify-docs/en/security/security.mdx @@ -0,0 +1,9 @@ +--- +title: "Vespa Cloud Security" +sidebarTitle: "Security overview" +--- + +- [**Security Guide**](/en/security/guide) is a practical guide to using the different security features and getting started with them. +- [**Secret Store**](/en/security/secret-store) is a guide on how to integrate AWS Parameter Stores with Vespa Cloud. +- [**Cloudflare Workers**](/en/security/cloudflare-workers) describes how you can access mutual TLS protected Vespa Cloud endpoints using Cloudflare Workers. +- [**Whitepaper**](/en/security/whitepaper) is an in-depth description of the security architecture of Vespa Cloud. diff --git a/mintlify-docs/en/security/whitepaper.mdx b/mintlify-docs/en/security/whitepaper.mdx new file mode 100644 index 0000000000..a1309edb69 --- /dev/null +++ b/mintlify-docs/en/security/whitepaper.mdx @@ -0,0 +1,157 @@ +--- +title: "Vespa Cloud Security Whitepaper" +sidebarTitle: "Security Whitepaper" +--- + +## Introduction + +This document describes the Vespa Cloud service security features and operational procedures. + +Vespa.ai has a SOC 2 attestation - read more in the [Trust Center](https://trust.vespa.ai/). + +## Concepts and architecture + +<Frame>![Vespa Cloud overall architecture diagram](/assets/img/overall-architecture.png)</Frame> + +The Vespa Cloud consists of a *Control Plane* and a *Data Plane*. Each has its own web service APIs, respectively managing Vespa applications (Control), and interacting with a deployed Vespa application (Data). 
+ +The Control Plane manages deployment of applications in the zones they specify, and lets tenant administrators manage their tenant information in Vespa Cloud. The Control Plane is shared among all tenants in Vespa Cloud and is globally synchronized. + +The Data Plane lets the tenants communicate with their deployed Vespa applications. It supports queries, feeding, and any other type of requests the tenant has configured and deployed in their application. The Data Plane is isolated for each tenant, application, and (optionally) service. + +The Vespa Cloud is divided into *Zones*. A zone is a combination of an *environment* and a *region* and has a name like *prod.aws-us-east-1c*. Zones are stand-alone and do not have critical dependencies on services outside the zone. Tenants can implement service redundancy by specifying that applications be deployed in multiple zones. + +A Zone is managed by a *Configuration Server* cluster. The cluster receives the application packages from the *Control Plane* on deployment and manages the local deployment process in the zone, including provisioning the node resources required to run the deployed application in the zone. Separately, it is responsible for maintaining those resources - replacing failed nodes, upgrading their OS and similar. + +Vespa applications run on *Nodes* - a Linux container executed on a *Host*. The *Host* is the actual machine running the containers. Each Host has a management process that receives instructions from the Configuration Server about what containers should run on the Host. Once started, the containers ask the Configuration Server cluster what Vespa services of what application they should run. + +It is the individual Node that contains the customer data such as indexes and documents, and which receives the queries and feeding requests from the customer's authenticated and authorized clients. Each Node is always dedicated to a single Vespa application cluster. 
Hosts are shared by default, but applications may specify that they require dedicated hosts to obtain an additional level of security isolation. + +## Service deployment + +### Control plane authentication and authorization + +#### Control plane API access + +All API operations towards the Vespa Cloud control plane require authorization, and no tenant or application information will be presented for unauthorized access. A user can present a valid OAuth2 token which will be verified by the API. If an OAuth2 token is not available the user can choose to use an API key instead. The intended use for API keys is for service automation (e.g. CI/CD workflows or GitHub actions), but they can also be used by developers. + +#### Roles and privileges + +Members of tenants in Vespa Cloud can be assigned to three different roles that grant different privileges: + +- **Reader:** Can read tenant and application metadata. This is the minimal privilege which is implicitly granted to all members of a tenant. +- **Developer:** Can create applications, deploy to dev and prod zones. These are the privileges needed by members working on applications. +- **Administrator:** Can manage members of a tenant and tenant metadata, such as tenant contact information and billing actions. + +All role memberships are stored in an external identity provider. + +#### Control plane audit logs + +All operations against the control plane are persisted in an audit log capturing *timestamp*, *client*, *principal* (user), *HTTP method*, *resource* accessed, and *payload* (for certain requests). As this data can potentially be sensitive, it is available upon request from Vespa Cloud support. + +### Service isolation + +<Frame>![image](/assets/img/service-isolation.png "Service isolation")</Frame> + +Nodes belonging to the same application are allowed to communicate with each other while nodes of different applications are isolated on the network layer and through authorization. 
+ +Communication between Vespa services is encrypted and authenticated using mutual TLS (mTLS). Identities and certificates are provided by infrastructure components that can validate the configuration. + +#### Access control and service identity + +Each host and node has a unique cryptographic service identity. This identity is required in all inter-service communication, including HTTPS and internal binary RPC protocols. On the host, node, and configuration server level there are authorization rules in place to ensure that only relevant services can communicate with each other and retrieve resources from shared services, like the configuration server. + +#### Node isolation + +The identity of the node is based on the tenant, application, and instance the node is part of. The host and configuration server will together establish the identity of the node. The configuration server tells the host which nodes it should start, and the host requests a cryptographic identity for the nodes from the identity provider. + +This node identity is used for all internal communication inside the application. + +Nodes are implemented as Linux containers on the hosts. Each node runs in its own container user namespace, and each node has a dedicated IP address. + +#### Host isolation + +The lowest physical resource in the service architecture is a host. The configuration server is responsible for provisioning hosts and will keep track of known hosts, and reject any unknown hosts. Hosts only communicate directly with the configuration server and cannot communicate with each other. + +#### Configuration isolation + +Both nodes and hosts will consume application configuration from the configuration server. The configuration server will apply authorization rules based on the host and node identity. Authorization rules are based on least privilege. Hosts will only see which nodes to run, while the nodes are able to access the application configuration. 
+ +#### Network isolation + +All communication between services is protected through mTLS. mTLS authorization is based on the identity mentioned above. In addition, network level isolation is used to prevent any unauthorized network access between services. The network rules are configured in the configuration server and applied by the host. Changes to the topology are reflected within minutes. + +## Communication + +### Data plane + +All access to application endpoints is secured by mTLS and optionally token authentication. Upon deployment, every application is provided a certificate with SAN DNS names matching the endpoint names. This certificate will be automatically refreshed every 90 days. The application owner must provide a set of trusted Certificate Authorities which will be used by all clients when accessing the endpoints using mTLS. + +### Federation + +It is possible for an application owner to federate calls to 3rd party services. Either as scheduled jobs, or per request. To support this use case we provide access to a credential storage in the customer's AWS account. + +## Data Storage + +### Encryption at Rest + +All customer data is encrypted at rest using the cloud provider's native encryption capabilities (AWS KMS or Google Cloud KMS). Encryption is performed with the following properties: + +- Cipher: A strong, industry-standard cipher such as AES-256 (or the provider's default strong cipher) +- Key Management: Customer-managed keys within the respective cloud provider's key management service (AWS KMS or Google Cloud KMS) + +Access to the keys is strictly controlled and audited through IAM roles and policies employing least privilege. Key rotation is managed automatically by the cloud provider on a regular basis. + +### Data classification + +All data handled by Vespa Cloud is classified into two different classes which have different policies associated with them. 
+ +- **Internal data:** Information intended for internal consumption in Vespa Cloud operations. This includes system level logs from services that do not handle customer data. Internal data is readable by authenticated and authorized members of the Vespa Cloud engineering team. +- **Confidential data:** Confidential data is data that is sensitive to Vespa Cloud or Vespa Cloud customers. Access to confidential data is subject to stringent business need-to-know. Access to confidential data is regulated and only granted to Vespa Cloud team members in a peer-approved, time-limited, and audited manner. *All customer data is considered confidential.* + +### Asset types + +| Asset | Class | Description | +| :--- | :--- | :--- | +| Control Plane data | Internal | The Control Plane maintains a database to facilitate orchestration of Vespa applications in multiple zones. This contains metadata about tenants and applications in Vespa Cloud. | +| Configuration Server data | Confidential | The configuration server database contains the Vespa application model as well as the orchestration database. Since the configuration server is part of establishing node and host identities, the configuration server data is considered confidential. | +| Infrastructure logs | Internal | Logs from infrastructure services like the configuration servers, the control plane services, etc. are considered internal. This includes logs from Control Plane, Configuration Servers, and Hosts. | +| Application package | Internal | The application.zip file uploaded to Vespa Cloud by the customer is considered internal. The application package contains settings and configuration that Vespa Cloud operations needs insight in to operate the platform. | +| Node logs | Confidential | The logs inside the Node may contain data printed by the customer. Because of this the logs are classified as confidential since Vespa Cloud cannot guarantee they are free of confidential data. 
This includes Data Plane access logs in addition to the node Vespa logs. | +| Core dumps / heap dumps | Confidential | Occasionally core dumps and heap dumps are generated for running services. These files may contain customer data and are considered confidential. | +| Node data | Confidential | All data on the node itself is considered confidential. This data includes the document data and the indexes of the application. | + +#### Logs + +All logs are stored on the nodes where they are generated, but also archived to a remote object storage. All logs are kept for a maximum of 30 days. Access to logs is based on the classifications described above. All logs are persisted in the same geographic region as the Vespa application that generated them. + +Archived logs are encrypted at rest with keys automatically rotated at regular intervals. + +Logs on the node are encrypted at rest with the same mechanism that encrypts indexes and document databases. + +### Access management + +Access to confidential data is only granted on a case-by-case basis. Access is reviewed, time-limited, and audited. No Vespa Cloud team member is allowed to access any confidential data without review. + +## Security Measures + +Vespa Cloud employs a multi-layered approach to security, encompassing vulnerability management, secure development practices, and proactive testing. These include: + +### Security Testing + +Vespa Cloud proactively assesses its security posture through: + +- A vulnerability disclosure program, detailed at https://vespa.ai/responsible-disclosure/, enabling security researchers to responsibly report potential vulnerabilities. +- A yearly hybrid security pentest program, conducted in partnership with Intigriti, to proactively identify and address vulnerabilities. + +### Secure Development + +Vespa Cloud follows a CI/CD process with mandatory code review for all commits. Static analysis tools are employed to detect issues in source code and third-party dependencies. 
In addition, the security team conducts regular internal security reviews of code and infrastructure to identify and address potential vulnerabilities throughout the development lifecycle. + +### Vulnerability Management + +Vespa is released up to 4 times a week, and we strive to keep all applications and dependencies updated to the latest versions. Operating system upgrades are rolled out every 90 days to address OS-level vulnerabilities. In case of a severe security issue, fixes are applied and rolled out as quickly as possible. + +### Incident Response + +Any unexpected production issue, including security incidents, is handled through our incident management process. Non-security incidents are announced through our console. Security incidents are communicated directly to affected customers. A post-mortem review process is initiated after every incident. In the event of a potential security breach, a forensic investigation is conducted. diff --git a/mintlify-docs/en/writing/batch-delete.mdx b/mintlify-docs/en/writing/batch-delete.mdx new file mode 100644 index 0000000000..0c53696354 --- /dev/null +++ b/mintlify-docs/en/writing/batch-delete.mdx @@ -0,0 +1,187 @@ +--- +title: "Batch delete" +--- + +Options for batch deleting documents: + +<Steps> +<Step> +Use [vespa feed](/en/clients/vespa-cli#documents): + +```sh +$ vespa feed -t my-endpoint deletes.json +``` + +</Step> +<Step> +Find documents using a query, delete, repeat. Pseudocode: + +```sh +while True; do + query and read document IDs, if empty exit + delete document IDs using [/document/v1](/en/reference/api/document-v1#delete) + wait a sec # optional, add wait to reduce load while deleting +``` +</Step> +<Step> +Use a [document selection](/en/schemas/documents#document-expiry) to expire documents. This deletes all documents *not* matching the expression. It is possible to use parent documents and imported fields for expiry of a document set. 
The content node will iterate over the corpus and delete documents (that are later compacted out): + +```xml +<documents garbage-collection="true"> + <document type="mytype" + mode="index" + selection="mytype.version > 4" /> +</documents> +``` +</Step> +<Step> +Use [/document/v1](/en/reference/api/document-v1#delete) to delete documents identified by a [document selection](/en/reference/writing/document-selector-language) — example dropping all documents from the *my_doctype* schema. The *cluster* value is the ID of the content cluster in *services.xml*, e.g., `<content id="my_cluster" version="1.0">`: + +```sh +$ curl -X DELETE \ + "$ENDPOINT/document/v1/my_namespace/my_doctype/docid?selection=true&cluster=my_cluster" +``` + +</Step> +<Step> +It is possible to drop a schema, with all its content, by removing the mapping to the content cluster. To understand what is happening, here is the status before the procedure: + +```sh +# ls $VESPA_HOME/var/db/vespa/search/cluster.music/n0/documents + +drwxr-xr-x 6 vespa vespa 4096 Oct 25 16:59 books +drwxr-xr-x 6 vespa vespa 4096 Oct 25 12:47 music +``` + +Remove the schema from configuration: + +```xml +<documents> + <document type="music" mode="index" /> + {/* document type="books" mode="index" / */} +</documents> +``` + +It is not required to remove the schema file itself. It is however required to add a `schema-removal` entry to [validation-overrides.xml](/en/reference/applications/validation-overrides): + +```xml +<validation-overrides> + <allow until="2022-10-31">schema-removal</allow> +</validation-overrides> +``` + +<Note> +**Note:** + +Use validation override name `content-type-removal` before Vespa 8.73. +</Note> + +Deploy the application package. 
This will reconfigure the content node processes, and the directory with the schema data is removed: + +```sh +# ls $VESPA_HOME/var/db/vespa/search/cluster.music/n0/documents + +drwxr-xr-x 6 vespa vespa 4096 Oct 25 12:47 music +``` + +Add the mapping back and redeploy — the cluster now has a `books` schema with zero documents. + +```sh +# ls $VESPA_HOME/var/db/vespa/search/cluster.music/n0/documents + +drwxr-xr-x 6 vespa vespa 4096 Oct 25 17:06 books +drwxr-xr-x 6 vespa vespa 4096 Oct 25 12:47 music +``` + +Use the [Custom Component State API](/en/content/proton#custom-component-state-api) to inspect document count per schema. + +The procedure, deploying with and without the schema, is an efficient way to drop all documents. After the procedure, it is good practice to remove *validation-overrides.xml* or the `schema-removal` element inside, to avoid accidental data loss later. The directory listing above is just for illustration. +</Step> +</Steps> + +## Example + +This is an end-to-end example on how to track number of documents, and delete a subset using a [selection string](/en/reference/writing/document-selector-language). + +### Feed sample documents + +Feed a batch of documents, e.g. 
using the [vector-search](https://github.com/vespa-cloud/vector-search) sample application: + +```sh +$ vespa feed <(python3 feed.py 100000 3) +``` + +See number of documents for a node using the [content.proton.documentdb.documents.total](/en/reference/operations/metrics/searchnode#content_proton_documentdb_documents_total) metric (here 100,000): + +```sh +$ docker exec vespa curl -s http://localhost:19092/prometheus/v1/values | grep ^content.proton.documentdb.documents.total + + content_proton_documentdb_documents_total_max{metrictype="standard",instance="searchnode",documenttype="vector",clustername="vectors",vespa_service="vespa_searchnode",} 100000.0 1695383025000 + + content_proton_documentdb_documents_total_last{metrictype="standard",instance="searchnode",documenttype="vector",clustername="vectors",vespa_service="vespa_searchnode",} 100000.0 1695383025000 +``` + +Using the metric above is useful while feeding this example. Another alternative is [visiting](/en/writing/visiting) all documents to print the ID: + +```sh +$ vespa visit --field-set "[id]" | wc -l + 100000 +``` + +At this point, there are 100,000 documents in the index. + +### Define selection + +Define the subset of documents to delete — e.g. by age or other criteria. In this example, select a random 1%. Do a test run: + +```sh +$ vespa visit --field-set "[id]" --selection 'id.hash().abs() % 100 == 0' | wc -l + 1016 +``` + +Hence, the selection string `id.hash().abs() % 100 == 0` hits 1,016 documents. + +### Delete documents + +Delete documents, see the number of documents deleted in the response: + +```sh +$ curl -X DELETE \ + "http://localhost:8080/document/v1/mynamespace/vector/docid?selection=id.hash%28%29.abs%28%29+%25+100+%3D%3D+0&cluster=vectors" + + { + "pathId":"/document/v1/mynamespace/vector/docid", + "documentCount":1016 + } +``` + +In case of a large result set, a `continuation` token is returned in the response. 
Loop until no more tokens are returned: + +```sh +$ ENDPOINT="http://localhost:8080" +$ URI="/document/v1/mynamespace/vector/docid?selection=id.hash%28%29.abs%28%29+%25+100+%3D%3D+0&cluster=vectors" +$ CONTINUATION="" +$ while true; do + RESPONSE=$(curl -s -X DELETE "${ENDPOINT}${URI}${CONTINUATION}") + echo "$RESPONSE" + TOKEN=$(echo "$RESPONSE" | jq -r '.continuation // empty') + if [ -z "$TOKEN" ]; then break; fi + CONTINUATION="&continuation=${TOKEN}" + done +``` + +### Validate + +Check that all documents matching the selection criterion are deleted: + +```sh +$ vespa visit --selection 'id.hash().abs() % 100 == 0' --field-set "[id]" | wc -l + 0 +``` + +List remaining documents: + +```sh +$ vespa visit --field-set "[id]" | wc -l + 98984 +``` diff --git a/mintlify-docs/en/writing/document-api-guide.mdx b/mintlify-docs/en/writing/document-api-guide.mdx new file mode 100644 index 0000000000..4f02bdbac0 --- /dev/null +++ b/mintlify-docs/en/writing/document-api-guide.mdx @@ -0,0 +1,468 @@ +--- +title: "Document API" +--- + +This is an introduction to how to build and compile Vespa clients using the Document API. It can be used for feeding, updating and retrieving documents, or removing documents from the repository. See also the [Java reference](https://javadoc.io/doc/com.yahoo.vespa/documentapi). + +Use the [VESPA\_CONFIG\_SOURCES](/en/operations/self-managed/files-processes-and-ports#environment-variables) environment variable to set config servers to interface with. 
+ +The most common use case is using the async API in a [document processor](/en/applications/document-processors) - from the sample apps: + +- Async GET in [LyricsDocumentProcessor.java](https://github.com/vespa-engine/sample-apps/blob/master/examples/document-processing/src/main/java/ai/vespa/example/album/LyricsDocumentProcessor.java) +- Async UPDATE in [ReviewProcessor.java](https://github.com/vespa-engine/sample-apps/blob/master/use-case-shopping/src/main/java/ai/vespa/example/shopping/ReviewProcessor.java) + +## Documents + +All data fed, indexed and searched in Vespa are instances of the `Document` class. A [document](/en/schemas/documents) is a composite object that consists of: + +- A `DocumentType` that defines the set of fields that can exist in a document. A document can only have a single *document type*, but document types can inherit the content of another. All fields of an inherited type are available in all its descendants. The document type is defined in the [schema](/en/reference/schemas/schemas), which is converted into a configuration file to be read by the `DocumentManager`. + All registered document types are instantiated and stored within the document manager. A reference to these objects can be retrieved using the `getDocumentType()` method by supplying the name and the version of the wanted document type. + `DocumentManager` initialization is done automatically by the Document API by subscribing to the appropriate configuration. +- A `DocumentId` which is a unique document identifier. The document distribution uses the document identifier, see the [reference](/en/content/buckets#distribution) for details. +- A set of `(Field, FieldValue)` pairs, or "fields" for short. The `Field` class has methods for getting its name, data type and internal identifier. The field object for a given field name can be retrieved using the `getField(<fieldname>)` method in the `DocumentType`. 
+ +See the [DocumentAccess](https://javadoc.io/doc/com.yahoo.vespa/documentapi/latest/com/yahoo/documentapi/DocumentAccess.html) javadoc. Sample app: + +```js +<dependencies> + <dependency> + <groupId>com.yahoo.vespa</groupId> + <artifactId>documentapi</artifactId> + <version>8.689.26</version> {/* Find latest version at search.maven.org/search?q=g:com.yahoo.vespa%20a:documentapi */} + </dependency> +</dependencies> +``` + +```java expandable +import com.yahoo.document.DataType; +import com.yahoo.document.Document; +import com.yahoo.document.DocumentId; +import com.yahoo.document.DocumentPut; +import com.yahoo.document.DocumentType; +import com.yahoo.document.DocumentUpdate; +import com.yahoo.document.datatypes.StringFieldValue; +import com.yahoo.document.datatypes.WeightedSet; +import com.yahoo.document.update.FieldUpdate; +import com.yahoo.documentapi.DocumentAccess; +import com.yahoo.documentapi.SyncParameters; +import com.yahoo.documentapi.SyncSession; + +public class DocClient { + + public static void main(String[] args) { + + // DocumentAccess is injectable in Vespa containers, but not in command line tools, etc. 
+ DocumentAccess access = DocumentAccess.createForNonContainer(); + DocumentType type = access.getDocumentTypeManager().getDocumentType("music"); + DocumentId id = new DocumentId("id:namespace:music::0"); + Document docIn = new Document(type, id); + SyncSession session = access.createSyncSession(new SyncParameters.Builder().build()); + + // Put document with a1,1 + WeightedSet<StringFieldValue> wset = new WeightedSet<>(DataType.getWeightedSet(DataType.STRING)); + wset.put(new StringFieldValue("a1"), 1); + docIn.setFieldValue("aWeightedset", wset); + DocumentPut put = new DocumentPut(docIn); + System.out.println(docIn.toJson()); + session.put(put); + + // Update document with a1,10 and a2,20 + DocumentUpdate upd1 = new DocumentUpdate(type, id); + WeightedSet<StringFieldValue> wset1 = new WeightedSet<>(DataType.getWeightedSet(DataType.STRING)); + wset1.put(new StringFieldValue("a1"), 10); + wset1.put(new StringFieldValue("a2"), 20); + upd1.addFieldUpdate(FieldUpdate.createAddAll(type.getField("aWeightedset"), wset1)); + System.out.println(upd1.toString()); + session.update(upd1); + + Document docOut = session.get(id); + System.out.println("document get:" + docOut.toJson()); + + session.destroy(); + access.shutdown(); + } +} +``` + +To test using the [sample apps](https://github.com/vespa-engine/sample-apps/tree/master/album-recommendation), enable more ports for client to connect to config server and other processes on localhost - change docker command: + +```js +$ docker run --detach --name vespa --hostname localhost --privileged \ + --volume $VESPA_SAMPLE_APPS:/vespa-sample-apps --publish 8080:8080 \ + --publish 19070:19070 --publish 19071:19071 --publish 19090:19090 --publish 19099:19099 --publish 19101:19101 --publish 19112:19112 \ + vespaengine/vespa +``` + + +## Fields + +Examples: + +```js expandable +doc.setFieldValue("aByte", (byte)1); +doc.setFieldValue("aInt", (int)1); +doc.setFieldValue("aLong", (long)1); +doc.setFieldValue("aFloat", 1.0); 
+doc.setFieldValue("aDouble", 1.0); +doc.setFieldValue("aBool", new BoolFieldValue(true)); +doc.setFieldValue("aString", "Hello Field!"); + +doc.setFieldValue("unknownField", "Will not see me!"); + +Array<IntegerFieldValue> intArray = new Array<>(doc.getField("aArray").getDataType()); +intArray.add(new IntegerFieldValue(11)); +intArray.add(new IntegerFieldValue(12)); +doc.setFieldValue("aArray", intArray); + +Struct pos = PositionDataType.valueOf(1,2); + pos = PositionDataType.fromString("N0.000002;E0.000001"); // two ways to set same value +doc.setFieldValue("aPosition", pos); + +doc.setFieldValue("aPredicate", new PredicateFieldValue("aLong in [10..20]")); + +byte[] rawBytes = new byte[100]; +for (int i = 0; i < rawBytes.length; i++) { + rawBytes[i] = (byte)i; +} +doc.setFieldValue("aRaw", new Raw(ByteBuffer.wrap(rawBytes))); + +Tensor tensor = Tensor.Builder.of(TensorType.fromSpec("tensor<float>(x[2],y[2])")). + cell().label("x", 0).label("y", 0).value(1.0). + cell().label("x", 0).label("y", 1).value(2.0). + cell().label("x", 1).label("y", 0).value(3.0). + cell().label("x", 1).label("y", 1).value(5.0).build(); +doc.setFieldValue("aTensor", new TensorFieldValue(tensor)); + +MapFieldValue<StringFieldValue, StringFieldValue> map = new MapFieldValue<>(new MapDataType(DataType.STRING, DataType.STRING)); +map.put(new StringFieldValue("key1"), new StringFieldValue("foo")); +map.put(new StringFieldValue("key2"), new StringFieldValue("bar")); +doc.setFieldValue("aMap", map); + +WeightedSet<StringFieldValue> wset = new WeightedSet<>(DataType.getWeightedSet(DataType.STRING)); +wset.put(new StringFieldValue("strval 1"), 5); +wset.put(new StringFieldValue("strval 2"), 10); +doc.setFieldValue("aWeightedset", wset); +``` + +## Document updates + +A document update is a request to modify a document, see [reads and writes](/en/writing/reads-and-writes). 
+ +Primitive, and some multivalue fields (WeightedSet and Array`<primitive>`), are updated using a [FieldUpdate](https://javadoc.io/doc/com.yahoo.vespa/document/latest/com/yahoo/document/update/FieldUpdate.html). + +Complex, multivalue fields like Map and Array`<struct>` are updated using [AddFieldPathUpdate](https://javadoc.io/doc/com.yahoo.vespa/document/latest/com/yahoo/document/fieldpathupdate/AddFieldPathUpdate.html), [AssignFieldPathUpdate](https://javadoc.io/doc/com.yahoo.vespa/document/latest/com/yahoo/document/fieldpathupdate/AssignFieldPathUpdate.html) and [RemoveFieldPathUpdate](https://javadoc.io/doc/com.yahoo.vespa/document/latest/com/yahoo/document/fieldpathupdate/RemoveFieldPathUpdate.html). Field path updates are only supported on non-attribute [fields](/en/reference/schemas/schemas#field), [index](/en/reference/schemas/schemas#index) fields, or fields containing [struct field](/en/reference/schemas/schemas#struct-field) attributes. If a field is both an index field and an attribute, then the document is updated in the document store, the index is updated, but the attribute is not updated. Thus, you can get old values in document summary requests and old values being used in ranking and grouping. 
A [field path](/en/reference/schemas/document-field-path) string identifies fields to update - example: + +```bash +upd.addFieldPathUpdate(new AssignFieldPathUpdate(type, "myMap{key2}", new StringFieldValue("abc"))); +``` + +*FieldUpdate* examples: + +```js expandable +// Simple assignment +Field intField = type.getField("aInt"); +IntegerFieldValue intFieldValue = new IntegerFieldValue(2); +FieldUpdate assignUpdate = FieldUpdate.createAssign(intField, intFieldValue); +upd.addFieldUpdate(assignUpdate); + +// Arithmetic +FieldUpdate addUpdate = FieldUpdate.createIncrement(type.getField("aLong"), 3); +upd.addFieldUpdate(addUpdate); + +// Composite - add one array element +upd.addFieldUpdate(FieldUpdate.createAdd(type.getField("aArray"), + new IntegerFieldValue(13))); + +// Composite - add two array elements +upd.addFieldUpdate(FieldUpdate.createAddAll(type.getField("aArray"), + List.of(new IntegerFieldValue(14), new IntegerFieldValue(15)))); + +// Composite - add weightedset element +upd.addFieldUpdate(FieldUpdate.createAdd(type.getField("aWeightedset"), + new StringFieldValue("add_me"),101)); + +// Composite - add set to set +WeightedSet<StringFieldValue> wset = new WeightedSet<>(DataType.getWeightedSet(DataType.STRING)); +wset.put(new StringFieldValue("a1"), 3); +wset.put(new StringFieldValue("a2"), 4); +upd.addFieldUpdate(FieldUpdate.createAddAll(type.getField("aWeightedset"), wset)); + +// Composite - update array element +upd.addFieldUpdate(FieldUpdate.createMap(type.getField("aArray"), + new IntegerFieldValue(1), // array index + new AssignValueUpdate(new IntegerFieldValue(2)))); // value at index + +// Composite - increment weight +upd3.addFieldUpdate(FieldUpdate.createIncrement(type.getField("aWeightedset"), + new StringFieldValue("a1"), 1)); + +// Composite - add to set +upd.addFieldUpdate(FieldUpdate.createMap(type.getField("aWeightedset"), + new StringFieldValue("element1"), // value + new AssignValueUpdate(new IntegerFieldValue(30)))); +``` + 
+*FieldPathUpdate* examples: + +```js +// Add an element to a map +Array stringArray = new Array(DataType.getArray(DataType.STRING)); +stringArray.add(new StringFieldValue("my-val")); +AddFieldPathUpdate addElement = new AddFieldPathUpdate(type, "aMap{key1}", stringArray); +upd.addFieldPathUpdate(addElement); + +// Modify an element in a map +upd.addFieldPathUpdate(new AssignFieldPathUpdate(type, "aMap{key2}", new StringFieldValue("abc"))); +``` + +### Update reply semantics + +Sending in an update for which the system cannot find a corresponding document to update is *not* considered an error. These are returned with a successful status code (assuming that no actual error occurred during the update processing). Use [UpdateDocumentReply.wasFound()](https://javadoc.io/doc/com.yahoo.vespa/documentapi/latest/com/yahoo/documentapi/UpdateResponse.html#wasFound\(\)) to check if the update was known to have been applied. + +If the update returns with an error reply, the update *may or may not* have been applied, depending on where in the platform stack the error occurred. + +## Document Access + +The starting point for passing documents and updates to Vespa is the `DocumentAccess` class. This is a singleton (see `get()` method) session factory (see `createXSession()` methods), that provides three distinct access types: + +- **Synchronous random access**: provided by the class `SyncSession`. Suitable for low-throughput proof-of-concept applications. +- [**Asynchronous random access**](#asyncsession): provided by the class `AsyncSession`. It allows for document repository writes and random access with **high throughput**. +- [**Visiting**](#visitorsession): provided by the class `VisitorSession`. Allows a set of documents to be accessed in order decided by the document repository, which gives higher read throughput than random access. + +### AsyncSession + +This class represents a session for asynchronous access to a document repository. 
It is created by calling `myDocumentAccess.createAsyncSession(myAsyncSessionParams)`, and provides document repository writes and random access with high throughput. The usage pattern for an asynchronous session is like: + +1. `put()`, `update()`, `get()` or `remove()` is invoked on the session, and it returns a synchronous `Result` object that indicates whether the request was successful or not. The `Result` object also contains a *request identifier*. +2. The client polls the session for a `Response` through its `getNext()` method. Any operation accepted by an asynchronous session will produce exactly one response within the configured timeout. +3. Once a response is available, it is matched to the request by inspecting the response's request identifier. The response may also contain data, either a retrieved document or a failed document put or update that needs to be handled. +4. Note that the client must process the response queue or your JVM will run into garbage collection issues, as the underlying session keeps track of all responses and unless they are consumed they will be kept alive and not be garbage collected. + +Example: + +```java expandable +import com.yahoo.document.*; +import com.yahoo.documentapi.*; + +public class MyClient { + + // DocumentAccess is injectable in Vespa containers, but not in command line tools, etc. + private final DocumentAccess access = DocumentAccess.createForNonContainer(); + private final AsyncSession session = access.createAsyncSession(new AsyncParameters()); + private boolean abort = false; + private int numPending = 0; + + /** + * Implements application entry point. + * + * @param args Command line arguments. 
+ */ + public static void main(String[] args) { + MyClient app = null; + try { + app = new MyClient(); + app.run(); + } catch (Exception e) { + e.printStackTrace(); + } finally { + if (app != null) { + app.shutdown(); + } + } + if (app == null || app.abort) { + System.exit(1); + } + } + + /** + * This is the main entry point of the client. This method will not return until all available documents + * have been fed and their responses have been returned, or something signaled an abort. + */ + public void run() { + System.out.println("client started"); + while (!abort) { + flushResponseQueue(); + + Document doc = getNextDocument(); + if (doc == null) { + System.out.println("no more documents to put"); + break; + } + System.out.println("sending doc " + doc); + + while (!abort) { + Result res = session.put(doc); + if (res.isSuccess()) { + System.out.println("put has request id " + res.getRequestId()); + ++numPending; + break; // step to next doc. + } else if (res.type() == Result.ResultType.TRANSIENT_ERROR) { + System.out.println("send queue full, waiting for some response"); + processNext(9999); + } else { + res.getError().printStackTrace(); + abort = true; // this is a fatal error + } + } + } + if (!abort) { + waitForPending(); + } + System.out.println("client stopped"); + } + + /** + * Shutdown the underlying api objects. + */ + public void shutdown() { + System.out.println("shutting down document api"); + session.destroy(); + access.shutdown(); + } + + /** + * Returns the next document to feed to Vespa. This method should only return null when the end of the + * document stream has been reached, as returning null terminates the client. This is the point at which + * your application logic should block if it knows more documents will eventually become available. + * + * @return The next document to put, or null to terminate. + */ + public Document getNextDocument() { + return null; // TODO: Implement at your discretion. 
+ } + + /** + * Processes all immediately available responses. + */ + void flushResponseQueue() { + System.out.println("flushing response queue"); + while (processNext(0)) { + // empty + } + } + + /** + * Wait indefinitely for the responses of all sent operations to return. This method will only return + * early if the abort flag is set. + */ + void waitForPending() { + while (numPending != 0) { + if (abort) { + System.out.println("waiting aborted, " + numPending + " still pending"); + break; + } + System.out.println("waiting for " + numPending + " responses"); + processNext(9999); + } + } + + /** + * Retrieves and processes the next response available from the underlying asynchronous session. If no + * response becomes available within the given timeout, this method returns false. + * + * @param timeout The maximum number of seconds to wait for a response. + * @return True if a response was processed, false otherwise. + */ + boolean processNext(int timeout) { + Response res; + try { + res = session.getNext(timeout); + } catch (InterruptedException e) { + e.printStackTrace(); + abort = true; + return false; + } + if (res == null) { + return false; + } + System.out.println("got response for request id " + res.getRequestId()); + --numPending; + if (!res.isSuccess()) { + System.err.println(res.getTextMessage()); + abort = true; + return false; + } + return true; + } +} +``` + +### VisitorSession + +This class represents a session for sequentially visiting documents with high throughput. + +A visitor is started when creating the `VisitorSession` through a call to `createVisitorSession`. A visitor target, that is a receiver of visitor data, can be created through a call to `createVisitorDestinationSession`. The `VisitorSession` is a receiver of visitor data. See [visiting reference](/en/writing/visiting) for details. 
The `VisitorSession`: + +- Controls the operation of the visiting process +- Handles the data resulting from visiting data in the system + +Those two different tasks may be set up to be handled by a `VisitorControlHandler` and a `VisitorDataHandler` respectively. These handlers may be supplied to the `VisitorSession` in the `VisitorParameters` object, together with a set of other parameters for visiting. Example: To increase performance, let more separate visitor destinations handle visitor data, then specify the addresses to remote data handlers. + +The default `VisitorDataHandler` used by the `VisitorSession` returned from `DocumentAccess` is `VisitorDataQueue` which queues up incoming documents and implements a polling API. The documents can be extracted by calls to the session's `getNext()` methods and can be ack-ed by the `ack()` method. The default `VisitorControlHandler` can be accessed through the session's `getProgress()`, `isDone()`, and `waitUntilDone()` methods. + +Implement custom `VisitorControlHandler` and `VisitorDataHandler` by subclassing them and supplying these to the `VisitorParameters` object. + +The `VisitorParameters` object controls how and what data will be visited - refer to the [javadoc](https://javadoc.io/doc/com.yahoo.vespa/documentapi/latest/com/yahoo/documentapi/VisitorParameters.html). Configure the [document selection](/en/reference/writing/document-selector-language) string to select what data to visit - the default is all data. + +You can specify what fields to return in a result by specifying a [fieldSet](https://javadoc.io/doc/com.yahoo.vespa/documentapi/latest/com/yahoo/documentapi/VisitorParameters.html) - see [document field sets](/en/schemas/documents#fieldsets). Specifying only the fields you need may improve performance a lot, especially if you can make do with only in-memory fields or if you have large fields you don't need returned. 
+ +Example: + +```java expandable +import com.yahoo.document.Document; +import com.yahoo.document.DocumentId; +import com.yahoo.documentapi.DocumentAccess; +import com.yahoo.documentapi.DumpVisitorDataHandler; +import com.yahoo.documentapi.ProgressToken; +import com.yahoo.documentapi.VisitorControlHandler; +import com.yahoo.documentapi.VisitorParameters; +import com.yahoo.documentapi.VisitorSession; + +import java.util.concurrent.TimeoutException; + +public class MyClient { + + public static void main(String[] args) throws Exception { + VisitorParameters params = new VisitorParameters("true"); + params.setLocalDataHandler(new DumpVisitorDataHandler() { + + @Override + public void onDocument(Document doc, long timeStamp) { + System.out.print(doc.toXML("")); + } + + @Override + public void onRemove(DocumentId id) { + System.out.println("id=" + id); + } + }); + params.setControlHandler(new VisitorControlHandler() { + + @Override + public void onProgress(ProgressToken token) { + System.err.format("%.1f %% finished.\n", token.percentFinished()); + super.onProgress(token); + } + + @Override + public void onDone(CompletionCode code, String message) { + System.err.println("Completed visitation, code " + code + ": " + message); + super.onDone(code, message); + } + }); + params.setRoute(args.length > 0 ? args[0] : "[Storage:cluster=storage;clusterconfigid=storage]"); + params.setFieldSet(args.length > 1 ? args[1] : "[document]"); + + // DocumentAccess is injectable in Vespa containers, but not in command line tools, etc. + DocumentAccess access = DocumentAccess.createForNonContainer(); + VisitorSession session = access.createVisitorSession(params); + if (!session.waitUntilDone(0)) { + throw new TimeoutException(); + } + session.destroy(); + access.shutdown(); + } +} +``` + +The first optional argument to this client is the [route](/en/writing/document-routing) of the cluster to visit. The second is the [fieldset](/en/schemas/documents#fieldsets) set to retrieve. 
\ No newline at end of file diff --git a/mintlify-docs/en/writing/document-routing.mdx b/mintlify-docs/en/writing/document-routing.mdx new file mode 100644 index 0000000000..adf096e227 --- /dev/null +++ b/mintlify-docs/en/writing/document-routing.mdx @@ -0,0 +1,877 @@ +--- +title: "Routing" +sidebarTitle: "Document routing" +--- + +*Routing* is used to configure the paths that documents and updates written to Vespa take through the system. Vespa will automatically set up a routing configuration which is appropriate for most cases, so no explicit routing configuration is necessary. However, explicit routing can be used in advanced use cases such as sending different document streams to different document processing clusters, or through multiple consecutive clusters etc. + +There are other, more in-depth, articles on routing: + +- Use [vespa-route](/en/reference/operations/self-managed/tools#vespa-route) to inspect routes and services of a Vespa application, like in the [example](#example-reconfigure-the-default-route) +- [Routing policies reference](#routing-policies-reference). See the [routing policies](#routing-policies) note for complex routes and default routing + +In Vespa, there is a transport layer and a programming interface that are available to clients that wish to communicate with a Vespa application. The transport layer is *Message Bus*. [Document API](/en/writing/document-api-guide) is implemented on top of Message Bus. Configuring the interface therefore exposes some features available in Message Bus. Refer to the [Vespa APIs and interfaces](/en/reference/api/api) for clients using the *Document API*. The atoms in Vespa routing are *routes* and *hops*. + +[document-processing](https://github.com/vespa-engine/sample-apps/tree/master/examples/document-processing) is an example of custom document processing, and useful for testing routing. 
+ +## A route is a sequence of hops + +The sequence of hosts, routers, bridges, gateways, and other devices that network traffic takes, or could take, from its source to its destination is what is classically termed a *route*. As a verb, *to route* means to determine the link down which to send a packet, that will minimize its total journey time according to some routing algorithm. + +In Vespa, a route is simply a sequence of named hops. Instead of leaving selection logic to a route, the responsibility of resolving recipients is given to the [hops](#a-hop-is-a-point-to-point-transmission)' [selectors](#selection-logic). A hop can do more or less whatever it wants to change a message's journey through your application; it can slightly alter itself by choosing among some predefined recipients, it can change itself completely by either rewriting or looking up another hop, or it can even modify the entire route from that branch onwards. In effect, a route can end up branching at several points along its path, resulting in complex routes. As the figure suggests, Message Bus supports both [unicasting](https://en.wikipedia.org/wiki/Unicast) and [multicasting](https://en.wikipedia.org/wiki/Multicast) - Message Bus allows for arbitrarily complex routes. Each node in the above graph represents a Vespa service: + +<Frame> +![Illustration of routes with hops](/assets/img/routing.svg) +</Frame> + +## A hop is a point-to-point transmission + + +In telecommunication, a *hop* is one step, from one router to the next, on the path of a packet on an Internet Protocol network. It is a direct host-to-host connection forming part of the route between two hosts in a routed network such as the Internet. In more general terms, a hop is a point-to-point transmission in a series required to get a message from point A to point B. + +With Message Bus the concept of hops was introduced as the smallest steps of the transmission of a message. 
A hop consists of a *name* that is used by the messaging clients to select it, a list of *recipient* services that it may transmit to, and a *selector* that is used to select among those recipients. Unlike traditional hops, in Vespa a hop is a transmission from one sender to many recipients. + +Well, the above is only partially true; it is the easiest way to understand the hop concept. In fact, a hop's recipient list is nothing more than a configured list of strings that is made available to all [routing policies](#routing-policies) that are named in the selector string. See [selection logic](#selection-logic) below for details. + +A hop's recipient is the service name of a Message Bus client that has been registered in Vespa's service location broker (vespa-slobrok). These names are well-defined once their derivation logic is understood; they are "/"-separated sets of address-components whose values are given by a service's role in the application. An example of a recipient is: + +```text +search/cluster.foo/*/feed-destination +``` + +The marked components of the above recipient, `/search/cluster.foo/*`, resolves to a host's symbolic name. This is the name with which a Message Bus instance was configured. The unmarked component, `feed-destination`, is the local name of the running service that the hop transmits to, i.e. the name of the *session* created on the running Message Bus instance. + +The Active Configuration page in Vespa's administration interface gives an insight into what symbolic names exist for any given application by looking at its current configuration subscriptions. All available Message Bus services use their `ConfigId` as their host's symbolic name. See [vespa-route](/en/reference/operations/self-managed/tools#vespa-route) for how to inspect this, or use the [config API](/en/reference/api/config-v2). + +A hop can be prefixed using the special character "?" to force it to behave as if its [ignore-result](#hop) attribute was configured to "true". 
+ +### Asterisk + +A service identifier may include the special character "\*" as an address component. A recipient that contains this character is a request for the network to choose *any one* service that matches it. + +## Routing policies + +A routing policy is a protocol-specific algorithm that chooses among a list of candidate recipients for a single address component - see [hop description](#a-hop-is-a-point-to-point-transmission) above. These policies are designed and implemented as key parts of a Message Bus protocol. E.g. for the "Document" protocol these are what make up the routing behavior for document transmission. Without policies, a hop would only be able to match verbatim to a recipient, and thus the only advanced selection logic would be that of the [asterisk](#asterisk). + +In addition to implementing a selection algorithm, a routing policy must also implement a merging algorithm that combines the replies returned from each selected recipient into a single sensible reply. This is needed because a client does not necessarily know whether a message has been sent to one or multiple recipients, and **Message Bus guarantees a single reply for every message**. + +More formally, a routing policy is an arbitrarily large (or small), named, stand-alone piece of code registered with a Message Bus protocol. As described under [selection logic](#selection-logic) below, an instance of a policy is run both when resolving a route to recipients, and when merging replies. The policy is passed a `RoutingContext` object that pretty much allows it to do whatever it pleases to the route and replies. The same policy object and the same context object are used for both selection and merging. + +Refer to the [routing policy reference](#routing-policies-reference). + +## Selection logic + +When Message Bus is about to route a message, at the last possible time, it inspects the **first** hop of the message's route to resolve a set of recipients.
First, all of its [policies are resolved](#1-resolve-policy-directives). Second, the output service name is matched to the routing table to see if it maps to another hop or route. Finally, the message is [sent](#3-send-to-services) to all chosen recipient services. Because each policy can select multiple recipients, this can give rise to an arbitrarily complex routing tree. There are, of course, safeguards within Message Bus to prevent infinite recursions due to circular dependencies or misconfiguration. + +<Note> +**Note:** + +It **is** possible to develop a different protocol with other policies to run in the application, but since all of Vespa's components only support the "Document" protocol, it makes little sense to do so. +</Note> + +### 1. Resolve Policy Directives + +The logic run at this step is actually simple; as long as the hop string contains a policy directive, i.e. some arbitrary string enclosed in square brackets, Message Bus will create and run an instance of that policy for the protocol of the message being routed. + +```text +Name: storage/cluster.backup +Selector: storage/cluster.backup/distributor/[Distributor]/default +Recipients: - +``` + +The above hop is probably the simplest hop you will encounter in Vespa; it has a single policy directive contained in a string that closely resembles service names discussed above, and it has no recipients. When resolving this hop, Message Bus creates an instance of the "Distributor" policy and invokes its `select()` method. The "Distributor" policy will replace its own directive with a proper distributor identifier, yielding a hop string that is now an unambiguous service identifier.
+ +```text +Name: indexing +Selector: [DocumentRouteSelector] +Recipients: search/cluster.music + search/cluster.books +``` + +This hop has a selector which is nothing more than a single policy directive, "\[DocumentRouteSelector\]", and it has two configured recipients, "search/cluster.music" and "search/cluster.books". This policy expands the hop to zero, one or two **new** routes by replacing its own directive with the content of the recipient routes. Each of these routes may have one or more hops themselves. In turn, these will be processed independently. When replies are available from all chosen recipients, the policy's `merge()` method is invoked, and the resulting reply is passed upwards. + +```text +Name: default +Selector: [AND:indexing storage/cluster.backup] +Recipients: - +``` + +This hop has a selector but no recipients. The reason for this is best explained in the [routing policies reference](#routing-policies-reference), but it serves as an example of a hop that has no configured recipients. Notice how the policy directive contains a colon (":") which denotes that the remainder of the directive is a parameter to the policy constructor. This policy replaces the whole route of the message with the set of routes named in the parameter string. + +What routing policies are available depends on what protocol is currently running. As of this version the only supported protocol is "Document". This offers a set of routing policies discussed [below](#routing-policies-reference). + +### 2. Resolve Hop- and Route names + +As soon as all policy directives have been resolved, Message Bus makes sure that the resulting string is, in fact, a service name and not the name of another hop or route (in that order) configured for the running protocol. The outcome is either: + +1. The string is recognized as a hop name - The current hop is replaced by the named one, and processing returns to [step 1](#1-resolve-policy-directives). +2. 
The string is recognized as a route name - The current route, including all the hops following this, is replaced by the named one. Processing returns to [step 1](#1-resolve-policy-directives). +3. The string is accepted as a service name - This terminates the current branch of the routing tree. If all branches are terminated, processing proceeds to [step 3](#3-send-to-services). + +Because hop names are checked before route names, Message Bus also supports a "route:" prefix that forces the remainder of the string to resolve to a configured route or fail. + +### 3. Send to Services + +When the route resolver reaches this point, the first hop of the message being sent has been resolved to an arbitrarily complex routing tree. Each leaf of this tree represents a service that is to receive the message, unless some policy has already generated a reply for it. No matter how many recipients are chosen, the message is serialized only once, and the network transmission is able to share the same chunk of memory between all recipients. + +As replies to the message arrive at the sender they are handed over to the corresponding leaf nodes of the routing tree, but merging will not commence until all leaf nodes are ready. + +Route resolving happens just before network transmission, after all resending logic. This means that if the route configuration changes while there are messages scheduled for resending, these will adhere to the new routes. + +If the resolution of a recipient passed through a hop that was configured to [ignore results](#hop), the network layer will reply immediately with a synthetic "OK". + +## Example: Reconfigure the default route + +Assume that the application requires both search and storage capabilities, but that the default feed should only pass through to search. An imaginary scenario for this would be a system where there is a continuous feed being passed into Vespa with no filtering on spam. 
You would like a minimal storage-only cluster that stores a URL blocklist that can be used by a custom document processor to block incoming documents from offending sites. + +Apart from the blocklist and the document processor, add the following: + +```js +<routing version="1.0"> + <routingtable protocol="document"> + <route name="default" hops="docproc/cluster.blocklist/*/chain.blocklist indexing" /> + </routingtable> +</routing> +``` + +This overrides the default route to pass through any available blocklisting document processor before being indexed. If the document processor decides to block a message, it must respond with an appropriate *ok* reply, or your client software needs to accept whatever error reply you decide to return when blocking. + +When feeding blocklist information to storage, your application need only use the already available `storage` hop. + +See [#13193](https://github.com/vespa-engine/vespa/issues/13193) for a discussion on using *default* as a name. + +### The Document API + +With the current implementation of the Document API running on Message Bus, the configuration of the API implies configuration of the latter. Most clients will only ever route through this API. To use the Document API, you need to instantiate a class that implements the `DocumentAccess` interface. At the time of writing only `MessageBusDocumentAccess` exists, and it requires a parameter set for creation. These parameters are contained in an instance of `MessageBusDocumentAccessParams` that looks somewhat like the following: + +```js +class MessageBusDocumentAccessParams { + String documentManagerConfigId; // The id to resolve to document manager config. + String oosServerPattern; // The service pattern to resolve to fleet controller + // services. + String appConfigId; // The id to resolve to application config. + String slobrokConfigId; // The id to resolve to slobrok config. + String routingConfigId; // The id to resolve to messagebus routing config.
+ + String routeName; // The name of the route to send to. + int traceLevel; // The trace level to use when sending. + + class SourceSessionParams { + int maxPending; // Maximum number of pending messages. + int maxPendingSize; // Maximum size of pending messages. + double timeout; // Default timeout in seconds for messages + // that have no timeout set. + double requestTimeoutA; // Default request timeout in seconds, using + double requestTimeoutB; // the equation 'requestTimeout = a * retry + b'. + double retryDelay; // Number of seconds to wait before resending. + }; +} +``` + +The most obvious configuration parameter is `routeName`, which informs the `MessageBusDocumentAccess` object of the name of the route to use when sending documents and updates. The second parameter is `traceLevel`, which allows a client to see exactly how the data was transmitted. + +<Note> +**Note:** + +Tracing can be enabled on a level from 1-9, where a higher number means more tracing. Because the concept of tracing is not exposed by the Document API itself, its data will simply be printed to standard output when a reply arrives for the sender. This should therefore not be used in production, but can be helpful when debugging. +</Note> + +Refer to the [Document API JavaDoc](https://javadoc.io/doc/com.yahoo.vespa/documentapi). + +## Routing services + +This is the reference documentation for all elements in the *routing* section of [services.xml](/en/reference/applications/services/services). + +```text +routing [version] + routingtable [protocol, verify] + route [name, hops] + hop [name, selector, ignore-result] + recipient [session] + services [protocol] + service [name] +``` + +## routing + +Contained in [services](/en/reference/applications/services/services#services). The container element for all configuration related to routing.
+ +| Attribute | Required | Value | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| version | required | number | | Must be set to "1.0" in this Vespa-version | + +Optional subelements: + +<CardGroup> + <Card title="routingtable" icon="code-branch" href="#routingtable" horizontal /> + <Card title="services" icon="server" href="#services" horizontal /> +</CardGroup> + +Example: + +```js +<routing version="1.0"> + <routingtable protocol="document"> + <route name="route1" hops="hop1 hop2" /> + <route name="route2" hops="hop3 hop4 hop5" /> + <hop name="hop1" selector="docproc/cluster.foo/docproc/*/feed-processor"> + <recipient session="docproc/cluster.foo/docproc/*/feed-processor" /> + </hop> + <hop name="hop2" selector="search/cluster.bar/[SearchGroup]/[SearchRow]/[SearchColumn]/feed-destination"> + <recipient session="search/cluster.bar/g0/c0/r0/feed-destination" /> + <recipient session="search/cluster.bar/g0/c1/r0/feed-destination" /> + <recipient session="search/cluster.bar/g0/c0/r1/feed-destination" /> + <recipient session="search/cluster.bar/g0/c1/r1/feed-destination" /> + <recipient session="search/cluster.bar/g1/c0/r0/feed-destination" /> + <recipient session="search/cluster.bar/g1/c1/r0/feed-destination" /> + <recipient session="search/cluster.bar/g1/c0/r1/feed-destination" /> + <recipient session="search/cluster.bar/g1/c1/r1/feed-destination" /> + </hop> + </routingtable> + <services protocol="document"> + <service name="foo/bar" /> + </services> +</routing> +``` + +## routingtable + +Contained in [routing](#routing). Specifies a routing table for a specific protocol. + +| Attribute | Required | Value | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| protocol | required | | | Configure which protocol to use. Only the protocol *document* is defined, so if you define a routing table for an unsupported protocol, the application will just log an INFO entry that contains the name of that protocol. 
| +| verify | optional | boolean | | ToDo: document this | + +Optional subelements: + +<CardGroup> + <Card title="route" icon="code-branch" href="#route" horizontal /> + <Card title="hop" icon="arrow-right-circle" href="#hop" horizontal /> +</CardGroup> + +Example: + +```js +<routing version="1.0"> + <routingtable protocol="document"> + <route name="route1" hops="hop1 hop2" /> + <hop name="hop1" selector="docproc/cluster.foo/docproc/*/feed-processor"> + <recipient session="docproc/cluster.foo/docproc/*/feed-processor" /> + </hop> + </routingtable> +</routing> +``` + +## route + +Contained in [routingtable](#routingtable). Specifies a route for a message to its destination through a set of intermediate hops. If at least one hop in a route does not exist, the application will fail to start and issue an error that contains the name of that hop. + +| Attribute | Required | Value | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| name | required | | | Route name. | +| hops | required | | | A whitespace-separated list of hop names, where each name must be a valid hop. | + +Subelements: none + +Example: + +```js +<routing version="1.0"> + <routingtable protocol="document"> + <route name="route1" hops="hop1 hop2" /> + <route name="route2" hops="hop3 hop4 hop5" /> + </routingtable> +</routing> +``` + +## hop + +Contained in [routingtable](#routingtable). Specifies a single hop that can be used to construct one or more routes. A hop must have a name that is unique within the routing table to which it belongs. A hop contains a selector string and a list of recipient sessions. + +| Attribute | Required | Value | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| name | required | | | Hop name. | +| selector | required | | | Selector string. | +| ignore-result | optional | | | If set to *true*, specifies that the result of routing through that hop should be ignored. 
| + +Optional subelements: + +<CardGroup> + <Card title="recipient" icon="user" href="#recipient" horizontal /> +</CardGroup> + +Example: + +```js +<routing version="1.0"> + <routingtable protocol="document"> + <hop name="hop1" selector="docproc/cluster.foo/docproc/*/feed-processor"> + <recipient session="docproc/cluster.foo/docproc/*/feed-processor" /> + </hop> + </routingtable> +</routing> +``` + +## recipient + +Contained in [hop](#hop). Specifies a recipient session of a hop. + +| Attribute | Required | Value | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| session | required | | | This attribute must correspond to a running instance of a service that can be routed to. All session identifiers consist of a location part and a name. A search node is always given a session name on the form *search/cluster.name/g#/r#/c#/feed-destination*, whereas a document processor service is always named *docproc/cluster.name/docproc/#/feed-processor*. | + +Subelements: none + +Example: + +```js +<routing version="1.0"> + <routingtable protocol="document"> + <hop name="search/cluster.music" selector="search/cluster.music/[SearchGroup]/[SearchRow]/[SearchColumn]/feed-destination"> + <recipient session="search/cluster.music/g0/c0/r0/feed-destination" /> + <recipient session="search/cluster.music/g0/c0/r1/feed-destination" /> + <recipient session="search/cluster.music/g1/c0/r0/feed-destination" /> + <recipient session="search/cluster.music/g1/c0/r1/feed-destination" /> + </hop> + </routingtable> +</routing> +``` + +## services + +Contained in [routing](#routing). Specifies a set of services available for a specific protocol. At the moment the only supported protocol is *document*. The services specified are used by the route verification step to allow hops and routes to reference services known to exist, but that can not be derived from *services.xml*. 
+ +| Attribute | Required | Value | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| protocol | required | | | Configure which protocol to use. Only the protocol *document* is defined. | + +Optional subelements: + +<CardGroup> + <Card title="service" icon="server" href="#service" horizontal /> +</CardGroup> + +Example: + +```js +<routing version="1.0"> + <services protocol="document"> + <service name="foo/bar" /> + </services> +</routing> +``` + +## service + +Contained in [services](#services). Specifies a single known service that can not be derived from the *services.xml*. + +| Attribute | Required | Value | Default | Description | +| :--- | :--- | :--- | :--- | :--- | +| name | required | | | The name of the service. | + +Subelements: none + +Example: + +```js +<routing version="1.0"> + <services protocol="document"> + <service name="foo/bar" /> + </services> +</routing> +``` + +## Routing policies reference + + +This article contains detailed descriptions of the behaviour of all routing policies available in Vespa. + +The *Document protocol* is currently the only Message Bus protocol supported by Vespa. Furthermore, all routing policies that are part of this protocol share a common code path for [merging replies](#merge). The policies offered by the protocol are: + +- [AND](#and) - Selects all configured recipient hops.
+- [DocumentRouteSelector](#documentrouteselector) - Uses a [document selection string](/en/reference/writing/document-selector-language) to select compatible routes +- [Content](#content) - Selects a content cluster distributor based on system state +- [MessageType](#messagetype) - Selects a next hop based on message type +- [Extern](#extern) - Selects a recipient by querying a remote Vespa application +- [LocalService](#localservice) - Selects a recipient based on ip address +- [RoundRobin](#roundrobin) - Selects one from the configured recipients in round-robin order +- [SubsetService](#subsetservice) - Selects only among a subset of all matching services +- [LoadBalancer](#loadbalancer) - A round-robin policy that chooses between the recipients by generating a weight according to their performance + +### Common Document `merge()` logic + +The shared merge logic of most Document routing policies is an attempt to do the "right" thing when merging multiple replies into one. It works by first stepping through all replies, storing their content as either: + +1. OK replies, +2. IGNORE replies, or +3. ERROR replies + +If at least one ERROR reply is found, return a new reply that contains all the errors of the others. If there is at least one OK reply, return the first OK reply, but transfer all feed information from the others to this (this is specific data for start- and end-of-feed messages). Otherwise, return a new reply that contains all the IGNORE errors. 
Pseudocode: + +```text +for each reply, do + if reply has no errors, do + store reply in OK list + else, do + if reply has only IGNORE errors + copy all errors from reply to IGNORE list + else, do + copy all errors from reply to ERROR list + +if ERROR list is not empty, do + return new reply with all errors +else, do + if OK list is not empty, do + return first reply with all feed answers + else, do + return new reply with all IGNORE errors +``` + +## Routing policies reference + +| Policy | Description | +| :--- | :--- | +| AND | This is mostly a convenience policy that allows the user to fork a message's route to all configured recipients. It is not message-type aware, and will simply always select all recipients. Replies are merged according to the [shared logic](#merge). <br/><br/> The optional string parameter is parsed as a space-separated list of hops. Configured recipients have precedence over parameter-given recipients, although this is likely to be changed in the future. | +| DocumentRouteSelector | This policy is responsible for selecting among the policy's recipients according to the subscription rules defined by a content cluster's *documents* element in [services.xml](/en/reference/applications/services/services). If the "selection" attribute is set in the "documents" element, its value is processed as a [document select](/en/reference/writing/document-selector-language) string, and run on documents and document updates to determine routes. If the "feedname" attribute is set, all feed commands are filtered through it. <br/><br/> The recipient list of this policy is required to map directly to route names. E.g. if a recipient is "search/cluster.music", and a message is appropriate according to the selection criteria, the message is routed to the "search/cluster.music" route. If the route does not exist, this policy will reply with an error. In short, this policy selects one or more recipient routes based on document content and configured criteria.
<br/><br/> If more than one route is chosen, the replies are merged according to the [shared logic](#merge). <br/><br/> This policy does not support any parameters. <br/><br/> The configuration for this is "documentrouteselectorpolicy" available from config id "routing/documentapi". <br/><br/> <Warning> **Important:**<br/> Because GET messages do not contain any document on which to run the selection criteria, this policy returns an IGNORED reply that the merging logic processes. You can see this by attempting to retrieve a document from an application that does not have a content cluster.</Warning> | +| Content | This policy allows you to send a message to a content cluster. The policy uses a system state retrieved from the cluster in question in conjunction with slobrok information to pick the correct distributor for your message. <br/><br/> In short: use this policy when communicating with document storage. <br/><br/> This policy supports multiple parameters, up to one each of:<br/><br/> **cluster**<br/>The name of the cluster you want to reach. Example: cluster=mycluster <br/><br/> **config**<br/> A comma-separated list of config servers or proxies you want to use to fetch configuration for the policy. This can be used to communicate with other clusters than the one you're currently in. Example: config=tcp/myadmin1:19070,tcp/myadmin2:19070 <br/><br/> Separate each parameter with a semicolon. | +| MessageType | This policy will select the next hop based on the type of the message. You configure where all messages should go (defaultroute). Then you configure which message types should be overridden and sent to alternative routes. It is currently only used internally by Vespa when using the [content](/en/reference/applications/services/content#content) element. | +| Extern | This policy implements the necessary logic to communicate with an external Vespa application and resolve a single service pattern using that other application's slobrok servers.
Keep in mind that there might be some delay from the moment this policy is initially created and when it receives the response to its service query, so using this policy might cause a message to be resent a few times until it is resolved. If you disable retries, this policy might cause all messages to fail for the first seconds.<br/><br/> This policy uses its parameter for both the address of the extern slobrok server to connect to, and also the pattern to use for querying. The parameter is required to be on the form `<spec>;<service>`, where `spec` is a comma-separated list of slobrok connection specs on the form "tcp/hostname:port", and `service` is a service running on the remote Vespa application. <br/><br/> <Warning> **Important:**<br/> The remote application needs to have a version of both message bus and the document api that is binary compatible with the application sending from. This can be a problem even between patch releases, so keep the application versions in sync when using this policy.</Warning> | +| LocalService | This policy is used to select among all matching services, but preferring those running on the same host as the current one. The pattern used when querying for available services is the current one, but replacing the policy directive with an asterisk. E.g. the hop "docproc/cluster.default/\[LocalService\]/chain.default" would prefer local services among all those that match the pattern "docproc/cluster.default/\*/chain.default". If there are multiple matching services that run locally, this policy will do simple round-robin load balancing between them. If no matching services run locally, this policy simply returns the asterisk as a match to allow the underlying network logic to do load balancing among all available.<br/><br/> This policy accepts an optional parameter which overrides the local hostname. Use this if you wish the hop to prefer some specific host. 
<br/><br/> <Warning> **Important:**<br/> There is no additional logic to replace other policy directives with an asterisk, meaning that if other policy directives are present in the hop string after "\[LocalService\]", no services can possibly be matched.</Warning> | +| RoundRobin | This policy is used to select among a configured set of recipients. For each configured recipient, this policy determines what online services are matched, and then selects one among all of those in round-robin order. If none of the configured recipients match any available service, this policy returns an error that indicates to the sender that it should retry later.<br/><br/> Because this policy only selects a single recipient, it contains no merging logic. | +| SubsetService | This policy is used to select among a subset of all matching services, and is used to minimize the number of connections in the system. The pattern used when querying for available services is the current one, but replacing the policy directive with an asterisk. E.g. the hop "docproc/cluster.default/\[SubsetService:3\]/chain.default" would select among a subset of all those that match the pattern "docproc/cluster.default/\*/chain.default". Given that the pattern returns a set of matches, this policy stores a subset of these based on the hash-value of the running message bus' connection string (this is unique for each instance). If there are no matching services, this policy returns the asterisk as a match to allow the underlying network logic to fail gracefully.<br/><br/> This policy parses its optional parameter as the size of the subset.
If none is given, the subset defaults to size 5.<br/><br/> <Warning> **Important:** <br/>There is no additional logic to replace other policy directives with an asterisk, meaning that if other policy directives are present in the hop string after "\[SubsetService\]", no services can possibly be matched.</Warning> | +| LoadBalancer | This policy is used to send to a stateless cluster such as docproc, where any node can be chosen to process any message. Messages are sent between the nodes in a round-robin fashion, but each node is assigned a weight based on its performance. The weights are calculated by measuring the number of times the node had a full input-queue and returned a busy response. Use this policy to send to docproc clusters that have nodes with different performance characteristics. <br/><br/> This policy supports multiple parameters, up to one each of: <br/><br/>**cluster**<br/> The name of the cluster you want to reach. Example: cluster=docproc/cluster.default (mandatory) <br/><br/>**session**<br/>The destination session you want to reach. In the case of docproc, the name of the docproc chain. Example: session=chain.mychain (mandatory) <br/><br/>**config**<br/> A comma-separated list of config servers or proxies you want to use to fetch configuration for the policy. This can be used to communicate with other clusters than the one you're currently in. Example: config=tcp/myadmin1:19070,tcp/myadmin2:19070 <br/><br/> Separate each parameter with a semicolon. By default, this policy will use the current Vespa cluster for configuration. | + +## Routing for indexing + +A normal Vespa configuration has container and content cluster(s), with one or more document types defined in *schemas*. Routing document writes means routing documents to the *indexing* container cluster, then the right *content* cluster. + +The indexing cluster is a container cluster - see [multiple container clusters](#multiple-container-clusters) for variants.
Add the [document-api](/en/reference/applications/services/container#document-api) feed endpoint to this cluster. The mapping from document type to content cluster is in [document](/en/reference/applications/services/content#document) in the content cluster. From [album-recommendation](https://github.com/vespa-engine/sample-apps/blob/master/album-recommendation/app/services.xml): + +```js +<services version="1.0"> + + <container id="container" version="1.0"> + <document-api /> + <search /> + <nodes> + <node hostalias="node1" /> + </nodes> + </container> + + <content id="music" version="1.0"> + <redundancy>1</redundancy> + <documents> + <document type="music" mode="index" /> + </documents> + <nodes> + <node hostalias="node1" distribution-key="0" /> + </nodes> + </content> + +</services> +``` + +Given this configuration, Vespa knows which is the container cluster used for indexing, and which content cluster that stores the *music* document type. Use [vespa-route](/en/reference/operations/self-managed/tools#vespa-route) to display routing generated from this configuration: + +```text +$ vespa-route +There are 6 route(s): + 1. default + 2. default-get + 3. music + 4. music-direct + 5. music-index + 6. storage/cluster.music + +There are 2 hop(s): + 1. container/chain.indexing + 2. indexing +``` + +Note the *default* route. This route is auto-generated by Vespa, and is used when no other route is used when using [/document/v1](/en/reference/api/document-v1). *default* points to *indexing*: + +```text +$ vespa-route --route default +The route 'default' has 1 hop(s): + 1. indexing +``` + +```text +$ vespa-route --hop indexing +The hop 'indexing' has selector: + [DocumentRouteSelector] +And 1 recipient(s): + 1. music +``` + +```text +$ vespa-route --route music +The route 'music' has 1 hop(s): + 1. [MessageType:music] +``` + +In short, the *default* route handles documents of type *music*. 
Vespa will route to the container cluster with *document-api* - note the *chain.indexing* above. This is a set of built-in *document processors* that does the indexing (below). + +Refer to the [trace appendix](#appendix-trace) for routing details. + +## chain.indexing + +This indexing chain is set up on the container once a content cluster has `mode="index"`. + +The [IndexingProcessor](https://github.com/vespa-engine/vespa/blob/master/docprocs/src/main/java/com/yahoo/docprocs/indexing/IndexingProcessor.java) annotates the document based on the [indexing script](/en/reference/writing/indexing-language) generated from the schema. Example: + +```js +$ vespa-get-config -n vespa.configdefinition.ilscripts \ + -i container/docprocchains/chain/indexing/component/com.yahoo.docprocs.indexing.IndexingProcessor + +maxtermoccurrences 100 +fieldmatchmaxlength 1000000 +ilscript[0].doctype "music" +ilscript[0].docfield[0] "artist" +ilscript[0].docfield[1] "artistId" +ilscript[0].docfield[2] "title" +ilscript[0].docfield[3] "album" +ilscript[0].docfield[4] "duration" +ilscript[0].docfield[5] "year" +ilscript[0].docfield[6] "popularity" +ilscript[0].content[0] "clear_state | guard { input artist | tokenize normalize stem:"BEST" | summary artist | index artist; }" +ilscript[0].content[1] "clear_state | guard { input artistId | summary artistId | attribute artistId; }" +ilscript[0].content[2] "clear_state | guard { input title | tokenize normalize stem:"BEST" | summary title | index title; }" +ilscript[0].content[3] "clear_state | guard { input album | tokenize normalize stem:"BEST" | index album; }" +ilscript[0].content[4] "clear_state | guard { input duration | summary duration; }" +ilscript[0].content[5] "clear_state | guard { input year | summary year | attribute year; }" +ilscript[0].content[6] "clear_state | guard { input popularity | summary popularity | attribute popularity; }" +``` + +Refer to [linguistics](/en/linguistics/linguistics) for more details. 
+ +By default, the indexing chain is set up on the *first* container cluster in *services.xml*. When having multiple container clusters, it is recommended to configure this explicitly, see [multiple container clusters](#multiple-container-clusters). + +## Document selection + +The [document](/en/reference/applications/services/content#document) can have a [selection](/en/reference/writing/document-selector-language) string, normally used to expire documents. This is also evaluated during feeding, so documents that would immediately expire are dropped. This is not an error, the document API will report 200 - but can be confusing. + +The evaluation is done in the [DocumentRouteSelector](https://github.com/vespa-engine/vespa/blob/master/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/DocumentRouteSelectorPolicy.java) at the feeding endpoint - *before* any processing/indexing. I.e. the document is evaluated using the selection string (drop it or not), then where to route it, based on document type. + +Example: the selection is configured to not match the document being fed: + +```js +<content id="music" version="1.0"> + <redundancy>1</redundancy> + <documents> + <document type="music" mode="index" selection='music.album == "thisstringwillnotmatch"'/> +``` + +```js +$ vespa-feeder --trace 6 doc.json + +<trace> + [1564576570.693] Source session accepted a 4096 byte message. 1 message(s) now pending. + [1564576570.713] Sequencer sending message with sequence id '-1163801147'. + [1564576570.721] Recognized 'default' as route 'indexing'. + [1564576570.727] Recognized 'indexing' as HopBlueprint(selector = { '[DocumentRouteSelector]' }, recipients = { 'music' }, ignoreResult = false). + [1564576570.811] Running routing policy 'DocumentRouteSelector'. + [1564576570.822] Policy 'DocumentRouteSelector' assigned a reply to this branch. + [1564576570.828] Sequencer received reply with sequence id '-1163801147'. + [1564576570.828] Source session received reply. 
0 message(s) now pending. +</trace> + +Messages sent to vespa (route default) : +---------------------------------------- +PutDocument: ok: 0 msgs/sec: 0.00 failed: 0 ignored: 1 latency(min, max, avg): 9223372036854775807, -9223372036854775808, 0 +``` + +Without the selection (i.e. everything matches): + +```js +$ vespa-feeder --trace 6 doc.json + +<trace> + [1564576637.147] Source session accepted a 4096 byte message. 1 message(s) now pending. + [1564576637.168] Sequencer sending message with sequence id '-1163801147'. + [1564576637.176] Recognized 'default' as route 'indexing'. + [1564576637.180] Recognized 'indexing' as HopBlueprint(selector = { '[DocumentRouteSelector]' }, recipients = { 'music' }, ignoreResult = false). + [1564576637.256] Running routing policy 'DocumentRouteSelector'. + [1564576637.268] Component '[MessageType:music]' selected by policy 'DocumentRouteSelector'. + ... +</trace> + +Messages sent to vespa (route default) : +---------------------------------------- +PutDocument: ok: 1 msgs/sec: 1.05 failed: 0 ignored: 0 latency(min, max, avg): 845, 845, 845 +``` + +In the last case, in the [DocumentRouteSelector](https://github.com/vespa-engine/vespa/blob/master/documentapi/src/main/java/com/yahoo/documentapi/messagebus/protocol/DocumentRouteSelectorPolicy.java) routing policy, the document matched the selection string / there was no selection string, and the document was forwarded to the next hop in the route. + +## Document processing + +Add custom processing of documents using [document processing](/en/applications/document-processors). The normal use case is to add document processors in the default route, before indexing. 
Example: + +```js expandable +<services version="1.0"> + + <container id="container" version="1.0"> + <document-api /> + <search /> + <document-processing> + <chain id="default"> + <documentprocessor + id="com.mydomain.example.Rot13DocumentProcessor" + bundle="album-recommendation-docproc" /> + </chain> + </document-processing> + <nodes> + <node hostalias="node1" /> + </nodes> + </container> + + <content id="music" version="1.0"> + <redundancy>1</redundancy> + <documents> + <document type="music" mode="index" /> + </documents> + <nodes> + <node hostalias="node1" distribution-key="0" /> + </nodes> + </content> + +</services> +``` + +Note that a new hop *default/chain.default* is added, and the default route is changed to include this: + + +```text +$ vespa-route + +There are 6 route(s): + 1. default + 2. default-get + 3. music + 4. music-direct + 5. music-index + 6. storage/cluster.music + +There are 3 hop(s): + 1. default/chain.default + 2. default/chain.indexing + 3. indexing +``` + +```text +$ vespa-route --route default + +The route 'default' has 2 hop(s): + 1. default/chain.default + 2. indexing +``` + + +Note that the document processing chain must be called *default* to automatically be included in the default route. + +### Inherit indexing chain + +An alternative to the above is inheriting the indexing chain - use this when getting this error: + +```text +Indexing cluster 'XX' specifies the chain 'default' as indexing chain. +As the 'default' chain is run by default, using it as the indexing chain will run it twice. +Use a different name for the indexing chain. 
+``` + +Call the chain something else than *default*, and let it inherit *indexing*: + +```js expandable +<services version="1.0"> + + <container id="container" version="1.0"> + <document-api /> + <search /> + <document-processing> + <chain id="offer-processing" inherits="indexing"> + <documentprocessor id="processor.OfferDocumentProcessor"/> + </chain> + </document-processing> + <nodes> + <node hostalias="node1" /> + </nodes> + </container> + + <content id="music" version="1.0"> + <redundancy>1</redundancy> + <documents> + <document type="offer" mode="index"/> + <document-processing cluster="default" chain="offer-processing"/> + </documents> + <nodes> + <node hostalias="node1" distribution-key="0" /> + </nodes> + </content> + +</services> +``` + +See [#13193](https://github.com/vespa-engine/vespa/issues/13193) for details. + +## Multiple container clusters + +Vespa can be configured to use more than one container cluster. Use cases can be to separate search and document processing or having different document processing clusters due to capacity constraints or dependencies. 
Example with separate search and feeding/indexing container clusters: + +```js expandable +<services version="1.0"> + + <container id="container-search" version="1.0"> + <search /> + <nodes> + <node hostalias="node1" /> + </nodes> + </container> + + <container id="container-indexing" version="1.0"> + <http> + <server id="httpServer2" port="8081" /> + </http> + <document-api /> + <document-processing /> + <nodes> + <node hostalias="node1" /> + </nodes> + </container> + + <content id="music" version="1.0"> + <redundancy>1</redundancy> + <documents> + <document type="music" mode="index" /> + <document-processing cluster="container-indexing" /> + </documents> + <nodes> + <node hostalias="node1" distribution-key="0" /> + </nodes> + </content> + +</services> +``` + +Notes: + +- The indexing route is explicit using [document-processing](/en/reference/applications/services/content#document-processing) elements from the content to the container cluster +- Set up *document-api* on the same cluster as indexing to avoid network hop from feed endpoint to indexing processors +- If no *document-processing* is configured, it defaults to a container cluster named *default*. When using multiple container clusters, it is best practice to explicitly configure *document-processing*. + +Observe the *container-indexing/chain.indexing* hop, and the indexing chain is set up on the *container-indexing* cluster: + +```text +$ vespa-route + +There are 6 route(s): + 1. default + 2. default-get + 3. music + 4. music-direct + 5. music-index + 6. storage/cluster.music + +There are 2 hop(s): + 1. container-indexing/chain.indexing + 2. 
indexing +``` + +```js +$ curl -s http://localhost:8081 | python -m json.tool | grep -C 3 chain.indexing + + { + "bundle": "container-disc:7.0.0", + "class": "com.yahoo.messagebus.jdisc.MbusClient", + "id": "chain.indexing@MbusClient", + "serverBindings": [] + }, + { +-- + "class": "com.yahoo.docproc.jdisc.DocumentProcessingHandler", + "id": "com.yahoo.docproc.jdisc.DocumentProcessingHandler", + "serverBindings": [ + "mbus://*/chain.indexing" + ] + }, + { +``` + +## Appendix: trace + +Below is a trace example, no selection string: + +```js expandable +$ cat doc.json +[ +{ + "put": "id:mynamespace:music::123", + "fields": { + "album": "Bad", + "artist": "Michael Jackson", + "title": "Bad", + "year": 1987, + "duration": 247 + } +} +] + +$ vespa-feeder --trace 6 doc.json +<trace> + [1564571762.403] Source session accepted a 4096 byte message. 1 message(s) now pending. + [1564571762.420] Sequencer sending message with sequence id '-1163801147'. + [1564571762.426] Recognized 'default' as route 'indexing'. + [1564571762.429] Recognized 'indexing' as HopBlueprint(selector = { '[DocumentRouteSelector]' }, recipients = { 'music' }, ignoreResult = false). + [1564571762.489] Running routing policy 'DocumentRouteSelector'. + [1564571762.493] Component '[MessageType:music]' selected by policy 'DocumentRouteSelector'. + [1564571762.493] Resolving '[MessageType:music]'. + [1564571762.520] Running routing policy 'MessageType'. + [1564571762.520] Component 'music-index' selected by policy 'MessageType'. + [1564571762.520] Resolving 'music-index'. + [1564571762.520] Recognized 'music-index' as route 'container/chain.indexing [Content:cluster=music]'. + [1564571762.520] Recognized 'container/chain.indexing' as HopBlueprint(selector = { '[LoadBalancer:cluster=container;session=chain.indexing]' }, recipients = { }, ignoreResult = false). + [1564571762.526] Running routing policy 'LoadBalancer'. 
+ [1564571762.538] Component 'tcp/vespa-container:19101/chain.indexing' selected by policy 'LoadBalancer'. + [1564571762.538] Resolving 'tcp/vespa-container:19101/chain.indexing [Content:cluster=music]'. + [1564571762.580] Sending message (version 7.83.27) from client to 'tcp/vespa-container:19101/chain.indexing' with 179.853 seconds timeout. + [1564571762.581] Message (type 100004) received at 'container/container.0' for session 'chain.indexing'. + [1564571762.581] Message received by MbusServer. + [1564571762.582] Request received by MbusClient. + [1564571762.582] Running routing policy 'Content'. + [1564571762.582] Selecting route + [1564571762.582] No cluster state cached. Sending to random distributor. + [1564571762.582] Too few nodes seen up in state. Sending totally random. + [1564571762.582] Component 'tcp/vespa-container:19114/default' selected by policy 'Content'. + [1564571762.582] Resolving 'tcp/vespa-container:19114/default'. + [1564571762.586] Sending message (version 7.83.27) from 'container/container.0' to 'tcp/vespa-container:19114/default' with 179.995 seconds timeout. + [1564571762.587181] Message (type 100004) received at 'storage/cluster.music/distributor/0' for session 'default'. + [1564571762.587245] music/distributor/0 CommunicationManager: Received message from message bus + [1564571762.587510] Communication manager: Sending Put(BucketId(0x2000000000000020), id:mynamespace:music::123, timestamp 1564571762000000, size 275) + [1564571762.587529] Communication manager: Passing message to source session + [1564571762.587547] Source session accepted a 1 byte message. 1 message(s) now pending. + [1564571762.587681] Sending message (version 7.83.27) from 'storage/cluster.music/distributor/0' to 'storage/cluster.music/storage/0/default' with 180.00 seconds timeout. + [1564571762.587960] Message (type 10) received at 'storage/cluster.music/storage/0' for session 'default'. 
+ [1564571762.588052] music/storage/0 CommunicationManager: Received message from message bus + [1564571762.588263] PersistenceThread: Processing message in persistence layer + [1564571762.588953] Communication manager: Sending PutReply(id:mynamespace:music::123, BucketId(0x2000000000000020), timestamp 1564571762000000) + [1564571762.589023] Sending reply (version 7.83.27) from 'storage/cluster.music/storage/0'. + [1564571762.589332] Reply (type 11) received at 'storage/cluster.music/distributor/0'. + [1564571762.589448] Source session received reply. 0 message(s) now pending. + [1564571762.589459] music/distributor/0Communication manager: Received reply from message bus + [1564571762.589679] Communication manager: Sending PutReply(id:music:music::123, BucketId(0x0000000000000000), timestamp 1564571762000000) + [1564571762.589807] Sending reply (version 7.83.27) from 'storage/cluster.music/distributor/0'. + [1564571762.590] Reply (type 200004) received at 'container/container.0'. + [1564571762.590] Routing policy 'Content' merging replies. + [1564571762.590] Reply received by MbusClient. + [1564571762.590] Sending reply from MbusServer. + [1564571762.590] Sending reply (version 7.83.27) from 'container/container.0'. + [1564571762.612] Reply (type 200004) received at client. + [1564571762.613] Routing policy 'LoadBalancer' merging replies. + [1564571762.613] Routing policy 'MessageType' merging replies. + [1564571762.615] Routing policy 'DocumentRouteSelector' merging replies. + [1564571762.622] Sequencer received reply with sequence id '-1163801147'. + [1564571762.622] Source session received reply. 0 message(s) now pending. 
+</trace> + +Messages sent to vespa (route default) : +---------------------------------------- +PutDocument: ok: 1 msgs/sec: 3.30 failed: 0 ignored: 0 latency(min, max, avg): 225, 225, 225 +``` diff --git a/mintlify-docs/en/writing/document-v1-api-guide.mdx b/mintlify-docs/en/writing/document-v1-api-guide.mdx new file mode 100644 index 0000000000..d249510ed0 --- /dev/null +++ b/mintlify-docs/en/writing/document-v1-api-guide.mdx @@ -0,0 +1,429 @@ +--- +title: "/document/v1 API guide" +sidebarTitle: "/document/v1" +--- + +Use the */document/v1/* API to read, write, update and delete documents. + +Refer to the [document/v1 API reference](/en/reference/api/document-v1) for API details. [Reads and writes](/en/writing/reads-and-writes) has an overview of alternative tools and APIs as well as the flow through the Vespa components when accessing documents. See [getting started](#getting-started) for how to work with the */document/v1/ API*. + +| | | +| :--- | :--- | +|GET|**Get**<br/><br/>`$ curl http://localhost:8080/document/v1/my_namespace/music/docid/love-id-here-to-stay`<br/><br/>**Visit** <br/><br/>[Visit](/en/writing/visiting) all documents with given namespace and document type:<br/><br/>`$ curl http://localhost:8080/document/v1/namespace/music/docid`<br/><br/>Visit all documents using continuation:<br/><br/>`$ curl http://localhost:8080/document/v1/namespace/music/docid?continuation=AAAAEAAAAAAAAAM3AAAAAAAAAzYAAAAAAAEAAAAAAAFAAAAAAABswAAAAAAAAAAA`<br/><br/>Visit using a *selection*:<br/><br/>`$ curl http://localhost:8080/document/v1/namespace/music/docid?selection=music.genre=='blues'`<br/><br/>Visit documents across all *non-global* document types and namespaces in content cluster `mycluster`:<br/><br/>`$ curl http://localhost:8080/document/v1/?cluster=mycluster`<br/><br/>Visit documents across all *[global](/en/reference/applications/services/content#document)* document types and namespaces stored in content cluster `mycluster`:<br/><br/>`$ curl 
http://localhost:8080/document/v1/?cluster=mycluster&bucketSpace=global`<br/><br/>Read about [visiting throughput](#visiting-throughput) below.| +|**POST**|Post data in the [document JSON format](/en/reference/schemas/document-json-format):<br/><br/>`$ curl -X POST -H "Content-Type:application/json" --data '`<br/>`{`<br/>`"fields": {`<br/>`"artist": "Coldplay",`<br/>`"album": "A Head Full of Dreams",`<br/>`"year": 2015`<br/>`}`<br/>`}' \`<br/>`http://localhost:8080/document/v1/mynamespace/music/docid/a-head-full-of-dreams`| +|**PUT**|Do a [partial update](/en/writing/partial-updates) for a document:<br/><br/>`$ curl -X PUT -H "Content-Type:application/json" --data '`<br/>`{`<br/>`"fields":` <br/>`{`<br/>`"artist":` <br/>`{`<br/>`"assign": "Warmplay"`<br/>`}`<br/>`}`<br/>`}' \`<br/>`http://localhost:8080/document/v1/mynamespace/music/docid/a-head-full-of-dreams`| +|**DELETE**|Delete a document by ID:<br/><br/>`$ curl -X DELETE http://localhost:8080/document/v1/mynamespace/music/docid/a-head-full-of-dreams`<br/><br/>Delete all documents in the `music` schema:<br/><br/>`$ curl -X DELETE \`<br/>`"http://localhost:8080/document/v1/mynamespace/music/docid?selection=true&cluster=my_cluster"`| + +## Conditional writes + +A *test-and-set* [condition](/en/reference/writing/document-selector-language) can be added to Put, Remove and Update operations: + +Example: + +```sh +$ curl -X PUT -H "Content-Type:application/json" --data ' + { + "condition": "music.artist==\"Warmplay\"", + "fields": { + "artist": { + "assign": "Coldplay" + } + } + }' \ + http://localhost:8080/document/v1/mynamespace/music/docid/a-head-full-of-dreams +``` + +<Warning> +**Important:** + +*Use documenttype.fieldname (e.g. 
music.artist) in the condition, not only fieldname.* +</Warning> + +If the condition is not met, a 412 Precondition Failed is returned: + +```json +{ + "pathId": "/document/v1/mynamespace/music/docid/a-head-full-of-dreams", + "id": "id:mynamespace:music::a-head-full-of-dreams", + "message": "[UNKNOWN(251013) @ tcp/vespa-container:19112/default]: ReturnCode(TEST_AND_SET_CONDITION_FAILED, Condition did not match document nodeIndex=0 bucket=20000000000000c4 ) " +} +``` + +Also see the [condition reference](/en/reference/schemas/document-json-format#test-and-set). + +## Create if nonexistent + +### Upserts + +Updates to nonexistent documents are supported using [create](/en/reference/schemas/document-json-format#create). This is often called an *upsert* — insert a document if it does not already exist, or update it if it exists. An empty document is created on the content nodes, before the update is applied. This simplifies client code in the case of multiple writers. Example: + +```sh +$ curl -X PUT -H "Content-Type:application/json" --data ' + { + "fields": { + "artist": { + "assign": "Coldplay" + } + } + }' \ + http://localhost:8080/document/v1/mynamespace/music/docid/a-head-full-of-thoughts?create=true +``` + +### Conditional updates and puts with create + +Conditional updates and puts can be combined with [create](/en/reference/schemas/document-json-format#create). This has the following semantics: + +- If the document already exists, the condition is evaluated against the most recent document version available. The operation is applied if (and only if) the condition matches. +- Otherwise (i.e. the document does not exist or the newest document version is a tombstone), the condition is *ignored* and the operation is applied as if no condition was provided. + +You can use conditional puts to handle out-of-order writes. Say you have a `version` field in your source-of-truth system. 
You can use it to only write a document if it doesn't exist or if the version is older than the one you're trying to write: + + +```sh +$ curl -X POST -H "Content-Type:application/json" --data ' + { + "condition": "music.version < 42", + "create": true, + "fields": { + "version": 42, + "artist": "Coldplay" + } + }' \ + http://localhost:8080/document/v1/mynamespace/music/docid/a-head-full-of-thoughts +``` + +Similarly, updates can depend on the version being older. Add `create=true` to make them upserts. + +```sh + $ curl -X PUT -H "Content-Type:application/json" --data ' + { + "condition": "music.version < 43", + "create": true, + "fields": { + "version": { + "assign": 43 + }, + "artist": { + "assign": "Warmplay" + } + } + }' \ + http://localhost:8080/document/v1/mynamespace/music/docid/a-head-full-of-thoughts +``` + +<Danger> +**Warning:** + +If all existing replicas of a document are missing when an operation with `"create": true` is executed, a new document will always be created. This happens even if a condition has been given. If the existing replicas become available later, their version of the document will be overwritten by the newest update since it has a higher timestamp. +</Danger> + +<Note> +**Note:** + +See [document expiry](/en/schemas/documents#document-expiry) for auto-created documents — it is possible to create documents that do not match the selection criterion. +</Note> + +<Note> +**Note:** + +Specifying *create* for a Put operation *without* a condition has no observable effect, as unconditional Put operations will always write a new version of a document regardless of whether it existed already. 
+</Note> + +## Data dump + +To iterate over documents, use [visiting](/en/writing/visiting) — sample output: + +```json +{ + "pathId": "/document/v1/namespace/doc/docid", + "documents": [ + { + "id": "id:namespace:doc::id-1", + "fields": { + "title": "Document title 1" + } + } + ], + "continuation": "AAAAEAAAAAAAAAM3AAAAAAAAAzYAAAAAAAEAAAAAAAFAAAAAAABswAAAAAAAAAAA" +} +``` + +Note the *continuation* token — use this in the next request for more data. Below is a sample script dumping all data using [jq](https://stedolan.github.io/jq/) for JSON parsing. It splits the corpus in 8 slices by default; using a number of slices at least four times the number of container nodes is recommended for high throughput. Timeout can be set lower for benchmarking. (Each request has a maximum timeout of 60s to ensure progress is saved at regular intervals) + +```sh expandable +#!/bin/bash +set -eo pipefail + +if [ $# -gt 2 ] +then + echo "Usage: $0 [number of slices, default 8] [timeout in seconds, default 31536000 (1 year)]" + exit 1 +fi + +endpoint="https://my.vespa.endpoint" +cluster="db" +selection="true" +slices="${1:-8}" +timeout="${2:-31536000}" +curlTimeout="$((timeout > 60 ? 
60 : timeout))" +url="$endpoint/document/v1/?cluster=$cluster&selection=$selection&stream=true&timeout=$curlTimeout&concurrency=8&slices=$slices" +auth="--key my-key --cert my-cert -H 'Authorization: my-auth'" +curl="curl -sS $auth" +start=$(date '+%s') +doom=$((start + timeout)) + +## auth can be something like auth='--key data-plane-private-key.pem --cert data-plane-public-cert.pem' +curl="curl -sS $auth" + +function visit { + sliceId="$1" + documents=0 + continuation="" + while + printf -v filename "data-%03g-%012g.json.gz" $sliceId $documents + json="$(eval "$curl '$url&sliceId=$sliceId$continuation'" | tee >( gzip > $filename ) | jq '{ documentCount, continuation, message }')" + message="$(jq -re .message <<< $json)" && echo "Failed visit for sliceId $sliceId: $message" >&2 && exit 1 + documentCount="$(jq -re .documentCount <<< $json)" && ((documents += $documentCount)) + [ "$(date '+%s')" -lt "$doom" ] && token="$(jq -re .continuation <<< $json)" + do + echo "$documentCount documents retrieved from slice $sliceId; continuing at $token" + continuation="&continuation=$token" + done + time=$(($(date '+%s') - start)) + echo "$documents documents total retrieved in $time seconds ($((documents / time)) docs/s) from slice $sliceId" >&2 +} + +for ((sliceId = 0; sliceId < slices; sliceId++)) +do + visit $sliceId & +done +wait +``` + + +### Visiting throughput + +Note that visit with selection is a linear scan over all the music documents in the request examples at the start of this guide. Each complete visit thus requires the selection expression to be evaluated for all documents. Running concurrent visits with selections that match disjoint subsets of the document corpus is therefore a poor way of increasing throughput, as work is duplicated across each such visit. 
Fortunately, the API offers other options for increasing throughput: + +- Split the corpus into any number of smaller [slices](/en/reference/api/document-v1#slices), each to be visited by a separate, independent series of HTTP requests. This is by far the most effective setting to change, as it allows visiting through all HTTP containers simultaneously, and from any number of clients—either of which is typically the bottleneck for visits through */document/v1*. A good value for this setting is at least a handful per container. +- Increase backend [concurrency](/en/reference/api/document-v1#concurrency) so each visit HTTP response is promptly filled with documents. When using this together with slicing (above), take care to also stream the HTTP responses (below), to avoid buffering too much data in the container layer. When a high number of slices is specified, this setting may have no effect. +- [Stream](/en/reference/api/document-v1#stream) the HTTP responses. This lets you receive data earlier, and more of it per request, reducing HTTP overhead. It also minimizes memory usage due to buffering in the container, allowing higher concurrency per container. It is recommended to always use this, but the default is not to, due to backwards compatibility. + +## Getting started + +<Tip> +**Pro-tip:** + +It is easy to generate a `/document/v1` request by using the [Vespa CLI](/en/clients/vespa-cli), with the `-v` option to output a generated `/document/v1` request - example: +</Tip> + +```sh +$ vespa document -v ext/A-Head-Full-of-Dreams.json + + curl -X POST -H 'Content-Type: application/json' + --data-binary @ext/A-Head-Full-of-Dreams.json + http://127.0.0.1:8080/document/v1/mynamespace/music/docid/a-head-full-of-dreams + + Success: put id:mynamespace:music::a-head-full-of-dreams +``` + +See the [document JSON format](/en/reference/schemas/document-json-format) for creating JSON payloads. 
+ +This is a quick guide to dumping random documents from a cluster to get started: + +<Steps> +<Step> +To get documents from a cluster, look up the content cluster name from the configuration, like in the [album-recommendation](https://github.com/vespa-engine/sample-apps/blob/master/album-recommendation/app/services.xml) example: `<content id="music" version="1.0">`. +</Step> +<Step> +Use the cluster name to start dumping document IDs (skip `jq` for full json): + +```bash +$ curl -s 'http://localhost:8080/document/v1/?cluster=music&wantedDocumentCount=10&timeout=60s' | \ + jq -r .documents[].id +``` + +```bash +id:mynamespace:music::love-is-here-to-stay +id:mynamespace:music::a-head-full-of-dreams +id:mynamespace:music::hardwired-to-self-destruct +``` + +`wantedDocumentCount` is useful to let the operation run longer to find documents, to avoid an empty result. This operation is a scan through the corpus, and it is normal to get an empty result and a [continuation token](#data-dump). +</Step> +<Step> + +Look up the document with id `id:mynamespace:music::love-is-here-to-stay`: + +```bash +$ curl -s 'http://localhost:8080/document/v1/mynamespace/music/docid/love-is-here-to-stay' | jq . +``` + +```json +{ + "pathId": "/document/v1/mynamespace/music/docid/love-is-here-to-stay", + "id": "id:mynamespace:music::love-is-here-to-stay", + "fields": { + "artist": "Diana Krall", + "year": 2018, + "category_scores": { + "type": "tensor<float>(cat{})", + "cells": { + "pop": 0.4000000059604645, + "rock": 0, + "jazz": 0.800000011920929 + } + }, + "album": "Love Is Here To Stay" + } +} +``` + +</Step> +<Step> + +Read more about [document IDs](/en/schemas/documents). + +</Step> +</Steps> + +## Troubleshooting + +- When troubleshooting documents not found using the query API, use [vespa visit](/en/clients/vespa-cli#documents) to export the documents. Then compare the `id` field with other user-defined `id` fields in the query. 
+ ```bash + $ vespa visit + ``` + + ```json + { + "id": "id:mynamespace:music::when-we-all-fall-asleep-where-do-we-go", + "fields": { + "artist": "Billie Eilish", + "doc_id": 12345 + } + } + ``` + Find more details on the components of the [document ID](/en/schemas/documents#id-scheme). +- Document not found responses look like: + ```bash + $ curl http://127.0.0.1:8080/document/v1/mynamespace/music/docid/non-existing-doc + ``` + + ```json + { + "pathId": "/document/v1/mynamespace/music/docid/non-existing-doc", + "id": "id:mynamespace:music::non-existing-doc" + } + ``` + + This might look like an empty document, use `-v` for more output: + + ```bash + $ curl -v http://127.0.0.1:8080/document/v1/mynamespace/music/docid/non-existing-doc + > GET /document/v1/mynamespace/music/docid/non-existing-doc HTTP/1.1 + > Host: 127.0.0.1:8080 + > User-Agent: curl/7.88.1 + > Accept: */* + > + < HTTP/1.1 404 Not Found + < Date: Fri, 26 May 2023 08:53:20 GMT + < Content-Type: application/json;charset=utf-8 + < Content-Length: 108 + ``` + + ```json + { + "pathId": "/document/v1/mynamespace/music/docid/non-existing-doc", + "id": "id:mynamespace:music::non-existing-doc" + } + ``` + + Observe the *404 Not Found*. 
Using the [Vespa CLI](/en/clients/vespa-cli#documents) is great for troubleshooting - use `-v` for verbose output, this prints an equivalent `curl` command: + + ```bash + $ vespa document get -v id:mynamespace:music::non-existing-doc + curl -X GET http://127.0.0.1:8080/document/v1/mynamespace/music/docid/non-existing-doc + ``` + + Error: Invalid document operation: 404 Not Found + + ```json + { + "pathId": "/document/v1/mynamespace/music/docid/non-existing-doc", + "id": "id:mynamespace:music::non-existing-doc" + } + ``` +- Query results can have results like: + + ```json + { + "id": "index:mydoctype/3/399f8030300282ca93929939", + "relevance": 0, + "source": "test", + "fields": { + "sddocname": "testdoc", + "myfield": 12 + } + } + ``` + + [Query result IDs](/en/reference/querying/default-result-format#id) are not the same as document IDs. Use a separate field for the document ID, if needed. + +- Delete *all* documents in the *music* schema, with security credentials: + + ```bash + $ curl -X DELETE \ + --cert data-plane-public-cert.pem --key data-plane-private-key.pem \ + "http://localhost:8080/document/v1/mynamespace/music/docid?selection=true&cluster=my_cluster" + ``` + + +## Request size limit + +Starting from version 8.577.16, Vespa returns 413 (Content too large) as a response to POST and PUT requests that are above the request size limit. To avoid this, automatically check document size and truncate or split large documents before feeding. For optimal performance, it is recommended to keep the document size below 10 MB. + +## Backpressure + +Vespa returns response code 429 (Too Many Requests) as a backpressure signal whenever client feed throughput exceeds system capacity. Clients should implement retry strategies as described in the [HTTP best practices](/en/clients/http-best-practices) document. + +Instead of implementing your own retry logic, consider using Vespa's feed clients, which automatically handle retries and backpressure. 
See the [feed command](/en/clients/vespa-cli#documents) of the Vespa CLI and the [vespa-feed-client](/en/clients/vespa-feed-client). + +The `/document/v1` API includes a configurable operation queue that by default is tuned to balance latency, throughput and memory. Applications can adjust this balance by overriding the parameters defined in the [document-operation-executor](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/document-operation-executor.def) config definition. + +To optimize for higher throughput at the cost of increased latency and higher memory usage on the container, increase any of the `maxThrottled` (maximum queue capacity in number of operations), `maxThrottledAge` (maximum time in queue in seconds), and `maxThrottledBytes` (maximum memory usage in bytes) parameters. This allows the container to buffer more operations during temporary spikes in load, reducing the number of 429 responses while increasing request latency. Make sure to increase operation and client timeouts to accommodate for the increased latency. + +See the [config definition](https://github.com/vespa-engine/vespa/blob/master/configdefinitions/src/vespa/document-operation-executor.def) for a detailed explanation of each parameter. + +Set the values to `0` for the opposite effect, i.e. to optimize for latency. Operations will be dispatched directly, and failed out immediately if the number of pending operations exceeds the dynamic window size of the document processing pipeline. + +*Example: overriding the default value of all 3 parameters to `0`.* + +```js +<container id="feed" version="1.0"> + <document-api/> + + <config name="com.yahoo.document.restapi.document-operation-executor"> + <maxThrottled>0</maxThrottled> + <maxThrottledAge>0</maxThrottledAge> + <maxThrottledBytes>0</maxThrottledBytes> + </config> + +</container> +``` + +The effective operation queue configuration is logged when the container starts up, see below example. 
+ +```text +INFO container Container.com.yahoo.document.restapi.resource.DocumentV1ApiHandler Operation queue: max-items=256, max-age=3000 ms, max-bytes=100 MB +``` + +You can observe the state of the operation queue through the metrics [`httpapi_queued_operations`](/en/reference/operations/metrics/container#httpapi_queued_operations), [`httpapi_queued_bytes`](/en/reference/operations/metrics/container#httpapi_queued_bytes) and [`httpapi_queued_age`](/en/reference/operations/metrics/container#httpapi_queued_age). + +## Using number and group id modifiers + +Do not use group or number modifiers with regular indexed mode document types. These are special cases that only work as expected for document types with [mode=streaming or mode=store-only](/en/reference/applications/services/content#document). Examples: + +| | | +| :--- | :--- | +|**Get**|Get a document in a group:<br/><br/>`$ curl http://localhost:8080/document/v1/mynamespace/music/number/23/some_key`<br/><br/>`$ curl http://localhost:8080/document/v1/mynamespace/music/group/mygroupname/some_key`| +|**Visit** |Visit all documents for a group:<br/><br/>`$ curl http://localhost:8080/document/v1/namespace/music/number/23/`<br/><br/>`$ curl http://localhost:8080/document/v1/namespace/music/group/mygroupname`| diff --git a/mintlify-docs/en/writing/feed-block.mdx b/mintlify-docs/en/writing/feed-block.mdx new file mode 100644 index 0000000000..0d6a01e1b9 --- /dev/null +++ b/mintlify-docs/en/writing/feed-block.mdx @@ -0,0 +1,59 @@ +--- +title: "Feed block" +--- + +A content cluster blocks external write operations when at least one content node has reached the [resource limit](/en/reference/applications/services/content#resource-limits) of disk or memory. This is done to avoid saturating resource usage on content nodes. The *Cluster controller* monitors the resource usage of the content nodes and decides whether to block feeding. 
Transient resource usage (see details in the metrics below) is not included in the monitored usage. This ensures that transient resource usage is covered by the resource headroom on the content nodes, instead of causing feed to be blocked due to natural fluctuations. + +<Note> +**Note:** + +When running Vespa in a Docker image on a laptop, one can easily get `[UNKNOWN(251009) @ tcp/vespa-host:19112/default]: ReturnCode(NO_SPACE, External feed is blocked due to resource exhaustion: in content cluster 'example': disk on node 0 [vespa-host] is 81.7% full (the configured limit is 80.0%, effective limit lowered to 79.0% until feed unblocked)`. Fix this by increasing allocated storage for the Docker daemon, cleaning up unused volumes, or removing unused Docker images. +</Note> + +HTTP clients will see *507 Server Error: Insufficient Storage* when this happens. + +When feed is blocked, write operations are rejected by *Distributors*. All Put operations and most Update operations are rejected. These operations are still allowed: + +- Remove operations +- Update [assign](/en/reference/schemas/document-json-format#assign) operations to numeric single-value fields + +To remedy, add nodes to the content cluster. The data will [auto-redistribute](/en/content/elasticity), and feeding is unblocked when all content nodes are below the limits. For self-managed Vespa you can configure [resource-limits](/en/reference/applications/services/content#resource-limits), although this is not recommended. Increasing them too much might lead to OOM and content nodes being unable to start. + +<Warning> +**Important:** + +Always **add** nodes, do not change node capacity - in practice, this is safer and quicker. As most Vespa applications are set up on homogeneous nodes, changing node capacity can cause a full node set swap and more data copying than just adding more nodes of the same kind. Copying data will in itself stress nodes; adding one node is normally the smallest and safest change. 
+</Warning> + +These [metrics](/en/operations/metrics) are used to monitor resource usage and whether feeding is blocked: + +| | | +| :--- | :--- | +| **cluster-controller.resource_usage.nodes_above_limit** | The number of content nodes that are above one or more resource limits. When above 0, feeding is blocked. | +| **content.proton.resource_usage.disk** | A number between 0 and 1, indicating how much disk (of total available) is used on the content node. Transient disk used during [disk index fusion](/en/content/proton#disk-index-fusion) is not included. | +| **content.proton.resource_usage.memory** | A number between 0 and 1, indicating how much memory (of total available) is used on the content node. Transient memory used by [memory indexes](/en/content/proton#memory-index-flush) is not included. | + +When feeding is blocked, error messages are returned in write operation replies - example: + +```text +ReturnCode(NO_SPACE, External feed is blocked due to resource exhaustion: + in content cluster 'example': memory on node 0 [my-vespa-node-0.example.com] is 82.0% full (the configured limit is 80.0%, effective limit lowered to 79.0% until feed unblocked)) +``` + +Note that when feeding is blocked resource usage needs to decrease below another, lower limit before getting unblocked. This is to avoid flip-flopping between blocking and unblocking feed when being near the limit. This lower limit is 1% lower than the configured limit. + +The address space used by data structures in attributes (*Multivalue Mapping*, *Enum Store*, and *Tensor Store*) can also go full and block feeding - see [attribute data structures](/en/content/attributes/#data-structures) for details. This will rarely happen. 
The following metric is used to monitor address space usage: + +| | | +| :--- | :--- | +| **content.proton.documentdb.attribute.resource_usage.address_space.max** | A number between 0 and 1, indicating how much address space is used by the worst attribute data structure on the content node. | + + +An error is returned when the address space limit (default value is 0.90) is exceeded: + +```text +ReturnCode(NO_SPACE, External feed is blocked due to resource exhaustion: + in content cluster 'example': attribute-address-space:example.ready.a1.enum-store on node 0 [my-vespa-node-0.example.com] is 91.0% full (the configured limit is 90.0%)) +``` + +To remedy, add nodes to the content cluster to distribute documents with attributes over more nodes. \ No newline at end of file diff --git a/mintlify-docs/en/writing/indexing-paged-vectors.mdx b/mintlify-docs/en/writing/indexing-paged-vectors.mdx new file mode 100644 index 0000000000..aee5573261 --- /dev/null +++ b/mintlify-docs/en/writing/indexing-paged-vectors.mdx @@ -0,0 +1,74 @@ +--- +title: "Indexing paged vectors" +--- + +Most of the data of a vector (tensor) index is the vectors themselves. The vector data must be accessed to calculate true distances both when querying the index and when adding vectors to it, and due to the high dimensionality these accesses are effectively random. While it is viable to [page](/en/content/attributes/#paged-attributes) indexed vector attributes to disk for *queries* if somewhat higher latency can be tolerated, it does not allow a large vector index to be built at reasonable speed: To create a high quality index, each vector insert must make many distance calculations, which results in low write throughput when the vectors in the index do not reside in RAM. + +To build vector indexes larger than available memory efficiently the procedure described here can be used. 
This is suitable when: + +- You want to build an index for vector retrieval (not just store the vectors for ranking/brute force NN), with a vector data set that doesn't fit in memory across the content nodes you want to deploy for it. +- The vector data in question is mostly write-once (frequent writes to other fields are fine), and rescaling of the content cluster will not be necessary. + +## Steps + +<Steps> +<Step> +Declare the vector field(s) to be indexed as [paged](/en/content/attributes/#paged-attributes). + +```js + schema docs { + document docs { + field myVectors type tensor<bfloat16>(chunk{}, x[384]) { + indexing: attribute | index + attribute: paged + } + } + } +``` + +</Step> +<Step> +Calculate how much data you can fit in memory: + + Calculate your attribute raw data size (taking just the vector is close enough unless you have many other attribute fields),<br/> + multiply by the number of [searchable-copies](/en/reference/applications/services/content#searchable-copies) you want,<br/> + multiply by 1.2 to add room for the index over the vectors,<br/> + divide by 0.65 to leave room for working memory,<br/> + multiply by your total number of documents.<br/><br/> + This gives you the total memory needed across all the nodes in your content cluster (or across one group if you have multiple).<br/><br/> + **Example** with the type above with 1B documents and 10 chunks average per document:<br/> + 10 \* 384\*2 bytes \* 2 \* 1.2 / 0.65 \* 1B = 14.178 Gb total cluster memory. +</Step> +<Step> +Create one document type per data subset which fits in memory under the calculation above. + +**Example:** Suppose you want to create a vector index over four years' worth of documents of type `docs` and that you only want to allocate enough memory to fit 25% of the vector data across the cluster. Create four subtypes of `docs`, one for each year: docs2021, docs2022, docs2023 and docs2024, in four different schema files. 
Each of these can inherit the parent type and otherwise be empty: + +```js + schema docs2021 inherits docs { + document docs2021 inherits docs { + } + } +``` + +You can of course also add time-period-specific fields and ranking here. +</Step> +<Step> +Add all the subtypes to the content cluster you want in services.xml: + +```js + <content id="myClusterId" version="1.0"> + <documents> + <document type="docs2021" mode="index" /> + <document type="docs2022" mode="index" /> + ... + </documents> +``` +</Step> +<Step> +Feed each of the types completely one by one, without applying queries at the same time. +</Step> +<Step> +Once all the types are written, you can apply query traffic. Vespa will search across all the types by default, but it is possible to restrict to a subset using the [restrict](/en/reference/api/query#model.restrict) query parameter. +</Step> +</Steps> diff --git a/mintlify-docs/en/writing/indexing.mdx b/mintlify-docs/en/writing/indexing.mdx new file mode 100644 index 0000000000..f7ca5fe54a --- /dev/null +++ b/mintlify-docs/en/writing/indexing.mdx @@ -0,0 +1,129 @@ +--- +title: "Indexing" +--- + +Refer to the [overview](/en/learn/overview). The primary index configuration is the [schema](/en/basics/schemas). + +[services.xml](/en/reference/applications/services/services) configures how indexing is distributed to the nodes, see [multinode-HA](https://github.com/vespa-engine/sample-apps/blob/master/examples/operations/multinode-HA/services.xml) for a full example: + +```xml +<container id="feed" version="1.0"> + <document-api /> + <document-processing /> + <nodes count="2" /> +</container> +``` + +It is important to configure both `document-api` and `document-processing` to run the document processing on the same nodes as the document API endpoint, to avoid network hops to other nodes (for better throughput). 
Normally, one will run the indexing preprocessing on these nodes, too; see the [document-processing reference](/en/reference/applications/services/content#document-processing) for a full example. + +## Date indexing + +Vespa does not have a "date" field type. Best practice is to use a [long](/en/reference/schemas/schemas#long) field. If the date is a string in the source data, one can use [to_epoch_second](/en/reference/writing/indexing-language#to_epoch_second) to transform into a long-field: + +```js +schema docs { + + document docs { + field date_string type string { + indexing: summary + } + } + + field date type long { + indexing: input date_string | to_epoch_second | attribute | summary + } + + field last_modified type long { + indexing: now | attribute | summary + } +} +``` + +The synthetic `date` field can be used in queries and [grouping](/en/querying/grouping): + +```json +"fields": { + "last_modified": 1695995429, + "date": 1703437243, + "date_string": "2023-12-24T17:00:43.000Z" +} +``` + +<Note> +**Note:** + +The `date` and `last_modified` fields above are placed outside the `document` section, as their content is generated from the document input. Use `vespa visit --field-set "[all]"` to dump all fields. +</Note> + +Note how [now](/en/reference/writing/indexing-language#now) is used to get the current time. + +## Execution value example + +Accessing the execution value (the value passed into this expression) explicitly is useful when it is to be used as part of an expression such as concatenation. In this example we have a document with a title and an array of sentences, and we prepend the document title (and a space) to each sentence, before converting it to a set of embedding vectors (represented by a 2d mixed tensor). + +```js +input mySentenceArray | for_each { input title . " " . _ } | embed | attribute my2dTensor | index my2dTensor +``` + +## Choice (||) example + +The choice expression is used to provide alternatives if an expression may return null. 
+ +```js +(input myField1 || "") . " " . (input myField2 || "") | embed | attribute | index +``` + +In this example two fields are concatenated, but if one of the fields is empty, the empty string is used instead. If the empty string alternatives are not provided, no embedding will be produced if either input field is missing. + +## select_input example + +The `select_input` expression is used to choose a statement to execute based on which fields are non-empty in the input document: + +```js +select_input { + CX: input CX | set_var CX; + CA: input CA . " " . input CB | set_var CX; +} +``` + +This statement executes `input CX | set_var CX;` unless CX is empty. If so, it will execute `input CA . " " . input CB | set_var CX;` unless CA is empty. + +## Switch example + +The switch-expression behaves similarly to the switch-statement in other programming languages. Each case in the switch-expression consists of a string and a statement. The execution value is compared to each string, and if there is a match, the corresponding statement is executed. An optional default operation (designated by `default:`) can be added to the end of the switch: + +```js +input mt | switch { + case "audio": input fa | index; + case "video": input fv | index; + default: 0 | index; +}; +``` + +## Indexing statements example + +Using indexing statements, multiple document fields can be used to produce one index structure field. For example, the index statement: + +```js +input field1 . input field2 | attribute field2; +``` + +combines *field1* and *field2* into the attribute named *field2*. 
When partially updating documents that contain indexing statements which combine multiple fields, the following rules apply: + +- Only attributes where *all* the source values are available in the source document update will be updated +- The document update will fail when indexed (only) if *no* attributes end up being updated when applying the rule above + +Example: If a schema has the indexing statements + +```js +input field1 | attribute field1; +input field1 . input field2 | attribute field2; +``` + +the following will happen for the different partial updates: + +| Partial update contains | Result | +|:---|:---| +| field1 | field1 is updated | +| field2 | The update fails | +| field1 and field2 | field1 and field2 are updated | diff --git a/mintlify-docs/en/writing/initial-batch-feed.mdx b/mintlify-docs/en/writing/initial-batch-feed.mdx new file mode 100644 index 0000000000..c9ac21d72c --- /dev/null +++ b/mintlify-docs/en/writing/initial-batch-feed.mdx @@ -0,0 +1,82 @@ +--- +title: "Initial batch feed" +sidebarTitle: "Index bootstrap" +--- + +One of the first things you do when creating a Vespa application is deploying it to Vespa Cloud and batch feeding all your data. We assume you have made the first version of the schema file(s) and generated a set of Vespa feed files. You will face some of these questions: + +- What is a good resource configuration for the content nodes holding the index? +- What happens when I change the node resource configuration? +- How many nodes are required to hold the full index? +- Is using autoscaling a good idea when batch feeding? +- How do I know what the feeding bottleneck is? +- What is the difference between a container and content cluster? + +Review the introduction section of the [Vespa overview](/en/learn/overview) to understand the difference between *container* and *content* clusters before continuing. 
+ +The first thing to note is that changes to node resources (i.e., instance type) are [automated](/en/performance/node-resources). This makes it easy to just get started feeding, observe, change, and repeat. + +The next thing is, changing node resources takes some time to apply, as data and indexes are migrated to the node(s) with the new configuration — more details below. This also means it is easier to change this with little data and few nodes deployed, like in the 1% step below. Therefore, do an iteration or two to get the disk/memory balance right. + +Once the node resources configuration is about right, feed more data. This is potentially time-consuming. Make sure the bottleneck is the nodes in the content clusters (you should see at least one thread at 100%). It is a good idea to make sure that the *container* cluster with the feed endpoint is not at 100% CPU; add more nodes if so. [More details](/en/performance/sizing-feeding#feed-testing). + +Finally, we recommend not using [autoscaling](/en/operations/autoscaling) in this phase. Autoscaling is great for incremental changes; in *this* case, we feed from zero to full. Best practice is, once you have a rough idea based on the smaller feed batches, allocate 50-100% more content node capacity than your estimate, and feed to full. This will speed up the process with less wall-clock time and no delays due to [blocked feeding](/en/writing/feed-block). When the initial feed is done, it is easy to evaluate possible overcapacity and reduce node count. + +## Summary + +When bootstrapping an index, one must consider node resource configuration and number of nodes. The strategy is to iterate: + +<Frame>![Growing a Vespa cluster in steps](/assets/img/index-bootstrap.svg)</Frame> + +1. Feed smaller datasets +2. Evaluate +3. Deploy new node counts / node resource configuration +4. Wait for data migration to complete +5. 
Evaluate + +While doing this, ensure the cluster is **not more than 50% full** — this gives headroom to later increase/shrink the index and change schema configuration easily using automatic reindexing. It is easy to downscale resources after the bootstrap, and it saves a lot of time keeping the clusters within limits — hence max 50%. + +## Preparations + +The content node resource configuration should not have ranges for index bootstrap, as autoscaling will interfere with the evaluation in this step. This is a good starting point, **make sure there are no ranges like [2,3]**: + +```xml +<nodes count="2"> + <resources vcpu="2.0" memory="8Gb" disk="50Gb" /> +</nodes> +``` + +To evaluate how full the content cluster is, use the "Metrics" view in the Vespa Cloud Console or [metrics](/en/operations/monitoring) from content nodes — example: + +```sh +$ curl \ + --cert data-plane-public-cert.pem \ + --key data-plane-private-key.pem \ + https://ab1f1234.b68a8765.z.vespa-app.cloud/prometheus/v1/values | \ + egrep 'disk.util|mem.util' | egrep 'clusterId="content/' +``` + +Once able to see the metrics above, you are ready to bootstrap the index. + +## Bootstrap + +|Step|Description| +|---|---| +|**1% feed**|The purpose of this step is to feed a tiny chunk of the corpus to:<br/><br/>1. Estimate the memory and disk resource configuration.<br/>2. Estimate the number of nodes required for the 10% step.<br/>Feed a small data set, using `vespa feed` as in [getting started](/en/basics/deploy-an-application). Observe the util metrics, stop no later than 50% memory/disk util. The resource configuration should be modified so disk is in the 50-80% range of memory. Example: if memory util is 50%, disk util should be 30-45%. The reasoning is that memory is a more expensive component than disk, better over-allocate on disk and just track memory usage.<br/><br/>Look at memory util. 
Say the 1% feed caused a 15% memory util - this means that the 10% feed will take 150%, or 3X more than the 50% max. There are two options, either increase memory/disk or add more nodes. A good rule of thumb at this stage is that the final 100% feed could fit on 4 or more nodes, and there is a 2-node minimum for redundancy. The default configuration at the start of this document is quite small, so a 3X at this stage means triple the disk and memory, and add more nodes in later steps.<br/><br/>Deploy changes (if needed). Whenever node count increases or resource configuration is modified, new nodes are added, and data is migrated to new nodes. Example: growing from 2 to 3 nodes means each of the 2 current nodes will migrate 33% of their data to the new node. Read more in [elasticity](/en/content/elasticity).<br/><br/>It saves time to let the cluster finish data migration before feeding more data, and it lets you observe metrics before next steps. In this step it will be fast as the data volume is small, but nevertheless check the "Content Buckets Out Of Sync (%), per Cluster" panel in the Vespa Cloud Console Metrics view, or the [vds.idealstate.merge\_bucket.pending.average](/en/reference/operations/metrics/distributor#vds_idealstate_merge_bucket_pending) metric. Wait for 0 for all nodes - this means data migration is completed:<br/><br/>`$ curl \`<br/>`--cert ~/.vespa/mytenant.myapp.default/data-plane-public-cert.pem \`<br/>`--key ~/.vespa/mytenant.myapp.default/data-plane-private-key.pem \`<br/>`https://ab1f1234.b68a8765.z.vespa-app.cloud/prometheus/v1/values?consumer=Vespa \`<br/>`egrep 'vds_idealstate_merge_bucket_pending_average'`<br/><br/>At this point, you can validate that both memory and disk util is less than 5%, so the 10% feed will fit.| +|**10% feed**|Feed the 10% corpus, still observing util metrics.<br/><br/>As the content cluster capacity is increased, it is normal to eventually be CPU bound in the container or content cluster. 
Grep for `cpu_util` in metrics (like in the example above) to evaluate.<br/><br/>A 10% feed is a great baseline for the full capacity requirements. Fine tune the resource config and number of hosts as needed. If you deploy changes, wait for the `vds.idealstate.merge_bucket.pending.average` metric to go to zero again. This now takes longer time as nodes are configured larger, it normally completes within a few hours.<br/><br/>Again validate memory and disk util is less than 5% before the full feed.| +|**100% feed**|Feed the full data set, observing the metrics. You should be able to estimate timing by extrapolation, this is linear at this scale. At feed completion, observe the util metrics for the final fine-tuning.<br/><br/>A great exercise at this point is to add a node then reduce a node, and take the time to completion (`vds.idealstate.merge_bucket.pending.average` to 0). This is useful information when the application is in production, as you know the time to add or shrink capacity in advance.<br/><br/>It can be a good idea to reduce node count to get the memory util closer to 70% at this step, to optimize for cost. However, do not spend too much time optimizing in this step, next step is normally [sizing for query load](/en/performance/sizing-search). This will again possibly alter resource configuration and node counts / topology, but now you have a good grasp at how to easily bootstrap the index for these experiments.| + +## Troubleshooting + +Make sure you are able to feed and access documents as the example in [preparations](#preparations). Read [security guide](/en/security/guide) for cert/key usage. + +Feeding too much will cause a [feed blocked](/en/writing/feed-block) state. Add a node to the full content cluster in services.xml, and wait for data migration to complete — i.e. wait for the `vds.idealstate.merge_bucket.pending.average` metric to go to zero. It is better to add a node than increasing node resources, as data migration is quicker. 
+ +## Further reading + +<CardGroup> + <Card title="Reads and Writes" icon="database" href="/en/writing/reads-and-writes" horizontal /> + <Card title="Vespa Feed Sizing Guide" icon="bar-chart" href="/en/performance/sizing-feeding" horizontal /> + <Card title="Vespa Cloud Benchmarking" icon="volcano" href="/en/performance/benchmarking-cloud" horizontal /> + <Card title="Monitoring" icon="display" href="/en/operations/monitoring" horizontal /> +</CardGroup> \ No newline at end of file diff --git a/mintlify-docs/en/writing/partial-updates.mdx b/mintlify-docs/en/writing/partial-updates.mdx new file mode 100644 index 0000000000..799c8741bb --- /dev/null +++ b/mintlify-docs/en/writing/partial-updates.mdx @@ -0,0 +1,66 @@ +--- +title: "Partial Updates" +--- + +A partial update is an update to one or more fields in a document. It also includes updating all index structures so the effect of the partial update is immediately observable in queries. See [document update JSON format](/en/reference/schemas/document-json-format#update) for full details on the various possible partial update operations. + +<Frame> +![Attribute is an in-memory data structure](/assets/img/attributes-update.svg) +</Frame> + +In Vespa, all fields can be partially updated by default. A field is index, attribute or summary or a combination of these, and both index and attribute fields can be queried. + +- For [index](/en/content/proton#index) and summary fields, an update means a read-modify-write to the [document store](/en/content/proton#document-store) and limits throughput. +- Most [attribute](/en/content/attributes) fields do not require the document store read-modify-write, increasing write throughput by orders of magnitude. 
The following attribute types require a read-modify-write to the document store: + - [array of struct](/en/reference/schemas/schemas#array) + - [map of primitive and struct](/en/reference/schemas/schemas#map) + - [predicate](/en/reference/schemas/schemas#predicate) + - [reference](/en/reference/schemas/schemas#reference) + +<Warning> +**Important:** + +For highest possible write throughput for field updates, use attributes to write at memory speed. +</Warning> + +| Field Setting | Searchable | Fast searchable | Matching | Ranking | Display in results | +| :--- | :--- | :--- | :--- | :--- | :--- | +| index | Y | Y | Text and Exact matching | Y | N | +| attribute | Y | Y with attribute:fast-search | Exact matching | Y | Y | +| summary | N | N | N | N | Y | + +Examples: + +| Schema | Description | +| :--- | :--- | +| `field user type string { indexing: summary `|` index }` | Summary + index field. The field is stored in the document store, a partial update to the field will trigger read + write. | +| `field user type string { indexing: attribute }` | Attribute only field. The field is stored in the attribute (in-memory) and a partial update will update the document in-place and will be visible for queries, ranking, grouping and sorting immediately. | + +## Use cases + +Partial updates have many use cases. *Functionally*, it enables updating a document without anything else than the ID, simplifying logic in the upper levels of the serving stack. *Performance-wise*, partial updates enables applications with a real-time update flow in tens of thousands updates per second. Examples: + +| Use case | Description | +| :--- | :--- | +| **Filtering — Inventory updates** | Update product price and inventory count in real time. Do not show items out of stock. | +| **Filtering — Update relations** | Add a "this person likes me" to the "likes me" [set](/en/reference/querying/yql#weightedset) — display candidates based on sets of likes/dislikes/other relations. 
| +| **Ranking** | Update click / views / non-clicks: Feed usage data to use in ranking — rank popular items higher. | + + +## Write pipeline + +Refer to [proton](/en/content/proton) for an overview of the write-pipeline and the Transaction Log Server (TLS). + +| Field Setting | Description | +| :--- | :--- | +| index | For all [indexed fields](/en/reference/schemas/schemas#index), a memory index is used for the recent changes, implemented using B-trees. This is periodically [flushed](/en/content/proton#memory-index-flush) to a disk-based posting list index. Disk-based indexes are subsequently [merged](/en/content/proton#disk-index-fusion). <br/><br/> Updating the in-memory B-trees is lock-free, implemented using copy-on-write semantics. This gives high performance, with a predictable steady-state CPU/memory use. The driver for this design is the requirement for a sustained, high change rate, with stable, predictable read latencies and small temporary increases in CPU/memory. This compared to index hierarchies, merging smaller real-time indices into larger, causing temporary hot-spots.<br/><br/> When updating an indexed field, the document is read from the [document store](/en/content/proton#document-store), the field is updated, and the full document is written back to the store. At this point, the change is searchable, and an ACK is returned to the client. Use [attributes](/en/content/attributes/) to avoid such document disk accesses and increase performance for partial updates. Find more details in [feed performance](/en/performance/sizing-feeding/). | +| attribute | Attribute fields are in-memory fields, see [attributes](/en/content/attributes/). This makes updates inexpensive and fast. Attribute data is periodically flushed, see [attribute-flush](/en/content/proton#attribute-flush). 
Note that operations are persisted to the Transaction Log Service (TLS), in the rare case of a power failure or unclean shutdown, the operations are synced from the TLS.<br/><br/> Note there is no transactional support for updates across fields. To support high rate, there is no coordination between threads - example: <br/><br/> `{`<br/> `"update" : "id:namespace:doctype::1",`<br/> `"fields" : {` <br/>`"firstName" : { "assign" : "John" },`<br/> `"lastName" : { "assign" : "Smith" }`<br/> `}`<br/> `}`<br/><br/> Above, the attributes *firstName* and *lastName* are updated in the same operation from the client, whereas the update in the search core is non-transactional. This is a throughput vs consistency tradeoff that enables the extreme update rates without being a practical limitation for many applications. More details in [attributes](/en/content/attributes/).<br/><br/> Updating [multivalue](/en/querying/searching-multivalue-fields) attributes (arrays, maps, sets, tensors) means reading the current value, making the update and writing it back:<br/><br/> • [Array of primitive types](/en/reference/schemas/schemas#array), [weightedsets](/en/reference/schemas/schemas#weightedset) and [tensors](/en/reference/schemas/schemas#tensor) are in memory and therefore fast, see [attribute data structures](/en/content/attributes/#data-structures) for performance considerations. <br/><br/> • If the attribute field is an [array of struct](/en/reference/schemas/schemas#array) or [map](/en/reference/schemas/schemas#map), values are written in the document store and update rates are hence lower - refer to [#10892 updates of array of map/struct](https://github.com/vespa-engine/vespa/issues/10892). <br/><br/> Query execution time can be improved by adding an in-memory B-tree posting list structure using [fast-search](/en/performance/feature-tuning#when-to-use-fast-search-for-attribute-fields). 
This increases work when updating, as both the value and the posting list are updated, and hence decreases update throughput.<br/><br/> See [sizing-feeding](/en/performance/sizing-feeding/#attribute-store) for how to ensure an attribute is in memory on all nodes with a replica (searchable-copies or fast-access). | +| summary | An update to the [document store](/en/content/proton#document-store) reads the current version, modifies it, and writes back a new blob. Refer to [document summaries](/en/querying/document-summaries/).<br/><br/> Attribute fields that are also in summary get their values from the memory structures, not the document store. Use [summary class](/en/reference/schemas/schemas#document-summary) with attributes only for applications with high write/query rates using memory only. | + +## Further reading + +- [reads and writes](/en/writing/reads-and-writes) — functional overview of the Document API +- [sizing-feeding](/en/performance/sizing-feeding) — troubleshooting +- [attributes](/en/content/attributes) — to understand all aspects of attributes +- [proton](/en/content/proton) — full write pipeline +- [parent-child](/en/schemas/parent-child) — how to use parent attributes for even higher update rates diff --git a/mintlify-docs/en/writing/reads-and-writes.mdx b/mintlify-docs/en/writing/reads-and-writes.mdx new file mode 100644 index 0000000000..59880a8ff4 --- /dev/null +++ b/mintlify-docs/en/writing/reads-and-writes.mdx @@ -0,0 +1,63 @@ +--- +title: "Reads and writes" +description: "This guide covers the aspects of accessing [documents](/en/schemas/documents) in Vespa. Documents are stored in content clusters. Writes (PUT, UPDATE, DELETE) and reads (GET) pass through a container cluster. Find a more detailed flow at the end of this article." +--- + +<Frame>![Vespa Overview](/assets/img/vespa-overview.svg)</Frame> + +Highlights: + +- Vespa's indexing structures are built for high-rate field updates. 
Refer to the [feed sizing guide](/en/performance/sizing-feeding) for write performance, and to [partial updates](/en/writing/partial-updates) for field updates in particular. +- Vespa supports [parent/child](/en/schemas/parent-child) for de-normalized data. This can be used to simplify the code to update application data, as one write will update all child documents. +- Applications can add custom feed [document processors](/en/applications/document-processors) and multiple container clusters — see [indexing](/en/writing/indexing) for details. +- Writes in Vespa are *consistent* in a stable cluster, but Vespa will prioritize availability over consistency when there is a conflict. See the [elasticity](/en/content/elasticity#consistency) documentation and the [Vespa consistency model](/en/content/consistency). It is recommended to use the same client instance for updating a given document when possible — for data consistency, but also [performance](/en/performance/sizing-feeding#concurrent-mutations) (see *concurrent mutations*). Read more on write operation [ordering](/en/content/content-nodes#ordering). For performance, group field updates to the same document into [one update operation](/en/performance/sizing-feeding#client-roundtrips). +- Applications can [auto-expire documents](/en/schemas/documents#document-expiry). This feature also blocks PUTs to documents that are already expired — see [indexing](/en/writing/document-routing#document-selection) and [document selection](/en/reference/applications/services/content#documents). This is a common problem when feeding test data with timestamps, and the writes are silently dropped. + +Also see [troubleshooting](/en/operations/self-managed/admin-procedures#troubleshooting). + +## Operations + +| Operation | Description | +| :--- | :--- | +| **Get** | Get a document by ID. | +| **Put** | Write a document by ID — a document is overwritten if a document with the same document ID exists. 
<br/><br/>Puts can have [conditions](/en/writing/document-v1-api-guide#conditional-writes) for test-and-set use cases. Conditions can be combined with [create if nonexistent](/en/writing/document-v1-api-guide#create-if-nonexistent), which causes the condition to be ignored if the document does not already exist. | +| **Remove** | Remove a document by ID. If the document to be removed is not found, it is not considered a failure. Read more about [data-retention](/en/operations/self-managed/admin-procedures#data-retention-vs-size). Also see [batch deletes](/en/writing/batch-delete).<br/><br/> Removes can have [conditions](/en/writing/document-v1-api-guide#conditional-writes) for test-and-set use cases.<br/><br/> A removed document is written as a tombstone, and later garbage collected — see [removed-db / prune / age](/en/reference/applications/services/content#removed-db-prune-age). Vespa does not retain, nor return, the document data of removed documents. | +| **Update** | Also referred to as [partial updates](/en/writing/partial-updates), as it updates one or more fields of a document by ID — the [document v1 API](/en/writing/document-v1-api-guide#put) can be used to perform [updates in the JSON Document format](/en/reference/schemas/document-json-format#update). If the document to update is not found, it is not considered a failure. Updates support [create if nonexistent](/en/writing/document-v1-api-guide#create-if-nonexistent) (upsert). <br/><br/>Updates can have [conditions](/en/writing/document-v1-api-guide#conditional-writes) for test-and-set use cases. <br/><br/>All data structures ([attribute](/en/content/attributes), [index](/en/content/proton#index) and [summary](/en/querying/document-summaries)) are updatable. Note that only *assign* and *remove* are idempotent — message re-sending can apply updates more than once. Use *conditional writes* for stronger consistency. 
<br/><br/>**All field types** <br/><br/>• [assign](/en/reference/schemas/document-json-format#assign) (may also be used to clear fields) <br/><br/> **Numeric field types** <br/><br/> • [increment](/en/reference/schemas/document-json-format#arithmetic). Also see [auto-generate weightedset keys](/en/reference/schemas/schemas#weightedset)<br/><br/> • [decrement](/en/reference/schemas/document-json-format#arithmetic),<br/><br/> • [multiply](/en/reference/schemas/document-json-format#arithmetic), <br/><br/>• [divide](/en/reference/schemas/document-json-format#arithmetic).<br/><br/> **Composite types**<br/><br/> • [add](/en/reference/schemas/document-json-format#add) For *array* and *weighted set*. To put into a *map*, see the [assign](/en/reference/schemas/document-json-format#assign) section, <br/><br/>• [remove](/en/reference/schemas/document-json-format#composite-remove),<br/><br/> • [match](/en/reference/schemas/document-json-format#match) Pick element from collection, then apply given operation to matched element,<br/><br/> • [accessing elements within a composite field using fieldpaths](/en/reference/schemas/document-json-format#fieldpath)<br/><br/> **Tensor types** <br/><br/>• [modify](/en/reference/schemas/document-json-format#tensor-modify) Modify individual cells in a tensor - can replace, add or multiply cell values,<br/><br/> • [add](/en/reference/schemas/document-json-format#tensor-add) Add cells to mapped or mixed tensors, <br/><br/>• [remove](/en/reference/schemas/document-json-format#tensor-remove) Remove cells from mapped or mixed tensors | + +## API and utilities + +Also see the [JSON Document format](/en/reference/schemas/document-json-format): + +| API / util | Description | +| :--- | :--- | +| [Vespa CLI](/en/clients/vespa-cli) | Command-line tool to `get`, `put`, `remove`, `update`, `feed`, `visit`. | +| [/document/v1/](/en/reference/api/document-v1) | API for `get`, `put`, `remove`, `update`, `visit`. 
| +| [Java Document API](/en/writing/document-api-guide) | Provides direct read and write access to Vespa documents using Vespa's internal communication layer. Use this when accessing documents from Java components in Vespa such as [searchers](/en/applications/searchers) and [document processors](/en/applications/document-processors). See the [Document](https://github.com/vespa-engine/vespa/blob/master/document/src/main/java/com/yahoo/document/Document.java) class. | +| [pyvespa](https://vespa-engine.github.io/pyvespa/reads-writes) | Python client library for reading and writing documents to Vespa. Provides convenient methods for feeding, querying, and visiting documents. Expect lower performance than Vespa CLI and vespa-feed-client for heavy batch feed operations. | + +Advanced / debugging tools: + +- [vespa-feed-client](/en/clients/vespa-feed-client): Java library and command line client for feeding document operations using [/document/v1/](/en/reference/api/document-v1). +- [vespa-feeder](/en/reference/operations/self-managed/tools#vespa-feeder) is a utility for feeding over the [Message Bus](/en/writing/document-routing). +- [vespa-get](/en/reference/operations/self-managed/tools#vespa-get) gets single documents over the [Message Bus](/en/writing/document-routing). +- [vespa-visit](/en/reference/operations/self-managed/tools#vespa-visit) gets multiple documents over the [Message Bus](/en/writing/document-routing). + +## Feed flow + +Use the [Vespa CLI](/en/clients/vespa-cli), [vespa-feed-client](/en/clients/vespa-feed-client), [pyvespa python client](https://vespa-engine.github.io/pyvespa/reads-writes) or [/document/v1/ API](/en/reference/api/document-v1) to read and write documents: + +<Frame>![Feed with feed client](/assets/img/elastic-feed-container.svg)</Frame> + +Alternatively, use [vespa-feeder](/en/reference/operations/self-managed/tools#vespa-feeder) to feed files or the [Java Document API](/en/writing/document-api-guide). 
+ +<Frame>![Feed with vespafeeder](/assets/img/elastic-feed-vespafeeder.svg)</Frame> + +[Indexing](/en/writing/document-routing#routing-for-indexing) and/or [document processing](/en/applications/document-processors) is a chain of processors that manipulate documents before they are stored. Document processors can be user defined. When using indexed search, the final step in the chain prepares documents for indexing. + +The [Document API](/en/writing/document-api-guide) forwards requests to distributors on content nodes. For more information, read about [content nodes](/en/content/content-nodes) and the [search core](/en/content/proton). + +## Further reading + +- [Visiting](/en/writing/visiting) +- [/document/v1/ API guide](/en/writing/document-v1-api-guide) +- [/document/v1/ API reference](/en/reference/api/document-v1) diff --git a/mintlify-docs/en/writing/visiting.mdx b/mintlify-docs/en/writing/visiting.mdx new file mode 100644 index 0000000000..d2fa1f9b0c --- /dev/null +++ b/mintlify-docs/en/writing/visiting.mdx @@ -0,0 +1,312 @@ +--- +title: "Visiting" +--- + +Visiting is a feature to efficiently get or process a set of documents, identified by a [document selection expression](/en/reference/writing/document-selector-language). + +Visiting is often used to back up or migrate applications, [cloning applications and data](/en/operations/cloning) is a good guide for this. 
+ +Use the [Vespa CLI](/en/clients/vespa-cli) to run visit — example, using the [quick start](/en/basics/deploy-an-application-local): + +```sh +$ vespa visit +``` + +```json +{ + "id": "id:mynamespace:music::love-is-here-to-stay", + "fields": { + "artist":"Diana Krall", + "year":2018, + "category_scores": { + "type": "tensor<float>(cat{})", + "cells": { + "pop": 0.4000000059604645, + "rock": 0.0, + "jazz": 0.800000011920929 + } + }, + "album": "Love Is Here To Stay" + } +} +``` + +Typically, the visit use cases are not time sensitive, like data reprocessing, and document dump for backup and cluster clone — [cloning applications and data](/en/operations/cloning) is a good read for more details. + +Visiting does not have snapshot isolation—it returns the state of documents as they were when iterated over. Iteration order is implementation-specific. See [the consistency documentation](/en/content/consistency#read-consistency) for more details. + +<Note> +**Note:** + +Due to the bucket iteration, visiting is normally a high-latency iteration. Even an empty content cluster is pre-partitioned into many data buckets to enable scaling. Dumping just one document requires iteration over all data buckets, unless *location*-specific selections are used. To test visiting performance, use larger data sets; do not extrapolate from small. Use the [query API](/en/querying/query-api) or [`vespa document get`](/en/clients/vespa-cli#documents) for low latency operations on small result sets. +</Note> + +See [request handling](#request-handling) for details on how visiting works. Also see the internal [vespa-visit](/en/reference/operations/self-managed/tools#vespa-visit) tool. For programmatic access, see the [visitor session API](/en/writing/document-api-guide#visitorsession). + +## Visiting from pyvespa + +The [pyvespa library](https://vespa-engine.github.io/pyvespa/reads-writes#visiting) provides a convenient Python interface for visiting documents. 
+ +## Data export + +Export data to stdout: + +```sh +$ vespa visit +``` + +To export a subset, `--slices 100 --slice-id 0` exports 1% of the corpus by efficiently iterating over only 1/100th of the data space. For a given number of `--slices`, it's possible to visit the entire corpus (possibly in parallel) with non-overlapping output by visiting with all `--slice-id` values from (and including) 0 up to (and excluding) `--slices`: + +```sh +$ vespa visit --slices 100 --slice-id 0 +``` + +<Note> +**Note:** + +If the application has [global document types](/en/reference/applications/services/content#document), use `--bucket-space global` to visit these documents. By default, visiting only iterates over non-global documents. +</Note> + +### Performance note: feeding data exported via visiting + +Vespa uses hashing to distribute documents pseudo-randomly across many [buckets](/en/content/buckets). The operations in an incoming document stream are expected to be distributed evenly across both the set of all buckets and the content nodes storing them. This parallelizes the load and efficiently utilizes the hardware resources in the cluster. + +Visiting iterates over the buckets in the cluster, returning all documents stored in a bucket before moving on to the next bucket (often processing many buckets in parallel). As a consequence there's a direct correlation between the internal document-to-bucket distribution and the document output ordering of the visiting process. + +If the output of visiting is fed directly back into a content cluster, this correlation means that the stream of documents is no longer uniformly distributed across buckets and/or content nodes. Prior to Vespa 8.349 this is likely to greatly reduce feeding performance due to increased contention around backend bucket-level write locks and indexing threads. Vespa 8.349 and beyond contains optimizations that bring re-feeding performance much closer to that of initial feeding. 
+ +A simple strategy to avoid this problem is to *shuffle* the visiting output prior to re-feeding it. This removes any correlation between feed operations and the underlying data distribution. + +## Selection + +To select (i.e. filter) which documents to visit, use a [document selection expression](/en/reference/writing/document-selector-language). See the [Vespa CLI cheat sheet](/en/clients/vespa-cli#cheat-sheet) for more examples. + + +```sh +$ vespa visit --selection "id = 'id:mynamespace:music::love-is-here-to-stay'" +$ vespa visit --selection "year = 2018" +``` + +## Fields + +To select which fields to output, use a [fieldset](/en/schemas/documents#fieldsets). See [examples](/en/clients/vespa-cli#documents) — common use cases are using a comma-separated list of fields or the *[document]* / *[all]* shorthand. + +```sh +$ vespa visit --field-set=[all] +$ vespa visit --field-set=music:id,year +``` + + +## Timestamp ranges + +Both the [Document V1 API](/en/reference/api/document-v1) and the [Vespa CLI](/en/clients/vespa-cli) have options for returning documents last modified within a particular timestamp range. Either—or both—of the *from* and *to* parts of the requested timestamp range can be specified: + +- For Document V1, specify the range using the `fromTimestamp` and `toTimestamp` HTTP request parameters (in microseconds). +- For `vespa visit`, specify the range using `--from` and `--to` (in seconds). + +Setting a timestamp range is only a *filter* on the document set that would otherwise be returned by a visitor without a timestamp range. It does *not* imply snapshot isolation. The returned set of documents may be affected by concurrent modifications to documents, as any modification updates the document timestamp. + +## Reprocessing + +Reprocessing is used to solve these use cases: + +- To change the document type used in ways that will not be backward compatible, define a new type, and reprocess to change all the existing documents. 
+- Document identifiers can be changed to a new scheme. +- Search documents can be reindexed after indexing steps have been changed. + +This example illustrates how one can identify a subset of the documents in a content cluster, reprocess these, and write them back. It is assumed that a Vespa cluster is set up, with data. + +### 1. Set up a document reprocessing cluster + +This example document processor: + +- deletes documents with an *artist* field whose value contains *Metallica* +- uppercases *title* field values of all other documents + +```java expandable +import com.yahoo.docproc.Arguments; +import com.yahoo.docproc.DocumentProcessor; +import com.yahoo.docproc.Processing; +import com.yahoo.docproc.documentstatus.DocumentStatus; +import com.yahoo.document.DocumentOperation; +import com.yahoo.document.DocumentPut; +import com.yahoo.document.Document; + +/** + * Example of a document processor that will modify and/or delete + * documents in the context of a reprocessing use case. + */ +public class ReProcessor extends DocumentProcessor { + private String deleteFieldName; + private String deleteRegex; + private String uppercaseFieldName; + + public ReProcessor() { + deleteFieldName = "artist"; + deleteRegex = ".*Metallica.*"; + uppercaseFieldName = "title"; + } + + public Progress process(Processing processing) { + Iterator<DocumentOperation> it = processing.getDocumentOperations().iterator(); + while (it.hasNext()) { + DocumentOperation op = it.next(); + if (op instanceof DocumentPut) { + Document doc = ((DocumentPut) op).getDocument(); + + // Delete the current document if it matches: + String deleteValue = (String) doc.getValue(deleteFieldName); + if (deleteValue != null) { + if (deleteValue.matches(deleteRegex)) { + it.remove(); + continue; + } + } + + // Uppercase the other field: + String uppercaseValue = doc.getValue(uppercaseFieldName).toString(); + if (uppercaseValue != null) { + doc.setValue(uppercaseFieldName, uppercaseValue.toUpperCase()); + } + } + 
} + return Progress.DONE; + } +} +``` + +To compile this processor, see the [Developer Guide](/en/applications/developer-guide). For more information on document processing, refer to [Document processor Development](/en/applications/document-processors). After having changed the Vespa setup, reload config: + +```bash +$ vespa deploy music +``` + +Restart nodes as well to activate. + +### 2. Select documents + +Define the selection criteria for the documents to be reprocessed. (To reprocess *all* documents, skip this.) For this example, assume all documents where the field *year* is greater than 1995. The selection string *music.year > 1995* does this. + +### 3. Set route + +The visitor sends documents to a [Messagebus route](/en/writing/document-routing) - examples: + +- **default** - Documents are sent to the *default* route. +- **indexing** - Documents are sent to *indexing*. +- **`<clustername>/chain.<chainname>`**: Documents are sent to the document processing chain *chainname* running in cluster *clustername*. + +Assume you have a container cluster with id *reprocessing* containing a docproc chain with id *reprocessing-chain*. This example route sends documents from the content node, into the document reprocessing chain, and ultimately, into indexing: + +```bash +reprocessing/chain.reprocessing-chain indexing +``` + +Details: [Message Bus Routing Guide](/en/writing/document-routing). + +### 4. Reprocess + +Start reprocessing using [vespa-visit](/en/reference/operations/self-managed/tools#vespa-visit): + +```bash +$ vespa-visit -v --selection "music AND music.year > 1995" \ + --datahandler "reprocessing/chain.reprocessing-chain indexing" +``` + +The '-v' option emits progress information on standard error. 
+ +## Analyzing field values + +Use *visit* to list ids of documents meeting criteria like empty fields - example, find unset or empty "name" field: + +```bash +$ vespa visit \ + --pretty-json \ + --cluster default \ + --selection 'restaurant AND (restaurant.name == "" OR restaurant.name == null)' \ + --field-set "[id]" +``` + +Note the first part of the selection string "restaurant AND ..." - this is to ensure that the selection restricts to the *restaurant* document type (i.e. schema). + +Also see count and list [fields with NaN](/en/querying/grouping#count-fields-with-nan). + +## Request handling + +In short, *visit* iterates over all, or a set of, [buckets](/en/content/buckets) and sends documents to (a set of) targets. The target is normally the visit client (like [vespa-visit](/en/reference/operations/self-managed/tools#vespa-visit)), but can be a set of targets that act like sinks for the documents - see [vespa-visit-target](/en/reference/operations/self-managed/tools#vespa-visit-target). + +### Client + +If the selection criteria managed to map the visitor to a specific set of buckets, these will be used when sending distributor visit requests. If not, the visit client will iterate the entire bucket space, typically at the minimum split-level required to decide the correct distributor. + +Each distributor will receive the requests and look at which of its buckets are contained by the requested bucket. If more than one, the distributor will only start a limited number of bucket visitors at the same time. Once it has processed the first ones, it will reply to the visitor client with the last bucket processed. + +As all buckets have a natural order, the client can use the returned bucket as a progress counter. 
Thus, after a distributor request has returned, the client knows one of the following: + +- All buckets contained within the bucket sent have been visited +- All buckets contained within the bucket sent, up to this specific bucket in the order, have been visited +- No buckets existed that were contained within the requested bucket + +The client can decide whether to visit in strict order, allowing only one bucket to be pending at a time, or whether to start visiting many buckets at a time, allowing great throughput. + +### Distributor + +A distributor receives visit requests from clients for a given bucket, which may map to none, one or many buckets within the distributor. It picks one or more of the first buckets in the order, picks out one content node copy of each and passes the request on to the content nodes. + +Once all the content node requests have been responded to, the distributor will reply to the client with the last bucket visited, to be used as a progress counter. + +Subsequent client requests to the distributor will have the progress counter set, letting the distributor ignore all the buckets prior to that point in the ordering. + +Bucket splitting and joining do not alter the ordering, and thus do not affect visiting much as long as the buckets are consistently split. If two buckets are joined, where the first one has already been visited, a visit request has to be sent to the joined bucket. The content layer uses the progress counter to avoid revisiting documents already processed in the bucket. + +If the distributor only starts one bucket visitor at a time, it can ensure the visitor order is kept. Starting multiple buckets at a time may improve throughput and decrease latency, but progress tracking will be less fine-grained, so a few more documents may need to be revisited when continued after a failure. + +### Content node + +The content node receives visit requests for single buckets for which it stores documents. 
It may receive many in parallel, but their execution is independent of each other. + +The visitor layer in the content node picks up the visit requests. There it is assigned a visitor thread, and an instance of the processor type is created for that visitor. The processor will set up an iterator in the backend and send one or more requests for documents to the backend. + +The document selection specified in the visitor client is sent through to the backend, allowing it to filter out unwanted data at the lowest level possible. + +Once documents are retrieved from the backend, back up to the visitor layer, the visit processor will process the data. + +The default is one iterator request is pending to the backend at any time. By sending many small iterator requests, having several pending at a time, the processing may occur in parallel with the document fetching. + +## Troubleshooting + +### Not exporting all documents + +Normally all documents share the same [bucket space](/en/content/buckets#bucket-space)—documents for multiple schemas are co-located. When using [parent/child](/en/schemas/parent-child), global documents are stored in a separate bucket space. Use the [bucket-space](/en/reference/api/document-v1#bucketspace) parameter to visit the `default` or `global` space. This is a common problem when dumping all documents and dumped count is not the expected count. + +### Visiting performance is poor + +When visiting, all content nodes may push data to the visitor *client* in parallel. Therefore, the client is often the bottleneck. Slow data processing implicitly slows down the entire visiting process. In particular, large floating point tensor fields are very expensive to render as JSON. + +To verify if client-side rendering is the bottleneck, run [vespa-visit](/en/reference/operations/self-managed/tools#vespa-visit) with your usual selection criteria and field set, but add the `--nullrender` argument (available from Vespa 8.134). 
This receives and processes documents as usual, but completely skips rendering. + +If you are redirecting the output of visiting to any custom business logic (such as running `jq` on the stream of documents), make sure you are not accidentally buffering up data internally—this goes for both input and output. To verify if processing the visitor output is the bottleneck, run visiting with `stdout` redirected to `/dev/null` and compare the time taken. + +If the client is not the bottleneck, it is possible visiting performance is limited by disk performance. Non-attribute fields are not memory backed and must be fetched from disk when evaluating selections. This includes document IDs, which must always be returned for matching documents. To see if any fields are particularly expensive to fetch or return, run visiting with a selection and/or field set that does *not* include potentially expensive fields. + +### Visitor operations are hanging + +A visit operation might stall/hang if the content cluster is in an inconsistent state—replicas are still merging between nodes. + +### Handshake failed + +Running vespa visit via the [/document/v1](/en/reference/api/document-v1) API: + +```bash +[HANDSHAKE_FAILED @ localhost]: An error occurred while resolving version of recipient(s) +[tcp/container0:37227/visitor-1-1682523227698 at tcp/container0:37227] from host 'content0' +``` + +The visit client in this case is the Vespa Container node with the /document/v1 interface. A visit is a relatively long-lived operation - the client starts a visitor operation on each Content node, that connects back to the client (here `tcp/container0:37227`) to send data. This might sound a bit odd - why connect back? + +The idea is that results of the visitor operation might be pushed to multiple destinations for increased throughput - see [request handling](#request-handling). 
This explains why it connects back on a random port, and why one cannot see the port in [vespa-model-inspect](/en/reference/operations/self-managed/tools#vespa-model-inspect) - it is temporary. + +This also means, Vespa-nodes must be able to intercommunicate on all ports, see [Docker containers](/en/operations/self-managed/docker-containers). + +Check [multinode-HA](https://github.com/vespa-engine/sample-apps/tree/master/examples/operations/multinode-HA) for an example - a Docker network is used for all containers - also see "network" in [docker-compose.yaml](https://github.com/vespa-engine/sample-apps/blob/master/examples/operations/multinode-HA/docker-compose.yaml). + +Another source of this error can be an unresponsive container instance, e.g. during overload. \ No newline at end of file diff --git a/mintlify-docs/favicon.png b/mintlify-docs/favicon.png new file mode 100644 index 0000000000..27cb33ef16 Binary files /dev/null and b/mintlify-docs/favicon.png differ diff --git a/mintlify-docs/index.mdx b/mintlify-docs/index.mdx new file mode 100644 index 0000000000..b0d3eea17b --- /dev/null +++ b/mintlify-docs/index.mdx @@ -0,0 +1,294 @@ +--- +title: "Vespa Documentation" +description: "Technical documentation for Vespa — the open-source big data serving engine for real-time search, recommendation, and AI applications" +mode: custom +--- + +{/* Hero Section */} + +<div className="relative overflow-hidden"> + <div className="max-w-6xl mx-auto px-6 py-16 md:py-24"> + <div className="flex flex-col md:flex-row items-center gap-8 md:gap-12"> + {/* Left content - text aligned left */} + <div className="flex-1 text-left"> + <h1 className="text-4xl md:text-6xl font-bold tracking-tight mb-6 text-gray-900 dark:text-white"> + Get Started + <br /> + <span className="text-[#61D790]">With Vespa</span> + </h1> + <p className="text-lg md:text-xl text-gray-600 dark:text-gray-400 max-w-xl mb-10"> + Learn how to build, deploy, and operate applications on Vespa — the + open-source 
serving engine for search, recommendations, and AI at any + scale. + </p> + <div className="flex flex-col sm:flex-row gap-4 items-start sm:items-center"> + <a + href="/en/basics/deploy-an-application" + className="inline-flex items-center justify-center px-8 py-3 bg-[#61D790] text-black text-lg font-medium rounded-full hover:bg-[#4fc77a] transition-colors" + > + Quickstart Guide + </a> + <a + href="/en/learn/overview" + className="inline-flex items-center justify-center px-8 py-3 border border-gray-900 dark:border-gray-100 text-gray-900 dark:text-gray-100 text-lg font-medium rounded-full hover:bg-gray-100 dark:hover:bg-gray-800 transition-colors" + > + Platform Overview + </a> + </div> + </div> + {/* Right content - illustration */} + <div className="flex-1 flex justify-center md:justify-end"> + <img + src="/logo/hero-light.svg" + alt="Vespa workspace illustration" + noZoom + className="w-full max-w-md md:max-w-lg h-auto dark:hidden" + /> + <img + src="/logo/hero-dark.svg" + alt="Vespa workspace illustration" + noZoom + className="w-full max-w-md md:max-w-lg h-auto hidden dark:block" + /> + </div> + </div> + </div> +</div> + +{/* Getting Started - Arrow Links */} + +<div className="max-w-6xl mx-auto px-6 py-12"> + <h2 className="text-2xl font-bold mb-2 text-gray-900 dark:text-white">Getting Started</h2> + <p className="text-gray-600 dark:text-gray-400 mb-8"> + Choose your deployment method and start building + </p> + + <CardGroup cols={3}> + <Card title="Deploy to Vespa Cloud" icon="cloud-arrow-up" href="/en/basics/deploy-an-application"> + Create an application package, configure services.xml, and deploy to managed infrastructure + </Card> + <Card title="Python Client (pyvespa)" icon="code" href="https://vespa-engine.github.io/pyvespa/getting-started-pyvespa-cloud.html"> + Define schemas, deploy applications, and query Vespa programmatically with Python + </Card> + <Card title="Self-Hosted (Docker)" icon="server" href="/en/basics/deploy-an-application-local"> 
+ Run Vespa locally or on your own infrastructure using Docker containers + </Card> + </CardGroup> +</div> + +{/* Use Cases - Feature Grid */} + +<div className="max-w-6xl mx-auto px-6 py-12"> + <h2 className="text-2xl font-bold mb-2 text-gray-900 dark:text-white">Common Use Cases</h2> + <p className="text-gray-600 dark:text-gray-400 mb-8"> + Reference architectures and implementation guides for typical applications + </p> + + <CardGroup cols={2}> + <Card title="Search Applications" icon="magnifying-glass" href="/en/learn/tutorials"> + Implement text search with BM25, vector search with HNSW, or hybrid approaches combining both methods. + </Card> + <Card title="Retrieval-Augmented Generation" icon="sparkles" href="/en/rag/rag"> + Configure document chunking, embedding generation, retrieval pipelines, and LLM integration for RAG. + </Card> + <Card title="Recommendation Systems" icon="heart" href="/en/learn/tutorials"> + Build content-based and collaborative filtering systems with real-time user signals and ML ranking. + </Card> + <Card title="E-commerce Search" icon="shopping-cart" href="/en/learn/tutorials/e-commerce"> + Configure product schemas, faceted search, inventory filtering, and personalized result ranking. 
+ </Card> + </CardGroup> +</div> + +{/* Core Concepts - Multi-column Link List */} + +<div className="max-w-6xl mx-auto px-6 py-12"> + <h2 className="text-2xl font-bold mb-2 text-gray-900 dark:text-white">Core Concepts</h2> + <p className="text-gray-600 dark:text-gray-400 mb-8"> + Understand the fundamental components of Vespa + </p> + + <div className="grid grid-cols-1 md:grid-cols-3 gap-8"> + <div> + <h3 className="flex items-center gap-2 font-semibold pb-3 mb-4 border-b border-gray-200 dark:border-gray-800 text-gray-900 dark:text-white"> + <Icon icon="book-open" size={18} className="text-gray-400" /> + Vespa Basics + </h3> + <ul className="space-y-2"> + <li><a href="/en/basics/applications" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Applications<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/basics/schemas" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Schemas<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/basics/writing" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Writing Documents<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/basics/deploy-an-application" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Deployment<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/learn/overview" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Platform Overview<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + </ul> + 
</div> + + <div> + <h3 className="flex items-center gap-2 font-semibold pb-3 mb-4 border-b border-gray-200 dark:border-gray-800 text-gray-900 dark:text-white"> + <Icon icon="magnifying-glass" size={18} className="text-gray-400" /> + Querying & Search + </h3> + <ul className="space-y-2"> + <li><a href="/en/querying/query-api" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Query API<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/querying/query-language" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">YQL Query Language<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/querying/nearest-neighbor-search" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Nearest Neighbor Search<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/querying/grouping" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Grouping & Aggregation<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/querying/text-matching" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Text Matching<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + </ul> + </div> + + <div> + <h3 className="flex items-center gap-2 font-semibold pb-3 mb-4 border-b border-gray-200 dark:border-gray-800 text-gray-900 dark:text-white"> + <Icon icon="brain" size={18} className="text-gray-400" /> + Ranking & ML + </h3> + <ul className="space-y-2"> + <li><a href="/en/ranking/ranking-intro" className="group 
flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Ranking Introduction<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/ranking/onnx" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">ONNX Models<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/rag/embedding" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Embeddings<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/ranking/phased-ranking" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Phased Ranking<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + <li><a href="/en/ranking/tensor-user-guide" className="group flex items-center justify-between text-gray-600 dark:text-gray-400 hover:text-[#61D790] transition-colors">Tensor Guide<span className="opacity-0 group-hover:opacity-100 transition-opacity">→</span></a></li> + </ul> + </div> + + </div> +</div> + +{/* Tutorials - Horizontal Cards */} + +<div className="max-w-6xl mx-auto px-6 py-12"> + <h2 className="text-2xl font-bold mb-2 text-gray-900 dark:text-white">Tutorials</h2> + <p className="text-gray-600 dark:text-gray-400 mb-8"> + Step-by-step walkthroughs for common implementation patterns + </p> + + <CardGroup cols={2}> + <Card title="Text Search" icon="file" href="/en/learn/tutorials/text-search"> + Configure schemas, indexing, and BM25 ranking for full-text search + </Card> + <Card title="Vector Search" icon="magnifying-glass" href="/en/querying/nearest-neighbor-search-guide"> + Set up HNSW indexes, configure distance metrics, and query embeddings + </Card> + <Card 
title="Hybrid Search" icon="bolt" href="/en/learn/tutorials/hybrid-search"> + Combine lexical and semantic search with reciprocal rank fusion + </Card> + <Card title="RAG Implementation" icon="sparkles" href="/en/learn/tutorials/rag-blueprint"> + End-to-end RAG setup with chunking, retrieval, and LLM integration + </Card> + </CardGroup> + + <div className="mt-6"> + <Card title="News Search & Recommendations (Multi-part Series)" icon="book-open" href="/en/learn/tutorials/news-1-deploy-an-application"> + Complete application covering deployment, feeding, querying, ranking, and recommendation algorithms + </Card> + </div> +</div> + +{/* Developer Tools - Badge Cards */} + +<div className="max-w-6xl mx-auto px-6 py-12"> + <h2 className="text-2xl font-bold mb-2 text-gray-900 dark:text-white">Developer Tools & SDKs</h2> + <p className="text-gray-600 dark:text-gray-400 mb-8"> + Client libraries and command-line tools for interacting with Vespa + </p> + + <div className="grid grid-cols-1 md:grid-cols-2 gap-6"> + <a + href="/en/clients/vespa-cli" + className="group flex items-center justify-between p-4 rounded-lg border border-gray-200 dark:border-gray-800 bg-white dark:bg-gray-900 hover:border-[#61D790] transition-colors" + > + <div className="flex items-center gap-3"> + <Icon icon="terminal" size={20} className="text-gray-900 dark:text-white" /> + <span className="font-medium">Vespa CLI</span> + </div> + <span className="flex items-center gap-2 text-sm text-[#61D790] opacity-0 group-hover:opacity-100 transition-opacity"> + Get started + <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"> + <path d="M5 12h14"/> + <path d="m12 5 7 7-7 7"/> + </svg> + </span> + </a> + + <a + href="https://vespa-engine.github.io/pyvespa/" + className="group flex items-center justify-between p-4 rounded-lg border border-gray-200 dark:border-gray-800 bg-white 
dark:bg-gray-900 hover:border-[#61D790] transition-colors" + > + <div className="flex items-center gap-3"> + <Icon icon="python" size={20} className="text-gray-900 dark:text-white" /> + <span className="font-medium">pyvespa</span> + </div> + <span className="flex items-center gap-2 text-sm text-[#61D790] opacity-0 group-hover:opacity-100 transition-opacity"> + Get started + <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"> + <path d="M5 12h14"/> + <path d="m12 5 7 7-7 7"/> + </svg> + </span> + </a> + + <a + href="/en/clients/vespa-feed-client" + className="group flex items-center justify-between p-4 rounded-lg border border-gray-200 dark:border-gray-800 bg-white dark:bg-gray-900 hover:border-[#61D790] transition-colors" + > + <div className="flex items-center gap-3"> + <Icon icon="mug-hot" size={20} className="text-gray-900 dark:text-white" /> + <span className="font-medium">Java Feed Client</span> + </div> + <span className="flex items-center gap-2 text-sm text-[#61D790] opacity-0 group-hover:opacity-100 transition-opacity"> + Get started + <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"> + <path d="M5 12h14"/> + <path d="m12 5 7 7-7 7"/> + </svg> + </span> + </a> + + <a + href="https://github.com/vespa-engine/sample-apps" + className="group flex items-center justify-between p-4 rounded-lg border border-gray-200 dark:border-gray-800 bg-white dark:bg-gray-900 hover:border-[#61D790] transition-colors" + > + <div className="flex items-center gap-3"> + <Icon icon="folder-open" size={20} className="text-gray-900 dark:text-white" /> + <span className="font-medium">Sample Applications</span> + </div> + <span className="flex items-center gap-2 text-sm text-[#61D790] opacity-0 group-hover:opacity-100 
transition-opacity"> + Browse + <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round"> + <path d="M5 12h14"/> + <path d="m12 5 7 7-7 7"/> + </svg> + </span> + </a> + + </div> +</div> + +{/* CTA Banner */} + +<div className="max-w-6xl mx-auto px-6 py-12 pb-20"> + <div className="flex flex-col md:flex-row md:items-center md:justify-between gap-6 p-8 rounded-lg border border-gray-200 dark:border-gray-800 bg-white dark:bg-gray-900"> + <div> + <h2 className="text-xl font-bold mb-1 text-gray-900 dark:text-white"> + Need help? + </h2> + <p className="text-gray-600 dark:text-gray-400"> + Check the FAQ, ask questions on Stack Overflow, or join the community. + </p> + </div> + <div className="flex flex-wrap gap-3"> + <a + href="/en/learn/faq" + className="inline-flex items-center justify-center px-5 py-2.5 bg-[#61D790] text-black font-medium rounded-full hover:opacity-90 transition-opacity" + > + FAQ + </a> + <a + href="https://stackoverflow.com/questions/tagged/vespa" + className="inline-flex items-center justify-center px-5 py-2.5 border border-gray-900 dark:border-gray-100 text-gray-900 dark:text-gray-100 font-medium rounded-full hover:bg-gray-100 dark:hover:bg-gray-800 transition-colors" + > + Stack Overflow + </a> + </div> + </div> +</div> diff --git a/mintlify-docs/ja/features.mdx b/mintlify-docs/ja/features.mdx new file mode 100644 index 0000000000..8800ed4774 --- /dev/null +++ b/mintlify-docs/ja/features.mdx @@ -0,0 +1,102 @@ +--- +title: "Vespaの機能" +--- + +## Vespaとは何か? 
+ +Vespaはリアルタイムで巨大なデータセットに対して計算を実行・配信するためのエンジンです。 +どのような量のデータでも書き込んで保存することができ、典型的には数十ミリ秒で完了するデータに対する膨大なクエリーを実行することができます。 + + +データを選択するために、クエリーには構造化されたフィルタと構造化されていないテキスト検索の両方を指定することができます。 +検索の関連度、レコメンデーション、ターゲティングやパーソナライズといった用途を実現するため、マッチしたすべてのデータはランキング関数(典型的には機械学習による)に従ってランキングされます。 + + +マッチしたすべてのドキュメントはグループやサブグループに分けることもでき、このときデータはグラフ、タグクラウド、ナビゲーション・ツール、結果の多様性といった機能を実現するため各グループに集約されます。 + + +クエリー、検索結果、書き込みを処理するアプリケーション固有の動作は、Javaコンポーネントをアプリケーション・パッケージに含めることで追加することができます。 + +Vespaはリアルタイムです。データの断片とコアに対してクエリーを同時実行することで、さらなるクエリー量には同じデータのたくさんのコピー(グループ)に対してクエリーを同時実行することで、どのような量のデータでも一定の応答時間を維持するよう設計されています。数十ミリ秒以内にレスポンスを返却できるよう最適化されています。データの書き込みは数ミリ秒で参照できるようになり、各ノードで秒あたり数千から数万のレートで扱うことができます。 + + +Vespaを簡単にセットアップして運用できるようにするため多くの作業が費やされました。 +どのようなVespaアプリケーション(単一ノードのシステムから複数のデータセンターにまたがる数百ノードを稼働させるシステムまで)も<i>アプリケーション・パッケージ</i>と呼ばれる単一のアーティファクトで完全に設定されます。ノードやプロセス、コンポーネントの低レベルな設定はアプリケーション・パッケージで指定された特性をもとに行われます。 + + +Vespaはスケーラブルです。数百億のドキュメントを扱う数百ノードにもおよぶシステムは珍しいものではなく、単一ノードのシステムをセットアップして変更するのと大変さは変わりません。 + +すべてのシステム・コンポーネントは蓄積されたデータと同様に冗長性と自己修正のメカニズムを備えているため、ハードウェア故障は運用上の緊急事態ではなく、都合のよい時にキャパシティを再度追加することで対処できます。 + + +Vespaは自己修復のメカニズムを備えておりダイナミックです。マシンが失われたり新しいものが追加されると、データの配信と書き込みは継続しながら、各マシンに自動的に再配布されます。 + +変更されたアプリケーション・パッケージをデプロイすることで、配信を継続しながら設定情報とJavaコンポーネントを変更することが可能です - ダウンタイムはなく、再起動も必要ありません。 + + + +## 機能 + +このセクションではVespaの主な機能の概要を説明します。 +ドキュメンテーションの残りで詳しく説明します。 + +<h3>データと書き込み</h3> + + + - Vespaのドキュメントは追加、置換、変更(1つのフィールドあるいは任意のサブセット)、削除することができます。 + - 書き込みは恒久的な状態になり、さらに(デフォルトでは)クエリーで参照できるようになるとクライアントに受信したことが通知されます。 + - 書き込みリクエストはクエリーに対する配信を継続しながら、毎ノード毎秒あたり数千から数万のボリュームを維持して発行することができます。 + - データは設定可能な冗長レベルでレプリケーションされます。 + - ノードが追加、削除あるいは意図せず失われると、設定された冗長レベルのデータの均等分散が維持されます。 + - データの破損は、破損していないデータのレプリカから自動的に修復されます。 + - データはシンプルなHTTP APIか、(大容量向けに)小さなスタンドアロンのJavaクライアントで書き込むことができます。 + - 一般的なプリミティブ型やコレクション、構造体、テンソルをドキュメントのデータ・スキーマのフィールドに指定することができます。 + - 同時にいくつものデータ・スキーマを使うことができます。 + - ドキュメントは相互に参照することができ、参照されているドキュメントのフィールドはパフォーマンスのペナルティなしでクエリーで指定することができます。 + - 
カスタムのJavaコンポーネントを追加することで、書き込み処理をプロセスすることができます。 + - データはバッチ再処理用にシステムからストリーミングすることができます。 + +#### クエリー + + - クエリーは構造化されたフィルターと構造化されていない検索演算子の任意の組み合わせを含むことができます。 + - クエリーは大きなテンソルとベクトルを(例えばユーザーを表現するために)含むことができます。 + - クエリーで検索結果がどのようにランキングされて、またオーガナイズされるべきかを指定します(以下のセクションを参照してください)。 + - カスタムのJavaコンポーネントを追加することで、クエリーと検索結果をプロセスすることができます - カスタムのリクエスト・ハンドラで任意のHTTPリクエストをクエリーに変換することもできます。 + - クエリーの応答時間は典型的には数十ミリ秒以内で、ハードウェアを追加することで負荷やデータサイズに対応できます。 + - 事前に設定されたドキュメントのグループ(例えば、ユーザーのドキュメント)に対してのみ<i>ストリーミング検索</i>モードを利用することができます。このモードでは各ノードで、短い応答時間を維持しながら数十億のドキュメントを保持して配信することができます。 + + +#### ランキング + + - すべての結果は設定されたランキング関数でランキングされます。ランキング関数はクエリーで指定します。 + - ランキング関数にはスカラーまたはテンソル(多次元配列)の任意の数学関数を指定することができます。 + - スカラー関数にはビジネスロジックや決定木を表現するための "if" 関数が含まれます。 + - テンソル関数には深層ニューラルネットワークのようなもっとも進化した機械学習ランキング関数の表現が可能な原始関数と合成関数の強力なセットが含まれます。 + - 期待の持てる候補のランキングにより多くのCPUを割り当てられるように、複数フェーズのランキングがサポートされています。 + - ドキュメントでの位置情報を用いたテキストのランキング特徴量の強力なセットがすぐに使えます。 + - その他にも2次元の距離や鮮度といったランキング特徴量があります。 + +### 結果のオーガナイズとプレゼンテーション + + - クエリーでの指定にしたがって、クエリーに対してマッチしたドキュメントをグルーピングしたり、集約したりすることができます。 + - 同時実行される複数台のマシンにまたがる場合であっても、すべてのマッチしたドキュメントが含まれます。 + - マッチしたドキュメントはユニークな値や数値的なバケットでグルーピングすることができます。 + - 任意のレベルのグループとサブグループがサポートされており、複数の並列グルーピングを1つのクエリーで指定することができます。 + - データは集約することができ(カウントする、平均をとるなど)、また各グループやサブグループ内で選択することができます。 + - ドキュメントからのいかなるデータ選択もクライアントに返却される最終的な検索結果に含めることができます。 + - マッチしたフィールドにおける検索エンジン・スタイルのキーワードのハイライトがサポートされています。 + +## 設定と運用 + + - VespaはRPMまたはDockerイメージとしてインストールすることができます。それは個人のラップトップでも、所有しているデータセンターでも、AWSでも可能です。 + - Vespaのアプリケーションは独立した構築可能なアーティファクトで完全に記述されます: それは<i>アプリケーション・パッケージ</i>で、個々のマシンやプロセスを個別に設定する必要はありません。 + - システムは任意の数のノードを含む各タイプ(ステートレスとステートフル)からなる複数のクラスタで構成することができます。 + - どのようなサイズのシステムもアプリケーション・パッケージの2つの短い設定ファイルで記述することができます。 + - ドキュメント・スキーマ、Javaコンポーネント、ランキング関数/モデルもアプリケーションパッケージで設定されます。 + - アプリケーションによって意図されたシステムを実現するため、アプリケーション・パッケージが単一のユニットとしてVespaにデプロイされます。 + - アプリケーションのたいていの変更(Javaコンポーネントの変更を含む)は変更されたアプリケーション・パッケージをデプロイすることで適用することができます。システムは配信と書き込みを維持しながら変更処理を管理します。 + - 
たいていのドキュメント・スキーマの変更(フィールド型の変更を除く)はシステムが稼働した状態で適用することができます。 + - アプリケーション・パッケージの変更は稼働中のシステムに対する破壊的な変更を防ぐため、デプロイ時にバリデーションされます。 + - Vespaには単一障害点がなく、自動でフェイルしたノードを迂回します。 + - システムのログはリアルタイムで中央サーバーに収集されます。 + - すべてのノードから、サードパーティーのメトリクス/アラートシステムに選択されたメトリクスを送信することができます。 diff --git a/mintlify-docs/ja/introduction-to-documentation.mdx b/mintlify-docs/ja/introduction-to-documentation.mdx new file mode 100644 index 0000000000..212cbda2ba --- /dev/null +++ b/mintlify-docs/ja/introduction-to-documentation.mdx @@ -0,0 +1,37 @@ +--- +title: "Vespaドキュメンテーションの紹介" +--- + +このドキュメンテーションはVespaの利用者、あるいは潜在的な利用者(アプリケーションの所有者 / PM、エンジニアまたは運用者)のためのものです。 +このドキュメンテーションはプロダクトの概念上の概要をカバーしており、それには背景的な理論や、プロダクトがなぜそのように開発されたかの説明が含まれています。 +これは開発者が最初に使う機能の詳しい説明に続きます。 + +Vespaプラグインの開発については、このドキュメンテーションは経験豊かなJava開発者を対象としています。 +初心者は対象としておらず、一般的なプログラミング技術やプログラミング言語の基礎はカバーしていません。 + +ドキュメンテーションの一部について読者はUnixライクなプラットフォームに精通しているべきで、これはVespaがLinux上で利用可能であるためです。 +VespaのAPIは、Vespaの内部動作に関する深い知識がなくても使い始められるようになっています。 +読者はエキスパートである必要は決してありませんが、Vespaの基本的性質をきちんと理解することで、テキストとサンプルをより簡単に理解することができるでしょう。 + +もし誤りや綴りの間違い、コードの欠陥を発見したりドキュメンテーションを改善したい場合は、pull requestを送るか [issueを作成](https://github.com/vespa-engine/vespa/issues)してください。 + +*イタリック文字* は以下の用途に用いられます: + +- パス名、ファイル名、プログラム名、ホスト名、URL +- 新しい用語が定義されている箇所 + +`等幅文字` は以下の用途に用いられます: + +- プログラミング言語の要素、コードの例、キーワード、関数、クラス、インタフェース、メソッド、その他 +- コマンドとコマンドラインの出力 + +注釈とその他の重要な情報は以下のように示されます: + +<Note> +**Note:** 注意してほしい情報 + +コマンドラインで実行するコマンドは、以下のようにプロンプトの $ で始まり示されます: +```bash +$ export PATH=$VESPA_HOME/bin:$PATH +``` +</Note> \ No newline at end of file diff --git a/mintlify-docs/ja/overview.mdx b/mintlify-docs/ja/overview.mdx new file mode 100644 index 0000000000..80982c1f86 --- /dev/null +++ b/mintlify-docs/ja/overview.mdx @@ -0,0 +1,92 @@ +--- +title: "Vespaの概要" +--- + +Vespaはスケーラブルで低レイテンシな、ステートフルあるいはステートレスなバックエンドサービスを簡単に開発して稼働させることができるプラットフォームです。 +このドキュメントではプラットフォームの機能と主なコンポーネントの概要を説明します。 + + + +## イントロダクション + 
+Vespaを使うことで、レイテンシや信頼性を犠牲にせずに、大規模データや高負荷に耐えるバックエンドあるいはミドルウェアシステムを構築することができます。 +Vespaのインスタンスはいくつかの<em>ステートレスなJavaコンテナー・クラスター</em>と、データを保持する0個以上の<em>コンテント</em>・クラスターで構成されます。 + +<Frame> +![Vespa Overview](/assets/img/vespa-overview.svg) +</Frame> +[ステートレスな **コンテナー**・クラスター](/en/applications/containers)は、入力データと、リクエスト/クエリーとそのレスポンスの両方を処理するコンポーネントをホスティングします。 +これらのコンポーネントは(インデックス構築やクエリー実行の全ステージといった)プラットフォームに関する機能を提供するだけでなく、アプリケーションのミドルウェアのロジックも提供します。 + +アプリケーション開発者は、全ての機能を満たす単一のステートレスなクラスターとしてVespaシステムを設定することもできますし、タスクの種類に合わせて異なる複数のクラスターを設定することもできます。 + +そしてコンテナー・クラスターは +クエリーやデータ操作の命令を適切なコンテント・クラスターにパスします +— +アプリケーションが保持しないデータである場合には、そのデータを供給する外部サービスと連携することが可能です。 + +Vespaクラスターの [**コンテント**・クラスター](/en/content/elasticity)は +データ(ドキュメント)を保持して、それらに対する参照、分散された選択/グルーピング/集約のクエリー処理に責任を持ちます。 +コンテント・クラスターはシンプルなkey-valueの配信システムとして機能させることもできますし、構造化・非構造化データに対する複雑な検索を実行したり、関連度のモデルに従って並べ替えて検索結果をグルーピングしたり集約処理を実行したりもできます。 +これらの操作が低レイテンシで機能するよう細心の注意が払われています。それは、結果データを事前に計算することなく、大規模なデータセットに対してエンドユーザー・アプリケーションが直接使えるようにするためです。 + +スケーラビリティを提供するために、コンテント・クラスターは設定された冗長性のレベルを維持するためにバックグラウンドで自動的にデータを再バランスします。到達できないノードに対するフェイル・オーバーも行なうため、柔軟であり、自動リカバリー機能を備えているといえるのです。 + +コンテナー・クラスターでの中間処理の後、データはコンテント・クラスターに書き込まれます。 +書き込みは数ミリ秒の後に有効になり、与えられた時間内に成功するか失敗に関する情報を提供するかが保証されており、利用可能なリソースに合わせてスケールされます。 +書き込みはHTTPで直接送信することもできますし、Javaクライアントを使うこともできます — +[APIドキュメンテーション](/en/reference/api/api)を参照してください。 + +Vespaに蓄積されるドキュメントのインスタンスは設定された[スキーマ](/en/basics/schemas)を持たなければなりません。 +システムにおける各コンテント・クラスターは同時に複数の型のドキュメントを扱うことができます; +アプリケーションは異なる型のデータを異なるコンテント・クラスターに割り当てることもできますし、同じコンテント・クラスターに複数のデータ型を割り当てることもできます。 + +コンテナー・クラスターとコンテント・クラスターはVespaのすべてのエンドユーザーのトラフィックを処理しますが、3番目のタイプのクラスタがあります。それは<a href="/en/basics/applications"><strong>admin</strong>とconfigクラスター</a>で、これは他のクラスターを管理してシステムの設定変更のリクエストを扱います。 + +Vespaアプリケーションは<a href="/en/basics/applications"><em>アプリケーション・パッケージ</em></a>で完全に記述されますが、それはシステムの一部として稼働するクラスターに関する宣言、コンテントのスキーマ、アプリケーションで必要なJavaコンポーネントやその他の設定、データ・ファイルなどを含むディレクトリです。 
+アプリケーションの所有者はアプリケーション・パッケージを単一のadminクラスターに<em>デプロイする</em>ことで稼働させることができ、また同じ手順で稼働中のアプリケーションに変更を加えることができます。 +アプリケーション設定の管理に加えて、adminクラスターはシステムのすべてのノードからリアルタイムでログを収集します。 +ノードにVespaがインストールされて起動されると、それはシステム全体が単一のユニットとして扱えるようにadminシステムによって管理されるようになり、そしてアプリケーションの所有者はシステムのノードのローカルで管理タスクを実行する必要はありません。 + +ドキュメントの残りでVespaが行なう機能の詳細について説明します。 + + + +## Vespaのオペレーション + +Vespaは以下のオペレーションを受け付けます: + +- 書き込み: ドキュメントの設置(追加と置き換え)と削除、それらのフィールドの更新。 +- IDによるドキュメント(または、そのサブセット)の参照。 +- [*選択*](/en/querying/query-language); マッチしたドキュメントは[*ソーティング*](/en/reference/querying/sorting-language)したり、[*ランキング*](/en/basics/ranking)したり、[*グルーピング*](/en/querying/grouping)することができます。 検索結果のランキングは*[ランキング式](/en/reference/ranking/ranking-expressions)*に従って実行されます。シンプルな数学関数や複雑なビジネスロジック、機械学習の検索ランキングモデルを使うことができます。 グルーピングは各グループがグループ内のデータの集約した値を含むことができるような階層的なグループの集合で、フィールドの値を使って行なわれます。 グルーピングは値を計算するために集約処理と組み合わせることができます。例えば: ナビゲーション補助、タグ・クラウド、グラフ、クラスタリング — すべては分散して処理され、大規模データセットで法外な計算コストになってしまうような、コンテナー・クラスターに全データを送り返すようなことはありません。 +- データのダンプ: [*visit*](/en/writing/visiting)オペレーションを使うことで、条件に一致するコンテントをストリーム出力することができます。これはバックグラウンドの再処理やバックアップといったことに使うことができます。 +- [その他のカスタムのネットワーク・リクエスト](/en/reference/applications/components) はコンテナー・クラスターにデプロイされたアプリケーション・コンポーネントで処理することができます。 + +これらのオペレーションで開発者はリッチな機能のアプリケーションを構築することができます。 それは選択やキーワード検索、オーガナイズやコンテントの処理が宣言的なクエリーで表現可能な蓄積されたコンテントで稼働するようなJavaのミドルウェア・ロジックとして記述されます。 + + +## ステートレス・コンテナー + +[コンテナー・クラスター](/en/applications/containers)は上記に掲載されたオペレーションと、それらの返却データの処理に従事するアプリケーション・コンポーネントをホスティングしています。 Vespaはコンポーネントのインフラとともに、すぐに使えるコンポーネント群を提供しています: adminサーバまたはアプリケーション・パッケージからの設定の注入のサポートが追加された[Guice](https://github.com/google/guice)の上に構築された依存関係の注入; OSGiをベースとしたコンポーネント・モデル; メトリクスやロギングと同様にモジュール性のためハンドラのチェーンにコンポーネントを連結できる共有されたメカニズム。 さらにコンテナーはリモート・リクエストを処理したり発行できるネットワーク・レイヤーを提供しています - HTTPはすぐに使えますし、その他のプロトコル/トランスポートはコンポーネントとして透過的にプラグインできます。 + +開発者はアプリケーション・パッケージを単に再デプロイするだけでコンポーネント群に変更を(もちろんその設定も)加えることができます - 
システムはその場でリクエスト処理に影響を与えずに、クラスターのノードに対するコピー、コンポーネントのロード/アンロードを管理します。 + +## コンテント・クラスター + +[コンテント・クラスター](/en/content/elasticity) はデータを確実に保存して、検索と選択のためにデータの分散インデックスを維持します。クラスターがノードやディスクの喪失に対して自動で修復できるよう、データはアプリケーションで指定されたコピー数に応じて複数ノード間でレプリケーションされます。同じメカニズムを使って、クラスターは拡大したり縮小したりすることもできます。それはアプリケーション・パッケージで宣言された利用可能なノードの集合を単に変更するだけです。 + +個々のドキュメントの参照はそのドキュメントを保持するノードに直接的にルーティングされ、クエリーは対象ドキュメントを保持する一部のノードに分散されます。 複雑なクエリーはコンテナーとコンテント・ノードの間で複数ステップにまたがる分散アルゴリズムで処理されます; これはVespaのデザインのゴールのひとつである低レイテンシーを実現するためのものです。 + +## 管理と開発者のサポート + +[単一のadmin・configクラスター](/en/basics/applications)がシステムの他のクラスターを制御します。 アプリケーション開発者が詳細を気にせずに希望するシステムの高レベルな宣言ができるよう、 プロセスとコンポーネントの実体を含む個々のクラスターの低レベルな設定が導かれます。 アプリケーション・パッケージが再デプロイされるといつも、システムは設定の必要な変更を計算して、これらが分散されたコンポーネントにプッシュされます。 効率のため、変更されたコンポーネントとデータ・ファイルはBitTorrentで配布されます。 + +アプリケーション・パッケージはHTTP REST APIか[コマンドライン・インタフェース](/en/basics/applications#deploying-applications)で[変更したり、再デプロイしたり](/en/reference/api/deploy-v2)、[検証する](/en/reference/api/config-v2)ことができます。 設定の変更を唯一で一貫性のあるものにするため、また単一障害点を持つことを避けるため、管理クラスターは[ZooKeeper](https://zookeeper.apache.org/)の上で稼働します。 + +数百ノードで構成される大規模システムも、すべてのサービスを稼働させている単一ノードも、アプリケーションパッケージは同じように見え、またデプロイも同じ方法で行われます。 唯一の必要な変更はクラスターを構成するノードのリストです。 コンテナー・クラスターはメソッド呼び出しでアプリケーション・パッケージを「デプロイ」することで、単一のJava VMの中で起動することもできます。 これはIDEやユニット・テストの中でアプリケーションをテストするのに便利です。 コンポーネントを含むアプリケーション・パッケージは、サンプル・アプリケーションを始めとして、Mavenを使ったIDEで[開発する](/en/applications/developer-guide)ことができます。 + +## サマリー + +Vespaを使うことで、低レベルの複雑さに開発者が悩むことなく、スケーラブルで高い基準に従う、リッチに機能して高い可用性のあるアプリケーションを構築することができます。 開発者は時間の経過とともに、システムをオフラインにすることなくアプリケーションを進化させ、また成長させることができます。 そして、データを陳腐化させてパーソナライズできなくするような、複雑なデータやページの事前計算を避けることができます。なぜならそれは、同時かつ定期的に変化するユーザーのデータに対する複雑なクエリーをしばしば必要とするからです。 \ No newline at end of file diff --git a/mintlify-docs/ja/vespa-quick-start.mdx b/mintlify-docs/ja/vespa-quick-start.mdx new file mode 100644 index 0000000000..050e78e596 --- /dev/null +++ b/mintlify-docs/ja/vespa-quick-start.mdx @@ -0,0 +1,87 @@ +--- +# Copyright Vespa.ai. 
All rights reserved. +title: "Dockerを使ったVespaのクイック・スタート" +--- + +このガイドではDockerを使って1台のマシン上にVespaをインストールして起動する方法を説明します。 + +**必要条件**: + +- [Docker](https://docs.docker.com/engine/install/)がインストールされていること。 +- [Git](https://git-scm.com/downloads)がインストールされていること。 +- オペレーティング・システム: macOSまたはLinux +- アーキテクチャ: x86\_64 +- 少なくとも2GBのメモリがコンテナのインスタンスに割り当てられていること。 +1. **[GitHub](https://github.com/vespa-engine/sample-apps)からVespaのサンプル・アプリケーションをcloneする:** + +```bash +$ git clone https://github.com/vespa-engine/sample-apps.git +$ export VESPA_SAMPLE_APPS=`pwd`/sample-apps +``` + +2. **VespaのDockerコンテナを起動する:** + +```bash +$ docker run --detach --name vespa --hostname vespa-container --privileged \ +--volume $VESPA_SAMPLE_APPS:/vespa-sample-apps --publish 8080:8080 vespaengine/vespa +``` + +`volume`オプションで、事前にダウンロードしたソースコードにDockerコンテナ内の`/vespa-sample-apps`としてアクセスできるようになります。 検索やフィード用のインタフェースにアクセスできるように、Dockerコンテナの外に`8080`ポートを公開します。 `vespa`の名前で同時に稼働できるDockerコンテナは1つまでです。必要あらば変更してください。 + 上記のコマンドの具体的なステップに興味がある場合は、[Dockerfile](https://github.com/vespa-engine/docker-image/blob/master/Dockerfile) と[起動スクリプト](https://github.com/vespa-engine/docker-image/blob/master/include/start-container.sh)を参照してください。 + +3. **設定サーバが起動するのを待つ - 200 OKのレスポンスを待つ:** + +```bash +$ docker exec vespa bash -c 'curl -s --head http://localhost:19071/ApplicationStatus' +``` +4. **サンプル・アプリケーションをデプロイしてアクティベートする:** + +```bash +$ docker exec vespa bash -c 'vespa-deploy prepare /vespa-sample-apps/basic-search/src/main/application/ && \ + vespa-deploy activate' +``` + さらなるサンプル・アプリケーションは[sample-apps](https://github.com/vespa-engine/sample-apps/tree/master)で見つけることができます。 [アプリケーション・パッケージ](/en/basics/applications)のアプリケーションの項目を参照してください。 + +5. **アプリケーションがアクティブであることを確認する - 200 OKのレスポンスを待つ:** + +```bash +$ curl -s --head http://localhost:8080/ApplicationStatus +``` + +6. 
**ドキュメントをフィードする:** + +```bash +$ curl -s -H "Content-Type:application/json" --data-binary @${VESPA_SAMPLE_APPS}/basic-search/music-data-1.json \ + http://localhost:8080/document/v1/music/music/docid/1 | python -m json.tool + $ curl -s -H "Content-Type:application/json" --data-binary @${VESPA_SAMPLE_APPS}/basic-search/music-data-2.json \ + http://localhost:8080/document/v1/music/music/docid/2 | python -m json.tool +``` + この例では[ドキュメントAPI](/en/reference/api/document-v1)を使っています。 大規模なデータを高速にフィードするには[Java Feeding API](/en/clients/vespa-feed-client)を使ってください。 +7. **クエリーとドキュメント取得リクエストを実行する:** + +```bash +$ curl -s http://localhost:8080/search/?query=bad | python -m json.tool +``` + +```bash + $ curl -s http://localhost:8080/document/v1/music/music/docid/2 | python -m json.tool +``` + + ブラウザで[localhost:8080/search/?query=bad](http://localhost:8080/search/?query=bad)の結果を参照してください。 詳しくは[Query API](/en/querying/query-api)を参照してください。 +8. 
**終わったらクリーンアップする** + 必要なくなった稼働中のコンテナを停止する: +```bash + $ docker stop vespa +``` + 必要に応じて、停止したコンテナを完全に削除する: +```bash + $ docker rm vespa +``` + +## 次のステップ +- このアプリケーションは完全に機能してプロダクションで使うことができますが、冗長性のために [ノードを追加](/en/operations/self-managed/multinode-systems)した方がいいかもしれません。 +- Vespaアプリケーションにあなた独自のJavaコンポーネントを追加するには、 [アプリケーションの開発](/en/applications/developer-guide) を参照してください。 +- [Vespa API](/en/reference/api/api)はVespaのインタフェースの理解に役立つでしょう。 +- [サンプル・アプリケーション](https://github.com/vespa-engine/sample-apps/tree/master)を眺めてみましょう。 +- [Vespaのインストールをセキュア](/en/security/securing-your-vespa-installation)にします。 +- AWSで稼働させるには、[AWS EC2での複数ノードのクイック・スタート](/en/operations/self-managed/multinode-systems#aws-ec2)または [AWS ECSでの複数ノードのクイック・スタート](/en/operations/self-managed/multinode-systems#aws-ecs)を参照してください。 \ No newline at end of file diff --git a/mintlify-docs/logo/dark.svg b/mintlify-docs/logo/dark.svg new file mode 100644 index 0000000000..0276516f59 --- /dev/null +++ b/mintlify-docs/logo/dark.svg @@ -0,0 +1,9 @@ +<svg width="211" height="48" viewBox="0 0 211 48" fill="none" xmlns="http://www.w3.org/2000/svg"> +<path d="M13.5876 32.2135H56.2925L42.7049 45.2039H0V26.6467L13.5876 13.6562V32.2135Z" fill="white"></path> +<path d="M69.8804 0.666016V19.2233L56.2928 32.213V13.6558H13.5879L27.1762 0.666016H69.8804Z" fill="white"></path> +<path d="M95.7499 38.7409L83.4688 7.16113H88.5697L98.5833 34.049L108.691 7.16113H113.698L101.417 38.7409H95.7485H95.7499Z" fill="white"></path> +<path d="M133.678 20.8978C132.765 19.2598 131.475 17.9441 129.805 16.9512C128.136 15.9583 126.137 15.4619 123.807 15.4619C121.477 15.4619 119.429 15.9649 117.665 16.9735C115.901 17.9821 114.525 19.381 113.534 21.1689C112.54 22.9588 112.045 25.0565 112.045 27.4621C112.045 29.8676 112.541 31.8684 113.534 33.6426C114.525 35.4168 115.901 36.8006 117.665 37.7928C119.429 38.7857 121.444 39.2821 123.712 39.2821C125.57 39.2821 127.214 38.9658 128.647 38.3345C130.079 37.7031 131.284 36.8386 132.261 35.7403C133.236 34.642 133.929 
33.4022 134.34 32.0184H129.616C129.144 33.101 128.419 33.9589 127.444 34.5903C126.467 35.2216 125.222 35.5379 123.712 35.5379C122.452 35.5379 121.286 35.2439 120.216 34.6577C119.145 34.0716 118.287 33.1999 117.641 32.0413C117.084 31.0432 116.768 29.8467 116.692 28.4543H134.954C134.984 28.0338 135.008 27.65 135.024 27.3042C135.04 26.9584 135.047 26.6349 135.047 26.3343C135.047 24.3492 134.592 22.5384 133.678 20.8971V20.8978ZM116.746 25.2524C116.873 24.1803 117.171 23.2548 117.641 22.4788C118.287 21.4099 119.144 20.5991 120.216 20.0418C121.286 19.4851 122.468 19.2068 123.76 19.2068C125.585 19.2068 127.114 19.7412 128.342 20.8081C129.57 21.8769 130.231 23.357 130.325 25.2524H116.747H116.746Z" fill="white"></path> +<path d="M148.274 39.2821C146.258 39.2821 144.495 38.9665 142.984 38.3345C141.472 37.7031 140.275 36.8164 139.394 35.6729C138.512 34.53 137.977 33.1913 137.788 31.6575H142.606C142.763 32.3792 143.071 33.0414 143.527 33.6426C143.983 34.2445 144.613 34.7252 145.416 35.086C146.219 35.4469 147.171 35.6277 148.274 35.6277C149.313 35.6277 150.171 35.4849 150.848 35.1993C151.525 34.9138 152.021 34.5228 152.336 34.0264C152.651 33.5299 152.809 32.9968 152.809 32.4251C152.809 31.5835 152.596 30.9443 152.171 30.5075C151.746 30.0719 151.115 29.7255 150.282 29.4701C149.447 29.2147 148.447 28.9815 147.282 28.7706C146.179 28.5899 145.117 28.3423 144.094 28.0266C143.071 27.7109 142.157 27.3128 141.355 26.8314C140.552 26.3507 139.914 25.7416 139.441 25.0041C138.969 24.2674 138.733 23.3577 138.733 22.2744C138.733 20.9816 139.094 19.8158 139.819 18.7778C140.543 17.7404 141.574 16.9283 142.913 16.3415C144.251 15.7547 145.833 15.4619 147.66 15.4619C150.305 15.4619 152.431 16.0638 154.036 17.2662C155.642 18.4693 156.587 20.1688 156.871 22.3642H152.289C152.163 21.3418 151.69 20.5526 150.872 19.9959C150.053 19.4399 148.966 19.1616 147.613 19.1616C146.259 19.1616 145.219 19.4177 144.495 19.9285C143.77 20.44 143.408 21.1172 143.408 21.9588C143.408 22.5004 143.613 22.9817 144.022 
23.4022C144.431 23.824 145.037 24.177 145.841 24.4625C146.644 24.7487 147.628 25.0114 148.793 25.2517C150.462 25.553 151.958 25.9282 153.28 26.3795C154.603 26.8307 155.657 27.4929 156.445 28.3646C157.232 29.2369 157.626 30.4845 157.626 32.1088C157.657 33.5227 157.288 34.7704 156.516 35.853C155.744 36.9355 154.658 37.7784 153.258 38.3797C151.856 38.9809 150.195 39.2821 148.274 39.2821H148.274Z" fill="white"></path> +<path d="M161.971 48.6662V16.0039H166.222L166.694 19.5228C167.198 18.8312 167.828 18.1769 168.584 17.5606C169.339 16.9443 170.244 16.4407 171.3 16.049C172.354 15.6587 173.591 15.4629 175.007 15.4629C177.274 15.4629 179.266 15.9894 180.983 17.0419C182.699 18.095 184.037 19.5162 184.998 21.3055C185.958 23.0954 186.438 25.1328 186.438 27.4185C186.438 29.7042 185.95 31.7423 184.974 33.5316C183.998 35.3215 182.652 36.7276 180.936 37.7499C179.219 38.7723 177.228 39.2838 174.961 39.2838C173.102 39.2838 171.466 38.9301 170.048 38.2235C168.632 37.5168 167.514 36.5318 166.695 35.2684V48.6675H161.971L161.971 48.6662ZM174.205 35.3575C175.653 35.3575 176.936 35.0268 178.054 34.3653C179.171 33.7038 180.054 32.7712 180.699 31.5681C181.344 30.3657 181.667 28.9668 181.667 27.3727C181.667 25.7786 181.344 24.3882 180.699 23.1995C180.053 22.0115 179.171 21.0789 178.054 20.4023C176.936 19.7258 175.653 19.3872 174.205 19.3872C172.756 19.3872 171.425 19.7258 170.308 20.4023C169.19 21.0789 168.316 22.0115 167.686 23.1995C167.056 24.3882 166.741 25.7786 166.741 27.3727C166.741 28.9668 167.056 30.3657 167.686 31.5681C168.316 32.7712 169.189 33.7038 170.308 34.3653C171.425 35.0274 172.724 35.3575 174.205 35.3575Z" fill="white"></path> +<path d="M198.625 39.2821C196.672 39.2821 195.051 38.9665 193.76 38.3345C192.468 37.7031 191.508 36.8537 190.878 35.7855C190.248 34.718 189.934 33.5679 189.934 32.3341C189.934 30.8303 190.343 29.5447 191.162 28.4766C191.981 27.409 193.146 26.5897 194.657 26.018C196.168 25.4469 197.979 25.1607 200.089 25.1607H206.277C206.277 23.8377 206.071 22.7401 
205.663 21.8677C205.253 20.996 204.647 20.3417 203.845 19.9056C203.042 19.47 202.026 19.2513 200.798 19.2513C199.381 19.2513 198.169 19.5748 197.161 20.2212C196.153 20.8683 195.523 21.8225 195.272 23.0859H190.548C190.737 21.4918 191.311 20.1309 192.272 19.0031C193.233 17.8753 194.469 17.0029 195.98 16.3867C197.492 15.7704 199.098 15.4619 200.798 15.4619C203.033 15.4619 204.908 15.8385 206.419 16.5897C207.93 17.3415 209.072 18.4019 209.844 19.77C210.615 21.1388 211.001 22.7702 211.001 24.6649V38.7405H206.891L206.513 34.9059C206.166 35.5078 205.757 36.0789 205.285 36.6205C204.813 37.1621 204.253 37.6285 203.607 38.0188C202.961 38.4098 202.222 38.7182 201.388 38.9435C200.553 39.1688 199.632 39.2821 198.624 39.2821H198.625ZM199.522 35.6277C200.529 35.6277 201.443 35.4325 202.262 35.0409C203.081 34.6505 203.781 34.1161 204.364 33.4396C204.946 32.763 205.387 31.9961 205.686 31.1388C205.985 30.2815 206.15 29.402 206.182 28.4995V28.3639H200.561C199.207 28.3639 198.112 28.5217 197.278 28.8374C196.444 29.1531 195.837 29.5821 195.459 30.123C195.081 30.6647 194.893 31.296 194.893 32.0177C194.893 32.7394 195.074 33.4094 195.436 33.9353C195.798 34.4619 196.325 34.8758 197.019 35.1758C197.711 35.477 198.546 35.627 199.522 35.627L199.522 35.6277Z" fill="white"></path> +</svg> \ No newline at end of file diff --git a/mintlify-docs/logo/hero-dark.svg b/mintlify-docs/logo/hero-dark.svg new file mode 100644 index 0000000000..9072d9af19 --- /dev/null +++ b/mintlify-docs/logo/hero-dark.svg @@ -0,0 +1,29 @@ +<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="artwork" x="0px" y="0px" viewBox="0 0 1000 1000" style="enable-background:new 0 0 1000 1000; fill: white;" xml:space="preserve"> +<style type="text/css"> + .st0{fill:#61D790;} +</style> +<path class="st0" d="M939.2,944.4H726.9c-1.3,0-2.7,0-4,0c-7.1,0-14.1-2.2-19.9-6.3c-9.4-6.7-14.1-17.5-14.1-28.9v-308 
c0-0.7,0-1.5,0-2.2c0-18.7,15.1-33.8,33.8-33.8c0,0,0,0,0,0c0.9,0,1.9,0,2.8,0h201.1c4.1,0,8.3,0,12.4,0 c17.3,0.1,32.4,13.4,33.8,30.9c0.3,3.2,0.1,6.4,0.1,9.5v297.3c0,2.6,0,5.1,0,7.7C972.8,929.2,957.8,944.2,939.2,944.4 c-2.5,0-2.5,4,0,3.9c20-0.2,36.5-15.9,37.7-35.9c0.2-2.9,0-5.9,0-8.8V605c0.1-4,0-8.1-0.5-12.1c-2.8-18.2-19.1-31.6-37.3-31.7 c-3.9,0-7.8,0-11.7,0H725.3c-1.2,0-2.4,0-3.6,0c-9.6,0.3-18.8,4.2-25.6,11c-7.7,7.7-11.1,17.8-11.1,28.5v308.7 c0,12.5,5.3,24,15.4,31.5c6.5,4.8,14.4,7.3,22.4,7.3c1.2,0,2.4,0,3.6,0h212.7C941.8,948.3,941.8,944.4,939.2,944.4z"/> +<path class="st0" d="M687,647.6h276.3c3.7,0,7.5,0.1,11.2,0c0.2,0,0.3,0,0.5,0c2.5,0,2.5-3.9,0-3.9H698.7c-3.7,0-7.5-0.1-11.2,0 c-0.2,0-0.3,0-0.5,0C684.4,643.6,684.4,647.6,687,647.6L687,647.6z"/> +<path class="st0" d="M687,756.7h276.3c3.7,0,7.5,0.1,11.2,0c0.2,0,0.3,0,0.5,0c2.5,0,2.5-3.9,0-3.9H698.7c-3.7,0-7.5-0.1-11.2,0 c-0.2,0-0.3,0-0.5,0C684.4,752.8,684.4,756.7,687,756.7L687,756.7z"/> +<path class="st0" d="M687,865.9h276.3c3.7,0,7.5,0.1,11.2,0c0.2,0,0.3,0,0.5,0c2.5,0,2.5-3.9,0-3.9H698.7c-3.7,0-7.5-0.1-11.2,0 c-0.2,0-0.3,0-0.5,0C684.4,862,684.4,865.9,687,865.9L687,865.9z"/> +<path d="M258.7,241.1c-7.3-29-14.1-58.2-20.2-87.4c-1.5-7-3.1-14.1-4.2-21.2c-1.1-6.8-1.5-13.9,0.4-20.7 c3.1-11.3,12.5-19.1,22.2-24.8c11.2-6.6,24-12.3,37.1-12.8c11.1-0.4,23.3,3.4,29.9,12.9c6.2,8.8,7.2,20.1,9.6,30.3 c1,5.3,2.8,10.4,5.3,15.2c1.3,2.4,2.9,4.7,4.8,6.7c1.8,2,4.1,4.1,3.4,7c-0.3,1.6-1,3.1-1.5,4.7c-0.4,1.8-0.8,3.6-0.9,5.4 c-0.3,3.4-0.2,6.9,0.5,10.3c1.2,6.6,4.7,12.3,8.2,17.9c3.3,5.4,6.9,11.1,7.2,17.6c0.3,5.3-2.2,10.6-7.4,12.5c-2,0.8-4.2,0.7-6.1-0.2 c-1-0.5-2.2-0.2-2.7,0.8c-0.2,0.3-0.2,0.6-0.2,0.9c-0.8,8.9-8.7,15.4-17.6,14.6c-4.2-0.4-8-2.4-10.7-5.5c-0.6-0.6-1.6-0.8-2.4-0.3 c-8.1,6.2-20.2,1.8-25.7-6c-7.5-10.8-6.3-24.8-5.2-37.2c0.7-7.2,1.4-14.5,0.6-21.8c-0.7-6.2-2.3-12.9-7-17.4c-4.1-4-11-5.8-16.1-2.5 c-4.5,2.9-5.9,8.1-6,13.1c-0.2,5.5,1.4,11,0.4,16.4c-0.7,4.2-2.8,9.7-7.9,9.4c-2.5-0.1-2.5,3.8,0,3.9c5.1,0.3,8.6-3.4,10.4-7.8 
c1.9-4.7,1.8-9.5,1.3-14.4c-0.5-4.6-1.1-10.2,1.2-14.5c2.6-4.8,8.7-4.8,12.8-1.8c5,3.8,6.4,10.9,7,16.7c0.7,7.5-0.2,15.1-0.9,22.5 c-1.1,12.5-1.6,26.5,5.9,37.2c6.5,9.4,21.1,15,31.1,7.4l-2.4-0.3c7.4,8.3,20.1,9.1,28.4,1.7c3.9-3.5,6.3-8.3,6.7-13.5l-3,1.7 c5.1,2.7,11.1,0.8,15-3.1c4.6-4.6,5.3-11.3,3.8-17.4c-1.6-6.9-5.7-12.7-9.3-18.6c-3.6-6-6.1-12.3-6-19.4c0.1-3.6,0.7-7.1,1.9-10.4 c0.6-1.5,0.8-3.2,0.7-4.8c-0.2-1.6-0.9-3.2-1.9-4.5c-1.7-2.3-3.9-4.3-5.5-6.7c-1.5-2.3-2.7-4.7-3.6-7.3c-3.9-10.7-4.3-22.3-8.5-32.8 c-2-5.4-5.3-10.2-9.7-13.9c-4.8-3.9-10.5-6.6-16.5-7.8c-13.6-2.8-27.2,1.1-39.5,6.8c-11.1,5.1-22.4,11.7-29.6,21.9 c-3.8,5.5-6,11.9-6.5,18.6c-0.6,7.2,0.6,14.4,2,21.5c6,30.6,13,61,20.5,91.2c0.9,3.7,1.8,7.3,2.8,11 C255.5,244.6,259.3,243.5,258.7,241.1L258.7,241.1z"/> +<path d="M316.5,133.5c3,2.6,7.2,3.5,11,2.4c1-0.3,1.6-1.4,1.3-2.4c-0.3-1-1.4-1.6-2.4-1.4c-0.4,0.1-0.7,0.2-1.1,0.3 c-0.1,0-0.2,0-0.3,0c0.1,0,0.4,0,0.1,0c-0.2,0-0.4,0-0.6,0c-0.4,0-0.7,0-1.1,0c-0.2,0-0.4-0.1-0.6-0.1c-0.6,0-1.1,0.4-1.1,1 c0,0.6,0.4,1.1,1,1.1c0.6,0,1.1-0.4,1.1-1c0-0.5-0.4-1-0.9-1.1c-0.1,0-0.2,0-0.3-0.1c-0.4-0.1-0.7-0.2-1.1-0.3 c-0.2-0.1-0.4-0.1-0.5-0.2c-0.3-0.1,0.3,0.2,0.1,0c-0.1,0-0.2-0.1-0.3-0.1c-0.3-0.2-0.7-0.3-1-0.5c-0.2-0.1-0.3-0.2-0.5-0.3 c0,0-0.3-0.2-0.1-0.1c0.2,0.2-0.1-0.1-0.1-0.1c-0.8-0.8-2-0.8-2.8,0S315.8,132.7,316.5,133.5C316.5,133.5,316.5,133.5,316.5,133.5 L316.5,133.5z"/> +<path d="M319.6,159c1.7,5.2,6.4,8.9,11.8,9.4c1.1,0,2-0.9,2-2c0-1.1-0.9-1.9-2-2c-0.1,0-0.6,0,0,0c-0.1,0-0.3,0-0.4-0.1 c-0.2,0-0.5-0.1-0.7-0.2c-0.5-0.1-1-0.3-1.5-0.5c0.4,0.3,0.1,0-0.1,0c-0.1-0.1-0.3-0.1-0.4-0.2c-0.2-0.1-0.4-0.2-0.6-0.3 c-0.2-0.1-0.5-0.3-0.7-0.5c-0.1-0.1-0.2-0.1-0.3-0.2c-0.1,0-0.1-0.1-0.2-0.1c0.2,0.1,0.2,0.2,0.1,0.1c-0.4-0.3-0.8-0.7-1.2-1.1 c-0.2-0.2-0.4-0.4-0.5-0.6c-0.1-0.1-0.4-0.4,0-0.1c-0.1-0.1-0.1-0.2-0.2-0.3c-0.3-0.4-0.6-0.9-0.8-1.3c-0.1-0.1-0.1-0.2-0.2-0.3 c0-0.1-0.1-0.1-0.1-0.2c-0.1-0.1,0-0.1,0.1,0.1c-0.1-0.3-0.2-0.6-0.3-0.8c-0.3-1.1-1.4-1.7-2.4-1.4S319.3,157.9,319.6,159 
C319.6,159,319.6,159,319.6,159L319.6,159z"/> +<path d="M218.8,114.3c1.3,4.5,1.7,9.3,2.6,13.9c0.4,2.3,1.4,4.1,3.7,5c2.3,0.9,4.4-0.1,6.5-0.9c10.8-3.9,21.7-7.9,32.5-11.8 c19.3-7,38.7-14.4,56.2-25.4c4.4-2.7,8.6-5.6,12.6-8.9c1.9-1.5,2.7-4.1,2-6.4c-0.7-2.7-1.7-5.3-2.8-7.9c-1-2.5-2.2-5.1-3.4-7.5 c-1.1-2.1-2.4-4.2-4.9-4.6c-2.2-0.4-4,0.8-5.9,1.7c-0.7,0.4-1.4,0.6-2.2,0.6c-1.2-0.2-2.4-0.7-3.4-1.3c-18.4-8.7-39.3-10.9-59.3-7.7 c-5,0.8-9.8,1.9-14.6,3.4c-4.6,1.4-9.6,2.9-13.5,5.8c-3.9,2.8-7,7.1-6.7,12.1c0.1,4.3,3.2,7.9,7.4,8.9c2,0.5,4,0.1,5.8-0.9 c0.9-0.6,1.2-1.8,0.7-2.7c-0.1-0.1-0.2-0.3-0.3-0.4c-3.3-2.9-6.6,1.5-8.1,4.1c-1.8,3.2-2.9,6.7-3.1,10.3c-0.2,3.6,0.5,7.3,1.8,10.7 c0.6,1.6,1.4,3.5,0.9,5.2c-0.3,1.2-1.1,2.3-2.4,2.2c-2.5-0.2-2.5,3.8,0,3.9c3.3,0.2,5.8-2.7,6.3-5.8c0.6-3.8-1.6-7.1-2.4-10.6 c-1-4.8-0.2-9.8,2.2-14c0.4-0.7,0.8-1.3,1.3-1.9c0.2-0.3,0.4-0.5,0.6-0.8c0.1-0.1,0.3-0.3,0.4-0.3c0.3-0.1,0.2-0.2-0.4-0.3l0.4-3.1 c-2.6,1.6-6.1-0.3-6.9-3c-1.2-4,1.5-8.1,4.7-10.3c3.6-2.5,8-3.9,12.2-5.2c4.5-1.4,9-2.5,13.6-3.3c18-3.2,36.6-1.4,53.7,5.4 c2.2,0.9,4.3,2,6.5,2.9c2,0.8,4.3,0.8,6.2-0.2c1.1-0.5,2.7-1.9,4-1.6c0.8,0.2,1.2,1,1.6,1.7c0.6,1.2,1.2,2.4,1.8,3.6 c1.1,2.3,2.1,4.7,2.9,7.1c0.4,1.1,0.8,2.2,1.1,3.3c0.3,0.9,0.9,2.2,0.4,3.2c-0.5,0.7-1.2,1.3-2,1.8c-1,0.7-2,1.5-3,2.2 c-1.9,1.4-3.8,2.7-5.8,4c-15.8,10.3-33.3,17.4-50.9,23.9c-9.6,3.6-19.2,7-28.8,10.5l-7.5,2.7l-3.5,1.3c-0.8,0.3-1.9,0.9-2.8,0.8 c-1.7-0.3-1.7-2.6-1.9-3.9c-0.4-2.2-0.7-4.5-1.1-6.7c-0.3-2-0.7-3.9-1.3-5.9C221.9,110.8,218.1,111.8,218.8,114.3L218.8,114.3z"/> +<path d="M327.4,263.8c7.4-11,22.9-12.4,34.5-8.1c14.6,5.5,25.6,18.5,35.4,30.1c10.9,13.1,20.8,27.1,29.5,41.7 c7.2,11.8,14.2,23.6,21.9,35c4.3,6.5,9.1,12.8,14.1,18.9c2.3,2.6,4.7,5.1,7.2,7.5c1.3,1.2,3,2.8,3.2,4.7c0.1,0.7-1.1,3.2-2.1,2.4 v2.8c8.2-7.2,20.1-8.6,30.4-11c11.3-2.6,22.6-5.6,33.7-9c10.9-3.4,21.7-7.2,32.3-11.5c5.4-2.2,10.8-4.5,16.2-7 c4.4-1.9,8.6-4.4,12.4-7.4c4-3.3,7.6-7,11.8-9.9c1.9-1.4,3.9-2.5,6.1-3.3c1.1-0.4,2.3-0.7,3.4-0.8c0.5,0,1,0.1,1.5,0.3 
c0.2,0.1,0.3,0.2,0.5,0.3c0.1,0.1,0.2,0.1,0.3,0.2c0.3,0.4,0.5,0.1,0.4-0.9l2.4,3.1c7.6-5.9,19.1-9.5,28.5-6.2 c3.6,1.3,7.2,4.2,7,8.4c-0.2,2.9-2.4,7.1-5.9,6.5l0.9,3.3c5.9-6.5,16.3-9.2,22.9-2.2c2.9,2.8,3.8,7.1,2.4,10.9 c-1.5,3.8-5.2,6.1-9.3,5.9l1.7,3c2.5-3.5,7.3-4.3,11.3-3.2c4.6,1.2,7.8,5.1,9.6,9.2c2.2,4.9,3,10.3,2.1,15.6 c-1,4.8-4.3,9.6-9.5,10.3c-4.5,0.6-9.8-4.2-7.3-8.8l-3.6-1.5c-1.1,3.2-4.8,5.1-7.5,6.9c-3.1,2.1-6.1,4.1-9.2,6.1 c-6.2,4.2-12.5,8.3-18.8,12.4c-12.5,8.1-25.2,16-38.2,23.3c-25.8,14.6-52.9,27.2-81.6,34.9c-14.7,3.9-29.9,6.9-45.2,7 c-11.2,0.1-23.3-1.2-33.1-6.8c-7.3-4.2-14-9.5-20.5-14.9c-8-6.7-15.7-13.8-23.2-21.1c-13.3-12.7-26-26-38-40 c-0.9-1.1-1.8-2.1-2.6-3.2c-1.6-2-4.4,0.8-2.8,2.8c11,13.8,23.5,26.5,36.1,38.8c7.7,7.6,15.6,15,23.8,22.1 c6.9,6.1,14.2,11.7,21.8,16.8c2.8,1.9,5.8,3.5,9,4.8c6.1,2.5,12.9,3.5,19.4,4.2c15.7,1.5,31.7-0.9,47-4.3 c29.4-6.6,57.3-18.7,83.7-33c26.8-14.5,52.5-31.3,77.5-48.5c2.7-1.9,5.1-3.9,6.2-7.1c0.7-2.1-2.6-3.5-3.6-1.5 c-2.8,5.3,0.3,11.9,5.7,14.1c6,2.4,12.9-1.3,16.1-6.5c3.7-6.1,3.3-14.2,1.3-20.8c-1.8-6-5.6-11.8-11.6-14.3 c-6.1-2.5-13.6-1.1-17.6,4.4c-1,1.3,0.4,2.8,1.7,3c5.4,0.5,10.4-3,12.7-7.8c2.2-4.8,1.4-10.4-2-14.5c-7.7-9.8-21.8-7.4-29.5,1 c-0.9,1-0.7,3,0.9,3.3c8.1,1.4,13.2-8.7,9.8-15.4c-4.4-8.9-16.7-9.4-25.1-7.6c-5.9,1.3-11.4,3.9-16.1,7.6c-1.8,1.4,0.5,4.3,2.4,3.1 c1.5-0.9,2.1-2.8,1.2-4.3c-0.1-0.3-0.3-0.5-0.5-0.7c-1.5-1.6-3.6-2.4-5.8-2.2c-4.7,0.5-9,3-12.7,5.8c-4,3-7.5,6.6-11.4,9.7 c-4.4,3.6-9.4,5.7-14.6,8c-23.3,10.2-47.5,18.3-72.3,24.2c-6.8,1.7-13.8,3-20.5,4.8c-6.2,1.7-12.3,4.2-17.2,8.5 c-0.8,0.8-0.8,2,0,2.8c2.6,2.2,6.1,1.1,7.9-1.7c2-3.2,0.4-6.7-1.8-9.3c-2.3-2.6-4.9-4.8-7.2-7.4c-2.5-2.8-4.9-5.7-7.1-8.7 c-9.4-12.5-17.6-25.9-25.6-39.3c-9.1-15-18.6-29.8-29.6-43.5c-5.5-7-11.4-13.8-17.7-20.1c-5.8-5.8-12.3-11.3-19.8-14.9 c-14.2-6.9-33.7-6.1-43.3,8C322.6,263.9,326,265.8,327.4,263.8L327.4,263.8z"/> +<path class="st0" d="M25,948.3h66H172c15.3,0,29.6-6.6,37.1-20.6c4.4-8.2,4.6-16.8,4.6-25.8V695.7c0-8.9,0-17.3-5.7-25 
c-7.6-10.3-21-12.3-32.5-15.7l-46.2-13.4c-15.3-4.4-29.9-9.4-42.6-19.5c-19.5-15.5-32.1-38.1-35.1-62.8c-0.9-8.2-0.6-16.5-0.6-24.8 V399.9c0-7.7-0.2-15.4,0.2-23.1c1.2-23.8,8.1-47,20.1-67.7c23.7-40.7,66.2-68.7,113.1-73.9c47.3-5.2,94.2,12.1,126.8,46.7 c27.3,29.1,40.6,67,40.6,106.6v128.2c0,7.8-0.7,16.1,0.7,23.8c3.6,19,20,31.5,38.7,32.8c14.2,0.9,28.4,1.8,42.6,2.7 c14.2,0.9,29.8,0,41.3,9.8c20,17,15.6,49.9-8.3,61c-7.6,3.5-16.1,3.5-24.2,4.1l-37.3,2.7l-98.9,7l-34.7,2.5 c-9,0.6-17.7,1.1-25.2,7.2c-8.8,7.2-10.6,17.2-10.6,27.8v208c0,5.5-0.5,11.2,0.3,16.6c1.5,10.2,8.2,18.8,17.7,22.8 c4.8,2,9.7,2.2,14.8,2.2h86.3c12.1,0,23.8,0.2,35.2-4.9c19.9-9.1,32.8-28.9,32.9-50.8c0.1-11.7,0-23.4,0-35.1V529.4 c0-9.9-4.8-19.2-15.6-20.7c-6.4-0.9-12.2-1.6-15.3-8.2c-2.4-5.4-1.8-11.7,1.8-16.4c2.9-3.7,7.2-5.2,11.7-5.3c10.3-0.1,20.6,0,30.9,0 H968c2.5,0,2.5-3.9,0-3.9H435.2c-9.8,0-19.7-0.2-29.6,0c-14.8,0.3-24,17.1-15.5,29.7c2.7,3.9,6.7,6.6,11.4,7.6 c2.2,0.5,4.6,0.2,6.8,0.7c3.6,0.8,6.7,3.1,8.7,6.2c1.6,2.8,2.4,6,2.2,9.2c0.1,15.5,0,30.9,0,46.4V852c0,13.3,0.1,26.6,0,39.9 c0,20.7-12.3,39.5-31.3,47.8c-11.4,4.9-23.4,4.2-35.5,4.2h-84.9c-15.2,0-27.4-10.1-27.5-26c0-6.7,0-13.4,0-20.1V716.4 c0-7.3,0-14.7,0-22c0-14.2,9.7-25.4,24.1-26.6c22.1-1.8,44.3-3.2,66.4-4.7l96.3-6.8l25.7-1.8c15.4-1.1,29.2-9.1,35.8-23.4 c9.2-19.9,0.4-44.4-19.6-53.6c-11.5-5.3-26.1-4.4-38.4-5.2c-13.3-0.9-26.7-1.6-40-2.6c-19.5-1.3-34.6-17.6-34.5-37.1 c-0.1-24.9,0-49.9,0-74.8c0-24.1,0.1-48.2,0-72.4c0.2-85.3-68.7-154.6-154-154.8c-78.3-0.2-144.4,58.3-153.7,136.1 c-1.2,9.7-1.1,19.4-1.1,29.2v90.2c0,20.9-0.1,41.8,0,62.7c0.3,44.2,30,81.9,72.1,94.2c24.1,7,48.5,13.4,72.5,21 c10.7,3.3,18,13.1,18.2,24.2c0,1.6,0,3.3,0,4.9v205.8c0,7.3,0.5,14.8-1.9,21.8c-4.1,11.7-14,20.5-26.1,23.1 c-3.6,0.6-7.2,0.8-10.8,0.7H87.2H25C22.5,944.4,22.5,948.3,25,948.3L25,948.3z"/> +<path d="M322.6,263.8c-1.7,1.8-1.6,4.7,0.3,6.4c0,0,0,0,0,0c2.1,2,5.3,1.6,7.8,0.9c2.6-0.7,5.1-2.2,6.1-4.8c0.9-2.5,0.6-5.3-0.8-7.6 
c-1.5-2.3-3.5-4.4-5.7-6.1c-2.1-1.8-4.3-3.5-6.6-5c-4.5-3.1-9.2-5.8-14.2-8.2c-10-4.7-20.4-8.4-31-11.3c-5.3-1.5-10.7-2.9-16.1-4.3 c-6.3-1.7-12.6-3.1-19.1-4c-5.5-0.7-12.5-0.9-16.2,4.1c-2.7,3.7-2.6,9.9,1.8,12.2c2.2,1.2,4.2-2.2,2-3.4c-2.5-1.3-2-5-0.4-6.9 c2.7-3.2,8.1-2.7,11.8-2.3c10.2,1.2,20.3,4.3,30.2,7c17.9,4.9,36,10.7,51.1,21.9c2.1,1.5,4.1,3.2,6,4.9c1.7,1.5,3.5,3.4,3.7,5.8 c0.2,2.5-1.7,3.7-3.9,4.3c-1,0.3-2.4,0.7-3.5,0.3c-0.3-0.1-1-0.7-0.6-1C327.2,264.8,324.4,262,322.6,263.8L322.6,263.8z"/> +<path d="M362.8,406.5c-3.8-3.5-6.9-7.8-9-12.6c-1.4-3.5-1.5-8.5,2-11c4-3,9.5,0.1,12,3.5c1.6,2.2,2.9,4.7,3.8,7.3 c1.2,3,2.3,6.1,3.4,9.1l14,37.5l3.5,9.3c1.2,2.9,2.1,5.8,3,8.8c0.8,2.6,0.9,5.4,0.3,8.1c-0.8,2.5-2.4,4.7-4.5,6.3 c-9.9,7.9-23.6,11-36,9.6c-1.1,0-2,0.9-2,2c0,1.1,0.9,1.9,2,2c12.7,1.4,25.5-1.7,36.1-8.8c2.6-1.6,4.9-3.8,6.6-6.3 c1.6-2.8,2.3-6,2-9.1c-0.5-6.7-3.6-13.1-5.9-19.4l-15.5-41.5c-2.2-6-4-12.7-8.1-17.7c-3.5-4.3-9.6-7.2-15-5c-5.7,2.3-7.8,9.5-6,15.1 c1.9,5.9,6,11.4,10.5,15.6C361.9,411.1,364.7,408.3,362.8,406.5L362.8,406.5z"/> +<path d="M355.9,482.5c-2.3-0.4-3.9-1.5-4-3.9c0-2.1,0.9-4.2,2.5-5.6c2.5-2.3,6-3.3,9.3-4c3.9-0.8,7.8-1.4,11.7-1.7 c16.1-1.3,32.2,0.9,47.8,5c15.9,4.2,31.2,10.1,46.3,16.4c15.5,6.5,30.9,13.1,46.2,20c26.3,11.9,53.7,24.3,74.3,45.1 c10,10.1,17.6,22,24.3,34.6c7.2,13.9,13.5,28.2,18.9,42.9c11.5,30.7,20.9,62.1,30.4,93.4c5,16.3,10.1,32.4,15.6,48.5 c4.2,12.3,8.6,26,3.6,38.7c-3.6,9.2-11.3,16.4-19.5,21.4c-8.8,5.4-18.7,8.8-29,10.1c-4.8,0.6-9.7,0.6-14.5-0.1 c-5.9-0.9-11.3-3.6-15.6-7.7c-5.1-4.8-9-10.7-13-16.4c-4.5-6.4-8.9-13-13.1-19.7c-16.8-26.7-30.8-55-43.5-83.8 c-6.3-14.4-12.3-29-18.2-43.6c-5.8-14.5-12.4-30.1-10.8-46c0.1-0.9,0.2-1.9,0.4-2.8c0.3-1,0.5-1.9,0.6-3l-0.7-0.3 c-0.6-0.5-0.9-0.4-0.9,0.2c-0.2,0.3-0.4,0.7-0.5,1c-0.4,0.7-1,1.2-1.6,1.7c-3,2.1-7.4,1.4-10.8,1c-2.5-0.3-2.5,3.7,0,3.9 c3.9,0.4,7.9,0.7,11.5-0.9c2.7-1.2,5.8-4.4,5.6-7.6c-0.1-1.8-1.6-3.1-3.4-3c-0.8,0.1-1.6,0.4-2.2,1c-1.1,1.3-1.2,3.5-1.5,5.2 
c-0.4,2.2-0.6,4.3-0.7,6.5c-0.5,16.8,6.7,32.9,12.8,48.2c12.5,31.1,25.8,62,41.6,91.6c7.9,14.8,16.4,29.2,25.7,43.2 c4.4,6.6,8.9,13.1,13.8,19.4c4.3,5.5,9.3,10.6,15.8,13.4c9.8,4.2,21.7,3.3,31.7,0.9c10.9-2.5,21.1-7.4,29.8-14.5 c7.9-6.5,14.3-15.5,15.8-25.8c1.1-7.3-0.1-14.8-2-21.9c-2.3-8.6-5.5-17-8.3-25.4c-11.2-34-20.8-68.6-32.3-102.5 c-5.3-15.6-10.8-31-17.3-46.2c-6.4-14.8-13.5-29.7-22.8-42.9c-8.3-11.6-18.4-21.7-29.9-30c-12.3-9-25.9-16-39.7-22.6 c-16.3-7.9-32.8-15.1-49.4-22.2c-16-6.8-32-13.7-48.5-19.1c-16.4-5.4-33.4-9.3-50.6-9.8c-8.4-0.2-16.7,0.5-24.9,2.1 c-5.9,1.2-12.6,3.8-14.5,10.2c-1.6,5.1,1,10.1,6.4,11C357.3,486.8,358.4,483,355.9,482.5L355.9,482.5z"/> +<path d="M612.1,845c9.9,16.5,19.9,33,30,49.4c2.5,4.1,5.1,8.3,7.7,12.4c2.1,3.4,4.1,7.2,7,10.1c3,3,7.2,4.4,11.4,3.8 c4.1-0.4,8.2-1.2,12.2-2.4c16.1-4.6,30.4-13.8,43.9-23.6c3.3-2.4,6.6-4.9,9.9-7.4c3.2-2.5,6.5-4.9,9-8.2c4.4-5.6,6.4-14.7,0.5-20 c-2.9-2.6-6.7-3.5-10.5-3.9c-4.6-0.6-9.2-1-13.8-1.4l-7.1-0.7c-2-0.3-4-0.3-6.1,0c-3.4,0.7-5.9,3.9-4.8,7.4c0.4,1.5,1.6,2.6,3.1,2.9 c1.3,0.2,3-0.9,2.4-2.4c-5.7-15.2-12.6-29.9-20.8-44c-1.3-2.2-4.7-0.2-3.4,2c8,13.7,14.8,28.1,20.4,43l2.4-2.4 c-0.3-0.1-0.3,0-0.4-0.3c-0.1-0.4-0.1-0.7,0-1.1c0.4-1.1,1.9-1.3,2.9-1.4c2,0,3.9,0.1,5.9,0.4l6.6,0.7c4.1,0.4,8.3,0.7,12.4,1.3 c3.1,0.4,6.5,1.1,8.6,3.6c1.9,2.2,2.1,5.4,1.4,8.2c-0.9,3.3-2.8,6.3-5.4,8.5c-2.7,2.4-5.6,4.7-8.5,6.8c-3,2.3-6.1,4.5-9.1,6.8 c-12.2,8.8-25.1,17-39.6,21.3c-3.6,1.1-7.3,1.9-11.1,2.4c-1.9,0.3-3.8,0.3-5.7-0.1c-1.6-0.5-3-1.4-4.1-2.6c-2.6-2.6-4.3-6.1-6.3-9.2 c-2.3-3.7-4.6-7.4-6.9-11.1c-9.2-14.9-18.3-29.9-27.3-44.9c-1.1-1.9-2.3-3.8-3.4-5.7C614.3,840.9,610.8,842.9,612.1,845L612.1,845z"/> +<path d="M492,624.2c-22.8-0.5-45.8-1.1-68.1-6c-22.1-4.9-44.7-14.5-58.8-32.9c-7.7-10-12.6-22.4-12.7-35.1c0-5.1,0.5-11,3.9-15.1 c1.5-1.9,3.8-3,6.3-3c2.9,0.1,5.5,1.8,7.6,3.7c4.9,4.3,8.7,10,12.3,15.3c3.6,5.4,6.9,11,9.9,16.7c5.9,11.7,10.4,24.1,13.2,36.9 c5.7,26.4,3.8,53.4-3.2,79.3c-7,25.9-18.4,50.3-29.6,74.6c-2.9,6.2-5.7,12.4-8.5,18.7c-2.6,5.9-4.7,11.9-6.8,18 
c-4,11.9-8.3,24.7-17.6,33.6c-16.4,15.6-43.1,10.9-59.5-2.2c-8-6.2-13.5-15.1-15.5-25.1c-1-5.3-1-10.7-0.3-16c1-7.5,2.3-15,3.6-22.5 c2.7-15.2,5.7-30.4,9.1-45.5c2.9-13,5.9-25.8,11.5-37.9c2.4-5.4,5.4-10.5,8.9-15.2c1.5-2-1.9-4-3.4-2c-7.9,10.3-12.7,23-16.3,35.3 c-2,6.9-3.4,14-5,21.1c-1.7,7.8-3.4,15.7-4.9,23.6c-1.6,7.9-3,15.8-4.4,23.7c-1.3,7.4-2.8,15-3.4,22.5c-0.9,10.8,1.6,21.8,7.8,30.7 c6.4,9.2,16,15.7,26.4,19.6c10.6,4,22.7,5.1,33.5,1.5c26.6-9,30.3-40.3,40.2-62.6c11.3-25.5,23.9-50.4,32.9-76.8 c9.1-26.6,14-54.6,10.6-82.7c-3.3-27.1-13.6-53-29.9-74.9c-3.8-5.1-7.8-10.9-13.6-13.8c-5.4-2.7-11.5-1.6-15.3,3.2 c-3.6,4.6-4.5,10.6-4.6,16.3c-0.1,6.6,1,13.1,3.2,19.4c8.3,24.1,29.6,40,52.9,48.3c22.6,8.1,47,10.1,70.7,10.9 c5.6,0.2,11.1,0.3,16.7,0.4C494.5,628.2,494.5,624.2,492,624.2L492,624.2z"/> +<path d="M261.4,806c-3.4,10.1-6.9,20.2-10.5,30.2c-1.8,5-3.7,9.9-5.6,14.8c-1.9,4.9-4,9.8-5.8,14.8c-1.6,4.4-2.4,9-1.2,13.7 c1.2,4.2,3.5,8,6.6,11c3.2,3.2,7.2,5.6,11.1,7.9c4.1,2.4,8.4,4.5,12.8,6.3c13.3,5.6,27.5,8.5,41.8,10.2c5,0.7,10,0.9,15,0.8 c4.4-0.2,9.4-0.7,13-3.3c3.9-2.8,4.6-7.3,3.1-11.6c-1.6-4.3-3.9-8.3-6.8-11.9c-1.4-1.8-3-3.6-4.6-5.3c-1.4-1.6-2.9-3.8-5.1-4.3 c-1.8-0.4-3.5,0.8-3.9,2.6c-0.1,0.5-0.1,1,0,1.4c0.5,2,3.1,1.7,3.8,0c7.3-18.6,14.6-37.3,22-55.9c0.3-1-0.3-2.1-1.4-2.4 c-1-0.3-2.1,0.3-2.4,1.4l-22,55.9h3.8l0-0.1l-0.3,1c0.2-0.3-0.4,0.1-0.4,0c0,0,0.4,0.3,0.4,0.3c0.3,0.4,0.6,0.7,1,1 c0.7,0.8,1.4,1.6,2.2,2.4c1.4,1.6,2.8,3.2,4.1,4.9c2.6,3.1,4.6,6.7,6,10.6c0.3,0.9,0.5,1.8,0.5,2.7c0,0.2,0,0.4,0,0.6 c0,0.1,0,0.2,0,0.3c0-0.1,0.1-0.4,0,0c-0.1,0.4-0.2,0.8-0.3,1.2c0,0,0.1-0.2-0.1,0.2c-0.1,0.2-0.2,0.4-0.3,0.5 c0,0.1-0.3,0.5-0.2,0.3c0.1-0.2-0.2,0.2-0.3,0.3c-0.1,0.1-0.3,0.3-0.4,0.4c-0.1,0.1-0.6,0.5-0.2,0.2c-0.3,0.2-0.7,0.5-1,0.7 c-0.7,0.4-1.5,0.8-2.2,1c-1,0.3-1.9,0.5-2.9,0.7c-2.1,0.4-4.2,0.6-6.4,0.6c-4.6,0-9.2-0.2-13.8-0.8c-1.1-0.1-1.8-0.2-2.7-0.4 c-0.8-0.1-1.7-0.2-2.5-0.4c-2.3-0.4-4.7-0.7-7-1.1c-8.7-1.5-17.2-3.9-25.5-7.1c-4.1-1.6-8-3.4-11.8-5.5c-3.8-2-7.4-4.2-10.9-6.8 
c-3.1-2.3-5.5-5.4-7.1-8.9c-1.5-3.8-1.6-8.1-0.3-12c1.4-4.5,3.4-8.9,5.1-13.3c1.8-4.6,3.6-9.3,5.3-13.9c3.4-9.1,6.7-18.3,9.8-27.6 c0.8-2.3,1.6-4.6,2.3-6.9C266,804.6,262.2,803.6,261.4,806L261.4,806z"/> +<path class="st0" d="M867.6,298.2H542.8c-4.3,0-8.8-0.4-13.1,0c-0.2,0-0.4,0-0.6,0l1.9,2.5l4.1-22.8l9.9-54.4l12-65.8l10.4-56.9 l3.4-18.8c0.5-2.9,1.5-5.9,1.6-8.9c0-0.1,0-0.3,0.1-0.4l-1.9,1.4h324.8c4.3,0,8.8,0.4,13.1,0c0.2,0,0.4,0,0.6,0l-1.9-2.5L903,94.4 l-9.9,54.4l-12,65.8l-10.4,56.9c-1.7,9.2-3.5,18.4-5,27.6c0,0.1,0,0.3-0.1,0.4c-0.5,2.5,3.3,3.5,3.8,1l4.1-22.8l9.9-54.4l12-65.8 l10.4-56.9c1.7-9.2,3.5-18.4,5-27.7c0-0.1,0-0.3,0.1-0.4c0.3-1.1-0.4-2.2-1.4-2.4c-0.1,0-0.3-0.1-0.5-0.1H584.3 c-4.4,0-8.8-0.1-13.1,0c-0.2,0-0.4,0-0.6,0c-0.9,0-1.7,0.6-1.9,1.4l-4.1,22.8l-9.9,54.4l-12,65.8l-10.4,56.9 c-1.7,9.2-3.5,18.4-5,27.6c0,0.1,0,0.3-0.1,0.4c-0.3,1.1,0.4,2.2,1.4,2.4c0.1,0,0.3,0.1,0.5,0.1h324.8c4.4,0,8.8,0.1,13.1,0 c0.2,0,0.4,0,0.6,0C870.1,302.1,870.1,298.2,867.6,298.2z"/> +<path class="st0" d="M892.8,71.7l-1.1,6.1l-3,16.6l-4.5,24.5l-5.4,29.9l-6,32.8l-6,33.1l-5.6,30.8l-4.8,26.1l-3.4,18.7 c-0.5,3-1.1,5.9-1.6,8.9c0,0.1,0,0.3-0.1,0.4c-0.3,1.1,0.4,2.2,1.4,2.4c0.1,0,0.3,0.1,0.5,0.1h14.3c0.9,0,1.7-0.6,1.9-1.4l1.1-6.1 l3-16.6l4.5-24.5l5.4-29.9l6-32.8l6-33.1l5.6-30.8l4.8-26.1l3.4-18.7c0.5-3,1.1-5.9,1.6-8.9c0-0.1,0-0.3,0.1-0.4 c0.3-1.1-0.4-2.2-1.4-2.4c-0.1,0-0.3-0.1-0.5-0.1h-14.3c-2.5,0-2.5,3.9,0,3.9h14.3l-1.9-2.5l-1.1,6.1l-3,16.6l-4.5,24.5l-5.4,29.9 l-6,32.8l-6,33.1l-5.6,30.8l-4.8,26.1l-3.4,18.7c-0.5,2.9-1.3,5.9-1.6,8.9c0,0.1,0,0.3-0.1,0.4l1.9-1.4h-14.3l1.9,2.5l1.1-6.1 l3-16.6l4.5-24.5l5.4-29.9l6-32.8l6-33.1l5.6-30.8l4.8-26.1l3.4-18.7c0.5-3,1.1-5.9,1.6-8.9c0-0.1,0-0.3,0.1-0.4 C897.1,70.2,893.3,69.2,892.8,71.7z"/> +<path class="st0" d="M769.1,474.8c-7.1,0.1-14.1-1.3-20.6-4.2c-6.5-3-11.8-8-15.2-14.3c-3.3-6.2-4.6-13.3-4.9-20.2 c-0.3-7.6,0.4-15.3,1.9-22.8c0.9-4.3,1.9-8.5,2.7-12.7c0.8-4.1,1.8-8.6,1.2-12.7c-0.4-3.6-2.3-6.9-5.2-9c-2.3-1.7-5.1-2.2-7.9-2.6 
c-2.7-0.4-5.5-0.6-7.9-2c-2.2-1.2-4-3-5.2-5.3c-2.4-4.5-2.5-9.9-0.3-14.5c2.9-5.9,9.3-8.4,15.6-8.7c6.7-0.1,13.3,1.7,19.1,5.2 c12.5,7.8,19.8,21.7,19.2,36.4c-0.3,6.6-3.7,12-6.7,17.7c-2.8,5.3-5.7,11.4-4.2,17.5c0.9,3.5,3.7,6.2,4.5,9.7 c0.2,0.8,0.2,1.6-0.1,2.4c-0.1,0.3-0.5,1.2-0.9,1.2c-1-0.2,0.5-1.8,0.7-2.1c3.8-5.8,8.8-10.8,14.5-14.7c12.7-8.7,28-12.1,43.3-11.7 c8.7,0.3,17.4,1.5,25.8,3.6c4.2,1,8.4,2.1,12.5,3.4c3.4,1,6.8,2.3,10,3.9c3.1,1.6,5.8,3.8,7.9,6.4c1,1.3,3.5,0.2,3.4-1.4 c-0.3-3.8-3-6.2-6.2-7.9c-2.8-1.5-6.6-2.8-8.2-5.8c-1.3-2.4-0.9-5.7-0.8-8.3c0.1-3.2,0.3-6.4,0.6-9.6c0.3-3,0.8-6,1.4-9 c0.3-1.7,0.8-3.6,2.6-4.1c2.8-0.8,4.1,1.7,5.5,3.6c1.4,1.9,3.6,3.9,3.6,6.5c0,1.1,0.9,2,2,2c0.3,0,0.6-0.1,0.9-0.3 c6.1-3.1,12.4-5.9,19.1-7c6.7-1.1,13.5-0.4,20.1,0.8c1.1,0.3,2.2-0.4,2.4-1.5c0.1-0.3,0.1-0.7,0-1c-0.2-1.5,0-3,0.7-4.4 c0.6-1.7,1.6-3.2,2.8-4.5c1-1,1.9,0.7,2.5,1.5c1.1,1.4,2.1,2.9,3,4.5c3.5,6,5.7,12.7,6.3,19.6c1.1,13.3-3.8,26.3-13.3,35.7 c-5.1,5.1-11.4,8.9-18.4,11.1l2.2,2.9c0.8-1.2,1.7-2.3,2.8-3.2c0.1-0.1,0.8-0.5,0.7-0.4c0.1,0,0.6-0.2,0.4-0.1c-0.2,0,0.3,0,0.5,0 c0.3,0-0.1,0,0.3,0c0.2,0.1,0.5,0.1,0.7,0.2c0.5,0.2,0.9,0.3,1.4,0.5c1.4,0.6,2.8,1.3,4.1,1.9c6,2.7,12.4,4.5,18.7,6.2 c3.3,0.9,6.7,1.8,10,2.8c3,0.9,6,1.9,9.1,2.7c7.1,1.8,16.7,2.1,21.2,8.8c1,1.4,1.6,3,1.6,4.7c-0.1,0.8-0.4,1.1-1.2,1.5 c-1.4,0.7-3.8,0.6-5.5,1.4c-2.3,1.1-0.3,4.5,2,3.4c-0.5,0.3,0.3,0,0.5-0.1c0.5-0.1,1-0.2,1.5-0.2c1.1-0.2,2.3-0.5,3.3-1 c2.8-1.3,3.7-4,3.2-6.9c-1.7-8.8-11.1-12.3-18.8-13.9c-4.1-0.9-8.2-1.9-12.2-3.1c-3.4-1-6.9-2-10.4-2.9c-6.9-1.9-14.1-3.6-20.6-6.6 c-2.8-1.2-6-3.5-9.1-3.4c-3.5,0-5.7,3.2-7.6,5.8c-1,1.4,0.7,3.4,2.2,2.9c14.5-4.6,26.1-15.5,31.6-29.6c5.7-14.8,3.3-32.3-5.7-45.3 c-1.2-1.7-2.5-4-4.4-5c-2-1-4.4-0.5-5.8,1.3c-2.8,3.4-5.2,7.9-4.2,12.4l2.4-2.4c-7.2-1.3-14.5-2-21.8-0.9c-7.2,1.2-14,4.1-20.4,7.5 l3,1.7c0-2.9-1.5-5.1-3.2-7.3c-1.6-2.1-2.8-4.6-5.2-5.9c-2.5-1.4-5.6-1.2-8,0.5c-1.9,1.5-2.7,3.7-3.1,6c-1.3,7-2.1,14.1-2.1,21.2 
c0,3,0,6.3,1.7,9c1.2,1.7,2.8,3.1,4.6,4.2c1.9,1.2,4,2.1,5.9,3.3c1.4,0.9,2.8,2,3,3.8l3.4-1.4c-4.9-6.2-12.5-9.2-19.8-11.4 c-8.9-2.8-18.1-4.9-27.4-6.2c-17-2.2-34.5-0.7-50,7.1c-6.8,3.4-12.9,8-17.9,13.6c-1.3,1.4-2.5,2.9-3.6,4.5c-1,1.4-2.4,3.1-2.9,4.8 c-0.6,2.3,0.9,4.7,3.2,5.3c1,0.2,2.1,0.1,3-0.4c4-2.3,3.1-7.9,1.4-11.3c-0.8-1.7-2.1-3.2-2.8-5c-0.5-1.4-0.8-2.9-0.7-4.3 c0.2-3.1,1.1-6.1,2.6-8.8c1.5-3.1,3.3-6,4.8-9.1c1.6-2.9,2.8-6.1,3.5-9.4c0.7-4.4,0.6-8.8-0.2-13.2c-1.3-7.9-4.8-15.2-9.9-21.3 c-9-10.9-24.8-18.1-38.9-13.9c-6.6,1.8-11.8,7.1-13.4,13.7c-1.4,5.8-0.1,12,3.5,16.8c1.9,2.4,4.4,4.2,7.3,5.2c2.8,1,5.9,1.1,8.9,1.7 c3,0.6,5.4,1.9,6.9,4.7c2,4.1,0.9,8.9,0.1,13.2c-1.6,8.9-4.1,17.6-4.8,26.7c-0.8,7.7-0.3,15.4,1.3,23c1.7,7.6,5.6,14.5,11.2,19.8 c5.8,5.1,12.8,8.5,20.4,9.9c3.8,0.7,7.6,1.1,11.4,1.1C771.6,478.7,771.6,474.8,769.1,474.8L769.1,474.8z"/> +<path class="st0" d="M780.6,428.6c10.2-6.7,22.9-8.7,34.7-5.5c5.7,1.6,11.5,4.4,14.5,9.6c2.7,4.8,3,10.6,0.9,15.7 c-2.5,5.7-7.5,9.9-13.5,11.5l2.2,0.9l-0.1-0.2l0.3,1l0-0.2l-0.3,1c-0.2,0.1-0.2,0.2-0.1,0.1c0.3,0,0.5,0,0.8-0.1 c0.6,0,1.2-0.1,1.7-0.1l3.7-0.3l6.9-0.6c2.3-0.2,4.6-0.4,6.9-0.6c2-0.3,4.1-0.2,6.2,0.2c3.1,1,5.7,3.2,7.1,6.2 c1.1,2.1,2.2,6.2-0.5,7.7c-2.2,1.2-0.2,4.6,2,3.4c3.2-1.7,3.9-5.6,3.3-8.9c-0.7-3.5-2.5-6.7-5.2-9c-2.9-2.5-6.7-3.8-10.6-3.6 c-4.7,0.2-9.5,0.8-14.2,1.1c-2.6,0.2-5.3,0.4-7.9,0.7c-2.2,0.2-4.8,1.7-3.5,4.3c0.5,0.8,1.4,1.1,2.2,0.9c6.2-1.7,11.5-5.6,14.8-11.1 c3.3-5.6,3.9-12.3,1.7-18.4c-4.9-13.3-21.8-17.4-34.5-16.3c-7.7,0.6-15.2,3.2-21.7,7.4C776.5,426.6,778.5,430,780.6,428.6 L780.6,428.6z"/> +<path class="st0" d="M872.5,404.4c4.2-0.3,8.5,0.6,12.2,2.6c0.9,0.5,2.1,0.2,2.7-0.8c0.5-0.9,0.2-2.1-0.7-2.6 c-4.3-2.4-9.3-3.5-14.2-3.2c-1.1,0-1.9,0.9-2,2C870.5,403.5,871.4,404.4,872.5,404.4z"/> +<path class="st0" d="M919.4,395.3c-3-0.4-6.1,0.3-8.7,1.9c-1.2,0.7-2.4,1.5-3.5,2.3c-1.3,0.8-2.4,1.9-3.1,3.2 c-0.5,0.9-0.2,2.1,0.7,2.7c0.9,0.5,2.1,0.2,2.7-0.7c0.4-0.8,1.1-1.5,1.8-1.9c1-0.7,2-1.4,3.1-2c0.5-0.3,1-0.6,1.6-0.8 
c0.4-0.2,0.9-0.4,1.4-0.5c0.3-0.1,0.6-0.2,0.8-0.2c0.1,0,0.3-0.1,0.4-0.1c0.3-0.1-0.2,0,0.1,0c0.3,0,0.6,0,0.9,0c0.1,0,0.3,0,0.4,0 c0,0,0.5,0.1,0.2,0c1,0.3,2.1-0.3,2.4-1.4C921,396.7,920.4,395.6,919.4,395.3L919.4,395.3z"/> +<path class="st0" d="M899.5,411.6c-0.3,0.7-0.9,1.1-1.6,1.3c-1.1,0.2-2.2,0.2-3.3,0c-1-0.3-2.1,0.3-2.4,1.4c-0.3,1,0.3,2.1,1.4,2.4 l1.9,0.6c0.1,0,0.9,0.2,0.9,0.3l-0.3-0.2c-0.2-0.7-0.3-0.8-0.3-0.4c0,0.7,0.2,1.3,0.4,1.9c0.1,0.4,0.2,0.8,0.1,1.2 c-0.2,1.2-1.3,2-2.5,1.8c-0.2,0-0.4-0.1-0.5-0.2l-1.5,3.6c1.7,0.6,3.5,0.8,5.3,0.6c0.9-0.2,1.8-0.6,2.5-1.2c0.1,0,0.6-0.5,0.4-0.4 c0.3,0,0.7,0.1,1,0.1c3.6,0.4,7.7-0.7,9.3-4.1c0.5-0.9,0.2-2.1-0.7-2.7c-0.9-0.5-2.1-0.2-2.7,0.7c-0.5,1-1.3,1.7-2.3,2 c-1.4,0.3-2.9,0.3-4.4,0.1c-0.8-0.1-1.7,0.2-2.4,0.7c-0.4,0.4-0.9,0.7-1.3,1c-1.1,0.4-2.5-0.1-3.7-0.5c-2.1-0.6-3.6,2.6-1.5,3.6 c3,1.6,6.7,0.4,8.3-2.6c0.2-0.4,0.4-0.9,0.5-1.3c0.2-0.9,0.2-1.9,0-2.8c-0.1-0.4-0.2-0.8-0.4-1.3c-0.1-0.4-0.1-0.8-0.2-1.2 c-0.6-2.1-3.2-2.5-5-3l-1,3.8c1.8,0.4,3.7,0.4,5.6,0c1.6-0.4,2.9-1.5,3.7-3c0.5-0.9,0.2-2.1-0.7-2.7 C901.3,410.4,900.1,410.7,899.5,411.6L899.5,411.6z"/> +<path d="M475.8,390.4c-14.9,5.4-30.3,9-46,10.7c-7.6,0.8-15.4,1.6-23.1,1c-6.6-0.5-13.4-2.7-17.6-8.1c-9.8-12.8,7.4-25.5,16.7-31.7 c6.6-4.4,13.5-8.5,20.2-12.8c6.9-4.3,13.8-8.8,20.9-12.9c6.8-4.1,13.9-7.7,21.2-10.8c3.7-1.6,7.4-3,11.2-4.2c1.6-0.5,6.1-2,6.6,0.8 c0,0.3,0,0.6,0,0.9c-0.1,0.1-0.3,0.3-0.1,0.3l1.4,3.4c4-3.6,8.9-6,14.2-7.1c1.8-0.4,4.6-0.9,6,0.6c0.3,0.4,0.5,0.9,0.4,1.5 c0,0.2-0.1,0.5-0.2,0.7c-0.1,0.2-0.1,0.2-0.3,0.2l1.4,3.4c7.6-6.7,18.8-8,28.1-4.7c4.6,1.6,9.8,4.9,12,9.4c2.7,5.6-3.8,9.1-8.4,9.6 c-3.5,0.4-7.1,0-10.6,0.4c-2.8,0.3-5.6,1-8.2,2.1c-5.4,2.2-10.2,6-13.6,10.7c-3.4,4.8-5.5,11.9-0.7,16.6c1.9,1.9,4.7,2.7,7.3,2.1 c2.5-0.5,4.7-1.9,7-2.9c2-0.9,4.4-1.5,6.1,0.1c1.5,1.6,2.1,3.7,1.8,5.9c-0.8,6-8.1,8.3-13.1,9.6l2.4,1.4c-0.3-0.9,0.1-1.9,1-2.3 c2.2-1.2,0.2-4.6-2-3.4c-2.4,1.3-3.5,4.1-2.8,6.7c0.3,1,1.4,1.6,2.4,1.4c5.5-1.5,12.1-3.9,14.8-9.4c2.1-4.3,1.4-10.3-2.6-13.3 
c-2.2-1.5-4.9-1.9-7.4-1.1c-2.6,0.7-4.9,2.3-7.5,3.2c-2.4,0.8-4.7,0.1-5.8-2.3c-1.3-2.7-0.3-5.7,1.1-8.2c3.1-5.3,7.9-9.3,13.6-11.4 c2.9-1.1,5.9-1.7,9-1.7c3.7-0.1,7.4,0.1,11-0.7c5.8-1.4,11.3-6.7,9.3-13.1c-1.7-5.4-7.2-9.6-12.2-11.9c-11.3-5.3-25.5-3.8-34.9,4.5 c-0.8,0.8-0.7,2.1,0.1,2.8c0.4,0.3,0.8,0.5,1.3,0.6c3.2-0.1,4.8-3.2,4.3-6.1c-0.5-3.4-3.9-5-7.1-5c-3.6-0.1-7.4,1.2-10.7,2.6 c-3.5,1.5-6.7,3.5-9.5,6.1c-1.2,1.1-0.3,3.6,1.4,3.4c3.2-0.4,4.6-3.3,3.9-6.3c-0.9-3.6-4.7-4.7-8-4.2c-2,0.4-3.9,0.9-5.8,1.7 c-2.1,0.7-4.1,1.5-6.2,2.3c-4,1.6-8,3.4-11.9,5.3c-7.9,3.9-15.4,8.5-22.8,13.1c-7.7,4.8-15.3,9.6-23,14.4 c-7.1,4.4-14.2,8.9-20.2,14.6c-5.4,5.1-10.3,12.1-9.2,19.9c1,6.8,6.5,12.4,12.6,15.1c7.7,3.3,16.4,3,24.6,2.5 c17.3-1,34.4-4.2,50.9-9.6c2.1-0.7,4.2-1.4,6.2-2.1C479.2,393.4,478.1,389.6,475.8,390.4L475.8,390.4z"/> +</svg> \ No newline at end of file diff --git a/mintlify-docs/logo/hero-light.svg b/mintlify-docs/logo/hero-light.svg new file mode 100644 index 0000000000..73e417c1f7 --- /dev/null +++ b/mintlify-docs/logo/hero-light.svg @@ -0,0 +1,29 @@ +<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.1" id="artwork" x="0px" y="0px" viewBox="0 0 1000 1000" style="enable-background:new 0 0 1000 1000;" xml:space="preserve"> +<style type="text/css"> + .st0{fill:#61D790;} +</style> +<path class="st0" d="M939.2,944.4H726.9c-1.3,0-2.7,0-4,0c-7.1,0-14.1-2.2-19.9-6.3c-9.4-6.7-14.1-17.5-14.1-28.9v-308 c0-0.7,0-1.5,0-2.2c0-18.7,15.1-33.8,33.8-33.8c0,0,0,0,0,0c0.9,0,1.9,0,2.8,0h201.1c4.1,0,8.3,0,12.4,0 c17.3,0.1,32.4,13.4,33.8,30.9c0.3,3.2,0.1,6.4,0.1,9.5v297.3c0,2.6,0,5.1,0,7.7C972.8,929.2,957.8,944.2,939.2,944.4 c-2.5,0-2.5,4,0,3.9c20-0.2,36.5-15.9,37.7-35.9c0.2-2.9,0-5.9,0-8.8V605c0.1-4,0-8.1-0.5-12.1c-2.8-18.2-19.1-31.6-37.3-31.7 c-3.9,0-7.8,0-11.7,0H725.3c-1.2,0-2.4,0-3.6,0c-9.6,0.3-18.8,4.2-25.6,11c-7.7,7.7-11.1,17.8-11.1,28.5v308.7 c0,12.5,5.3,24,15.4,31.5c6.5,4.8,14.4,7.3,22.4,7.3c1.2,0,2.4,0,3.6,0h212.7C941.8,948.3,941.8,944.4,939.2,944.4z"/> 
+<path class="st0" d="M687,647.6h276.3c3.7,0,7.5,0.1,11.2,0c0.2,0,0.3,0,0.5,0c2.5,0,2.5-3.9,0-3.9H698.7c-3.7,0-7.5-0.1-11.2,0 c-0.2,0-0.3,0-0.5,0C684.4,643.6,684.4,647.6,687,647.6L687,647.6z"/> +<path class="st0" d="M687,756.7h276.3c3.7,0,7.5,0.1,11.2,0c0.2,0,0.3,0,0.5,0c2.5,0,2.5-3.9,0-3.9H698.7c-3.7,0-7.5-0.1-11.2,0 c-0.2,0-0.3,0-0.5,0C684.4,752.8,684.4,756.7,687,756.7L687,756.7z"/> +<path class="st0" d="M687,865.9h276.3c3.7,0,7.5,0.1,11.2,0c0.2,0,0.3,0,0.5,0c2.5,0,2.5-3.9,0-3.9H698.7c-3.7,0-7.5-0.1-11.2,0 c-0.2,0-0.3,0-0.5,0C684.4,862,684.4,865.9,687,865.9L687,865.9z"/> +<path d="M258.7,241.1c-7.3-29-14.1-58.2-20.2-87.4c-1.5-7-3.1-14.1-4.2-21.2c-1.1-6.8-1.5-13.9,0.4-20.7 c3.1-11.3,12.5-19.1,22.2-24.8c11.2-6.6,24-12.3,37.1-12.8c11.1-0.4,23.3,3.4,29.9,12.9c6.2,8.8,7.2,20.1,9.6,30.3 c1,5.3,2.8,10.4,5.3,15.2c1.3,2.4,2.9,4.7,4.8,6.7c1.8,2,4.1,4.1,3.4,7c-0.3,1.6-1,3.1-1.5,4.7c-0.4,1.8-0.8,3.6-0.9,5.4 c-0.3,3.4-0.2,6.9,0.5,10.3c1.2,6.6,4.7,12.3,8.2,17.9c3.3,5.4,6.9,11.1,7.2,17.6c0.3,5.3-2.2,10.6-7.4,12.5c-2,0.8-4.2,0.7-6.1-0.2 c-1-0.5-2.2-0.2-2.7,0.8c-0.2,0.3-0.2,0.6-0.2,0.9c-0.8,8.9-8.7,15.4-17.6,14.6c-4.2-0.4-8-2.4-10.7-5.5c-0.6-0.6-1.6-0.8-2.4-0.3 c-8.1,6.2-20.2,1.8-25.7-6c-7.5-10.8-6.3-24.8-5.2-37.2c0.7-7.2,1.4-14.5,0.6-21.8c-0.7-6.2-2.3-12.9-7-17.4c-4.1-4-11-5.8-16.1-2.5 c-4.5,2.9-5.9,8.1-6,13.1c-0.2,5.5,1.4,11,0.4,16.4c-0.7,4.2-2.8,9.7-7.9,9.4c-2.5-0.1-2.5,3.8,0,3.9c5.1,0.3,8.6-3.4,10.4-7.8 c1.9-4.7,1.8-9.5,1.3-14.4c-0.5-4.6-1.1-10.2,1.2-14.5c2.6-4.8,8.7-4.8,12.8-1.8c5,3.8,6.4,10.9,7,16.7c0.7,7.5-0.2,15.1-0.9,22.5 c-1.1,12.5-1.6,26.5,5.9,37.2c6.5,9.4,21.1,15,31.1,7.4l-2.4-0.3c7.4,8.3,20.1,9.1,28.4,1.7c3.9-3.5,6.3-8.3,6.7-13.5l-3,1.7 c5.1,2.7,11.1,0.8,15-3.1c4.6-4.6,5.3-11.3,3.8-17.4c-1.6-6.9-5.7-12.7-9.3-18.6c-3.6-6-6.1-12.3-6-19.4c0.1-3.6,0.7-7.1,1.9-10.4 c0.6-1.5,0.8-3.2,0.7-4.8c-0.2-1.6-0.9-3.2-1.9-4.5c-1.7-2.3-3.9-4.3-5.5-6.7c-1.5-2.3-2.7-4.7-3.6-7.3c-3.9-10.7-4.3-22.3-8.5-32.8 
c-2-5.4-5.3-10.2-9.7-13.9c-4.8-3.9-10.5-6.6-16.5-7.8c-13.6-2.8-27.2,1.1-39.5,6.8c-11.1,5.1-22.4,11.7-29.6,21.9 c-3.8,5.5-6,11.9-6.5,18.6c-0.6,7.2,0.6,14.4,2,21.5c6,30.6,13,61,20.5,91.2c0.9,3.7,1.8,7.3,2.8,11 C255.5,244.6,259.3,243.5,258.7,241.1L258.7,241.1z"/> +<path d="M316.5,133.5c3,2.6,7.2,3.5,11,2.4c1-0.3,1.6-1.4,1.3-2.4c-0.3-1-1.4-1.6-2.4-1.4c-0.4,0.1-0.7,0.2-1.1,0.3 c-0.1,0-0.2,0-0.3,0c0.1,0,0.4,0,0.1,0c-0.2,0-0.4,0-0.6,0c-0.4,0-0.7,0-1.1,0c-0.2,0-0.4-0.1-0.6-0.1c-0.6,0-1.1,0.4-1.1,1 c0,0.6,0.4,1.1,1,1.1c0.6,0,1.1-0.4,1.1-1c0-0.5-0.4-1-0.9-1.1c-0.1,0-0.2,0-0.3-0.1c-0.4-0.1-0.7-0.2-1.1-0.3 c-0.2-0.1-0.4-0.1-0.5-0.2c-0.3-0.1,0.3,0.2,0.1,0c-0.1,0-0.2-0.1-0.3-0.1c-0.3-0.2-0.7-0.3-1-0.5c-0.2-0.1-0.3-0.2-0.5-0.3 c0,0-0.3-0.2-0.1-0.1c0.2,0.2-0.1-0.1-0.1-0.1c-0.8-0.8-2-0.8-2.8,0S315.8,132.7,316.5,133.5C316.5,133.5,316.5,133.5,316.5,133.5 L316.5,133.5z"/> +<path d="M319.6,159c1.7,5.2,6.4,8.9,11.8,9.4c1.1,0,2-0.9,2-2c0-1.1-0.9-1.9-2-2c-0.1,0-0.6,0,0,0c-0.1,0-0.3,0-0.4-0.1 c-0.2,0-0.5-0.1-0.7-0.2c-0.5-0.1-1-0.3-1.5-0.5c0.4,0.3,0.1,0-0.1,0c-0.1-0.1-0.3-0.1-0.4-0.2c-0.2-0.1-0.4-0.2-0.6-0.3 c-0.2-0.1-0.5-0.3-0.7-0.5c-0.1-0.1-0.2-0.1-0.3-0.2c-0.1,0-0.1-0.1-0.2-0.1c0.2,0.1,0.2,0.2,0.1,0.1c-0.4-0.3-0.8-0.7-1.2-1.1 c-0.2-0.2-0.4-0.4-0.5-0.6c-0.1-0.1-0.4-0.4,0-0.1c-0.1-0.1-0.1-0.2-0.2-0.3c-0.3-0.4-0.6-0.9-0.8-1.3c-0.1-0.1-0.1-0.2-0.2-0.3 c0-0.1-0.1-0.1-0.1-0.2c-0.1-0.1,0-0.1,0.1,0.1c-0.1-0.3-0.2-0.6-0.3-0.8c-0.3-1.1-1.4-1.7-2.4-1.4S319.3,157.9,319.6,159 C319.6,159,319.6,159,319.6,159L319.6,159z"/> +<path d="M218.8,114.3c1.3,4.5,1.7,9.3,2.6,13.9c0.4,2.3,1.4,4.1,3.7,5c2.3,0.9,4.4-0.1,6.5-0.9c10.8-3.9,21.7-7.9,32.5-11.8 c19.3-7,38.7-14.4,56.2-25.4c4.4-2.7,8.6-5.6,12.6-8.9c1.9-1.5,2.7-4.1,2-6.4c-0.7-2.7-1.7-5.3-2.8-7.9c-1-2.5-2.2-5.1-3.4-7.5 c-1.1-2.1-2.4-4.2-4.9-4.6c-2.2-0.4-4,0.8-5.9,1.7c-0.7,0.4-1.4,0.6-2.2,0.6c-1.2-0.2-2.4-0.7-3.4-1.3c-18.4-8.7-39.3-10.9-59.3-7.7 
c-5,0.8-9.8,1.9-14.6,3.4c-4.6,1.4-9.6,2.9-13.5,5.8c-3.9,2.8-7,7.1-6.7,12.1c0.1,4.3,3.2,7.9,7.4,8.9c2,0.5,4,0.1,5.8-0.9 c0.9-0.6,1.2-1.8,0.7-2.7c-0.1-0.1-0.2-0.3-0.3-0.4c-3.3-2.9-6.6,1.5-8.1,4.1c-1.8,3.2-2.9,6.7-3.1,10.3c-0.2,3.6,0.5,7.3,1.8,10.7 c0.6,1.6,1.4,3.5,0.9,5.2c-0.3,1.2-1.1,2.3-2.4,2.2c-2.5-0.2-2.5,3.8,0,3.9c3.3,0.2,5.8-2.7,6.3-5.8c0.6-3.8-1.6-7.1-2.4-10.6 c-1-4.8-0.2-9.8,2.2-14c0.4-0.7,0.8-1.3,1.3-1.9c0.2-0.3,0.4-0.5,0.6-0.8c0.1-0.1,0.3-0.3,0.4-0.3c0.3-0.1,0.2-0.2-0.4-0.3l0.4-3.1 c-2.6,1.6-6.1-0.3-6.9-3c-1.2-4,1.5-8.1,4.7-10.3c3.6-2.5,8-3.9,12.2-5.2c4.5-1.4,9-2.5,13.6-3.3c18-3.2,36.6-1.4,53.7,5.4 c2.2,0.9,4.3,2,6.5,2.9c2,0.8,4.3,0.8,6.2-0.2c1.1-0.5,2.7-1.9,4-1.6c0.8,0.2,1.2,1,1.6,1.7c0.6,1.2,1.2,2.4,1.8,3.6 c1.1,2.3,2.1,4.7,2.9,7.1c0.4,1.1,0.8,2.2,1.1,3.3c0.3,0.9,0.9,2.2,0.4,3.2c-0.5,0.7-1.2,1.3-2,1.8c-1,0.7-2,1.5-3,2.2 c-1.9,1.4-3.8,2.7-5.8,4c-15.8,10.3-33.3,17.4-50.9,23.9c-9.6,3.6-19.2,7-28.8,10.5l-7.5,2.7l-3.5,1.3c-0.8,0.3-1.9,0.9-2.8,0.8 c-1.7-0.3-1.7-2.6-1.9-3.9c-0.4-2.2-0.7-4.5-1.1-6.7c-0.3-2-0.7-3.9-1.3-5.9C221.9,110.8,218.1,111.8,218.8,114.3L218.8,114.3z"/> +<path d="M327.4,263.8c7.4-11,22.9-12.4,34.5-8.1c14.6,5.5,25.6,18.5,35.4,30.1c10.9,13.1,20.8,27.1,29.5,41.7 c7.2,11.8,14.2,23.6,21.9,35c4.3,6.5,9.1,12.8,14.1,18.9c2.3,2.6,4.7,5.1,7.2,7.5c1.3,1.2,3,2.8,3.2,4.7c0.1,0.7-1.1,3.2-2.1,2.4 v2.8c8.2-7.2,20.1-8.6,30.4-11c11.3-2.6,22.6-5.6,33.7-9c10.9-3.4,21.7-7.2,32.3-11.5c5.4-2.2,10.8-4.5,16.2-7 c4.4-1.9,8.6-4.4,12.4-7.4c4-3.3,7.6-7,11.8-9.9c1.9-1.4,3.9-2.5,6.1-3.3c1.1-0.4,2.3-0.7,3.4-0.8c0.5,0,1,0.1,1.5,0.3 c0.2,0.1,0.3,0.2,0.5,0.3c0.1,0.1,0.2,0.1,0.3,0.2c0.3,0.4,0.5,0.1,0.4-0.9l2.4,3.1c7.6-5.9,19.1-9.5,28.5-6.2 c3.6,1.3,7.2,4.2,7,8.4c-0.2,2.9-2.4,7.1-5.9,6.5l0.9,3.3c5.9-6.5,16.3-9.2,22.9-2.2c2.9,2.8,3.8,7.1,2.4,10.9 c-1.5,3.8-5.2,6.1-9.3,5.9l1.7,3c2.5-3.5,7.3-4.3,11.3-3.2c4.6,1.2,7.8,5.1,9.6,9.2c2.2,4.9,3,10.3,2.1,15.6 c-1,4.8-4.3,9.6-9.5,10.3c-4.5,0.6-9.8-4.2-7.3-8.8l-3.6-1.5c-1.1,3.2-4.8,5.1-7.5,6.9c-3.1,2.1-6.1,4.1-9.2,6.1 
c-6.2,4.2-12.5,8.3-18.8,12.4c-12.5,8.1-25.2,16-38.2,23.3c-25.8,14.6-52.9,27.2-81.6,34.9c-14.7,3.9-29.9,6.9-45.2,7 c-11.2,0.1-23.3-1.2-33.1-6.8c-7.3-4.2-14-9.5-20.5-14.9c-8-6.7-15.7-13.8-23.2-21.1c-13.3-12.7-26-26-38-40 c-0.9-1.1-1.8-2.1-2.6-3.2c-1.6-2-4.4,0.8-2.8,2.8c11,13.8,23.5,26.5,36.1,38.8c7.7,7.6,15.6,15,23.8,22.1 c6.9,6.1,14.2,11.7,21.8,16.8c2.8,1.9,5.8,3.5,9,4.8c6.1,2.5,12.9,3.5,19.4,4.2c15.7,1.5,31.7-0.9,47-4.3 c29.4-6.6,57.3-18.7,83.7-33c26.8-14.5,52.5-31.3,77.5-48.5c2.7-1.9,5.1-3.9,6.2-7.1c0.7-2.1-2.6-3.5-3.6-1.5 c-2.8,5.3,0.3,11.9,5.7,14.1c6,2.4,12.9-1.3,16.1-6.5c3.7-6.1,3.3-14.2,1.3-20.8c-1.8-6-5.6-11.8-11.6-14.3 c-6.1-2.5-13.6-1.1-17.6,4.4c-1,1.3,0.4,2.8,1.7,3c5.4,0.5,10.4-3,12.7-7.8c2.2-4.8,1.4-10.4-2-14.5c-7.7-9.8-21.8-7.4-29.5,1 c-0.9,1-0.7,3,0.9,3.3c8.1,1.4,13.2-8.7,9.8-15.4c-4.4-8.9-16.7-9.4-25.1-7.6c-5.9,1.3-11.4,3.9-16.1,7.6c-1.8,1.4,0.5,4.3,2.4,3.1 c1.5-0.9,2.1-2.8,1.2-4.3c-0.1-0.3-0.3-0.5-0.5-0.7c-1.5-1.6-3.6-2.4-5.8-2.2c-4.7,0.5-9,3-12.7,5.8c-4,3-7.5,6.6-11.4,9.7 c-4.4,3.6-9.4,5.7-14.6,8c-23.3,10.2-47.5,18.3-72.3,24.2c-6.8,1.7-13.8,3-20.5,4.8c-6.2,1.7-12.3,4.2-17.2,8.5 c-0.8,0.8-0.8,2,0,2.8c2.6,2.2,6.1,1.1,7.9-1.7c2-3.2,0.4-6.7-1.8-9.3c-2.3-2.6-4.9-4.8-7.2-7.4c-2.5-2.8-4.9-5.7-7.1-8.7 c-9.4-12.5-17.6-25.9-25.6-39.3c-9.1-15-18.6-29.8-29.6-43.5c-5.5-7-11.4-13.8-17.7-20.1c-5.8-5.8-12.3-11.3-19.8-14.9 c-14.2-6.9-33.7-6.1-43.3,8C322.6,263.9,326,265.8,327.4,263.8L327.4,263.8z"/> +<path class="st0" d="M25,948.3h66H172c15.3,0,29.6-6.6,37.1-20.6c4.4-8.2,4.6-16.8,4.6-25.8V695.7c0-8.9,0-17.3-5.7-25 c-7.6-10.3-21-12.3-32.5-15.7l-46.2-13.4c-15.3-4.4-29.9-9.4-42.6-19.5c-19.5-15.5-32.1-38.1-35.1-62.8c-0.9-8.2-0.6-16.5-0.6-24.8 V399.9c0-7.7-0.2-15.4,0.2-23.1c1.2-23.8,8.1-47,20.1-67.7c23.7-40.7,66.2-68.7,113.1-73.9c47.3-5.2,94.2,12.1,126.8,46.7 c27.3,29.1,40.6,67,40.6,106.6v128.2c0,7.8-0.7,16.1,0.7,23.8c3.6,19,20,31.5,38.7,32.8c14.2,0.9,28.4,1.8,42.6,2.7 c14.2,0.9,29.8,0,41.3,9.8c20,17,15.6,49.9-8.3,61c-7.6,3.5-16.1,3.5-24.2,4.1l-37.3,2.7l-98.9,7l-34.7,2.5 
c-9,0.6-17.7,1.1-25.2,7.2c-8.8,7.2-10.6,17.2-10.6,27.8v208c0,5.5-0.5,11.2,0.3,16.6c1.5,10.2,8.2,18.8,17.7,22.8 c4.8,2,9.7,2.2,14.8,2.2h86.3c12.1,0,23.8,0.2,35.2-4.9c19.9-9.1,32.8-28.9,32.9-50.8c0.1-11.7,0-23.4,0-35.1V529.4 c0-9.9-4.8-19.2-15.6-20.7c-6.4-0.9-12.2-1.6-15.3-8.2c-2.4-5.4-1.8-11.7,1.8-16.4c2.9-3.7,7.2-5.2,11.7-5.3c10.3-0.1,20.6,0,30.9,0 H968c2.5,0,2.5-3.9,0-3.9H435.2c-9.8,0-19.7-0.2-29.6,0c-14.8,0.3-24,17.1-15.5,29.7c2.7,3.9,6.7,6.6,11.4,7.6 c2.2,0.5,4.6,0.2,6.8,0.7c3.6,0.8,6.7,3.1,8.7,6.2c1.6,2.8,2.4,6,2.2,9.2c0.1,15.5,0,30.9,0,46.4V852c0,13.3,0.1,26.6,0,39.9 c0,20.7-12.3,39.5-31.3,47.8c-11.4,4.9-23.4,4.2-35.5,4.2h-84.9c-15.2,0-27.4-10.1-27.5-26c0-6.7,0-13.4,0-20.1V716.4 c0-7.3,0-14.7,0-22c0-14.2,9.7-25.4,24.1-26.6c22.1-1.8,44.3-3.2,66.4-4.7l96.3-6.8l25.7-1.8c15.4-1.1,29.2-9.1,35.8-23.4 c9.2-19.9,0.4-44.4-19.6-53.6c-11.5-5.3-26.1-4.4-38.4-5.2c-13.3-0.9-26.7-1.6-40-2.6c-19.5-1.3-34.6-17.6-34.5-37.1 c-0.1-24.9,0-49.9,0-74.8c0-24.1,0.1-48.2,0-72.4c0.2-85.3-68.7-154.6-154-154.8c-78.3-0.2-144.4,58.3-153.7,136.1 c-1.2,9.7-1.1,19.4-1.1,29.2v90.2c0,20.9-0.1,41.8,0,62.7c0.3,44.2,30,81.9,72.1,94.2c24.1,7,48.5,13.4,72.5,21 c10.7,3.3,18,13.1,18.2,24.2c0,1.6,0,3.3,0,4.9v205.8c0,7.3,0.5,14.8-1.9,21.8c-4.1,11.7-14,20.5-26.1,23.1 c-3.6,0.6-7.2,0.8-10.8,0.7H87.2H25C22.5,944.4,22.5,948.3,25,948.3L25,948.3z"/> +<path d="M322.6,263.8c-1.7,1.8-1.6,4.7,0.3,6.4c0,0,0,0,0,0c2.1,2,5.3,1.6,7.8,0.9c2.6-0.7,5.1-2.2,6.1-4.8c0.9-2.5,0.6-5.3-0.8-7.6 c-1.5-2.3-3.5-4.4-5.7-6.1c-2.1-1.8-4.3-3.5-6.6-5c-4.5-3.1-9.2-5.8-14.2-8.2c-10-4.7-20.4-8.4-31-11.3c-5.3-1.5-10.7-2.9-16.1-4.3 c-6.3-1.7-12.6-3.1-19.1-4c-5.5-0.7-12.5-0.9-16.2,4.1c-2.7,3.7-2.6,9.9,1.8,12.2c2.2,1.2,4.2-2.2,2-3.4c-2.5-1.3-2-5-0.4-6.9 c2.7-3.2,8.1-2.7,11.8-2.3c10.2,1.2,20.3,4.3,30.2,7c17.9,4.9,36,10.7,51.1,21.9c2.1,1.5,4.1,3.2,6,4.9c1.7,1.5,3.5,3.4,3.7,5.8 c0.2,2.5-1.7,3.7-3.9,4.3c-1,0.3-2.4,0.7-3.5,0.3c-0.3-0.1-1-0.7-0.6-1C327.2,264.8,324.4,262,322.6,263.8L322.6,263.8z"/> +<path 
d="M362.8,406.5c-3.8-3.5-6.9-7.8-9-12.6c-1.4-3.5-1.5-8.5,2-11c4-3,9.5,0.1,12,3.5c1.6,2.2,2.9,4.7,3.8,7.3 c1.2,3,2.3,6.1,3.4,9.1l14,37.5l3.5,9.3c1.2,2.9,2.1,5.8,3,8.8c0.8,2.6,0.9,5.4,0.3,8.1c-0.8,2.5-2.4,4.7-4.5,6.3 c-9.9,7.9-23.6,11-36,9.6c-1.1,0-2,0.9-2,2c0,1.1,0.9,1.9,2,2c12.7,1.4,25.5-1.7,36.1-8.8c2.6-1.6,4.9-3.8,6.6-6.3 c1.6-2.8,2.3-6,2-9.1c-0.5-6.7-3.6-13.1-5.9-19.4l-15.5-41.5c-2.2-6-4-12.7-8.1-17.7c-3.5-4.3-9.6-7.2-15-5c-5.7,2.3-7.8,9.5-6,15.1 c1.9,5.9,6,11.4,10.5,15.6C361.9,411.1,364.7,408.3,362.8,406.5L362.8,406.5z"/> +<path d="M355.9,482.5c-2.3-0.4-3.9-1.5-4-3.9c0-2.1,0.9-4.2,2.5-5.6c2.5-2.3,6-3.3,9.3-4c3.9-0.8,7.8-1.4,11.7-1.7 c16.1-1.3,32.2,0.9,47.8,5c15.9,4.2,31.2,10.1,46.3,16.4c15.5,6.5,30.9,13.1,46.2,20c26.3,11.9,53.7,24.3,74.3,45.1 c10,10.1,17.6,22,24.3,34.6c7.2,13.9,13.5,28.2,18.9,42.9c11.5,30.7,20.9,62.1,30.4,93.4c5,16.3,10.1,32.4,15.6,48.5 c4.2,12.3,8.6,26,3.6,38.7c-3.6,9.2-11.3,16.4-19.5,21.4c-8.8,5.4-18.7,8.8-29,10.1c-4.8,0.6-9.7,0.6-14.5-0.1 c-5.9-0.9-11.3-3.6-15.6-7.7c-5.1-4.8-9-10.7-13-16.4c-4.5-6.4-8.9-13-13.1-19.7c-16.8-26.7-30.8-55-43.5-83.8 c-6.3-14.4-12.3-29-18.2-43.6c-5.8-14.5-12.4-30.1-10.8-46c0.1-0.9,0.2-1.9,0.4-2.8c0.3-1,0.5-1.9,0.6-3l-0.7-0.3 c-0.6-0.5-0.9-0.4-0.9,0.2c-0.2,0.3-0.4,0.7-0.5,1c-0.4,0.7-1,1.2-1.6,1.7c-3,2.1-7.4,1.4-10.8,1c-2.5-0.3-2.5,3.7,0,3.9 c3.9,0.4,7.9,0.7,11.5-0.9c2.7-1.2,5.8-4.4,5.6-7.6c-0.1-1.8-1.6-3.1-3.4-3c-0.8,0.1-1.6,0.4-2.2,1c-1.1,1.3-1.2,3.5-1.5,5.2 c-0.4,2.2-0.6,4.3-0.7,6.5c-0.5,16.8,6.7,32.9,12.8,48.2c12.5,31.1,25.8,62,41.6,91.6c7.9,14.8,16.4,29.2,25.7,43.2 c4.4,6.6,8.9,13.1,13.8,19.4c4.3,5.5,9.3,10.6,15.8,13.4c9.8,4.2,21.7,3.3,31.7,0.9c10.9-2.5,21.1-7.4,29.8-14.5 c7.9-6.5,14.3-15.5,15.8-25.8c1.1-7.3-0.1-14.8-2-21.9c-2.3-8.6-5.5-17-8.3-25.4c-11.2-34-20.8-68.6-32.3-102.5 c-5.3-15.6-10.8-31-17.3-46.2c-6.4-14.8-13.5-29.7-22.8-42.9c-8.3-11.6-18.4-21.7-29.9-30c-12.3-9-25.9-16-39.7-22.6 c-16.3-7.9-32.8-15.1-49.4-22.2c-16-6.8-32-13.7-48.5-19.1c-16.4-5.4-33.4-9.3-50.6-9.8c-8.4-0.2-16.7,0.5-24.9,2.1 
c-5.9,1.2-12.6,3.8-14.5,10.2c-1.6,5.1,1,10.1,6.4,11C357.3,486.8,358.4,483,355.9,482.5L355.9,482.5z"/> +<path d="M612.1,845c9.9,16.5,19.9,33,30,49.4c2.5,4.1,5.1,8.3,7.7,12.4c2.1,3.4,4.1,7.2,7,10.1c3,3,7.2,4.4,11.4,3.8 c4.1-0.4,8.2-1.2,12.2-2.4c16.1-4.6,30.4-13.8,43.9-23.6c3.3-2.4,6.6-4.9,9.9-7.4c3.2-2.5,6.5-4.9,9-8.2c4.4-5.6,6.4-14.7,0.5-20 c-2.9-2.6-6.7-3.5-10.5-3.9c-4.6-0.6-9.2-1-13.8-1.4l-7.1-0.7c-2-0.3-4-0.3-6.1,0c-3.4,0.7-5.9,3.9-4.8,7.4c0.4,1.5,1.6,2.6,3.1,2.9 c1.3,0.2,3-0.9,2.4-2.4c-5.7-15.2-12.6-29.9-20.8-44c-1.3-2.2-4.7-0.2-3.4,2c8,13.7,14.8,28.1,20.4,43l2.4-2.4 c-0.3-0.1-0.3,0-0.4-0.3c-0.1-0.4-0.1-0.7,0-1.1c0.4-1.1,1.9-1.3,2.9-1.4c2,0,3.9,0.1,5.9,0.4l6.6,0.7c4.1,0.4,8.3,0.7,12.4,1.3 c3.1,0.4,6.5,1.1,8.6,3.6c1.9,2.2,2.1,5.4,1.4,8.2c-0.9,3.3-2.8,6.3-5.4,8.5c-2.7,2.4-5.6,4.7-8.5,6.8c-3,2.3-6.1,4.5-9.1,6.8 c-12.2,8.8-25.1,17-39.6,21.3c-3.6,1.1-7.3,1.9-11.1,2.4c-1.9,0.3-3.8,0.3-5.7-0.1c-1.6-0.5-3-1.4-4.1-2.6c-2.6-2.6-4.3-6.1-6.3-9.2 c-2.3-3.7-4.6-7.4-6.9-11.1c-9.2-14.9-18.3-29.9-27.3-44.9c-1.1-1.9-2.3-3.8-3.4-5.7C614.3,840.9,610.8,842.9,612.1,845L612.1,845z"/> +<path d="M492,624.2c-22.8-0.5-45.8-1.1-68.1-6c-22.1-4.9-44.7-14.5-58.8-32.9c-7.7-10-12.6-22.4-12.7-35.1c0-5.1,0.5-11,3.9-15.1 c1.5-1.9,3.8-3,6.3-3c2.9,0.1,5.5,1.8,7.6,3.7c4.9,4.3,8.7,10,12.3,15.3c3.6,5.4,6.9,11,9.9,16.7c5.9,11.7,10.4,24.1,13.2,36.9 c5.7,26.4,3.8,53.4-3.2,79.3c-7,25.9-18.4,50.3-29.6,74.6c-2.9,6.2-5.7,12.4-8.5,18.7c-2.6,5.9-4.7,11.9-6.8,18 c-4,11.9-8.3,24.7-17.6,33.6c-16.4,15.6-43.1,10.9-59.5-2.2c-8-6.2-13.5-15.1-15.5-25.1c-1-5.3-1-10.7-0.3-16c1-7.5,2.3-15,3.6-22.5 c2.7-15.2,5.7-30.4,9.1-45.5c2.9-13,5.9-25.8,11.5-37.9c2.4-5.4,5.4-10.5,8.9-15.2c1.5-2-1.9-4-3.4-2c-7.9,10.3-12.7,23-16.3,35.3 c-2,6.9-3.4,14-5,21.1c-1.7,7.8-3.4,15.7-4.9,23.6c-1.6,7.9-3,15.8-4.4,23.7c-1.3,7.4-2.8,15-3.4,22.5c-0.9,10.8,1.6,21.8,7.8,30.7 c6.4,9.2,16,15.7,26.4,19.6c10.6,4,22.7,5.1,33.5,1.5c26.6-9,30.3-40.3,40.2-62.6c11.3-25.5,23.9-50.4,32.9-76.8 
c9.1-26.6,14-54.6,10.6-82.7c-3.3-27.1-13.6-53-29.9-74.9c-3.8-5.1-7.8-10.9-13.6-13.8c-5.4-2.7-11.5-1.6-15.3,3.2 c-3.6,4.6-4.5,10.6-4.6,16.3c-0.1,6.6,1,13.1,3.2,19.4c8.3,24.1,29.6,40,52.9,48.3c22.6,8.1,47,10.1,70.7,10.9 c5.6,0.2,11.1,0.3,16.7,0.4C494.5,628.2,494.5,624.2,492,624.2L492,624.2z"/> +<path d="M261.4,806c-3.4,10.1-6.9,20.2-10.5,30.2c-1.8,5-3.7,9.9-5.6,14.8c-1.9,4.9-4,9.8-5.8,14.8c-1.6,4.4-2.4,9-1.2,13.7 c1.2,4.2,3.5,8,6.6,11c3.2,3.2,7.2,5.6,11.1,7.9c4.1,2.4,8.4,4.5,12.8,6.3c13.3,5.6,27.5,8.5,41.8,10.2c5,0.7,10,0.9,15,0.8 c4.4-0.2,9.4-0.7,13-3.3c3.9-2.8,4.6-7.3,3.1-11.6c-1.6-4.3-3.9-8.3-6.8-11.9c-1.4-1.8-3-3.6-4.6-5.3c-1.4-1.6-2.9-3.8-5.1-4.3 c-1.8-0.4-3.5,0.8-3.9,2.6c-0.1,0.5-0.1,1,0,1.4c0.5,2,3.1,1.7,3.8,0c7.3-18.6,14.6-37.3,22-55.9c0.3-1-0.3-2.1-1.4-2.4 c-1-0.3-2.1,0.3-2.4,1.4l-22,55.9h3.8l0-0.1l-0.3,1c0.2-0.3-0.4,0.1-0.4,0c0,0,0.4,0.3,0.4,0.3c0.3,0.4,0.6,0.7,1,1 c0.7,0.8,1.4,1.6,2.2,2.4c1.4,1.6,2.8,3.2,4.1,4.9c2.6,3.1,4.6,6.7,6,10.6c0.3,0.9,0.5,1.8,0.5,2.7c0,0.2,0,0.4,0,0.6 c0,0.1,0,0.2,0,0.3c0-0.1,0.1-0.4,0,0c-0.1,0.4-0.2,0.8-0.3,1.2c0,0,0.1-0.2-0.1,0.2c-0.1,0.2-0.2,0.4-0.3,0.5 c0,0.1-0.3,0.5-0.2,0.3c0.1-0.2-0.2,0.2-0.3,0.3c-0.1,0.1-0.3,0.3-0.4,0.4c-0.1,0.1-0.6,0.5-0.2,0.2c-0.3,0.2-0.7,0.5-1,0.7 c-0.7,0.4-1.5,0.8-2.2,1c-1,0.3-1.9,0.5-2.9,0.7c-2.1,0.4-4.2,0.6-6.4,0.6c-4.6,0-9.2-0.2-13.8-0.8c-1.1-0.1-1.8-0.2-2.7-0.4 c-0.8-0.1-1.7-0.2-2.5-0.4c-2.3-0.4-4.7-0.7-7-1.1c-8.7-1.5-17.2-3.9-25.5-7.1c-4.1-1.6-8-3.4-11.8-5.5c-3.8-2-7.4-4.2-10.9-6.8 c-3.1-2.3-5.5-5.4-7.1-8.9c-1.5-3.8-1.6-8.1-0.3-12c1.4-4.5,3.4-8.9,5.1-13.3c1.8-4.6,3.6-9.3,5.3-13.9c3.4-9.1,6.7-18.3,9.8-27.6 c0.8-2.3,1.6-4.6,2.3-6.9C266,804.6,262.2,803.6,261.4,806L261.4,806z"/> +<path class="st0" d="M867.6,298.2H542.8c-4.3,0-8.8-0.4-13.1,0c-0.2,0-0.4,0-0.6,0l1.9,2.5l4.1-22.8l9.9-54.4l12-65.8l10.4-56.9 l3.4-18.8c0.5-2.9,1.5-5.9,1.6-8.9c0-0.1,0-0.3,0.1-0.4l-1.9,1.4h324.8c4.3,0,8.8,0.4,13.1,0c0.2,0,0.4,0,0.6,0l-1.9-2.5L903,94.4 
l-9.9,54.4l-12,65.8l-10.4,56.9c-1.7,9.2-3.5,18.4-5,27.6c0,0.1,0,0.3-0.1,0.4c-0.5,2.5,3.3,3.5,3.8,1l4.1-22.8l9.9-54.4l12-65.8 l10.4-56.9c1.7-9.2,3.5-18.4,5-27.7c0-0.1,0-0.3,0.1-0.4c0.3-1.1-0.4-2.2-1.4-2.4c-0.1,0-0.3-0.1-0.5-0.1H584.3 c-4.4,0-8.8-0.1-13.1,0c-0.2,0-0.4,0-0.6,0c-0.9,0-1.7,0.6-1.9,1.4l-4.1,22.8l-9.9,54.4l-12,65.8l-10.4,56.9 c-1.7,9.2-3.5,18.4-5,27.6c0,0.1,0,0.3-0.1,0.4c-0.3,1.1,0.4,2.2,1.4,2.4c0.1,0,0.3,0.1,0.5,0.1h324.8c4.4,0,8.8,0.1,13.1,0 c0.2,0,0.4,0,0.6,0C870.1,302.1,870.1,298.2,867.6,298.2z"/> +<path class="st0" d="M892.8,71.7l-1.1,6.1l-3,16.6l-4.5,24.5l-5.4,29.9l-6,32.8l-6,33.1l-5.6,30.8l-4.8,26.1l-3.4,18.7 c-0.5,3-1.1,5.9-1.6,8.9c0,0.1,0,0.3-0.1,0.4c-0.3,1.1,0.4,2.2,1.4,2.4c0.1,0,0.3,0.1,0.5,0.1h14.3c0.9,0,1.7-0.6,1.9-1.4l1.1-6.1 l3-16.6l4.5-24.5l5.4-29.9l6-32.8l6-33.1l5.6-30.8l4.8-26.1l3.4-18.7c0.5-3,1.1-5.9,1.6-8.9c0-0.1,0-0.3,0.1-0.4 c0.3-1.1-0.4-2.2-1.4-2.4c-0.1,0-0.3-0.1-0.5-0.1h-14.3c-2.5,0-2.5,3.9,0,3.9h14.3l-1.9-2.5l-1.1,6.1l-3,16.6l-4.5,24.5l-5.4,29.9 l-6,32.8l-6,33.1l-5.6,30.8l-4.8,26.1l-3.4,18.7c-0.5,2.9-1.3,5.9-1.6,8.9c0,0.1,0,0.3-0.1,0.4l1.9-1.4h-14.3l1.9,2.5l1.1-6.1 l3-16.6l4.5-24.5l5.4-29.9l6-32.8l6-33.1l5.6-30.8l4.8-26.1l3.4-18.7c0.5-3,1.1-5.9,1.6-8.9c0-0.1,0-0.3,0.1-0.4 C897.1,70.2,893.3,69.2,892.8,71.7z"/> +<path class="st0" d="M769.1,474.8c-7.1,0.1-14.1-1.3-20.6-4.2c-6.5-3-11.8-8-15.2-14.3c-3.3-6.2-4.6-13.3-4.9-20.2 c-0.3-7.6,0.4-15.3,1.9-22.8c0.9-4.3,1.9-8.5,2.7-12.7c0.8-4.1,1.8-8.6,1.2-12.7c-0.4-3.6-2.3-6.9-5.2-9c-2.3-1.7-5.1-2.2-7.9-2.6 c-2.7-0.4-5.5-0.6-7.9-2c-2.2-1.2-4-3-5.2-5.3c-2.4-4.5-2.5-9.9-0.3-14.5c2.9-5.9,9.3-8.4,15.6-8.7c6.7-0.1,13.3,1.7,19.1,5.2 c12.5,7.8,19.8,21.7,19.2,36.4c-0.3,6.6-3.7,12-6.7,17.7c-2.8,5.3-5.7,11.4-4.2,17.5c0.9,3.5,3.7,6.2,4.5,9.7 c0.2,0.8,0.2,1.6-0.1,2.4c-0.1,0.3-0.5,1.2-0.9,1.2c-1-0.2,0.5-1.8,0.7-2.1c3.8-5.8,8.8-10.8,14.5-14.7c12.7-8.7,28-12.1,43.3-11.7 c8.7,0.3,17.4,1.5,25.8,3.6c4.2,1,8.4,2.1,12.5,3.4c3.4,1,6.8,2.3,10,3.9c3.1,1.6,5.8,3.8,7.9,6.4c1,1.3,3.5,0.2,3.4-1.4 
c-0.3-3.8-3-6.2-6.2-7.9c-2.8-1.5-6.6-2.8-8.2-5.8c-1.3-2.4-0.9-5.7-0.8-8.3c0.1-3.2,0.3-6.4,0.6-9.6c0.3-3,0.8-6,1.4-9 c0.3-1.7,0.8-3.6,2.6-4.1c2.8-0.8,4.1,1.7,5.5,3.6c1.4,1.9,3.6,3.9,3.6,6.5c0,1.1,0.9,2,2,2c0.3,0,0.6-0.1,0.9-0.3 c6.1-3.1,12.4-5.9,19.1-7c6.7-1.1,13.5-0.4,20.1,0.8c1.1,0.3,2.2-0.4,2.4-1.5c0.1-0.3,0.1-0.7,0-1c-0.2-1.5,0-3,0.7-4.4 c0.6-1.7,1.6-3.2,2.8-4.5c1-1,1.9,0.7,2.5,1.5c1.1,1.4,2.1,2.9,3,4.5c3.5,6,5.7,12.7,6.3,19.6c1.1,13.3-3.8,26.3-13.3,35.7 c-5.1,5.1-11.4,8.9-18.4,11.1l2.2,2.9c0.8-1.2,1.7-2.3,2.8-3.2c0.1-0.1,0.8-0.5,0.7-0.4c0.1,0,0.6-0.2,0.4-0.1c-0.2,0,0.3,0,0.5,0 c0.3,0-0.1,0,0.3,0c0.2,0.1,0.5,0.1,0.7,0.2c0.5,0.2,0.9,0.3,1.4,0.5c1.4,0.6,2.8,1.3,4.1,1.9c6,2.7,12.4,4.5,18.7,6.2 c3.3,0.9,6.7,1.8,10,2.8c3,0.9,6,1.9,9.1,2.7c7.1,1.8,16.7,2.1,21.2,8.8c1,1.4,1.6,3,1.6,4.7c-0.1,0.8-0.4,1.1-1.2,1.5 c-1.4,0.7-3.8,0.6-5.5,1.4c-2.3,1.1-0.3,4.5,2,3.4c-0.5,0.3,0.3,0,0.5-0.1c0.5-0.1,1-0.2,1.5-0.2c1.1-0.2,2.3-0.5,3.3-1 c2.8-1.3,3.7-4,3.2-6.9c-1.7-8.8-11.1-12.3-18.8-13.9c-4.1-0.9-8.2-1.9-12.2-3.1c-3.4-1-6.9-2-10.4-2.9c-6.9-1.9-14.1-3.6-20.6-6.6 c-2.8-1.2-6-3.5-9.1-3.4c-3.5,0-5.7,3.2-7.6,5.8c-1,1.4,0.7,3.4,2.2,2.9c14.5-4.6,26.1-15.5,31.6-29.6c5.7-14.8,3.3-32.3-5.7-45.3 c-1.2-1.7-2.5-4-4.4-5c-2-1-4.4-0.5-5.8,1.3c-2.8,3.4-5.2,7.9-4.2,12.4l2.4-2.4c-7.2-1.3-14.5-2-21.8-0.9c-7.2,1.2-14,4.1-20.4,7.5 l3,1.7c0-2.9-1.5-5.1-3.2-7.3c-1.6-2.1-2.8-4.6-5.2-5.9c-2.5-1.4-5.6-1.2-8,0.5c-1.9,1.5-2.7,3.7-3.1,6c-1.3,7-2.1,14.1-2.1,21.2 c0,3,0,6.3,1.7,9c1.2,1.7,2.8,3.1,4.6,4.2c1.9,1.2,4,2.1,5.9,3.3c1.4,0.9,2.8,2,3,3.8l3.4-1.4c-4.9-6.2-12.5-9.2-19.8-11.4 c-8.9-2.8-18.1-4.9-27.4-6.2c-17-2.2-34.5-0.7-50,7.1c-6.8,3.4-12.9,8-17.9,13.6c-1.3,1.4-2.5,2.9-3.6,4.5c-1,1.4-2.4,3.1-2.9,4.8 c-0.6,2.3,0.9,4.7,3.2,5.3c1,0.2,2.1,0.1,3-0.4c4-2.3,3.1-7.9,1.4-11.3c-0.8-1.7-2.1-3.2-2.8-5c-0.5-1.4-0.8-2.9-0.7-4.3 c0.2-3.1,1.1-6.1,2.6-8.8c1.5-3.1,3.3-6,4.8-9.1c1.6-2.9,2.8-6.1,3.5-9.4c0.7-4.4,0.6-8.8-0.2-13.2c-1.3-7.9-4.8-15.2-9.9-21.3 
c-9-10.9-24.8-18.1-38.9-13.9c-6.6,1.8-11.8,7.1-13.4,13.7c-1.4,5.8-0.1,12,3.5,16.8c1.9,2.4,4.4,4.2,7.3,5.2c2.8,1,5.9,1.1,8.9,1.7 c3,0.6,5.4,1.9,6.9,4.7c2,4.1,0.9,8.9,0.1,13.2c-1.6,8.9-4.1,17.6-4.8,26.7c-0.8,7.7-0.3,15.4,1.3,23c1.7,7.6,5.6,14.5,11.2,19.8 c5.8,5.1,12.8,8.5,20.4,9.9c3.8,0.7,7.6,1.1,11.4,1.1C771.6,478.7,771.6,474.8,769.1,474.8L769.1,474.8z"/> +<path class="st0" d="M780.6,428.6c10.2-6.7,22.9-8.7,34.7-5.5c5.7,1.6,11.5,4.4,14.5,9.6c2.7,4.8,3,10.6,0.9,15.7 c-2.5,5.7-7.5,9.9-13.5,11.5l2.2,0.9l-0.1-0.2l0.3,1l0-0.2l-0.3,1c-0.2,0.1-0.2,0.2-0.1,0.1c0.3,0,0.5,0,0.8-0.1 c0.6,0,1.2-0.1,1.7-0.1l3.7-0.3l6.9-0.6c2.3-0.2,4.6-0.4,6.9-0.6c2-0.3,4.1-0.2,6.2,0.2c3.1,1,5.7,3.2,7.1,6.2 c1.1,2.1,2.2,6.2-0.5,7.7c-2.2,1.2-0.2,4.6,2,3.4c3.2-1.7,3.9-5.6,3.3-8.9c-0.7-3.5-2.5-6.7-5.2-9c-2.9-2.5-6.7-3.8-10.6-3.6 c-4.7,0.2-9.5,0.8-14.2,1.1c-2.6,0.2-5.3,0.4-7.9,0.7c-2.2,0.2-4.8,1.7-3.5,4.3c0.5,0.8,1.4,1.1,2.2,0.9c6.2-1.7,11.5-5.6,14.8-11.1 c3.3-5.6,3.9-12.3,1.7-18.4c-4.9-13.3-21.8-17.4-34.5-16.3c-7.7,0.6-15.2,3.2-21.7,7.4C776.5,426.6,778.5,430,780.6,428.6 L780.6,428.6z"/> +<path class="st0" d="M872.5,404.4c4.2-0.3,8.5,0.6,12.2,2.6c0.9,0.5,2.1,0.2,2.7-0.8c0.5-0.9,0.2-2.1-0.7-2.6 c-4.3-2.4-9.3-3.5-14.2-3.2c-1.1,0-1.9,0.9-2,2C870.5,403.5,871.4,404.4,872.5,404.4z"/> +<path class="st0" d="M919.4,395.3c-3-0.4-6.1,0.3-8.7,1.9c-1.2,0.7-2.4,1.5-3.5,2.3c-1.3,0.8-2.4,1.9-3.1,3.2 c-0.5,0.9-0.2,2.1,0.7,2.7c0.9,0.5,2.1,0.2,2.7-0.7c0.4-0.8,1.1-1.5,1.8-1.9c1-0.7,2-1.4,3.1-2c0.5-0.3,1-0.6,1.6-0.8 c0.4-0.2,0.9-0.4,1.4-0.5c0.3-0.1,0.6-0.2,0.8-0.2c0.1,0,0.3-0.1,0.4-0.1c0.3-0.1-0.2,0,0.1,0c0.3,0,0.6,0,0.9,0c0.1,0,0.3,0,0.4,0 c0,0,0.5,0.1,0.2,0c1,0.3,2.1-0.3,2.4-1.4C921,396.7,920.4,395.6,919.4,395.3L919.4,395.3z"/> +<path class="st0" d="M899.5,411.6c-0.3,0.7-0.9,1.1-1.6,1.3c-1.1,0.2-2.2,0.2-3.3,0c-1-0.3-2.1,0.3-2.4,1.4c-0.3,1,0.3,2.1,1.4,2.4 l1.9,0.6c0.1,0,0.9,0.2,0.9,0.3l-0.3-0.2c-0.2-0.7-0.3-0.8-0.3-0.4c0,0.7,0.2,1.3,0.4,1.9c0.1,0.4,0.2,0.8,0.1,1.2 
c-0.2,1.2-1.3,2-2.5,1.8c-0.2,0-0.4-0.1-0.5-0.2l-1.5,3.6c1.7,0.6,3.5,0.8,5.3,0.6c0.9-0.2,1.8-0.6,2.5-1.2c0.1,0,0.6-0.5,0.4-0.4 c0.3,0,0.7,0.1,1,0.1c3.6,0.4,7.7-0.7,9.3-4.1c0.5-0.9,0.2-2.1-0.7-2.7c-0.9-0.5-2.1-0.2-2.7,0.7c-0.5,1-1.3,1.7-2.3,2 c-1.4,0.3-2.9,0.3-4.4,0.1c-0.8-0.1-1.7,0.2-2.4,0.7c-0.4,0.4-0.9,0.7-1.3,1c-1.1,0.4-2.5-0.1-3.7-0.5c-2.1-0.6-3.6,2.6-1.5,3.6 c3,1.6,6.7,0.4,8.3-2.6c0.2-0.4,0.4-0.9,0.5-1.3c0.2-0.9,0.2-1.9,0-2.8c-0.1-0.4-0.2-0.8-0.4-1.3c-0.1-0.4-0.1-0.8-0.2-1.2 c-0.6-2.1-3.2-2.5-5-3l-1,3.8c1.8,0.4,3.7,0.4,5.6,0c1.6-0.4,2.9-1.5,3.7-3c0.5-0.9,0.2-2.1-0.7-2.7 C901.3,410.4,900.1,410.7,899.5,411.6L899.5,411.6z"/> +<path d="M475.8,390.4c-14.9,5.4-30.3,9-46,10.7c-7.6,0.8-15.4,1.6-23.1,1c-6.6-0.5-13.4-2.7-17.6-8.1c-9.8-12.8,7.4-25.5,16.7-31.7 c6.6-4.4,13.5-8.5,20.2-12.8c6.9-4.3,13.8-8.8,20.9-12.9c6.8-4.1,13.9-7.7,21.2-10.8c3.7-1.6,7.4-3,11.2-4.2c1.6-0.5,6.1-2,6.6,0.8 c0,0.3,0,0.6,0,0.9c-0.1,0.1-0.3,0.3-0.1,0.3l1.4,3.4c4-3.6,8.9-6,14.2-7.1c1.8-0.4,4.6-0.9,6,0.6c0.3,0.4,0.5,0.9,0.4,1.5 c0,0.2-0.1,0.5-0.2,0.7c-0.1,0.2-0.1,0.2-0.3,0.2l1.4,3.4c7.6-6.7,18.8-8,28.1-4.7c4.6,1.6,9.8,4.9,12,9.4c2.7,5.6-3.8,9.1-8.4,9.6 c-3.5,0.4-7.1,0-10.6,0.4c-2.8,0.3-5.6,1-8.2,2.1c-5.4,2.2-10.2,6-13.6,10.7c-3.4,4.8-5.5,11.9-0.7,16.6c1.9,1.9,4.7,2.7,7.3,2.1 c2.5-0.5,4.7-1.9,7-2.9c2-0.9,4.4-1.5,6.1,0.1c1.5,1.6,2.1,3.7,1.8,5.9c-0.8,6-8.1,8.3-13.1,9.6l2.4,1.4c-0.3-0.9,0.1-1.9,1-2.3 c2.2-1.2,0.2-4.6-2-3.4c-2.4,1.3-3.5,4.1-2.8,6.7c0.3,1,1.4,1.6,2.4,1.4c5.5-1.5,12.1-3.9,14.8-9.4c2.1-4.3,1.4-10.3-2.6-13.3 c-2.2-1.5-4.9-1.9-7.4-1.1c-2.6,0.7-4.9,2.3-7.5,3.2c-2.4,0.8-4.7,0.1-5.8-2.3c-1.3-2.7-0.3-5.7,1.1-8.2c3.1-5.3,7.9-9.3,13.6-11.4 c2.9-1.1,5.9-1.7,9-1.7c3.7-0.1,7.4,0.1,11-0.7c5.8-1.4,11.3-6.7,9.3-13.1c-1.7-5.4-7.2-9.6-12.2-11.9c-11.3-5.3-25.5-3.8-34.9,4.5 c-0.8,0.8-0.7,2.1,0.1,2.8c0.4,0.3,0.8,0.5,1.3,0.6c3.2-0.1,4.8-3.2,4.3-6.1c-0.5-3.4-3.9-5-7.1-5c-3.6-0.1-7.4,1.2-10.7,2.6 
c-3.5,1.5-6.7,3.5-9.5,6.1c-1.2,1.1-0.3,3.6,1.4,3.4c3.2-0.4,4.6-3.3,3.9-6.3c-0.9-3.6-4.7-4.7-8-4.2c-2,0.4-3.9,0.9-5.8,1.7 c-2.1,0.7-4.1,1.5-6.2,2.3c-4,1.6-8,3.4-11.9,5.3c-7.9,3.9-15.4,8.5-22.8,13.1c-7.7,4.8-15.3,9.6-23,14.4 c-7.1,4.4-14.2,8.9-20.2,14.6c-5.4,5.1-10.3,12.1-9.2,19.9c1,6.8,6.5,12.4,12.6,15.1c7.7,3.3,16.4,3,24.6,2.5 c17.3-1,34.4-4.2,50.9-9.6c2.1-0.7,4.2-1.4,6.2-2.1C479.2,393.4,478.1,389.6,475.8,390.4L475.8,390.4z"/> +</svg> \ No newline at end of file diff --git a/mintlify-docs/logo/light.svg b/mintlify-docs/logo/light.svg new file mode 100644 index 0000000000..12c67c1baf --- /dev/null +++ b/mintlify-docs/logo/light.svg @@ -0,0 +1,9 @@ +<svg width="211" height="48" viewBox="0 0 211 48" fill="none" xmlns="http://www.w3.org/2000/svg"> +<path d="M13.5876 32.2135H56.2925L42.7049 45.2039H0V26.6467L13.5876 13.6562V32.2135Z" fill="black"></path> +<path d="M69.8804 0.666016V19.2233L56.2928 32.213V13.6558H13.5879L27.1762 0.666016H69.8804Z" fill="black"></path> +<path d="M95.7499 38.7409L83.4688 7.16113H88.5697L98.5833 34.049L108.691 7.16113H113.698L101.417 38.7409H95.7485H95.7499Z" fill="black"></path> +<path d="M133.678 20.8978C132.765 19.2598 131.475 17.9441 129.805 16.9512C128.136 15.9583 126.137 15.4619 123.807 15.4619C121.477 15.4619 119.429 15.9649 117.665 16.9735C115.901 17.9821 114.525 19.381 113.534 21.1689C112.54 22.9588 112.045 25.0565 112.045 27.4621C112.045 29.8676 112.541 31.8684 113.534 33.6426C114.525 35.4168 115.901 36.8006 117.665 37.7928C119.429 38.7857 121.444 39.2821 123.712 39.2821C125.57 39.2821 127.214 38.9658 128.647 38.3345C130.079 37.7031 131.284 36.8386 132.261 35.7403C133.236 34.642 133.929 33.4022 134.34 32.0184H129.616C129.144 33.101 128.419 33.9589 127.444 34.5903C126.467 35.2216 125.222 35.5379 123.712 35.5379C122.452 35.5379 121.286 35.2439 120.216 34.6577C119.145 34.0716 118.287 33.1999 117.641 32.0413C117.084 31.0432 116.768 29.8467 116.692 28.4543H134.954C134.984 28.0338 135.008 27.65 135.024 27.3042C135.04 26.9584 135.047 
26.6349 135.047 26.3343C135.047 24.3492 134.592 22.5384 133.678 20.8971V20.8978ZM116.746 25.2524C116.873 24.1803 117.171 23.2548 117.641 22.4788C118.287 21.4099 119.144 20.5991 120.216 20.0418C121.286 19.4851 122.468 19.2068 123.76 19.2068C125.585 19.2068 127.114 19.7412 128.342 20.8081C129.57 21.8769 130.231 23.357 130.325 25.2524H116.747H116.746Z" fill="black"></path> +<path d="M148.274 39.2821C146.258 39.2821 144.495 38.9665 142.984 38.3345C141.472 37.7031 140.275 36.8164 139.394 35.6729C138.512 34.53 137.977 33.1913 137.788 31.6575H142.606C142.763 32.3792 143.071 33.0414 143.527 33.6426C143.983 34.2445 144.613 34.7252 145.416 35.086C146.219 35.4469 147.171 35.6277 148.274 35.6277C149.313 35.6277 150.171 35.4849 150.848 35.1993C151.525 34.9138 152.021 34.5228 152.336 34.0264C152.651 33.5299 152.809 32.9968 152.809 32.4251C152.809 31.5835 152.596 30.9443 152.171 30.5075C151.746 30.0719 151.115 29.7255 150.282 29.4701C149.447 29.2147 148.447 28.9815 147.282 28.7706C146.179 28.5899 145.117 28.3423 144.094 28.0266C143.071 27.7109 142.157 27.3128 141.355 26.8314C140.552 26.3507 139.914 25.7416 139.441 25.0041C138.969 24.2674 138.733 23.3577 138.733 22.2744C138.733 20.9816 139.094 19.8158 139.819 18.7778C140.543 17.7404 141.574 16.9283 142.913 16.3415C144.251 15.7547 145.833 15.4619 147.66 15.4619C150.305 15.4619 152.431 16.0638 154.036 17.2662C155.642 18.4693 156.587 20.1688 156.871 22.3642H152.289C152.163 21.3418 151.69 20.5526 150.872 19.9959C150.053 19.4399 148.966 19.1616 147.613 19.1616C146.259 19.1616 145.219 19.4177 144.495 19.9285C143.77 20.44 143.408 21.1172 143.408 21.9588C143.408 22.5004 143.613 22.9817 144.022 23.4022C144.431 23.824 145.037 24.177 145.841 24.4625C146.644 24.7487 147.628 25.0114 148.793 25.2517C150.462 25.553 151.958 25.9282 153.28 26.3795C154.603 26.8307 155.657 27.4929 156.445 28.3646C157.232 29.2369 157.626 30.4845 157.626 32.1088C157.657 33.5227 157.288 34.7704 156.516 35.853C155.744 36.9355 154.658 37.7784 153.258 38.3797C151.856 
38.9809 150.195 39.2821 148.274 39.2821H148.274Z" fill="black"></path> +<path d="M161.971 48.6662V16.0039H166.222L166.694 19.5228C167.198 18.8312 167.828 18.1769 168.584 17.5606C169.339 16.9443 170.244 16.4407 171.3 16.049C172.354 15.6587 173.591 15.4629 175.007 15.4629C177.274 15.4629 179.266 15.9894 180.983 17.0419C182.699 18.095 184.037 19.5162 184.998 21.3055C185.958 23.0954 186.438 25.1328 186.438 27.4185C186.438 29.7042 185.95 31.7423 184.974 33.5316C183.998 35.3215 182.652 36.7276 180.936 37.7499C179.219 38.7723 177.228 39.2838 174.961 39.2838C173.102 39.2838 171.466 38.9301 170.048 38.2235C168.632 37.5168 167.514 36.5318 166.695 35.2684V48.6675H161.971L161.971 48.6662ZM174.205 35.3575C175.653 35.3575 176.936 35.0268 178.054 34.3653C179.171 33.7038 180.054 32.7712 180.699 31.5681C181.344 30.3657 181.667 28.9668 181.667 27.3727C181.667 25.7786 181.344 24.3882 180.699 23.1995C180.053 22.0115 179.171 21.0789 178.054 20.4023C176.936 19.7258 175.653 19.3872 174.205 19.3872C172.756 19.3872 171.425 19.7258 170.308 20.4023C169.19 21.0789 168.316 22.0115 167.686 23.1995C167.056 24.3882 166.741 25.7786 166.741 27.3727C166.741 28.9668 167.056 30.3657 167.686 31.5681C168.316 32.7712 169.189 33.7038 170.308 34.3653C171.425 35.0274 172.724 35.3575 174.205 35.3575Z" fill="black"></path> +<path d="M198.625 39.2821C196.672 39.2821 195.051 38.9665 193.76 38.3345C192.468 37.7031 191.508 36.8537 190.878 35.7855C190.248 34.718 189.934 33.5679 189.934 32.3341C189.934 30.8303 190.343 29.5447 191.162 28.4766C191.981 27.409 193.146 26.5897 194.657 26.018C196.168 25.4469 197.979 25.1607 200.089 25.1607H206.277C206.277 23.8377 206.071 22.7401 205.663 21.8677C205.253 20.996 204.647 20.3417 203.845 19.9056C203.042 19.47 202.026 19.2513 200.798 19.2513C199.381 19.2513 198.169 19.5748 197.161 20.2212C196.153 20.8683 195.523 21.8225 195.272 23.0859H190.548C190.737 21.4918 191.311 20.1309 192.272 19.0031C193.233 17.8753 194.469 17.0029 195.98 16.3867C197.492 15.7704 199.098 15.4619 200.798 
15.4619C203.033 15.4619 204.908 15.8385 206.419 16.5897C207.93 17.3415 209.072 18.4019 209.844 19.77C210.615 21.1388 211.001 22.7702 211.001 24.6649V38.7405H206.891L206.513 34.9059C206.166 35.5078 205.757 36.0789 205.285 36.6205C204.813 37.1621 204.253 37.6285 203.607 38.0188C202.961 38.4098 202.222 38.7182 201.388 38.9435C200.553 39.1688 199.632 39.2821 198.624 39.2821H198.625ZM199.522 35.6277C200.529 35.6277 201.443 35.4325 202.262 35.0409C203.081 34.6505 203.781 34.1161 204.364 33.4396C204.946 32.763 205.387 31.9961 205.686 31.1388C205.985 30.2815 206.15 29.402 206.182 28.4995V28.3639H200.561C199.207 28.3639 198.112 28.5217 197.278 28.8374C196.444 29.1531 195.837 29.5821 195.459 30.123C195.081 30.6647 194.893 31.296 194.893 32.0177C194.893 32.7394 195.074 33.4094 195.436 33.9353C195.798 34.4619 196.325 34.8758 197.019 35.1758C197.711 35.477 198.546 35.627 199.522 35.627L199.522 35.6277Z" fill="black"></path> +</svg> \ No newline at end of file diff --git a/mintlify-docs/scripts/convert_query_tables.py b/mintlify-docs/scripts/convert_query_tables.py new file mode 100644 index 0000000000..c00251c3e1 --- /dev/null +++ b/mintlify-docs/scripts/convert_query_tables.py @@ -0,0 +1,477 @@ +#!/usr/bin/env python3 +"""Convert query API reference HTML tables to Mintlify markdown tables.""" + +from __future__ import annotations + +import html +import re +import subprocess +import sys +from pathlib import Path + +sys.path.insert(0, str(Path(__file__).resolve().parent)) +from html_to_mdx import Node, TreeBuilder, convert_href, preprocess_body # noqa: E402 + +SOURCE_DIR = Path(__file__).resolve().parent.parent / "en" / "reference" / "api" +ROOT = SOURCE_DIR.parent.parent + +H2_RE = re.compile(r'<h2\s+id="([^"]*)"[^>]*>([^<]*)</h2>', re.I) +TABLE_CLASS_RE = re.compile(r"<table\s+class=\"table\"", re.I) +P_RE = re.compile(r"<p(?:\s[^>]*)?>(.*?)</p>", re.I | re.DOTALL) +HR_RE = re.compile(r"<hr\s*/?>", re.I) + + +def load_html_section() -> str: + raw = subprocess.check_output( + 
["git", "show", "HEAD:en/reference/api/query.html"], + cwd=ROOT, + text=True, + ) + idx = raw.find('<h2 id="query">') + if idx == -1: + raise SystemExit("query section not found in HTML") + return raw[idx:] + + +def fix_short_td_rows(html: str) -> str: + """Fix rows like <tr><td>a<td>b</tr> missing </td> closers.""" + + def repl(m: re.Match) -> str: + row = m.group(0) + if "</td>" in row or "</th>" in row: + return row + pieces = re.split(r"(?=<t[dh]\b)", row) + out = [pieces[0]] + for piece in pieces[1:]: + tag_m = re.match(r"(<t[dh][^>]*>)([\s\S]*)", piece, re.I) + if not tag_m: + continue + tag, rest = tag_m.groups() + content = re.split(r"(?=</tr>)", rest, maxsplit=1)[0].strip() + close = "</th>" if tag.lower().startswith("<th") else "</td>" + out.append(f"{tag}{content}{close}") + if "</tr>" in row and not out[-1].endswith("</tr>"): + out.append("</tr>") + return "".join(out) + + return re.sub(r"<tr[\s\S]*?</tr>", repl, html, flags=re.IGNORECASE) + + +def fix_html_structure(body: str) -> str: + body = re.sub( + r"(<p(?:\s[^>]*)?>(?:(?!</p>)[\s\S])*?)(\s*</td>)", + r"\1</p>\2", + body, + flags=re.IGNORECASE, + ) + body = re.sub( + r"(<li>(?:(?!</li>)[\s\S])*?)(\s*<li>)", + lambda m: m.group(1) + "</li>" + m.group(2), + body, + ) + body = fix_short_td_rows(body) + body = re.sub( + r"<tr><th rowspan=\"2\">Value\s*<th colspan=\"3\">Results in</tr>\s*" + r"<tr>\s*(?:<th>)?composite</th><th>tokenization</th><th>syntax</th></tr>", + "<tr><th>Value</th><th>composite</th><th>tokenization</th><th>syntax</th></tr>", + body, + flags=re.IGNORECASE, + ) + # Close table rows that end at </table> without </tr> + body = re.sub( + r"(<tr>(?:(?!</tr>)[\s\S])*?)(</table>)", + lambda m: m.group(1) + "</td></tr>" + m.group(2), + body, + flags=re.IGNORECASE, + ) + return body + + +def escape_cell(text: str) -> str: + text = text.replace("\n", " ").replace("\r", "") + text = re.sub(r"\s+", " ", text).strip() + return text.replace("|", "\\|") + + +def emit_inline(node: Node | str, 
source_dir: Path) -> str: + if isinstance(node, str): + return html.unescape(node) + tag = node.tag or "" + inner = "".join(emit_inline(c, source_dir) for c in node.children) + if tag == "br": + return "<br />" + if tag == "a": + href = convert_href(node.attrs.get("href", ""), source_dir) + text = inner.strip() or href + return f"[{text}]({href})" + if tag == "code": + return f"`{inner}`" + if tag in ("em", "i"): + return f"*{inner}*" + if tag in ("strong", "b"): + return f"**{inner}**" + if tag == "mdx-warning": + return f"**Important:** {inner.strip()}" + if tag == "mdx-note": + return f"**Note:** {inner.strip()}" + return inner + + +def nested_table_to_md(node: Node, source_dir: Path) -> str: + rows: list[list[str]] = [] + for c in node.children: + if isinstance(c, Node) and c.tag == "tr": + cells = [ + escape_cell(emit_inline(cell, source_dir)) + for cell in c.children + if isinstance(cell, Node) and cell.tag in ("td", "th") + ] + if cells: + rows.append(cells) + if not rows: + return "" + if len(rows[0]) == 2: + return " ".join(f"`{r[0]}`: {r[1]}" for r in rows) + return " ".join(" — ".join(r) for r in rows) + + +def emit_cell(node: Node, source_dir: Path) -> str: # noqa: PLR0912 + parts: list[str] = [] + for c in node.children: + if isinstance(c, Node): + if c.tag == "table": + nested = nested_table_to_md(c, source_dir) + if nested: + parts.append(nested) + elif c.tag in ("ul", "ol"): + for li in c.children: + if isinstance(li, Node) and li.tag == "li": + parts.append(emit_inline(li, source_dir).strip()) + elif c.tag == "p": + text = emit_inline(c, source_dir).strip() + if text: + parts.append(text) + elif c.tag in ("mdx-warning", "mdx-note"): + parts.append(emit_inline(c, source_dir).strip()) + else: + text = emit_inline(c, source_dir).strip() + if text: + parts.append(text) + else: + t = html.unescape(c).strip() + if t: + parts.append(t) + return " ".join(parts) + + +def row_cells(tr: Node) -> list[Node]: + return [c for c in tr.children if isinstance(c, 
Node) and c.tag in ("th", "td")] + + +def cell_text(cell: Node, source_dir: Path) -> str: + if cell.tag == "th": + return escape_cell(emit_inline(cell, source_dir)) + return escape_cell(emit_cell(cell, source_dir)) + + +NESTED_TABLE_RE = re.compile(r"<table[^>]*>[\s\S]*?</table>", re.IGNORECASE) + + +def table_end(html: str, start: int) -> int: + j = html.find(">", start) + 1 + depth = 1 + while j < len(html) and depth > 0: + next_open = html.find("<table", j) + next_close = html.find("</table>", j) + if next_close == -1: + return len(html) + if next_open != -1 and next_open < next_close: + depth += 1 + j = html.find(">", next_open) + 1 + else: + depth -= 1 + j = next_close + len("</table>") + return j + + +def isolate_nested_tables(table_html: str) -> tuple[str, list[str]]: + """Replace every nested <table> inside the outer table with placeholders.""" + nested: list[str] = [] + first = re.search(r"<table\b", table_html, re.I) + if not first: + return table_html, nested + pos = first.end() + while True: + m = re.search(r"<table\b", table_html[pos:], re.I) + if not m: + break + start = pos + m.start() + end = table_end(table_html, start) + nested.append(table_html[start:end]) + placeholder = f"<!--NESTED{len(nested) - 1}-->" + table_html = table_html[:start] + placeholder + table_html[end:] + pos = start + len(placeholder) + return table_html, nested + + +def restore_nested(fragment: str, nested: list[str]) -> str: + for i, table_html in enumerate(nested): + fragment = fragment.replace(f"<!--NESTED{i}-->", table_html) + return fragment + + +def nested_table_to_md_html(table_html: str, source_dir: Path) -> str: + table_html = fix_short_td_rows(table_html) + rows: list[list[str]] = [] + for tr in re.finditer(r"<tr[^>]*>([\s\S]*?)</tr>", table_html, re.I): + cells = tr_cells_from_inner(tr.group(1), [], source_dir) + if cells: + rows.append(cells) + if not rows: + return "" + if rows and all(len(r) == 2 for r in rows): + return " ".join(f"`{r[0]}`: {r[1]}" for r in 
def nested_table_to_md_html(table_html: str, source_dir: Path) -> str:
    """Summarise the HTML of a nested table as a single line of Markdown.

    The markup is first normalised with ``fix_short_td_rows``; each ``<tr>`` is
    then converted to a list of cell strings via ``tr_cells_from_inner``.

    Rendering rules, applied in order:
      1. no rows -> ``""``;
      2. every row has two cells -> ``"`key`: value"`` pairs;
      3. every row has at least four cells, or there are at least two rows and
         the first has at least three cells -> the first row is treated as a
         header and skipped; remaining rows are rendered as a
         composite/tokenization/syntax summary (four or more cells) or as a
         ``"`key`: value"`` pair (two or three cells); shorter rows are dropped;
      4. anything else -> cells joined by an em dash.
    """
    table_html = fix_short_td_rows(table_html)
    rows: list[list[str]] = []
    for tr in re.finditer(r"<tr[^>]*>([\s\S]*?)</tr>", table_html, re.I):
        cells = tr_cells_from_inner(tr.group(1), [], source_dir)
        if cells:
            rows.append(cells)
    if not rows:
        return ""
    if all(len(r) == 2 for r in rows):
        return " ".join(f"`{r[0]}`: {r[1]}" for r in rows)

    all_wide = all(len(r) >= 4 for r in rows)
    has_header_and_body = len(rows) >= 2 and len(rows[0]) >= 3
    if all_wide or has_header_and_body:
        parts: list[str] = []
        for row in rows[1:]:  # rows[0] is the header and is not rendered
            if len(row) >= 4:
                parts.append(
                    f"`{row[0]}` → composite `{row[1]}`, tokenization `{row[2]}`, syntax `{row[3]}`"
                )
            elif len(row) >= 2:
                parts.append(f"`{row[0]}`: {row[1]}")
        return " ".join(parts)
    return " ".join(" — ".join(r) for r in rows)
def extract_rows_regex(table_html: str, source_dir: Path) -> tuple[list[str], list[list[str]]]:
    """Split a table's HTML into header cells and body rows of Markdown text.

    Nested tables are first swapped for placeholders with
    ``isolate_nested_tables`` so the row regex only sees the outer table; the
    placeholders are resolved again per body row by ``tr_cells_from_inner``.

    Returns:
        ``(header, body_rows)``. ``header`` is empty when the table has no
        ``<thead>`` row; rows that yield no cells are omitted from
        ``body_rows``.
    """
    row_re = re.compile(r"<tr[^>]*>([\s\S]*?)</tr>", re.I)
    isolated, nested = isolate_nested_tables(table_html)

    header: list[str] = []
    # Allow attributes on <thead>/<tbody>, as the row pattern already does.
    thead = re.search(r"<thead\b[^>]*>([\s\S]*?)</thead>", isolated, re.I)
    if thead:
        header_row = row_re.search(thead.group(1))
        if header_row:
            header = tr_cells_from_inner(header_row.group(1), [], source_dir)

    tbody = re.search(r"<tbody\b[^>]*>([\s\S]*?)</tbody>", isolated, re.I)
    if tbody:
        chunk = tbody.group(1)
    elif thead:
        # No explicit body: scan everything except the header section so the
        # header row is not emitted a second time as a body row.
        chunk = isolated[: thead.start()] + isolated[thead.end() :]
    else:
        chunk = isolated

    body_rows: list[list[str]] = []
    for row in row_re.finditer(chunk):
        cells = tr_cells_from_inner(row.group(1), nested, source_dir)
        if cells:
            body_rows.append(cells)
    return header, body_rows
def convert_section(chunk: str, source_dir: Path) -> str:
    """Convert one HTML section to Markdown, turning ``H2_RE`` matches into ``##`` headings.

    Text between headings (and after the last one) is converted with
    ``process_between``; whitespace-only stretches are skipped. The heading
    text is taken from group 2 of ``H2_RE`` and HTML-unescaped.
    """
    pieces: list[str] = []
    cursor = 0

    def flush(segment: str) -> None:
        # Convert a non-heading stretch, ignoring pure whitespace.
        if segment.strip():
            pieces.append(process_between(segment, source_dir))

    for heading in H2_RE.finditer(chunk):
        flush(chunk[cursor : heading.start()])
        heading_text = html.unescape(heading.group(2)).strip()
        pieces.append(f"## {heading_text}\n\n")
        cursor = heading.end()

    flush(chunk[cursor:])
    return "".join(pieces)
def process_between(fragment: str, source_dir: Path) -> str:
    """Convert the HTML between two headings to Markdown.

    Tables found by ``extract_main_tables`` are rendered with
    ``html_table_to_markdown``; the markup before, between and after them is
    rendered with ``inline_html_to_md``. If the fragment contains a match for
    ``HR_RE`` and the output does not already contain a horizontal rule, one is
    appended.
    """
    out: list[str] = []
    pos = 0
    for start, end, table_html in extract_main_tables(fragment):
        before = fragment[pos:start]
        if before.strip():
            out.append(inline_html_to_md(before, source_dir))
        out.append(html_table_to_markdown(table_html, source_dir))
        pos = end
    rest = fragment[pos:]
    if rest.strip():
        out.append(inline_html_to_md(rest, source_dir))

    rendered = "".join(out)
    # Look for a rule on a line of its own. A plain substring test for "---"
    # is also satisfied by the delimiter row of any rendered table
    # ("| --- | --- |"), which would suppress the rule for every fragment
    # that contains a table.
    if HR_RE.search(fragment) and not re.search(r"(?m)^-{3,}[ \t]*$", rendered):
        rendered += "---\n\n"
    return rendered
def post_fix(path: Path) -> bool:
    """Run the MDX clean-up passes over *path* and rewrite it if anything changed.

    The passes are applied in a fixed order: front matter, Jekyll notes,
    Jekyll highlight blocks, generic ``<pre>`` blocks, then math braces.

    Returns:
        ``True`` when the file was rewritten, ``False`` when it was already clean.
    """
    before = path.read_text(encoding="utf-8")
    passes = (
        fix_frontmatter,
        jekyll_note_to_mdx,
        jekyll_highlight_to_fences,
        generic_pre_to_fences,
        escape_mdx_curly_in_math,
    )
    after = before
    for apply_pass in passes:
        after = apply_pass(after)
    if after == before:
        return False
    path.write_text(after, encoding="utf-8")
    return True
def jekyll_note_to_mdx(text: str) -> str:
    """Replace Jekyll ``{% include ... %}`` tags with MDX equivalents.

    Handled includes:
      * ``note.html``                       -> ``<Note>`` block
      * ``important.html`` / ``warning.html`` -> ``<Warning>`` block
      * ``pre-req.html``                    -> ``<Note>`` block listing prerequisites
      * ``video-include.html`` / ``setup.html`` -> removed
      * ``version.html``                    -> ``Vespa <version>+``

    ``content=`` values are HTML-unescaped and stripped before being embedded.
    """

    def note_repl(m: re.Match) -> str:
        content = html.unescape(m.group(2)).strip()
        return f"\n<Note>\n**Note:**\n\n{content}\n</Note>\n"

    def important_repl(m: re.Match) -> str:
        content = html.unescape(m.group(2)).strip()
        return f"\n<Warning>\n**Important:**\n\n{content}\n</Warning>\n"

    def pre_req_repl(m: re.Match) -> str:
        # Groups 1 and 3 capture the quote characters; the memory value is
        # group 2 and the optional extra requirements are group 4.
        memory = m.group(2)
        extra = m.group(4) or ""
        extra = re.sub(r"<li>", "\n- ", extra)
        extra = re.sub(r"</li>", "", extra)
        extra = re.sub(r"<code>([^<]+)</code>", r"`\1`", extra)
        extra = extra.strip()
        body = f"Memory: {memory}."
        if extra:
            # List items must start on their own line to render as a list;
            # free text simply follows the memory sentence.
            body += ("\n" if extra.startswith("- ") else " ") + extra
        return f"\n<Note>\n**Prerequisites:**\n\n{body}\n</Note>\n"

    text = re.sub(
        r"\{%\s*include\s+note\.html\s+content=(['\"])(.*?)\1\s*%\}",
        note_repl,
        text,
        flags=re.DOTALL,
    )
    text = re.sub(
        r"\{%\s*include\s+important\.html\s+content=(['\"])(.*?)\1\s*%\}",
        important_repl,
        text,
        flags=re.DOTALL,
    )
    text = re.sub(
        r"\{%\s*include\s+pre-req\.html\s+memory=(['\"])([^'\"]+)\1(?:\s+extra-reqs=(['\"])(.*?)\3)?\s*%\}",
        pre_req_repl,
        text,
        flags=re.DOTALL,
    )
    text = re.sub(r"\{%\s*include\s+video-include\.html[^%]*%\}", "", text, flags=re.DOTALL)
    text = re.sub(
        r"\{%\s*include\s+warning\.html\s+content=(['\"])(.*?)\1\s*%\}",
        important_repl,
        text,
        flags=re.DOTALL,
    )
    text = re.sub(
        r"\{%\s*include\s+setup\.html\s+appname=['\"]([^'\"]+)['\"]\s*%\}",
        "",
        text,
    )
    text = re.sub(
        r"\{%\s*include\s+version\.html\s+version=[\"']([^\"']+)[\"']\s*%\}",
        lambda m: f"Vespa {m.group(1)}+",
        text,
    )
    return text
def generic_pre_to_fences(text: str) -> str:
    """Convert ``<pre>...</pre>`` blocks into fenced Markdown code blocks.

    Blocks that still contain Liquid tags (``{% ... %}``) are left untouched.
    The fence language is ``bash`` for content starting with ``$``, ``json``
    for content starting with ``{`` or ``[``, and ``txt`` otherwise.
    """
    pre_re = re.compile(r"<pre(?:\s[^>]*)?>(.*?)</pre>", re.DOTALL | re.IGNORECASE)
    wrapper_re = re.compile(r"</?(?:code|span)(?:\s[^>]*)?>", re.IGNORECASE)

    def to_fence(m: re.Match) -> str:
        # Only unwrap Jekyll/HTML wrappers; do not strip API placeholders like
        # <namespace>. Wrappers are removed *before* entities are decoded so
        # that escaped text such as "&lt;code&gt;" survives as literal code.
        code = html.unescape(wrapper_re.sub("", m.group(1))).strip()
        if "{%" in code:
            return m.group(0)
        if code.startswith("$"):
            lang = "bash"
        elif code.startswith(("{", "[")):
            lang = "json"
        else:
            lang = "txt"
        return f"\n```{lang}\n{code}\n```\n"

    return pre_re.sub(to_fence, text)
def html_tables_to_markdown(text: str) -> str:
    """Convert simple HTML tables to Markdown pipe tables.

    Only rows written as a bare ``<tr>`` with bare ``<td>``/``<th>`` cells are
    recognised. Tables with fewer than two such rows are left as HTML. The
    first row becomes the Markdown header.

    Cell handling: inner tags are removed, whitespace (including newlines) is
    collapsed to single spaces so each row stays on one line, a cell consisting
    solely of a comparison operator is wrapped in backticks, and ``|`` in any
    other cell is escaped.
    """
    comparison_ops = ("<", ">", "≤", "≥")

    def render_cell(raw: str) -> str:
        cell = " ".join(re.sub(r"<[^>]+>", "", raw).split())
        if cell in comparison_ops:
            return f"`{cell}`"
        return cell.replace("|", "\\|")

    def render_row(cells: list) -> str:
        return "| " + " | ".join(cells) + " |"

    def table_repl(m: re.Match) -> str:
        rows = []
        for row_html in re.findall(r"<tr>(.*?)</tr>", m.group(1), re.DOTALL):
            cells = [
                render_cell(c)
                for c in re.findall(r"<t[dh]>(.*?)</t[dh]>", row_html, re.DOTALL)
            ]
            if cells:
                rows.append(cells)
        if len(rows) < 2:
            return m.group(0)
        header = rows[0]
        # Build the delimiter from the header's cell count; re-splitting the
        # rendered header on "|" miscounts when a cell contains an escaped pipe.
        lines = [render_row(header), render_row(["---"] * len(header))]
        lines.extend(render_row(cells) for cells in rows[1:])
        return "\n" + "\n".join(lines) + "\n"

    return re.sub(
        r"<table[^>]*>(.*?)</table>",
        table_repl,
        text,
        flags=re.DOTALL | re.IGNORECASE,
    )
@dataclass
class Node:
    """Minimal DOM node: an element with attributes and ordered children.

    Children are either nested ``Node`` objects or raw text strings.
    """

    tag: str | None = None
    attrs: dict[str, str] = field(default_factory=dict)
    children: list["Node | str"] = field(default_factory=list)

    def _raw_text(self) -> str:
        """Concatenate all descendant text without trimming anything."""
        return "".join(
            c if isinstance(c, str) else c._raw_text() for c in self.children
        )

    def text_content(self) -> str:
        """Return the concatenated text of this subtree, stripped at the ends.

        Whitespace is trimmed once, on the final result. Trimming each nested
        element separately would glue neighbouring words together
        (``"a <b>b </b>c"`` -> ``"a bc"``) and alter preformatted content.
        """
        return self._raw_text().strip()
def parse_frontmatter(text: str) -> tuple[dict[str, str], str]:
    """Split a document into its front-matter mapping and the remaining body.

    Only flat ``key: value`` lines are understood; blank lines and lines
    starting with ``#`` are skipped. Values wrapped in single quotes have one
    pair of quotes removed; otherwise surrounding double quotes are stripped.

    Returns:
        ``(meta, body)``. When *text* does not start with ``---`` or has no
        closing ``---`` line, ``meta`` is empty and ``body`` is *text* unchanged.
    """
    if not text.startswith("---"):
        return {}, text
    end = text.find("\n---", 3)
    if end == -1:
        return {}, text
    fm = text[3:end].strip()
    body = text[end + 4 :].lstrip("\n")
    meta: dict[str, str] = {}
    for line in fm.splitlines():
        if line.startswith("#") or not line.strip():
            continue
        if ":" not in line:
            continue
        key, _, val = line.partition(":")
        val = val.strip()
        if len(val) >= 2 and val[0] == val[-1] == "'":
            # Single-quoted scalar: drop the quotes so they do not end up
            # embedded in the value.
            val = val[1:-1]
        else:
            val = val.strip('"')
        meta[key.strip()] = val
    return meta, body
def preprocess_body(body: str) -> str:
    """Normalise Jekyll-flavoured HTML so it can be parsed into a node tree.

    Steps, in order:
      * close ``<p>`` elements left open before a list and drop stray
        ``</p>`` after a list;
      * rewrite ``<i>`` as ``<em>``;
      * turn ``important``/``deprecated`` includes into ``<mdx-warning>`` and
        ``note`` includes into ``<mdx-note>``; drop video includes; expand
        version includes to ``Vespa <version>+``;
      * turn ``<span class="pre-hilite">`` into ``<code>``;
      * insert a missing ``</li>`` before the next ``<li>``;
      * reduce tag-free ``<pre ...>`` blocks to a bare ``<pre>``;
      * turn Liquid ``highlight`` blocks into ``<pre data-lang="...">``;
      * wrap every ``<img>`` that has a ``src`` in ``<mdx-frame>``.

    Image wrapping is idempotent: an ``<img>`` already directly inside
    ``<mdx-frame>`` is not wrapped again.
    """
    # Unclosed <p> wrapping lists or trailing content
    body = re.sub(r"<p>((?:(?!</p>)[\s\S])*?)<ul>", r"<p>\1</p>\n<ul>", body, flags=re.IGNORECASE)
    body = re.sub(r"<p>((?:(?!</p>)[\s\S])*?)<ol>", r"<p>\1</p>\n<ol>", body, flags=re.IGNORECASE)
    body = re.sub(r"</ul>\s*</p>", "</ul>", body, flags=re.IGNORECASE)
    body = re.sub(r"</ol>\s*</p>", "</ol>", body, flags=re.IGNORECASE)
    body = re.sub(r"<p>\s*<ul>", "<ul>", body, flags=re.IGNORECASE)
    body = re.sub(r"<p>\s*<ol>", "<ol>", body, flags=re.IGNORECASE)
    body = re.sub(r"<i>", "<em>", body, flags=re.IGNORECASE)
    body = re.sub(r"</i>", "</em>", body, flags=re.IGNORECASE)

    def warning_repl(m: re.Match) -> str:
        content = html.unescape(m.group(2)).strip()
        return f"<mdx-warning>{content}</mdx-warning>"

    def note_repl(m: re.Match) -> str:
        content = html.unescape(m.group(2)).strip()
        return f"<mdx-note>{content}</mdx-note>"

    body = re.sub(
        r"\{%\s*include\s+important\.html\s+content=(['\"])(.*?)\1\s*%\}",
        warning_repl,
        body,
        flags=re.DOTALL,
    )
    body = re.sub(
        r"\{%\s*include\s+note\.html\s+content=(['\"])(.*?)\1\s*%\}",
        note_repl,
        body,
        flags=re.DOTALL,
    )
    body = re.sub(r"\{%\s*include\s+video-include\.html[^%]*%\}", "", body, flags=re.DOTALL)
    body = re.sub(
        r"\{%\s*include\s+deprecated\.html\s+content=(['\"])(.*?)\1\s*%\}",
        warning_repl,
        body,
        flags=re.DOTALL,
    )
    body = re.sub(
        r"\{%\s*include\s+version\.html\s+version=[\"']([^\"']+)[\"']\s*%\}",
        lambda m: f"Vespa {m.group(1)}+",
        body,
    )
    body = re.sub(
        r'<span class="pre-hilite">(.*?)</span>',
        lambda m: f"<code>{html.unescape(m.group(1))}</code>",
        body,
        flags=re.DOTALL,
    )
    # Jekyll sources sometimes omit </li> before the next <li>
    body = re.sub(
        r"(<li>(?:(?!</li>)[\s\S])*?)(\s*<li>)",
        lambda m: m.group(1) + "</li>" + m.group(2),
        body,
    )

    def normalize_pre(m: re.Match) -> str:
        inner = m.group(1)
        if "{%" in inner or "data-lang=" in inner:
            return m.group(0)
        if "<" in inner:
            return m.group(0)
        # Tag-free content is passed through verbatim; re-escaping it here
        # would double-escape entities it already carries.
        # NOTE(review): this deliberately keeps the text unchanged — confirm
        # no caller expects "&" or ">" to be entity-escaped at this point.
        return f"<pre>{inner}</pre>"

    body = re.sub(
        r"<pre(?:\s[^>]*)?>(.*?)</pre>",
        normalize_pre,
        body,
        flags=re.DOTALL | re.IGNORECASE,
    )

    def pre_highlight(m: re.Match) -> str:
        lang = m.group(1) or "txt"
        code = html.unescape(m.group(2)).strip("\n")
        escaped = html.escape(code, quote=False)
        return f'<pre data-lang="{lang}">{escaped}</pre>'

    body = re.sub(
        r"<pre>\s*\{%\s*highlight\s+(\w*)\s*%\}\s*(.*?)\s*\{%\s*endhighlight\s*%\}\s*</pre>",
        pre_highlight,
        body,
        flags=re.DOTALL,
    )
    body = re.sub(
        r"\{%\s*highlight\s+(\w*)\s*%\}\s*(.*?)\s*\{%\s*endhighlight\s*%\}",
        pre_highlight,
        body,
        flags=re.DOTALL,
    )

    def frame_img(m: re.Match) -> str:
        tag = m.group(0)
        src = re.search(r"""(?<![\w-])src=["']([^"']+)["']""", tag, re.IGNORECASE)
        if not src:
            return tag
        alt = re.search(r"""(?<![\w-])alt=["']([^"']*)["']""", tag, re.IGNORECASE)
        alt_text = alt.group(1) if alt else ""
        return f'<mdx-frame><img src="{src.group(1)}" alt="{alt_text}"/></mdx-frame>'

    # One pass over every <img>, whatever the attribute order. Separate
    # substitutions per attribute order would re-match tags that an earlier
    # substitution had already wrapped, nesting frames and blanking the alt text.
    body = re.sub(r"(?<!<mdx-frame>)<img\s[^>]*>", frame_img, body, flags=re.IGNORECASE)
    return body
def node_to_html(node: Node | str, source_dir: Path) -> str:
    """Serialise a node tree back to HTML/JSX suitable for embedding in MDX.

    Text is re-escaped, links are rewritten with ``convert_href``, emphasis
    tags are normalised to ``<em>``/``<strong>``, and the ``mdx-*`` placeholder
    elements become ``<Warning>``, ``<Note>`` and ``<Frame>``. Attributes are
    kept only on void tags and on elements without a dedicated rule.
    """
    if isinstance(node, str):
        return html.escape(html.unescape(node))
    tag = node.tag or ""
    if tag in VOID_TAGS:
        # Void tags are handled here and never reach the rules below.
        return f"<{tag}{attrs_str(node.attrs)}/>"
    inner = "".join(node_to_html(c, source_dir) for c in node.children)
    if tag == "a":
        href = convert_href(node.attrs.get("href", ""), source_dir)
        return f'<a href="{html.escape(href, quote=True)}">{inner}</a>'
    if tag == "code":
        return f"<code>{inner}</code>"
    if tag in ("em", "i"):
        return f"<em>{inner}</em>"
    if tag in ("strong", "b"):
        return f"<strong>{inner}</strong>"
    if tag == "pre":
        code = node.text_content()
        return f"<pre><code>{code}</code></pre>"
    if tag == "mdx-warning":
        return f"<Warning><strong>Important:</strong> {inner}</Warning>"
    if tag == "mdx-note":
        return f"<Note><strong>Note:</strong> {inner}</Note>"
    if tag == "mdx-frame":
        return f"<Frame>{inner}</Frame>"
    if tag == "p":
        text = inner.strip()
        return f"<p>{text}</p>" if text else ""
    if tag in ("ul", "ol"):
        # <li> renders as its bare content (see below), so the list itself
        # must add the wrappers; this applies to ordered lists as well,
        # which would otherwise lose their item boundaries.
        items = "".join(
            f"<li>{node_to_html(c, source_dir)}</li>"
            for c in node.children
            if isinstance(c, Node) and c.tag == "li"
        )
        return f"<{tag}>{items}</{tag}>"
    if tag == "li":
        return inner
    return f"<{tag}{attrs_str(node.attrs)}>{inner}</{tag}>"
"".join(inline_md(c, source_dir) for c in node.children) + + if tag == "a": + href = convert_href(node.attrs.get("href", ""), source_dir) + text = kids.strip() or href + return f"[{text}]({href})" + if tag == "code": + return f"`{kids}`" + if tag in ("em", "i"): + return f"*{kids}*" + if tag in ("strong", "b"): + return f"**{kids}**" + if tag == "br": + return "\n" + if tag == "mdx-warning": + return f"\n<Warning>\n**Important:**\n\n{kids.strip()}\n</Warning>\n" + if tag == "mdx-note": + return f"\n<Note>\n**Note:**\n\n{kids.strip()}\n</Note>\n" + if tag == "mdx-frame": + img = next((c for c in node.children if isinstance(c, Node) and c.tag == "img"), None) + if img: + alt = img.attrs.get("alt", "") + src = img.attrs.get("src", "") + return f"\n<Frame>\n![{alt}]({src})\n</Frame>\n" + if tag == "img": + alt = node.attrs.get("alt", "") + src = node.attrs.get("src", "") + return f"\n<Frame>\n![{alt}]({src})\n</Frame>\n" + if tag in ("ul", "ol", "li", "p", "h1", "h2", "h3", "h4", "pre", "table", "tr", "td", "th"): + return kids + return kids + + +def block_md(node: Node, source_dir: Path, depth: int = 0) -> str: + if isinstance(node, str): + return html.unescape(node) + tag = node.tag or "" + indent = " " * depth + + if tag in ("h1", "h2", "h3", "h4", "h5", "h6"): + level = int(tag[1]) + text = inline_md(node, source_dir).strip() + return f"\n{'#' * level} {text}\n\n" + + if tag == "p": + text = inline_md(node, source_dir).strip() + return f"{text}\n\n" if text else "" + + if tag == "pre": + lang = node.attrs.get("data-lang", "txt") or "txt" + code = html.unescape(node.text_content()) + return f"\n```{lang}\n{code}\n```\n\n" + + if tag == "ul": + lines: list[str] = [] + for c in node.children: + if isinstance(c, Node) and c.tag == "li": + item = block_md(c, source_dir, depth).strip() + item_lines = item.split("\n") + lines.append(f"{indent}- {item_lines[0]}") + for extra in item_lines[1:]: + lines.append(f"{indent} {extra}") + return "\n".join(lines) + "\n\n" + + if 
tag == "ol": + lines = [] + n = 0 + for c in node.children: + if isinstance(c, Node) and c.tag == "li": + n += 1 + item = block_md(c, source_dir, depth).strip() + item_lines = item.split("\n") + lines.append(f"{indent}{n}. {item_lines[0]}") + for extra in item_lines[1:]: + lines.append(f"{indent} {extra}") + return "\n".join(lines) + "\n\n" + + if tag == "li": + parts: list[str] = [] + for c in node.children: + if isinstance(c, Node) and c.tag in ("ul", "ol"): + parts.append("\n" + block_md(c, source_dir, depth + 1).rstrip()) + elif isinstance(c, Node) and c.tag == "p": + parts.append(inline_md(c, source_dir).strip()) + else: + parts.append(inline_md(c, source_dir)) + return "".join(parts).strip() + + if tag == "table": + if table_has_rich_cells(node): + return "\n" + node_to_html(node, source_dir) + "\n\n" + rows: list[list[str]] = [] + for c in node.children: + if isinstance(c, Node) and c.tag in ("thead", "tbody", "tr"): + if c.tag == "tr": + rows.append([inline_md(cell, source_dir).strip() for cell in c.children if isinstance(cell, Node) and cell.tag in ("td", "th")]) + else: + for row in c.children: + if isinstance(row, Node) and row.tag == "tr": + rows.append([inline_md(cell, source_dir).strip() for cell in row.children if isinstance(cell, Node) and cell.tag in ("td", "th")]) + if not rows: + return "" + out = ["| " + " | ".join(rows[0]) + " |", "| " + " | ".join("---" for _ in rows[0]) + " |"] + for row in rows[1:]: + while len(row) < len(rows[0]): + row.append("") + out.append("| " + " | ".join(row[: len(rows[0])]) + " |") + return "\n".join(out) + "\n\n" + + if tag == "dl": + parts: list[str] = [] + for c in node.children: + if isinstance(c, Node) and c.tag == "dt": + parts.append(f"\n#### {inline_md(c, source_dir).strip()}\n\n") + elif isinstance(c, Node) and c.tag == "dd": + for child in c.children: + if isinstance(child, Node): + parts.append(block_md(child, source_dir, depth)) + else: + t = html.unescape(child).strip() + if t: + parts.append(t + 
def first_description(root: Node) -> str:
    """Return a short description taken from the first non-empty paragraph.

    The tree is searched depth-first in document order, so paragraphs nested
    inside wrapper elements (for example a ``<body>`` element directly under
    the root) are found. Entities are decoded, whitespace is collapsed, and
    text longer than 200 characters is cut at a word boundary and suffixed
    with ``...``.

    Returns:
        The description, or ``""`` when the tree has no paragraph with text.
    """
    pending: list = list(reversed(root.children))
    while pending:
        node = pending.pop()
        if not isinstance(node, Node):
            continue
        if node.tag != "p":
            # Reverse so that the leftmost child is popped first.
            pending.extend(reversed(node.children))
            continue
        text = html.unescape(node.text_content())
        text = re.sub(r"\s+", " ", text).strip()
        if not text:
            continue
        if len(text) > 200:
            text = text[:197].rsplit(" ", 1)[0] + "..."
        return text
    return ""
+ out_path = path.with_suffix(".mdx") + out_path.write_text( + f'---\ntitle: "{title}"\ndescription: {description}\n---\n\n{mdx_body}', + encoding="utf-8", + ) + if path != out_path: + path.unlink() + print(f"Converted {path.name} -> {out_path.name}") + return + + body = preprocess_body(body.strip()) + builder = TreeBuilder() + builder.feed(f"<body>{body}</body>") + builder.close() + mdx_body = block_md(builder.root, path.parent) + description = first_description(builder.root) or f"{title} in Vespa applications." + + out_path = path.with_suffix(".mdx") + out_path.write_text( + f'---\ntitle: "{title}"\ndescription: {description}\n---\n\n{mdx_body}', + encoding="utf-8", + ) + if path.suffix == ".html": + path.unlink() + elif path.suffix == ".md": + path.unlink() + print(f"Converted {path.name} -> {out_path.name}") + + +def main() -> None: + for mdx in APPS_DIR.glob("*.mdx"): + mdx.unlink() + # Restore HTML from git if missing + import subprocess + + html_files = list(APPS_DIR.glob("*.html")) + if not html_files: + subprocess.run( + ["git", "checkout", "HEAD", "--", "en/applications/*.html"], + cwd=APPS_DIR.parent.parent, + check=False, + ) + targets = sorted(APPS_DIR.glob("*.html")) + zk = APPS_DIR / "using-zookeeper.md" + if not zk.exists(): + subprocess.run( + ["git", "show", "HEAD:en/applications/using-zookeeper.md"], + cwd=APPS_DIR.parent.parent, + capture_output=True, + text=True, + check=False, + ) + if zk.exists(): + targets.append(zk) + for p in targets: + convert_file(p) + + +if __name__ == "__main__": + main() diff --git a/mintlify-docs/scripts/indent_code_blocks.py b/mintlify-docs/scripts/indent_code_blocks.py new file mode 100644 index 0000000000..b8ef61a727 --- /dev/null +++ b/mintlify-docs/scripts/indent_code_blocks.py @@ -0,0 +1,166 @@ +#!/usr/bin/env python3 +"""Indent content inside fenced code blocks in MDX files.""" + +from __future__ import annotations + +import re +from pathlib import Path + +APPS_DIR = Path(__file__).resolve().parent.parent / 
"en" / "applications" +PAD = " " + + +def looks_like_xml(code: str) -> bool: + s = code.strip() + return s.startswith("<") or "<services" in s or "<container" in s or "<chain" in s + + +def looks_like_java(code: str) -> bool: + return ( + "import " in code + or "public class" in code + or "public void" in code + or "@Override" in code + or "extends " in code + ) + + +def indent_xml(code: str) -> str: + lines = code.split("\n") + out: list[str] = [] + level = 0 + stack: list[str] = [] + for raw in lines: + s = raw.strip() + if not s: + out.append("") + continue + if s.startswith("<?") or s.startswith("<!--"): + out.append(s) + continue + if s in ("...", "…"): + out.append(PAD * level + s) + continue + if s.startswith("</"): + tag = re.match(r"</(\w+)", s) + if tag and stack and stack[-1] == tag.group(1): + stack.pop() + level = max(0, level - 1) + elif level > 0: + level -= 1 + out.append(PAD * level + s) + continue + if s.endswith("/>") or re.match(r"<[^>]+>.*</[^>]+>$", s): + out.append(PAD * level + s) + continue + if s.startswith("<"): + tag = re.match(r"<(\w+)", s) + out.append(PAD * level + s) + if tag and not s.endswith("/>") and "</" not in s[1:]: + stack.append(tag.group(1)) + level += 1 + continue + out.append(PAD * level + s) + return "\n".join(out) + + +def indent_java(code: str) -> str: + """Basic Java brace-based indentation for flattened blocks.""" + if "\n" in code and code.count("\n") > 3: + # Already multiline — normalize brace indent only + lines = code.split("\n") + else: + # Single line or few lines — split on common tokens + s = code.strip() + if "{" not in s and ";" not in s: + return code + # Split after ; and { } for readability + s = re.sub(r"\s*;\s*", ";\n", s) + s = re.sub(r"\s*\{\s*", " {\n", s) + s = re.sub(r"\s*\}\s*", "\n}\n", s) + lines = [ln.strip() for ln in s.split("\n") if ln.strip()] + + out: list[str] = [] + level = 0 + for line in lines: + s = line.strip() + if not s: + continue + if s.startswith("}"): + level = max(0, level - 
1) + out.append(PAD * level + s) + if s.endswith("{") and not s.startswith("}"): + level += 1 + return "\n".join(out) + + +def indent_shell(code: str) -> str: + lines = code.split("\n") + out = [] + for line in lines: + s = line.strip() + if not s: + out.append("") + continue + if s.endswith("\\"): + out.append(s) + elif s.startswith("-D") or s.startswith("$"): + out.append(s) + else: + out.append(s) + return "\n".join(out) + + +def indent_block(code: str, lang: str) -> str: + lang = (lang or "").lower() + if lang in ("xml", "txt", "yaml", "yml") and looks_like_xml(code): + return indent_xml(code) + if lang in ("java",) or looks_like_java(code): + return indent_java(code) + if lang in ("bash", "shell", "sh") and code.strip().startswith("$"): + return indent_shell(code) + if looks_like_xml(code): + return indent_xml(code) + return code + + +def process_file(path: Path) -> bool: + text = path.read_text(encoding="utf-8") + parts = re.split(r"(^```[\w-]*\n)(.*?)(^```\s*$)", text, flags=re.MULTILINE | re.DOTALL) + if len(parts) < 2: + return False + + changed = False + new_parts = [parts[0]] + i = 1 + while i < len(parts): + if i + 2 < len(parts) and parts[i].startswith("```"): + fence_open = parts[i] + body = parts[i + 1] + fence_close = parts[i + 2] + lang_match = re.match(r"^```([\w-]*)", fence_open) + lang = lang_match.group(1) if lang_match else "" + new_body = indent_block(body.rstrip("\n"), lang) + if new_body != body.rstrip("\n"): + changed = True + new_parts.append(fence_open) + new_parts.append(new_body + "\n") + new_parts.append(fence_close) + i += 3 + else: + new_parts.append(parts[i]) + i += 1 + + if changed: + path.write_text("".join(new_parts), encoding="utf-8") + return changed + + +def main() -> None: + for path in sorted(APPS_DIR.glob("*.mdx")): + if process_file(path): + print(f"Updated {path.name}") + + +if __name__ == "__main__": + main() diff --git a/mintlify-docs/scripts/join_paragraphs.py b/mintlify-docs/scripts/join_paragraphs.py new file mode 
#!/usr/bin/env python3
"""Join wrapped prose lines into single lines in MDX files.

Front matter, fenced code blocks and "structural" lines (headings, MDX
components, table rows, ...) are copied through untouched; every other run of
consecutive non-blank lines is collapsed into one line per paragraph or list
item.
"""

from __future__ import annotations

import re
import sys
from pathlib import Path

ROOT = Path(__file__).resolve().parent.parent

# Compiled once: ``is_structural`` / ``is_list_item`` run for every line.
_HEADING_RE = re.compile(r"^#{1,6}\s")
_COMPONENT_RE = re.compile(
    r"^</?(Accordion|AccordionGroup|Warning|Note|Frame|Steps|Step|Card|Tabs|Tab|Info|Tip|Check|Callout|Danger)\b"
)
_HTML_BLOCK_RE = re.compile(r"^</?(table|tbody|thead|tr|th|td|caption|dl|dt|dd)\b", re.I)
_FRONTMATTER_KEY_RE = re.compile(r"^(title|sidebarTitle|description):")
_STRUCTURAL_RES = (_HEADING_RE, _COMPONENT_RE, _HTML_BLOCK_RE, _FRONTMATTER_KEY_RE)
_BULLET_RE = re.compile(r"^\s*[-*]\s+")
_ORDERED_RE = re.compile(r"^\s*\d+\.\s+")


def is_fence(line: str) -> bool:
    """Return True if *line* opens or closes a ``` fenced code block."""
    return line.strip().startswith("```")


def is_structural(line: str) -> bool:
    """Return True if *line* must stay on its own line and never be joined.

    Structural lines are: markdown headings, the listed MDX component and HTML
    table/definition-list tags, ``---`` rules, front-matter-style keys, MDX
    comments (``{/* ... */}``), table rows, images, and ``{`` expressions that
    are template literals or contain ``$$``.  Blank lines are not structural.
    """
    s = line.strip()
    if not s:
        return False
    if s == "---":
        return True
    if s.startswith(("|", "![", "{/*", "{`")) or s.endswith("*/}"):
        return True
    if s.startswith("{") and "$$" in s:
        return True
    return any(pattern.match(s) for pattern in _STRUCTURAL_RES)


def is_list_item(line: str) -> bool:
    """Return True if *line* starts a bullet (``-``/``*``) or numbered list item."""
    return bool(_BULLET_RE.match(line) or _ORDERED_RE.match(line))


def join_lines(group: list[str], keep_indent: bool = False) -> str:
    """Join *group* into one line, separating the stripped parts with a space.

    With ``keep_indent=True`` the leading whitespace of the first line is put
    back in front of the result.  List items need this: their indentation
    encodes the nesting level, so dropping it would flatten nested lists.
    """
    joined = " ".join(part.strip() for part in group)
    if keep_indent and group:
        first = group[0]
        joined = first[: len(first) - len(first.lstrip())] + joined
    return joined


def join_p_block(lines: list[str]) -> str:
    """Join a multi-line <p>...</p> block into one line."""
    text = " ".join(part.strip() for part in lines)
    return re.sub(r"\s+", " ", text)


def process_block(block: list[str]) -> list[str]:
    """Collapse one run of consecutive non-blank lines.

    * A line starting with ``<p`` that does not contain ``</p>`` swallows the
      following lines up to and including the first one containing ``</p>``
      (or the end of the block) and is whitespace-normalized.
    * Structural lines are copied verbatim.
    * Anything else starts a group that absorbs following lines until the next
      list item, structural line or line starting with ``<p``.  List-item
      groups keep their leading indentation; prose groups are stripped.
    """
    result: list[str] = []
    i = 0
    total = len(block)
    while i < total:
        line = block[i]
        stripped = line.strip()

        if stripped.startswith("<p") and "</p>" not in stripped:
            group = [line]
            i += 1
            while i < total and "</p>" not in block[i]:
                group.append(block[i])
                i += 1
            if i < total:
                # Include the line that carries the closing tag.
                group.append(block[i])
                i += 1
            result.append(join_p_block(group))
            continue

        if is_structural(line):
            result.append(line)
            i += 1
            continue

        starts_item = is_list_item(line)
        group = [line]
        i += 1
        while i < total and not is_list_item(block[i]) and not is_structural(block[i]):
            if block[i].strip().startswith("<p"):
                break
            group.append(block[i])
            i += 1
        result.append(join_lines(group, keep_indent=starts_item))
    return result


def process_file(path: Path) -> tuple[int, int]:
    """Rewrite *path* with wrapped lines joined.

    Returns ``(lines_before, lines_after)``.  The file is only written when
    its content actually changes.
    """
    original = path.read_text(encoding="utf-8")
    lines = original.split("\n")
    output: list[str] = []
    i = 0
    in_code = False
    in_frontmatter = False
    fm_count = 0  # number of ``---`` delimiters seen so far

    while i < len(lines):
        line = lines[i]

        if i == 0 and line.strip() == "---":
            in_frontmatter = True
            fm_count = 1

        if in_frontmatter:
            # Copy front matter verbatim up to and including the closing ``---``.
            output.append(line)
            if line.strip() == "---" and fm_count > 0 and i > 0:
                fm_count += 1
            if fm_count >= 2:
                in_frontmatter = False
            i += 1
            continue

        if is_fence(line):
            in_code = not in_code
            output.append(line)
            i += 1
            continue
        if in_code or not line.strip():
            output.append(line)
            i += 1
            continue

        block: list[str] = []
        while i < len(lines) and lines[i].strip() and not is_fence(lines[i]):
            block.append(lines[i])
            i += 1
        output.extend(process_block(block))

    rewritten = "\n".join(output)
    if rewritten != original:
        path.write_text(rewritten, encoding="utf-8")
    return len(lines), len(output)


def main() -> None:
    """Process the file or directory given as argv[1] (default: en/reference/api)."""
    target = Path(sys.argv[1]) if len(sys.argv) > 1 else ROOT / "en" / "reference" / "api"
    paths = [target] if target.is_file() else sorted(target.glob("*.mdx"))
    for path in paths:
        before, after = process_file(path)
        print(f"{path.name}: {before} -> {after} lines")


if __name__ == "__main__":
    main()
a/mintlify-docs/scripts/mdx_html_tables.py b/mintlify-docs/scripts/mdx_html_tables.py new file mode 100644 index 0000000000..6d8e182f9e --- /dev/null +++ b/mintlify-docs/scripts/mdx_html_tables.py @@ -0,0 +1,386 @@ +#!/usr/bin/env python3 +"""Convert HTML tables in MDX files to Mintlify markdown tables.""" + +from __future__ import annotations + +import html +import re +from pathlib import Path + +from html_to_mdx import Node, TreeBuilder, convert_href, preprocess_body + +TABLE_TAG_RE = re.compile(r"<table\b", re.IGNORECASE) +H2_HTML_RE = re.compile(r'<h2\s+id="[^"]*">\s*([^<]+?)\s*</h2>', re.IGNORECASE) +HR_RE = re.compile(r"<hr\s*/?>", re.IGNORECASE) + + +def fix_short_td_rows(html_text: str) -> str: + def repl(m: re.Match) -> str: + row = m.group(0) + if "</td>" in row or "</th>" in row: + return row + pieces = re.split(r"(?=<t[dh]\b)", row) + out = [pieces[0]] + for piece in pieces[1:]: + tag_m = re.match(r"(<t[dh][^>]*>)([\s\S]*)", piece, re.I) + if not tag_m: + continue + tag, rest = tag_m.groups() + content = re.split(r"(?=</tr>)", rest, maxsplit=1)[0].strip() + close = "</th>" if tag.lower().startswith("<th") else "</td>" + out.append(f"{tag}{content}{close}") + if "</tr>" in row and not out[-1].endswith("</tr>"): + out.append("</tr>") + return "".join(out) + + return re.sub(r"<tr[\s\S]*?</tr>", repl, html_text, flags=re.IGNORECASE) + + +def fix_html_structure(body: str) -> str: + body = re.sub( + r"(<p(?:\s[^>]*)?>(?:(?!</p>)[\s\S])*?)(\s*</td>)", + r"\1</p>\2", + body, + flags=re.IGNORECASE, + ) + body = re.sub( + r"(<li>(?:(?!</li>)[\s\S])*?)(\s*<li>)", + lambda m: m.group(1) + "</li>" + m.group(2), + body, + ) + body = fix_short_td_rows(body) + body = re.sub( + r"<tr><th rowspan=\"2\">Value\s*<th colspan=\"3\">Results in</tr>\s*" + r"<tr>\s*(?:<th>)?composite</th><th>tokenization</th><th>syntax</th></tr>", + "<tr><th>Value</th><th>composite</th><th>tokenization</th><th>syntax</th></tr>", + body, + flags=re.IGNORECASE, + ) + body = re.sub( + 
r"(<tr>(?:(?!</tr>)[\s\S])*?)(</table>)", + lambda m: m.group(1) + "</td></tr>" + m.group(2), + body, + flags=re.IGNORECASE, + ) + return body + + +def escape_cell(text: str) -> str: + text = text.replace("\n", " ").replace("\r", "") + text = re.sub(r"\s+", " ", text).strip() + return text.replace("|", "\\|") + + +def emit_inline(node: Node | str, source_dir: Path) -> str: + if isinstance(node, str): + return html.unescape(node) + tag = node.tag or "" + inner = "".join(emit_inline(c, source_dir) for c in node.children) + if tag == "br": + return "<br />" + if tag == "a": + href = convert_href(node.attrs.get("href", ""), source_dir) + text = inner.strip() or href + return f"[{text}]({href})" + if tag == "code": + return f"`{inner}`" + if tag == "pre": + code = html.unescape(node.text_content()).strip() + if "\n" in code: + return " ".join(f"`{line.strip()}`" for line in code.split("\n") if line.strip()) + return f"`{code}`" + if tag in ("em", "i"): + return f"*{inner}*" + if tag in ("strong", "b"): + return f"**{inner}**" + if tag == "mdx-warning": + return f"**Important:** {inner.strip()}" + if tag == "mdx-note": + return f"**Note:** {inner.strip()}" + return inner + + +def table_end(html_text: str, start: int) -> int: + j = html_text.find(">", start) + 1 + depth = 1 + while j < len(html_text) and depth > 0: + next_open = html_text.find("<table", j) + next_close = html_text.find("</table>", j) + if next_close == -1: + return len(html_text) + if next_open != -1 and next_open < next_close: + depth += 1 + j = html_text.find(">", next_open) + 1 + else: + depth -= 1 + j = next_close + len("</table>") + return j + + +def isolate_nested_tables(table_html: str) -> tuple[str, list[str]]: + nested: list[str] = [] + first = TABLE_TAG_RE.search(table_html) + if not first: + return table_html, nested + pos = first.end() + while True: + m = TABLE_TAG_RE.search(table_html[pos:]) + if not m: + break + start = pos + m.start() + end = table_end(table_html, start) + 
nested.append(table_html[start:end]) + placeholder = f"<!--NESTED{len(nested) - 1}-->" + table_html = table_html[:start] + placeholder + table_html[end:] + pos = start + len(placeholder) + return table_html, nested + + +def restore_nested(fragment: str, nested: list[str]) -> str: + for i, table_html in enumerate(nested): + fragment = fragment.replace(f"<!--NESTED{i}-->", table_html) + return fragment + + +def normalize_table_html(table_html: str) -> str: + def fence_sub(m: re.Match) -> str: + lang = m.group(1) or "txt" + code = html.escape(m.group(2).strip("\n")) + return f'<pre data-lang="{lang}">{code}</pre>' + + table_html = re.sub(r"```(\w*)\n([\s\S]*?)```", fence_sub, table_html) + return preprocess_body(fix_html_structure(table_html)) + + +def nested_table_to_md_html(table_html: str, source_dir: Path) -> str: + header, body_rows = extract_rows(table_html, source_dir) + if not header or not body_rows: + return "" + if len(header) == 4 and all(len(r) >= 4 for r in body_rows): + return " ".join( + f"`{row[0]}` → composite `{row[1]}`, tokenization `{row[2]}`, syntax `{row[3]}`" + for row in body_rows + ) + lines: list[str] = [] + for row in body_rows: + while len(row) < len(header): + row.append("") + parts = [f"**{h}:** {v}" for h, v in zip(header, row) if v.strip()] + if parts: + lines.append("; ".join(parts)) + return "<br />".join(lines) + + +def emit_cell_node(node: Node, source_dir: Path, nested: list[str] | None = None) -> str: + parts: list[str] = [] + for c in node.children: + if isinstance(c, str): + text = c.strip() + for m in re.finditer(r"<!--NESTED(\d+)-->", text): + if nested: + parts.append(nested_table_to_md_html(nested[int(m.group(1))], source_dir)) + text = re.sub(r"<!--NESTED\d+-->", "", text).strip() + if text: + parts.append(html.unescape(text)) + elif isinstance(c, Node): + if c.tag == "table": + parts.append(nested_table_to_md_html(node_to_html_table(node), source_dir)) + elif c.tag in ("ul", "ol"): + for li in c.children: + if 
isinstance(li, Node) and li.tag == "li": + parts.append(emit_inline(li, source_dir).strip()) + elif c.tag == "p": + text = emit_inline(c, source_dir).strip() + if text: + parts.append(text) + elif c.tag == "pre": + text = emit_inline(c, source_dir).strip() + if text: + parts.append(text) + elif c.tag in ("mdx-warning", "mdx-note"): + parts.append(emit_inline(c, source_dir).strip()) + else: + text = emit_inline(c, source_dir).strip() + if text: + parts.append(text) + return " ".join(parts) + + +def node_to_html_table(node: Node) -> str: + tag = node.tag or "" + inner = "".join(node_to_html_table_part(c) for c in node.children) + attrs = " ".join(f'{k}="{html.escape(v, quote=True)}"' for k, v in node.attrs.items()) + open_tag = f"<{tag} {attrs}>" if attrs else f"<{tag}>" + return f"{open_tag}{inner}</{tag}>" + + +def node_to_html_table_part(node: Node | str) -> str: + if isinstance(node, str): + return html.unescape(node) + tag = node.tag or "" + inner = "".join(node_to_html_table_part(c) for c in node.children) + attrs = " ".join(f'{k}="{html.escape(v, quote=True)}"' for k, v in node.attrs.items()) + open_tag = f"<{tag} {attrs}>" if attrs else f"<{tag}>" + return f"{open_tag}{inner}</{tag}>" + + +def tr_cells_from_inner(tr_inner: str, nested: list[str], source_dir: Path) -> list[str]: + inner = restore_nested(tr_inner, nested) + builder = TreeBuilder() + builder.feed(f"<tr>{inner}</tr>") + builder.close() + tr = next(c for c in builder.root.children if isinstance(c, Node) and c.tag == "tr") + cells: list[str] = [] + for c in tr.children: + if isinstance(c, Node) and c.tag == "th": + cells.append(escape_cell(emit_inline(c, source_dir))) + elif isinstance(c, Node) and c.tag == "td": + cells.append(escape_cell(emit_cell_node(c, source_dir, nested))) + return cells + + +def infer_header(first_row: list[str]) -> list[str]: + n = len(first_row) + if n == 2: + return ["Term", "Description"] + if n == 3: + return ["Name", "Default", "Description"] + if n == 4: + return 
["Name", "Type", "Default", "Description"] + return [f"Column {i + 1}" for i in range(n)] + + +def extract_rows(table_html: str, source_dir: Path) -> tuple[list[str], list[list[str]]]: + table_html = normalize_table_html(table_html) + header: list[str] = [] + body_rows: list[list[str]] = [] + isolated, nested = isolate_nested_tables(table_html) + thead = re.search(r"<thead>([\s\S]*?)</thead>", isolated, re.I) + if thead: + htr = re.search(r"<tr[^>]*>([\s\S]*?)</tr>", thead.group(1), re.I) + if htr: + header = tr_cells_from_inner(htr.group(1), [], source_dir) + tbody = re.search(r"<tbody>([\s\S]*?)</tbody>", isolated, re.I) + chunk = tbody.group(1) if tbody else isolated + for tr in re.finditer(r"<tr[^>]*>([\s\S]*?)</tr>", chunk, re.I): + cells = tr_cells_from_inner(tr.group(1), nested, source_dir) + if cells and any(c.strip() for c in cells): + body_rows.append(cells) + if not header and body_rows: + header = infer_header(body_rows[0]) + return header, body_rows + + +def html_table_to_markdown(table_html: str, source_dir: Path) -> str: + caption_m = re.search(r"<caption>([\s\S]*?)</caption>", table_html, re.I) + caption = html.unescape(caption_m.group(1)).strip() if caption_m else "" + header, body_rows = extract_rows(table_html, source_dir) + if not header: + return table_html + ncol = len(header) + lines: list[str] = [] + if caption: + lines.extend([f"*{caption}*", ""]) + lines.append("| " + " | ".join(header) + " |") + lines.append("| " + " | ".join("---" for _ in header) + " |") + for row in body_rows: + while len(row) < ncol: + row.append("") + lines.append("| " + " | ".join(row[:ncol]) + " |") + return "\n".join(lines) + "\n\n" + + +def inline_html_to_md(fragment: str, source_dir: Path) -> str: + if not fragment.strip(): + return "" + work = preprocess_body(fragment.strip()) + builder = TreeBuilder() + builder.feed(f"<div>{work}</div>") + builder.close() + parts: list[str] = [] + for c in builder.root.children: + if not isinstance(c, Node): + continue + if 
c.tag == "div": + for cc in c.children: + if isinstance(cc, Node) and cc.tag == "p": + text = emit_inline(cc, source_dir).strip() + if text: + parts.append(text) + elif isinstance(cc, Node) and cc.tag in ("ul", "ol"): + for li in cc.children: + if isinstance(li, Node) and li.tag == "li": + parts.append(f"- {emit_inline(li, source_dir).strip()}") + elif c.tag == "p": + text = emit_inline(c, source_dir).strip() + if text: + parts.append(text) + return "\n\n".join(parts) + ("\n\n" if parts else "") + + +def extract_all_tables(fragment: str) -> list[tuple[int, int, str]]: + tables: list[tuple[int, int, str]] = [] + pos = 0 + while True: + m = TABLE_TAG_RE.search(fragment[pos:]) + if not m: + break + start = pos + m.start() + end = table_end(fragment, start) + tables.append((start, end, fragment[start:end])) + pos = end + return tables + + +def convert_html_fragment(fragment: str, source_dir: Path) -> str: + fragment = H2_HTML_RE.sub(r"## \1\n\n", fragment) + out: list[str] = [] + pos = 0 + for start, end, table_html in extract_all_tables(fragment): + before = fragment[pos:start] + if before.strip(): + out.append(inline_html_to_md(before, source_dir)) + md = html_table_to_markdown(table_html, source_dir) + if md and not md.startswith("<table"): + out.append(md) + else: + out.append(table_html) + pos = end + rest = fragment[pos:] + if rest.strip(): + out.append(inline_html_to_md(rest, source_dir)) + text = "".join(out) + text = re.sub(r"\n{3,}", "\n\n", text) + return text + + +def split_frontmatter(text: str) -> tuple[str, str]: + if not text.startswith("---"): + return "", text + m = re.match(r"^---\r?\n(.*?)\r?\n---(?:\r?\n|$)", text, re.DOTALL) + if not m: + return "", text + return text[: m.end()], text[m.end() :] + + +def convert_mdx_file(path: Path) -> bool: + original = path.read_text(encoding="utf-8") + if "<table" not in original.lower(): + return False + fm, body = split_frontmatter(original) + converted = convert_html_fragment(body, path.parent) + if 
converted == body: + return False + fm_out = fm.rstrip("\n") + "\n" + body_out = converted.lstrip("\n") + path.write_text(fm_out + body_out, encoding="utf-8") + return True + + +def convert_tree(root: Path) -> int: + count = 0 + for path in sorted(root.rglob("*.mdx")): + if convert_mdx_file(path): + print(f"Converted tables in {path.relative_to(root.parent.parent)}") + count += 1 + return count diff --git a/mintlify-docs/scripts/unescape-mdx-code.ps1 b/mintlify-docs/scripts/unescape-mdx-code.ps1 new file mode 100644 index 0000000000..a72705070f --- /dev/null +++ b/mintlify-docs/scripts/unescape-mdx-code.ps1 @@ -0,0 +1,104 @@ +# Remove unnecessary markdown escape backslashes inside ``` fences and inline `code` +$root = Resolve-Path (Join-Path $PSScriptRoot "..") +$dirs = @("en\querying", "en\writing", "en\schemas") + +function Unescape-CodeText([string]$text) { + if ([string]::IsNullOrEmpty($text)) { return $text } + + $lines = $text -split "`n", -1 + $result = New-Object System.Collections.Generic.List[string] + + foreach ($line in $lines) { + $trimmed = $line.TrimEnd() + $trail = $line.Substring($trimmed.Length) + $hasLineCont = $trimmed -match '\\$' + + if ($hasLineCont) { + $work = $trimmed.Substring(0, $trimmed.Length - 1) + } else { + $work = $trimmed + } + + $placeholder = @{} + $i = 0 + $work = [regex]::Replace($work, '\\{2,}', { + param($m) + $key = "___BS$($i)___" + $placeholder[$key] = $m.Value + $script:i++ + return $key + }) + $work = [regex]::Replace($work, '\\["'']', { + param($m) + $key = "___QS$($i)___" + $placeholder[$key] = $m.Value + $script:i++ + return $key + }) + + $work = $work -replace '\\([*_\[\]{}|.+#&?-])', '$1' + + foreach ($kv in $placeholder.GetEnumerator()) { + $work = $work.Replace($kv.Key, $kv.Value) + } + + if ($hasLineCont) { $work += '\' } + $result.Add($work + $trail) + } + + return ($result -join "`n") +} + +function Process-InlineCode([string]$segment) { + $parts = [regex]::Split($segment, '(`[^`]+`)') + for ($j = 0; $j -lt 
$parts.Length; $j++) { + if ($j % 2 -eq 1) { + $inner = $parts[$j].Substring(1, $parts[$j].Length - 2) + $parts[$j] = '`' + (Unescape-CodeText $inner) + '`' + } + } + return ($parts -join '') +} + +function Process-FenceBody([string]$body) { + if ($body -match '^(?m)^') { + $nl = $body.IndexOf("`n") + if ($nl -lt 0) { return $body } + $langLine = $body.Substring(0, $nl + 1) + $code = $body.Substring($nl + 1) + return $langLine + (Unescape-CodeText $code) + } + return (Unescape-CodeText $body) +} + +$files = foreach ($d in $dirs) { + Get-ChildItem (Join-Path $root $d) -Recurse -Filter "*.mdx" +} + +foreach ($file in $files) { + $content = [IO.File]::ReadAllText($file.FullName) + $original = $content + $parts = [regex]::Split($content, '(```)') + $inFence = $false + + for ($i = 0; $i -lt $parts.Length; $i++) { + $part = $parts[$i] + if ($part -eq '```') { + $inFence = -not $inFence + continue + } + if ($inFence) { + $parts[$i] = Process-FenceBody $part + } else { + $parts[$i] = Process-InlineCode $part + } + } + + $newContent = ($parts -join '') + if ($newContent -ne $original) { + [IO.File]::WriteAllText($file.FullName, $newContent) + Write-Output "updated: $($file.Name)" + } +} + +Write-Output "done. $($files.Count) files scanned." 
#!/usr/bin/env python3
"""Wrap sequential numbered lists in Mintlify Steps/Step components."""

from __future__ import annotations

import re
from pathlib import Path

APPS_DIR = Path(__file__).resolve().parent.parent / "en" / "applications"
STEP_START = re.compile(r"^(\d+)\.\s+(.*)$")
HEADING = re.compile(r"^#{1,6}\s")
STRUCTURAL = re.compile(
    r"^</?(Steps|Step|Frame|Warning|Note|Accordion|AccordionGroup)\b"
)


def is_fence(line: str) -> bool:
    """Return True if *line* opens or closes a ``` fenced code block."""
    return line.strip().startswith("```")


def collect_numbered_block(lines: list[str], start: int) -> tuple[list[list[str]], int] | None:
    """Collect a sequential 1..n numbered list starting at `start`.

    Returns ``(steps, end)`` where each step is the list of its lines (the
    first without its ``N. `` prefix) and ``end`` is the index of the first
    line after the list.  Returns ``None`` unless ``lines[start]`` begins with
    ``1.`` at column 0 and at least two steps are found.

    The list ends at a heading, at one of the ``STRUCTURAL`` tags, or at an
    out-of-sequence number.  Everything else - blank lines, prose, fenced
    code - is treated as a continuation of the current step.
    """
    if start >= len(lines):
        return None
    m = STEP_START.match(lines[start])
    if not m or m.group(1) != "1":
        return None

    steps: list[list[str]] = []
    current: list[str] = [m.group(2)]
    expected = 2
    i = start + 1
    in_fence = False

    while i < len(lines):
        line = lines[i]

        if is_fence(line):
            in_fence = not in_fence
        elif not in_fence:
            if HEADING.match(line) or STRUCTURAL.match(line):
                break
            sm = STEP_START.match(line)
            if sm:
                num = int(sm.group(1))
                if num == expected:
                    steps.append(current)
                    current = [sm.group(2)]
                    expected += 1
                    i += 1
                    continue
                # A "1." after at least two steps is a nested sub-list restart
                # and stays inside the current step; any other number ends
                # the list.
                if not (num == 1 and expected > 2):
                    break

        current.append(line)
        i += 1

    steps.append(current)
    if len(steps) < 2:
        return None
    return steps, i


def steps_to_mdx(steps: list[list[str]]) -> list[str]:
    """Render *steps* as the lines of a ``<Steps>`` block, one ``<Step>`` each."""
    out = ["<Steps>"]
    for step_lines in steps:
        out.append(" <Step>")
        body = "\n".join(step_lines).strip("\n")
        if body:
            # Blank lines stay empty so no trailing whitespace is introduced.
            out.extend(f" {bl}" if bl else "" for bl in body.split("\n"))
        out.append(" </Step>")
        out.append("")
    out.append("</Steps>")
    return out


def process_file(path: Path) -> bool:
    """Wrap the numbered lists of *path* in ``<Steps>``; return True if changed.

    Front matter, existing ``<Steps>`` blocks and fenced code blocks are
    copied verbatim.  Skipping fences matters: a numbered list inside a code
    sample must not be wrapped, and its closing fence would otherwise make the
    collector swallow the rest of the file.
    """
    lines = path.read_text(encoding="utf-8").split("\n")
    out: list[str] = []
    i = 0
    in_frontmatter = False
    fm_done = 0  # number of ``---`` delimiters seen so far
    in_fence = False
    changed = False

    while i < len(lines):
        line = lines[i]

        if i == 0 and line.strip() == "---":
            in_frontmatter = True
            fm_done = 1
        if in_frontmatter:
            out.append(line)
            if line.strip() == "---" and fm_done > 0 and i > 0:
                fm_done += 1
            if fm_done >= 2:
                in_frontmatter = False
            i += 1
            continue

        if is_fence(line):
            in_fence = not in_fence
            out.append(line)
            i += 1
            continue
        if in_fence:
            out.append(line)
            i += 1
            continue

        if line.strip().startswith("<Steps>"):
            # Already wrapped — copy until </Steps>
            out.append(line)
            i += 1
            while i < len(lines) and lines[i].strip() != "</Steps>":
                out.append(lines[i])
                i += 1
            if i < len(lines):
                out.append(lines[i])
                i += 1
            continue

        block = collect_numbered_block(lines, i)
        if block:
            steps, end = block
            out.extend(steps_to_mdx(steps))
            out.append("")
            i = end
            changed = True
            continue

        out.append(line)
        i += 1

    if changed:
        text = "\n".join(out)
        text = re.sub(r"\n{3,}", "\n\n", text)
        path.write_text(text.rstrip() + "\n", encoding="utf-8")
    return changed


def main() -> None:
    """Process every ``*.mdx`` file in ``APPS_DIR``."""
    for path in sorted(APPS_DIR.glob("*.mdx")):
        if process_file(path):
            print(f"Updated {path.name}")


if __name__ == "__main__":
    main()