Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions pageindex/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .events import QueryEvent
from .errors import (
PageIndexError,
PageIndexAPIError,
CollectionNotFoundError,
DocumentNotFoundError,
IndexingError,
Expand All @@ -32,6 +33,7 @@
"StorageEngine",
"QueryEvent",
"PageIndexError",
"PageIndexAPIError",
"CollectionNotFoundError",
"DocumentNotFoundError",
"IndexingError",
Expand Down
105 changes: 103 additions & 2 deletions pageindex/client.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# pageindex/client.py
from __future__ import annotations
from pathlib import Path
from typing import Any, Iterator

from .collection import Collection
from .config import IndexConfig
from .errors import PageIndexAPIError
from .parser.protocol import DocumentParser


Expand Down Expand Up @@ -39,21 +42,25 @@ class PageIndexClient:
# Or use LocalClient / CloudClient for explicit mode selection
"""

def __init__(self, api_key: str = None, model: str = None,
def __init__(self, api_key: str | None = None, model: str = None,
retrieve_model: str = None, storage_path: str = None,
storage=None, index_config: IndexConfig | dict = None):
if api_key:
if api_key is not None and api_key != "":
self._init_cloud(api_key)
else:
self._init_local(model, retrieve_model, storage_path, storage, index_config)

def _init_cloud(self, api_key: str):
from .backend.cloud import CloudBackend
from .cloud_api import LegacyCloudAPI
self._backend = CloudBackend(api_key=api_key)
self._legacy_cloud_api = LegacyCloudAPI(api_key=api_key)

def _init_local(self, model: str = None, retrieve_model: str = None,
storage_path: str = None, storage=None,
index_config: IndexConfig | dict = None):
self._legacy_cloud_api = None

# Build IndexConfig: merge model/retrieve_model with index_config
overrides = {}
if model:
Expand Down Expand Up @@ -123,6 +130,100 @@ def register_parser(self, parser: DocumentParser) -> None:
raise PageIndexError("Custom parsers are not supported in cloud mode")
self._backend.register_parser(parser)

def _require_cloud_api(self):
if self._legacy_cloud_api is None:
from .errors import PageIndexAPIError
raise PageIndexAPIError(
"This method is part of the pageindex 0.2.x cloud SDK API. "
"Initialize with api_key to use it."
)
return self._legacy_cloud_api

# pageindex 0.2.x cloud SDK compatibility methods
def submit_document(
self,
file_path: str,
mode: str | None = None,
beta_headers: list[str] | None = None,
folder_id: str | None = None,
) -> dict[str, Any]:
return self._require_cloud_api().submit_document(
file_path=file_path,
mode=mode,
beta_headers=beta_headers,
folder_id=folder_id,
)

def get_ocr(self, doc_id: str, format: str = "page") -> dict[str, Any]:
return self._require_cloud_api().get_ocr(doc_id=doc_id, format=format)

def get_tree(self, doc_id: str, node_summary: bool = False) -> dict[str, Any]:
return self._require_cloud_api().get_tree(doc_id=doc_id, node_summary=node_summary)

def is_retrieval_ready(self, doc_id: str) -> bool:
return self._require_cloud_api().is_retrieval_ready(doc_id=doc_id)

def submit_query(self, doc_id: str, query: str, thinking: bool = False) -> dict[str, Any]:
return self._require_cloud_api().submit_query(
doc_id=doc_id,
query=query,
thinking=thinking,
)

def get_retrieval(self, retrieval_id: str) -> dict[str, Any]:
return self._require_cloud_api().get_retrieval(retrieval_id=retrieval_id)

def chat_completions(
self,
messages: list[dict[str, str]],
stream: bool = False,
doc_id: str | list[str] | None = None,
temperature: float | None = None,
stream_metadata: bool = False,
enable_citations: bool = False,
) -> dict[str, Any] | Iterator[str] | Iterator[dict[str, Any]]:
return self._require_cloud_api().chat_completions(
messages=messages,
stream=stream,
doc_id=doc_id,
temperature=temperature,
stream_metadata=stream_metadata,
enable_citations=enable_citations,
)

def get_document(self, doc_id: str) -> dict[str, Any]:
return self._require_cloud_api().get_document(doc_id=doc_id)

def delete_document(self, doc_id: str) -> dict[str, Any]:
return self._require_cloud_api().delete_document(doc_id=doc_id)

def list_documents(
self,
limit: int = 50,
offset: int = 0,
folder_id: str | None = None,
) -> dict[str, Any]:
return self._require_cloud_api().list_documents(
limit=limit,
offset=offset,
folder_id=folder_id,
)

def create_folder(
self,
name: str,
description: str | None = None,
parent_folder_id: str | None = None,
) -> dict[str, Any]:
return self._require_cloud_api().create_folder(
name=name,
description=description,
parent_folder_id=parent_folder_id,
)

def list_folders(self, parent_folder_id: str | None = None) -> dict[str, Any]:
return self._require_cloud_api().list_folders(parent_folder_id=parent_folder_id)


class LocalClient(PageIndexClient):
"""Local mode — indexes and queries documents on your machine.
Expand Down
Loading