refactor(api): migrate console/service_api.dataset.document to BaseModel (#36506)

Co-authored-by: WH-2099 <wh2099@pm.me>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
chariri
2026-05-30 23:38:27 +09:00
committed by GitHub
parent 6805d9bfc0
commit 599960024d
17 changed files with 1412 additions and 611 deletions
@@ -9,7 +9,7 @@ from uuid import UUID
import sqlalchemy as sa
from flask import request, send_file
from flask_restx import Resource, marshal
from flask_restx import Resource
from pydantic import BaseModel, Field, field_validator
from sqlalchemy import asc, desc, func, select
from werkzeug.exceptions import Forbidden, NotFound
@@ -34,14 +34,16 @@ from core.rag.index_processor.constant.index_type import IndexTechniqueType
from extensions.ext_database import db
from fields.base import ResponseModel
from fields.document_fields import (
document_fields,
document_status_fields,
document_with_segments_fields,
DocumentMetadataResponse,
DocumentResponse,
DocumentStatusListResponse,
DocumentStatusResponse,
normalize_enum,
)
from graphon.model_runtime.entities.model_entities import ModelType
from graphon.model_runtime.errors.invoke import InvokeAuthorizationError
from libs.datetime_utils import naive_utc_now
from libs.helper import to_timestamp
from libs.helper import dump_response, to_timestamp
from libs.login import current_account_with_tenant, login_required
from models import DatasetProcessRule, Document, DocumentSegment, UploadFile
from models.dataset import DocumentPipelineExecutionLog
@@ -74,12 +76,6 @@ from ..wraps import (
logger = logging.getLogger(__name__)
def _normalize_enum(value: Any) -> Any:
if isinstance(value, str) or value is None:
return value
return getattr(value, "value", value)
class DatasetResponse(ResponseModel):
id: str
name: str
@@ -93,7 +89,7 @@ class DatasetResponse(ResponseModel):
@field_validator("data_source_type", "indexing_technique", mode="before")
@classmethod
def _normalize_enum_fields(cls, value: Any) -> Any:
return _normalize_enum(value)
return normalize_enum(value)
@field_validator("created_at", mode="before")
@classmethod
@@ -101,61 +97,10 @@ class DatasetResponse(ResponseModel):
return to_timestamp(value)
class DocumentMetadataResponse(ResponseModel):
id: str
name: str
type: str
value: str | None = None
class DocumentResponse(ResponseModel):
id: str
position: int | None = None
data_source_type: str | None = None
data_source_info: Any = Field(default=None, validation_alias="data_source_info_dict")
data_source_detail_dict: Any = None
dataset_process_rule_id: str | None = None
name: str
created_from: str | None = None
created_by: str | None = None
created_at: int | None = None
tokens: int | None = None
indexing_status: str | None = None
error: str | None = None
enabled: bool | None = None
disabled_at: int | None = None
disabled_by: str | None = None
archived: bool | None = None
display_status: str | None = None
word_count: int | None = None
hit_count: int | None = None
doc_form: str | None = None
doc_metadata: list[DocumentMetadataResponse] = Field(default_factory=list, validation_alias="doc_metadata_details")
summary_index_status: str | None = None
need_summary: bool | None = None
@field_validator("data_source_type", "indexing_status", "display_status", "doc_form", mode="before")
@classmethod
def _normalize_enum_fields(cls, value: Any) -> Any:
return _normalize_enum(value)
@field_validator("doc_metadata", mode="before")
@classmethod
def _normalize_doc_metadata(cls, value: Any) -> list[Any]:
if value is None:
return []
return value
@field_validator("created_at", "disabled_at", mode="before")
@classmethod
def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
return to_timestamp(value)
class DocumentWithSegmentsResponse(DocumentResponse):
process_rule_dict: Any = None
completed_segments: int | None = None
total_segments: int | None = None
completed_segments: int | None = Field(default=None, exclude_if=lambda value: value is None)
total_segments: int | None = Field(default=None, exclude_if=lambda value: value is None)
class DatasetAndDocumentResponse(ResponseModel):
@@ -190,6 +135,14 @@ class DocumentDatasetListParam(BaseModel):
fetch_val: str = Field("false", alias="fetch")
# Paginated envelope returned by the console document-list endpoint:
# one page of documents plus the paging metadata that describes it.
class DocumentWithSegmentsListResponse(ResponseModel):
    # Documents on the current page.
    data: list[DocumentWithSegmentsResponse]
    # Paging metadata supplied by the caller that builds the response.
    has_more: bool
    limit: int
    total: int
    page: int
register_schema_models(
console_ns,
KnowledgeConfig,
@@ -200,13 +153,19 @@ register_schema_models(
GenerateSummaryPayload,
DocumentMetadataUpdatePayload,
DocumentBatchDownloadZipPayload,
)
register_response_schema_models(
console_ns,
SimpleResultMessageResponse,
SimpleResultResponse,
UrlResponse,
DatasetResponse,
DocumentMetadataResponse,
DocumentResponse,
DocumentWithSegmentsResponse,
DatasetAndDocumentResponse,
DocumentWithSegmentsListResponse,
)
register_response_schema_models(console_ns, SimpleResultMessageResponse, SimpleResultResponse, UrlResponse)
class DocumentResource(Resource):
@@ -312,7 +271,11 @@ class DatasetDocumentListApi(Resource):
"status": "Filter documents by display status",
}
)
@console_ns.response(200, "Documents retrieved successfully")
@console_ns.response(
200,
"Documents retrieved successfully",
console_ns.models[DocumentWithSegmentsListResponse.__name__],
)
@setup_required
@login_required
@account_initialization_required
@@ -425,18 +388,15 @@ class DatasetDocumentListApi(Resource):
)
document.completed_segments = completed_segments
document.total_segments = total_segments
data = marshal(documents, document_with_segments_fields)
else:
data = marshal(documents, document_fields)
response = {
"data": data,
"data": documents,
"has_more": len(documents) == limit,
"limit": limit,
"total": paginated_documents.total,
"page": page,
}
return response
return dump_response(DocumentWithSegmentsListResponse, response)
@setup_required
@login_required
@@ -482,9 +442,7 @@ class DatasetDocumentListApi(Resource):
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
return DatasetAndDocumentResponse.model_validate(
{"dataset": dataset, "documents": documents, "batch": batch}, from_attributes=True
).model_dump(mode="json")
return dump_response(DatasetAndDocumentResponse, {"dataset": dataset, "documents": documents, "batch": batch})
@setup_required
@login_required
@@ -567,9 +525,7 @@ class DatasetInitApi(Resource):
except ModelCurrentlyNotSupportError:
raise ProviderModelCurrentlyNotSupportError()
return DatasetAndDocumentResponse.model_validate(
{"dataset": dataset, "documents": documents, "batch": batch}, from_attributes=True
).model_dump(mode="json")
return dump_response(DatasetAndDocumentResponse, {"dataset": dataset, "documents": documents, "batch": batch})
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-estimate")
@@ -742,6 +698,9 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):
@console_ns.route("/datasets/<uuid:dataset_id>/batch/<string:batch>/indexing-status")
class DocumentBatchIndexingStatusApi(DocumentResource):
@console_ns.response(
200, "Indexing status retrieved successfully", console_ns.models[DocumentStatusListResponse.__name__]
)
@setup_required
@login_required
@account_initialization_required
@@ -784,9 +743,8 @@ class DocumentBatchIndexingStatusApi(DocumentResource):
"completed_segments": completed_segments,
"total_segments": total_segments,
}
documents_status.append(marshal(document_dict, document_status_fields))
data = {"data": documents_status}
return data
documents_status.append(document_dict)
return dump_response(DocumentStatusListResponse, {"data": documents_status})
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-status")
@@ -794,7 +752,9 @@ class DocumentIndexingStatusApi(DocumentResource):
@console_ns.doc("get_document_indexing_status")
@console_ns.doc(description="Get document indexing status")
@console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@console_ns.response(200, "Indexing status retrieved successfully")
@console_ns.response(
200, "Indexing status retrieved successfully", console_ns.models[DocumentStatusResponse.__name__]
)
@console_ns.response(404, "Document not found")
@setup_required
@login_required
@@ -839,7 +799,7 @@ class DocumentIndexingStatusApi(DocumentResource):
"completed_segments": completed_segments,
"total_segments": total_segments,
}
return marshal(document_dict, document_status_fields)
return dump_response(DocumentStatusResponse, document_dict)
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
@@ -1304,7 +1264,7 @@ class DocumentRenameApi(DocumentResource):
except services.errors.document.DocumentIndexingError:
raise DocumentIndexingError("Cannot delete document during indexing.")
return DocumentResponse.model_validate(document, from_attributes=True).model_dump(mode="json")
return dump_response(DocumentResponse, document)
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/website-sync")
+100 -21
View File
@@ -12,7 +12,6 @@ from typing import Self
from uuid import UUID
from flask import request, send_file
from flask_restx import marshal
from pydantic import BaseModel, Field, field_validator, model_validator
from sqlalchemy import desc, func, select
from werkzeug.exceptions import Forbidden, NotFound
@@ -27,7 +26,12 @@ from controllers.common.errors import (
UnsupportedFileTypeError,
)
from controllers.common.fields import UrlResponse
from controllers.common.schema import register_enum_models, register_response_schema_models, register_schema_models
from controllers.common.schema import (
query_params_from_model,
register_enum_models,
register_response_schema_models,
register_schema_models,
)
from controllers.service_api import service_api_ns
from controllers.service_api.app.error import ProviderNotInitializeError
from controllers.service_api.dataset.error import (
@@ -44,7 +48,13 @@ from core.errors.error import ProviderTokenNotInitError
from core.rag.entities import PreProcessingRule, Rule, Segmentation
from core.rag.retrieval.retrieval_methods import RetrievalMethod
from extensions.ext_database import db
from fields.document_fields import document_fields, document_status_fields
from fields.base import ResponseModel
from fields.document_fields import (
DocumentListResponse,
DocumentResponse,
DocumentStatusListResponse,
)
from libs.helper import dump_response
from libs.login import current_user
from models.dataset import Dataset, Document, DocumentSegment
from models.enums import SegmentStatus
@@ -107,6 +117,44 @@ class DocumentListQuery(BaseModel):
status: str | None = Field(default=None, description="Document status filter")
def _form_data_param(param_type: str, required: bool, description: str) -> dict[str, object]:
    """Build one ``formData`` parameter entry for the endpoint documentation."""
    return {
        "in": "formData",
        "type": param_type,
        "required": required,
        "description": description,
    }


# Documented parameters for the multipart "create document by file" endpoint.
DOCUMENT_CREATE_BY_FILE_PARAMS = {
    "dataset_id": "Dataset ID",
    "file": _form_data_param("file", True, "Document file to upload."),
    "data": _form_data_param(
        "string",
        False,
        "Optional JSON string with document creation settings.",
    ),
}

# Documented parameters for the multipart "update document by file" endpoints.
DOCUMENT_UPDATE_BY_FILE_PARAMS = {
    "dataset_id": "Dataset ID",
    "document_id": "Document ID",
    "file": _form_data_param("file", False, "Replacement document file."),
    "data": _form_data_param(
        "string",
        False,
        "Optional JSON string with document update settings.",
    ),
}
# Response body of the service-API create/update document endpoints:
# the affected document together with the batch value returned alongside it.
class DocumentAndBatchResponse(ResponseModel):
    document: DocumentResponse
    batch: str
register_enum_models(service_api_ns, RetrievalMethod)
register_schema_models(
@@ -121,7 +169,14 @@ register_schema_models(
PreProcessingRule,
Segmentation,
)
register_response_schema_models(service_api_ns, UrlResponse)
register_response_schema_models(
service_api_ns,
UrlResponse,
DocumentResponse,
DocumentAndBatchResponse,
DocumentListResponse,
DocumentStatusListResponse,
)
def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[str, object], int]:
@@ -188,8 +243,7 @@ def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[
raise ProviderNotInitializeError(ex.description)
document = documents[0]
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
return documents_and_batch_fields, 200
return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200
def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]:
@@ -248,8 +302,7 @@ def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID
raise ProviderNotInitializeError(ex.description)
document = documents[0]
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
return documents_and_batch_fields, 200
return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200
@service_api_ns.route("/datasets/<uuid:dataset_id>/document/create-by-text")
@@ -267,6 +320,9 @@ class DocumentAddByTextApi(DatasetApiResource):
400: "Bad request - invalid parameters",
}
)
@service_api_ns.response(
200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_resource_check("documents", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
@@ -296,6 +352,9 @@ class DeprecatedDocumentAddByTextApi(DatasetApiResource):
400: "Bad request - invalid parameters",
}
)
@service_api_ns.response(
200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_resource_check("documents", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
@@ -319,6 +378,9 @@ class DocumentUpdateByTextApi(DatasetApiResource):
404: "Document not found",
}
)
@service_api_ns.response(
200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
@@ -347,6 +409,9 @@ class DeprecatedDocumentUpdateByTextApi(DatasetApiResource):
404: "Document not found",
}
)
@service_api_ns.response(
200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
@@ -363,7 +428,7 @@ class DocumentAddByFileApi(DatasetApiResource):
@service_api_ns.doc("create_document_by_file")
@service_api_ns.doc(description="Create a new document by uploading a file")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_CREATE_BY_FILE_PARAMS)
@service_api_ns.doc(
responses={
200: "Document created successfully",
@@ -371,6 +436,9 @@ class DocumentAddByFileApi(DatasetApiResource):
400: "Bad request - invalid file or parameters",
}
)
@service_api_ns.response(
200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_resource_check("documents", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
@@ -462,8 +530,7 @@ class DocumentAddByFileApi(DatasetApiResource):
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
document = documents[0]
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
return documents_and_batch_fields, 200
return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200
def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]:
@@ -539,8 +606,7 @@ def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID
except ProviderTokenNotInitError as ex:
raise ProviderNotInitializeError(ex.description)
document = documents[0]
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": document.batch}
return documents_and_batch_fields, 200
return dump_response(DocumentAndBatchResponse, {"document": document, "batch": document.batch}), 200
@service_api_ns.route(
@@ -558,7 +624,7 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
"Use PATCH /datasets/{dataset_id}/documents/{document_id} instead."
)
)
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_UPDATE_BY_FILE_PARAMS)
@service_api_ns.doc(
responses={
200: "Document updated successfully",
@@ -566,6 +632,9 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
404: "Document not found",
}
)
@service_api_ns.response(
200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
@@ -577,7 +646,7 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
class DocumentListApi(DatasetApiResource):
@service_api_ns.doc("list_documents")
@service_api_ns.doc(description="List all documents in a dataset")
@service_api_ns.doc(params={"dataset_id": "Dataset ID"})
@service_api_ns.doc(params={"dataset_id": "Dataset ID", **query_params_from_model(DocumentListQuery)})
@service_api_ns.doc(
responses={
200: "Documents retrieved successfully",
@@ -585,6 +654,9 @@ class DocumentListApi(DatasetApiResource):
404: "Dataset not found",
}
)
@service_api_ns.response(
200, "Documents retrieved successfully", service_api_ns.models[DocumentListResponse.__name__]
)
def get(self, tenant_id, dataset_id: UUID):
dataset_id_str = str(dataset_id)
tenant_id = str(tenant_id)
@@ -618,14 +690,14 @@ class DocumentListApi(DatasetApiResource):
)
response = {
"data": marshal(documents, document_fields),
"data": documents,
"has_more": len(documents) == query_params.limit,
"limit": query_params.limit,
"total": paginated_documents.total,
"page": query_params.page,
}
return response
return dump_response(DocumentListResponse, response)
@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/download-zip")
@@ -680,6 +752,11 @@ class DocumentIndexingStatusApi(DatasetApiResource):
404: "Dataset or documents not found",
}
)
@service_api_ns.response(
200,
"Indexing status retrieved successfully",
service_api_ns.models[DocumentStatusListResponse.__name__],
)
def get(self, tenant_id, dataset_id: UUID, batch: str):
dataset_id_str = str(dataset_id)
tenant_id = str(tenant_id)
@@ -729,9 +806,8 @@ class DocumentIndexingStatusApi(DatasetApiResource):
"completed_segments": completed_segments,
"total_segments": total_segments,
}
documents_status.append(marshal(document_dict, document_status_fields))
data = {"data": documents_status}
return data
documents_status.append(document_dict)
return dump_response(DocumentStatusListResponse, {"data": documents_status})
@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/download")
@@ -890,7 +966,7 @@ class DocumentApi(DatasetApiResource):
@service_api_ns.doc("update_document_by_file")
@service_api_ns.doc(description="Update an existing document by uploading a file")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_UPDATE_BY_FILE_PARAMS)
@service_api_ns.doc(
responses={
200: "Document updated successfully",
@@ -898,6 +974,9 @@ class DocumentApi(DatasetApiResource):
404: "Document not found",
}
)
@service_api_ns.response(
200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def patch(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
+24 -7
View File
@@ -103,7 +103,11 @@ def _replace_schema_table_type(markdown: str, definition_name: str, row_name: st
lines[index] = "|".join(cells)
break
return "\n".join(lines)
return "\n".join(lines) + ("\n" if markdown.endswith("\n") else "")
def _has_union_schema(schema: object) -> bool:
return isinstance(schema, dict) and (isinstance(schema.get("oneOf"), list) or isinstance(schema.get("anyOf"), list))
def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
@@ -117,8 +121,20 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
for definition_name, schema in definitions.items():
if not isinstance(definition_name, str) or not isinstance(schema, dict):
continue
one_of = schema.get("oneOf")
if not isinstance(one_of, list):
properties = schema.get("properties")
if isinstance(properties, dict):
for property_name, property_schema in properties.items():
if isinstance(property_name, str) and _has_union_schema(property_schema):
markdown = _replace_schema_table_type(
markdown,
definition_name,
property_name,
_schema_markdown_type(property_schema),
)
union_variants = schema.get("oneOf") or schema.get("anyOf")
if not isinstance(union_variants, list):
continue
markdown = _replace_schema_table_type(
@@ -128,7 +144,7 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
_schema_markdown_type(schema),
)
for variant in one_of:
for variant in union_variants:
variant_name = _definition_ref_name(variant)
variant_schema = definitions.get(variant_name) if variant_name is not None else None
if not isinstance(variant_name, str) or not isinstance(variant_schema, dict):
@@ -150,7 +166,7 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
def _convert_spec_to_markdown(spec_path: Path, markdown_path: Path) -> None:
markdown_path.parent.mkdir(parents=True, exist_ok=True)
with tempfile.TemporaryDirectory(prefix=f"{markdown_path.stem}-", dir=markdown_path.parent) as temp_dir:
with tempfile.TemporaryDirectory(prefix=f"{markdown_path.stem}-") as temp_dir:
temp_markdown_path = Path(temp_dir) / markdown_path.name
result = subprocess.run(
[
@@ -158,12 +174,13 @@ def _convert_spec_to_markdown(spec_path: Path, markdown_path: Path) -> None:
"--yes",
SWAGGER_MARKDOWN_PACKAGE,
"-i",
str(spec_path),
str(spec_path.resolve()),
"-o",
str(temp_markdown_path),
str(temp_markdown_path.resolve()),
],
check=False,
capture_output=True,
cwd=temp_dir,
text=True,
)
if result.returncode != 0:
+105 -88
View File
@@ -1,95 +1,112 @@
from flask_restx import fields
"""Response schemas for dataset document endpoints."""
from fields.dataset_fields import dataset_fields
from libs.helper import TimestampField
from datetime import datetime
from typing import Any
document_metadata_fields = {
"id": fields.String,
"name": fields.String,
"type": fields.String,
"value": fields.String,
}
from pydantic import Field, field_validator
document_fields = {
"id": fields.String,
"position": fields.Integer,
"data_source_type": fields.String,
"data_source_info": fields.Raw(attribute="data_source_info_dict"),
"data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"),
"dataset_process_rule_id": fields.String,
"name": fields.String,
"created_from": fields.String,
"created_by": fields.String,
"created_at": TimestampField,
"tokens": fields.Integer,
"indexing_status": fields.String,
"error": fields.String,
"enabled": fields.Boolean,
"disabled_at": TimestampField,
"disabled_by": fields.String,
"archived": fields.Boolean,
"display_status": fields.String,
"word_count": fields.Integer,
"hit_count": fields.Integer,
"doc_form": fields.String,
"doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"),
# Summary index generation status:
# "SUMMARIZING" (when task is queued and generating)
"summary_index_status": fields.String,
# Whether this document needs summary index generation
"need_summary": fields.Boolean,
}
from fields.base import ResponseModel
from libs.helper import to_timestamp
document_with_segments_fields = {
"id": fields.String,
"position": fields.Integer,
"data_source_type": fields.String,
"data_source_info": fields.Raw(attribute="data_source_info_dict"),
"data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"),
"dataset_process_rule_id": fields.String,
"process_rule_dict": fields.Raw(attribute="process_rule_dict"),
"name": fields.String,
"created_from": fields.String,
"created_by": fields.String,
"created_at": TimestampField,
"tokens": fields.Integer,
"indexing_status": fields.String,
"error": fields.String,
"enabled": fields.Boolean,
"disabled_at": TimestampField,
"disabled_by": fields.String,
"archived": fields.Boolean,
"display_status": fields.String,
"word_count": fields.Integer,
"hit_count": fields.Integer,
"completed_segments": fields.Integer,
"total_segments": fields.Integer,
"doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"),
# Summary index generation status:
# "SUMMARIZING" (when task is queued and generating)
"summary_index_status": fields.String,
"need_summary": fields.Boolean, # Whether this document needs summary index generation
}
dataset_and_document_fields = {
"dataset": fields.Nested(dataset_fields),
"documents": fields.List(fields.Nested(document_fields)),
"batch": fields.String,
}
def normalize_enum(value: Any) -> Any:
    """Unwrap an enum-like object to its ``value`` attribute.

    ``None`` and any ``str`` instance (including ``str`` subclasses) are
    returned unchanged. For every other object the ``value`` attribute is
    returned when present; otherwise the object itself is returned.
    """
    passthrough = value is None or isinstance(value, str)
    return value if passthrough else getattr(value, "value", value)
document_status_fields = {
"id": fields.String,
"indexing_status": fields.String,
"processing_started_at": TimestampField,
"parsing_completed_at": TimestampField,
"cleaning_completed_at": TimestampField,
"splitting_completed_at": TimestampField,
"completed_at": TimestampField,
"paused_at": TimestampField,
"error": fields.String,
"stopped_at": TimestampField,
"completed_segments": fields.Integer,
"total_segments": fields.Integer,
}
document_status_fields_list = {"data": fields.List(fields.Nested(document_status_fields))}
# One metadata entry attached to a document.
class DocumentMetadataResponse(ResponseModel):
    id: str
    name: str
    type: str
    # Scalar metadata value; defaults to None when not provided.
    value: str | int | float | bool | None = None
# Serialized view of a dataset document. Every field except ``id`` and
# ``name`` is optional and defaults to None (``doc_metadata`` to an empty list).
class DocumentResponse(ResponseModel):
    id: str
    position: int | None = None
    data_source_type: str | None = None
    # Read from ``data_source_info_dict`` on the validated input.
    data_source_info: Any = Field(default=None, validation_alias="data_source_info_dict")
    data_source_detail_dict: Any = None
    dataset_process_rule_id: str | None = None
    name: str
    created_from: str | None = None
    created_by: str | None = None
    created_at: int | None = None
    tokens: int | None = None
    indexing_status: str | None = None
    error: str | None = None
    enabled: bool | None = None
    disabled_at: int | None = None
    disabled_by: str | None = None
    archived: bool | None = None
    display_status: str | None = None
    word_count: int | None = None
    hit_count: int | None = None
    doc_form: str | None = None
    # Read from ``doc_metadata_details`` on the validated input.
    doc_metadata: list[DocumentMetadataResponse] = Field(
        default_factory=list,
        validation_alias="doc_metadata_details",
    )
    summary_index_status: str | None = None
    need_summary: bool | None = None

    @field_validator("data_source_type", "indexing_status", "display_status", "doc_form", mode="before")
    @classmethod
    def _normalize_enum_fields(cls, value: Any) -> Any:
        """Pass the raw value through ``normalize_enum`` before field validation."""
        return normalize_enum(value)

    @field_validator("doc_metadata", mode="before")
    @classmethod
    def _normalize_doc_metadata(cls, value: Any) -> list[Any]:
        """Substitute an empty list when the incoming metadata is ``None``."""
        return [] if value is None else value

    @field_validator("created_at", "disabled_at", mode="before")
    @classmethod
    def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
        """Convert the raw value with ``to_timestamp`` before field validation."""
        return to_timestamp(value)
# Paginated envelope for a list of documents: one page of items plus
# the paging metadata that describes it.
class DocumentListResponse(ResponseModel):
    # Documents on the current page.
    data: list[DocumentResponse]
    # Paging metadata supplied by the caller that builds the response.
    has_more: bool
    limit: int
    total: int
    page: int
# Indexing-progress snapshot for a single document. The timestamp fields and
# ``error`` declare no default, so they must be supplied (None is accepted);
# only the two segment counters may be omitted.
class DocumentStatusResponse(ResponseModel):
    id: str
    indexing_status: str
    processing_started_at: int | None
    parsing_completed_at: int | None
    cleaning_completed_at: int | None
    splitting_completed_at: int | None
    completed_at: int | None
    paused_at: int | None
    error: str | None
    stopped_at: int | None
    completed_segments: int | None = None
    total_segments: int | None = None

    @field_validator("indexing_status", mode="before")
    @classmethod
    def _normalize_indexing_status(cls, value: Any) -> Any:
        """Pass the raw status through ``normalize_enum`` before field validation."""
        return normalize_enum(value)

    @field_validator(
        "processing_started_at",
        "parsing_completed_at",
        "cleaning_completed_at",
        "splitting_completed_at",
        "completed_at",
        "paused_at",
        "stopped_at",
        mode="before",
    )
    @classmethod
    def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
        """Convert the raw value with ``to_timestamp`` before field validation."""
        return to_timestamp(value)
# Envelope wrapping the indexing status of several documents under ``data``.
class DocumentStatusListResponse(ResponseModel):
    data: list[DocumentStatusResponse]
+30 -20
View File
@@ -4792,9 +4792,9 @@ Get dataset auto disable logs
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Success |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Indexing status retrieved successfully | [DocumentStatusListResponse](#documentstatuslistresponse) |
### /datasets/{dataset_id}/documents
@@ -4830,9 +4830,9 @@ Get documents in a dataset
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Documents retrieved successfully |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Documents retrieved successfully | [DocumentWithSegmentsListResponse](#documentwithsegmentslistresponse) |
#### POST
##### Parameters
@@ -5028,10 +5028,10 @@ Get document indexing status
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Indexing status retrieved successfully |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Indexing status retrieved successfully | [DocumentStatusResponse](#documentstatusresponse) |
| 404 | Document not found | |
### /datasets/{dataset_id}/documents/{document_id}/metadata
@@ -11432,7 +11432,7 @@ Enum class for api provider schema type.
| description | string | | Yes |
| id | string | | Yes |
| name | string | | Yes |
| parameters | | | Yes |
| parameters | object<br>[ object ]<br>string | | Yes |
| server_code | string | | Yes |
| status | [AppMCPServerStatus](#appmcpserverstatus) | | Yes |
| updated_at | integer | | No |
@@ -11903,7 +11903,7 @@ Condition detail
| ---- | ---- | ----------- | -------- |
| comparison_operator | string | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
| name | string | | Yes |
| value | | | No |
| value | string<br>[ string ]<br>integer<br>number | | No |
#### ConsoleDatasetListQuery
@@ -12820,7 +12820,7 @@ Request payload for bulk downloading documents as a zip archive.
| id | string | | Yes |
| name | string | | Yes |
| type | string | | Yes |
| value | string | | No |
| value | string<br>integer<br>number<br>boolean | | No |
#### DocumentMetadataUpdatePayload
@@ -12844,14 +12844,14 @@ Request payload for bulk downloading documents as a zip archive.
| created_by | string | | No |
| created_from | string | | No |
| data_source_detail_dict | | | No |
| data_source_info_dict | | | No |
| data_source_info | | | No |
| data_source_type | string | | No |
| dataset_process_rule_id | string | | No |
| disabled_at | integer | | No |
| disabled_by | string | | No |
| display_status | string | | No |
| doc_form | string | | No |
| doc_metadata_details | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No |
| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No |
| enabled | boolean | | No |
| error | string | | No |
| hit_count | integer | | No |
@@ -12893,6 +12893,16 @@ Request payload for bulk downloading documents as a zip archive.
| stopped_at | integer | | Yes |
| total_segments | integer | | No |
#### DocumentWithSegmentsListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DocumentWithSegmentsResponse](#documentwithsegmentsresponse) ] | | Yes |
| has_more | boolean | | Yes |
| limit | integer | | Yes |
| page | integer | | Yes |
| total | integer | | Yes |
#### DocumentWithSegmentsResponse
| Name | Type | Description | Required |
@@ -12903,14 +12913,14 @@ Request payload for bulk downloading documents as a zip archive.
| created_by | string | | No |
| created_from | string | | No |
| data_source_detail_dict | | | No |
| data_source_info_dict | | | No |
| data_source_info | | | No |
| data_source_type | string | | No |
| dataset_process_rule_id | string | | No |
| disabled_at | integer | | No |
| disabled_by | string | | No |
| display_status | string | | No |
| doc_form | string | | No |
| doc_metadata_details | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No |
| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No |
| enabled | boolean | | No |
| error | string | | No |
| hit_count | integer | | No |
@@ -14000,7 +14010,7 @@ Enum class for large language model mode.
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| name | string | | Yes |
| value | | | No |
| value | string<br>integer<br>number | | No |
#### MetadataFilteringCondition
@@ -14595,7 +14605,7 @@ Form input definition.
| ---- | ---- | ----------- | -------- |
| current_identifier | string | | No |
| type | [Type](#type) | | Yes |
| value | | | Yes |
| value | [Github](#github)<br>[Marketplace](#marketplace)<br>[Package](#package) | | Yes |
#### PluginEndpointListResponse
@@ -15130,7 +15140,7 @@ Form input definition.
| description | string | | No |
| icon | string | | No |
| icon_background | string | | No |
| icon_type | | | No |
| icon_type | string<br>[IconType](#icontype) | | No |
| privacy_policy | string | | No |
| prompt_public | boolean | | No |
| show_workflow_steps | boolean | | No |
+153 -61
View File
@@ -753,15 +753,17 @@ Create a new document by uploading a file
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| data | formData | Optional JSON string with document creation settings. | No | string |
| file | formData | Document file to upload. | Yes | file |
| dataset_id | path | Dataset ID | Yes | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document created successfully |
| 400 | Bad request - invalid file or parameters |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 400 | Bad request - invalid file or parameters | |
| 401 | Unauthorized - invalid API token | |
### /datasets/{dataset_id}/document/create-by-text
@@ -779,11 +781,11 @@ Create a new document by providing text content
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document created successfully |
| 400 | Bad request - invalid parameters |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 400 | Bad request - invalid parameters | |
| 401 | Unauthorized - invalid API token | |
### /datasets/{dataset_id}/document/create_by_file
@@ -796,15 +798,17 @@ Create a new document by uploading a file
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| data | formData | Optional JSON string with document creation settings. | No | string |
| file | formData | Document file to upload. | Yes | file |
| dataset_id | path | Dataset ID | Yes | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document created successfully |
| 400 | Bad request - invalid file or parameters |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 400 | Bad request - invalid file or parameters | |
| 401 | Unauthorized - invalid API token | |
### /datasets/{dataset_id}/document/create_by_text
@@ -823,11 +827,11 @@ Deprecated legacy alias for creating a new document by providing text content. U
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document created successfully |
| 400 | Bad request - invalid parameters |
| 401 | Unauthorized - invalid API token |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 400 | Bad request - invalid parameters | |
| 401 | Unauthorized - invalid API token | |
### /datasets/{dataset_id}/documents
@@ -841,14 +845,18 @@ List all documents in a dataset
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| dataset_id | path | Dataset ID | Yes | string |
| keyword | query | Search keyword | No | string |
| limit | query | Number of items per page | No | integer |
| page | query | Page number | No | integer |
| status | query | Document status filter | No | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Documents retrieved successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Dataset not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Documents retrieved successfully | [DocumentListResponse](#documentlistresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Dataset not found | |
### /datasets/{dataset_id}/documents/download-zip
@@ -956,11 +964,11 @@ Get indexing status for documents in a batch
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Indexing status retrieved successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Dataset or documents not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Indexing status retrieved successfully | [DocumentStatusListResponse](#documentstatuslistresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Dataset or documents not found | |
### /datasets/{dataset_id}/documents/{document_id}
@@ -1019,16 +1027,18 @@ Update an existing document by uploading a file
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| data | formData | Optional JSON string with document update settings. | No | string |
| file | formData | Replacement document file. | No | file |
| dataset_id | path | Dataset ID | Yes | string |
| document_id | path | Document ID | Yes | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document updated successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Document not found | |
### /datasets/{dataset_id}/documents/{document_id}/download
@@ -1274,16 +1284,18 @@ Deprecated legacy alias for updating an existing document by uploading a file. U
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| data | formData | Optional JSON string with document update settings. | No | string |
| file | formData | Replacement document file. | No | file |
| dataset_id | path | Dataset ID | Yes | string |
| document_id | path | Document ID | Yes | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document updated successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Document not found | |
### /datasets/{dataset_id}/documents/{document_id}/update-by-text
@@ -1302,11 +1314,11 @@ Update an existing document by providing text content
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document updated successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Document not found | |
### /datasets/{dataset_id}/documents/{document_id}/update_by_file
@@ -1320,16 +1332,18 @@ Deprecated legacy alias for updating an existing document by uploading a file. U
| Name | Located in | Description | Required | Schema |
| ---- | ---------- | ----------- | -------- | ------ |
| data | formData | Optional JSON string with document update settings. | No | string |
| file | formData | Replacement document file. | No | file |
| dataset_id | path | Dataset ID | Yes | string |
| document_id | path | Document ID | Yes | string |
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document updated successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Document not found | |
### /datasets/{dataset_id}/documents/{document_id}/update_by_text
@@ -1349,11 +1363,11 @@ Deprecated legacy alias for updating an existing document by providing text cont
##### Responses
| Code | Description |
| ---- | ----------- |
| 200 | Document updated successfully |
| 401 | Unauthorized - invalid API token |
| 404 | Document not found |
| Code | Description | Schema |
| ---- | ----------- | ------ |
| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
| 401 | Unauthorized - invalid API token | |
| 404 | Document not found | |
### /datasets/{dataset_id}/hit-testing
@@ -2288,7 +2302,7 @@ Condition detail
| ---- | ---- | ----------- | -------- |
| comparison_operator | string | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
| name | string | | Yes |
| value | | | No |
| value | string<br>[ string ]<br>integer<br>number | | No |
#### ConversationListQuery
@@ -2637,6 +2651,13 @@ Condition detail
| inputs | object | | Yes |
| is_published | boolean | | Yes |
#### DocumentAndBatchResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| batch | string | | Yes |
| document | [DocumentResponse](#documentresponse) | | Yes |
#### DocumentBatchDownloadZipPayload
Request payload for bulk downloading documents as a zip archive.
@@ -2654,6 +2675,16 @@ Request payload for bulk downloading documents as a zip archive.
| page | integer | Page number | No |
| status | string | Document status filter | No |
#### DocumentListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DocumentResponse](#documentresponse) ] | | Yes |
| has_more | boolean | | Yes |
| limit | integer | | Yes |
| page | integer | | Yes |
| total | integer | | Yes |
#### DocumentMetadataOperation
| Name | Type | Description | Required |
@@ -2662,6 +2693,67 @@ Request payload for bulk downloading documents as a zip archive.
| metadata_list | [ [MetadataDetail](#metadatadetail) ] | | Yes |
| partial_update | boolean | | No |
#### DocumentMetadataResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| name | string | | Yes |
| type | string | | Yes |
| value | string<br>integer<br>number<br>boolean | | No |
#### DocumentResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| archived | boolean | | No |
| created_at | integer | | No |
| created_by | string | | No |
| created_from | string | | No |
| data_source_detail_dict | | | No |
| data_source_info | | | No |
| data_source_type | string | | No |
| dataset_process_rule_id | string | | No |
| disabled_at | integer | | No |
| disabled_by | string | | No |
| display_status | string | | No |
| doc_form | string | | No |
| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] | | No |
| enabled | boolean | | No |
| error | string | | No |
| hit_count | integer | | No |
| id | string | | Yes |
| indexing_status | string | | No |
| name | string | | Yes |
| need_summary | boolean | | No |
| position | integer | | No |
| summary_index_status | string | | No |
| tokens | integer | | No |
| word_count | integer | | No |
#### DocumentStatusListResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| data | [ [DocumentStatusResponse](#documentstatusresponse) ] | | Yes |
#### DocumentStatusResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| cleaning_completed_at | integer | | Yes |
| completed_at | integer | | Yes |
| completed_segments | integer | | No |
| error | string | | Yes |
| id | string | | Yes |
| indexing_status | string | | Yes |
| parsing_completed_at | integer | | Yes |
| paused_at | integer | | Yes |
| processing_started_at | integer | | Yes |
| splitting_completed_at | integer | | Yes |
| stopped_at | integer | | Yes |
| total_segments | integer | | No |
#### DocumentTextCreatePayload
| Name | Type | Description | Required |
@@ -2896,7 +2988,7 @@ Note: The SQLAlchemy model defines an `is_anonymous` property for Flask-Login se
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| name | string | | Yes |
| value | | | No |
| value | string<br>integer<br>number | | No |
#### MetadataFilteringCondition
@@ -3247,7 +3339,7 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag
| created_by_end_user | [SimpleEndUser](#simpleenduser) | | No |
| created_by_role | string | | No |
| created_from | string | | No |
| details | | | No |
| details | object<br>[ object ]<br>string<br>integer<br>number<br>boolean | | No |
| id | string | | Yes |
| workflow_run | [WorkflowRunForLogResponse](#workflowrunforlogresponse) | | No |
@@ -3269,7 +3361,7 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| created_at | integer | | No |
| elapsed_time | | | No |
| elapsed_time | number<br>integer | | No |
| error | string | | No |
| exceptions_count | integer | | No |
| finished_at | integer | | No |
@@ -3293,11 +3385,11 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| created_at | integer | | No |
| elapsed_time | | | No |
| elapsed_time | number<br>integer | | No |
| error | string | | No |
| finished_at | integer | | No |
| id | string | | Yes |
| inputs | | | No |
| inputs | object<br>[ object ]<br>string<br>integer<br>number<br>boolean | | No |
| outputs | object | | No |
| status | string | | Yes |
| total_steps | integer | | No |
@@ -188,6 +188,45 @@ def test_patch_union_schema_markdown_fills_converter_blank_schema_types(tmp_path
assert "| allowed_file_types | [ [FileType](#filetype) ] | | No |" in patched
def test_patch_union_schema_markdown_fills_regular_definition_union_property(tmp_path):
    """A union-typed property on a regular definition is rendered as ``<br>``-joined types.

    The spec declares ``value`` as an ``anyOf`` of string, integer, number, boolean and
    null, while the input markdown row only says ``string``. After patching, the row must
    list the four non-null members in declaration order.
    """
    module = _load_generate_swagger_markdown_docs_module()

    value_schema = {
        "anyOf": [
            {"type": "string"},
            {"type": "integer"},
            {"type": "number"},
            {"type": "boolean"},
            {"type": "null"},
        ],
    }
    spec = {
        "definitions": {
            "DocumentMetadataResponse": {
                "properties": {
                    "id": {"type": "string"},
                    "value": value_schema,
                },
            },
        }
    }
    spec_path = tmp_path / "service-swagger.json"
    spec_path.write_text(json.dumps(spec), encoding="utf-8")

    markdown = """#### DocumentMetadataResponse
| Name | Type | Description | Required |
| ---- | ---- | ----------- | -------- |
| id | string | | Yes |
| value | string | | No |
"""

    patched = module._patch_union_schema_markdown(markdown, spec_path)

    assert "| value | string<br>integer<br>number<br>boolean | | No |" in patched
def test_patch_union_schema_markdown_ignores_specs_without_definitions(tmp_path):
module = _load_generate_swagger_markdown_docs_module()
spec_path = tmp_path / "console-swagger.json"
@@ -236,7 +275,7 @@ def test_patch_union_schema_markdown_ignores_unrenderable_shapes(tmp_path):
== "#### Definition\n| field |"
)
assert module._patch_union_schema_markdown("#### BrokenUnion\n", spec_path) == "#### BrokenUnion"
assert module._patch_union_schema_markdown("#### BrokenUnion\n", spec_path) == "#### BrokenUnion\n"
def test_convert_spec_to_markdown_patches_generated_union_tables(tmp_path, monkeypatch):
@@ -1,4 +1,3 @@
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
@@ -9,6 +8,7 @@ import services
from controllers.console import console_ns
from controllers.console.datasets.datasets_document import (
DatasetDocumentListApi,
DatasetInitApi,
DocumentApi,
DocumentBatchDownloadZipApi,
DocumentBatchIndexingEstimateApi,
@@ -20,6 +20,7 @@ from controllers.console.datasets.datasets_document import (
DocumentMetadataApi,
DocumentPipelineExecutionLogApi,
DocumentProcessingApi,
DocumentRenameApi,
DocumentRetryApi,
DocumentStatusApi,
DocumentSummaryStatusApi,
@@ -33,7 +34,9 @@ from controllers.console.datasets.error import (
InvalidMetadataError,
)
from core.rag.index_processor.constant.index_type import IndexStructureType
from models.enums import DataSourceType, IndexingStatus
from models.dataset import Dataset
from models.dataset import Document as DatasetDocument
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus
def unwrap(func):
@@ -42,6 +45,79 @@ def unwrap(func):
return func
def make_serializable_document(**overrides: object) -> MagicMock:
    """Build a strict document mock exposing only explicitly listed attributes.

    Args:
        **overrides: Attribute values that replace, or add to, the defaults below.

    Returns:
        A ``MagicMock`` created with ``spec_set`` so that any attribute outside the
        default/override names raises ``AttributeError`` instead of silently
        producing a child mock.
    """
    attrs: dict[str, object] = {
        "id": "doc-1",
        "position": 1,
        "data_source_type": "upload_file",
        "data_source_info_dict": {"upload_file_id": "file-1"},
        "data_source_detail_dict": {},
        "dataset_process_rule_id": None,
        "name": "Document",
        "created_from": "web",
        "created_by": "u1",
        "created_at": None,
        "tokens": None,
        "indexing_status": "completed",
        "error": None,
        "enabled": True,
        "disabled_at": None,
        "disabled_by": None,
        "archived": False,
        "display_status": "available",
        "word_count": None,
        "hit_count": 0,
        "doc_form": "text_model",
        "doc_metadata_details": None,
        "summary_index_status": None,
        "need_summary": False,
        "process_rule_dict": None,
        "completed_segments": None,
        "total_segments": None,
    }
    attrs.update(overrides)

    document = MagicMock(spec_set=list(attrs))
    # Attributes are assigned after construction: passing ``name=...`` to the mock
    # constructor would name the mock itself rather than set a ``name`` attribute.
    for attr_name, attr_value in attrs.items():
        setattr(document, attr_name, attr_value)
    return document
def make_dataset(**overrides: object) -> Dataset:
    """Construct a ``Dataset`` model instance from test defaults.

    Args:
        **overrides: Constructor keyword values that replace, or add to, the defaults.

    Returns:
        The ``Dataset`` built from the merged keyword arguments.
    """
    attrs: dict[str, object] = {
        "id": "ds-1",
        "tenant_id": "tenant-1",
        "name": "Dataset",
        "indexing_technique": "economy",
        "created_by": "u1",
        "summary_index_setting": {"enable": True},
    }
    attrs.update(overrides)
    return Dataset(**attrs)
def make_document(**overrides: object) -> DatasetDocument:
    """Construct a ``DatasetDocument`` model instance from test defaults.

    Unlike ``make_serializable_document`` this returns a real model object, with
    ``data_source_info`` and ``doc_metadata`` left as ``None`` by default.

    Args:
        **overrides: Constructor keyword values that replace, or add to, the defaults.

    Returns:
        The ``DatasetDocument`` built from the merged keyword arguments.
    """
    attrs: dict[str, object] = {
        "id": "doc-1",
        "tenant_id": "tenant-1",
        "dataset_id": "ds-1",
        "position": 1,
        "data_source_type": DataSourceType.UPLOAD_FILE,
        "data_source_info": None,
        "batch": "batch-1",
        "name": "Document",
        "created_from": DocumentCreatedFrom.WEB,
        "created_by": "u1",
        "indexing_status": IndexingStatus.COMPLETED,
        "enabled": True,
        "archived": False,
        "doc_metadata": None,
        "doc_form": IndexStructureType.PARAGRAPH_INDEX,
        "need_summary": False,
    }
    attrs.update(overrides)
    return DatasetDocument(**attrs)
@pytest.fixture
def tenant_ctx() -> tuple[MagicMock, str]:
    """Return a two-item tuple: a mock with ``is_dataset_editor=True`` and ``id="u1"``, and ``"tenant-1"``.

    NOTE(review): presumably mirrors the ``(account, tenant_id)`` pair the controllers
    obtain for the current user; confirm against the ``patch_tenant`` fixture.
    """
    account = MagicMock(is_dataset_editor=True, id="u1")
    return (account, "tenant-1")
@@ -58,7 +134,7 @@ def patch_tenant(tenant_ctx):
@pytest.fixture
def dataset():
return MagicMock(id="ds-1", indexing_technique="economy", summary_index_setting={"enable": True})
return make_dataset()
@pytest.fixture
@@ -130,11 +206,9 @@ class TestDatasetDocumentListApi:
api = DatasetDocumentListApi()
method = unwrap(api.get)
doc = MagicMock(id="doc-1")
doc = make_serializable_document()
pagination = MagicMock(items=[doc], total=1)
count_mock = MagicMock(return_value=2)
with (
app.test_request_context("/?fetch=true"),
patch(
@@ -149,14 +223,12 @@ class TestDatasetDocumentListApi:
"controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_document.marshal",
return_value=[{"id": "doc-1"}],
),
):
resp = method(api, "ds-1")
assert resp["data"]
assert resp["data"][0]["id"] == "doc-1"
assert resp["data"][0]["completed_segments"] == 2
assert resp["data"][0]["total_segments"] == 2
def test_get_with_search_status_and_created_at_sort(
self, app: Flask, patch_tenant, patch_dataset, patch_permission
@@ -164,7 +236,7 @@ class TestDatasetDocumentListApi:
api = DatasetDocumentListApi()
method = unwrap(api.get)
pagination = MagicMock(items=[MagicMock()], total=1)
pagination = MagicMock(items=[make_serializable_document()], total=1)
with (
app.test_request_context("/?keyword=test&status=enabled&sort=created_at"),
@@ -180,10 +252,6 @@ class TestDatasetDocumentListApi:
"controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_document.marshal",
return_value=[{"id": "doc-1"}],
),
):
resp = method(api, "ds-1")
@@ -193,7 +261,7 @@ class TestDatasetDocumentListApi:
api = DatasetDocumentListApi()
method = unwrap(api.get)
pagination = MagicMock(items=[MagicMock()], total=1)
pagination = MagicMock(items=[make_serializable_document()], total=1)
with (
app.test_request_context("/"),
@@ -205,22 +273,21 @@ class TestDatasetDocumentListApi:
"controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_document.marshal",
return_value=[{"id": "doc-1"}],
),
):
response = method(api, "ds-1")
assert response["total"] == 1
assert response["data"][0]["id"] == "doc-1"
assert "completed_segments" not in response["data"][0]
assert "total_segments" not in response["data"][0]
def test_post_success(self, app: Flask, patch_tenant, patch_dataset, patch_permission):
api = DatasetDocumentListApi()
method = unwrap(api.post)
payload = {"indexing_technique": "economy"}
created_dataset = SimpleNamespace(id="ds-1", name="Dataset", indexing_technique="economy")
created_document = SimpleNamespace(id="doc-1", name="Document", doc_metadata_details=None)
created_dataset = make_dataset()
created_document = make_document()
with (
app.test_request_context("/", json=payload),
@@ -237,10 +304,17 @@ class TestDatasetDocumentListApi:
"controllers.console.datasets.datasets_document.DocumentService.save_document_with_dataset_id",
return_value=([created_document], "batch-1"),
),
patch("models.dataset.db.session.scalar", return_value=0),
):
response = method(api, "ds-1")
assert "documents" in response
assert response["dataset"]["id"] == "ds-1"
assert response["documents"][0]["id"] == "doc-1"
assert response["documents"][0]["data_source_info"] == {}
assert response["documents"][0]["doc_metadata"] == []
assert "data_source_info_dict" not in response["documents"][0]
assert "doc_metadata_details" not in response["documents"][0]
def test_post_forbidden(self, app: Flask):
api = DatasetDocumentListApi()
@@ -267,7 +341,7 @@ class TestDatasetDocumentListApi:
api = DatasetDocumentListApi()
method = unwrap(api.get)
pagination = MagicMock(items=[MagicMock()], total=1)
pagination = MagicMock(items=[make_serializable_document()], total=1)
with (
app.test_request_context("/?fetch=maybe"),
@@ -279,10 +353,6 @@ class TestDatasetDocumentListApi:
"controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_document.marshal",
return_value=[{"id": "doc-1"}],
),
):
response = method(api, "ds-1")
@@ -310,6 +380,37 @@ class TestDatasetDocumentListApi:
assert response["total"] == 0
class TestDatasetInitApi:
    """Response-shape tests for ``DatasetInitApi``."""

    def test_post_success_serializes_created_dataset_and_documents(self, app: Flask, patch_tenant):
        """POST returns the created dataset, its documents and the batch id.

        The created document has ``data_source_info=None`` and ``doc_metadata=None``;
        the serialized response is expected to expose them as ``{}`` and ``[]``.
        """
        api = DatasetInitApi()
        method = unwrap(api.post)

        payload = {"indexing_technique": "economy"}
        created_dataset = make_dataset()
        created_document = make_document(id="doc-init")

        with (
            app.test_request_context("/", json=payload),
            patch.object(type(console_ns), "payload", payload),
            patch(
                "controllers.console.datasets.datasets_document.DocumentService.document_create_args_validate",
                return_value=None,
            ),
            patch(
                "controllers.console.datasets.datasets_document.DocumentService.save_document_without_dataset_id",
                return_value=(created_dataset, [created_document], "batch-init"),
            ),
            # NOTE(review): presumably stubs session-backed model properties read during
            # serialization so no real database is needed; confirm against the models.
            patch("models.dataset.db.session.scalar", return_value=0),
        ):
            response = method(api)

        first_document = response["documents"][0]
        assert response["dataset"]["id"] == "ds-1"
        assert first_document["id"] == "doc-init"
        assert first_document["data_source_info"] == {}
        assert first_document["doc_metadata"] == []
        assert response["batch"] == "batch-init"
class TestDocumentApi:
def test_get_success(self, app: Flask, patch_tenant):
api = DocumentApi()
@@ -899,7 +1000,7 @@ class TestDocumentBatchDownloadZipApi:
api = DocumentBatchDownloadZipApi()
method = unwrap(api.post)
payload = {"document_ids": []}
payload: dict[str, list[str]] = {"document_ids": []}
with app.test_request_context("/", json=payload), patch.object(type(console_ns), "payload", payload):
with pytest.raises(ValueError):
@@ -1046,6 +1147,53 @@ class TestDocumentBatchIndexingEstimateApi:
class TestDocumentBatchIndexingStatusApi:
def test_get_batch_status_success_serializes_status_shape(self, app: Flask, patch_tenant):
    """Batch status GET returns exactly the status fields plus segment counts.

    The response is compared by equality, so any extra key (for example the mock's
    ``is_paused``) would fail the test.
    """
    api = DocumentBatchIndexingStatusApi()
    method = unwrap(api.get)

    document = MagicMock(
        id="doc-1",
        indexing_status=IndexingStatus.COMPLETED,
        is_paused=False,
        processing_started_at=None,
        parsing_completed_at=None,
        cleaning_completed_at=None,
        splitting_completed_at=None,
        completed_at=None,
        paused_at=None,
        error=None,
        stopped_at=None,
    )

    with (
        app.test_request_context("/"),
        patch.object(api, "get_batch_documents", return_value=[document]),
        patch(
            "controllers.console.datasets.datasets_document.db.session.scalar",
            # Consumed in call order; the assertion below expects 2 completed, 3 total.
            side_effect=[2, 3],
        ),
    ):
        response = method(api, "ds-1", "batch-1")

    expected_status = {
        "id": "doc-1",
        "indexing_status": "completed",
        "processing_started_at": None,
        "parsing_completed_at": None,
        "cleaning_completed_at": None,
        "splitting_completed_at": None,
        "completed_at": None,
        "paused_at": None,
        "error": None,
        "stopped_at": None,
        "completed_segments": 2,
        "total_segments": 3,
    }
    assert response == {"data": [expected_status]}
def test_get_batch_status_invalid_batch(self, app: Flask, patch_tenant):
"""Test batch status with invalid batch"""
api = DocumentBatchIndexingStatusApi()
@@ -1057,6 +1205,39 @@ class TestDocumentBatchIndexingStatusApi:
class TestDocumentIndexingStatusApi:
def test_get_status_success_serializes_status_shape(self, app: Flask, patch_tenant):
    """Single-document status GET serializes the enum status and the segment counts."""
    api = DocumentIndexingStatusApi()
    method = unwrap(api.get)

    document = MagicMock(
        id="doc-1",
        indexing_status=IndexingStatus.INDEXING,
        is_paused=False,
        processing_started_at=None,
        parsing_completed_at=None,
        cleaning_completed_at=None,
        splitting_completed_at=None,
        completed_at=None,
        paused_at=None,
        error=None,
        stopped_at=None,
    )

    with (
        app.test_request_context("/"),
        patch.object(api, "get_document", return_value=document),
        patch(
            "controllers.console.datasets.datasets_document.db.session.scalar",
            # Consumed in call order; the assertions below expect 1 completed, 4 total.
            side_effect=[1, 4],
        ),
    ):
        response = method(api, "ds-1", "doc-1")

    assert response["id"] == "doc-1"
    # The enum member is expected to be emitted as its plain string value.
    assert response["indexing_status"] == "indexing"
    assert response["completed_segments"] == 1
    assert response["total_segments"] == 4
def test_get_status_document_not_found(self, app: Flask, patch_tenant):
"""Test getting status for non-existent document"""
api = DocumentIndexingStatusApi()
@@ -1067,6 +1248,40 @@ class TestDocumentIndexingStatusApi:
method(api, "ds-1", "invalid-doc")
class TestDocumentRenameApi:
    """Response-shape tests for ``DocumentRenameApi``."""

    def test_post_success_serializes_document_shape(self, app: Flask, patch_tenant):
        """POST returns the renamed document under the public field names.

        ``data_source_info`` / ``doc_metadata`` must be present (as ``{}`` / ``[]`` for a
        document created with ``None`` values) and the internal ``data_source_info_dict``
        key must not leak into the response.
        """
        api = DocumentRenameApi()
        method = unwrap(api.post)

        payload = {"name": "Renamed Document"}
        renamed_document = make_document(id="doc-renamed", name="Renamed Document")

        with (
            app.test_request_context("/", json=payload),
            patch.object(type(console_ns), "payload", payload),
            patch(
                "controllers.console.datasets.datasets_document.DatasetService.get_dataset",
                return_value=make_dataset(),
            ),
            patch(
                "controllers.console.datasets.datasets_document.DatasetService.check_dataset_operator_permission",
                return_value=None,
            ),
            patch(
                "controllers.console.datasets.datasets_document.DocumentService.rename_document",
                return_value=renamed_document,
            ),
            # NOTE(review): presumably stubs session-backed model properties read during
            # serialization so no real database is needed; confirm against the models.
            patch("models.dataset.db.session.scalar", return_value=0),
        ):
            response = method(api, "ds-1", "doc-1")

        assert response["id"] == "doc-renamed"
        assert response["name"] == "Renamed Document"
        assert response["data_source_info"] == {}
        assert response["doc_metadata"] == []
        assert "data_source_info_dict" not in response
class TestDocumentApiMetadata:
def test_get_with_only_option(self, app: Flask, patch_tenant):
"""Test get with 'only' metadata option"""
@@ -1291,7 +1506,7 @@ class TestDocumentListAdvancedCases:
api = DatasetDocumentListApi()
method = unwrap(api.get)
pagination = MagicMock(items=[MagicMock()], total=1)
pagination = MagicMock(items=[make_serializable_document()], total=1)
with (
app.test_request_context("/?sort=updated_at"),
@@ -1303,10 +1518,6 @@ class TestDocumentListAdvancedCases:
"controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_document.marshal",
return_value=[{"id": "doc-1"}],
),
):
response = method(api, "ds-1")
@@ -44,6 +44,41 @@ from services.dataset_service import DocumentService
from services.entities.knowledge_entities.knowledge_entities import ProcessRule, RetrievalModel
def make_serializable_document(**overrides: object) -> Mock:
    """Build a strict document mock exposing only explicitly listed attributes.

    Args:
        **overrides: Attribute values that replace, or add to, the defaults below.

    Returns:
        A ``Mock`` created with ``spec_set`` so that any attribute outside the
        default/override names raises ``AttributeError``. ``id`` defaults to a fresh
        random UUID string on every call.
    """
    attrs: dict[str, object] = {
        "id": str(uuid.uuid4()),
        "position": 1,
        "data_source_type": "upload_file",
        "data_source_info_dict": {"upload_file_id": "file-1"},
        "data_source_detail_dict": {},
        "dataset_process_rule_id": None,
        "batch": "batch-1",
        "name": "Test Document",
        "created_from": "api",
        "created_by": "user-1",
        "created_at": None,
        "tokens": None,
        "indexing_status": "completed",
        "error": None,
        "enabled": True,
        "disabled_at": None,
        "disabled_by": None,
        "archived": False,
        "display_status": "available",
        "word_count": None,
        "hit_count": 0,
        "doc_form": "text_model",
        "doc_metadata_details": None,
        "summary_index_status": None,
        "need_summary": False,
    }
    attrs.update(overrides)

    document = Mock(spec_set=list(attrs))
    # Attributes are assigned after construction: passing ``name=...`` to the mock
    # constructor would name the mock itself rather than set a ``name`` attribute.
    for attr_name, attr_value in attrs.items():
        setattr(document, attr_name, attr_value)
    return document
class TestDocumentTextCreatePayload:
"""Test suite for DocumentTextCreatePayload Pydantic model."""
@@ -226,7 +261,7 @@ class TestDocumentService:
assert hasattr(DocumentService, "batch_update_document_status")
@patch.object(DocumentService, "get_document")
def test_get_document_returns_document(self, mock_get):
def test_get_document_returns_document(self, mock_get: Mock) -> None:
"""Test get_document returns document object."""
mock_doc = Mock()
mock_doc.id = str(uuid.uuid4())
@@ -235,6 +270,7 @@ class TestDocumentService:
mock_get.return_value = mock_doc
result = DocumentService.get_document(dataset_id="dataset_id", document_id="doc_id")
assert result is not None
assert result.name == "Test Document"
assert result.indexing_status == "completed"
@@ -510,7 +546,7 @@ class TestDocumentApiGet:
"""
@pytest.fixture
def mock_doc_detail(self, mock_tenant):
def mock_doc_detail(self, mock_tenant: Mock) -> Mock:
"""A document mock with every attribute ``DocumentApi.get`` reads."""
doc = Mock()
doc.id = str(uuid.uuid4())
@@ -551,8 +587,8 @@ class TestDocumentApiGet:
@patch("controllers.service_api.dataset.document.DatasetService")
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_success_with_all_metadata(
self, mock_doc_svc, mock_dataset_svc, app: Flask, mock_tenant, mock_doc_detail
):
self, mock_doc_svc: Mock, mock_dataset_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
) -> None:
"""Test successful document retrieval with metadata='all'."""
# Arrange
dataset_id = str(uuid.uuid4())
@@ -569,8 +605,8 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
with patch.object(api, "get_dataset", return_value=mock_dataset):
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
# Assert
assert response["id"] == mock_doc_detail.id
@@ -580,7 +616,7 @@ class TestDocumentApiGet:
assert "doc_metadata" in response
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_not_found(self, mock_doc_svc, app: Flask, mock_tenant):
def test_get_document_not_found(self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock) -> None:
"""Test 404 when document is not found."""
# Arrange
dataset_id = str(uuid.uuid4())
@@ -595,12 +631,14 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
with pytest.raises(NotFound):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id="nonexistent")
with patch.object(api, "get_dataset", return_value=mock_dataset):
with pytest.raises(NotFound):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id="nonexistent")
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_forbidden_wrong_tenant(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail):
def test_get_document_forbidden_wrong_tenant(
self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
) -> None:
"""Test 403 when document tenant doesn't match request tenant."""
# Arrange
dataset_id = str(uuid.uuid4())
@@ -616,12 +654,14 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
with pytest.raises(Forbidden):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
with patch.object(api, "get_dataset", return_value=mock_dataset):
with pytest.raises(Forbidden):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_metadata_only(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail):
def test_get_document_metadata_only(
self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
) -> None:
"""Test document retrieval with metadata='only'."""
# Arrange
dataset_id = str(uuid.uuid4())
@@ -637,8 +677,8 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
with patch.object(api, "get_dataset", return_value=mock_dataset):
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
# Assert — metadata='only' returns only id, doc_type, doc_metadata
assert response["id"] == mock_doc_detail.id
@@ -649,8 +689,8 @@ class TestDocumentApiGet:
@patch("controllers.service_api.dataset.document.DatasetService")
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_metadata_without(
self, mock_doc_svc, mock_dataset_svc, app: Flask, mock_tenant, mock_doc_detail
):
self, mock_doc_svc: Mock, mock_dataset_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
) -> None:
"""Test document retrieval with metadata='without'."""
# Arrange
dataset_id = str(uuid.uuid4())
@@ -667,8 +707,8 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
with patch.object(api, "get_dataset", return_value=mock_dataset):
response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
# Assert — metadata='without' omits doc_type / doc_metadata
assert response["id"] == mock_doc_detail.id
@@ -677,7 +717,9 @@ class TestDocumentApiGet:
assert "name" in response
@patch("controllers.service_api.dataset.document.DocumentService")
def test_get_document_invalid_metadata_value(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail):
def test_get_document_invalid_metadata_value(
self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
) -> None:
"""Test error when metadata parameter has invalid value."""
# Arrange
dataset_id = str(uuid.uuid4())
@@ -693,9 +735,9 @@ class TestDocumentApiGet:
method="GET",
):
api = DocumentApi()
api.get_dataset = Mock(return_value=mock_dataset)
with pytest.raises(InvalidMetadataError):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
with patch.object(api, "get_dataset", return_value=mock_dataset):
with pytest.raises(InvalidMetadataError):
api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
class TestDocumentApiDelete:
@@ -808,21 +850,26 @@ class TestDocumentApiDelete:
class TestDocumentListApi:
"""Test suite for DocumentListApi endpoint."""
@patch("controllers.service_api.dataset.document.marshal")
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.db")
def test_list_documents_success(self, mock_db, mock_doc_svc, mock_marshal, app: Flask, mock_tenant, mock_dataset):
def test_list_documents_success(self, mock_db, mock_doc_svc, app: Flask, mock_tenant, mock_dataset):
"""Test successful document list retrieval."""
# Arrange
mock_db.session.scalar.return_value = mock_dataset
mock_pagination = Mock()
mock_pagination.items = [Mock(), Mock()]
mock_pagination.items = [
make_serializable_document(
id="doc-1",
name="Document 1",
doc_metadata_details=[{"id": "meta-1", "name": "amount", "type": "number", "value": 42}],
),
make_serializable_document(id="doc-2", name="Document 2"),
]
mock_pagination.total = 2
mock_db.paginate.return_value = mock_pagination
mock_doc_svc.enrich_documents_with_summary_index_status.return_value = None
mock_marshal.return_value = [{"id": "doc1"}, {"id": "doc2"}]
# Act
with app.test_request_context(
@@ -838,6 +885,11 @@ class TestDocumentListApi:
assert response["page"] == 1
assert response["limit"] == 20
assert response["total"] == 2
assert response["data"][0]["id"] == "doc-1"
assert response["data"][0]["data_source_info"] == {"upload_file_id": "file-1"}
assert response["data"][0]["doc_metadata"][0]["value"] == 42
assert "data_source_info_dict" not in response["data"][0]
assert "doc_metadata_details" not in response["data"][0]
@patch("controllers.service_api.dataset.document.db")
def test_list_documents_dataset_not_found(self, mock_db, app: Flask, mock_tenant, mock_dataset):
@@ -858,12 +910,9 @@ class TestDocumentListApi:
class TestDocumentIndexingStatusApi:
"""Test suite for DocumentIndexingStatusApi endpoint."""
@patch("controllers.service_api.dataset.document.marshal")
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.db")
def test_get_indexing_status_success(
self, mock_db, mock_doc_svc, mock_marshal, app: Flask, mock_tenant, mock_dataset
):
def test_get_indexing_status_success(self, mock_db, mock_doc_svc, app: Flask, mock_tenant, mock_dataset):
"""Test successful indexing status retrieval."""
# Arrange
batch_id = "batch_123"
@@ -884,7 +933,6 @@ class TestDocumentIndexingStatusApi:
# scalar() called 3 times: dataset lookup, completed_segments count, total_segments count
mock_db.session.scalar.side_effect = [mock_dataset, 5, 5]
mock_marshal.return_value = {"id": mock_doc.id, "indexing_status": "completed"}
# Act
with app.test_request_context(
@@ -897,6 +945,12 @@ class TestDocumentIndexingStatusApi:
# Assert
assert "data" in response
assert len(response["data"]) == 1
item = response["data"][0]
assert item["id"] == mock_doc.id
assert item["indexing_status"] == "completed"
assert item["completed_segments"] == 5
assert item["total_segments"] == 5
assert item["processing_started_at"] is None
@patch("controllers.service_api.dataset.document.db")
def test_get_indexing_status_dataset_not_found(self, mock_db, app: Flask, mock_tenant, mock_dataset):
@@ -973,7 +1027,6 @@ class TestDocumentAddByTextApi:
mock_rate_limit.enabled = False
mock_feature_svc.get_knowledge_rate_limit.return_value = mock_rate_limit
@patch("controllers.service_api.dataset.document.marshal")
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.KnowledgeConfig")
@patch("controllers.service_api.dataset.document.FileService")
@@ -990,7 +1043,6 @@ class TestDocumentAddByTextApi:
mock_file_svc_cls,
mock_knowledge_config,
mock_doc_svc,
mock_marshal,
app: Flask,
mock_tenant,
mock_dataset,
@@ -1012,11 +1064,9 @@ class TestDocumentAddByTextApi:
mock_config = Mock()
mock_knowledge_config.model_validate.return_value = mock_config
mock_doc = Mock()
mock_doc.id = str(uuid.uuid4())
mock_doc = make_serializable_document(id="doc-create-text", name="Test Document")
mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_doc], "batch_123")
mock_doc_svc.document_create_args_validate.return_value = None
mock_marshal.return_value = {"id": mock_doc.id, "name": "Test Document"}
# Act
with app.test_request_context(
@@ -1037,6 +1087,10 @@ class TestDocumentAddByTextApi:
assert "document" in response
assert "batch" in response
assert response["batch"] == "batch_123"
assert response["document"]["id"] == "doc-create-text"
assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"}
assert response["document"]["doc_metadata"] == []
assert "data_source_info_dict" not in response["document"]
@patch("controllers.service_api.wraps.FeatureService")
@patch("controllers.service_api.wraps.validate_and_get_api_token")
@@ -1162,7 +1216,6 @@ class TestDocumentUpdateByTextApiPost:
``@cloud_edition_billing_rate_limit_check``.
"""
@patch("controllers.service_api.dataset.document.marshal")
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.FileService")
@patch("controllers.service_api.dataset.document.current_user")
@@ -1177,7 +1230,6 @@ class TestDocumentUpdateByTextApiPost:
mock_current_user,
mock_file_svc_cls,
mock_doc_svc,
mock_marshal,
app: Flask,
mock_tenant,
mock_dataset,
@@ -1193,10 +1245,9 @@ class TestDocumentUpdateByTextApiPost:
mock_upload.id = str(uuid.uuid4())
mock_file_svc_cls.return_value.upload_text.return_value = mock_upload
mock_document = Mock()
mock_document = make_serializable_document(id="doc-update-text", name="Updated Doc")
mock_doc_svc.document_create_args_validate.return_value = None
mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_document], "batch-1")
mock_marshal.return_value = {"id": "doc-1"}
doc_id = str(uuid.uuid4())
with app.test_request_context(
@@ -1214,6 +1265,9 @@ class TestDocumentUpdateByTextApiPost:
assert status == 200
assert "document" in response
assert response["batch"] == "batch-1"
assert response["document"]["id"] == "doc-update-text"
assert response["document"]["doc_metadata"] == []
@patch("controllers.service_api.dataset.document.db")
@patch("controllers.service_api.wraps.FeatureService")
@@ -1254,6 +1308,61 @@ class TestDocumentAddByFileApiPost:
decorators and ``@cloud_edition_billing_rate_limit_check``.
"""
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.FileService")
@patch("controllers.service_api.dataset.document.current_user")
@patch("controllers.service_api.dataset.document.db")
@patch("controllers.service_api.wraps.FeatureService")
@patch("controllers.service_api.wraps.validate_and_get_api_token")
def test_add_by_file_success_serializes_document_and_batch_shape(
    self,
    mock_validate_token,
    mock_feature_svc,
    mock_db,
    mock_current_user,
    mock_file_svc_cls,
    mock_doc_svc,
    app: Flask,
    mock_tenant,
    mock_dataset,
):
    """Create-by-file responds 200 with the batch id and a serialized document.

    The document returned by the patched ``DocumentService`` is built with
    ``make_serializable_document``; the assertions check the ``batch`` value
    and the ``id``, ``data_source_info`` and ``doc_metadata`` keys of the
    ``document`` entry in the response.
    """
    from io import BytesIO

    # Arrange: billing/auth decorators and the dataset looked up through db.
    _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id)
    mock_dataset.provider = "vendor"
    mock_dataset.indexing_technique = "economy"
    mock_dataset.chunk_structure = None
    mock_dataset.latest_process_rule = Mock()
    mock_dataset.created_by_account = Mock()
    mock_db.session.scalar.return_value = mock_dataset
    mock_current_user.id = "user-1"

    # Arrange: the upload service hands back a file record with an id.
    uploaded_file = Mock()
    uploaded_file.id = str(uuid.uuid4())
    mock_file_svc_cls.return_value.upload_file.return_value = uploaded_file

    # Arrange: the document service validates and returns one saved document.
    created_document = make_serializable_document(id="doc-create-file", name="File Document")
    mock_doc_svc.document_create_args_validate.return_value = None
    mock_doc_svc.save_document_with_dataset_id.return_value = ([created_document], "batch-file")

    form_data = {"file": (BytesIO(b"content"), "test.pdf", "application/pdf")}

    # Act
    with app.test_request_context(
        f"/datasets/{mock_dataset.id}/document/create-by-file",
        method="POST",
        content_type="multipart/form-data",
        data=form_data,
        headers={"Authorization": "Bearer test_token"},
    ):
        api = DocumentAddByFileApi()
        response, status = api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id)

    # Assert
    assert status == 200
    assert response["batch"] == "batch-file"
    serialized = response["document"]
    assert serialized["id"] == "doc-create-file"
    assert serialized["data_source_info"] == {"upload_file_id": "file-1"}
    assert serialized["doc_metadata"] == []
@patch("controllers.service_api.dataset.document.db")
@patch("controllers.service_api.wraps.FeatureService")
@patch("controllers.service_api.wraps.validate_and_get_api_token")
@@ -1498,7 +1607,6 @@ class TestDocumentUpdateByFileApiPatch:
document_id=doc_id,
)
@patch("controllers.service_api.dataset.document.marshal")
@patch("controllers.service_api.dataset.document.DocumentService")
@patch("controllers.service_api.dataset.document.FileService")
@patch("controllers.service_api.dataset.document.current_user")
@@ -1513,7 +1621,6 @@ class TestDocumentUpdateByFileApiPatch:
mock_current_user,
mock_file_svc_cls,
mock_doc_svc,
mock_marshal,
app: Flask,
mock_tenant,
mock_dataset,
@@ -1532,11 +1639,9 @@ class TestDocumentUpdateByFileApiPatch:
mock_upload.id = str(uuid.uuid4())
mock_file_svc_cls.return_value.upload_file.return_value = mock_upload
mock_document = Mock()
mock_document.batch = "batch-1"
mock_document = make_serializable_document(id="doc-update-file", name="File Document", batch="batch-1")
mock_doc_svc.document_create_args_validate.return_value = None
mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_document], None)
mock_marshal.return_value = {"id": "doc-1"}
from io import BytesIO
@@ -1558,3 +1663,6 @@ class TestDocumentUpdateByFileApiPatch:
assert status == 200
assert "document" in response
assert response["batch"] == "batch-1"
assert response["document"]["id"] == "doc-update-file"
assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"}
@@ -18,6 +18,19 @@ def _definition_refs(value: object) -> set[str]:
return refs
def _parameters_by_name(operation: dict[str, object]) -> dict[str, dict[str, object]]:
parameters = operation.get("parameters", [])
assert isinstance(parameters, list)
result: dict[str, dict[str, object]] = {}
for parameter in parameters:
if not isinstance(parameter, dict):
continue
name = parameter.get("name")
if isinstance(name, str):
result[name] = parameter
return result
@pytest.mark.parametrize(
("first_kwargs", "second_kwargs"),
[
@@ -70,3 +83,60 @@ def test_swagger_json_endpoints_render(monkeypatch: pytest.MonkeyPatch):
assert not sorted(ref for ref in missing_refs if ref.startswith("_AnonymousInlineModel"))
assert app.config["RESTX_INCLUDE_ALL_MODELS"] is True
def test_service_document_file_routes_document_multipart_form_data(monkeypatch: pytest.MonkeyPatch):
    """Check that the service API Swagger spec describes file routes as multipart forms.

    The create-by-file operation must declare a required ``file`` form field,
    the three update-by-file operations an optional one; every operation must
    also consume ``multipart/form-data`` and expose a string ``data`` form
    field.
    """
    from configs import dify_config
    from controllers.service_api import bp as service_api_bp

    def assert_multipart_operation(operation: dict, *, file_required: bool) -> None:
        # Shared expectations for every file-upload operation in the spec.
        form_params = _parameters_by_name(operation)
        assert operation["consumes"] == ["multipart/form-data"]
        assert form_params["file"]["in"] == "formData"
        assert form_params["file"]["type"] == "file"
        assert form_params["file"]["required"] is file_required
        assert form_params["data"]["in"] == "formData"
        assert form_params["data"]["type"] == "string"

    monkeypatch.setattr(dify_config, "SWAGGER_UI_ENABLED", True)

    app = Flask(__name__)
    app.config["TESTING"] = True
    app.config["RESTX_INCLUDE_ALL_MODELS"] = True
    app.register_blueprint(service_api_bp)

    swagger = app.test_client().get("/v1/swagger.json").get_json()
    paths = swagger["paths"]

    assert_multipart_operation(
        paths["/datasets/{dataset_id}/document/create-by-file"]["post"],
        file_required=True,
    )

    # The bare document route is updated via PATCH; the explicit
    # update-by-file aliases are POST operations.
    update_routes = (
        ("/datasets/{dataset_id}/documents/{document_id}", "patch"),
        ("/datasets/{dataset_id}/documents/{document_id}/update-by-file", "post"),
        ("/datasets/{dataset_id}/documents/{document_id}/update_by_file", "post"),
    )
    for route, http_method in update_routes:
        assert_multipart_operation(paths[route][http_method], file_required=False)
def test_service_document_list_documents_query_params_render(monkeypatch: pytest.MonkeyPatch):
    """Check that the document list operation renders its filters as query parameters.

    ``page``, ``limit``, ``keyword`` and ``status`` must each appear in the
    Swagger spec of ``GET /datasets/{dataset_id}/documents`` with ``in`` set
    to ``query``.
    """
    from configs import dify_config
    from controllers.service_api import bp as service_api_bp

    monkeypatch.setattr(dify_config, "SWAGGER_UI_ENABLED", True)

    app = Flask(__name__)
    app.config["TESTING"] = True
    app.config["RESTX_INCLUDE_ALL_MODELS"] = True
    app.register_blueprint(service_api_bp)

    swagger = app.test_client().get("/v1/swagger.json").get_json()
    list_operation = swagger["paths"]["/datasets/{dataset_id}/documents"]["get"]
    rendered = _parameters_by_name(list_operation)

    expected_names = ("page", "limit", "keyword", "status")
    # A missing parameter surfaces as a KeyError while collecting locations.
    locations = {name: rendered[name]["in"] for name in expected_names}
    assert locations == dict.fromkeys(expected_names, "query")