refactor(api): migrate console/service_api.dataset.document to BaseModel (#36506)

Co-authored-by: WH-2099 <wh2099@pm.me>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
2026-06-03 08:16:37 +08:00 · 2026-05-30 23:38:27 +09:00
parent 6805d9bfc0
commit 599960024d
17 changed files with 1412 additions and 611 deletions
@@ -9,7 +9,7 @@ from uuid import UUID

 import sqlalchemy as sa
 from flask import request, send_file
-from flask_restx import Resource, marshal
+from flask_restx import Resource
 from pydantic import BaseModel, Field, field_validator
 from sqlalchemy import asc, desc, func, select
 from werkzeug.exceptions import Forbidden, NotFound
@@ -34,14 +34,16 @@ from core.rag.index_processor.constant.index_type import IndexTechniqueType
 from extensions.ext_database import db
 from fields.base import ResponseModel
 from fields.document_fields import (
-    document_fields,
-    document_status_fields,
-    document_with_segments_fields,
+    DocumentMetadataResponse,
+    DocumentResponse,
+    DocumentStatusListResponse,
+    DocumentStatusResponse,
+    normalize_enum,
 )
 from graphon.model_runtime.entities.model_entities import ModelType
 from graphon.model_runtime.errors.invoke import InvokeAuthorizationError
 from libs.datetime_utils import naive_utc_now
-from libs.helper import to_timestamp
+from libs.helper import dump_response, to_timestamp
 from libs.login import current_account_with_tenant, login_required
 from models import DatasetProcessRule, Document, DocumentSegment, UploadFile
 from models.dataset import DocumentPipelineExecutionLog
@@ -74,12 +76,6 @@ from ..wraps import (
 logger = logging.getLogger(__name__)


-def _normalize_enum(value: Any) -> Any:
-    if isinstance(value, str) or value is None:
-        return value
-    return getattr(value, "value", value)
-
-
 class DatasetResponse(ResponseModel):
    id: str
    name: str
@@ -93,7 +89,7 @@ class DatasetResponse(ResponseModel):
    @field_validator("data_source_type", "indexing_technique", mode="before")
    @classmethod
    def _normalize_enum_fields(cls, value: Any) -> Any:
-        return _normalize_enum(value)
+        return normalize_enum(value)

    @field_validator("created_at", mode="before")
    @classmethod
@@ -101,61 +97,10 @@ class DatasetResponse(ResponseModel):
        return to_timestamp(value)


-class DocumentMetadataResponse(ResponseModel):
-    id: str
-    name: str
-    type: str
-    value: str | None = None
-
-
-class DocumentResponse(ResponseModel):
-    id: str
-    position: int | None = None
-    data_source_type: str | None = None
-    data_source_info: Any = Field(default=None, validation_alias="data_source_info_dict")
-    data_source_detail_dict: Any = None
-    dataset_process_rule_id: str | None = None
-    name: str
-    created_from: str | None = None
-    created_by: str | None = None
-    created_at: int | None = None
-    tokens: int | None = None
-    indexing_status: str | None = None
-    error: str | None = None
-    enabled: bool | None = None
-    disabled_at: int | None = None
-    disabled_by: str | None = None
-    archived: bool | None = None
-    display_status: str | None = None
-    word_count: int | None = None
-    hit_count: int | None = None
-    doc_form: str | None = None
-    doc_metadata: list[DocumentMetadataResponse] = Field(default_factory=list, validation_alias="doc_metadata_details")
-    summary_index_status: str | None = None
-    need_summary: bool | None = None
-
-    @field_validator("data_source_type", "indexing_status", "display_status", "doc_form", mode="before")
-    @classmethod
-    def _normalize_enum_fields(cls, value: Any) -> Any:
-        return _normalize_enum(value)
-
-    @field_validator("doc_metadata", mode="before")
-    @classmethod
-    def _normalize_doc_metadata(cls, value: Any) -> list[Any]:
-        if value is None:
-            return []
-        return value
-
-    @field_validator("created_at", "disabled_at", mode="before")
-    @classmethod
-    def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
-        return to_timestamp(value)
-
-
 class DocumentWithSegmentsResponse(DocumentResponse):
    process_rule_dict: Any = None
-    completed_segments: int | None = None
-    total_segments: int | None = None
+    completed_segments: int | None = Field(default=None, exclude_if=lambda value: value is None)
+    total_segments: int | None = Field(default=None, exclude_if=lambda value: value is None)


 class DatasetAndDocumentResponse(ResponseModel):
@@ -190,6 +135,14 @@ class DocumentDatasetListParam(BaseModel):
    fetch_val: str = Field("false", alias="fetch")


+class DocumentWithSegmentsListResponse(ResponseModel):
+    data: list[DocumentWithSegmentsResponse]
+    has_more: bool
+    limit: int
+    total: int
+    page: int
+
+
 register_schema_models(
    console_ns,
    KnowledgeConfig,
@@ -200,13 +153,19 @@ register_schema_models(
    GenerateSummaryPayload,
    DocumentMetadataUpdatePayload,
    DocumentBatchDownloadZipPayload,
+)
+register_response_schema_models(
+    console_ns,
+    SimpleResultMessageResponse,
+    SimpleResultResponse,
+    UrlResponse,
    DatasetResponse,
    DocumentMetadataResponse,
    DocumentResponse,
    DocumentWithSegmentsResponse,
    DatasetAndDocumentResponse,
+    DocumentWithSegmentsListResponse,
 )
-register_response_schema_models(console_ns, SimpleResultMessageResponse, SimpleResultResponse, UrlResponse)


 class DocumentResource(Resource):
@@ -312,7 +271,11 @@ class DatasetDocumentListApi(Resource):
            "status": "Filter documents by display status",
        }
    )
-    @console_ns.response(200, "Documents retrieved successfully")
+    @console_ns.response(
+        200,
+        "Documents retrieved successfully",
+        console_ns.models[DocumentWithSegmentsListResponse.__name__],
+    )
    @setup_required
    @login_required
    @account_initialization_required
@@ -425,18 +388,15 @@ class DatasetDocumentListApi(Resource):
                )
                document.completed_segments = completed_segments
                document.total_segments = total_segments
-            data = marshal(documents, document_with_segments_fields)
-        else:
-            data = marshal(documents, document_fields)
        response = {
-            "data": data,
+            "data": documents,
            "has_more": len(documents) == limit,
            "limit": limit,
            "total": paginated_documents.total,
            "page": page,
        }

-        return response
+        return dump_response(DocumentWithSegmentsListResponse, response)

    @setup_required
    @login_required
@@ -482,9 +442,7 @@ class DatasetDocumentListApi(Resource):
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()

-        return DatasetAndDocumentResponse.model_validate(
-            {"dataset": dataset, "documents": documents, "batch": batch}, from_attributes=True
-        ).model_dump(mode="json")
+        return dump_response(DatasetAndDocumentResponse, {"dataset": dataset, "documents": documents, "batch": batch})

    @setup_required
    @login_required
@@ -567,9 +525,7 @@ class DatasetInitApi(Resource):
        except ModelCurrentlyNotSupportError:
            raise ProviderModelCurrentlyNotSupportError()

-        return DatasetAndDocumentResponse.model_validate(
-            {"dataset": dataset, "documents": documents, "batch": batch}, from_attributes=True
-        ).model_dump(mode="json")
+        return dump_response(DatasetAndDocumentResponse, {"dataset": dataset, "documents": documents, "batch": batch})


@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-estimate")
@@ -742,6 +698,9 @@ class DocumentBatchIndexingEstimateApi(DocumentResource):

@console_ns.route("/datasets/<uuid:dataset_id>/batch/<string:batch>/indexing-status")
 class DocumentBatchIndexingStatusApi(DocumentResource):
+    @console_ns.response(
+        200, "Indexing status retrieved successfully", console_ns.models[DocumentStatusListResponse.__name__]
+    )
    @setup_required
    @login_required
    @account_initialization_required
@@ -784,9 +743,8 @@ class DocumentBatchIndexingStatusApi(DocumentResource):
                "completed_segments": completed_segments,
                "total_segments": total_segments,
            }
-            documents_status.append(marshal(document_dict, document_status_fields))
-        data = {"data": documents_status}
-        return data
+            documents_status.append(document_dict)
+        return dump_response(DocumentStatusListResponse, {"data": documents_status})


@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/indexing-status")
@@ -794,7 +752,9 @@ class DocumentIndexingStatusApi(DocumentResource):
    @console_ns.doc("get_document_indexing_status")
    @console_ns.doc(description="Get document indexing status")
    @console_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
-    @console_ns.response(200, "Indexing status retrieved successfully")
+    @console_ns.response(
+        200, "Indexing status retrieved successfully", console_ns.models[DocumentStatusResponse.__name__]
+    )
    @console_ns.response(404, "Document not found")
    @setup_required
    @login_required
@@ -839,7 +799,7 @@ class DocumentIndexingStatusApi(DocumentResource):
            "completed_segments": completed_segments,
            "total_segments": total_segments,
        }
-        return marshal(document_dict, document_status_fields)
+        return dump_response(DocumentStatusResponse, document_dict)


@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>")
@@ -1304,7 +1264,7 @@ class DocumentRenameApi(DocumentResource):
        except services.errors.document.DocumentIndexingError:
            raise DocumentIndexingError("Cannot delete document during indexing.")

-        return DocumentResponse.model_validate(document, from_attributes=True).model_dump(mode="json")
+        return dump_response(DocumentResponse, document)


@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/website-sync")
@@ -12,7 +12,6 @@ from typing import Self
 from uuid import UUID

 from flask import request, send_file
-from flask_restx import marshal
 from pydantic import BaseModel, Field, field_validator, model_validator
 from sqlalchemy import desc, func, select
 from werkzeug.exceptions import Forbidden, NotFound
@@ -27,7 +26,12 @@ from controllers.common.errors import (
    UnsupportedFileTypeError,
 )
 from controllers.common.fields import UrlResponse
-from controllers.common.schema import register_enum_models, register_response_schema_models, register_schema_models
+from controllers.common.schema import (
+    query_params_from_model,
+    register_enum_models,
+    register_response_schema_models,
+    register_schema_models,
+)
 from controllers.service_api import service_api_ns
 from controllers.service_api.app.error import ProviderNotInitializeError
 from controllers.service_api.dataset.error import (
@@ -44,7 +48,13 @@ from core.errors.error import ProviderTokenNotInitError
 from core.rag.entities import PreProcessingRule, Rule, Segmentation
 from core.rag.retrieval.retrieval_methods import RetrievalMethod
 from extensions.ext_database import db
-from fields.document_fields import document_fields, document_status_fields
+from fields.base import ResponseModel
+from fields.document_fields import (
+    DocumentListResponse,
+    DocumentResponse,
+    DocumentStatusListResponse,
+)
+from libs.helper import dump_response
 from libs.login import current_user
 from models.dataset import Dataset, Document, DocumentSegment
 from models.enums import SegmentStatus
@@ -107,6 +117,44 @@ class DocumentListQuery(BaseModel):
    status: str | None = Field(default=None, description="Document status filter")


+DOCUMENT_CREATE_BY_FILE_PARAMS = {
+    "dataset_id": "Dataset ID",
+    "file": {
+        "in": "formData",
+        "type": "file",
+        "required": True,
+        "description": "Document file to upload.",
+    },
+    "data": {
+        "in": "formData",
+        "type": "string",
+        "required": False,
+        "description": "Optional JSON string with document creation settings.",
+    },
+}
+DOCUMENT_UPDATE_BY_FILE_PARAMS = {
+    "dataset_id": "Dataset ID",
+    "document_id": "Document ID",
+    "file": {
+        "in": "formData",
+        "type": "file",
+        "required": False,
+        "description": "Replacement document file.",
+    },
+    "data": {
+        "in": "formData",
+        "type": "string",
+        "required": False,
+        "description": "Optional JSON string with document update settings.",
+    },
+}
+
+
+class DocumentAndBatchResponse(ResponseModel):
+    document: DocumentResponse
+    batch: str
+
+
 register_enum_models(service_api_ns, RetrievalMethod)

 register_schema_models(
@@ -121,7 +169,14 @@ register_schema_models(
    PreProcessingRule,
    Segmentation,
 )
-register_response_schema_models(service_api_ns, UrlResponse)
+register_response_schema_models(
+    service_api_ns,
+    UrlResponse,
+    DocumentResponse,
+    DocumentAndBatchResponse,
+    DocumentListResponse,
+    DocumentStatusListResponse,
+)


 def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[str, object], int]:
@@ -188,8 +243,7 @@ def _create_document_by_text(tenant_id: str, dataset_id: UUID) -> tuple[Mapping[
        raise ProviderNotInitializeError(ex.description)
    document = documents[0]

-    documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
-    return documents_and_batch_fields, 200
+    return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200


 def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]:
@@ -248,8 +302,7 @@ def _update_document_by_text(tenant_id: str, dataset_id: UUID, document_id: UUID
        raise ProviderNotInitializeError(ex.description)
    document = documents[0]

-    documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
-    return documents_and_batch_fields, 200
+    return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200


@service_api_ns.route("/datasets/<uuid:dataset_id>/document/create-by-text")
@@ -267,6 +320,9 @@ class DocumentAddByTextApi(DatasetApiResource):
            400: "Bad request - invalid parameters",
        }
    )
+    @service_api_ns.response(
+        200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
+    )
    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_resource_check("documents", "dataset")
    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
@@ -296,6 +352,9 @@ class DeprecatedDocumentAddByTextApi(DatasetApiResource):
            400: "Bad request - invalid parameters",
        }
    )
+    @service_api_ns.response(
+        200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
+    )
    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_resource_check("documents", "dataset")
    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
@@ -319,6 +378,9 @@ class DocumentUpdateByTextApi(DatasetApiResource):
            404: "Document not found",
        }
    )
+    @service_api_ns.response(
+        200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
+    )
    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
    def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
@@ -347,6 +409,9 @@ class DeprecatedDocumentUpdateByTextApi(DatasetApiResource):
            404: "Document not found",
        }
    )
+    @service_api_ns.response(
+        200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
+    )
    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
    def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
@@ -363,7 +428,7 @@ class DocumentAddByFileApi(DatasetApiResource):

    @service_api_ns.doc("create_document_by_file")
    @service_api_ns.doc(description="Create a new document by uploading a file")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_CREATE_BY_FILE_PARAMS)
    @service_api_ns.doc(
        responses={
            200: "Document created successfully",
@@ -371,6 +436,9 @@ class DocumentAddByFileApi(DatasetApiResource):
            400: "Bad request - invalid file or parameters",
        }
    )
+    @service_api_ns.response(
+        200, "Document created successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
+    )
    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_resource_check("documents", "dataset")
    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
@@ -462,8 +530,7 @@ class DocumentAddByFileApi(DatasetApiResource):
        except ProviderTokenNotInitError as ex:
            raise ProviderNotInitializeError(ex.description)
        document = documents[0]
-        documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": batch}
-        return documents_and_batch_fields, 200
+        return dump_response(DocumentAndBatchResponse, {"document": document, "batch": batch}), 200


 def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]:
@@ -539,8 +606,7 @@ def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID
    except ProviderTokenNotInitError as ex:
        raise ProviderNotInitializeError(ex.description)
    document = documents[0]
-    documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": document.batch}
-    return documents_and_batch_fields, 200
+    return dump_response(DocumentAndBatchResponse, {"document": document, "batch": document.batch}), 200


@service_api_ns.route(
@@ -558,7 +624,7 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
            "Use PATCH /datasets/{dataset_id}/documents/{document_id} instead."
        )
    )
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+    @service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_UPDATE_BY_FILE_PARAMS)
    @service_api_ns.doc(
        responses={
            200: "Document updated successfully",
@@ -566,6 +632,9 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
            404: "Document not found",
        }
    )
+    @service_api_ns.response(
+        200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
+    )
    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
    def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
@@ -577,7 +646,7 @@ class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
 class DocumentListApi(DatasetApiResource):
    @service_api_ns.doc("list_documents")
    @service_api_ns.doc(description="List all documents in a dataset")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID"})
+    @service_api_ns.doc(params={"dataset_id": "Dataset ID", **query_params_from_model(DocumentListQuery)})
    @service_api_ns.doc(
        responses={
            200: "Documents retrieved successfully",
@@ -585,6 +654,9 @@ class DocumentListApi(DatasetApiResource):
            404: "Dataset not found",
        }
    )
+    @service_api_ns.response(
+        200, "Documents retrieved successfully", service_api_ns.models[DocumentListResponse.__name__]
+    )
    def get(self, tenant_id, dataset_id: UUID):
        dataset_id_str = str(dataset_id)
        tenant_id = str(tenant_id)
@@ -618,14 +690,14 @@ class DocumentListApi(DatasetApiResource):
        )

        response = {
-            "data": marshal(documents, document_fields),
+            "data": documents,
            "has_more": len(documents) == query_params.limit,
            "limit": query_params.limit,
            "total": paginated_documents.total,
            "page": query_params.page,
        }

-        return response
+        return dump_response(DocumentListResponse, response)


@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/download-zip")
@@ -680,6 +752,11 @@ class DocumentIndexingStatusApi(DatasetApiResource):
            404: "Dataset or documents not found",
        }
    )
+    @service_api_ns.response(
+        200,
+        "Indexing status retrieved successfully",
+        service_api_ns.models[DocumentStatusListResponse.__name__],
+    )
    def get(self, tenant_id, dataset_id: UUID, batch: str):
        dataset_id_str = str(dataset_id)
        tenant_id = str(tenant_id)
@@ -729,9 +806,8 @@ class DocumentIndexingStatusApi(DatasetApiResource):
                "completed_segments": completed_segments,
                "total_segments": total_segments,
            }
-            documents_status.append(marshal(document_dict, document_status_fields))
-        data = {"data": documents_status}
-        return data
+            documents_status.append(document_dict)
+        return dump_response(DocumentStatusListResponse, {"data": documents_status})


@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/download")
@@ -890,7 +966,7 @@ class DocumentApi(DatasetApiResource):

    @service_api_ns.doc("update_document_by_file")
    @service_api_ns.doc(description="Update an existing document by uploading a file")
-    @service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
+    @service_api_ns.doc(consumes=["multipart/form-data"], params=DOCUMENT_UPDATE_BY_FILE_PARAMS)
    @service_api_ns.doc(
        responses={
            200: "Document updated successfully",
@@ -898,6 +974,9 @@ class DocumentApi(DatasetApiResource):
            404: "Document not found",
        }
    )
+    @service_api_ns.response(
+        200, "Document updated successfully", service_api_ns.models[DocumentAndBatchResponse.__name__]
+    )
    @cloud_edition_billing_resource_check("vector_space", "dataset")
    @cloud_edition_billing_rate_limit_check("knowledge", "dataset")
    def patch(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
@@ -103,7 +103,11 @@ def _replace_schema_table_type(markdown: str, definition_name: str, row_name: st
        lines[index] = "|".join(cells)
        break

-    return "\n".join(lines)
+    return "\n".join(lines) + ("\n" if markdown.endswith("\n") else "")
+
+
+def _has_union_schema(schema: object) -> bool:
+    return isinstance(schema, dict) and (isinstance(schema.get("oneOf"), list) or isinstance(schema.get("anyOf"), list))


 def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
@@ -117,8 +121,20 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
    for definition_name, schema in definitions.items():
        if not isinstance(definition_name, str) or not isinstance(schema, dict):
            continue
-        one_of = schema.get("oneOf")
-        if not isinstance(one_of, list):
+
+        properties = schema.get("properties")
+        if isinstance(properties, dict):
+            for property_name, property_schema in properties.items():
+                if isinstance(property_name, str) and _has_union_schema(property_schema):
+                    markdown = _replace_schema_table_type(
+                        markdown,
+                        definition_name,
+                        property_name,
+                        _schema_markdown_type(property_schema),
+                    )
+
+        union_variants = schema.get("oneOf") or schema.get("anyOf")
+        if not isinstance(union_variants, list):
            continue

        markdown = _replace_schema_table_type(
@@ -128,7 +144,7 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:
            _schema_markdown_type(schema),
        )

-        for variant in one_of:
+        for variant in union_variants:
            variant_name = _definition_ref_name(variant)
            variant_schema = definitions.get(variant_name) if variant_name is not None else None
            if not isinstance(variant_name, str) or not isinstance(variant_schema, dict):
@@ -150,7 +166,7 @@ def _patch_union_schema_markdown(markdown: str, spec_path: Path) -> str:

 def _convert_spec_to_markdown(spec_path: Path, markdown_path: Path) -> None:
    markdown_path.parent.mkdir(parents=True, exist_ok=True)
-    with tempfile.TemporaryDirectory(prefix=f"{markdown_path.stem}-", dir=markdown_path.parent) as temp_dir:
+    with tempfile.TemporaryDirectory(prefix=f"{markdown_path.stem}-") as temp_dir:
        temp_markdown_path = Path(temp_dir) / markdown_path.name
        result = subprocess.run(
            [
@@ -158,12 +174,13 @@ def _convert_spec_to_markdown(spec_path: Path, markdown_path: Path) -> None:
                "--yes",
                SWAGGER_MARKDOWN_PACKAGE,
                "-i",
-                str(spec_path),
+                str(spec_path.resolve()),
                "-o",
-                str(temp_markdown_path),
+                str(temp_markdown_path.resolve()),
            ],
            check=False,
            capture_output=True,
+            cwd=temp_dir,
            text=True,
        )
        if result.returncode != 0:
@@ -1,95 +1,112 @@
-from flask_restx import fields
+"""Response schemas for dataset document endpoints."""

-from fields.dataset_fields import dataset_fields
-from libs.helper import TimestampField
+from datetime import datetime
+from typing import Any

-document_metadata_fields = {
-    "id": fields.String,
-    "name": fields.String,
-    "type": fields.String,
-    "value": fields.String,
-}
+from pydantic import Field, field_validator

-document_fields = {
-    "id": fields.String,
-    "position": fields.Integer,
-    "data_source_type": fields.String,
-    "data_source_info": fields.Raw(attribute="data_source_info_dict"),
-    "data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"),
-    "dataset_process_rule_id": fields.String,
-    "name": fields.String,
-    "created_from": fields.String,
-    "created_by": fields.String,
-    "created_at": TimestampField,
-    "tokens": fields.Integer,
-    "indexing_status": fields.String,
-    "error": fields.String,
-    "enabled": fields.Boolean,
-    "disabled_at": TimestampField,
-    "disabled_by": fields.String,
-    "archived": fields.Boolean,
-    "display_status": fields.String,
-    "word_count": fields.Integer,
-    "hit_count": fields.Integer,
-    "doc_form": fields.String,
-    "doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"),
-    # Summary index generation status:
-    # "SUMMARIZING" (when task is queued and generating)
-    "summary_index_status": fields.String,
-    # Whether this document needs summary index generation
-    "need_summary": fields.Boolean,
-}
+from fields.base import ResponseModel
+from libs.helper import to_timestamp

-document_with_segments_fields = {
-    "id": fields.String,
-    "position": fields.Integer,
-    "data_source_type": fields.String,
-    "data_source_info": fields.Raw(attribute="data_source_info_dict"),
-    "data_source_detail_dict": fields.Raw(attribute="data_source_detail_dict"),
-    "dataset_process_rule_id": fields.String,
-    "process_rule_dict": fields.Raw(attribute="process_rule_dict"),
-    "name": fields.String,
-    "created_from": fields.String,
-    "created_by": fields.String,
-    "created_at": TimestampField,
-    "tokens": fields.Integer,
-    "indexing_status": fields.String,
-    "error": fields.String,
-    "enabled": fields.Boolean,
-    "disabled_at": TimestampField,
-    "disabled_by": fields.String,
-    "archived": fields.Boolean,
-    "display_status": fields.String,
-    "word_count": fields.Integer,
-    "hit_count": fields.Integer,
-    "completed_segments": fields.Integer,
-    "total_segments": fields.Integer,
-    "doc_metadata": fields.List(fields.Nested(document_metadata_fields), attribute="doc_metadata_details"),
-    # Summary index generation status:
-    # "SUMMARIZING" (when task is queued and generating)
-    "summary_index_status": fields.String,
-    "need_summary": fields.Boolean,  # Whether this document needs summary index generation
-}

-dataset_and_document_fields = {
-    "dataset": fields.Nested(dataset_fields),
-    "documents": fields.List(fields.Nested(document_fields)),
-    "batch": fields.String,
-}
+def normalize_enum(value: Any) -> Any:
+    if isinstance(value, str) or value is None:
+        return value
+    return getattr(value, "value", value)

-document_status_fields = {
-    "id": fields.String,
-    "indexing_status": fields.String,
-    "processing_started_at": TimestampField,
-    "parsing_completed_at": TimestampField,
-    "cleaning_completed_at": TimestampField,
-    "splitting_completed_at": TimestampField,
-    "completed_at": TimestampField,
-    "paused_at": TimestampField,
-    "error": fields.String,
-    "stopped_at": TimestampField,
-    "completed_segments": fields.Integer,
-    "total_segments": fields.Integer,
-}

-document_status_fields_list = {"data": fields.List(fields.Nested(document_status_fields))}
+class DocumentMetadataResponse(ResponseModel):
+    id: str
+    name: str
+    type: str
+    value: str | int | float | bool | None = None
+
+
+class DocumentResponse(ResponseModel):
+    id: str
+    position: int | None = None
+    data_source_type: str | None = None
+    data_source_info: Any = Field(default=None, validation_alias="data_source_info_dict")
+    data_source_detail_dict: Any = None
+    dataset_process_rule_id: str | None = None
+    name: str
+    created_from: str | None = None
+    created_by: str | None = None
+    created_at: int | None = None
+    tokens: int | None = None
+    indexing_status: str | None = None
+    error: str | None = None
+    enabled: bool | None = None
+    disabled_at: int | None = None
+    disabled_by: str | None = None
+    archived: bool | None = None
+    display_status: str | None = None
+    word_count: int | None = None
+    hit_count: int | None = None
+    doc_form: str | None = None
+    doc_metadata: list[DocumentMetadataResponse] = Field(default_factory=list, validation_alias="doc_metadata_details")
+    summary_index_status: str | None = None
+    need_summary: bool | None = None
+
+    @field_validator("data_source_type", "indexing_status", "display_status", "doc_form", mode="before")
+    @classmethod
+    def _normalize_enum_fields(cls, value: Any) -> Any:
+        return normalize_enum(value)
+
+    @field_validator("doc_metadata", mode="before")
+    @classmethod
+    def _normalize_doc_metadata(cls, value: Any) -> list[Any]:
+        if value is None:
+            return []
+        return value
+
+    @field_validator("created_at", "disabled_at", mode="before")
+    @classmethod
+    def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
+        return to_timestamp(value)
+
+
+class DocumentListResponse(ResponseModel):
+    data: list[DocumentResponse]
+    has_more: bool
+    limit: int
+    total: int
+    page: int
+
+
+class DocumentStatusResponse(ResponseModel):
+    id: str
+    indexing_status: str
+    processing_started_at: int | None
+    parsing_completed_at: int | None
+    cleaning_completed_at: int | None
+    splitting_completed_at: int | None
+    completed_at: int | None
+    paused_at: int | None
+    error: str | None
+    stopped_at: int | None
+    completed_segments: int | None = None
+    total_segments: int | None = None
+
+    @field_validator("indexing_status", mode="before")
+    @classmethod
+    def _normalize_indexing_status(cls, value: Any) -> Any:
+        return normalize_enum(value)
+
+    @field_validator(
+        "processing_started_at",
+        "parsing_completed_at",
+        "cleaning_completed_at",
+        "splitting_completed_at",
+        "completed_at",
+        "paused_at",
+        "stopped_at",
+        mode="before",
+    )
+    @classmethod
+    def _normalize_timestamp(cls, value: datetime | int | None) -> int | None:
+        return to_timestamp(value)
+
+
+class DocumentStatusListResponse(ResponseModel):
+    data: list[DocumentStatusResponse]
@@ -4792,9 +4792,9 @@ Get dataset auto disable logs

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Success |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Indexing status retrieved successfully | [DocumentStatusListResponse](#documentstatuslistresponse) |

 ### /datasets/{dataset_id}/documents

@@ -4830,9 +4830,9 @@ Get documents in a dataset

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Documents retrieved successfully |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Documents retrieved successfully | [DocumentWithSegmentsListResponse](#documentwithsegmentslistresponse) |

 #### POST
 ##### Parameters
@@ -5028,10 +5028,10 @@ Get document indexing status

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Indexing status retrieved successfully |
-| 404 | Document not found |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Indexing status retrieved successfully | [DocumentStatusResponse](#documentstatusresponse) |
+| 404 | Document not found |  |

 ### /datasets/{dataset_id}/documents/{document_id}/metadata

@@ -11432,7 +11432,7 @@ Enum class for api provider schema type.
 | description | string |  | Yes |
 | id | string |  | Yes |
 | name | string |  | Yes |
-| parameters |  |  | Yes |
+| parameters | object<br>[ object ]<br>string |  | Yes |
 | server_code | string |  | Yes |
 | status | [AppMCPServerStatus](#appmcpserverstatus) |  | Yes |
 | updated_at | integer |  | No |
@@ -11903,7 +11903,7 @@ Condition detail
 | ---- | ---- | ----------- | -------- |
 | comparison_operator | string | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
 | name | string |  | Yes |
-| value |  |  | No |
+| value | string<br>[ string ]<br>integer<br>number |  | No |

 #### ConsoleDatasetListQuery

@@ -12820,7 +12820,7 @@ Request payload for bulk downloading documents as a zip archive.
 | id | string |  | Yes |
 | name | string |  | Yes |
 | type | string |  | Yes |
-| value | string |  | No |
+| value | string<br>integer<br>number<br>boolean |  | No |

 #### DocumentMetadataUpdatePayload

@@ -12844,14 +12844,14 @@ Request payload for bulk downloading documents as a zip archive.
 | created_by | string |  | No |
 | created_from | string |  | No |
 | data_source_detail_dict |  |  | No |
-| data_source_info_dict |  |  | No |
+| data_source_info |  |  | No |
 | data_source_type | string |  | No |
 | dataset_process_rule_id | string |  | No |
 | disabled_at | integer |  | No |
 | disabled_by | string |  | No |
 | display_status | string |  | No |
 | doc_form | string |  | No |
-| doc_metadata_details | [ [DocumentMetadataResponse](#documentmetadataresponse) ] |  | No |
+| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] |  | No |
 | enabled | boolean |  | No |
 | error | string |  | No |
 | hit_count | integer |  | No |
@@ -12893,6 +12893,16 @@ Request payload for bulk downloading documents as a zip archive.
 | stopped_at | integer |  | Yes |
 | total_segments | integer |  | No |

+#### DocumentWithSegmentsListResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| data | [ [DocumentWithSegmentsResponse](#documentwithsegmentsresponse) ] |  | Yes |
+| has_more | boolean |  | Yes |
+| limit | integer |  | Yes |
+| page | integer |  | Yes |
+| total | integer |  | Yes |
+
 #### DocumentWithSegmentsResponse

 | Name | Type | Description | Required |
@@ -12903,14 +12913,14 @@ Request payload for bulk downloading documents as a zip archive.
 | created_by | string |  | No |
 | created_from | string |  | No |
 | data_source_detail_dict |  |  | No |
-| data_source_info_dict |  |  | No |
+| data_source_info |  |  | No |
 | data_source_type | string |  | No |
 | dataset_process_rule_id | string |  | No |
 | disabled_at | integer |  | No |
 | disabled_by | string |  | No |
 | display_status | string |  | No |
 | doc_form | string |  | No |
-| doc_metadata_details | [ [DocumentMetadataResponse](#documentmetadataresponse) ] |  | No |
+| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] |  | No |
 | enabled | boolean |  | No |
 | error | string |  | No |
 | hit_count | integer |  | No |
@@ -14000,7 +14010,7 @@ Enum class for large language model mode.
 | ---- | ---- | ----------- | -------- |
 | id | string |  | Yes |
 | name | string |  | Yes |
-| value |  |  | No |
+| value | string<br>integer<br>number |  | No |

 #### MetadataFilteringCondition

@@ -14595,7 +14605,7 @@ Form input definition.
 | ---- | ---- | ----------- | -------- |
 | current_identifier | string |  | No |
 | type | [Type](#type) |  | Yes |
-| value |  |  | Yes |
+| value | [Github](#github)<br>[Marketplace](#marketplace)<br>[Package](#package) |  | Yes |

 #### PluginEndpointListResponse

@@ -15130,7 +15140,7 @@ Form input definition.
 | description | string |  | No |
 | icon | string |  | No |
 | icon_background | string |  | No |
-| icon_type |  |  | No |
+| icon_type | string<br>[IconType](#icontype) |  | No |
 | privacy_policy | string |  | No |
 | prompt_public | boolean |  | No |
 | show_workflow_steps | boolean |  | No |
@@ -753,15 +753,17 @@ Create a new document by uploading a file

 | Name | Located in | Description | Required | Schema |
 | ---- | ---------- | ----------- | -------- | ------ |
+| data | formData | Optional JSON string with document creation settings. | No | string |
+| file | formData | Document file to upload. | Yes | file |
 | dataset_id | path | Dataset ID | Yes | string |

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Document created successfully |
-| 400 | Bad request - invalid file or parameters |
-| 401 | Unauthorized - invalid API token |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
+| 400 | Bad request - invalid file or parameters |  |
+| 401 | Unauthorized - invalid API token |  |

 ### /datasets/{dataset_id}/document/create-by-text

@@ -779,11 +781,11 @@ Create a new document by providing text content

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Document created successfully |
-| 400 | Bad request - invalid parameters |
-| 401 | Unauthorized - invalid API token |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
+| 400 | Bad request - invalid parameters |  |
+| 401 | Unauthorized - invalid API token |  |

 ### /datasets/{dataset_id}/document/create_by_file

@@ -796,15 +798,17 @@ Create a new document by uploading a file

 | Name | Located in | Description | Required | Schema |
 | ---- | ---------- | ----------- | -------- | ------ |
+| data | formData | Optional JSON string with document creation settings. | No | string |
+| file | formData | Document file to upload. | Yes | file |
 | dataset_id | path | Dataset ID | Yes | string |

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Document created successfully |
-| 400 | Bad request - invalid file or parameters |
-| 401 | Unauthorized - invalid API token |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
+| 400 | Bad request - invalid file or parameters |  |
+| 401 | Unauthorized - invalid API token |  |

 ### /datasets/{dataset_id}/document/create_by_text

@@ -823,11 +827,11 @@ Deprecated legacy alias for creating a new document by providing text content. U

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Document created successfully |
-| 400 | Bad request - invalid parameters |
-| 401 | Unauthorized - invalid API token |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Document created successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
+| 400 | Bad request - invalid parameters |  |
+| 401 | Unauthorized - invalid API token |  |

 ### /datasets/{dataset_id}/documents

@@ -841,14 +845,18 @@ List all documents in a dataset
 | Name | Located in | Description | Required | Schema |
 | ---- | ---------- | ----------- | -------- | ------ |
 | dataset_id | path | Dataset ID | Yes | string |
+| keyword | query | Search keyword | No | string |
+| limit | query | Number of items per page | No | integer |
+| page | query | Page number | No | integer |
+| status | query | Document status filter | No | string |

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Documents retrieved successfully |
-| 401 | Unauthorized - invalid API token |
-| 404 | Dataset not found |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Documents retrieved successfully | [DocumentListResponse](#documentlistresponse) |
+| 401 | Unauthorized - invalid API token |  |
+| 404 | Dataset not found |  |

 ### /datasets/{dataset_id}/documents/download-zip

@@ -956,11 +964,11 @@ Get indexing status for documents in a batch

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Indexing status retrieved successfully |
-| 401 | Unauthorized - invalid API token |
-| 404 | Dataset or documents not found |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Indexing status retrieved successfully | [DocumentStatusListResponse](#documentstatuslistresponse) |
+| 401 | Unauthorized - invalid API token |  |
+| 404 | Dataset or documents not found |  |

 ### /datasets/{dataset_id}/documents/{document_id}

@@ -1019,16 +1027,18 @@ Update an existing document by uploading a file

 | Name | Located in | Description | Required | Schema |
 | ---- | ---------- | ----------- | -------- | ------ |
+| data | formData | Optional JSON string with document update settings. | No | string |
+| file | formData | Replacement document file. | No | file |
 | dataset_id | path | Dataset ID | Yes | string |
 | document_id | path | Document ID | Yes | string |

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Document updated successfully |
-| 401 | Unauthorized - invalid API token |
-| 404 | Document not found |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
+| 401 | Unauthorized - invalid API token |  |
+| 404 | Document not found |  |

 ### /datasets/{dataset_id}/documents/{document_id}/download

@@ -1274,16 +1284,18 @@ Deprecated legacy alias for updating an existing document by uploading a file. U

 | Name | Located in | Description | Required | Schema |
 | ---- | ---------- | ----------- | -------- | ------ |
+| data | formData | Optional JSON string with document update settings. | No | string |
+| file | formData | Replacement document file. | No | file |
 | dataset_id | path | Dataset ID | Yes | string |
 | document_id | path | Document ID | Yes | string |

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Document updated successfully |
-| 401 | Unauthorized - invalid API token |
-| 404 | Document not found |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
+| 401 | Unauthorized - invalid API token |  |
+| 404 | Document not found |  |

 ### /datasets/{dataset_id}/documents/{document_id}/update-by-text

@@ -1302,11 +1314,11 @@ Update an existing document by providing text content

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Document updated successfully |
-| 401 | Unauthorized - invalid API token |
-| 404 | Document not found |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
+| 401 | Unauthorized - invalid API token |  |
+| 404 | Document not found |  |

 ### /datasets/{dataset_id}/documents/{document_id}/update_by_file

@@ -1320,16 +1332,18 @@ Deprecated legacy alias for updating an existing document by uploading a file. U

 | Name | Located in | Description | Required | Schema |
 | ---- | ---------- | ----------- | -------- | ------ |
+| data | formData | Optional JSON string with document update settings. | No | string |
+| file | formData | Replacement document file. | No | file |
 | dataset_id | path | Dataset ID | Yes | string |
 | document_id | path | Document ID | Yes | string |

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Document updated successfully |
-| 401 | Unauthorized - invalid API token |
-| 404 | Document not found |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
+| 401 | Unauthorized - invalid API token |  |
+| 404 | Document not found |  |

 ### /datasets/{dataset_id}/documents/{document_id}/update_by_text

@@ -1349,11 +1363,11 @@ Deprecated legacy alias for updating an existing document by providing text cont

 ##### Responses

-| Code | Description |
-| ---- | ----------- |
-| 200 | Document updated successfully |
-| 401 | Unauthorized - invalid API token |
-| 404 | Document not found |
+| Code | Description | Schema |
+| ---- | ----------- | ------ |
+| 200 | Document updated successfully | [DocumentAndBatchResponse](#documentandbatchresponse) |
+| 401 | Unauthorized - invalid API token |  |
+| 404 | Document not found |  |

 ### /datasets/{dataset_id}/hit-testing

@@ -2288,7 +2302,7 @@ Condition detail
 | ---- | ---- | ----------- | -------- |
 | comparison_operator | string | *Enum:* `"<"`, `"="`, `">"`, `"after"`, `"before"`, `"contains"`, `"empty"`, `"end with"`, `"in"`, `"is"`, `"is not"`, `"not contains"`, `"not empty"`, `"not in"`, `"start with"`, `"≠"`, `"≤"`, `"≥"` | Yes |
 | name | string |  | Yes |
-| value |  |  | No |
+| value | string<br>[ string ]<br>integer<br>number |  | No |

 #### ConversationListQuery

@@ -2637,6 +2651,13 @@ Condition detail
 | inputs | object |  | Yes |
 | is_published | boolean |  | Yes |

+#### DocumentAndBatchResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| batch | string |  | Yes |
+| document | [DocumentResponse](#documentresponse) |  | Yes |
+
 #### DocumentBatchDownloadZipPayload

 Request payload for bulk downloading documents as a zip archive.
@@ -2654,6 +2675,16 @@ Request payload for bulk downloading documents as a zip archive.
 | page | integer | Page number | No |
 | status | string | Document status filter | No |

+#### DocumentListResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| data | [ [DocumentResponse](#documentresponse) ] |  | Yes |
+| has_more | boolean |  | Yes |
+| limit | integer |  | Yes |
+| page | integer |  | Yes |
+| total | integer |  | Yes |
+
 #### DocumentMetadataOperation

 | Name | Type | Description | Required |
@@ -2662,6 +2693,67 @@ Request payload for bulk downloading documents as a zip archive.
 | metadata_list | [ [MetadataDetail](#metadatadetail) ] |  | Yes |
 | partial_update | boolean |  | No |

+#### DocumentMetadataResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| id | string |  | Yes |
+| name | string |  | Yes |
+| type | string |  | Yes |
+| value | string<br>integer<br>number<br>boolean |  | No |
+
+#### DocumentResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| archived | boolean |  | No |
+| created_at | integer |  | No |
+| created_by | string |  | No |
+| created_from | string |  | No |
+| data_source_detail_dict |  |  | No |
+| data_source_info |  |  | No |
+| data_source_type | string |  | No |
+| dataset_process_rule_id | string |  | No |
+| disabled_at | integer |  | No |
+| disabled_by | string |  | No |
+| display_status | string |  | No |
+| doc_form | string |  | No |
+| doc_metadata | [ [DocumentMetadataResponse](#documentmetadataresponse) ] |  | No |
+| enabled | boolean |  | No |
+| error | string |  | No |
+| hit_count | integer |  | No |
+| id | string |  | Yes |
+| indexing_status | string |  | No |
+| name | string |  | Yes |
+| need_summary | boolean |  | No |
+| position | integer |  | No |
+| summary_index_status | string |  | No |
+| tokens | integer |  | No |
+| word_count | integer |  | No |
+
+#### DocumentStatusListResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| data | [ [DocumentStatusResponse](#documentstatusresponse) ] |  | Yes |
+
+#### DocumentStatusResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| cleaning_completed_at | integer |  | Yes |
+| completed_at | integer |  | Yes |
+| completed_segments | integer |  | No |
+| error | string |  | Yes |
+| id | string |  | Yes |
+| indexing_status | string |  | Yes |
+| parsing_completed_at | integer |  | Yes |
+| paused_at | integer |  | Yes |
+| processing_started_at | integer |  | Yes |
+| splitting_completed_at | integer |  | Yes |
+| stopped_at | integer |  | Yes |
+| total_segments | integer |  | No |
+
 #### DocumentTextCreatePayload

 | Name | Type | Description | Required |
@@ -2896,7 +2988,7 @@ Note: The SQLAlchemy model defines an `is_anonymous` property for Flask-Login se
 | ---- | ---- | ----------- | -------- |
 | id | string |  | Yes |
 | name | string |  | Yes |
-| value |  |  | No |
+| value | string<br>integer<br>number |  | No |

 #### MetadataFilteringCondition

@@ -3247,7 +3339,7 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag
 | created_by_end_user | [SimpleEndUser](#simpleenduser) |  | No |
 | created_by_role | string |  | No |
 | created_from | string |  | No |
-| details |  |  | No |
+| details | object<br>[ object ]<br>string<br>integer<br>number<br>boolean |  | No |
 | id | string |  | Yes |
 | workflow_run | [WorkflowRunForLogResponse](#workflowrunforlogresponse) |  | No |

@@ -3269,7 +3361,7 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag
 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
 | created_at | integer |  | No |
-| elapsed_time |  |  | No |
+| elapsed_time | number<br>integer |  | No |
 | error | string |  | No |
 | exceptions_count | integer |  | No |
 | finished_at | integer |  | No |
@@ -3293,11 +3385,11 @@ Accept the legacy single-tag Service API payload while exposing a normalized tag
 | Name | Type | Description | Required |
 | ---- | ---- | ----------- | -------- |
 | created_at | integer |  | No |
-| elapsed_time |  |  | No |
+| elapsed_time | number<br>integer |  | No |
 | error | string |  | No |
 | finished_at | integer |  | No |
 | id | string |  | Yes |
-| inputs |  |  | No |
+| inputs | object<br>[ object ]<br>string<br>integer<br>number<br>boolean |  | No |
 | outputs | object |  | No |
 | status | string |  | Yes |
 | total_steps | integer |  | No |
@@ -188,6 +188,45 @@ def test_patch_union_schema_markdown_fills_converter_blank_schema_types(tmp_path
    assert "| allowed_file_types | [ [FileType](#filetype) ] |  | No |" in patched


+def test_patch_union_schema_markdown_fills_regular_definition_union_property(tmp_path):
+    # Scenario: the spec declares `value` as an anyOf of four primitive types plus null, while the
+    # markdown table row for it only says `string`.
+    module = _load_generate_swagger_markdown_docs_module()
+    spec_path = tmp_path / "service-swagger.json"
+    spec_path.write_text(
+        json.dumps(
+            {
+                "definitions": {
+                    "DocumentMetadataResponse": {
+                        "properties": {
+                            "id": {"type": "string"},
+                            "value": {
+                                "anyOf": [
+                                    {"type": "string"},
+                                    {"type": "integer"},
+                                    {"type": "number"},
+                                    {"type": "boolean"},
+                                    {"type": "null"},
+                                ],
+                            },
+                        },
+                    },
+                }
+            }
+        ),
+        encoding="utf-8",
+    )
+    markdown = """#### DocumentMetadataResponse
+
+| Name | Type | Description | Required |
+| ---- | ---- | ----------- | -------- |
+| id | string |  | Yes |
+| value | string |  | No |
+"""
+
+    patched = module._patch_union_schema_markdown(markdown, spec_path)
+
+    # Expected: the Type cell lists every non-null member joined by <br>; the null member is
+    # not rendered and the Required column is left as it was.
+    assert "| value | string<br>integer<br>number<br>boolean |  | No |" in patched
+
+
 def test_patch_union_schema_markdown_ignores_specs_without_definitions(tmp_path):
    module = _load_generate_swagger_markdown_docs_module()
    spec_path = tmp_path / "console-swagger.json"
@@ -236,7 +275,7 @@ def test_patch_union_schema_markdown_ignores_unrenderable_shapes(tmp_path):
        == "#### Definition\n| field |"
    )

-    assert module._patch_union_schema_markdown("#### BrokenUnion\n", spec_path) == "#### BrokenUnion"
+    assert module._patch_union_schema_markdown("#### BrokenUnion\n", spec_path) == "#### BrokenUnion\n"


 def test_convert_spec_to_markdown_patches_generated_union_tables(tmp_path, monkeypatch):
@@ -1,4 +1,3 @@
-from types import SimpleNamespace
 from unittest.mock import MagicMock, patch

 import pytest
@@ -9,6 +8,7 @@ import services
 from controllers.console import console_ns
 from controllers.console.datasets.datasets_document import (
    DatasetDocumentListApi,
+    DatasetInitApi,
    DocumentApi,
    DocumentBatchDownloadZipApi,
    DocumentBatchIndexingEstimateApi,
@@ -20,6 +20,7 @@ from controllers.console.datasets.datasets_document import (
    DocumentMetadataApi,
    DocumentPipelineExecutionLogApi,
    DocumentProcessingApi,
+    DocumentRenameApi,
    DocumentRetryApi,
    DocumentStatusApi,
    DocumentSummaryStatusApi,
@@ -33,7 +34,9 @@ from controllers.console.datasets.error import (
    InvalidMetadataError,
 )
 from core.rag.index_processor.constant.index_type import IndexStructureType
-from models.enums import DataSourceType, IndexingStatus
+from models.dataset import Dataset
+from models.dataset import Document as DatasetDocument
+from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus


 def unwrap(func):
@@ -42,6 +45,79 @@ def unwrap(func):
    return func


+def make_serializable_document(**overrides):
+    # Test helper: returns a mock document carrying exactly the attributes listed below.
+    # Keyword overrides replace a default or add a further attribute.
+    attrs = {
+        "id": "doc-1",
+        "position": 1,
+        "data_source_type": "upload_file",
+        "data_source_info_dict": {"upload_file_id": "file-1"},
+        "data_source_detail_dict": {},
+        "dataset_process_rule_id": None,
+        "name": "Document",
+        "created_from": "web",
+        "created_by": "u1",
+        "created_at": None,
+        "tokens": None,
+        "indexing_status": "completed",
+        "error": None,
+        "enabled": True,
+        "disabled_at": None,
+        "disabled_by": None,
+        "archived": False,
+        "display_status": "available",
+        "word_count": None,
+        "hit_count": 0,
+        "doc_form": "text_model",
+        "doc_metadata_details": None,
+        "summary_index_status": None,
+        "need_summary": False,
+        "process_rule_dict": None,
+        "completed_segments": None,
+        "total_segments": None,
+    }
+    attrs.update(overrides)
+    # spec_set limits the mock to these attribute names: reading or assigning any other name
+    # raises AttributeError instead of silently producing a child MagicMock.
+    document = MagicMock(spec_set=list(attrs))
+    for name, value in attrs.items():
+        setattr(document, name, value)
+    return document
+
+
+def make_dataset(**overrides):
+    # Test helper: builds a Dataset model instance (imported from models.dataset) from the
+    # defaults below; keyword overrides replace or extend them before construction.
+    attrs = {
+        "id": "ds-1",
+        "tenant_id": "tenant-1",
+        "name": "Dataset",
+        "indexing_technique": "economy",
+        "created_by": "u1",
+        "summary_index_setting": {"enable": True},
+    }
+    attrs.update(overrides)
+    return Dataset(**attrs)
+
+
+def make_document(**overrides):
+    # Test helper: builds a DatasetDocument (models.dataset.Document) from the defaults below,
+    # using enum members rather than plain strings for the enum-typed columns.
+    # Keyword overrides replace or extend the defaults before construction.
+    attrs = {
+        "id": "doc-1",
+        "tenant_id": "tenant-1",
+        "dataset_id": "ds-1",
+        "position": 1,
+        "data_source_type": DataSourceType.UPLOAD_FILE,
+        "data_source_info": None,
+        "batch": "batch-1",
+        "name": "Document",
+        "created_from": DocumentCreatedFrom.WEB,
+        "created_by": "u1",
+        "indexing_status": IndexingStatus.COMPLETED,
+        "enabled": True,
+        "archived": False,
+        "doc_metadata": None,
+        "doc_form": IndexStructureType.PARAGRAPH_INDEX,
+        "need_summary": False,
+    }
+    attrs.update(overrides)
+    return DatasetDocument(**attrs)
+
+
@pytest.fixture
 def tenant_ctx():
    return (MagicMock(is_dataset_editor=True, id="u1"), "tenant-1")
@@ -58,7 +134,7 @@ def patch_tenant(tenant_ctx):

@pytest.fixture
 def dataset():
-    return MagicMock(id="ds-1", indexing_technique="economy", summary_index_setting={"enable": True})
+    return make_dataset()


@pytest.fixture
@@ -130,11 +206,9 @@ class TestDatasetDocumentListApi:
        api = DatasetDocumentListApi()
        method = unwrap(api.get)

-        doc = MagicMock(id="doc-1")
+        doc = make_serializable_document()
        pagination = MagicMock(items=[doc], total=1)

-        count_mock = MagicMock(return_value=2)
-
        with (
            app.test_request_context("/?fetch=true"),
            patch(
@@ -149,14 +223,12 @@ class TestDatasetDocumentListApi:
                "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
                return_value=None,
            ),
-            patch(
-                "controllers.console.datasets.datasets_document.marshal",
-                return_value=[{"id": "doc-1"}],
-            ),
        ):
            resp = method(api, "ds-1")

-        assert resp["data"]
+        assert resp["data"][0]["id"] == "doc-1"
+        assert resp["data"][0]["completed_segments"] == 2
+        assert resp["data"][0]["total_segments"] == 2

    def test_get_with_search_status_and_created_at_sort(
        self, app: Flask, patch_tenant, patch_dataset, patch_permission
@@ -164,7 +236,7 @@ class TestDatasetDocumentListApi:
        api = DatasetDocumentListApi()
        method = unwrap(api.get)

-        pagination = MagicMock(items=[MagicMock()], total=1)
+        pagination = MagicMock(items=[make_serializable_document()], total=1)

        with (
            app.test_request_context("/?keyword=test&status=enabled&sort=created_at"),
@@ -180,10 +252,6 @@ class TestDatasetDocumentListApi:
                "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
                return_value=None,
            ),
-            patch(
-                "controllers.console.datasets.datasets_document.marshal",
-                return_value=[{"id": "doc-1"}],
-            ),
        ):
            resp = method(api, "ds-1")

@@ -193,7 +261,7 @@ class TestDatasetDocumentListApi:
        api = DatasetDocumentListApi()
        method = unwrap(api.get)

-        pagination = MagicMock(items=[MagicMock()], total=1)
+        pagination = MagicMock(items=[make_serializable_document()], total=1)

        with (
            app.test_request_context("/"),
@@ -205,22 +273,21 @@ class TestDatasetDocumentListApi:
                "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
                return_value=None,
            ),
-            patch(
-                "controllers.console.datasets.datasets_document.marshal",
-                return_value=[{"id": "doc-1"}],
-            ),
        ):
            response = method(api, "ds-1")

        assert response["total"] == 1
+        assert response["data"][0]["id"] == "doc-1"
+        assert "completed_segments" not in response["data"][0]
+        assert "total_segments" not in response["data"][0]

    def test_post_success(self, app: Flask, patch_tenant, patch_dataset, patch_permission):
        api = DatasetDocumentListApi()
        method = unwrap(api.post)

        payload = {"indexing_technique": "economy"}
-        created_dataset = SimpleNamespace(id="ds-1", name="Dataset", indexing_technique="economy")
-        created_document = SimpleNamespace(id="doc-1", name="Document", doc_metadata_details=None)
+        created_dataset = make_dataset()
+        created_document = make_document()

        with (
            app.test_request_context("/", json=payload),
@@ -237,10 +304,17 @@ class TestDatasetDocumentListApi:
                "controllers.console.datasets.datasets_document.DocumentService.save_document_with_dataset_id",
                return_value=([created_document], "batch-1"),
            ),
+            patch("models.dataset.db.session.scalar", return_value=0),
        ):
            response = method(api, "ds-1")

        assert "documents" in response
+        assert response["dataset"]["id"] == "ds-1"
+        assert response["documents"][0]["id"] == "doc-1"
+        assert response["documents"][0]["data_source_info"] == {}
+        assert response["documents"][0]["doc_metadata"] == []
+        assert "data_source_info_dict" not in response["documents"][0]
+        assert "doc_metadata_details" not in response["documents"][0]

    def test_post_forbidden(self, app: Flask):
        api = DatasetDocumentListApi()
@@ -267,7 +341,7 @@ class TestDatasetDocumentListApi:
        api = DatasetDocumentListApi()
        method = unwrap(api.get)

-        pagination = MagicMock(items=[MagicMock()], total=1)
+        pagination = MagicMock(items=[make_serializable_document()], total=1)

        with (
            app.test_request_context("/?fetch=maybe"),
@@ -279,10 +353,6 @@ class TestDatasetDocumentListApi:
                "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
                return_value=None,
            ),
-            patch(
-                "controllers.console.datasets.datasets_document.marshal",
-                return_value=[{"id": "doc-1"}],
-            ),
        ):
            response = method(api, "ds-1")

@@ -310,6 +380,37 @@ class TestDatasetDocumentListApi:
        assert response["total"] == 0


+class TestDatasetInitApi:
+    # Covers DatasetInitApi.post with the service layer patched out, checking the shape of the
+    # serialized response built from model instances.
+    def test_post_success_serializes_created_dataset_and_documents(self, app: Flask, patch_tenant):
+        api = DatasetInitApi()
+        method = unwrap(api.post)
+
+        payload = {"indexing_technique": "economy"}
+        created_dataset = make_dataset()
+        created_document = make_document(id="doc-init")
+
+        with (
+            app.test_request_context("/", json=payload),
+            patch.object(type(console_ns), "payload", payload),
+            patch(
+                "controllers.console.datasets.datasets_document.DocumentService.document_create_args_validate",
+                return_value=None,
+            ),
+            patch(
+                "controllers.console.datasets.datasets_document.DocumentService.save_document_without_dataset_id",
+                return_value=(created_dataset, [created_document], "batch-init"),
+            ),
+            # Any db.session.scalar call made through models.dataset yields 0 for this test.
+            # NOTE(review): presumably needed by count-style model properties read during
+            # serialization -- confirm which property triggers it.
+            patch("models.dataset.db.session.scalar", return_value=0),
+        ):
+            response = method(api)
+
+        # The document was built with data_source_info=None and doc_metadata=None; the response
+        # is expected to expose them as {} and [] respectively.
+        assert response["dataset"]["id"] == "ds-1"
+        assert response["documents"][0]["id"] == "doc-init"
+        assert response["documents"][0]["data_source_info"] == {}
+        assert response["documents"][0]["doc_metadata"] == []
+        assert response["batch"] == "batch-init"
+
+
 class TestDocumentApi:
    def test_get_success(self, app: Flask, patch_tenant):
        api = DocumentApi()
@@ -899,7 +1000,7 @@ class TestDocumentBatchDownloadZipApi:
        api = DocumentBatchDownloadZipApi()
        method = unwrap(api.post)

-        payload = {"document_ids": []}
+        payload: dict[str, list[str]] = {"document_ids": []}

        with app.test_request_context("/", json=payload), patch.object(type(console_ns), "payload", payload):
            with pytest.raises(ValueError):
@@ -1046,6 +1147,53 @@ class TestDocumentBatchIndexingEstimateApi:


 class TestDocumentBatchIndexingStatusApi:
+    def test_get_batch_status_success_serializes_status_shape(self, app: Flask, patch_tenant):
+        api = DocumentBatchIndexingStatusApi()  # pins the exact batch-status payload for one document
+        method = unwrap(api.get)
+
+        document = MagicMock(
+            id="doc-1",
+            indexing_status=IndexingStatus.COMPLETED,  # enum member in; asserted below as the string "completed"
+            is_paused=False,  # set on the mock but absent from the asserted payload
+            processing_started_at=None,
+            parsing_completed_at=None,
+            cleaning_completed_at=None,
+            splitting_completed_at=None,
+            completed_at=None,
+            paused_at=None,
+            error=None,
+            stopped_at=None,
+        )
+
+        with (
+            app.test_request_context("/"),
+            patch.object(api, "get_batch_documents", return_value=[document]),  # replaces the batch lookup
+            patch(
+                "controllers.console.datasets.datasets_document.db.session.scalar",
+                side_effect=[2, 3],  # consumed in call order; surfaces below as completed=2, total=3
+            ),
+        ):
+            response = method(api, "ds-1", "batch-1")
+
+        assert response == {  # full equality, so any extra or missing key fails the test
+            "data": [
+                {
+                    "id": "doc-1",
+                    "indexing_status": "completed",
+                    "processing_started_at": None,
+                    "parsing_completed_at": None,
+                    "cleaning_completed_at": None,
+                    "splitting_completed_at": None,
+                    "completed_at": None,
+                    "paused_at": None,
+                    "error": None,
+                    "stopped_at": None,
+                    "completed_segments": 2,
+                    "total_segments": 3,
+                }
+            ]
+        }
+
    def test_get_batch_status_invalid_batch(self, app: Flask, patch_tenant):
        """Test batch status with invalid batch"""
        api = DocumentBatchIndexingStatusApi()
@@ -1057,6 +1205,39 @@ class TestDocumentBatchIndexingStatusApi:


 class TestDocumentIndexingStatusApi:
+    def test_get_status_success_serializes_status_shape(self, app: Flask, patch_tenant):
+        api = DocumentIndexingStatusApi()  # checks the single-document status payload
+        method = unwrap(api.get)
+
+        document = MagicMock(
+            id="doc-1",
+            indexing_status=IndexingStatus.INDEXING,  # enum member in; asserted below as the string "indexing"
+            is_paused=False,
+            processing_started_at=None,
+            parsing_completed_at=None,
+            cleaning_completed_at=None,
+            splitting_completed_at=None,
+            completed_at=None,
+            paused_at=None,
+            error=None,
+            stopped_at=None,
+        )
+
+        with (
+            app.test_request_context("/"),
+            patch.object(api, "get_document", return_value=document),  # replaces the document lookup
+            patch(
+                "controllers.console.datasets.datasets_document.db.session.scalar",
+                side_effect=[1, 4],  # consumed in call order; surfaces below as completed=1, total=4
+            ),
+        ):
+            response = method(api, "ds-1", "doc-1")
+
+        assert response["id"] == "doc-1"  # the payload is a flat dict, not wrapped in "data"
+        assert response["indexing_status"] == "indexing"
+        assert response["completed_segments"] == 1
+        assert response["total_segments"] == 4
+
    def test_get_status_document_not_found(self, app: Flask, patch_tenant):
        """Test getting status for non-existent document"""
        api = DocumentIndexingStatusApi()
@@ -1067,6 +1248,40 @@ class TestDocumentIndexingStatusApi:
                method(api, "ds-1", "invalid-doc")


+class TestDocumentRenameApi:  # response-shape coverage for DocumentRenameApi.post
+    def test_post_success_serializes_document_shape(self, app: Flask, patch_tenant):
+        api = DocumentRenameApi()
+        method = unwrap(api.post)  # NOTE(review): presumably bypasses the route decorators — confirm
+
+        payload = {"name": "Renamed Document"}
+        renamed_document = make_document(id="doc-renamed", name="Renamed Document")
+
+        with (
+            app.test_request_context("/", json=payload),
+            patch.object(type(console_ns), "payload", payload),  # replaced on the namespace's class, not the instance
+            patch(
+                "controllers.console.datasets.datasets_document.DatasetService.get_dataset",
+                return_value=make_dataset(),
+            ),
+            patch(
+                "controllers.console.datasets.datasets_document.DatasetService.check_dataset_operator_permission",
+                return_value=None,  # stubbed so the permission check never raises
+            ),
+            patch(
+                "controllers.console.datasets.datasets_document.DocumentService.rename_document",
+                return_value=renamed_document,  # id differs from the "doc-1" URL argument on purpose of the assert below
+            ),
+            patch("models.dataset.db.session.scalar", return_value=0),  # likely hit during serialization; confirm
+        ):
+            response = method(api, "ds-1", "doc-1")
+
+        assert response["id"] == "doc-renamed"  # the response reflects the object returned by rename_document
+        assert response["name"] == "Renamed Document"
+        assert response["data_source_info"] == {}
+        assert response["doc_metadata"] == []
+        assert "data_source_info_dict" not in response  # only the "data_source_info" key is exposed
+
+
 class TestDocumentApiMetadata:
    def test_get_with_only_option(self, app: Flask, patch_tenant):
        """Test get with 'only' metadata option"""
@@ -1291,7 +1506,7 @@ class TestDocumentListAdvancedCases:
        api = DatasetDocumentListApi()
        method = unwrap(api.get)

-        pagination = MagicMock(items=[MagicMock()], total=1)
+        pagination = MagicMock(items=[make_serializable_document()], total=1)

        with (
            app.test_request_context("/?sort=updated_at"),
@@ -1303,10 +1518,6 @@ class TestDocumentListAdvancedCases:
                "controllers.console.datasets.datasets_document.DocumentService.enrich_documents_with_summary_index_status",
                return_value=None,
            ),
-            patch(
-                "controllers.console.datasets.datasets_document.marshal",
-                return_value=[{"id": "doc-1"}],
-            ),
        ):
            response = method(api, "ds-1")

@@ -44,6 +44,41 @@ from services.dataset_service import DocumentService
 from services.entities.knowledge_entities.knowledge_entities import ProcessRule, RetrievalModel


+def make_serializable_document(**overrides: object) -> Mock:
+    """Build a spec-restricted ``Mock`` document with default attribute values, applying ``overrides``."""
+    merged: dict[str, object] = {
+        "id": str(uuid.uuid4()),
+        "position": 1,
+        "data_source_type": "upload_file",
+        "data_source_info_dict": {"upload_file_id": "file-1"},
+        "data_source_detail_dict": {},
+        "dataset_process_rule_id": None,
+        "batch": "batch-1",
+        "name": "Test Document",
+        "created_from": "api",
+        "created_by": "user-1",
+        "created_at": None,
+        "tokens": None,
+        "indexing_status": "completed",
+        "error": None,
+        "enabled": True,
+        "disabled_at": None,
+        "disabled_by": None,
+        "archived": False,
+        "display_status": "available",
+        "word_count": None,
+        "hit_count": 0,
+        "doc_form": "text_model",
+        "doc_metadata_details": None,
+        "summary_index_status": None,
+        "need_summary": False,
+        **overrides,
+    }
+    document = Mock(spec_set=list(merged))  # spec_set: touching any attribute outside this list raises
+    # Assigned after construction: Mock(name=...) would label the mock instead of setting a `name` attribute.
+    for attr_name, attr_value in merged.items():
+        setattr(document, attr_name, attr_value)
+    return document
+
+
 class TestDocumentTextCreatePayload:
    """Test suite for DocumentTextCreatePayload Pydantic model."""

@@ -226,7 +261,7 @@ class TestDocumentService:
        assert hasattr(DocumentService, "batch_update_document_status")

    @patch.object(DocumentService, "get_document")
-    def test_get_document_returns_document(self, mock_get):
+    def test_get_document_returns_document(self, mock_get: Mock) -> None:
        """Test get_document returns document object."""
        mock_doc = Mock()
        mock_doc.id = str(uuid.uuid4())
@@ -235,6 +270,7 @@ class TestDocumentService:
        mock_get.return_value = mock_doc

        result = DocumentService.get_document(dataset_id="dataset_id", document_id="doc_id")
+        assert result is not None
        assert result.name == "Test Document"
        assert result.indexing_status == "completed"

@@ -510,7 +546,7 @@ class TestDocumentApiGet:
    """

    @pytest.fixture
-    def mock_doc_detail(self, mock_tenant):
+    def mock_doc_detail(self, mock_tenant: Mock) -> Mock:
        """A document mock with every attribute ``DocumentApi.get`` reads."""
        doc = Mock()
        doc.id = str(uuid.uuid4())
@@ -551,8 +587,8 @@ class TestDocumentApiGet:
    @patch("controllers.service_api.dataset.document.DatasetService")
    @patch("controllers.service_api.dataset.document.DocumentService")
    def test_get_document_success_with_all_metadata(
-        self, mock_doc_svc, mock_dataset_svc, app: Flask, mock_tenant, mock_doc_detail
-    ):
+        self, mock_doc_svc: Mock, mock_dataset_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
+    ) -> None:
        """Test successful document retrieval with metadata='all'."""
        # Arrange
        dataset_id = str(uuid.uuid4())
@@ -569,8 +605,8 @@ class TestDocumentApiGet:
            method="GET",
        ):
            api = DocumentApi()
-            api.get_dataset = Mock(return_value=mock_dataset)
-            response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
+            with patch.object(api, "get_dataset", return_value=mock_dataset):
+                response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)

        # Assert
        assert response["id"] == mock_doc_detail.id
@@ -580,7 +616,7 @@ class TestDocumentApiGet:
        assert "doc_metadata" in response

    @patch("controllers.service_api.dataset.document.DocumentService")
-    def test_get_document_not_found(self, mock_doc_svc, app: Flask, mock_tenant):
+    def test_get_document_not_found(self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock) -> None:
        """Test 404 when document is not found."""
        # Arrange
        dataset_id = str(uuid.uuid4())
@@ -595,12 +631,14 @@ class TestDocumentApiGet:
            method="GET",
        ):
            api = DocumentApi()
-            api.get_dataset = Mock(return_value=mock_dataset)
-            with pytest.raises(NotFound):
-                api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id="nonexistent")
+            with patch.object(api, "get_dataset", return_value=mock_dataset):
+                with pytest.raises(NotFound):
+                    api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id="nonexistent")

    @patch("controllers.service_api.dataset.document.DocumentService")
-    def test_get_document_forbidden_wrong_tenant(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail):
+    def test_get_document_forbidden_wrong_tenant(
+        self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
+    ) -> None:
        """Test 403 when document tenant doesn't match request tenant."""
        # Arrange
        dataset_id = str(uuid.uuid4())
@@ -616,12 +654,14 @@ class TestDocumentApiGet:
            method="GET",
        ):
            api = DocumentApi()
-            api.get_dataset = Mock(return_value=mock_dataset)
-            with pytest.raises(Forbidden):
-                api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
+            with patch.object(api, "get_dataset", return_value=mock_dataset):
+                with pytest.raises(Forbidden):
+                    api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)

    @patch("controllers.service_api.dataset.document.DocumentService")
-    def test_get_document_metadata_only(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail):
+    def test_get_document_metadata_only(
+        self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
+    ) -> None:
        """Test document retrieval with metadata='only'."""
        # Arrange
        dataset_id = str(uuid.uuid4())
@@ -637,8 +677,8 @@ class TestDocumentApiGet:
            method="GET",
        ):
            api = DocumentApi()
-            api.get_dataset = Mock(return_value=mock_dataset)
-            response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
+            with patch.object(api, "get_dataset", return_value=mock_dataset):
+                response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)

        # Assert — metadata='only' returns only id, doc_type, doc_metadata
        assert response["id"] == mock_doc_detail.id
@@ -649,8 +689,8 @@ class TestDocumentApiGet:
    @patch("controllers.service_api.dataset.document.DatasetService")
    @patch("controllers.service_api.dataset.document.DocumentService")
    def test_get_document_metadata_without(
-        self, mock_doc_svc, mock_dataset_svc, app: Flask, mock_tenant, mock_doc_detail
-    ):
+        self, mock_doc_svc: Mock, mock_dataset_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
+    ) -> None:
        """Test document retrieval with metadata='without'."""
        # Arrange
        dataset_id = str(uuid.uuid4())
@@ -667,8 +707,8 @@ class TestDocumentApiGet:
            method="GET",
        ):
            api = DocumentApi()
-            api.get_dataset = Mock(return_value=mock_dataset)
-            response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
+            with patch.object(api, "get_dataset", return_value=mock_dataset):
+                response = api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)

        # Assert — metadata='without' omits doc_type / doc_metadata
        assert response["id"] == mock_doc_detail.id
@@ -677,7 +717,9 @@ class TestDocumentApiGet:
        assert "name" in response

    @patch("controllers.service_api.dataset.document.DocumentService")
-    def test_get_document_invalid_metadata_value(self, mock_doc_svc, app: Flask, mock_tenant, mock_doc_detail):
+    def test_get_document_invalid_metadata_value(
+        self, mock_doc_svc: Mock, app: Flask, mock_tenant: Mock, mock_doc_detail: Mock
+    ) -> None:
        """Test error when metadata parameter has invalid value."""
        # Arrange
        dataset_id = str(uuid.uuid4())
@@ -693,9 +735,9 @@ class TestDocumentApiGet:
            method="GET",
        ):
            api = DocumentApi()
-            api.get_dataset = Mock(return_value=mock_dataset)
-            with pytest.raises(InvalidMetadataError):
-                api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)
+            with patch.object(api, "get_dataset", return_value=mock_dataset):
+                with pytest.raises(InvalidMetadataError):
+                    api.get(tenant_id=mock_tenant.id, dataset_id=dataset_id, document_id=mock_doc_detail.id)


 class TestDocumentApiDelete:
@@ -808,21 +850,26 @@ class TestDocumentApiDelete:
 class TestDocumentListApi:
    """Test suite for DocumentListApi endpoint."""

-    @patch("controllers.service_api.dataset.document.marshal")
    @patch("controllers.service_api.dataset.document.DocumentService")
    @patch("controllers.service_api.dataset.document.db")
-    def test_list_documents_success(self, mock_db, mock_doc_svc, mock_marshal, app: Flask, mock_tenant, mock_dataset):
+    def test_list_documents_success(self, mock_db, mock_doc_svc, app: Flask, mock_tenant, mock_dataset):
        """Test successful document list retrieval."""
        # Arrange
        mock_db.session.scalar.return_value = mock_dataset

        mock_pagination = Mock()
-        mock_pagination.items = [Mock(), Mock()]
+        mock_pagination.items = [
+            make_serializable_document(
+                id="doc-1",
+                name="Document 1",
+                doc_metadata_details=[{"id": "meta-1", "name": "amount", "type": "number", "value": 42}],
+            ),
+            make_serializable_document(id="doc-2", name="Document 2"),
+        ]
        mock_pagination.total = 2
        mock_db.paginate.return_value = mock_pagination

        mock_doc_svc.enrich_documents_with_summary_index_status.return_value = None
-        mock_marshal.return_value = [{"id": "doc1"}, {"id": "doc2"}]

        # Act
        with app.test_request_context(
@@ -838,6 +885,11 @@ class TestDocumentListApi:
        assert response["page"] == 1
        assert response["limit"] == 20
        assert response["total"] == 2
+        assert response["data"][0]["id"] == "doc-1"
+        assert response["data"][0]["data_source_info"] == {"upload_file_id": "file-1"}
+        assert response["data"][0]["doc_metadata"][0]["value"] == 42
+        assert "data_source_info_dict" not in response["data"][0]
+        assert "doc_metadata_details" not in response["data"][0]

    @patch("controllers.service_api.dataset.document.db")
    def test_list_documents_dataset_not_found(self, mock_db, app: Flask, mock_tenant, mock_dataset):
@@ -858,12 +910,9 @@ class TestDocumentListApi:
 class TestDocumentIndexingStatusApi:
    """Test suite for DocumentIndexingStatusApi endpoint."""

-    @patch("controllers.service_api.dataset.document.marshal")
    @patch("controllers.service_api.dataset.document.DocumentService")
    @patch("controllers.service_api.dataset.document.db")
-    def test_get_indexing_status_success(
-        self, mock_db, mock_doc_svc, mock_marshal, app: Flask, mock_tenant, mock_dataset
-    ):
+    def test_get_indexing_status_success(self, mock_db, mock_doc_svc, app: Flask, mock_tenant, mock_dataset):
        """Test successful indexing status retrieval."""
        # Arrange
        batch_id = "batch_123"
@@ -884,7 +933,6 @@ class TestDocumentIndexingStatusApi:

        # scalar() called 3 times: dataset lookup, completed_segments count, total_segments count
        mock_db.session.scalar.side_effect = [mock_dataset, 5, 5]
-        mock_marshal.return_value = {"id": mock_doc.id, "indexing_status": "completed"}

        # Act
        with app.test_request_context(
@@ -897,6 +945,12 @@ class TestDocumentIndexingStatusApi:
        # Assert
        assert "data" in response
        assert len(response["data"]) == 1
+        item = response["data"][0]
+        assert item["id"] == mock_doc.id
+        assert item["indexing_status"] == "completed"
+        assert item["completed_segments"] == 5
+        assert item["total_segments"] == 5
+        assert item["processing_started_at"] is None

    @patch("controllers.service_api.dataset.document.db")
    def test_get_indexing_status_dataset_not_found(self, mock_db, app: Flask, mock_tenant, mock_dataset):
@@ -973,7 +1027,6 @@ class TestDocumentAddByTextApi:
        mock_rate_limit.enabled = False
        mock_feature_svc.get_knowledge_rate_limit.return_value = mock_rate_limit

-    @patch("controllers.service_api.dataset.document.marshal")
    @patch("controllers.service_api.dataset.document.DocumentService")
    @patch("controllers.service_api.dataset.document.KnowledgeConfig")
    @patch("controllers.service_api.dataset.document.FileService")
@@ -990,7 +1043,6 @@ class TestDocumentAddByTextApi:
        mock_file_svc_cls,
        mock_knowledge_config,
        mock_doc_svc,
-        mock_marshal,
        app: Flask,
        mock_tenant,
        mock_dataset,
@@ -1012,11 +1064,9 @@ class TestDocumentAddByTextApi:
        mock_config = Mock()
        mock_knowledge_config.model_validate.return_value = mock_config

-        mock_doc = Mock()
-        mock_doc.id = str(uuid.uuid4())
+        mock_doc = make_serializable_document(id="doc-create-text", name="Test Document")
        mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_doc], "batch_123")
        mock_doc_svc.document_create_args_validate.return_value = None
-        mock_marshal.return_value = {"id": mock_doc.id, "name": "Test Document"}

        # Act
        with app.test_request_context(
@@ -1037,6 +1087,10 @@ class TestDocumentAddByTextApi:
        assert "document" in response
        assert "batch" in response
        assert response["batch"] == "batch_123"
+        assert response["document"]["id"] == "doc-create-text"
+        assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"}
+        assert response["document"]["doc_metadata"] == []
+        assert "data_source_info_dict" not in response["document"]

    @patch("controllers.service_api.wraps.FeatureService")
    @patch("controllers.service_api.wraps.validate_and_get_api_token")
@@ -1162,7 +1216,6 @@ class TestDocumentUpdateByTextApiPost:
    ``@cloud_edition_billing_rate_limit_check``.
    """

-    @patch("controllers.service_api.dataset.document.marshal")
    @patch("controllers.service_api.dataset.document.DocumentService")
    @patch("controllers.service_api.dataset.document.FileService")
    @patch("controllers.service_api.dataset.document.current_user")
@@ -1177,7 +1230,6 @@ class TestDocumentUpdateByTextApiPost:
        mock_current_user,
        mock_file_svc_cls,
        mock_doc_svc,
-        mock_marshal,
        app: Flask,
        mock_tenant,
        mock_dataset,
@@ -1193,10 +1245,9 @@ class TestDocumentUpdateByTextApiPost:
        mock_upload.id = str(uuid.uuid4())
        mock_file_svc_cls.return_value.upload_text.return_value = mock_upload

-        mock_document = Mock()
+        mock_document = make_serializable_document(id="doc-update-text", name="Updated Doc")
        mock_doc_svc.document_create_args_validate.return_value = None
        mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_document], "batch-1")
-        mock_marshal.return_value = {"id": "doc-1"}

        doc_id = str(uuid.uuid4())
        with app.test_request_context(
@@ -1214,6 +1265,9 @@ class TestDocumentUpdateByTextApiPost:

        assert status == 200
        assert "document" in response
+        assert response["batch"] == "batch-1"
+        assert response["document"]["id"] == "doc-update-text"
+        assert response["document"]["doc_metadata"] == []

    @patch("controllers.service_api.dataset.document.db")
    @patch("controllers.service_api.wraps.FeatureService")
@@ -1254,6 +1308,61 @@ class TestDocumentAddByFileApiPost:
    decorators and ``@cloud_edition_billing_rate_limit_check``.
    """

+    @patch("controllers.service_api.dataset.document.DocumentService")
+    @patch("controllers.service_api.dataset.document.FileService")
+    @patch("controllers.service_api.dataset.document.current_user")
+    @patch("controllers.service_api.dataset.document.db")
+    @patch("controllers.service_api.wraps.FeatureService")
+    @patch("controllers.service_api.wraps.validate_and_get_api_token")  # bottom-most patch -> first mock argument
+    def test_add_by_file_success_serializes_document_and_batch_shape(
+        self,
+        mock_validate_token,
+        mock_feature_svc,
+        mock_db,
+        mock_current_user,
+        mock_file_svc_cls,
+        mock_doc_svc,
+        app: Flask,
+        mock_tenant,
+        mock_dataset,
+    ):
+        """Test successful document creation by file."""
+        _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id)
+        mock_dataset.provider = "vendor"
+        mock_dataset.indexing_technique = "economy"
+        mock_dataset.chunk_structure = None
+        mock_dataset.latest_process_rule = Mock()
+        mock_dataset.created_by_account = Mock()
+        mock_db.session.scalar.return_value = mock_dataset  # every scalar() call resolves to the dataset mock
+
+        mock_current_user.id = "user-1"
+        mock_upload = Mock()
+        mock_upload.id = str(uuid.uuid4())
+        mock_file_svc_cls.return_value.upload_file.return_value = mock_upload  # FileService(...).upload_file(...) result
+
+        mock_document = make_serializable_document(id="doc-create-file", name="File Document")
+        mock_doc_svc.document_create_args_validate.return_value = None
+        mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_document], "batch-file")  # (documents, batch)
+
+        from io import BytesIO  # local import: only needed for the fake upload below
+
+        data = {"file": (BytesIO(b"content"), "test.pdf", "application/pdf")}  # (stream, filename, content type)
+        with app.test_request_context(
+            f"/datasets/{mock_dataset.id}/document/create-by-file",
+            method="POST",
+            content_type="multipart/form-data",
+            data=data,
+            headers={"Authorization": "Bearer test_token"},
+        ):
+            api = DocumentAddByFileApi()
+            response, status = api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id)  # (body, status) pair
+
+        assert status == 200
+        assert response["batch"] == "batch-file"
+        assert response["document"]["id"] == "doc-create-file"
+        assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"}  # factory default value
+        assert response["document"]["doc_metadata"] == []  # the factory's doc_metadata_details is None
+
    @patch("controllers.service_api.dataset.document.db")
    @patch("controllers.service_api.wraps.FeatureService")
    @patch("controllers.service_api.wraps.validate_and_get_api_token")
@@ -1498,7 +1607,6 @@ class TestDocumentUpdateByFileApiPatch:
                    document_id=doc_id,
                )

-    @patch("controllers.service_api.dataset.document.marshal")
    @patch("controllers.service_api.dataset.document.DocumentService")
    @patch("controllers.service_api.dataset.document.FileService")
    @patch("controllers.service_api.dataset.document.current_user")
@@ -1513,7 +1621,6 @@ class TestDocumentUpdateByFileApiPatch:
        mock_current_user,
        mock_file_svc_cls,
        mock_doc_svc,
-        mock_marshal,
        app: Flask,
        mock_tenant,
        mock_dataset,
@@ -1532,11 +1639,9 @@ class TestDocumentUpdateByFileApiPatch:
        mock_upload.id = str(uuid.uuid4())
        mock_file_svc_cls.return_value.upload_file.return_value = mock_upload

-        mock_document = Mock()
-        mock_document.batch = "batch-1"
+        mock_document = make_serializable_document(id="doc-update-file", name="File Document", batch="batch-1")
        mock_doc_svc.document_create_args_validate.return_value = None
        mock_doc_svc.save_document_with_dataset_id.return_value = ([mock_document], None)
-        mock_marshal.return_value = {"id": "doc-1"}

        from io import BytesIO

@@ -1558,3 +1663,6 @@ class TestDocumentUpdateByFileApiPatch:

        assert status == 200
        assert "document" in response
+        assert response["batch"] == "batch-1"
+        assert response["document"]["id"] == "doc-update-file"
+        assert response["document"]["data_source_info"] == {"upload_file_id": "file-1"}
@@ -18,6 +18,19 @@ def _definition_refs(value: object) -> set[str]:
    return refs


+def _parameters_by_name(operation: dict[str, object]) -> dict[str, dict[str, object]]:
+    """Index an operation's parameter objects by their string ``name``."""
+    raw_parameters = operation.get("parameters", [])
+    assert isinstance(raw_parameters, list)
+    # Entries that are not dicts, or whose "name" is not a string, are ignored;
+    # when a name repeats, the later entry replaces the earlier one.
+    return {
+        entry["name"]: entry
+        for entry in raw_parameters
+        if isinstance(entry, dict) and isinstance(entry.get("name"), str)
+    }
+
+
@pytest.mark.parametrize(
    ("first_kwargs", "second_kwargs"),
    [
@@ -70,3 +83,60 @@ def test_swagger_json_endpoints_render(monkeypatch: pytest.MonkeyPatch):
        assert not sorted(ref for ref in missing_refs if ref.startswith("_AnonymousInlineModel"))

    assert app.config["RESTX_INCLUDE_ALL_MODELS"] is True
+
+
+def test_service_document_file_routes_document_multipart_form_data(monkeypatch: pytest.MonkeyPatch):
+    """File create/update operations must be documented as multipart forms with ``file`` and ``data`` fields."""
+    from configs import dify_config
+    from controllers.service_api import bp as service_api_bp
+
+    monkeypatch.setattr(dify_config, "SWAGGER_UI_ENABLED", True)
+
+    app = Flask(__name__)
+    app.config.update(TESTING=True, RESTX_INCLUDE_ALL_MODELS=True)
+    app.register_blueprint(service_api_bp)
+
+    paths = app.test_client().get("/v1/swagger.json").get_json()["paths"]
+
+    def assert_multipart(operation: dict[str, object], *, file_required: bool) -> None:
+        # Same checks for every route; only the `required` flag of `file` differs.
+        params = _parameters_by_name(operation)
+        assert operation["consumes"] == ["multipart/form-data"]
+        assert params["file"]["in"] == "formData"
+        assert params["file"]["type"] == "file"
+        assert params["file"]["required"] is file_required
+        assert params["data"]["in"] == "formData"
+        assert params["data"]["type"] == "string"
+
+    assert_multipart(paths["/datasets/{dataset_id}/document/create-by-file"]["post"], file_required=True)
+
+    # The bare document route is checked under PATCH; the two update-by-file spellings under POST.
+    update_routes = (
+        ("/datasets/{dataset_id}/documents/{document_id}", "patch"),
+        ("/datasets/{dataset_id}/documents/{document_id}/update-by-file", "post"),
+        ("/datasets/{dataset_id}/documents/{document_id}/update_by_file", "post"),
+    )
+    for route, verb in update_routes:
+        assert_multipart(paths[route][verb], file_required=False)
+
+
+def test_service_document_list_documents_query_params_render(monkeypatch: pytest.MonkeyPatch):
+    """The list-documents GET operation must document page, limit, keyword and status as query parameters."""
+    from configs import dify_config
+    from controllers.service_api import bp as service_api_bp
+
+    monkeypatch.setattr(dify_config, "SWAGGER_UI_ENABLED", True)
+
+    app = Flask(__name__)
+    app.config.update(TESTING=True, RESTX_INCLUDE_ALL_MODELS=True)
+    app.register_blueprint(service_api_bp)
+
+    swagger = app.test_client().get("/v1/swagger.json").get_json()
+    list_operation = swagger["paths"]["/datasets/{dataset_id}/documents"]["get"]
+    query_params = _parameters_by_name(list_operation)
+
+    for expected_name in ("page", "limit", "keyword", "status"):
+        assert query_params[expected_name]["in"] == "query"