mirror of
https://github.com/langgenius/dify.git
synced 2026-06-03 08:16:37 +08:00
refactor(api): migrate console/service_api.dataset.segment to BaseModel (#36522)
Co-authored-by: WH-2099 <wh2099@pm.me> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -6,10 +6,11 @@ These helpers keep that translation centralized so models registered through
|
||||
`register_schema_models` emit resolvable Swagger 2.0 references.
|
||||
"""
|
||||
|
||||
from collections.abc import Mapping
|
||||
from collections.abc import Iterable, Mapping
|
||||
from enum import StrEnum
|
||||
from typing import Any, Literal, NotRequired, TypedDict
|
||||
from typing import Any, Literal, NotRequired, Protocol, TypedDict
|
||||
|
||||
from flask import request
|
||||
from flask_restx import Namespace
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
|
||||
@@ -36,6 +37,12 @@ QueryParamDoc = TypedDict(
|
||||
)
|
||||
|
||||
|
||||
class QueryArgs(Protocol):
    """Structural type for the query-argument source read by ``query_params_from_request``.

    Any object that offers these two methods can stand in for the real
    request arguments, which keeps the helper usable without a live request.
    """

    def to_dict(self, flat: bool = True) -> dict[str, str]:
        """Return the query arguments as a plain ``dict`` of strings."""
        ...

    def getlist(self, key: str) -> list[str]:
        """Return every value that was supplied for ``key``."""
        ...
|
||||
|
||||
|
||||
def _register_json_schema(namespace: Namespace, name: str, schema: dict) -> None:
|
||||
"""Register a JSON schema and promote any nested Pydantic `$defs`."""
|
||||
|
||||
@@ -167,6 +174,58 @@ def query_params_from_model(model: type[BaseModel]) -> dict[str, QueryParamDoc]:
|
||||
return params
|
||||
|
||||
|
||||
def query_params_from_request[ModelT: BaseModel](
|
||||
model: type[ModelT],
|
||||
*,
|
||||
list_fields: Iterable[str] = (),
|
||||
args: QueryArgs | None = None,
|
||||
use_defaults_for_malformed_ints: bool = False,
|
||||
) -> ModelT:
|
||||
"""Validate query args with Pydantic while preserving Flask query parsing behavior.
|
||||
|
||||
Repeated params need explicit ``getlist()`` handling because Werkzeug's
|
||||
``to_dict()`` keeps only one value. For malformed scalar integers, Flask's
|
||||
For endpoints migrated from ``request.args.get(..., type=int, default=...)``,
|
||||
set ``use_defaults_for_malformed_ints`` to preserve Flask's fallback to
|
||||
defaults for malformed optional integer params.
|
||||
"""
|
||||
|
||||
query_args = args or request.args
|
||||
params: dict[str, Any] = query_args.to_dict()
|
||||
for field_name in list_fields:
|
||||
params[field_name] = query_args.getlist(field_name)
|
||||
|
||||
if use_defaults_for_malformed_ints:
|
||||
_drop_malformed_defaulted_integer_params(model, params)
|
||||
return model.model_validate(params)
|
||||
|
||||
|
||||
def _drop_malformed_defaulted_integer_params(model: type[BaseModel], params: dict[str, Any]) -> None:
    """Remove unparsable integer strings for optional fields from ``params`` in place.

    A parameter is dropped only when all of the following hold: its value is a
    string, ``model`` declares a non-required field of that name, the field's
    JSON-schema property (after ``_nullable_property_schema``, which presumably
    unwraps nullable wrappers) has type ``"integer"``, and ``int(value)``
    raises ``ValueError``. Everything else is left untouched.
    """
    schema = model.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0)
    schema_properties = schema.get("properties", {})
    if not isinstance(schema_properties, Mapping):
        return

    # Iterate over a snapshot so entries can be removed during the scan.
    for param_name, raw_value in tuple(params.items()):
        if not isinstance(raw_value, str):
            continue

        field_info = model.model_fields.get(param_name)
        if field_info is None or field_info.is_required():
            continue

        param_schema = schema_properties.get(param_name)
        if not isinstance(param_schema, Mapping):
            continue

        declared_type = _nullable_property_schema(param_schema).get("type")
        if declared_type != "integer":
            continue

        try:
            int(raw_value)
        except ValueError:
            # Malformed integer: remove it so validation uses the field default.
            params.pop(param_name)
|
||||
|
||||
|
||||
def _query_param_from_property(property_schema: Mapping[str, Any], *, required: bool) -> QueryParamDoc:
|
||||
param_schema = _nullable_property_schema(property_schema)
|
||||
param_doc: QueryParamDoc = {"in": "query", "required": required}
|
||||
@@ -239,6 +298,7 @@ __all__ = [
|
||||
"DEFAULT_REF_TEMPLATE_SWAGGER_2_0",
|
||||
"get_or_create_model",
|
||||
"query_params_from_model",
|
||||
"query_params_from_request",
|
||||
"register_enum_models",
|
||||
"register_response_schema_model",
|
||||
"register_response_schema_models",
|
||||
|
||||
@@ -1,9 +1,10 @@
|
||||
import uuid
|
||||
from typing import Literal
|
||||
from typing import cast as type_cast
|
||||
from uuid import UUID
|
||||
|
||||
from flask import request
|
||||
from flask_restx import Resource, marshal
|
||||
from flask_restx import Resource
|
||||
from pydantic import BaseModel, Field
|
||||
from sqlalchemy import String, case, cast, func, literal, or_, select
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
@@ -13,7 +14,12 @@ import services
|
||||
from configs import dify_config
|
||||
from controllers.common.controller_schemas import ChildChunkCreatePayload, ChildChunkUpdatePayload
|
||||
from controllers.common.fields import SimpleResultResponse
|
||||
from controllers.common.schema import register_response_schema_models, register_schema_models
|
||||
from controllers.common.schema import (
|
||||
query_params_from_model,
|
||||
query_params_from_request,
|
||||
register_response_schema_models,
|
||||
register_schema_models,
|
||||
)
|
||||
from controllers.console import console_ns
|
||||
from controllers.console.app.error import ProviderNotInitializeError
|
||||
from controllers.console.datasets.error import (
|
||||
@@ -34,9 +40,17 @@ from core.rag.index_processor.constant.index_type import IndexTechniqueType
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from fields.base import ResponseModel
|
||||
from fields.segment_fields import child_chunk_fields, segment_fields
|
||||
from fields.segment_fields import (
|
||||
ChildChunkDetailResponse,
|
||||
ChildChunkListResponse,
|
||||
ChildChunkResponse,
|
||||
SegmentDetailResponse,
|
||||
SegmentResponse,
|
||||
segment_response_with_summary,
|
||||
segment_responses_with_summaries,
|
||||
)
|
||||
from graphon.model_runtime.entities.model_entities import ModelType
|
||||
from libs.helper import escape_like_pattern
|
||||
from libs.helper import dump_response, escape_like_pattern
|
||||
from libs.login import current_account_with_tenant, login_required
|
||||
from models.dataset import ChildChunk, DocumentSegment
|
||||
from models.model import UploadFile
|
||||
@@ -44,20 +58,10 @@ from services.dataset_service import DatasetService, DocumentService, SegmentSer
|
||||
from services.entities.knowledge_entities.knowledge_entities import ChildChunkUpdateArgs, SegmentUpdateArgs
|
||||
from services.errors.chunk import ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError
|
||||
from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingServiceError
|
||||
from services.summary_index_service import SummaryIndexService
|
||||
from tasks.batch_create_segment_to_index_task import batch_create_segment_to_index_task
|
||||
|
||||
|
||||
def _get_segment_with_summary(segment, dataset_id):
|
||||
"""Helper function to marshal segment and add summary information."""
|
||||
from services.summary_index_service import SummaryIndexService
|
||||
|
||||
segment_dict = dict(marshal(segment, segment_fields)) # type: ignore
|
||||
# Query summary for this segment (only enabled summaries)
|
||||
summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id)
|
||||
segment_dict["summary"] = summary.summary_content if summary else None
|
||||
return segment_dict
|
||||
|
||||
|
||||
class SegmentListQuery(BaseModel):
|
||||
limit: int = Field(default=20, ge=1, le=100)
|
||||
status: list[str] = Field(default_factory=list)
|
||||
@@ -67,6 +71,16 @@ class SegmentListQuery(BaseModel):
|
||||
page: int = Field(default=1, ge=1)
|
||||
|
||||
|
||||
# Query-string model naming the segments an operation targets; this file passes
# it to ``query_params_from_model`` to document the ``segment_id`` parameter.
class SegmentIdListQuery(BaseModel):
    # List of segment IDs; defaults to an empty list when none are given.
    segment_id: list[str] = Field(default_factory=list, description="Segment IDs")
|
||||
|
||||
|
||||
# Query-string model for listing child chunks (pagination plus keyword filter).
class ChildChunkListQuery(BaseModel):
    # Page size: 1 to 100 inclusive, 20 when omitted.
    limit: int = Field(default=20, ge=1, le=100)
    # Optional keyword filter; None when not supplied.
    keyword: str | None = None
    # 1-based page number, 1 when omitted.
    page: int = Field(default=1, ge=1)
|
||||
|
||||
|
||||
class SegmentCreatePayload(BaseModel):
|
||||
content: str
|
||||
answer: str | None = None
|
||||
@@ -92,13 +106,35 @@ class SegmentBatchImportStatusResponse(ResponseModel):
|
||||
job_status: str
|
||||
|
||||
|
||||
# Response body for the console segment list endpoint (one page of segments).
class ConsoleSegmentListResponse(ResponseModel):
    # Segments on the current page.
    data: list[SegmentResponse]
    # Page size that was applied to the query.
    limit: int
    # Total number of matching segments, as reported by the paginator.
    total: int
    # Total number of pages, as reported by the paginator.
    total_pages: int
    # Page number that was requested.
    page: int
|
||||
|
||||
|
||||
# Response body returned after a batch update of a segment's child chunks.
class ChildChunkBatchUpdateResponse(ResponseModel):
    # Child chunks as returned by the update service call.
    data: list[ChildChunkResponse]
|
||||
|
||||
|
||||
# Request body for the batch child-chunk update endpoint.
class ChildChunkBatchUpdatePayload(BaseModel):
    # Update instructions, handed to the segment service as a list.
    chunks: list[ChildChunkUpdateArgs]
|
||||
|
||||
|
||||
class SegmentDocParams:
    """Path-parameter descriptions reused by the ``doc(params=...)`` decorators."""

    # Base pair that every other entry extends.
    DATASET_DOCUMENT = {"dataset_id": "Dataset ID", "document_id": "Document ID"}
    DATASET_DOCUMENT_ACTION = DATASET_DOCUMENT | {"action": "Action"}
    DATASET_DOCUMENT_SEGMENT = DATASET_DOCUMENT | {"segment_id": "Segment ID"}
    # Same key as above, but described as the parent of child chunks.
    DATASET_DOCUMENT_PARENT_SEGMENT = DATASET_DOCUMENT | {"segment_id": "Parent segment ID"}
    DATASET_DOCUMENT_CHILD_CHUNK = DATASET_DOCUMENT_PARENT_SEGMENT | {"child_chunk_id": "Child chunk ID"}
|
||||
|
||||
|
||||
register_schema_models(
|
||||
console_ns,
|
||||
SegmentListQuery,
|
||||
SegmentIdListQuery,
|
||||
ChildChunkListQuery,
|
||||
SegmentCreatePayload,
|
||||
SegmentUpdatePayload,
|
||||
BatchImportPayload,
|
||||
@@ -107,11 +143,24 @@ register_schema_models(
|
||||
ChildChunkBatchUpdatePayload,
|
||||
ChildChunkUpdateArgs,
|
||||
)
|
||||
register_response_schema_models(console_ns, SegmentBatchImportStatusResponse, SimpleResultResponse)
|
||||
register_response_schema_models(
|
||||
console_ns,
|
||||
SegmentResponse,
|
||||
ConsoleSegmentListResponse,
|
||||
SegmentDetailResponse,
|
||||
ChildChunkDetailResponse,
|
||||
ChildChunkListResponse,
|
||||
ChildChunkBatchUpdateResponse,
|
||||
SegmentBatchImportStatusResponse,
|
||||
SimpleResultResponse,
|
||||
)
|
||||
|
||||
|
||||
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments")
|
||||
class DatasetDocumentSegmentListApi(Resource):
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT)
|
||||
@console_ns.doc(params=query_params_from_model(SegmentListQuery))
|
||||
@console_ns.response(200, "Segments retrieved successfully", console_ns.models[ConsoleSegmentListResponse.__name__])
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@@ -134,12 +183,7 @@ class DatasetDocumentSegmentListApi(Resource):
|
||||
if not document:
|
||||
raise NotFound("Document not found.")
|
||||
|
||||
args = SegmentListQuery.model_validate(
|
||||
{
|
||||
**request.args.to_dict(),
|
||||
"status": request.args.getlist("status"),
|
||||
}
|
||||
)
|
||||
args = query_params_from_request(SegmentListQuery, list_fields=("status",))
|
||||
|
||||
page = args.page
|
||||
limit = min(args.limit, 100)
|
||||
@@ -205,38 +249,30 @@ class DatasetDocumentSegmentListApi(Resource):
|
||||
|
||||
segments = db.paginate(select=query, page=page, per_page=limit, max_per_page=100, error_out=False)
|
||||
|
||||
# Query summaries for all segments in this page (batch query for efficiency)
|
||||
segment_ids = [segment.id for segment in segments.items]
|
||||
summaries = {}
|
||||
segment_list = list(segments.items)
|
||||
segment_ids = [segment.id for segment in segment_list]
|
||||
summaries: dict[str, str | None] = {}
|
||||
if segment_ids:
|
||||
from services.summary_index_service import SummaryIndexService
|
||||
|
||||
summary_records = SummaryIndexService.get_segments_summaries(
|
||||
segment_ids=segment_ids, dataset_id=dataset_id_str
|
||||
)
|
||||
# Only include enabled summaries (already filtered by service)
|
||||
summaries = {chunk_id: summary.summary_content for chunk_id, summary in summary_records.items()}
|
||||
|
||||
# Add summary to each segment
|
||||
segments_with_summary = []
|
||||
for segment in segments.items:
|
||||
segment_dict = dict(marshal(segment, segment_fields)) # type: ignore
|
||||
segment_dict["summary"] = summaries.get(segment.id)
|
||||
segments_with_summary.append(segment_dict)
|
||||
|
||||
response = {
|
||||
"data": segments_with_summary,
|
||||
"data": segment_responses_with_summaries(segment_list, summaries),
|
||||
"limit": limit,
|
||||
"total": segments.total,
|
||||
"total_pages": segments.pages,
|
||||
"page": page,
|
||||
}
|
||||
return response, 200
|
||||
return dump_response(ConsoleSegmentListResponse, response), 200
|
||||
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT)
|
||||
@console_ns.doc(params=query_params_from_model(SegmentIdListQuery))
|
||||
@console_ns.response(204, "Segments deleted successfully")
|
||||
def delete(self, dataset_id: UUID, document_id: UUID):
|
||||
current_user, _ = current_account_with_tenant()
|
||||
@@ -268,6 +304,8 @@ class DatasetDocumentSegmentListApi(Resource):
|
||||
|
||||
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment/<string:action>")
|
||||
class DatasetDocumentSegmentApi(Resource):
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_ACTION)
|
||||
@console_ns.doc(params=query_params_from_model(SegmentIdListQuery))
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@@ -321,11 +359,12 @@ class DatasetDocumentSegmentApi(Resource):
|
||||
SegmentService.update_segments_status(segment_ids, action, dataset, document)
|
||||
except Exception as e:
|
||||
raise InvalidActionError(str(e))
|
||||
return {"result": "success"}, 200
|
||||
return dump_response(SimpleResultResponse, {"result": "success"}), 200
|
||||
|
||||
|
||||
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segment")
|
||||
class DatasetDocumentSegmentAddApi(Resource):
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT)
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@@ -333,6 +372,7 @@ class DatasetDocumentSegmentAddApi(Resource):
|
||||
@cloud_edition_billing_knowledge_limit_check("add_segment")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
@console_ns.expect(console_ns.models[SegmentCreatePayload.__name__])
|
||||
@console_ns.response(200, "Segment created successfully", console_ns.models[SegmentDetailResponse.__name__])
|
||||
def post(self, dataset_id: UUID, document_id: UUID):
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
|
||||
@@ -372,18 +412,25 @@ class DatasetDocumentSegmentAddApi(Resource):
|
||||
payload = SegmentCreatePayload.model_validate(console_ns.payload or {})
|
||||
payload_dict = payload.model_dump(exclude_none=True)
|
||||
SegmentService.segment_create_args_validate(payload_dict, document)
|
||||
segment = SegmentService.create_segment(payload_dict, document, dataset)
|
||||
return {"data": _get_segment_with_summary(segment, dataset_id_str), "doc_form": document.doc_form}, 200
|
||||
segment = type_cast(DocumentSegment, SegmentService.create_segment(payload_dict, document, dataset))
|
||||
summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id_str)
|
||||
response = {
|
||||
"data": segment_response_with_summary(segment, summary.summary_content if summary else None),
|
||||
"doc_form": document.doc_form,
|
||||
}
|
||||
return dump_response(SegmentDetailResponse, response), 200
|
||||
|
||||
|
||||
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>")
|
||||
class DatasetDocumentSegmentUpdateApi(Resource):
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_SEGMENT)
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_resource_check("vector_space")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
@console_ns.expect(console_ns.models[SegmentUpdatePayload.__name__])
|
||||
@console_ns.response(200, "Segment updated successfully", console_ns.models[SegmentDetailResponse.__name__])
|
||||
def patch(self, dataset_id: UUID, document_id: UUID, segment_id: UUID):
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
|
||||
@@ -440,12 +487,18 @@ class DatasetDocumentSegmentUpdateApi(Resource):
|
||||
segment = SegmentService.update_segment(
|
||||
SegmentUpdateArgs.model_validate(payload.model_dump(exclude_none=True)), segment, document, dataset
|
||||
)
|
||||
return {"data": _get_segment_with_summary(segment, dataset_id_str), "doc_form": document.doc_form}, 200
|
||||
summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id_str)
|
||||
response = {
|
||||
"data": segment_response_with_summary(segment, summary.summary_content if summary else None),
|
||||
"doc_form": document.doc_form,
|
||||
}
|
||||
return dump_response(SegmentDetailResponse, response), 200
|
||||
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_SEGMENT)
|
||||
@console_ns.response(204, "Segment deleted successfully")
|
||||
def delete(self, dataset_id: UUID, document_id: UUID, segment_id: UUID):
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
@@ -523,11 +576,11 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
|
||||
try:
|
||||
# async job
|
||||
job_id = str(uuid.uuid4())
|
||||
indexing_cache_key = f"segment_batch_import_{str(job_id)}"
|
||||
indexing_cache_key = f"segment_batch_import_{job_id}"
|
||||
# send batch add segments task
|
||||
redis_client.setnx(indexing_cache_key, "waiting")
|
||||
batch_create_segment_to_index_task.delay(
|
||||
str(job_id),
|
||||
job_id,
|
||||
upload_file_id,
|
||||
dataset_id_str,
|
||||
document_id_str,
|
||||
@@ -536,7 +589,7 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
|
||||
)
|
||||
except Exception as e:
|
||||
return {"error": str(e)}, 500
|
||||
return {"job_id": job_id, "job_status": "waiting"}, 200
|
||||
return dump_response(SegmentBatchImportStatusResponse, {"job_id": job_id, "job_status": "waiting"}), 200
|
||||
|
||||
@console_ns.response(200, "Batch import status", console_ns.models[SegmentBatchImportStatusResponse.__name__])
|
||||
@setup_required
|
||||
@@ -551,11 +604,13 @@ class DatasetDocumentSegmentBatchImportApi(Resource):
|
||||
if cache_result is None:
|
||||
raise ValueError("The job does not exist.")
|
||||
|
||||
return {"job_id": job_id, "job_status": cache_result.decode()}, 200
|
||||
response = {"job_id": job_id, "job_status": cache_result.decode()}
|
||||
return dump_response(SegmentBatchImportStatusResponse, response), 200
|
||||
|
||||
|
||||
@console_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>/child_chunks")
|
||||
class ChildChunkAddApi(Resource):
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_PARENT_SEGMENT)
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@@ -563,6 +618,7 @@ class ChildChunkAddApi(Resource):
|
||||
@cloud_edition_billing_knowledge_limit_check("add_segment")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
@console_ns.expect(console_ns.models[ChildChunkCreatePayload.__name__])
|
||||
@console_ns.response(200, "Child chunk created successfully", console_ns.models[ChildChunkDetailResponse.__name__])
|
||||
def post(self, dataset_id: UUID, document_id: UUID, segment_id: UUID):
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
|
||||
@@ -613,8 +669,11 @@ class ChildChunkAddApi(Resource):
|
||||
child_chunk = SegmentService.create_child_chunk(payload.content, segment, document, dataset)
|
||||
except ChildChunkIndexingServiceError as e:
|
||||
raise ChildChunkIndexingError(str(e))
|
||||
return {"data": marshal(child_chunk, child_chunk_fields)}, 200
|
||||
return dump_response(ChildChunkDetailResponse, {"data": child_chunk}), 200
|
||||
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_PARENT_SEGMENT)
|
||||
@console_ns.doc(params=query_params_from_model(ChildChunkListQuery))
|
||||
@console_ns.response(200, "Child chunks retrieved successfully", console_ns.models[ChildChunkListResponse.__name__])
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@@ -642,13 +701,7 @@ class ChildChunkAddApi(Resource):
|
||||
)
|
||||
if not segment:
|
||||
raise NotFound("Segment not found.")
|
||||
args = SegmentListQuery.model_validate(
|
||||
{
|
||||
"limit": request.args.get("limit", default=20, type=int),
|
||||
"keyword": request.args.get("keyword"),
|
||||
"page": request.args.get("page", default=1, type=int),
|
||||
}
|
||||
)
|
||||
args = query_params_from_request(ChildChunkListQuery, use_defaults_for_malformed_ints=True)
|
||||
|
||||
page = args.page
|
||||
limit = min(args.limit, 100)
|
||||
@@ -657,19 +710,27 @@ class ChildChunkAddApi(Resource):
|
||||
child_chunks = SegmentService.get_child_chunks(
|
||||
segment_id_str, document_id_str, dataset_id_str, page, limit, keyword
|
||||
)
|
||||
return {
|
||||
"data": marshal(child_chunks.items, child_chunk_fields),
|
||||
response = {
|
||||
"data": child_chunks.items,
|
||||
"total": child_chunks.total,
|
||||
"total_pages": child_chunks.pages,
|
||||
"page": page,
|
||||
"limit": limit,
|
||||
}, 200
|
||||
}
|
||||
return dump_response(ChildChunkListResponse, response), 200
|
||||
|
||||
@setup_required
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_resource_check("vector_space")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_PARENT_SEGMENT)
|
||||
@console_ns.response(
|
||||
200,
|
||||
"Child chunks updated successfully",
|
||||
console_ns.models[ChildChunkBatchUpdateResponse.__name__],
|
||||
)
|
||||
@console_ns.expect(console_ns.models[ChildChunkBatchUpdatePayload.__name__])
|
||||
def patch(self, dataset_id: UUID, document_id: UUID, segment_id: UUID):
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
|
||||
@@ -707,7 +768,7 @@ class ChildChunkAddApi(Resource):
|
||||
child_chunks = SegmentService.update_child_chunks(payload.chunks, segment, document, dataset)
|
||||
except ChildChunkIndexingServiceError as e:
|
||||
raise ChildChunkIndexingError(str(e))
|
||||
return {"data": marshal(child_chunks, child_chunk_fields)}, 200
|
||||
return dump_response(ChildChunkBatchUpdateResponse, {"data": child_chunks}), 200
|
||||
|
||||
|
||||
@console_ns.route(
|
||||
@@ -718,6 +779,7 @@ class ChildChunkUpdateApi(Resource):
|
||||
@login_required
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_CHILD_CHUNK)
|
||||
@console_ns.response(204, "Child chunk deleted successfully")
|
||||
def delete(self, dataset_id: UUID, document_id: UUID, segment_id: UUID, child_chunk_id: UUID):
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
@@ -748,7 +810,7 @@ class ChildChunkUpdateApi(Resource):
|
||||
child_chunk = db.session.scalar(
|
||||
select(ChildChunk)
|
||||
.where(
|
||||
ChildChunk.id == str(child_chunk_id_str),
|
||||
ChildChunk.id == child_chunk_id_str,
|
||||
ChildChunk.tenant_id == current_tenant_id,
|
||||
ChildChunk.segment_id == segment.id,
|
||||
ChildChunk.document_id == document_id_str,
|
||||
@@ -775,7 +837,9 @@ class ChildChunkUpdateApi(Resource):
|
||||
@account_initialization_required
|
||||
@cloud_edition_billing_resource_check("vector_space")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge")
|
||||
@console_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_CHILD_CHUNK)
|
||||
@console_ns.expect(console_ns.models[ChildChunkUpdatePayload.__name__])
|
||||
@console_ns.response(200, "Child chunk updated successfully", console_ns.models[ChildChunkDetailResponse.__name__])
|
||||
def patch(self, dataset_id: UUID, document_id: UUID, segment_id: UUID, child_chunk_id: UUID):
|
||||
current_user, current_tenant_id = current_account_with_tenant()
|
||||
|
||||
@@ -805,7 +869,7 @@ class ChildChunkUpdateApi(Resource):
|
||||
child_chunk = db.session.scalar(
|
||||
select(ChildChunk)
|
||||
.where(
|
||||
ChildChunk.id == str(child_chunk_id_str),
|
||||
ChildChunk.id == child_chunk_id_str,
|
||||
ChildChunk.tenant_id == current_tenant_id,
|
||||
ChildChunk.segment_id == segment.id,
|
||||
ChildChunk.document_id == document_id_str,
|
||||
@@ -827,4 +891,4 @@ class ChildChunkUpdateApi(Resource):
|
||||
child_chunk = SegmentService.update_child_chunk(payload.content, child_chunk, segment, document, dataset)
|
||||
except ChildChunkIndexingServiceError as e:
|
||||
raise ChildChunkIndexingError(str(e))
|
||||
return {"data": marshal(child_chunk, child_chunk_fields)}, 200
|
||||
return dump_response(ChildChunkDetailResponse, {"data": child_chunk}), 200
|
||||
|
||||
@@ -1,15 +1,18 @@
|
||||
from typing import Any
|
||||
from typing import cast
|
||||
from uuid import UUID
|
||||
|
||||
from flask import request
|
||||
from flask_restx import marshal
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, Field, ValidationError, field_validator
|
||||
from sqlalchemy import select
|
||||
from werkzeug.exceptions import NotFound
|
||||
|
||||
from configs import dify_config
|
||||
from controllers.common.controller_schemas import ChildChunkCreatePayload, ChildChunkUpdatePayload
|
||||
from controllers.common.schema import register_schema_models
|
||||
from controllers.common.schema import (
|
||||
query_params_from_model,
|
||||
query_params_from_request,
|
||||
register_response_schema_models,
|
||||
register_schema_models,
|
||||
)
|
||||
from controllers.service_api import service_api_ns
|
||||
from controllers.service_api.app.error import ProviderNotInitializeError
|
||||
from controllers.service_api.wraps import (
|
||||
@@ -22,10 +25,19 @@ from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||
from core.model_manager import ModelManager
|
||||
from core.rag.index_processor.constant.index_type import IndexTechniqueType
|
||||
from extensions.ext_database import db
|
||||
from fields.segment_fields import child_chunk_fields, segment_fields
|
||||
from fields.base import ResponseModel
|
||||
from fields.segment_fields import (
|
||||
ChildChunkDetailResponse,
|
||||
ChildChunkListResponse,
|
||||
SegmentDetailResponse,
|
||||
SegmentResponse,
|
||||
segment_response_with_summary,
|
||||
segment_responses_with_summaries,
|
||||
)
|
||||
from graphon.model_runtime.entities.model_entities import ModelType
|
||||
from libs.helper import dump_response
|
||||
from libs.login import current_account_with_tenant
|
||||
from models.dataset import Dataset
|
||||
from models.dataset import Dataset, DocumentSegment
|
||||
from services.dataset_service import DatasetService, DocumentService, SegmentService
|
||||
from services.entities.knowledge_entities.knowledge_entities import SegmentUpdateArgs
|
||||
from services.errors.chunk import ChildChunkDeleteIndexError, ChildChunkIndexingError
|
||||
@@ -34,35 +46,27 @@ from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingS
|
||||
from services.summary_index_service import SummaryIndexService
|
||||
|
||||
|
||||
def _marshal_segment_with_summary(segment, dataset_id: str) -> dict[str, Any]:
|
||||
"""Marshal a single segment and enrich it with summary content."""
|
||||
segment_dict: dict[str, Any] = dict(marshal(segment, segment_fields)) # type: ignore[arg-type]
|
||||
summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id)
|
||||
segment_dict["summary"] = summary.summary_content if summary else None
|
||||
return segment_dict
|
||||
class SegmentCreateItemPayload(BaseModel):
|
||||
content: str = Field(min_length=1)
|
||||
answer: str | None = None
|
||||
keywords: list[str] | None = None
|
||||
attachment_ids: list[str] | None = None
|
||||
|
||||
|
||||
def _marshal_segments_with_summary(segments, dataset_id: str) -> list[dict[str, Any]]:
|
||||
"""Marshal multiple segments and enrich them with summary content (batch query)."""
|
||||
segment_ids = [segment.id for segment in segments]
|
||||
summaries: dict[str, str | None] = {}
|
||||
if segment_ids:
|
||||
summary_records = SummaryIndexService.get_segments_summaries(segment_ids=segment_ids, dataset_id=dataset_id)
|
||||
summaries = {chunk_id: record.summary_content for chunk_id, record in summary_records.items()}
|
||||
|
||||
result: list[dict[str, Any]] = []
|
||||
for segment in segments:
|
||||
segment_dict: dict[str, Any] = dict(marshal(segment, segment_fields)) # type: ignore[arg-type]
|
||||
segment_dict["summary"] = summaries.get(segment.id)
|
||||
result.append(segment_dict)
|
||||
return result
|
||||
@field_validator("content")
|
||||
@classmethod
|
||||
def validate_content(cls, value: str) -> str:
|
||||
if not value.strip():
|
||||
raise ValueError("Content is empty")
|
||||
return value
|
||||
|
||||
|
||||
class SegmentCreatePayload(BaseModel):
|
||||
segments: list[dict[str, Any]] | None = None
|
||||
segments: list[SegmentCreateItemPayload] = Field(min_length=1)
|
||||
|
||||
|
||||
class SegmentListQuery(BaseModel):
|
||||
limit: int = Field(default=20, ge=1)
|
||||
page: int = Field(default=1, ge=1)
|
||||
status: list[str] = Field(default_factory=list)
|
||||
keyword: str | None = None
|
||||
|
||||
@@ -77,9 +81,31 @@ class ChildChunkListQuery(BaseModel):
|
||||
page: int = Field(default=1, ge=1)
|
||||
|
||||
|
||||
class SegmentDocParams:
    """Path-parameter descriptions shared by the service API segment routes' docs."""

    # Base pair that the remaining entries build on.
    DATASET_DOCUMENT = {"dataset_id": "Dataset ID", "document_id": "Document ID"}
    DATASET_DOCUMENT_SEGMENT = dict(DATASET_DOCUMENT, segment_id="Segment ID")
    # Same key, described as the parent segment for child-chunk routes.
    DATASET_DOCUMENT_PARENT_SEGMENT = dict(DATASET_DOCUMENT, segment_id="Parent segment ID")
    DATASET_DOCUMENT_CHILD_CHUNK = dict(DATASET_DOCUMENT_PARENT_SEGMENT, child_chunk_id="Child chunk ID")
|
||||
|
||||
|
||||
# Response schema registered for the service API "create segments" endpoint.
class SegmentCreateListResponse(ResponseModel):
    # Segments in the response.
    data: list[SegmentResponse]
    # The owning document's doc_form value.
    doc_form: str
|
||||
|
||||
|
||||
# Paginated segment list response schema for the service API.
class SegmentListResponse(ResponseModel):
    # Segments on the current page.
    data: list[SegmentResponse]
    # doc_form value reported alongside the segments.
    doc_form: str
    # Total number of segments (presumably across all pages; confirm against the handler).
    total: int
    # Whether further pages exist beyond this one.
    has_more: bool
    # Page size used for this response.
    limit: int
    # Page number of this response.
    page: int
|
||||
|
||||
|
||||
register_schema_models(
|
||||
service_api_ns,
|
||||
SegmentCreatePayload,
|
||||
SegmentCreateItemPayload,
|
||||
SegmentListQuery,
|
||||
SegmentUpdateArgs,
|
||||
SegmentUpdatePayload,
|
||||
@@ -87,6 +113,15 @@ register_schema_models(
|
||||
ChildChunkListQuery,
|
||||
ChildChunkUpdatePayload,
|
||||
)
|
||||
register_response_schema_models(
|
||||
service_api_ns,
|
||||
SegmentResponse,
|
||||
SegmentCreateListResponse,
|
||||
SegmentListResponse,
|
||||
SegmentDetailResponse,
|
||||
ChildChunkDetailResponse,
|
||||
ChildChunkListResponse,
|
||||
)
|
||||
|
||||
|
||||
@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments")
|
||||
@@ -96,7 +131,7 @@ class SegmentApi(DatasetApiResource):
|
||||
@service_api_ns.expect(service_api_ns.models[SegmentCreatePayload.__name__])
|
||||
@service_api_ns.doc("create_segments")
|
||||
@service_api_ns.doc(description="Create segments in a document")
|
||||
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
|
||||
@service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT)
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
200: "Segments created successfully",
|
||||
@@ -105,6 +140,11 @@ class SegmentApi(DatasetApiResource):
|
||||
404: "Dataset or document not found",
|
||||
}
|
||||
)
|
||||
@service_api_ns.response(
|
||||
200,
|
||||
"Segments created successfully",
|
||||
service_api_ns.models[SegmentCreateListResponse.__name__],
|
||||
)
|
||||
@cloud_edition_billing_resource_check("vector_space", "dataset")
|
||||
@cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
@@ -144,26 +184,35 @@ class SegmentApi(DatasetApiResource):
|
||||
except ProviderTokenNotInitError as ex:
|
||||
raise ProviderNotInitializeError(ex.description)
|
||||
# validate args
|
||||
payload = SegmentCreatePayload.model_validate(service_api_ns.payload or {})
|
||||
if payload.segments is not None:
|
||||
segments_limit = dify_config.DATASET_MAX_SEGMENTS_PER_REQUEST
|
||||
if segments_limit > 0 and len(payload.segments) > segments_limit:
|
||||
raise ValueError(f"Exceeded maximum segments limit of {segments_limit}.")
|
||||
try:
|
||||
payload = SegmentCreatePayload.model_validate(service_api_ns.payload or {})
|
||||
except ValidationError as e:
|
||||
return {"error": str(e)}, 400
|
||||
segments_limit = dify_config.DATASET_MAX_SEGMENTS_PER_REQUEST
|
||||
if segments_limit > 0 and len(payload.segments) > segments_limit:
|
||||
raise ValueError(f"Exceeded maximum segments limit of {segments_limit}.")
|
||||
segment_items = [segment.model_dump(exclude_none=True) for segment in payload.segments]
|
||||
|
||||
for args_item in payload.segments:
|
||||
SegmentService.segment_create_args_validate(args_item, document)
|
||||
segments = SegmentService.multi_create_segment(payload.segments, document, dataset)
|
||||
return {
|
||||
"data": _marshal_segments_with_summary(segments, dataset_id_str),
|
||||
"doc_form": document.doc_form,
|
||||
}, 200
|
||||
else:
|
||||
return {"error": "Segments is required"}, 400
|
||||
for args_item in segment_items:
|
||||
SegmentService.segment_create_args_validate(args_item, document)
|
||||
segments = cast(list[DocumentSegment], SegmentService.multi_create_segment(segment_items, document, dataset))
|
||||
segment_ids = [segment.id for segment in segments]
|
||||
summaries: dict[str, str | None] = {}
|
||||
if segment_ids:
|
||||
summary_records = SummaryIndexService.get_segments_summaries(
|
||||
segment_ids=segment_ids, dataset_id=dataset_id_str
|
||||
)
|
||||
summaries = {chunk_id: record.summary_content for chunk_id, record in summary_records.items()}
|
||||
response = {
|
||||
"data": segment_responses_with_summaries(segments, summaries),
|
||||
"doc_form": document.doc_form,
|
||||
}
|
||||
return dump_response(SegmentCreateListResponse, response), 200
|
||||
|
||||
@service_api_ns.expect(service_api_ns.models[SegmentListQuery.__name__])
|
||||
@service_api_ns.doc("list_segments")
|
||||
@service_api_ns.doc(description="List segments in a document")
|
||||
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
|
||||
@service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT)
|
||||
@service_api_ns.doc(params=query_params_from_model(SegmentListQuery))
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
200: "Segments retrieved successfully",
|
||||
@@ -171,12 +220,22 @@ class SegmentApi(DatasetApiResource):
|
||||
404: "Dataset or document not found",
|
||||
}
|
||||
)
|
||||
@service_api_ns.response(
|
||||
200,
|
||||
"Segments retrieved successfully",
|
||||
service_api_ns.models[SegmentListResponse.__name__],
|
||||
)
|
||||
def get(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
|
||||
_, current_tenant_id = current_account_with_tenant()
|
||||
"""Get segments."""
|
||||
# check dataset
|
||||
page = request.args.get("page", default=1, type=int)
|
||||
limit = request.args.get("limit", default=20, type=int)
|
||||
args = query_params_from_request(
|
||||
SegmentListQuery,
|
||||
list_fields=("status",),
|
||||
use_defaults_for_malformed_ints=True,
|
||||
)
|
||||
page = args.page
|
||||
limit = args.limit
|
||||
dataset_id_str = str(dataset_id)
|
||||
dataset = db.session.scalar(
|
||||
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id_str).limit(1)
|
||||
@@ -205,13 +264,6 @@ class SegmentApi(DatasetApiResource):
|
||||
except ProviderTokenNotInitError as ex:
|
||||
raise ProviderNotInitializeError(ex.description)
|
||||
|
||||
args = SegmentListQuery.model_validate(
|
||||
{
|
||||
"status": request.args.getlist("status"),
|
||||
"keyword": request.args.get("keyword"),
|
||||
}
|
||||
)
|
||||
|
||||
segments, total = SegmentService.get_segments(
|
||||
document_id=document_id_str,
|
||||
tenant_id=current_tenant_id,
|
||||
@@ -220,9 +272,16 @@ class SegmentApi(DatasetApiResource):
|
||||
page=page,
|
||||
limit=limit,
|
||||
)
|
||||
segment_ids = [segment.id for segment in segments]
|
||||
summaries: dict[str, str | None] = {}
|
||||
if segment_ids:
|
||||
summary_records = SummaryIndexService.get_segments_summaries(
|
||||
segment_ids=segment_ids, dataset_id=dataset_id_str
|
||||
)
|
||||
summaries = {chunk_id: record.summary_content for chunk_id, record in summary_records.items()}
|
||||
|
||||
response = {
|
||||
"data": _marshal_segments_with_summary(segments, dataset_id_str),
|
||||
"data": segment_responses_with_summaries(segments, summaries),
|
||||
"doc_form": document.doc_form,
|
||||
"total": total,
|
||||
"has_more": len(segments) == limit,
|
||||
@@ -230,16 +289,14 @@ class SegmentApi(DatasetApiResource):
|
||||
"page": page,
|
||||
}
|
||||
|
||||
return response, 200
|
||||
return dump_response(SegmentListResponse, response), 200
|
||||
|
||||
|
||||
@service_api_ns.route("/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/segments/<uuid:segment_id>")
|
||||
class DatasetSegmentApi(DatasetApiResource):
|
||||
@service_api_ns.doc("delete_segment")
|
||||
@service_api_ns.doc(description="Delete a specific segment")
|
||||
@service_api_ns.doc(
|
||||
params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Segment ID to delete"}
|
||||
)
|
||||
@service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_SEGMENT)
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
204: "Segment deleted successfully",
|
||||
@@ -275,9 +332,7 @@ class DatasetSegmentApi(DatasetApiResource):
|
||||
@service_api_ns.expect(service_api_ns.models[SegmentUpdatePayload.__name__])
|
||||
@service_api_ns.doc("update_segment")
|
||||
@service_api_ns.doc(description="Update a specific segment")
|
||||
@service_api_ns.doc(
|
||||
params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Segment ID to update"}
|
||||
)
|
||||
@service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_SEGMENT)
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
200: "Segment updated successfully",
|
||||
@@ -285,6 +340,7 @@ class DatasetSegmentApi(DatasetApiResource):
|
||||
404: "Dataset, document, or segment not found",
|
||||
}
|
||||
)
|
||||
@service_api_ns.response(200, "Segment updated successfully", service_api_ns.models[SegmentDetailResponse.__name__])
|
||||
@cloud_edition_billing_resource_check("vector_space", "dataset")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID):
|
||||
@@ -328,13 +384,16 @@ class DatasetSegmentApi(DatasetApiResource):
|
||||
payload = SegmentUpdatePayload.model_validate(service_api_ns.payload or {})
|
||||
|
||||
updated_segment = SegmentService.update_segment(payload.segment, segment, document, dataset)
|
||||
return {
|
||||
"data": _marshal_segment_with_summary(updated_segment, dataset_id_str),
|
||||
summary = SummaryIndexService.get_segment_summary(segment_id=updated_segment.id, dataset_id=dataset_id_str)
|
||||
response = {
|
||||
"data": segment_response_with_summary(updated_segment, summary.summary_content if summary else None),
|
||||
"doc_form": document.doc_form,
|
||||
}, 200
|
||||
}
|
||||
return dump_response(SegmentDetailResponse, response), 200
|
||||
|
||||
@service_api_ns.doc("get_segment")
|
||||
@service_api_ns.doc(description="Get a specific segment by ID")
|
||||
@service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_SEGMENT)
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
200: "Segment retrieved successfully",
|
||||
@@ -342,6 +401,11 @@ class DatasetSegmentApi(DatasetApiResource):
|
||||
404: "Dataset, document, or segment not found",
|
||||
}
|
||||
)
|
||||
@service_api_ns.response(
|
||||
200,
|
||||
"Segment retrieved successfully",
|
||||
service_api_ns.models[SegmentDetailResponse.__name__],
|
||||
)
|
||||
def get(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID):
|
||||
_, current_tenant_id = current_account_with_tenant()
|
||||
dataset_id_str = str(dataset_id)
|
||||
@@ -364,7 +428,12 @@ class DatasetSegmentApi(DatasetApiResource):
|
||||
if not segment:
|
||||
raise NotFound("Segment not found.")
|
||||
|
||||
return {"data": _marshal_segment_with_summary(segment, dataset_id_str), "doc_form": document.doc_form}, 200
|
||||
summary = SummaryIndexService.get_segment_summary(segment_id=segment.id, dataset_id=dataset_id_str)
|
||||
response = {
|
||||
"data": segment_response_with_summary(segment, summary.summary_content if summary else None),
|
||||
"doc_form": document.doc_form,
|
||||
}
|
||||
return dump_response(SegmentDetailResponse, response), 200
|
||||
|
||||
|
||||
@service_api_ns.route(
|
||||
@@ -376,9 +445,7 @@ class ChildChunkApi(DatasetApiResource):
|
||||
@service_api_ns.expect(service_api_ns.models[ChildChunkCreatePayload.__name__])
|
||||
@service_api_ns.doc("create_child_chunk")
|
||||
@service_api_ns.doc(description="Create a new child chunk for a segment")
|
||||
@service_api_ns.doc(
|
||||
params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Parent segment ID"}
|
||||
)
|
||||
@service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_PARENT_SEGMENT)
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
200: "Child chunk created successfully",
|
||||
@@ -386,6 +453,11 @@ class ChildChunkApi(DatasetApiResource):
|
||||
404: "Dataset, document, or segment not found",
|
||||
}
|
||||
)
|
||||
@service_api_ns.response(
|
||||
200,
|
||||
"Child chunk created successfully",
|
||||
service_api_ns.models[ChildChunkDetailResponse.__name__],
|
||||
)
|
||||
@cloud_edition_billing_resource_check("vector_space", "dataset")
|
||||
@cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
@@ -437,14 +509,12 @@ class ChildChunkApi(DatasetApiResource):
|
||||
except ChildChunkIndexingServiceError as e:
|
||||
raise ChildChunkIndexingError(str(e))
|
||||
|
||||
return {"data": marshal(child_chunk, child_chunk_fields)}, 200
|
||||
return dump_response(ChildChunkDetailResponse, {"data": child_chunk}), 200
|
||||
|
||||
@service_api_ns.expect(service_api_ns.models[ChildChunkListQuery.__name__])
|
||||
@service_api_ns.doc("list_child_chunks")
|
||||
@service_api_ns.doc(description="List child chunks for a segment")
|
||||
@service_api_ns.doc(
|
||||
params={"dataset_id": "Dataset ID", "document_id": "Document ID", "segment_id": "Parent segment ID"}
|
||||
)
|
||||
@service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_PARENT_SEGMENT)
|
||||
@service_api_ns.doc(params=query_params_from_model(ChildChunkListQuery))
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
200: "Child chunks retrieved successfully",
|
||||
@@ -452,6 +522,11 @@ class ChildChunkApi(DatasetApiResource):
|
||||
404: "Dataset, document, or segment not found",
|
||||
}
|
||||
)
|
||||
@service_api_ns.response(
|
||||
200,
|
||||
"Child chunks retrieved successfully",
|
||||
service_api_ns.models[ChildChunkListResponse.__name__],
|
||||
)
|
||||
def get(self, tenant_id: str, dataset_id: UUID, document_id: UUID, segment_id: UUID):
|
||||
_, current_tenant_id = current_account_with_tenant()
|
||||
"""Get child chunks."""
|
||||
@@ -475,13 +550,7 @@ class ChildChunkApi(DatasetApiResource):
|
||||
if not segment:
|
||||
raise NotFound("Segment not found.")
|
||||
|
||||
args = ChildChunkListQuery.model_validate(
|
||||
{
|
||||
"limit": request.args.get("limit", default=20, type=int),
|
||||
"keyword": request.args.get("keyword"),
|
||||
"page": request.args.get("page", default=1, type=int),
|
||||
}
|
||||
)
|
||||
args = query_params_from_request(ChildChunkListQuery, use_defaults_for_malformed_ints=True)
|
||||
|
||||
page = args.page
|
||||
limit = min(args.limit, 100)
|
||||
@@ -491,13 +560,14 @@ class ChildChunkApi(DatasetApiResource):
|
||||
segment_id_str, document_id_str, dataset_id_str, page, limit, keyword
|
||||
)
|
||||
|
||||
return {
|
||||
"data": marshal(child_chunks.items, child_chunk_fields),
|
||||
response = {
|
||||
"data": child_chunks.items,
|
||||
"total": child_chunks.total,
|
||||
"total_pages": child_chunks.pages,
|
||||
"page": page,
|
||||
"limit": limit,
|
||||
}, 200
|
||||
}
|
||||
return dump_response(ChildChunkListResponse, response), 200
|
||||
|
||||
|
||||
@service_api_ns.route(
|
||||
@@ -508,14 +578,7 @@ class DatasetChildChunkApi(DatasetApiResource):
|
||||
|
||||
@service_api_ns.doc("delete_child_chunk")
|
||||
@service_api_ns.doc(description="Delete a specific child chunk")
|
||||
@service_api_ns.doc(
|
||||
params={
|
||||
"dataset_id": "Dataset ID",
|
||||
"document_id": "Document ID",
|
||||
"segment_id": "Parent segment ID",
|
||||
"child_chunk_id": "Child chunk ID to delete",
|
||||
}
|
||||
)
|
||||
@service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_CHILD_CHUNK)
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
204: "Child chunk deleted successfully",
|
||||
@@ -549,7 +612,7 @@ class DatasetChildChunkApi(DatasetApiResource):
|
||||
raise NotFound("Segment not found.")
|
||||
|
||||
# validate segment belongs to the specified document
|
||||
if str(segment.document_id) != str(document_id_str):
|
||||
if segment.document_id != document_id_str:
|
||||
raise NotFound("Document not found.")
|
||||
|
||||
child_chunk_id_str = str(child_chunk_id)
|
||||
@@ -561,7 +624,7 @@ class DatasetChildChunkApi(DatasetApiResource):
|
||||
raise NotFound("Child chunk not found.")
|
||||
|
||||
# validate child chunk belongs to the specified segment
|
||||
if str(child_chunk.segment_id) != str(segment.id):
|
||||
if child_chunk.segment_id != segment.id:
|
||||
raise NotFound("Child chunk not found.")
|
||||
|
||||
try:
|
||||
@@ -574,14 +637,7 @@ class DatasetChildChunkApi(DatasetApiResource):
|
||||
@service_api_ns.expect(service_api_ns.models[ChildChunkUpdatePayload.__name__])
|
||||
@service_api_ns.doc("update_child_chunk")
|
||||
@service_api_ns.doc(description="Update a specific child chunk")
|
||||
@service_api_ns.doc(
|
||||
params={
|
||||
"dataset_id": "Dataset ID",
|
||||
"document_id": "Document ID",
|
||||
"segment_id": "Parent segment ID",
|
||||
"child_chunk_id": "Child chunk ID to update",
|
||||
}
|
||||
)
|
||||
@service_api_ns.doc(params=SegmentDocParams.DATASET_DOCUMENT_CHILD_CHUNK)
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
200: "Child chunk updated successfully",
|
||||
@@ -589,6 +645,11 @@ class DatasetChildChunkApi(DatasetApiResource):
|
||||
404: "Dataset, document, segment, or child chunk not found",
|
||||
}
|
||||
)
|
||||
@service_api_ns.response(
|
||||
200,
|
||||
"Child chunk updated successfully",
|
||||
service_api_ns.models[ChildChunkDetailResponse.__name__],
|
||||
)
|
||||
@cloud_edition_billing_resource_check("vector_space", "dataset")
|
||||
@cloud_edition_billing_knowledge_limit_check("add_segment", "dataset")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
@@ -616,7 +677,7 @@ class DatasetChildChunkApi(DatasetApiResource):
|
||||
raise NotFound("Segment not found.")
|
||||
|
||||
# validate segment belongs to the specified document
|
||||
if str(segment.document_id) != str(document_id_str):
|
||||
if segment.document_id != document_id_str:
|
||||
raise NotFound("Segment not found.")
|
||||
|
||||
child_chunk_id_str = str(child_chunk_id)
|
||||
@@ -628,7 +689,7 @@ class DatasetChildChunkApi(DatasetApiResource):
|
||||
raise NotFound("Child chunk not found.")
|
||||
|
||||
# validate child chunk belongs to the specified segment
|
||||
if str(child_chunk.segment_id) != str(segment.id):
|
||||
if child_chunk.segment_id != segment.id:
|
||||
raise NotFound("Child chunk not found.")
|
||||
|
||||
# validate args
|
||||
@@ -639,4 +700,4 @@ class DatasetChildChunkApi(DatasetApiResource):
|
||||
except ChildChunkIndexingServiceError as e:
|
||||
raise ChildChunkIndexingError(str(e))
|
||||
|
||||
return {"data": marshal(child_chunk, child_chunk_fields)}, 200
|
||||
return dump_response(ChildChunkDetailResponse, {"data": child_chunk}), 200
|
||||
|
||||
+105
-49
@@ -1,53 +1,109 @@
|
||||
from flask_restx import fields
|
||||
from collections.abc import Iterable, Mapping
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from libs.helper import TimestampField
|
||||
from pydantic import field_serializer
|
||||
|
||||
child_chunk_fields = {
|
||||
"id": fields.String,
|
||||
"segment_id": fields.String,
|
||||
"content": fields.String,
|
||||
"position": fields.Integer,
|
||||
"word_count": fields.Integer,
|
||||
"type": fields.String,
|
||||
"created_at": TimestampField,
|
||||
"updated_at": TimestampField,
|
||||
}
|
||||
from fields.base import ResponseModel
|
||||
from libs.helper import to_timestamp
|
||||
|
||||
attachment_fields = {
|
||||
"id": fields.String,
|
||||
"name": fields.String,
|
||||
"size": fields.Integer,
|
||||
"extension": fields.String,
|
||||
"mime_type": fields.String,
|
||||
"source_url": fields.String,
|
||||
}
|
||||
|
||||
segment_fields = {
|
||||
"id": fields.String,
|
||||
"position": fields.Integer,
|
||||
"document_id": fields.String,
|
||||
"content": fields.String,
|
||||
"sign_content": fields.String,
|
||||
"answer": fields.String,
|
||||
"word_count": fields.Integer,
|
||||
"tokens": fields.Integer,
|
||||
"keywords": fields.List(fields.String),
|
||||
"index_node_id": fields.String,
|
||||
"index_node_hash": fields.String,
|
||||
"hit_count": fields.Integer,
|
||||
"enabled": fields.Boolean,
|
||||
"disabled_at": TimestampField,
|
||||
"disabled_by": fields.String,
|
||||
"status": fields.String,
|
||||
"created_by": fields.String,
|
||||
"created_at": TimestampField,
|
||||
"updated_at": TimestampField,
|
||||
"updated_by": fields.String,
|
||||
"indexing_at": TimestampField,
|
||||
"completed_at": TimestampField,
|
||||
"error": fields.String,
|
||||
"stopped_at": TimestampField,
|
||||
"child_chunks": fields.List(fields.Nested(child_chunk_fields)),
|
||||
"attachments": fields.List(fields.Nested(attachment_fields)),
|
||||
"summary": fields.String, # Summary content for the segment
|
||||
}
|
||||
class SegmentAttachmentResponse(ResponseModel):
|
||||
id: str
|
||||
name: str
|
||||
size: int
|
||||
extension: str
|
||||
mime_type: str | None
|
||||
source_url: str
|
||||
|
||||
|
||||
class ChildChunkResponse(ResponseModel):
|
||||
id: str
|
||||
segment_id: str
|
||||
content: str
|
||||
position: int
|
||||
word_count: int
|
||||
type: str
|
||||
created_at: datetime | int
|
||||
updated_at: datetime | int
|
||||
|
||||
@field_serializer("created_at", "updated_at")
|
||||
def serialize_timestamp(self, value: datetime | int) -> int:
|
||||
return to_timestamp(value)
|
||||
|
||||
|
||||
class SegmentResponse(ResponseModel):
|
||||
id: str
|
||||
position: int
|
||||
document_id: str
|
||||
content: str
|
||||
sign_content: str
|
||||
answer: str | None
|
||||
word_count: int
|
||||
tokens: int
|
||||
keywords: list[str] | None
|
||||
index_node_id: str | None
|
||||
index_node_hash: str | None
|
||||
hit_count: int
|
||||
enabled: bool
|
||||
disabled_at: datetime | int | None
|
||||
disabled_by: str | None
|
||||
status: str
|
||||
created_by: str
|
||||
created_at: datetime | int
|
||||
updated_at: datetime | int
|
||||
updated_by: str | None
|
||||
indexing_at: datetime | int | None
|
||||
completed_at: datetime | int | None
|
||||
error: str | None
|
||||
stopped_at: datetime | int | None
|
||||
child_chunks: list[ChildChunkResponse]
|
||||
attachments: list[SegmentAttachmentResponse]
|
||||
summary: str | None
|
||||
|
||||
@field_serializer("created_at", "updated_at")
|
||||
def serialize_required_timestamp(self, value: datetime | int) -> int:
|
||||
return to_timestamp(value)
|
||||
|
||||
@field_serializer("disabled_at", "indexing_at", "completed_at", "stopped_at")
|
||||
def serialize_optional_timestamp(self, value: datetime | int | None) -> int | None:
|
||||
return to_timestamp(value)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SegmentWithSummary:
|
||||
segment: Any
|
||||
summary: str | None
|
||||
|
||||
def __getattr__(self, name: str) -> Any:
|
||||
return getattr(self.segment, name)
|
||||
|
||||
|
||||
def segment_response_with_summary(segment: Any, summary: str | None) -> SegmentResponse:
|
||||
response_source = SegmentWithSummary(segment=segment, summary=summary)
|
||||
return SegmentResponse.model_validate(response_source, from_attributes=True)
|
||||
|
||||
|
||||
def segment_responses_with_summaries(
|
||||
segments: Iterable[Any],
|
||||
summaries: Mapping[str, str | None],
|
||||
) -> list[SegmentResponse]:
|
||||
return [segment_response_with_summary(segment, summaries.get(segment.id)) for segment in segments]
|
||||
|
||||
|
||||
class SegmentDetailResponse(ResponseModel):
|
||||
data: SegmentResponse
|
||||
doc_form: str
|
||||
|
||||
|
||||
class ChildChunkDetailResponse(ResponseModel):
|
||||
data: ChildChunkResponse
|
||||
|
||||
|
||||
class ChildChunkListResponse(ResponseModel):
|
||||
data: list[ChildChunkResponse]
|
||||
total: int
|
||||
total_pages: int
|
||||
page: int
|
||||
limit: int
|
||||
|
||||
@@ -5175,15 +5175,15 @@ Update document processing status (pause/resume)
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| payload | body | | Yes | [SegmentCreatePayload](#segmentcreatepayload) |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Success |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Segment created successfully | [SegmentDetailResponse](#segmentdetailresponse) |
|
||||
|
||||
### /datasets/{dataset_id}/documents/{document_id}/segment/{action}
|
||||
|
||||
@@ -5192,9 +5192,10 @@ Update document processing status (pause/resume)
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| action | path | | Yes | string |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| action | path | Action | Yes | string |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | query | Segment IDs | No | [ string ] |
|
||||
|
||||
##### Responses
|
||||
|
||||
@@ -5209,8 +5210,9 @@ Update document processing status (pause/resume)
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | query | Segment IDs | No | [ string ] |
|
||||
|
||||
##### Responses
|
||||
|
||||
@@ -5223,14 +5225,20 @@ Update document processing status (pause/resume)
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| enabled | query | | No | string |
|
||||
| hit_count_gte | query | | No | integer |
|
||||
| keyword | query | | No | string |
|
||||
| limit | query | | No | integer |
|
||||
| page | query | | No | integer |
|
||||
| status | query | | No | [ string ] |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Success |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Segments retrieved successfully | [ConsoleSegmentListResponse](#consolesegmentlistresponse) |
|
||||
|
||||
### /datasets/{dataset_id}/documents/{document_id}/segments/batch_import
|
||||
|
||||
@@ -5270,9 +5278,9 @@ Update document processing status (pause/resume)
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| segment_id | path | | Yes | string |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Segment ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
@@ -5285,16 +5293,16 @@ Update document processing status (pause/resume)
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| segment_id | path | | Yes | string |
|
||||
| payload | body | | Yes | [SegmentUpdatePayload](#segmentupdatepayload) |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Segment ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Success |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Segment updated successfully | [SegmentDetailResponse](#segmentdetailresponse) |
|
||||
|
||||
### /datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks
|
||||
|
||||
@@ -5303,46 +5311,50 @@ Update document processing status (pause/resume)
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| segment_id | path | | Yes | string |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Parent segment ID | Yes | string |
|
||||
| keyword | query | | No | string |
|
||||
| limit | query | | No | integer |
|
||||
| page | query | | No | integer |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Success |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Child chunks retrieved successfully | [ChildChunkListResponse](#childchunklistresponse) |
|
||||
|
||||
#### PATCH
|
||||
##### Parameters
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| segment_id | path | | Yes | string |
|
||||
| payload | body | | Yes | [ChildChunkBatchUpdatePayload](#childchunkbatchupdatepayload) |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Parent segment ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Success |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Child chunks updated successfully | [ChildChunkBatchUpdateResponse](#childchunkbatchupdateresponse) |
|
||||
|
||||
#### POST
|
||||
##### Parameters
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| segment_id | path | | Yes | string |
|
||||
| payload | body | | Yes | [ChildChunkCreatePayload](#childchunkcreatepayload) |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Parent segment ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Success |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Child chunk created successfully | [ChildChunkDetailResponse](#childchunkdetailresponse) |
|
||||
|
||||
### /datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}
|
||||
|
||||
@@ -5351,10 +5363,10 @@ Update document processing status (pause/resume)
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| child_chunk_id | path | | Yes | string |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| segment_id | path | | Yes | string |
|
||||
| child_chunk_id | path | Child chunk ID | Yes | string |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Parent segment ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
@@ -5367,17 +5379,17 @@ Update document processing status (pause/resume)
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| child_chunk_id | path | | Yes | string |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| segment_id | path | | Yes | string |
|
||||
| payload | body | | Yes | [ChildChunkUpdatePayload](#childchunkupdatepayload) |
|
||||
| child_chunk_id | path | Child chunk ID | Yes | string |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Parent segment ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Success |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Child chunk updated successfully | [ChildChunkDetailResponse](#childchunkdetailresponse) |
|
||||
|
||||
### /datasets/{dataset_id}/documents/{document_id}/summary-status
|
||||
|
||||
@@ -11718,12 +11730,55 @@ Button styles for user actions.
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| chunks | [ [ChildChunkUpdateArgs](#childchunkupdateargs) ] | | Yes |
|
||||
|
||||
#### ChildChunkBatchUpdateResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| data | [ [ChildChunkResponse](#childchunkresponse) ] | | Yes |
|
||||
|
||||
#### ChildChunkCreatePayload
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| content | string | | Yes |
|
||||
|
||||
#### ChildChunkDetailResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| data | [ChildChunkResponse](#childchunkresponse) | | Yes |
|
||||
|
||||
#### ChildChunkListQuery
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| keyword | string | | No |
|
||||
| limit | integer | | No |
|
||||
| page | integer | | No |
|
||||
|
||||
#### ChildChunkListResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| data | [ [ChildChunkResponse](#childchunkresponse) ] | | Yes |
|
||||
| limit | integer | | Yes |
|
||||
| page | integer | | Yes |
|
||||
| total | integer | | Yes |
|
||||
| total_pages | integer | | Yes |
|
||||
|
||||
#### ChildChunkResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| content | string | | Yes |
|
||||
| created_at | integer | | Yes |
|
||||
| id | string | | Yes |
|
||||
| position | integer | | Yes |
|
||||
| segment_id | string | | Yes |
|
||||
| type | string | | Yes |
|
||||
| updated_at | integer | | Yes |
|
||||
| word_count | integer | | Yes |
|
||||
|
||||
#### ChildChunkUpdateArgs
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
@@ -11861,6 +11916,16 @@ Condition detail
|
||||
| page | integer | Page number | No |
|
||||
| tag_ids | [ string ] | Filter by tag IDs | No |
|
||||
|
||||
#### ConsoleSegmentListResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| data | [ [SegmentResponse](#segmentresponse) ] | | Yes |
|
||||
| limit | integer | | Yes |
|
||||
| page | integer | | Yes |
|
||||
| total | integer | | Yes |
|
||||
| total_pages | integer | | Yes |
|
||||
|
||||
#### Conversation
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
@@ -14865,6 +14930,17 @@ Form input definition.
|
||||
| last_id | string | | No |
|
||||
| limit | integer | | No |
|
||||
|
||||
#### SegmentAttachmentResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| extension | string | | Yes |
|
||||
| id | string | | Yes |
|
||||
| mime_type | string | | Yes |
|
||||
| name | string | | Yes |
|
||||
| size | integer | | Yes |
|
||||
| source_url | string | | Yes |
|
||||
|
||||
#### SegmentBatchImportStatusResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
@@ -14881,6 +14957,19 @@ Form input definition.
|
||||
| content | string | | Yes |
|
||||
| keywords | [ string ] | | No |
|
||||
|
||||
#### SegmentDetailResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| data | [SegmentResponse](#segmentresponse) | | Yes |
|
||||
| doc_form | string | | Yes |
|
||||
|
||||
#### SegmentIdListQuery
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| segment_id | [ string ] | Segment IDs | No |
|
||||
|
||||
#### SegmentListQuery
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
@@ -14892,6 +14981,38 @@ Form input definition.
|
||||
| page | integer | | No |
|
||||
| status | [ string ] | | No |
|
||||
|
||||
#### SegmentResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| answer | string | | Yes |
|
||||
| attachments | [ [SegmentAttachmentResponse](#segmentattachmentresponse) ] | | Yes |
|
||||
| child_chunks | [ [ChildChunkResponse](#childchunkresponse) ] | | Yes |
|
||||
| completed_at | integer | | Yes |
|
||||
| content | string | | Yes |
|
||||
| created_at | integer | | Yes |
|
||||
| created_by | string | | Yes |
|
||||
| disabled_at | integer | | Yes |
|
||||
| disabled_by | string | | Yes |
|
||||
| document_id | string | | Yes |
|
||||
| enabled | boolean | | Yes |
|
||||
| error | string | | Yes |
|
||||
| hit_count | integer | | Yes |
|
||||
| id | string | | Yes |
|
||||
| index_node_hash | string | | Yes |
|
||||
| index_node_id | string | | Yes |
|
||||
| indexing_at | integer | | Yes |
|
||||
| keywords | [ string ] | | Yes |
|
||||
| position | integer | | Yes |
|
||||
| sign_content | string | | Yes |
|
||||
| status | string | | Yes |
|
||||
| stopped_at | integer | | Yes |
|
||||
| summary | string | | Yes |
|
||||
| tokens | integer | | Yes |
|
||||
| updated_at | integer | | Yes |
|
||||
| updated_by | string | | Yes |
|
||||
| word_count | integer | | Yes |
|
||||
|
||||
#### SegmentUpdatePayload
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
|
||||
@@ -1064,17 +1064,20 @@ List segments in a document
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| payload | body | | Yes | [SegmentListQuery](#segmentlistquery) |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| keyword | query | | No | string |
|
||||
| limit | query | | No | integer |
|
||||
| page | query | | No | integer |
|
||||
| status | query | | No | [ string ] |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Segments retrieved successfully |
|
||||
| 401 | Unauthorized - invalid API token |
|
||||
| 404 | Dataset or document not found |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Segments retrieved successfully | [SegmentListResponse](#segmentlistresponse) |
|
||||
| 401 | Unauthorized - invalid API token | |
|
||||
| 404 | Dataset or document not found | |
|
||||
|
||||
#### POST
|
||||
##### Description
|
||||
@@ -1091,12 +1094,12 @@ Create segments in a document
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Segments created successfully |
|
||||
| 400 | Bad request - segments data is missing |
|
||||
| 401 | Unauthorized - invalid API token |
|
||||
| 404 | Dataset or document not found |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Segments created successfully | [SegmentCreateListResponse](#segmentcreatelistresponse) |
|
||||
| 400 | Bad request - segments data is missing | |
|
||||
| 401 | Unauthorized - invalid API token | |
|
||||
| 404 | Dataset or document not found | |
|
||||
|
||||
### /datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}
|
||||
|
||||
@@ -1111,7 +1114,7 @@ Delete a specific segment
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Segment ID to delete | Yes | string |
|
||||
| segment_id | path | Segment ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
@@ -1130,17 +1133,17 @@ Get a specific segment by ID
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| dataset_id | path | | Yes | string |
|
||||
| document_id | path | | Yes | string |
|
||||
| segment_id | path | | Yes | string |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Segment ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Segment retrieved successfully |
|
||||
| 401 | Unauthorized - invalid API token |
|
||||
| 404 | Dataset, document, or segment not found |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Segment retrieved successfully | [SegmentDetailResponse](#segmentdetailresponse) |
|
||||
| 401 | Unauthorized - invalid API token | |
|
||||
| 404 | Dataset, document, or segment not found | |
|
||||
|
||||
#### POST
|
||||
##### Description
|
||||
@@ -1154,15 +1157,15 @@ Update a specific segment
|
||||
| payload | body | | Yes | [SegmentUpdatePayload](#segmentupdatepayload) |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Segment ID to update | Yes | string |
|
||||
| segment_id | path | Segment ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Segment updated successfully |
|
||||
| 401 | Unauthorized - invalid API token |
|
||||
| 404 | Dataset, document, or segment not found |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Segment updated successfully | [SegmentDetailResponse](#segmentdetailresponse) |
|
||||
| 401 | Unauthorized - invalid API token | |
|
||||
| 404 | Dataset, document, or segment not found | |
|
||||
|
||||
### /datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks
|
||||
|
||||
@@ -1175,18 +1178,20 @@ List child chunks for a segment
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| payload | body | | Yes | [ChildChunkListQuery](#childchunklistquery) |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Parent segment ID | Yes | string |
|
||||
| keyword | query | | No | string |
|
||||
| limit | query | | No | integer |
|
||||
| page | query | | No | integer |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Child chunks retrieved successfully |
|
||||
| 401 | Unauthorized - invalid API token |
|
||||
| 404 | Dataset, document, or segment not found |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Child chunks retrieved successfully | [ChildChunkListResponse](#childchunklistresponse) |
|
||||
| 401 | Unauthorized - invalid API token | |
|
||||
| 404 | Dataset, document, or segment not found | |
|
||||
|
||||
#### POST
|
||||
##### Description
|
||||
@@ -1204,11 +1209,11 @@ Create a new child chunk for a segment
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Child chunk created successfully |
|
||||
| 401 | Unauthorized - invalid API token |
|
||||
| 404 | Dataset, document, or segment not found |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Child chunk created successfully | [ChildChunkDetailResponse](#childchunkdetailresponse) |
|
||||
| 401 | Unauthorized - invalid API token | |
|
||||
| 404 | Dataset, document, or segment not found | |
|
||||
|
||||
### /datasets/{dataset_id}/documents/{document_id}/segments/{segment_id}/child_chunks/{child_chunk_id}
|
||||
|
||||
@@ -1221,7 +1226,7 @@ Delete a specific child chunk
|
||||
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| child_chunk_id | path | Child chunk ID to delete | Yes | string |
|
||||
| child_chunk_id | path | Child chunk ID | Yes | string |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Parent segment ID | Yes | string |
|
||||
@@ -1244,18 +1249,18 @@ Update a specific child chunk
|
||||
| Name | Located in | Description | Required | Schema |
|
||||
| ---- | ---------- | ----------- | -------- | ------ |
|
||||
| payload | body | | Yes | [ChildChunkUpdatePayload](#childchunkupdatepayload) |
|
||||
| child_chunk_id | path | Child chunk ID to update | Yes | string |
|
||||
| child_chunk_id | path | Child chunk ID | Yes | string |
|
||||
| dataset_id | path | Dataset ID | Yes | string |
|
||||
| document_id | path | Document ID | Yes | string |
|
||||
| segment_id | path | Parent segment ID | Yes | string |
|
||||
|
||||
##### Responses
|
||||
|
||||
| Code | Description |
|
||||
| ---- | ----------- |
|
||||
| 200 | Child chunk updated successfully |
|
||||
| 401 | Unauthorized - invalid API token |
|
||||
| 404 | Dataset, document, segment, or child chunk not found |
|
||||
| Code | Description | Schema |
|
||||
| ---- | ----------- | ------ |
|
||||
| 200 | Child chunk updated successfully | [ChildChunkDetailResponse](#childchunkdetailresponse) |
|
||||
| 401 | Unauthorized - invalid API token | |
|
||||
| 404 | Dataset, document, segment, or child chunk not found | |
|
||||
|
||||
### /datasets/{dataset_id}/documents/{document_id}/update-by-file
|
||||
|
||||
@@ -2222,6 +2227,12 @@ Returns a list of available models for the specified model type.
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| content | string | | Yes |
|
||||
|
||||
#### ChildChunkDetailResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| data | [ChildChunkResponse](#childchunkresponse) | | Yes |
|
||||
|
||||
#### ChildChunkListQuery
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
@@ -2230,6 +2241,29 @@ Returns a list of available models for the specified model type.
|
||||
| limit | integer | | No |
|
||||
| page | integer | | No |
|
||||
|
||||
#### ChildChunkListResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| data | [ [ChildChunkResponse](#childchunkresponse) ] | | Yes |
|
||||
| limit | integer | | Yes |
|
||||
| page | integer | | Yes |
|
||||
| total | integer | | Yes |
|
||||
| total_pages | integer | | Yes |
|
||||
|
||||
#### ChildChunkResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| content | string | | Yes |
|
||||
| created_at | integer | | Yes |
|
||||
| id | string | | Yes |
|
||||
| position | integer | | Yes |
|
||||
| segment_id | string | | Yes |
|
||||
| type | string | | Yes |
|
||||
| updated_at | integer | | Yes |
|
||||
| word_count | integer | | Yes |
|
||||
|
||||
#### ChildChunkUpdatePayload
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
@@ -2954,19 +2988,98 @@ Metadata operation data
|
||||
| segmentation | [Segmentation](#segmentation) | | No |
|
||||
| subchunk_segmentation | [Segmentation](#segmentation) | | No |
|
||||
|
||||
#### SegmentAttachmentResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| extension | string | | Yes |
|
||||
| id | string | | Yes |
|
||||
| mime_type | string | | Yes |
|
||||
| name | string | | Yes |
|
||||
| size | integer | | Yes |
|
||||
| source_url | string | | Yes |
|
||||
|
||||
#### SegmentCreateItemPayload
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| answer | string | | No |
|
||||
| attachment_ids | [ string ] | | No |
|
||||
| content | string | | Yes |
|
||||
| keywords | [ string ] | | No |
|
||||
|
||||
#### SegmentCreateListResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| data | [ [SegmentResponse](#segmentresponse) ] | | Yes |
|
||||
| doc_form | string | | Yes |
|
||||
|
||||
#### SegmentCreatePayload
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| segments | [ object ] | | No |
|
||||
| segments | [ [SegmentCreateItemPayload](#segmentcreateitempayload) ] | | Yes |
|
||||
|
||||
#### SegmentDetailResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| data | [SegmentResponse](#segmentresponse) | | Yes |
|
||||
| doc_form | string | | Yes |
|
||||
|
||||
#### SegmentListQuery
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| keyword | string | | No |
|
||||
| limit | integer | | No |
|
||||
| page | integer | | No |
|
||||
| status | [ string ] | | No |
|
||||
|
||||
#### SegmentListResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| data | [ [SegmentResponse](#segmentresponse) ] | | Yes |
|
||||
| doc_form | string | | Yes |
|
||||
| has_more | boolean | | Yes |
|
||||
| limit | integer | | Yes |
|
||||
| page | integer | | Yes |
|
||||
| total | integer | | Yes |
|
||||
|
||||
#### SegmentResponse
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
| ---- | ---- | ----------- | -------- |
|
||||
| answer | string | | Yes |
|
||||
| attachments | [ [SegmentAttachmentResponse](#segmentattachmentresponse) ] | | Yes |
|
||||
| child_chunks | [ [ChildChunkResponse](#childchunkresponse) ] | | Yes |
|
||||
| completed_at | integer | | Yes |
|
||||
| content | string | | Yes |
|
||||
| created_at | integer | | Yes |
|
||||
| created_by | string | | Yes |
|
||||
| disabled_at | integer | | Yes |
|
||||
| disabled_by | string | | Yes |
|
||||
| document_id | string | | Yes |
|
||||
| enabled | boolean | | Yes |
|
||||
| error | string | | Yes |
|
||||
| hit_count | integer | | Yes |
|
||||
| id | string | | Yes |
|
||||
| index_node_hash | string | | Yes |
|
||||
| index_node_id | string | | Yes |
|
||||
| indexing_at | integer | | Yes |
|
||||
| keywords | [ string ] | | Yes |
|
||||
| position | integer | | Yes |
|
||||
| sign_content | string | | Yes |
|
||||
| status | string | | Yes |
|
||||
| stopped_at | integer | | Yes |
|
||||
| summary | string | | Yes |
|
||||
| tokens | integer | | Yes |
|
||||
| updated_at | integer | | Yes |
|
||||
| updated_by | string | | Yes |
|
||||
| word_count | integer | | Yes |
|
||||
|
||||
#### SegmentUpdateArgs
|
||||
|
||||
| Name | Type | Description | Required |
|
||||
|
||||
+97
@@ -0,0 +1,97 @@
|
||||
"""DB-backed integration tests for console dataset segment endpoints."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from flask.testing import FlaskClient
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
||||
from models.dataset import Dataset, Document, DocumentSegment, DocumentSegmentSummary
|
||||
from models.enums import DataSourceType, DocumentCreatedFrom, IndexingStatus, SegmentStatus, SummaryStatus
|
||||
from tests.test_containers_integration_tests.controllers.console.helpers import (
|
||||
authenticate_console_client,
|
||||
create_console_account_and_tenant,
|
||||
)
|
||||
|
||||
|
||||
def test_list_segments_uses_real_db_query_and_console_response_shape(
|
||||
test_client_with_containers: FlaskClient,
|
||||
db_session_with_containers: Session,
|
||||
) -> None:
|
||||
account, tenant = create_console_account_and_tenant(db_session_with_containers)
|
||||
dataset = Dataset(
|
||||
tenant_id=tenant.id,
|
||||
name=f"Console Segment Dataset {uuid4()}",
|
||||
description="Console segment integration dataset",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
indexing_technique=IndexTechniqueType.ECONOMY,
|
||||
created_by=account.id,
|
||||
permission="only_me",
|
||||
provider="vendor",
|
||||
)
|
||||
db_session_with_containers.add(dataset)
|
||||
db_session_with_containers.commit()
|
||||
|
||||
document = Document(
|
||||
tenant_id=tenant.id,
|
||||
dataset_id=dataset.id,
|
||||
position=1,
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch=f"batch-{uuid4()}",
|
||||
name="console-segment-doc.txt",
|
||||
created_from=DocumentCreatedFrom.WEB,
|
||||
created_by=account.id,
|
||||
enabled=True,
|
||||
archived=False,
|
||||
indexing_status=IndexingStatus.COMPLETED,
|
||||
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||
word_count=3,
|
||||
tokens=4,
|
||||
)
|
||||
db_session_with_containers.add(document)
|
||||
db_session_with_containers.commit()
|
||||
|
||||
segment = DocumentSegment(
|
||||
tenant_id=tenant.id,
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
position=1,
|
||||
content="Console integration segment",
|
||||
word_count=3,
|
||||
tokens=4,
|
||||
keywords=["console", "integration"],
|
||||
status=SegmentStatus.COMPLETED,
|
||||
created_by=account.id,
|
||||
)
|
||||
db_session_with_containers.add(segment)
|
||||
db_session_with_containers.commit()
|
||||
segment_id = segment.id
|
||||
|
||||
db_session_with_containers.add(
|
||||
DocumentSegmentSummary(
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
chunk_id=segment.id,
|
||||
summary_content="Console DB summary",
|
||||
status=SummaryStatus.COMPLETED,
|
||||
)
|
||||
)
|
||||
db_session_with_containers.commit()
|
||||
|
||||
response = test_client_with_containers.get(
|
||||
f"/console/api/datasets/{dataset.id}/documents/{document.id}/segments"
|
||||
"?page=1&limit=10&status=completed&keyword=integration&enabled=all",
|
||||
headers=authenticate_console_client(test_client_with_containers, account),
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
body = response.get_json()
|
||||
assert set(body) == {"data", "limit", "total", "total_pages", "page"}
|
||||
assert body["limit"] == 10
|
||||
assert body["total"] == 1
|
||||
assert body["total_pages"] == 1
|
||||
assert "has_more" not in body
|
||||
assert body["data"][0]["id"] == segment_id
|
||||
assert body["data"][0]["summary"] == "Console DB summary"
|
||||
+153
@@ -0,0 +1,153 @@
|
||||
"""DB-backed integration tests for service API dataset segment endpoints."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from uuid import uuid4
|
||||
|
||||
from flask.testing import FlaskClient
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType, IndexTechniqueType
|
||||
from models.dataset import ChildChunk, Dataset, Document, DocumentSegment, DocumentSegmentSummary
|
||||
from models.enums import (
|
||||
ApiTokenType,
|
||||
DataSourceType,
|
||||
DocumentCreatedFrom,
|
||||
IndexingStatus,
|
||||
SegmentStatus,
|
||||
SegmentType,
|
||||
SummaryStatus,
|
||||
)
|
||||
from models.model import ApiToken
|
||||
from tests.test_containers_integration_tests.controllers.console.helpers import create_console_account_and_tenant
|
||||
|
||||
|
||||
def _create_dataset_graph(db_session: Session) -> tuple[Dataset, Document, DocumentSegment]:
|
||||
account, tenant = create_console_account_and_tenant(db_session)
|
||||
dataset = Dataset(
|
||||
tenant_id=tenant.id,
|
||||
name=f"Segment Dataset {uuid4()}",
|
||||
description="Segment integration dataset",
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
indexing_technique=IndexTechniqueType.ECONOMY,
|
||||
created_by=account.id,
|
||||
permission="only_me",
|
||||
provider="vendor",
|
||||
enable_api=True,
|
||||
)
|
||||
db_session.add(dataset)
|
||||
db_session.commit()
|
||||
|
||||
document = Document(
|
||||
tenant_id=tenant.id,
|
||||
dataset_id=dataset.id,
|
||||
position=1,
|
||||
data_source_type=DataSourceType.UPLOAD_FILE,
|
||||
batch=f"batch-{uuid4()}",
|
||||
name="segment-doc.txt",
|
||||
created_from=DocumentCreatedFrom.API,
|
||||
created_by=account.id,
|
||||
enabled=True,
|
||||
archived=False,
|
||||
indexing_status=IndexingStatus.COMPLETED,
|
||||
doc_form=IndexStructureType.PARAGRAPH_INDEX,
|
||||
word_count=4,
|
||||
tokens=5,
|
||||
)
|
||||
db_session.add(document)
|
||||
db_session.commit()
|
||||
|
||||
segment = DocumentSegment(
|
||||
tenant_id=tenant.id,
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
position=1,
|
||||
content="Segment content for integration",
|
||||
word_count=4,
|
||||
tokens=5,
|
||||
keywords=["segment", "integration"],
|
||||
status=SegmentStatus.COMPLETED,
|
||||
created_by=account.id,
|
||||
)
|
||||
db_session.add(segment)
|
||||
db_session.commit()
|
||||
|
||||
summary = DocumentSegmentSummary(
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
chunk_id=segment.id,
|
||||
summary_content="DB summary",
|
||||
status=SummaryStatus.COMPLETED,
|
||||
)
|
||||
db_session.add(summary)
|
||||
|
||||
api_token = ApiToken(
|
||||
tenant_id=tenant.id,
|
||||
type=ApiTokenType.DATASET,
|
||||
token=f"dataset-{uuid4().hex}",
|
||||
)
|
||||
db_session.add(api_token)
|
||||
db_session.commit()
|
||||
return dataset, document, segment
|
||||
|
||||
|
||||
def _auth_headers(db_session: Session, dataset: Dataset) -> dict[str, str]:
|
||||
token = db_session.query(ApiToken).filter_by(tenant_id=dataset.tenant_id, type=ApiTokenType.DATASET).one()
|
||||
return {"Authorization": f"Bearer {token.token}"}
|
||||
|
||||
|
||||
def test_list_segments_uses_real_services_and_service_api_shape(
|
||||
test_client_with_containers: FlaskClient,
|
||||
db_session_with_containers: Session,
|
||||
) -> None:
|
||||
dataset, document, segment = _create_dataset_graph(db_session_with_containers)
|
||||
segment_id = segment.id
|
||||
|
||||
response = test_client_with_containers.get(
|
||||
f"/v1/datasets/{dataset.id}/documents/{document.id}/segments"
|
||||
"?page=1&limit=20&status=completed&keyword=integration",
|
||||
headers=_auth_headers(db_session_with_containers, dataset),
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
body = response.get_json()
|
||||
assert set(body) == {"data", "doc_form", "total", "has_more", "limit", "page"}
|
||||
assert body["doc_form"] == "text_model"
|
||||
assert body["total"] == 1
|
||||
assert "total_pages" not in body
|
||||
assert body["data"][0]["id"] == segment_id
|
||||
assert body["data"][0]["summary"] == "DB summary"
|
||||
assert body["data"][0]["attachments"] == []
|
||||
assert body["data"][0]["child_chunks"] == []
|
||||
|
||||
|
||||
def test_list_child_chunks_uses_real_segment_service(
|
||||
test_client_with_containers: FlaskClient,
|
||||
db_session_with_containers: Session,
|
||||
) -> None:
|
||||
dataset, document, segment = _create_dataset_graph(db_session_with_containers)
|
||||
child_chunk = ChildChunk(
|
||||
tenant_id=dataset.tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
document_id=document.id,
|
||||
segment_id=segment.id,
|
||||
position=1,
|
||||
content="Child integration content",
|
||||
word_count=3,
|
||||
type=SegmentType.CUSTOMIZED,
|
||||
created_by=document.created_by,
|
||||
)
|
||||
db_session_with_containers.add(child_chunk)
|
||||
db_session_with_containers.commit()
|
||||
|
||||
response = test_client_with_containers.get(
|
||||
f"/v1/datasets/{dataset.id}/documents/{document.id}/segments/{segment.id}/child_chunks"
|
||||
"?page=1&limit=20&keyword=integration",
|
||||
headers=_auth_headers(db_session_with_containers, dataset),
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
body = response.get_json()
|
||||
assert set(body) == {"data", "total", "total_pages", "page", "limit"}
|
||||
assert body["total"] == 1
|
||||
assert body["data"][0]["content"] == "Child integration content"
|
||||
@@ -4,6 +4,7 @@ from typing import Literal
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from flask import Flask
|
||||
from flask_restx import Namespace
|
||||
from pydantic import BaseModel, ConfigDict, Field
|
||||
|
||||
@@ -47,6 +48,13 @@ class QueryModel(BaseModel):
|
||||
ambiguous: int | str | None = Field(default=None, description="Ambiguous query parameter")
|
||||
|
||||
|
||||
class HelperQueryModel(BaseModel):
|
||||
page: int = 1
|
||||
limit: int = 20
|
||||
status: list[str] = Field(default_factory=list)
|
||||
keyword: str | None = None
|
||||
|
||||
|
||||
class NullableSchemaModel(BaseModel):
|
||||
name: str | None = None
|
||||
tags: list[str] | None = None
|
||||
@@ -320,3 +328,41 @@ def test_query_params_from_model_builds_flask_restx_doc_params():
|
||||
"required": False,
|
||||
"description": "Ambiguous query parameter",
|
||||
}
|
||||
|
||||
|
||||
def test_query_params_from_request_preserves_repeated_list_params():
|
||||
from controllers.common.schema import query_params_from_request
|
||||
|
||||
app = Flask(__name__)
|
||||
with app.test_request_context("/?page=2&limit=30&status=active&status=inactive&keyword=hello"):
|
||||
query = query_params_from_request(HelperQueryModel, list_fields=("status",))
|
||||
|
||||
assert query.page == 2
|
||||
assert query.limit == 30
|
||||
assert query.status == ["active", "inactive"]
|
||||
assert query.keyword == "hello"
|
||||
|
||||
|
||||
def test_query_params_from_request_raises_for_malformed_ints_by_default():
|
||||
from controllers.common.schema import query_params_from_request
|
||||
|
||||
app = Flask(__name__)
|
||||
with app.test_request_context("/?page=bad&limit="):
|
||||
with pytest.raises(ValueError):
|
||||
query_params_from_request(HelperQueryModel, list_fields=("status",))
|
||||
|
||||
|
||||
def test_query_params_from_request_can_use_model_default_for_malformed_defaulted_ints():
|
||||
from controllers.common.schema import query_params_from_request
|
||||
|
||||
app = Flask(__name__)
|
||||
with app.test_request_context("/?page=bad&limit="):
|
||||
query = query_params_from_request(
|
||||
HelperQueryModel,
|
||||
list_fields=("status",),
|
||||
use_defaults_for_malformed_ints=True,
|
||||
)
|
||||
|
||||
assert query.page == 1
|
||||
assert query.limit == 20
|
||||
assert query.status == []
|
||||
|
||||
@@ -10,13 +10,13 @@ from controllers.console import console_ns
|
||||
from controllers.console.app.error import ProviderNotInitializeError
|
||||
from controllers.console.datasets.datasets_segments import (
|
||||
ChildChunkAddApi,
|
||||
ChildChunkBatchUpdatePayload,
|
||||
ChildChunkUpdateApi,
|
||||
DatasetDocumentSegmentAddApi,
|
||||
DatasetDocumentSegmentApi,
|
||||
DatasetDocumentSegmentBatchImportApi,
|
||||
DatasetDocumentSegmentListApi,
|
||||
DatasetDocumentSegmentUpdateApi,
|
||||
_get_segment_with_summary,
|
||||
)
|
||||
from controllers.console.datasets.error import (
|
||||
ChildChunkDeleteIndexError,
|
||||
@@ -25,9 +25,13 @@ from controllers.console.datasets.error import (
|
||||
)
|
||||
from core.errors.error import LLMBadRequestError, ProviderTokenNotInitError
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
from fields.segment_fields import segment_response_with_summary
|
||||
from libs.datetime_utils import naive_utc_now
|
||||
from models.dataset import ChildChunk, DocumentSegment
|
||||
from models.enums import SegmentStatus, SegmentType
|
||||
from models.model import UploadFile
|
||||
from services.errors.chunk import ChildChunkDeleteIndexError as ChildChunkDeleteIndexServiceError
|
||||
from services.errors.chunk import ChildChunkIndexingError as ChildChunkIndexingServiceError
|
||||
|
||||
|
||||
def unwrap(func):
|
||||
@@ -37,49 +41,89 @@ def unwrap(func):
|
||||
|
||||
|
||||
def _segment():
|
||||
return SimpleNamespace(
|
||||
id="s1",
|
||||
segment = DocumentSegment(
|
||||
tenant_id="tenant-1",
|
||||
dataset_id="ds-1",
|
||||
document_id="doc-1",
|
||||
position=1,
|
||||
document_id="d1",
|
||||
content="c",
|
||||
sign_content="c",
|
||||
answer="a",
|
||||
word_count=1,
|
||||
tokens=1,
|
||||
keywords=[],
|
||||
index_node_id="n1",
|
||||
index_node_hash="h",
|
||||
hit_count=0,
|
||||
enabled=True,
|
||||
disabled_at=None,
|
||||
disabled_by=None,
|
||||
status="normal",
|
||||
created_by="u1",
|
||||
created_at=naive_utc_now(),
|
||||
updated_at=naive_utc_now(),
|
||||
updated_by="u1",
|
||||
indexing_at=None,
|
||||
completed_at=None,
|
||||
error=None,
|
||||
stopped_at=None,
|
||||
child_chunks=[],
|
||||
attachments=[],
|
||||
summary=None,
|
||||
)
|
||||
segment.id = "seg-1"
|
||||
segment.answer = "a"
|
||||
segment.keywords = ["test"]
|
||||
segment.index_node_id = "n1"
|
||||
segment.index_node_hash = "h"
|
||||
segment.status = SegmentStatus.COMPLETED
|
||||
segment.created_at = naive_utc_now()
|
||||
segment.updated_at = naive_utc_now()
|
||||
segment.updated_by = "u1"
|
||||
return segment
|
||||
|
||||
|
||||
def test_get_segment_with_summary(monkeypatch: pytest.MonkeyPatch):
|
||||
def _child_chunk() -> ChildChunk:
    """Build a ``ChildChunk`` fixture with fixed identifiers.

    The constructor receives the tenant/dataset/document/segment ids, the
    position, content, word count and creator. ``id``, ``type`` and the two
    timestamps are assigned afterwards (presumably because the model does not
    take them as constructor arguments -- confirm against ``models.dataset``).
    """
    child_chunk = ChildChunk(
        tenant_id="tenant-1",
        dataset_id="ds-1",
        document_id="doc-1",
        segment_id="seg-1",
        position=1,
        content="child",
        word_count=1,
        created_by="u1",
    )
    child_chunk.id = "cc-1"
    child_chunk.type = SegmentType.CUSTOMIZED
    child_chunk.created_at = naive_utc_now()
    child_chunk.updated_at = naive_utc_now()
    return child_chunk
|
||||
|
||||
|
||||
def _segment_response_dict():
|
||||
return {
|
||||
"id": "seg-1",
|
||||
"position": 1,
|
||||
"document_id": "doc-1",
|
||||
"content": "c",
|
||||
"sign_content": "c",
|
||||
"answer": "a",
|
||||
"word_count": 1,
|
||||
"tokens": 1,
|
||||
"keywords": ["test"],
|
||||
"index_node_id": "n1",
|
||||
"index_node_hash": "h",
|
||||
"hit_count": 0,
|
||||
"enabled": True,
|
||||
"disabled_at": None,
|
||||
"disabled_by": None,
|
||||
"status": "completed",
|
||||
"created_by": "u1",
|
||||
"created_at": 1779678000,
|
||||
"updated_at": 1779678000,
|
||||
"updated_by": "u1",
|
||||
"indexing_at": None,
|
||||
"completed_at": None,
|
||||
"error": None,
|
||||
"stopped_at": None,
|
||||
"child_chunks": [],
|
||||
"attachments": [],
|
||||
"summary": None,
|
||||
}
|
||||
|
||||
|
||||
def test_segment_response_with_summary():
|
||||
segment = _segment()
|
||||
summary = SimpleNamespace(summary_content="summary")
|
||||
|
||||
monkeypatch.setattr(
|
||||
"services.summary_index_service.SummaryIndexService.get_segment_summary",
|
||||
lambda *_args, **_kwargs: summary,
|
||||
)
|
||||
with (
|
||||
patch("models.dataset.db.session.scalar", return_value=None),
|
||||
patch("models.dataset.db.session.execute", return_value=MagicMock(all=MagicMock(return_value=[]))),
|
||||
):
|
||||
result = segment_response_with_summary(segment, "summary")
|
||||
|
||||
result = _get_segment_with_summary(segment, dataset_id="d1")
|
||||
|
||||
assert result["summary"] == "summary"
|
||||
assert result.summary == "summary"
|
||||
assert result.id == segment.id
|
||||
|
||||
|
||||
class TestDatasetDocumentSegmentListApi:
|
||||
@@ -90,8 +134,7 @@ class TestDatasetDocumentSegmentListApi:
|
||||
dataset = MagicMock()
|
||||
document = MagicMock()
|
||||
|
||||
segment = MagicMock(spec=DocumentSegment)
|
||||
segment.id = "seg-1"
|
||||
segment = _segment()
|
||||
|
||||
pagination = MagicMock()
|
||||
pagination.items = [segment]
|
||||
@@ -124,10 +167,8 @@ class TestDatasetDocumentSegmentListApi:
|
||||
"services.summary_index_service.SummaryIndexService.get_segments_summaries",
|
||||
return_value={},
|
||||
),
|
||||
patch(
|
||||
"controllers.console.datasets.datasets_segments.marshal",
|
||||
return_value={"id": "seg-1"},
|
||||
),
|
||||
patch("models.dataset.db.session.scalar", return_value=None),
|
||||
patch("models.dataset.db.session.execute", return_value=MagicMock(all=MagicMock(return_value=[]))),
|
||||
):
|
||||
response, status = method(api, "ds-1", "doc-1")
|
||||
|
||||
@@ -370,8 +411,7 @@ class TestDatasetDocumentSegmentAddApi:
|
||||
document = MagicMock()
|
||||
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||
|
||||
segment = MagicMock()
|
||||
segment.id = "seg-1"
|
||||
segment = _segment()
|
||||
|
||||
with (
|
||||
app.test_request_context("/", json=payload),
|
||||
@@ -401,13 +441,11 @@ class TestDatasetDocumentSegmentAddApi:
|
||||
return_value=segment,
|
||||
),
|
||||
patch(
|
||||
"controllers.console.datasets.datasets_segments.marshal",
|
||||
return_value={"id": "seg-1"},
|
||||
),
|
||||
patch(
|
||||
"controllers.console.datasets.datasets_segments._get_segment_with_summary",
|
||||
return_value={"id": "seg-1"},
|
||||
"controllers.console.datasets.datasets_segments.SummaryIndexService.get_segment_summary",
|
||||
return_value=None,
|
||||
),
|
||||
patch("models.dataset.db.session.scalar", return_value=None),
|
||||
patch("models.dataset.db.session.execute", return_value=MagicMock(all=MagicMock(return_value=[]))),
|
||||
):
|
||||
response, status = method(api, "ds-1", "doc-1")
|
||||
|
||||
@@ -509,7 +547,7 @@ class TestDatasetDocumentSegmentUpdateApi:
|
||||
document = MagicMock()
|
||||
document.doc_form = IndexStructureType.PARAGRAPH_INDEX
|
||||
|
||||
segment = MagicMock()
|
||||
segment = _segment()
|
||||
|
||||
with (
|
||||
app.test_request_context("/", json=payload),
|
||||
@@ -528,7 +566,7 @@ class TestDatasetDocumentSegmentUpdateApi:
|
||||
),
|
||||
patch(
|
||||
"controllers.console.datasets.datasets_segments.db.session.scalar",
|
||||
return_value=segment,
|
||||
side_effect=[segment, None],
|
||||
),
|
||||
patch(
|
||||
"controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission",
|
||||
@@ -543,9 +581,10 @@ class TestDatasetDocumentSegmentUpdateApi:
|
||||
return_value=segment,
|
||||
),
|
||||
patch(
|
||||
"controllers.console.datasets.datasets_segments._get_segment_with_summary",
|
||||
return_value={"id": "seg-1"},
|
||||
"controllers.console.datasets.datasets_segments.SummaryIndexService.get_segment_summary",
|
||||
return_value=None,
|
||||
),
|
||||
patch("models.dataset.db.session.execute", return_value=MagicMock(all=MagicMock(return_value=[]))),
|
||||
):
|
||||
response, status = method(api, "ds-1", "doc-1", "seg-1")
|
||||
|
||||
@@ -800,6 +839,52 @@ class TestDatasetDocumentSegmentBatchImportApi:
|
||||
|
||||
|
||||
class TestChildChunkAddApi:
|
||||
def test_patch_documents_batch_update_payload(self):
    """The PATCH handler's API doc must expect exactly the batch-update payload model.

    Reads ``__apidoc__`` from the unwrapped ``ChildChunkAddApi.patch`` and
    compares the names of the models under ``"expect"`` with
    ``ChildChunkBatchUpdatePayload.__name__``.
    """
    api_doc = unwrap(ChildChunkAddApi.patch).__apidoc__
    expected_model = ChildChunkBatchUpdatePayload.__name__

    assert [model.name for model in api_doc["expect"]] == [expected_model]
|
||||
|
||||
def test_get_uses_default_pagination_for_malformed_ints(self, app: Flask):
    """GET with ``page=bad`` and an empty ``limit`` is served with page 1 / limit 20.

    All collaborators of the handler are patched, so the only behavior under
    test is how the malformed query values are turned into the pagination
    arguments passed to ``SegmentService.get_child_chunks`` and echoed back
    in the response.
    """
    api = ChildChunkAddApi()
    method = unwrap(api.get)

    # Empty result page; only the pagination arguments matter here.
    pagination = MagicMock(items=[], total=0, pages=0)

    with (
        app.test_request_context("/?page=bad&limit="),
        patch(
            "controllers.console.datasets.datasets_segments.current_account_with_tenant",
            return_value=(MagicMock(), "tenant-1"),
        ),
        patch(
            "controllers.console.datasets.datasets_segments.DatasetService.get_dataset",
            return_value=MagicMock(),
        ),
        patch(
            "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_model_setting",
            return_value=None,
        ),
        patch(
            "controllers.console.datasets.datasets_segments.DocumentService.get_document",
            return_value=MagicMock(),
        ),
        patch(
            "controllers.console.datasets.datasets_segments.db.session.scalar",
            return_value=MagicMock(),
        ),
        patch(
            "controllers.console.datasets.datasets_segments.SegmentService.get_child_chunks",
            return_value=pagination,
        ) as get_child_chunks,
    ):
        response, status = method(api, "ds-1", "doc-1", "seg-1")

    assert status == 200
    assert response["page"] == 1
    assert response["limit"] == 20
    # Positional order as called by the handler: segment, document, dataset,
    # page, limit, then a final ``None`` (presumably the keyword filter --
    # confirm against ``SegmentService.get_child_chunks``).
    get_child_chunks.assert_called_once_with("seg-1", "doc-1", "ds-1", 1, 20, None)
|
||||
|
||||
def test_post_success(self, app: Flask):
|
||||
api = ChildChunkAddApi()
|
||||
method = unwrap(api.post)
|
||||
@@ -814,7 +899,7 @@ class TestChildChunkAddApi:
|
||||
|
||||
document = MagicMock()
|
||||
segment = MagicMock()
|
||||
child_chunk = MagicMock(spec=ChildChunk)
|
||||
child_chunk = _child_chunk()
|
||||
|
||||
with (
|
||||
app.test_request_context("/", json=payload),
|
||||
@@ -843,10 +928,6 @@ class TestChildChunkAddApi:
|
||||
"controllers.console.datasets.datasets_segments.SegmentService.create_child_chunk",
|
||||
return_value=child_chunk,
|
||||
),
|
||||
patch(
|
||||
"controllers.console.datasets.datasets_segments.marshal",
|
||||
return_value={"id": "cc-1"},
|
||||
),
|
||||
):
|
||||
response, status = method(api, "ds-1", "doc-1", "seg-1")
|
||||
|
||||
@@ -890,7 +971,7 @@ class TestChildChunkAddApi:
|
||||
),
|
||||
patch(
|
||||
"controllers.console.datasets.datasets_segments.SegmentService.create_child_chunk",
|
||||
side_effect=services.errors.chunk.ChildChunkIndexingError("fail"),
|
||||
side_effect=ChildChunkIndexingServiceError("fail"),
|
||||
),
|
||||
):
|
||||
with pytest.raises(ChildChunkIndexingError):
|
||||
@@ -977,7 +1058,7 @@ class TestChildChunkUpdateApi:
|
||||
),
|
||||
patch(
|
||||
"controllers.console.datasets.datasets_segments.SegmentService.delete_child_chunk",
|
||||
side_effect=services.errors.chunk.ChildChunkDeleteIndexError("fail"),
|
||||
side_effect=ChildChunkDeleteIndexServiceError("fail"),
|
||||
),
|
||||
):
|
||||
with pytest.raises(ChildChunkDeleteIndexError):
|
||||
@@ -992,10 +1073,7 @@ class TestSegmentListAdvancedCases:
|
||||
dataset = MagicMock()
|
||||
document = MagicMock()
|
||||
|
||||
segment = MagicMock(spec=DocumentSegment)
|
||||
segment.id = "seg-1"
|
||||
segment.keywords = ["test"]
|
||||
segment.enabled = True
|
||||
segment = _segment()
|
||||
|
||||
pagination = MagicMock(items=[segment], total=1, pages=1)
|
||||
|
||||
@@ -1025,6 +1103,8 @@ class TestSegmentListAdvancedCases:
|
||||
"services.summary_index_service.SummaryIndexService.get_segments_summaries",
|
||||
return_value={},
|
||||
),
|
||||
patch("models.dataset.db.session.scalar", return_value=None),
|
||||
patch("models.dataset.db.session.execute", return_value=MagicMock(all=MagicMock(return_value=[]))),
|
||||
):
|
||||
result = method(api, "ds-1", "doc-1")
|
||||
|
||||
|
||||
@@ -29,15 +29,67 @@ from controllers.service_api.dataset.segment import (
|
||||
DatasetChildChunkApi,
|
||||
DatasetSegmentApi,
|
||||
SegmentApi,
|
||||
SegmentCreateItemPayload,
|
||||
SegmentCreatePayload,
|
||||
SegmentListQuery,
|
||||
)
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
from libs.datetime_utils import naive_utc_now
|
||||
from models.dataset import ChildChunk, Dataset, Document, DocumentSegment
|
||||
from models.enums import IndexingStatus
|
||||
from models.enums import IndexingStatus, SegmentType
|
||||
from services.dataset_service import DocumentService, SegmentService
|
||||
|
||||
|
||||
def _segment_response_dict(summary: str | None = None):
|
||||
return {
|
||||
"id": "seg-1",
|
||||
"position": 1,
|
||||
"document_id": "doc-id",
|
||||
"content": "segment content",
|
||||
"sign_content": "segment content",
|
||||
"answer": None,
|
||||
"word_count": 2,
|
||||
"tokens": 3,
|
||||
"keywords": ["segment"],
|
||||
"index_node_id": None,
|
||||
"index_node_hash": None,
|
||||
"hit_count": 0,
|
||||
"enabled": True,
|
||||
"disabled_at": None,
|
||||
"disabled_by": None,
|
||||
"status": "completed",
|
||||
"created_by": "account-1",
|
||||
"created_at": 1779678000,
|
||||
"updated_at": 1779678000,
|
||||
"updated_by": None,
|
||||
"indexing_at": None,
|
||||
"completed_at": None,
|
||||
"error": None,
|
||||
"stopped_at": None,
|
||||
"child_chunks": [],
|
||||
"attachments": [],
|
||||
"summary": summary,
|
||||
}
|
||||
|
||||
|
||||
def _child_chunk() -> ChildChunk:
    """Build a ``ChildChunk`` fixture with fixed identifiers.

    The constructor receives the tenant/dataset/document/segment ids, the
    position, content, word count and creator. ``id``, ``type`` and the two
    timestamps are assigned afterwards (presumably because the model does not
    take them as constructor arguments -- confirm against ``models.dataset``).
    Every call returns a new instance carrying the same ``id`` ("child-1").
    """
    child_chunk = ChildChunk(
        tenant_id="tenant-1",
        dataset_id="dataset-1",
        document_id="doc-id",
        segment_id="seg-id",
        position=1,
        content="child chunk content",
        word_count=3,
        created_by="account-1",
    )
    child_chunk.id = "child-1"
    child_chunk.type = SegmentType.CUSTOMIZED
    child_chunk.created_at = naive_utc_now()
    child_chunk.updated_at = naive_utc_now()
    return child_chunk
|
||||
|
||||
|
||||
class TestSegmentCreatePayload:
|
||||
"""Test suite for SegmentCreatePayload Pydantic model."""
|
||||
|
||||
@@ -48,18 +100,34 @@ class TestSegmentCreatePayload:
|
||||
{"content": "Second segment", "keywords": ["key1", "key2"]},
|
||||
]
|
||||
payload = SegmentCreatePayload(segments=segments)
|
||||
assert payload.segments == segments
|
||||
assert payload.segments is not None
|
||||
assert [segment.model_dump(exclude_none=True) for segment in payload.segments] == segments
|
||||
assert len(payload.segments) == 2
|
||||
|
||||
def test_payload_with_none_segments(self):
|
||||
"""Test payload with None segments (should be valid)."""
|
||||
payload = SegmentCreatePayload(segments=None)
|
||||
assert payload.segments is None
|
||||
"""Test payload with None segments is rejected."""
|
||||
with pytest.raises(ValueError):
|
||||
SegmentCreatePayload.model_validate({"segments": None})
|
||||
|
||||
def test_payload_with_empty_segments(self):
|
||||
"""Test payload with empty segments list."""
|
||||
payload = SegmentCreatePayload(segments=[])
|
||||
assert payload.segments == []
|
||||
"""Test payload with empty segments list is rejected."""
|
||||
with pytest.raises(ValueError):
|
||||
SegmentCreatePayload.model_validate({"segments": []})
|
||||
|
||||
def test_payload_requires_segments(self):
    """Test payload requires a segments field."""
    # An empty mapping omits ``segments`` entirely; validation must fail.
    with pytest.raises(ValueError):
        SegmentCreatePayload.model_validate({})
|
||||
|
||||
def test_payload_rejects_segment_without_content(self):
    """Test each segment requires content."""
    # The single segment carries only ``answer``; the missing ``content``
    # must make validation of the whole payload fail.
    with pytest.raises(ValueError):
        SegmentCreatePayload.model_validate({"segments": [{"answer": "Answer only"}]})
|
||||
|
||||
def test_payload_rejects_blank_content(self):
    """Test content cannot be whitespace-only."""
    # Validates the item model directly, not the wrapping payload.
    with pytest.raises(ValueError):
        SegmentCreateItemPayload.model_validate({"content": " "})
|
||||
|
||||
def test_payload_with_complex_segment_data(self):
|
||||
"""Test payload with complex segment structure."""
|
||||
@@ -72,8 +140,9 @@ class TestSegmentCreatePayload:
|
||||
}
|
||||
]
|
||||
payload = SegmentCreatePayload(segments=segments)
|
||||
assert payload.segments[0]["content"] == "Complex segment"
|
||||
assert payload.segments[0]["keywords"] == ["keyword1", "keyword2"]
|
||||
assert payload.segments is not None
|
||||
assert payload.segments[0].content == "Complex segment"
|
||||
assert payload.segments[0].keywords == ["keyword1", "keyword2"]
|
||||
|
||||
|
||||
class TestSegmentListQuery:
|
||||
@@ -117,7 +186,7 @@ class TestChildChunkCreatePayload:
|
||||
def test_payload_requires_content(self):
|
||||
"""Test that content is required."""
|
||||
with pytest.raises(ValueError):
|
||||
ChildChunkCreatePayload()
|
||||
ChildChunkCreatePayload.model_validate({})
|
||||
|
||||
def test_payload_with_long_content(self):
|
||||
"""Test payload with very long content."""
|
||||
@@ -157,12 +226,12 @@ class TestChildChunkListQuery:
|
||||
def test_query_limit_minimum(self):
|
||||
"""Test query limit minimum validation."""
|
||||
with pytest.raises(ValueError):
|
||||
ChildChunkListQuery(limit=0)
|
||||
ChildChunkListQuery.model_validate({"limit": 0})
|
||||
|
||||
def test_query_page_minimum(self):
|
||||
"""Test query page minimum validation."""
|
||||
with pytest.raises(ValueError):
|
||||
ChildChunkListQuery(page=0)
|
||||
ChildChunkListQuery.model_validate({"page": 0})
|
||||
|
||||
def test_query_with_keyword(self):
|
||||
"""Test query with keyword filter."""
|
||||
@@ -292,6 +361,7 @@ class TestSegmentServiceMockedBehavior:
|
||||
segments=[{"content": "Test"}, {"content": "Test 2"}], document=mock_document, dataset=mock_dataset
|
||||
)
|
||||
|
||||
assert result is not None
|
||||
assert len(result) == 2
|
||||
mock_create.assert_called_once()
|
||||
|
||||
@@ -301,7 +371,12 @@ class TestSegmentServiceMockedBehavior:
|
||||
mock_segments = [Mock(), Mock()]
|
||||
mock_get.return_value = (mock_segments, 2)
|
||||
|
||||
segments, count = SegmentService.get_segments(document_id=mock_document.id, page=1, limit=20)
|
||||
segments, count = SegmentService.get_segments(
|
||||
document_id=mock_document.id,
|
||||
tenant_id=mock_document.tenant_id,
|
||||
page=1,
|
||||
limit=20,
|
||||
)
|
||||
|
||||
assert len(segments) == 2
|
||||
assert count == 2
|
||||
@@ -429,13 +504,13 @@ class TestDocumentValidation:
|
||||
"""Test that enabled=True is valid."""
|
||||
document = Mock(spec=Document)
|
||||
document.enabled = True
|
||||
assert document.enabled is True
|
||||
assert document.enabled
|
||||
|
||||
def test_document_enabled_false_is_invalid(self):
|
||||
"""Test that enabled=False is invalid for segment operations."""
|
||||
document = Mock(spec=Document)
|
||||
document.enabled = False
|
||||
assert document.enabled is False
|
||||
assert not document.enabled
|
||||
|
||||
|
||||
class TestDatasetModels:
|
||||
@@ -462,7 +537,7 @@ class TestDatasetModels:
|
||||
|
||||
assert segment.id is not None
|
||||
assert segment.document_id is not None
|
||||
assert segment.content is not None
|
||||
assert segment.content == "Test content"
|
||||
|
||||
def test_child_chunk_has_required_fields(self):
|
||||
"""Test ChildChunk model has required fields."""
|
||||
@@ -473,7 +548,7 @@ class TestDatasetModels:
|
||||
|
||||
assert chunk.id is not None
|
||||
assert chunk.segment_id is not None
|
||||
assert chunk.content is not None
|
||||
assert chunk.content == "Chunk content"
|
||||
|
||||
|
||||
class TestSegmentUpdatePayload:
|
||||
@@ -594,6 +669,7 @@ class TestSegmentCreateArgs:
|
||||
from services.entities.knowledge_entities.knowledge_entities import SegmentCreateArgs
|
||||
|
||||
args = SegmentCreateArgs(content="Test content", keywords=["machine learning", "AI", "neural networks"])
|
||||
assert args.keywords is not None
|
||||
assert len(args.keywords) == 3
|
||||
|
||||
|
||||
@@ -690,7 +766,7 @@ class TestSegmentIndexingRequirements:
|
||||
|
||||
# Both conditions must be true
|
||||
assert document.indexing_status == "completed"
|
||||
assert document.enabled is True
|
||||
assert document.enabled
|
||||
|
||||
|
||||
class TestSegmentLimits:
|
||||
@@ -753,7 +829,7 @@ class TestSegmentPagination:
|
||||
#
|
||||
# Strategy per decorator type:
|
||||
# - No billing decorator → call the method directly; only patch ``db``,
|
||||
# services, ``current_account_with_tenant``, and ``marshal``.
|
||||
# services, ``current_account_with_tenant``, and response helpers when needed.
|
||||
# - ``@cloud_edition_billing_rate_limit_check`` (preserves ``__wrapped__``)
|
||||
# → call via ``method.__wrapped__(self, …)`` to skip the decorator.
|
||||
# - ``@cloud_edition_billing_resource_check`` (no ``__wrapped__``) → patch
|
||||
@@ -766,11 +842,11 @@ class TestSegmentApiGet:
|
||||
"""Test suite for SegmentApi.get() endpoint.
|
||||
|
||||
``get`` has no billing decorators but calls
|
||||
``current_account_with_tenant()`` and ``marshal``.
|
||||
``current_account_with_tenant()`` and response serialization.
|
||||
"""
|
||||
|
||||
@patch("controllers.service_api.dataset.segment.SummaryIndexService")
|
||||
@patch("controllers.service_api.dataset.segment.marshal")
|
||||
@patch("controllers.service_api.dataset.segment.segment_responses_with_summaries")
|
||||
@patch("controllers.service_api.dataset.segment.SummaryIndexService.get_segments_summaries")
|
||||
@patch("controllers.service_api.dataset.segment.SegmentService")
|
||||
@patch("controllers.service_api.dataset.segment.DocumentService")
|
||||
@patch("controllers.service_api.dataset.segment.current_account_with_tenant")
|
||||
@@ -781,8 +857,8 @@ class TestSegmentApiGet:
|
||||
mock_account_fn,
|
||||
mock_doc_svc,
|
||||
mock_seg_svc,
|
||||
mock_marshal,
|
||||
mock_summary_svc,
|
||||
mock_get_summaries,
|
||||
mock_dump_segments,
|
||||
app: Flask,
|
||||
mock_tenant,
|
||||
mock_dataset,
|
||||
@@ -794,8 +870,8 @@ class TestSegmentApiGet:
|
||||
mock_db.session.scalar.return_value = mock_dataset
|
||||
mock_doc_svc.get_document.return_value = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||
mock_seg_svc.get_segments.return_value = ([mock_segment], 1)
|
||||
mock_marshal.return_value = {"id": mock_segment.id}
|
||||
mock_summary_svc.get_segments_summaries.return_value = {}
|
||||
mock_get_summaries.return_value = {}
|
||||
mock_dump_segments.return_value = [_segment_response_dict()]
|
||||
|
||||
# Act
|
||||
with app.test_request_context(
|
||||
@@ -881,8 +957,8 @@ class TestSegmentApiPost:
|
||||
mock_rate_limit.enabled = False
|
||||
mock_feature_svc.get_knowledge_rate_limit.return_value = mock_rate_limit
|
||||
|
||||
@patch("controllers.service_api.dataset.segment.SummaryIndexService")
|
||||
@patch("controllers.service_api.dataset.segment.marshal")
|
||||
@patch("controllers.service_api.dataset.segment.segment_responses_with_summaries")
|
||||
@patch("controllers.service_api.dataset.segment.SummaryIndexService.get_segments_summaries")
|
||||
@patch("controllers.service_api.dataset.segment.SegmentService")
|
||||
@patch("controllers.service_api.dataset.segment.DocumentService")
|
||||
@patch("controllers.service_api.dataset.segment.current_account_with_tenant")
|
||||
@@ -897,8 +973,8 @@ class TestSegmentApiPost:
|
||||
mock_account_fn,
|
||||
mock_doc_svc,
|
||||
mock_seg_svc,
|
||||
mock_marshal,
|
||||
mock_summary_svc,
|
||||
mock_get_summaries,
|
||||
mock_dump_segments,
|
||||
app: Flask,
|
||||
mock_tenant,
|
||||
mock_dataset,
|
||||
@@ -920,8 +996,8 @@ class TestSegmentApiPost:
|
||||
|
||||
mock_seg_svc.segment_create_args_validate.return_value = None
|
||||
mock_seg_svc.multi_create_segment.return_value = [mock_segment]
|
||||
mock_marshal.return_value = {"id": mock_segment.id}
|
||||
mock_summary_svc.get_segments_summaries.return_value = {}
|
||||
mock_get_summaries.return_value = {}
|
||||
mock_dump_segments.return_value = [_segment_response_dict()]
|
||||
|
||||
segments_data = [{"content": "Test segment content", "answer": "Test answer"}]
|
||||
|
||||
@@ -1222,8 +1298,8 @@ class TestDatasetSegmentApiUpdate:
|
||||
mock_rate_limit.enabled = False
|
||||
mock_feature_svc.get_knowledge_rate_limit.return_value = mock_rate_limit
|
||||
|
||||
@patch("controllers.service_api.dataset.segment.SummaryIndexService")
|
||||
@patch("controllers.service_api.dataset.segment.marshal")
|
||||
@patch("controllers.service_api.dataset.segment.segment_response_with_summary")
|
||||
@patch("controllers.service_api.dataset.segment.SummaryIndexService.get_segment_summary")
|
||||
@patch("controllers.service_api.dataset.segment.SegmentService")
|
||||
@patch("controllers.service_api.dataset.segment.DocumentService")
|
||||
@patch("controllers.service_api.dataset.segment.DatasetService")
|
||||
@@ -1240,8 +1316,8 @@ class TestDatasetSegmentApiUpdate:
|
||||
mock_dataset_svc,
|
||||
mock_doc_svc,
|
||||
mock_seg_svc,
|
||||
mock_marshal,
|
||||
mock_summary_svc,
|
||||
mock_get_summary,
|
||||
mock_dump_segment,
|
||||
app: Flask,
|
||||
mock_tenant,
|
||||
mock_dataset,
|
||||
@@ -1253,12 +1329,13 @@ class TestDatasetSegmentApiUpdate:
|
||||
mock_dataset.indexing_technique = "economy"
|
||||
mock_db.session.scalar.return_value = mock_dataset
|
||||
mock_dataset_svc.check_dataset_model_setting.return_value = None
|
||||
mock_doc_svc.get_document.return_value = Mock()
|
||||
mock_doc_svc.get_document.return_value = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||
mock_seg_svc.get_segment_by_id.return_value = mock_segment
|
||||
updated = Mock()
|
||||
updated.id = "updated-seg"
|
||||
mock_seg_svc.update_segment.return_value = updated
|
||||
mock_marshal.return_value = {"id": mock_segment.id}
|
||||
mock_summary_svc.get_segment_summary.return_value = None
|
||||
mock_get_summary.return_value = None
|
||||
mock_dump_segment.return_value = _segment_response_dict()
|
||||
|
||||
with app.test_request_context(
|
||||
f"/datasets/{mock_dataset.id}/documents/doc-id/segments/{mock_segment.id}",
|
||||
@@ -1365,11 +1442,11 @@ class TestDatasetSegmentApiGetSingle:
|
||||
"""Test suite for DatasetSegmentApi.get() (single segment) endpoint.
|
||||
|
||||
``get`` has no billing decorators but calls
|
||||
``current_account_with_tenant()`` and ``marshal``.
|
||||
``current_account_with_tenant()`` and response serialization.
|
||||
"""
|
||||
|
||||
@patch("controllers.service_api.dataset.segment.SummaryIndexService")
|
||||
@patch("controllers.service_api.dataset.segment.marshal")
|
||||
@patch("controllers.service_api.dataset.segment.segment_response_with_summary")
|
||||
@patch("controllers.service_api.dataset.segment.SummaryIndexService.get_segment_summary")
|
||||
@patch("controllers.service_api.dataset.segment.SegmentService")
|
||||
@patch("controllers.service_api.dataset.segment.DocumentService")
|
||||
@patch("controllers.service_api.dataset.segment.DatasetService")
|
||||
@@ -1382,8 +1459,8 @@ class TestDatasetSegmentApiGetSingle:
|
||||
mock_dataset_svc,
|
||||
mock_doc_svc,
|
||||
mock_seg_svc,
|
||||
mock_marshal,
|
||||
mock_summary_svc,
|
||||
mock_get_summary,
|
||||
mock_dump_segment,
|
||||
app: Flask,
|
||||
mock_tenant,
|
||||
mock_dataset,
|
||||
@@ -1396,8 +1473,8 @@ class TestDatasetSegmentApiGetSingle:
|
||||
mock_doc = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||
mock_doc_svc.get_document.return_value = mock_doc
|
||||
mock_seg_svc.get_segment_by_id.return_value = mock_segment
|
||||
mock_marshal.return_value = {"id": mock_segment.id}
|
||||
mock_summary_svc.get_segment_summary.return_value = None
|
||||
mock_get_summary.return_value = None
|
||||
mock_dump_segment.return_value = _segment_response_dict()
|
||||
|
||||
with app.test_request_context(
|
||||
f"/datasets/{mock_dataset.id}/documents/doc-id/segments/{mock_segment.id}",
|
||||
@@ -1415,8 +1492,8 @@ class TestDatasetSegmentApiGetSingle:
|
||||
assert "data" in response
|
||||
assert response["doc_form"] == IndexStructureType.PARAGRAPH_INDEX
|
||||
|
||||
@patch("controllers.service_api.dataset.segment.SummaryIndexService")
|
||||
@patch("controllers.service_api.dataset.segment.marshal")
|
||||
@patch("controllers.service_api.dataset.segment.segment_response_with_summary")
|
||||
@patch("controllers.service_api.dataset.segment.SummaryIndexService.get_segment_summary")
|
||||
@patch("controllers.service_api.dataset.segment.SegmentService")
|
||||
@patch("controllers.service_api.dataset.segment.DocumentService")
|
||||
@patch("controllers.service_api.dataset.segment.DatasetService")
|
||||
@@ -1429,8 +1506,8 @@ class TestDatasetSegmentApiGetSingle:
|
||||
mock_dataset_svc,
|
||||
mock_doc_svc,
|
||||
mock_seg_svc,
|
||||
mock_marshal,
|
||||
mock_summary_svc,
|
||||
mock_get_summary,
|
||||
mock_dump_segment,
|
||||
app: Flask,
|
||||
mock_tenant,
|
||||
mock_dataset,
|
||||
@@ -1443,11 +1520,9 @@ class TestDatasetSegmentApiGetSingle:
|
||||
mock_doc = Mock(doc_form=IndexStructureType.PARAGRAPH_INDEX)
|
||||
mock_doc_svc.get_document.return_value = mock_doc
|
||||
mock_seg_svc.get_segment_by_id.return_value = mock_segment
|
||||
mock_marshal.return_value = {"id": mock_segment.id, "summary": None}
|
||||
|
||||
mock_summary_record = Mock()
|
||||
mock_summary_record.summary_content = "This is the segment summary"
|
||||
mock_summary_svc.get_segment_summary.return_value = mock_summary_record
|
||||
mock_summary_record = Mock(summary_content="This is the segment summary")
|
||||
mock_get_summary.return_value = mock_summary_record
|
||||
mock_dump_segment.return_value = _segment_response_dict("This is the segment summary")
|
||||
|
||||
with app.test_request_context(
|
||||
f"/datasets/{mock_dataset.id}/documents/doc-id/segments/{mock_segment.id}",
|
||||
@@ -1565,10 +1640,9 @@ class TestChildChunkApiGet:
|
||||
"""Test suite for ChildChunkApi.get() endpoint.
|
||||
|
||||
``get`` has no billing decorators but calls
|
||||
``current_account_with_tenant()``, ``marshal``, and ``db``.
|
||||
``current_account_with_tenant()``, response serialization, and ``db``.
|
||||
"""
|
||||
|
||||
@patch("controllers.service_api.dataset.segment.marshal")
|
||||
@patch("controllers.service_api.dataset.segment.SegmentService")
|
||||
@patch("controllers.service_api.dataset.segment.DocumentService")
|
||||
@patch("controllers.service_api.dataset.segment.current_account_with_tenant")
|
||||
@@ -1579,7 +1653,6 @@ class TestChildChunkApiGet:
|
||||
mock_account_fn,
|
||||
mock_doc_svc,
|
||||
mock_seg_svc,
|
||||
mock_marshal,
|
||||
app: Flask,
|
||||
mock_tenant,
|
||||
mock_dataset,
|
||||
@@ -1591,11 +1664,10 @@ class TestChildChunkApiGet:
|
||||
mock_seg_svc.get_segment_by_id.return_value = Mock()
|
||||
|
||||
mock_pagination = Mock()
|
||||
mock_pagination.items = [Mock(), Mock()]
|
||||
mock_pagination.items = [_child_chunk(), _child_chunk()]
|
||||
mock_pagination.total = 2
|
||||
mock_pagination.pages = 1
|
||||
mock_seg_svc.get_child_chunks.return_value = mock_pagination
|
||||
mock_marshal.return_value = [{"id": "c1"}, {"id": "c2"}]
|
||||
|
||||
with app.test_request_context(
|
||||
f"/datasets/{mock_dataset.id}/documents/doc-id/segments/seg-id/child_chunks?page=1&limit=20",
|
||||
@@ -1727,7 +1799,6 @@ class TestChildChunkApiPost:
|
||||
mock_rate_limit.enabled = False
|
||||
mock_feature_svc.get_knowledge_rate_limit.return_value = mock_rate_limit
|
||||
|
||||
@patch("controllers.service_api.dataset.segment.marshal")
|
||||
@patch("controllers.service_api.dataset.segment.SegmentService")
|
||||
@patch("controllers.service_api.dataset.segment.DocumentService")
|
||||
@patch("controllers.service_api.dataset.segment.current_account_with_tenant")
|
||||
@@ -1742,7 +1813,6 @@ class TestChildChunkApiPost:
|
||||
mock_account_fn,
|
||||
mock_doc_svc,
|
||||
mock_seg_svc,
|
||||
mock_marshal,
|
||||
app: Flask,
|
||||
mock_tenant,
|
||||
mock_dataset,
|
||||
@@ -1754,9 +1824,8 @@ class TestChildChunkApiPost:
|
||||
mock_db.session.scalar.return_value = mock_dataset
|
||||
mock_doc_svc.get_document.return_value = Mock()
|
||||
mock_seg_svc.get_segment_by_id.return_value = Mock()
|
||||
mock_child = Mock()
|
||||
mock_child = _child_chunk()
|
||||
mock_seg_svc.create_child_chunk.return_value = mock_child
|
||||
mock_marshal.return_value = {"id": "child-1"}
|
||||
|
||||
with app.test_request_context(
|
||||
f"/datasets/{mock_dataset.id}/documents/doc-id/segments/seg-id/child_chunks",
|
||||
|
||||
Reference in New Issue
Block a user