diff --git a/backend/openapi/specs/data-management.yaml b/backend/openapi/specs/data-management.yaml index e505ee576..98babc4ac 100644 --- a/backend/openapi/specs/data-management.yaml +++ b/backend/openapi/specs/data-management.yaml @@ -1,890 +1,1461 @@ -openapi: 3.0.3 -info: - title: Data Management Service API - description: | - 数据管理服务API,提供数据集的创建、管理和文件操作功能。 - - 主要功能: - - 数据集的创建和管理 - - 多种数据集类型支持(图像、文本、音频、视频、多模态等) - - 数据集文件管理 - - 数据集标签和元数据管理 - - 数据集统计信息 - version: 1.0.0 - -servers: - - url: http://localhost:8092/api/v1/data-management - description: Development server - -tags: - - name: Dataset - description: 数据集管理 - - name: DatasetFile - description: 数据集文件管理 - - name: DatasetType - description: 数据集类型管理 - - name: Tag - description: 标签管理 - -paths: - /data-management/datasets: - get: - tags: [Dataset] - operationId: getDatasets - summary: 获取数据集列表 - description: 分页查询数据集列表,支持按类型、标签等条件筛选 - parameters: - - name: page - in: query - schema: - type: integer - default: 0 - description: 页码,从1开始 - - name: size - in: query - schema: - type: integer - default: 20 - description: 每页大小 - - name: type - in: query - schema: - type: string - description: 数据集类型过滤 - - name: tags - in: query - schema: - type: string - description: 标签过滤,多个标签用逗号分隔 - - name: keyword - in: query - schema: - type: string - description: 关键词搜索(名称、描述) - - name: status - in: query - schema: - type: string - enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] - description: 数据集状态过滤 - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/PagedDatasetResponse' - '400': - description: 请求参数错误 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - post: - tags: [Dataset] - operationId: createDataset - summary: 创建数据集 - description: 创建新的数据集 - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateDatasetRequest' - responses: - '201': - description: 创建成功 - content: - 
application/json: - schema: - $ref: '#/components/schemas/DatasetResponse' - '400': - description: 请求参数错误 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - /data-management/datasets/{datasetId}: - get: - tags: [Dataset] - operationId: getDatasetById - summary: 获取数据集详情 - description: 根据ID获取数据集详细信息 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetResponse' - '404': - description: 数据集不存在 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - put: - tags: [Dataset] - summary: 更新数据集 - operationId: updateDataset - description: 更新数据集信息 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/UpdateDatasetRequest' - responses: - '200': - description: 更新成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetResponse' - '404': - description: 数据集不存在 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - delete: - tags: [Dataset] - operationId: deleteDataset - summary: 删除数据集 - description: 删除指定的数据集 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '204': - description: 删除成功 - '404': - description: 数据集不存在 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - /data-management/datasets/{datasetId}/files: - get: - tags: [DatasetFile] - summary: 获取数据集文件列表 - operationId: getDatasetFiles - description: 分页获取数据集中的文件列表 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - - name: page - in: query - schema: - type: integer - default: 0 - description: 页码,从0开始 - - name: 
size - in: query - schema: - type: integer - default: 20 - description: 每页大小 - - name: fileType - in: query - schema: - type: string - description: 文件类型过滤 - - name: status - in: query - schema: - type: string - enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] - description: 文件状态过滤 - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/PagedDatasetFileResponse' - - /data-management/datasets/{datasetId}/files/directories: - post: - tags: [ DatasetFile ] - operationId: createDirectory - summary: 在数据集下创建子目录 - description: 在指定数据集下的某个前缀路径中创建一个新的子目录 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateDirectoryRequest' - responses: - '200': - description: 创建成功 - - /data-management/datasets/{datasetId}/files/{fileId}: - get: - tags: [DatasetFile] - summary: 获取文件详情 - description: 获取数据集中指定文件的详细信息 - operationId: getDatasetFileById - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - - name: fileId - in: path - required: true - schema: - type: string - description: 文件ID - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetFileResponse' - - delete: - tags: [DatasetFile] - summary: 删除文件 - operationId: deleteDatasetFile - description: 从数据集中删除指定文件 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - - name: fileId - in: path - required: true - schema: - type: string - description: 文件ID - responses: - '204': - description: 删除成功 - - /data-management/datasets/{datasetId}/files/{fileId}/download: - get: - tags: [DatasetFile] - operationId: downloadDatasetFile - summary: 下载文件 - description: 下载数据集中的指定文件 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - 
description: 数据集ID - - name: fileId - in: path - required: true - schema: - type: string - description: 文件ID - responses: - '200': - description: 文件内容 - content: - application/octet-stream: - schema: - type: string - format: binary - - /data-management/datasets/{datasetId}/files/download: - get: - tags: [ DatasetFile ] - operationId: downloadDatasetFileAsZip - summary: 下载文件 - description: 下载数据集中全部文件 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '200': - description: 文件内容 - content: - application/octet-stream: - schema: - type: string - format: binary - - /data-management/datasets/{datasetId}/files/upload/add: - post: - tags: [ DatasetFile ] - operationId: addFilesToDataset - summary: 添加文件到数据集(仅创建数据库记录) - description: 将指定源文件路径列表添加到数据集,仅在数据库中创建记录,不执行物理文件系统操作。 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/AddFilesRequest' - responses: - '200': - description: 添加成功,返回创建的文件记录列表 - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/DatasetFileResponse' - - /data-management/datasets/{datasetId}/files/upload/pre-upload: - post: - tags: [ DatasetFile ] - operationId: preUpload - summary: 切片上传预上传 - description: 预上传接口,返回后续分片上传所需的请求ID - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/UploadFilesPreRequest' - responses: - '200': - description: 预上传成功,返回请求ID - content: - application/json: - schema: - type: string - - /data-management/datasets/{datasetId}/files/upload/chunk: - post: - tags: [ DatasetFile ] - operationId: chunkUpload - summary: 切片上传 - description: 使用预上传返回的请求ID进行分片上传 - parameters: - - name: datasetId - in: path - required: true - 
schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - multipart/form-data: - schema: - $ref: '#/components/schemas/UploadFileRequest' - responses: - '200': - description: 上传成功 - - /data-management/dataset-types: - get: - operationId: getDatasetTypes - tags: [DatasetType] - summary: 获取数据集类型列表 - description: 获取所有支持的数据集类型 - responses: - '200': - description: 成功 - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/DatasetTypeResponse' - - /data-management/tags: - get: - tags: [Tag] - operationId: getTags - summary: 获取标签列表 - description: 获取所有可用的标签 - parameters: - - name: keyword - in: query - schema: - type: string - description: 标签名称关键词搜索 - responses: - '200': - description: 成功 - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/TagResponse' - - post: - tags: [Tag] - operationId: createTag - summary: 创建标签 - description: 创建新的标签 - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateTagRequest' - responses: - '201': - description: 创建成功 - content: - application/json: - schema: - $ref: '#/components/schemas/TagResponse' - - /data-management/datasets/{datasetId}/statistics: - get: - tags: [Dataset] - operationId: getDatasetStatistics - summary: 获取数据集统计信息 - description: 获取数据集的统计信息(文件数量、大小、完成度等) - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetStatisticsResponse' - -components: - schemas: - PagedDatasetResponse: - type: object - properties: - content: - type: array - items: - $ref: '#/components/schemas/DatasetResponse' - page: - type: integer - description: 当前页码 - size: - type: integer - description: 每页大小 - totalElements: - type: integer - description: 总元素数 - totalPages: - type: integer - description: 总页数 - first: - type: boolean - 
description: 是否为第一页 - last: - type: boolean - description: 是否为最后一页 - - DatasetResponse: - type: object - properties: - id: - type: string - description: 数据集ID - name: - type: string - description: 数据集名称 - description: - type: string - description: 数据集描述 - type: - $ref: '#/components/schemas/DatasetTypeResponse' - status: - type: string - enum: [ACTIVE, INACTIVE, PROCESSING] - description: 数据集状态 - tags: - type: array - items: - $ref: '#/components/schemas/TagResponse' - description: 标签列表 - dataSource: - type: string - description: 数据源 - targetLocation: - type: string - description: 目标位置 - fileCount: - type: integer - description: 文件数量 - totalSize: - type: integer - format: int64 - description: 总大小(字节) - completionRate: - type: number - format: float - description: 完成率(0-100) - createdAt: - type: string - format: date-time - description: 创建时间 - updatedAt: - type: string - format: date-time - description: 更新时间 - createdBy: - type: string - description: 创建者 - - CreateDatasetRequest: - type: object - required: - - name - - type - properties: - name: - type: string - description: 数据集名称 - minLength: 1 - maxLength: 100 - description: - type: string - description: 数据集描述 - maxLength: 500 - type: - type: string - description: 数据集类型 - tags: - type: array - items: - type: string - description: 标签列表 - dataSource: - type: string - description: 数据源 - targetLocation: - type: string - description: 目标位置 - - UpdateDatasetRequest: - type: object - properties: - name: - type: string - description: 数据集名称 - maxLength: 100 - description: - type: string - description: 数据集描述 - maxLength: 500 - tags: - type: array - items: - type: string - description: 标签列表 - status: - type: string - enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] - description: 数据集状态 - - UploadFilesPreRequest: - type: object - description: 切片上传预上传请求 - properties: - hasArchive: - type: boolean - description: 是否为压缩包上传 - default: false - totalFileNum: - type: integer - format: int32 - minimum: 1 - 
description: 总文件数量 - totalSize: - type: integer - format: int64 - description: 总文件大小(字节) - prefix: - type: string - description: 目标子目录前缀,例如 "images/",为空表示数据集根目录 - required: [ totalFileNum ] - - CreateDirectoryRequest: - type: object - description: 创建数据集子目录请求 - properties: - parentPrefix: - type: string - description: 父级前缀路径,例如 "images/",为空表示数据集根目录 - directoryName: - type: string - description: 新建目录名称 - required: [ directoryName ] - - UploadFileRequest: - type: object - description: 分片上传请求 - properties: - reqId: - type: string - description: 预上传返回的请求ID - fileNo: - type: integer - format: int32 - description: 文件编号(批量中的第几个) - fileName: - type: string - description: 文件名称 - totalChunkNum: - type: integer - format: int32 - description: 文件总分片数量 - chunkNo: - type: integer - format: int32 - description: 当前分片编号(从1开始) - file: - type: string - format: binary - description: 分片二进制内容 - checkSumHex: - type: string - description: 分片校验和(十六进制) - required: [ reqId, fileNo, fileName, totalChunkNum, chunkNo, file ] - - DatasetTypeResponse: - type: object - properties: - code: - type: string - description: 类型编码 - name: - type: string - description: 类型名称 - description: - type: string - description: 类型描述 - supportedFormats: - type: array - items: - type: string - description: 支持的文件格式 - icon: - type: string - description: 图标 - - PagedDatasetFileResponse: - type: object - properties: - content: - type: array - items: - $ref: '#/components/schemas/DatasetFileResponse' - page: - type: integer - description: 当前页码 - size: - type: integer - description: 每页大小 - totalElements: - type: integer - description: 总元素数 - totalPages: - type: integer - description: 总页数 - first: - type: boolean - description: 是否为第一页 - last: - type: boolean - description: 是否为最后一页 - - DatasetFileResponse: - type: object - properties: - id: - type: string - description: 文件ID - fileName: - type: string - description: 文件名 - originalName: - type: string - description: 原始文件名 - fileType: - type: string - description: 文件类型 - 
fileSize: - type: integer - format: int64 - description: 文件大小(字节) - status: - type: string - enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] - description: 文件状态 - description: - type: string - description: 文件描述 - filePath: - type: string - description: 文件路径 - uploadTime: - type: string - format: date-time - description: 上传时间 - uploadedBy: - type: string - description: 上传者 - - TagResponse: - type: object - properties: - id: - type: string - description: 标签ID - name: - type: string - description: 标签名称 - color: - type: string - description: 标签颜色 - description: - type: string - description: 标签描述 - usageCount: - type: integer - description: 使用次数 - - CreateTagRequest: - type: object - required: - - name - properties: - name: - type: string - description: 标签名称 - minLength: 1 - maxLength: 50 - color: - type: string - description: 标签颜色 - pattern: '^#[0-9A-Fa-f]{6}$' - description: - type: string - description: 标签描述 - maxLength: 200 - - DatasetStatisticsResponse: - type: object - properties: - totalFiles: - type: integer - description: 总文件数 - completedFiles: - type: integer - description: 已完成文件数 - totalSize: - type: integer - format: int64 - description: 总大小(字节) - completionRate: - type: number - format: float - description: 完成率(0-100) - fileTypeDistribution: - type: object - additionalProperties: - type: integer - description: 文件类型分布 - statusDistribution: - type: object - additionalProperties: - type: integer - description: 状态分布 - - ErrorResponse: - type: object - properties: - error: - type: string - description: 错误代码 - message: - type: string - description: 错误消息 - timestamp: - type: string - format: date-time - description: 错误时间 - path: - type: string - description: 请求路径 - - AddFilesRequest: - type: object - description: 将源文件路径添加到数据集的请求 - properties: - sourcePaths: - type: array - items: - type: string - description: 源文件路径列表(相对或绝对路径),每个元素表示一个要添加的文件或目录路径 - softAdd: - type: boolean - description: 如果为 true,则仅在数据库中创建记录(默认 false) - default: false - required: - - sourcePaths 
+openapi: 3.0.3 +info: + title: Data Management Service API + description: | + 数据管理服务API,提供数据集的创建、管理和文件操作功能。 + + 主要功能: + - 数据集的创建和管理 + - 多种数据集类型支持(图像、文本、音频、视频、多模态等) + - 数据集文件管理 + - 数据集标签和元数据管理 + - 数据集统计信息 + version: 1.0.0 + +servers: + - url: http://localhost:8092/api/data-management + description: Development server + +tags: + - name: Dataset + description: 数据集管理 + - name: DatasetFile + description: 数据集文件管理 + - name: DatasetType + description: 数据集类型管理 + - name: Tag + description: 标签管理 + +paths: + /api/data-management/datasets: + get: + tags: [Dataset] + operationId: getDatasets + summary: 获取数据集列表 + description: 分页查询数据集列表,支持按类型、标签等条件筛选 + parameters: + - name: page + in: query + schema: + type: integer + default: 0 + description: 页码,从0开始 + - name: size + in: query + schema: + type: integer + default: 20 + description: 每页大小 + - name: type + in: query + schema: + type: string + description: 数据集类型过滤 + - name: tags + in: query + schema: + type: string + description: 标签过滤,多个标签用逗号分隔 + - name: keyword + in: query + schema: + type: string + description: 关键词搜索(名称、描述) + - name: status + in: query + schema: + type: string + enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] + description: 数据集状态过滤 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/PagedDatasetResponse' + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + post: + tags: [Dataset] + operationId: createDataset + summary: 创建数据集 + description: 创建新的数据集 + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateDatasetRequest' + responses: + '200': + description: 创建成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetResponse' + + /api/data-management/datasets/{datasetId}: + get: + tags: [Dataset] + operationId: getDatasetById + summary: 获取数据集详情 + description: 根据ID获取数据集详细信息 + parameters: + - name: 
datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetResponse' + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + put: + tags: [Dataset] + summary: 更新数据集 + operationId: updateDataset + description: 更新数据集信息 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateDatasetRequest' + responses: + '200': + description: 更新成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetResponse' + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + delete: + tags: [Dataset] + operationId: deleteDataset + summary: 删除数据集 + description: 删除指定的数据集 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '204': + description: 删除成功 + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /api/data-management/datasets/{datasetId}/files: + get: + tags: [DatasetFile] + summary: 获取数据集文件列表 + operationId: getDatasetFiles + description: 分页获取数据集中的文件列表 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: isWithDirectory + in: query + schema: + type: boolean + default: false + description: 是否包含目录结构 + - name: page + in: query + schema: + type: integer + default: 0 + description: 页码,从0开始 + - name: size + in: query + schema: + type: integer + default: 20 + description: 每页大小 + - name: prefix + in: query + schema: + type: string + default: "" + description: 路径前缀,用于过滤子目录下的文件 + - name: fileType + in: query + schema: + type: string + description: 
文件类型过滤 + - name: status + in: query + schema: + type: string + enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] + description: 文件状态过滤 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/PagedDatasetFileResponseWrapper' + + /api/data-management/datasets/{datasetId}/files/directories: + post: + tags: [ DatasetFile ] + operationId: createDirectory + summary: 在数据集下创建子目录 + description: 在指定数据集下的某个前缀路径中创建一个新的子目录 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateDirectoryRequest' + responses: + '200': + description: 创建成功 + + /api/data-management/datasets/{datasetId}/files/{fileId}: + get: + tags: [DatasetFile] + summary: 获取文件详情 + description: 获取数据集中指定文件的详细信息 + operationId: getDatasetFileById + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + - name: prefix + in: query + schema: + type: string + default: "" + description: 路径前缀,用于定位文件 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetFileResponseWrapper' + '404': + description: 文件不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + delete: + tags: [DatasetFile] + summary: 删除文件 + operationId: deleteDatasetFile + description: 从数据集中删除指定文件 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + - name: prefix + in: query + schema: + type: string + default: "" + description: 路径前缀,用于定位文件 + responses: + '200': + description: 删除成功 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + 
'404': + description: 文件不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /api/data-management/datasets/{datasetId}/files/{fileId}/download: + get: + tags: [DatasetFile] + operationId: downloadDatasetFile + summary: 下载文件 + description: 下载数据集中的指定文件 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + responses: + '200': + description: 文件内容 + content: + application/octet-stream: + schema: + type: string + format: binary + + /api/data-management/datasets/{datasetId}/files/download: + get: + tags: [ DatasetFile ] + operationId: downloadDatasetFileAsZip + summary: 下载文件 + description: 下载数据集中全部文件 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '200': + description: 文件内容 + content: + application/octet-stream: + schema: + type: string + format: binary + + /api/data-management/datasets/{datasetId}/files/directories/download: + get: + tags: [DatasetFile] + operationId: downloadDirectory + summary: 下载目录 + description: 下载指定目录(压缩为 ZIP) + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: prefix + in: query + schema: + type: string + default: "" + description: 目录前缀路径 + responses: + '200': + description: 成功 + content: + application/zip: + schema: + type: string + format: binary + + delete: + tags: [DatasetFile] + operationId: deleteDirectory + summary: 删除目录 + description: 删除目录及其所有内容 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: prefix + in: query + schema: + type: string + default: "" + description: 目录前缀路径 + responses: + '200': + description: 删除成功 + + put: + tags: [DatasetFile] + operationId: renameDirectory + summary: 重命名目录 + description: 重命名数据集目录 + parameters: + - name: datasetId + 
in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RenameDirectoryRequest' + responses: + '200': + description: 重命名成功 + + /api/data-management/datasets/{datasetId}/files/{fileId}/rename: + put: + tags: [DatasetFile] + operationId: renameFile + summary: 重命名文件 + description: 重命名数据集文件 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RenameFileRequest' + responses: + '200': + description: 重命名成功 + + /api/data-management/datasets/{datasetId}/files/upload/add: + post: + tags: [ DatasetFile ] + operationId: addFilesToDataset + summary: 添加文件到数据集(仅创建数据库记录) + description: 将指定源文件路径列表添加到数据集,仅在数据库中创建记录,不执行物理文件系统操作。 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/AddFilesRequest' + responses: + '200': + description: 添加成功,返回创建的文件记录列表 + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/DatasetFileResponse' + + /api/data-management/datasets/{datasetId}/files/upload/copy: + post: + tags: [DatasetFile] + operationId: copyFilesToDataset + summary: 拷贝文件到数据集 + description: 将指定路径中的文件拷贝到数据集目录下 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CopyFilesRequest' + responses: + '200': + description: 拷贝成功,返回创建的文件记录列表 + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/DatasetFileResponse' + + 
/api/data-management/datasets/{datasetId}/files/upload/pre-upload: + post: + tags: [ DatasetFile ] + operationId: preUpload + summary: 切片上传预上传 + description: 预上传接口,返回后续分片上传所需的请求ID + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UploadFilesPreRequest' + responses: + '200': + description: 预上传成功,返回请求ID + content: + application/json: + schema: + $ref: '#/components/schemas/StringResponseWrapper' + + /api/data-management/datasets/{datasetId}/files/upload/chunk: + post: + tags: [ DatasetFile ] + operationId: chunkUpload + summary: 切片上传 + description: 使用预上传返回的请求ID进行分片上传 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/UploadFileRequest' + responses: + '200': + description: 上传成功 + + /api/data-management/dataset-types: + get: + operationId: getDatasetTypes + tags: [DatasetType] + summary: 获取数据集类型列表 + description: 获取所有支持的数据集类型 + responses: + '200': + description: 成功 + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/DatasetTypeResponse' + + /api/data-management/tags: + get: + tags: [Tag] + operationId: getTags + summary: 获取标签列表 + description: 获取所有可用的标签 + parameters: + - name: keyword + in: query + schema: + type: string + description: 标签名称关键词搜索 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/TagListResponse' + + post: + tags: [Tag] + operationId: createTag + summary: 创建标签 + description: 创建新的标签 + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateTagRequest' + responses: + '200': + description: 创建成功 + content: + application/json: + schema: + $ref: '#/components/schemas/TagResponseWrapper' + 
'400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + put: + tags: [Tag] + operationId: updateTag + summary: 更新标签 + description: 更新现有标签信息 + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateTagRequest' + responses: + '200': + description: 更新成功 + content: + application/json: + schema: + $ref: '#/components/schemas/TagResponseWrapper' + + delete: + tags: [Tag] + operationId: deleteTag + summary: 删除标签 + description: 删除指定的标签(批量删除) + parameters: + - name: ids + in: query + required: true + schema: + type: array + items: + type: string + maxItems: 10 + description: 标签ID列表,最多10个 + style: form + explode: false + responses: + '200': + description: 删除成功 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /api/data-management/datasets/{datasetId}/lineage: + get: + tags: [Dataset] + operationId: getDatasetLineage + summary: 获取数据集血缘信息 + description: 获取数据集的血缘关系信息 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + format: uuid + description: 数据集ID + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetLineage' + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /api/data-management/datasets/{datasetId}/statistics: + get: + tags: [Dataset] + operationId: getDatasetStatistics + summary: 获取数据集统计信息 + description: 获取数据集的统计信息(文件数量、大小、完成度等) + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetStatisticsResponseWrapper' + '404': + description: 数据集不存在 + content: + 
application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /api/data-management/datasets/statistics: + get: + tags: [Dataset] + operationId: getAllDatasetStatistics + summary: 获取所有数据集统计信息 + description: 获取所有数据集的统计信息汇总 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/AllDatasetStatisticsResponseWrapper' + +components: + schemas: + PagedDatasetResponse: + type: object + properties: + content: + type: array + items: + $ref: '#/components/schemas/DatasetResponse' + page: + type: integer + description: 当前页码 + size: + type: integer + description: 每页大小 + totalElements: + type: integer + description: 总元素数 + totalPages: + type: integer + description: 总页数 + first: + type: boolean + description: 是否为第一页 + last: + type: boolean + description: 是否为最后一页 + number: + type: integer + description: 页码 + + DatasetResponse: + type: object + properties: + id: + type: string + description: 数据集ID + name: + type: string + description: 数据集名称 + description: + type: string + description: 数据集描述 + datasetType: + type: string + description: 数据集类型 + status: + type: string + description: 数据集状态 + tags: + type: array + items: + $ref: '#/components/schemas/TagResponse' + description: 标签列表 + retentionDays: + type: integer + description: 数据集保留天数 + dataSource: + type: string + description: 数据源 + targetLocation: + type: string + description: 目标位置 + fileCount: + type: integer + description: 文件数量 + totalSize: + type: integer + format: int64 + description: 总大小(字节) + completionRate: + type: number + format: float + description: 完成率(0-100) + createdAt: + type: string + format: date-time + description: 创建时间 + updatedAt: + type: string + format: date-time + description: 更新时间 + createdBy: + type: string + description: 创建者 + updatedBy: + type: string + description: 更新者 + distribution: + type: object + additionalProperties: + type: object + additionalProperties: + type: integer + format: int64 + description: 分布信息 + pvcName: + type: 
string + description: 数据集pvc名称 + + CreateDatasetRequest: + type: object + required: + - name + - datasetType + properties: + name: + type: string + description: 数据集名称 + minLength: 1 + maxLength: 255 + description: + type: string + description: 数据集描述 + maxLength: 500 + datasetType: + type: string + description: 数据集类型,取值范围为TEXT/IMAGE/VIDEO/AUDIO/OTHER + enum: [TEXT, IMAGE, VIDEO, AUDIO, OTHER] + tags: + type: array + items: + type: string + description: 标签列表 + maxItems: 20 + dataSource: + type: string + description: 数据源 + maxLength: 255 + retentionDays: + type: integer + description: 保留天数 + minimum: 0 + status: + type: string + enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] + description: 数据集状态 + + UpdateDatasetRequest: + type: object + required: + - name + properties: + name: + type: string + description: 数据集名称 + maxLength: 255 + description: + type: string + description: 数据集描述 + maxLength: 500 + dataSource: + type: string + description: 数据源 + maxLength: 255 + tags: + type: array + items: + type: string + description: 标签列表 + maxItems: 20 + status: + type: string + enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] + description: 数据集状态 + + UploadFilesPreRequest: + type: object + description: 切片上传预上传请求 + properties: + hasArchive: + type: boolean + description: 是否为压缩包上传 + default: false + totalFileNum: + type: integer + format: int32 + minimum: 1 + description: 总文件数量 + totalSize: + type: integer + format: int64 + minimum: 1 + description: 总文件大小(字节) + prefix: + type: string + description: 目标子目录前缀,例如 "images/",为空表示数据集根目录 + maxLength: 500 + required: [ totalFileNum ] + + AddFilesRequest: + type: object + description: 将源文件路径添加到数据集的请求 + properties: + files: + type: array + description: 文件列表 + items: + $ref: '#/components/schemas/FileRequest' + minItems: 1 + maxItems: 1000 + softAdd: + type: boolean + description: 如果为 true,则仅在数据库中创建记录(默认 false) + default: false + prefix: + type: string + description: 目标子目录前缀 + maxLength: 500 + default: "" 
+ required: + - files + + FileRequest: + type: object + description: 文件请求对象 + properties: + filePath: + type: string + description: 文件路径 + maxLength: 1000 + metadata: + type: object + additionalProperties: true + description: 文件元数据 + required: + - filePath + + CreateDirectoryRequest: + type: object + description: 创建数据集子目录请求 + required: + - directoryName + properties: + parentPrefix: + type: string + description: 父级前缀路径,例如 "images/",为空表示数据集根目录 + directoryName: + type: string + description: 新建目录名称 + maxLength: 255 + + UploadFileRequest: + type: object + description: 分片上传请求 + properties: + reqId: + type: string + description: 预上传返回的请求ID + fileNo: + type: integer + format: int32 + description: 文件编号(批量中的第几个) + minimum: 0 + fileName: + type: string + description: 文件名称 + maxLength: 255 + totalChunkNum: + type: integer + format: int32 + description: 文件总分片数量 + minimum: 1 + chunkNo: + type: integer + format: int32 + description: 当前分片编号(从1开始) + minimum: 1 + file: + type: string + format: binary + description: 分片二进制内容 + checkSumHex: + type: string + description: 分片校验和(十六进制字符串,64位) + pattern: '^[0-9a-fA-F]{64}$' + required: [ reqId, fileNo, fileName, totalChunkNum, chunkNo, file ] + + DatasetTypeResponse: + type: object + properties: + code: + type: string + description: 类型编码 + name: + type: string + description: 类型名称 + description: + type: string + description: 类型描述 + supportedFormats: + type: array + items: + type: string + description: 支持的文件格式 + icon: + type: string + description: 图标 + + PagedDatasetFileResponse: + type: object + properties: + content: + type: array + items: + $ref: '#/components/schemas/DatasetFileResponse' + page: + type: integer + description: 当前页码 + size: + type: integer + description: 每页大小 + totalElements: + type: integer + description: 总元素数 + totalPages: + type: integer + description: 总页数 + first: + type: boolean + description: 是否为第一页 + last: + type: boolean + description: 是否为最后一页 + number: + type: integer + description: 页码 + + DatasetFileResponse: + 
type: object + properties: + id: + type: string + description: 文件ID + fileName: + type: string + description: 文件名 + originalName: + type: string + description: 原始文件名 + fileType: + type: string + description: 文件类型 + fileSize: + type: integer + format: int64 + description: 文件大小(字节) + status: + type: string + enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] + description: 文件状态 + description: + type: string + description: 文件描述 + filePath: + type: string + description: 文件路径 + tags: + type: string + description: 文件标签(JSON 字符串) + tagsUpdatedAt: + type: string + format: date-time + description: 标签更新时间 + metadata: + type: string + description: 文件元数据(包含标注信息等,JSON 字符串) + uploadTime: + type: string + format: date-time + description: 上传时间 + lastAccessTime: + type: string + format: date-time + description: 最后访问时间 + uploadedBy: + type: string + description: 上传者 + directory: + type: boolean + description: 是否为目录 + fileCount: + type: integer + format: int64 + description: 目录文件数量 + + TagResponse: + type: object + properties: + id: + type: string + description: 标签ID + name: + type: string + description: 标签名称 + color: + type: string + description: 标签颜色 + description: + type: string + description: 标签描述 + usageCount: + type: integer + description: 使用次数 + category: + type: string + description: 标签分类 + + CreateTagRequest: + type: object + required: + - name + properties: + name: + type: string + description: 标签名称 + minLength: 1 + maxLength: 100 + color: + type: string + description: 标签颜色 + pattern: '^#[0-9A-Fa-f]{6}$' + description: + type: string + description: 标签描述 + maxLength: 500 + category: + type: string + description: 标签分类 + maxLength: 50 + + DatasetStatisticsResponse: + type: object + properties: + totalFiles: + type: integer + description: 总文件数 + completedFiles: + type: integer + description: 已完成文件数 + totalSize: + type: integer + format: int64 + description: 总大小(字节) + completionRate: + type: number + format: float + description: 完成率(0-100) + fileTypeDistribution: + type: object + 
additionalProperties: + type: integer + description: 文件类型分布 + statusDistribution: + type: object + additionalProperties: + type: integer + description: 状态分布 + + ErrorResponse: + type: object + properties: + error: + type: string + description: 错误代码 + message: + type: string + description: 错误消息 + timestamp: + type: string + format: date-time + description: 错误时间 + path: + type: string + description: 请求路径 + + CopyFilesRequest: + type: object + description: 拷贝文件到数据集的请求 + properties: + sourcePaths: + type: array + items: + type: string + description: 源文件路径列表(相对或绝对路径),每个元素表示一个要添加的文件或目录路径 + required: + - sourcePaths + + UpdateTagRequest: + type: object + description: 更新标签请求 + required: + - id + properties: + id: + type: string + description: 标签ID + name: + type: string + description: 标签名称 + maxLength: 100 + color: + type: string + description: 标签颜色,十六进制格式 + pattern: '^#[0-9A-Fa-f]{6}$' + description: + type: string + description: 标签描述 + maxLength: 500 + + RenameFileRequest: + type: object + description: 重命名数据集文件请求 + required: + - newName + properties: + newName: + type: string + description: 新的文件名称(不包含后缀) + maxLength: 255 + + RenameDirectoryRequest: + type: object + description: 重命名数据集目录请求 + required: + - prefix + - newName + properties: + prefix: + type: string + description: 目录前缀,例如 "images/",与列表/删除目录接口保持一致 + maxLength: 500 + newName: + type: string + description: 新的目录名称 + maxLength: 255 + + DatasetLineage: + type: object + description: 数据集血缘信息 + properties: + upstreamDatasets: + type: array + description: 上游数据集 + items: + $ref: '#/components/schemas/DatasetResponse' + downstreamDatasets: + type: array + description: 下游数据集 + items: + $ref: '#/components/schemas/DatasetResponse' + tasks: + type: array + description: 相关任务 + items: + type: object + properties: + id: + type: string + name: + type: string + type: + type: string + + AllDatasetStatisticsResponse: + type: object + description: 所有数据集统计信息汇总 + properties: + totalDatasets: + type: integer + description: 总数据集数量 + 
totalFiles: + type: integer + description: 总文件数量 + totalSize: + type: integer + format: int64 + description: 总大小(字节) + datasetsByType: + type: object + additionalProperties: + type: integer + description: 按类型分组的统计 + datasetsByStatus: + type: object + additionalProperties: + type: integer + description: 按状态分组的统计 + + TagListResponse: + type: object + description: 标签列表响应 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + type: array + items: + $ref: '#/components/schemas/TagResponse' + description: 标签列表 + + DatasetStatisticsResponseWrapper: + type: object + description: 数据集统计信息响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + $ref: '#/components/schemas/DatasetStatisticsResponse' + + AllDatasetStatisticsResponseWrapper: + type: object + description: 所有数据集统计信息响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + $ref: '#/components/schemas/AllDatasetStatisticsResponse' + + PagedDatasetFileResponseWrapper: + type: object + description: 数据集文件分页响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + $ref: '#/components/schemas/PagedDatasetFileResponse' + + DatasetFileResponseWrapper: + type: object + description: 数据集文件响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + $ref: '#/components/schemas/DatasetFileResponse' + + StringResponseWrapper: + type: object + description: 字符串响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + type: string + description: 响应数据 + + TagResponseWrapper: + type: object + description: 标签响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + $ref: '#/components/schemas/TagResponse' diff 
--git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java index c1d941e86..9c5de7a5a 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java @@ -1,47 +1,87 @@ -package com.datamate.datamanagement.infrastructure.exception; - -import com.datamate.common.infrastructure.exception.ErrorCode; -import lombok.AllArgsConstructor; -import lombok.Getter; - -/** - * 数据管理模块错误码 - * - * @author dallas - * @since 2025-10-20 - */ -@Getter -@AllArgsConstructor -public enum DataManagementErrorCode implements ErrorCode { - /** - * 数据集不存在 - */ - DATASET_NOT_FOUND("data_management.0001", "数据集不存在"), - /** - * 数据集已存在 - */ - DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"), - /** - * 数据集状态错误 - */ - DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"), - /** - * 数据集标签不存在 - */ - DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"), - /** - * 数据集标签已存在 - */ - DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在"), - /** - * 数据集文件已存在 - */ - DATASET_FILE_ALREADY_EXISTS("data_management.0006", "数据集文件已存在"), - /** - * 目录不存在 - */ - DIRECTORY_NOT_FOUND("data_management.0007", "目录不存在"); - - private final String code; - private final String message; -} +package com.datamate.datamanagement.infrastructure.exception; + +import com.datamate.common.infrastructure.exception.ErrorCode; +import lombok.AllArgsConstructor; +import lombok.Getter; + +/** + * 数据管理模块错误码 + * + * @author dallas + * @since 2025-10-20 + */ +@Getter +@AllArgsConstructor +public enum DataManagementErrorCode implements ErrorCode { + /** + * 数据集不存在 + */ + 
DATASET_NOT_FOUND("data_management.0001", "数据集不存在"), + /** + * 数据集已存在 + */ + DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"), + /** + * 数据集状态错误 + */ + DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"), + /** + * 数据集标签不存在 + */ + DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"), + /** + * 数据集标签已存在 + */ + DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在"), + /** + * 数据集文件已存在 + */ + DATASET_FILE_ALREADY_EXISTS("data_management.0006", "数据集文件已存在"), + /** + * 目录不存在 + */ + DIRECTORY_NOT_FOUND("data_management.0007", "目录不存在"), + /** + * 数据集名称长度不能超过255个字符 + */ + DATASET_NAME_TOO_LONG("data_management.0008", "数据集名称长度不能超过255个字符"), + /** + * 数据集类型不合法 + */ + DATASET_TYPE_INVALID("data_management.0009", "数据集类型不合法"), + /** + * 数据集状态不合法 + */ + DATASET_STATUS_INVALID("data_management.0010", "数据集状态不合法"), + /** + * 标签名称长度不能超过100个字符 + */ + TAG_NAME_TOO_LONG("data_management.0011", "标签名称长度不能超过100个字符"), + /** + * 标签名称已存在 + */ + TAG_NAME_DUPLICATE("data_management.0012", "标签名称已存在"), + /** + * 标签颜色格式不正确,应为十六进制颜色代码 + */ + TAG_COLOR_INVALID("data_management.0013", "标签颜色格式不正确,应为十六进制颜色代码"), + /** + * 保留天数必须为非负整数 + */ + RETENTION_DAYS_INVALID("data_management.0014", "保留天数必须为非负整数"), + /** + * 路径长度不能超过限制 + */ + PATH_TOO_LONG("data_management.0015", "路径长度不能超过限制"), + /** + * 文件名包含非法字符 + */ + FILE_NAME_INVALID("data_management.0016", "文件名包含非法字符"), + /** + * 路径前缀不能以点开头 + */ + PREFIX_INVALID("data_management.0017", "路径前缀不能以点开头"); + + private final String code; + private final String message; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/AddFilesRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/AddFilesRequest.java index 89390577f..094510bdb 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/AddFilesRequest.java +++ 
b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/AddFilesRequest.java @@ -1,5 +1,10 @@ package com.datamate.datamanagement.interfaces.dto; +import com.datamate.datamanagement.interfaces.validation.ValidPath; +import jakarta.validation.Valid; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.Size; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.NoArgsConstructor; @@ -34,6 +39,8 @@ public AddFilesRequest(List paths) { @Getter @Setter public static class FileRequest { + @NotBlank(message = "文件路径不能为空") + @Size(max = 1000, message = "文件路径长度不能超过1000个字符") private String filePath; private Map metadata; @@ -41,8 +48,12 @@ public static class FileRequest { private boolean softAdd; + @ValidPath() private String prefix = ""; + @NotEmpty(message = "文件列表不能为空") + @Size(max = 1000, message = "文件数量不能超过1000个") + @Valid private List files; public boolean isValidPrefix() { diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CopyFilesRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CopyFilesRequest.java index 83234bae4..c97c9d4eb 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CopyFilesRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CopyFilesRequest.java @@ -1,6 +1,7 @@ package com.datamate.datamanagement.interfaces.dto; import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.Size; import java.util.List; @@ -10,5 +11,8 @@ * @author dallas * @since 2025-11-13 */ -public record CopyFilesRequest(@NotEmpty List sourcePaths) { +public record CopyFilesRequest( + @NotEmpty(message = "源文件路径列表不能为空") + @Size(max = 1000, message = "文件数量不能超过1000个") + List 
sourcePaths) { } diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java index 6e8534c18..3ff1ba6b7 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java @@ -1,6 +1,8 @@ package com.datamate.datamanagement.interfaces.dto; +import com.datamate.datamanagement.common.enums.DatasetStatusType; import com.datamate.datamanagement.common.enums.DatasetType; +import jakarta.validation.constraints.Min; import jakarta.validation.constraints.NotBlank; import jakarta.validation.constraints.NotNull; import jakarta.validation.constraints.Size; @@ -21,12 +23,12 @@ @AllArgsConstructor public class CreateDatasetRequest { /** 数据集名称 */ - @Size(min = 1, max = 100) + @Size(min = 1, max = 255, message = "数据集名称长度不能超过255个字符") @NotBlank(message = "数据集名称不能为空") @McpToolParam(description = "数据集名称") private String name; /** 数据集描述 */ - @Size(max = 500) + @Size(max = 500, message = "数据集描述长度不能超过500个字符") @McpToolParam(description = "数据集描述", required = false) private String description; /** 数据集类型 */ @@ -34,15 +36,18 @@ public class CreateDatasetRequest { @McpToolParam(description = "数据集类型,取值范围为TEXT/IMAGE/VIDEO/AUDIO/OTHER") private DatasetType datasetType; /** 标签列表 */ + @Size(max = 20, message = "标签数量不能超过20个") @McpToolParam(description = "标签列表", required = false) private List tags; /** 数据源 */ + @Size(max = 255, message = "数据源长度不能超过255个字符") @McpToolParam(description = "数据源", required = false) private String dataSource; /** 保留天数 */ + @Min(value = 0, message = "保留天数必须为非负整数") @McpToolParam(description = "保留天数", required = false) private Integer retentionDays; /** 数据集状态 */ 
@McpToolParam(description = "数据集状态", required = false) - private String status; + private DatasetStatusType status; } diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java index 441cc74a0..cc9b02d3b 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java @@ -1,20 +1,26 @@ -package com.datamate.datamanagement.interfaces.dto; - -import jakarta.validation.constraints.NotBlank; -import lombok.Getter; -import lombok.Setter; - -/** - * 创建数据集子目录请求 - */ -@Getter -@Setter -public class CreateDirectoryRequest { - - /** 父级前缀路径,例如 "images/",为空表示数据集根目录 */ - private String parentPrefix; - - /** 新建目录名称 */ - @NotBlank - private String directoryName; -} +package com.datamate.datamanagement.interfaces.dto; + +import com.datamate.datamanagement.interfaces.validation.ValidFileName; +import com.datamate.datamanagement.interfaces.validation.ValidPath; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; +import lombok.Getter; +import lombok.Setter; + +/** + * 创建数据集子目录请求 + */ +@Getter +@Setter +public class CreateDirectoryRequest { + + /** 父级前缀路径,例如 "images/",为空表示数据集根目录 */ + @ValidPath() + private String parentPrefix; + + /** 新建目录名称 */ + @NotBlank(message = "目录名称不能为空") + @ValidFileName + @Size(max = 255, message = "目录名称长度不能超过255个字符") + private String directoryName; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateTagRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateTagRequest.java index 
dca22bb94..2b35ff3a0 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateTagRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateTagRequest.java @@ -1,5 +1,8 @@ package com.datamate.datamanagement.interfaces.dto; +import com.datamate.datamanagement.interfaces.validation.ValidHexColor; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; import lombok.Getter; import lombok.Setter; @@ -10,9 +13,16 @@ @Setter public class CreateTagRequest { /** 标签名称 */ + @NotBlank(message = "标签名称不能为空") + @Size(min = 1, max = 100, message = "标签名称长度不能超过100个字符") private String name; /** 标签颜色 */ + @ValidHexColor private String color; /** 标签描述 */ + @Size(max = 500, message = "标签描述长度不能超过500个字符") private String description; + /** 标签分类 */ + @Size(max = 50, message = "标签分类长度不能超过50个字符") + private String category; } diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetPagingQuery.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetPagingQuery.java index e9bbb4c77..22fd8eed2 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetPagingQuery.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetPagingQuery.java @@ -3,6 +3,7 @@ import com.datamate.common.interfaces.PagingQuery; import com.datamate.datamanagement.common.enums.DatasetStatusType; import com.datamate.datamanagement.common.enums.DatasetType; +import jakarta.validation.constraints.Size; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.NoArgsConstructor; @@ -30,12 +31,14 @@ public class DatasetPagingQuery extends PagingQuery { /** * 标签名过滤 */ + @Size(max = 10, message = "过滤标签数量不能超过10个") 
@McpToolParam(description = "标签名过滤", required = false) private List tags = new ArrayList<>(); /** * 关键词搜索(名称或描述) */ + @Size(max = 100, message = "关键词长度不能超过100个字符") @McpToolParam(description = "关键词搜索(名称或描述)", required = false) private String keyword; diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameDirectoryRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameDirectoryRequest.java index 76c07aec0..7fd44a2da 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameDirectoryRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameDirectoryRequest.java @@ -1,21 +1,27 @@ -package com.datamate.datamanagement.interfaces.dto; - -import jakarta.validation.constraints.NotBlank; -import lombok.Getter; -import lombok.Setter; - -/** - * 重命名数据集目录请求 - */ -@Getter -@Setter -public class RenameDirectoryRequest { - - /** 目录前缀,例如 "images/",与列表/删除目录接口保持一致 */ - @NotBlank - private String prefix; - - /** 新的目录名称 */ - @NotBlank - private String newName; -} +package com.datamate.datamanagement.interfaces.dto; + +import com.datamate.datamanagement.interfaces.validation.ValidFileName; +import com.datamate.datamanagement.interfaces.validation.ValidPath; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; +import lombok.Getter; +import lombok.Setter; + +/** + * 重命名数据集目录请求 + */ +@Getter +@Setter +public class RenameDirectoryRequest { + + /** 目录前缀,例如 "images/",与列表/删除目录接口保持一致 */ + @NotBlank(message = "目录前缀不能为空") + @ValidPath(maxLength = 500) + private String prefix; + + /** 新的目录名称 */ + @NotBlank(message = "新目录名称不能为空") + @ValidFileName + @Size(max = 255, message = "目录名称长度不能超过255个字符") + private String newName; +} diff --git 
a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameFileRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameFileRequest.java index 602dc8652..ef4dbd9de 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameFileRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameFileRequest.java @@ -1,17 +1,21 @@ -package com.datamate.datamanagement.interfaces.dto; - -import jakarta.validation.constraints.NotBlank; -import lombok.Getter; -import lombok.Setter; - -/** - * 重命名数据集文件请求 - */ -@Getter -@Setter -public class RenameFileRequest { - - /** 新的文件名称(不包含后缀) */ - @NotBlank - private String newName; -} +package com.datamate.datamanagement.interfaces.dto; + +import com.datamate.datamanagement.interfaces.validation.ValidFileName; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; +import lombok.Getter; +import lombok.Setter; + +/** + * 重命名数据集文件请求 + */ +@Getter +@Setter +public class RenameFileRequest { + + /** 新的文件名称(不包含后缀) */ + @NotBlank(message = "新文件名不能为空") + @ValidFileName + @Size(max = 255, message = "文件名称长度不能超过255个字符") + private String newName; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateDatasetRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateDatasetRequest.java index 91070f391..dc344df6a 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateDatasetRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateDatasetRequest.java @@ -15,15 +15,17 @@ @Setter public class UpdateDatasetRequest { /** 数据集名称 */ - @Size(min = 1, 
max = 100) + @Size(min = 1, max = 255, message = "数据集名称长度不能超过255个字符") @NotBlank(message = "数据集名称不能为空") private String name; /** 数据集描述 */ - @Size(max = 500) + @Size(max = 500, message = "数据集描述长度不能超过500个字符") private String description; /** 归集任务id */ + @Size(max = 255, message = "数据源长度不能超过255个字符") private String dataSource; /** 标签列表 */ + @Size(max = 20, message = "标签数量不能超过20个") private List tags; /** 数据集状态 */ private DatasetStatusType status; diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateTagRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateTagRequest.java index 1fb6d13c7..3348a4516 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateTagRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateTagRequest.java @@ -1,5 +1,8 @@ package com.datamate.datamanagement.interfaces.dto; +import com.datamate.datamanagement.interfaces.validation.ValidHexColor; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; import lombok.Getter; import lombok.Setter; @@ -10,11 +13,15 @@ @Setter public class UpdateTagRequest { /** 标签 ID */ + @NotBlank(message = "标签ID不能为空") private String id; /** 标签名称 */ + @Size(max = 100, message = "标签名称长度不能超过100个字符") private String name; /** 标签颜色 */ + @ValidHexColor private String color; /** 标签描述 */ + @Size(max = 500, message = "标签描述长度不能超过500个字符") private String description; } diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFileRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFileRequest.java index e8c2b6984..6713ea5c3 100644 --- 
a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFileRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFileRequest.java @@ -1,5 +1,11 @@ package com.datamate.datamanagement.interfaces.dto; +import com.datamate.datamanagement.interfaces.validation.ValidFileName; +import jakarta.validation.constraints.Min; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.Pattern; +import jakarta.validation.constraints.Size; import lombok.Getter; import lombok.Setter; import org.springframework.web.multipart.MultipartFile; @@ -12,23 +18,32 @@ @Setter public class UploadFileRequest { /** 预上传返回的id,用来确认同一个任务 */ + @NotBlank(message = "请求ID不能为空") private String reqId; /** 文件编号,用于标识批量上传中的第几个文件 */ + @Min(value = 0, message = "文件编号必须为非负整数") private int fileNo; /** 文件名称 */ + @NotBlank(message = "文件名称不能为空") + @ValidFileName + @Size(max = 255, message = "文件名称长度不能超过255个字符") private String fileName; /** 文件总分块数量 */ + @Min(value = 1, message = "总分块数量必须大于0") private int totalChunkNum; /** 当前分块编号,从1开始 */ + @Min(value = 1, message = "分块编号必须大于0") private int chunkNo; /** 上传的文件分块内容 */ + @NotNull(message = "文件内容不能为空") private MultipartFile file; /** 文件分块的校验和(十六进制字符串),用于验证文件完整性 */ + @Pattern(regexp = "^[0-9a-fA-F]{64}$", message = "校验和格式不正确,应为64位十六进制字符串") private String checkSumHex; } diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java index 9b7ced05e..85d8372e5 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java +++ 
b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java @@ -1,25 +1,28 @@ -package com.datamate.datamanagement.interfaces.dto; - -import jakarta.validation.constraints.Min; -import lombok.Getter; -import lombok.Setter; - -/** - * 切片上传预上传请求 - */ -@Getter -@Setter -public class UploadFilesPreRequest { - /** 是否为压缩包上传 */ - private boolean hasArchive; - - /** 总文件数量 */ - @Min(1) - private int totalFileNum; - - /** 总文件大小 */ - private long totalSize; - - /** 目标子目录前缀,例如 "images/",为空表示数据集根目录 */ - private String prefix; -} +package com.datamate.datamanagement.interfaces.dto; + +import com.datamate.datamanagement.interfaces.validation.ValidPath; +import jakarta.validation.constraints.Min; +import lombok.Getter; +import lombok.Setter; + +/** + * 切片上传预上传请求 + */ +@Getter +@Setter +public class UploadFilesPreRequest { + /** 是否为压缩包上传 */ + private boolean hasArchive; + + /** 总文件数量 */ + @Min(value = 1, message = "总文件数量必须大于0") + private int totalFileNum; + + /** 总文件大小 */ + @Min(value = 1, message = "总文件大小必须大于0") + private long totalSize; + + /** 目标子目录前缀,例如 "images/",为空表示数据集根目录 */ + @ValidPath(maxLength = 500) + private String prefix; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetController.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetController.java index 364921438..5bf8800fe 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetController.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetController.java @@ -9,11 +9,13 @@ import com.datamate.datamanagement.interfaces.converter.DatasetConverter; import com.datamate.datamanagement.interfaces.dto.*; import jakarta.validation.Valid; +import jakarta.validation.constraints.Pattern; import 
lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springaicommunity.mcp.annotation.McpTool; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; +import org.springframework.validation.annotation.Validated; import org.springframework.web.bind.annotation.*; import java.util.Map; @@ -25,6 +27,7 @@ @RestController @RequiredArgsConstructor @RequestMapping("/data-management/datasets") +@Validated public class DatasetController { private final DatasetApplicationService datasetApplicationService; @@ -60,7 +63,10 @@ public DatasetResponse createDataset(@RequestBody @Valid CreateDatasetRequest cr * @return 数据集响应 */ @GetMapping("/{datasetId}") - public DatasetResponse getDatasetById(@PathVariable("datasetId") String datasetId) { + public DatasetResponse getDatasetById( + @PathVariable("datasetId") + @Pattern(regexp = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", message = "数据集ID格式不正确,应为UUID格式") + String datasetId) { DatasetResponse dataset = DatasetConverter.INSTANCE.convertToResponse(datasetApplicationService.getDataset(datasetId)); dataset.setPvcName(datasetApplicationService.getDatasetPvcName()); return dataset; @@ -74,14 +80,20 @@ public DatasetResponse getDatasetById(@PathVariable("datasetId") String datasetI * @return 更新后的数据集响应 */ @PutMapping("/{datasetId}") - public DatasetResponse updateDataset(@PathVariable("datasetId") String datasetId, - @RequestBody UpdateDatasetRequest updateDatasetRequest) { + public DatasetResponse updateDataset( + @PathVariable("datasetId") + @Pattern(regexp = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", message = "数据集ID格式不正确,应为UUID格式") + String datasetId, + /* @Valid: enforce the constraints declared on UpdateDatasetRequest */ @RequestBody @Valid UpdateDatasetRequest updateDatasetRequest) { Dataset dataset = datasetApplicationService.updateDataset(datasetId, updateDatasetRequest); return DatasetConverter.INSTANCE.convertToResponse(dataset); } @GetMapping("/{datasetId}/lineage") - 
public DatasetLineage getDatasetLineage(@PathVariable("datasetId") String datasetId) { + public DatasetLineage getDatasetLineage( + @PathVariable("datasetId") + @Pattern(regexp = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", message = "数据集ID格式不正确,应为UUID格式") + String datasetId) { return datasetApplicationService.getDatasetLineage(datasetId); } @@ -91,13 +103,18 @@ public DatasetLineage getDatasetLineage(@PathVariable("datasetId") String datase * @param datasetId 数据集ID */ @DeleteMapping("/{datasetId}") - public void deleteDataset(@PathVariable("datasetId") String datasetId) { + public void deleteDataset( + @PathVariable("datasetId") + @Pattern(regexp = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", message = "数据集ID格式不正确,应为UUID格式") + String datasetId) { datasetApplicationService.deleteDataset(datasetId); } @GetMapping("/{datasetId}/statistics") public ResponseEntity> getDatasetStatistics( - @PathVariable("datasetId") String datasetId) { + @PathVariable("datasetId") + @Pattern(regexp = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", message = "数据集ID格式不正确,应为UUID格式") + String datasetId) { try { Map stats = datasetApplicationService.getDatasetStatistics(datasetId); diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileName.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileName.java new file mode 100644 index 000000000..ad303acc4 --- /dev/null +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileName.java @@ -0,0 +1,26 @@ +package com.datamate.datamanagement.interfaces.validation; + +import jakarta.validation.Constraint; +import jakarta.validation.Payload; + +import java.lang.annotation.*; + +/** + * 文件名校验注解 + * 验证文件名不包含非法字符 + * + * @author DataMate + * @since 
2026/02/11
+ */
+@Documented
+@Constraint(validatedBy = ValidFileNameValidator.class)
+@Target({ElementType.FIELD, ElementType.PARAMETER})
+@Retention(RetentionPolicy.RUNTIME)
+public @interface ValidFileName {
+
+    String message() default "文件名包含非法字符";
+
+    Class<?>[] groups() default {};
+
+    Class<? extends Payload>[] payload() default {};
+}
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileNameValidator.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileNameValidator.java
new file mode 100644
index 000000000..ae1b27486
--- /dev/null
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileNameValidator.java
@@ -0,0 +1,43 @@
+package com.datamate.datamanagement.interfaces.validation;
+
+import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode;
+import jakarta.validation.ConstraintValidator;
+import jakarta.validation.ConstraintValidatorContext;
+
+import java.util.regex.Pattern;
+
+/**
+ * Validator backing {@link ValidFileName}: checks a file name against a character denylist.
+ *
+ * @author DataMate
+ * @since 2026/02/11
+ */
+public class ValidFileNameValidator implements ConstraintValidator<ValidFileName, String> {
+
+    /**
+     * File name pattern (a denylist, not an allowlist).
+     * Rejects the characters / \ : * ? " < > | and NUL, and the reserved names "." and "..".
+     * Anything else (letters, digits, CJK, - _ . space, ...) is accepted.
+     */
+    private static final Pattern FILE_NAME_PATTERN = Pattern.compile(
+            "^(?!\\.{1,2}$)[^/\\\\:*?\"<>|\\x00]+$"
+    );
+
+    @Override
+    public boolean isValid(String value, ConstraintValidatorContext context) {
+        if (value == null || value.isEmpty()) {
+            return true; // null/empty is left to @NotBlank or similar constraints
+        }
+
+        boolean isValid = FILE_NAME_PATTERN.matcher(value).matches();
+
+        if (!isValid) {
+            context.disableDefaultConstraintViolation();
+            context.buildConstraintViolationWithTemplate(
+                    DataManagementErrorCode.FILE_NAME_INVALID.getMessage()
+            ).addConstraintViolation();
+        }
+
+        return isValid;
+    }
+}
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColor.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColor.java
new file mode 100644
index 000000000..a978f3909
--- /dev/null
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColor.java
@@ -0,0 +1,26 @@
+package com.datamate.datamanagement.interfaces.validation;
+
+import jakarta.validation.Constraint;
+import jakarta.validation.Payload;
+
+import java.lang.annotation.*;
+
+/**
+ * Hexadecimal color format validation annotation.
+ * Validates that a string is a hex color code; accepted forms are defined by ValidHexColorValidator.
+ *
+ * @author DataMate
+ * @since 2026/02/11
+ */
+@Documented
+@Constraint(validatedBy = ValidHexColorValidator.class)
+@Target({ElementType.FIELD, ElementType.PARAMETER})
+@Retention(RetentionPolicy.RUNTIME)
+public @interface ValidHexColor {
+
+    String message() default "标签颜色格式不正确,应为十六进制颜色代码";
+
+    Class<?>[] groups() default {};
+
+    Class<? extends Payload>[] payload() default {};
+}
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColorValidator.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColorValidator.java
new file mode 100644
index 000000000..f071b7e28
---
/dev/null
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColorValidator.java
@@ -0,0 +1,43 @@
+package com.datamate.datamanagement.interfaces.validation;
+
+import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode;
+import com.datamate.common.infrastructure.exception.ErrorCode; // NOTE(review): unused in this file — candidate for removal
+import jakarta.validation.ConstraintValidator;
+import jakarta.validation.ConstraintValidatorContext;
+import org.hibernate.validator.internal.constraintvalidators.bv.size.SizeValidatorForCharSequence; // NOTE(review): unused, and a Hibernate Validator internal class — candidate for removal
+
+import java.util.regex.Pattern;
+
+/**
+ * Validator backing {@link ValidHexColor}: checks that a string is a hexadecimal color code.
+ *
+ * @author DataMate
+ * @since 2026/02/11
+ */
+public class ValidHexColorValidator implements ConstraintValidator<ValidHexColor, String> {
+
+    /**
+     * Hexadecimal color pattern.
+     * Accepts both the short #RGB form and the long #RRGGBB form (case-insensitive digits).
+     */
+    private static final Pattern HEX_COLOR_PATTERN = Pattern.compile("^#(?:[0-9a-fA-F]{3}|[0-9a-fA-F]{6})$");
+
+    @Override
+    public boolean isValid(String value, ConstraintValidatorContext context) {
+        if (value == null) {
+            return true; // null is left to @NotBlank or similar constraints
+        }
+
+        boolean isValid = HEX_COLOR_PATTERN.matcher(value).matches();
+
+        if (!isValid) {
+            // Replace the default violation with the message taken from the error code
+            context.disableDefaultConstraintViolation();
+            context.buildConstraintViolationWithTemplate(
+                    DataManagementErrorCode.TAG_COLOR_INVALID.getMessage()
+            ).addConstraintViolation();
+        }
+
+        return isValid;
+    }
+}
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPath.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPath.java
new file mode 100644
index 000000000..8eaee9449
--- /dev/null
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPath.java
@@ -0,0 +1,31 @@
+package com.datamate.datamanagement.interfaces.validation;
+
+import jakarta.validation.Constraint;
+import jakarta.validation.Payload;
+
+import
java.lang.annotation.*;
+
+/**
+ * Path format validation annotation.
+ * Validates the path format and enforces a maximum length.
+ *
+ * @author DataMate
+ * @since 2026/02/11
+ */
+@Documented
+@Constraint(validatedBy = ValidPathValidator.class)
+@Target({ElementType.FIELD, ElementType.PARAMETER})
+@Retention(RetentionPolicy.RUNTIME)
+public @interface ValidPath {
+
+    String message() default "路径格式不正确";
+
+    Class<?>[] groups() default {};
+
+    Class<? extends Payload>[] payload() default {};
+
+    /**
+     * Maximum path length; defaults to 500.
+     */
+    int maxLength() default 500;
+}
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPathValidator.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPathValidator.java
new file mode 100644
index 000000000..69bb297b9
--- /dev/null
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPathValidator.java
@@ -0,0 +1,66 @@
+package com.datamate.datamanagement.interfaces.validation;
+
+import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode;
+import jakarta.validation.ConstraintValidator;
+import jakarta.validation.ConstraintValidatorContext;
+
+import java.util.regex.Pattern;
+
+/**
+ * Validator backing {@link ValidPath}: checks length, dot-prefixed segments and illegal characters.
+ *
+ * @author DataMate
+ * @since 2026/02/11
+ */
+public class ValidPathValidator implements ConstraintValidator<ValidPath, String> {
+
+    /**
+     * Character denylist for paths: rejects NUL, <, >, :, ", |, ? and *.
+     * Dot-prefixed segments are not handled by this pattern; isValid checks them separately.
+     * Used with Matcher.matches(), so the whole value must consist of allowed characters.
+     */
+    private static final Pattern PATH_PATTERN = Pattern.compile(
+            "^[^<>:\"|?*\\x00]+" // character denylist only; the dot rule lives in isValid
+    );
+
+    private int maxLength = 500;
+
+    @Override
+    public void initialize(ValidPath constraintAnnotation) {
+        this.maxLength = constraintAnnotation.maxLength();
+    }
+
+    @Override
+    public boolean isValid(String value, ConstraintValidatorContext context) {
+        if (value == null || value.isEmpty()) {
+            return true; // null/empty is left to other constraints
+        }
+
+        // Enforce the configured maximum length
+        if (value.length() > maxLength) {
+            context.disableDefaultConstraintViolation();
+            context.buildConstraintViolationWithTemplate(
+                    DataManagementErrorCode.PATH_TOO_LONG.getMessage()
+            ).addConstraintViolation();
+            return false;
+        }
+
+        // Reject dot-prefixed segments ('.', '..', hidden names) at any depth, not only at the start
+        if (value.startsWith(".") || value.contains("/.") || value.contains("\\.")) {
+            context.disableDefaultConstraintViolation();
+            context.buildConstraintViolationWithTemplate(
+                    DataManagementErrorCode.PREFIX_INVALID.getMessage()
+            ).addConstraintViolation();
+            return false;
+        }
+
+        // Reject characters on the denylist
+        if (!PATH_PATTERN.matcher(value).matches()) {
+            context.disableDefaultConstraintViolation();
+            context.buildConstraintViolationWithTemplate("路径包含非法字符").addConstraintViolation();
+            return false;
+        }
+
+        return true;
+    }
+}
diff --git a/frontend/src/pages/DataManagement/Create/CreateDataset.tsx b/frontend/src/pages/DataManagement/Create/CreateDataset.tsx
index 2416dede7..7179fe9df 100644
--- a/frontend/src/pages/DataManagement/Create/CreateDataset.tsx
+++ b/frontend/src/pages/DataManagement/Create/CreateDataset.tsx
@@ -34,7 +34,7 @@ export default function DatasetCreate() {
       navigate("/data/management/detail/" + data.id);
     } catch (error) {
       console.error(error);
-      message.error(t("dataManagement.messages.createFailed"));
+      message.error((error as { message?: string } | undefined)?.message || t("dataManagement.messages.createFailed"));
       return;
     }
   };