From df0537615d28dea6bebf174cb7dd4604c1176599 Mon Sep 17 00:00:00 2001 From: uname <2986773479@qq.com> Date: Tue, 24 Feb 2026 14:21:11 +0800 Subject: [PATCH 1/2] fix: fix paging issue --- .../DatasetApplicationService.java | 2 +- .../DatasetFileApplicationService.java | 1850 ++++++++--------- .../application/KnowledgeBaseService.java | 6 +- .../common/interfaces/PagedResponse.java | 26 +- .../ModelConfigApplicationService.java | 2 +- 5 files changed, 933 insertions(+), 953 deletions(-) diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java index 924c14ab7..c8f30720d 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetApplicationService.java @@ -188,7 +188,7 @@ public PagedResponse getDatasets(DatasetPagingQuery query) { String datasetPvcName = getDatasetPvcName(); List datasetResponses = DatasetConverter.INSTANCE.convertToResponse(page.getRecords()); datasetResponses.forEach(dataset -> dataset.setPvcName(datasetPvcName)); - return PagedResponse.of(datasetResponses, page.getCurrent(), page.getTotal(), page.getPages()); + return PagedResponse.of(page.getCurrent(), page.getSize(), page.getTotal(), page.getPages(), datasetResponses); } /** diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java index be4f3b153..99d237593 100644 --- 
a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java @@ -1,925 +1,925 @@ -package com.datamate.datamanagement.application; - -import com.baomidou.mybatisplus.core.metadata.IPage; -import com.baomidou.mybatisplus.extension.plugins.pagination.Page; -import com.datamate.common.domain.model.ChunkUploadPreRequest; -import com.datamate.common.domain.model.FileUploadResult; -import com.datamate.common.domain.service.FileService; -import com.datamate.common.domain.utils.AnalyzerUtils; -import com.datamate.common.domain.utils.ArchiveAnalyzer; -import com.datamate.common.domain.utils.CommonUtils; -import com.datamate.common.infrastructure.exception.BusinessAssert; -import com.datamate.common.infrastructure.exception.BusinessException; -import com.datamate.common.infrastructure.exception.CommonErrorCode; -import com.datamate.common.infrastructure.exception.SystemErrorCode; -import com.datamate.common.interfaces.PagedResponse; -import com.datamate.common.interfaces.PagingQuery; -import com.datamate.datamanagement.common.enums.DuplicateMethod; -import com.datamate.datamanagement.domain.contants.DatasetConstant; -import com.datamate.datamanagement.domain.model.dataset.Dataset; -import com.datamate.datamanagement.domain.model.dataset.DatasetFile; -import com.datamate.datamanagement.domain.model.dataset.DatasetFileUploadCheckInfo; -import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode; -import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; -import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository; -import com.datamate.datamanagement.interfaces.converter.DatasetConverter; -import com.datamate.datamanagement.interfaces.dto.AddFilesRequest; -import 
com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest; -import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; -import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; -import com.datamate.datamanagement.interfaces.dto.RenameFileRequest; -import com.datamate.datamanagement.interfaces.dto.RenameDirectoryRequest; -import com.fasterxml.jackson.core.JsonProcessingException; -import com.fasterxml.jackson.databind.ObjectMapper; -import jakarta.servlet.http.HttpServletResponse; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; -import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; -import org.apache.commons.io.IOUtils; -import org.apache.commons.lang3.StringUtils; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.core.io.Resource; -import org.springframework.core.io.UrlResource; -import org.springframework.http.HttpHeaders; -import org.springframework.stereotype.Service; -import org.springframework.transaction.annotation.Transactional; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.net.MalformedURLException; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.nio.file.attribute.BasicFileAttributes; -import java.time.LocalDateTime; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; -import java.util.*; -import java.util.function.Function; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -/** - * 数据集文件应用服务 - */ -@Slf4j -@Service -@Transactional -public class DatasetFileApplicationService { - - private final DatasetFileRepository datasetFileRepository; - private final DatasetRepository datasetRepository; - private final FileService fileService; - - @Value("${datamate.data-management.base-path:/dataset}") - private String 
datasetBasePath; - - @Value("${datamate.data-management.file.duplicate:COVER}") - private DuplicateMethod duplicateMethod; - - @Autowired - public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository, - DatasetRepository datasetRepository, FileService fileService) { - this.datasetFileRepository = datasetFileRepository; - this.datasetRepository = datasetRepository; - this.fileService = fileService; - } - - /** - * 获取数据集文件列表 - */ - @Transactional(readOnly = true) - public PagedResponse getDatasetFiles(String datasetId, String fileType, String status, String name, PagingQuery pagingQuery) { - IPage page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize()); - IPage files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page); - return PagedResponse.of(files); - } - - /** - * 获取数据集文件列表 - */ - @Transactional(readOnly = true) - public PagedResponse getDatasetFilesWithDirectory(String datasetId, String prefix, PagingQuery pagingQuery) { - Dataset dataset = datasetRepository.getById(datasetId); - int page = Math.max(pagingQuery.getPage(), 1); - int size = pagingQuery.getSize() == null || pagingQuery.getSize() < 0 ? 
20 : pagingQuery.getSize(); - if (dataset == null) { - return PagedResponse.of(new Page<>(page, size)); - } - String datasetPath = dataset.getPath(); - Path queryPath = Path.of(dataset.getPath() + File.separator + prefix); - Map datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId) - .stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity())); - try (Stream pathStream = Files.list(queryPath)) { - List allFiles = pathStream - .filter(path -> path.toString().startsWith(datasetPath)) - .sorted(Comparator - .comparing((Path path) -> !Files.isDirectory(path)) - .thenComparing(path -> path.getFileName().toString())) - .collect(Collectors.toList()); - - // 计算分页 - int total = allFiles.size(); - int totalPages = (int) Math.ceil((double) total / size); - - // 获取当前页数据 - int fromIndex = (page - 1) * size; - fromIndex = Math.max(fromIndex, 0); - int toIndex = Math.min(fromIndex + size, total); - - List pageData = new ArrayList<>(); - if (fromIndex < total) { - pageData = allFiles.subList(fromIndex, toIndex); - } - List datasetFiles = pageData.stream().map(path -> getDatasetFile(path, datasetFilesMap)).toList(); - - return new PagedResponse<>(page, size, total, totalPages, datasetFiles); - } catch (IOException e) { - log.warn("list dataset path error"); - return PagedResponse.of(new Page<>(page, size)); - } - } - - private DatasetFile getDatasetFile(Path path, Map datasetFilesMap) { - DatasetFile datasetFile = new DatasetFile(); - LocalDateTime localDateTime = LocalDateTime.now(); - try { - localDateTime = Files.getLastModifiedTime(path).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); - } catch (IOException e) { - log.error("get last modified time error", e); - } - datasetFile.setFileName(path.getFileName().toString()); - datasetFile.setUploadTime(localDateTime); - - // 目录与普通文件区分处理 - if (Files.isDirectory(path)) { - datasetFile.setId("directory-" + datasetFile.getFileName()); - datasetFile.setDirectory(true); - - // 
统计目录下文件数量和总大小 - try { - long fileCount; - long totalSize; - - try (Stream walk = Files.walk(path)) { - fileCount = walk.filter(Files::isRegularFile).count(); - } - - try (Stream walk = Files.walk(path)) { - totalSize = walk - .filter(Files::isRegularFile) - .mapToLong(p -> { - try { - return Files.size(p); - } catch (IOException e) { - log.error("get file size error", e); - return 0L; - } - }) - .sum(); - } - - datasetFile.setFileCount(fileCount); - datasetFile.setFileSize(totalSize); - } catch (IOException e) { - log.error("stat directory info error", e); - } - } else { - DatasetFile exist = datasetFilesMap.get(path.toString()); - if (exist == null) { - datasetFile.setId(datasetFile.getFileName()); - datasetFile.setFileSize(path.toFile().length()); - } else { - datasetFile = exist; - } - } - return datasetFile; - } - - /** - * 获取文件详情 - */ - @Transactional(readOnly = true) - public DatasetFile getDatasetFile(Dataset dataset, String fileId, String prefix) { - prefix = StringUtils.isBlank(prefix) ? 
"" : prefix; - if (dataset != null && !CommonUtils.isUUID(fileId) && !fileId.startsWith(".") && !prefix.startsWith(".")) { - DatasetFile file = new DatasetFile(); - file.setId(fileId); - file.setFileName(fileId); - file.setDatasetId(dataset.getId()); - file.setFileSize(0L); - file.setFilePath(dataset.getPath() + File.separator + prefix + fileId); - return file; - } - DatasetFile file = datasetFileRepository.getById(fileId); - if (file == null || dataset == null) { - throw new IllegalArgumentException("File not found: " + fileId); - } - if (!file.getDatasetId().equals(dataset.getId())) { - throw new IllegalArgumentException("File does not belong to the specified dataset"); - } - return file; - } - - /** - * 删除文件 - */ - @Transactional - public void deleteDatasetFile(String datasetId, String fileId, String prefix) { - Dataset dataset = datasetRepository.getById(datasetId); - DatasetFile file = getDatasetFile(dataset, fileId, prefix); - dataset.setFiles(new ArrayList<>(Collections.singleton(file))); - datasetFileRepository.removeById(fileId); - if (CommonUtils.isUUID(fileId)) { - dataset.removeFile(file); - } - datasetRepository.updateById(dataset); - // 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录 - if (file.getFilePath().startsWith(dataset.getPath())) { - try { - Path filePath = Paths.get(file.getFilePath()); - Files.deleteIfExists(filePath); - } catch (IOException ex) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - } - - /** - * 下载文件 - */ - @Transactional(readOnly = true) - public Resource downloadFile(DatasetFile file) { - try { - Path filePath = Paths.get(file.getFilePath()).normalize(); - log.info("start download file {}", file.getFilePath()); - Resource resource = new UrlResource(filePath.toUri()); - if (resource.exists()) { - return resource; - } else { - throw new RuntimeException("File not found: " + file.getFileName()); - } - } catch (MalformedURLException ex) { - throw new RuntimeException("File not found: " + 
file.getFileName(), ex); - } - } - - /** - * 下载数据集所有文件为 ZIP - */ - @Transactional(readOnly = true) - public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) { - Dataset dataset = datasetRepository.getById(datasetId); - if (Objects.isNull(dataset)) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - String datasetPath = dataset.getPath(); - Path downloadPath = Paths.get(datasetPath).normalize(); - - // 检查路径是否存在 - if (!Files.exists(downloadPath) || !Files.isDirectory(downloadPath)) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - - response.setContentType("application/zip"); - String zipName = String.format("dataset_%s_%s.zip", - dataset.getName() != null ? dataset.getName().replaceAll("[^a-zA-Z0-9_-]", "_") : "dataset", - LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss"))); - response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + zipName + "\""); - - try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(response.getOutputStream())) { - try (Stream pathStream = Files.walk(downloadPath)) { - pathStream - .filter(path -> { - // 确保路径在数据集目录内,防止路径遍历攻击 - Path normalized = path.normalize(); - return normalized.startsWith(downloadPath); - }) - .forEach(path -> { - try { - addToZipFile(path, downloadPath, zos); - } catch (IOException e) { - log.error("Failed to add file to zip: {}", path, e); - } - }); - } - zos.finish(); - } catch (IOException e) { - log.error("Failed to download dataset files as zip for dataset {}", datasetId, e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - - private void addToZipFile(Path path, Path basePath, ZipArchiveOutputStream zos) throws IOException { - String entryName = basePath.relativize(path) - .toString() - .replace(File.separator, "/"); - - // 处理目录 - if (Files.isDirectory(path)) { - if (!entryName.isEmpty()) { - entryName += "/"; - ZipArchiveEntry dirEntry = new 
ZipArchiveEntry(entryName); - zos.putArchiveEntry(dirEntry); - zos.closeArchiveEntry(); - } - } else { - // 处理文件 - ZipArchiveEntry fileEntry = new ZipArchiveEntry(path.toFile(), entryName); - - // 设置更多属性 - BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class); - fileEntry.setSize(attrs.size()); - fileEntry.setLastModifiedTime(attrs.lastModifiedTime()); - - zos.putArchiveEntry(fileEntry); - - try (InputStream is = Files.newInputStream(path)) { - IOUtils.copy(is, zos); - } - zos.closeArchiveEntry(); - } - } - - /** - * 预上传 - * - * @param chunkUploadRequest 上传请求 - * @param datasetId 数据集id - * @return 请求id - */ - @Transactional - public String preUpload(UploadFilesPreRequest chunkUploadRequest, String datasetId) { - if (Objects.isNull(datasetRepository.getById(datasetId))) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - - // 构建上传路径,如果有 prefix 则追加到路径中 - String prefix = Optional.ofNullable(chunkUploadRequest.getPrefix()).orElse("").trim(); - prefix = prefix.replace("\\", "/"); - while (prefix.startsWith("/")) { - prefix = prefix.substring(1); - } - - String uploadPath = datasetBasePath + File.separator + datasetId; - if (!prefix.isEmpty()) { - uploadPath = uploadPath + File.separator + prefix.replace("/", File.separator); - } - - ChunkUploadPreRequest request = ChunkUploadPreRequest.builder().build(); - request.setUploadPath(uploadPath); - request.setTotalFileNum(chunkUploadRequest.getTotalFileNum()); - request.setServiceId(DatasetConstant.SERVICE_ID); - DatasetFileUploadCheckInfo checkInfo = new DatasetFileUploadCheckInfo(); - checkInfo.setDatasetId(datasetId); - checkInfo.setHasArchive(chunkUploadRequest.isHasArchive()); - checkInfo.setPrefix(prefix); - try { - ObjectMapper objectMapper = new ObjectMapper(); - String checkInfoJson = objectMapper.writeValueAsString(checkInfo); - request.setCheckInfo(checkInfoJson); - } catch (JsonProcessingException e) { - log.warn("Failed to serialize checkInfo to JSON", e); 
- } - return fileService.preUpload(request); - } - - /** - * 切片上传 - * - * @param uploadFileRequest 上传请求 - */ - @Transactional - public void chunkUpload(String datasetId, UploadFileRequest uploadFileRequest) { - FileUploadResult uploadResult = fileService.chunkUpload(DatasetConverter.INSTANCE.toChunkUploadRequest(uploadFileRequest)); - saveFileInfoToDb(uploadResult, datasetId); - } - - private void saveFileInfoToDb(FileUploadResult fileUploadResult, String datasetId) { - if (Objects.isNull(fileUploadResult.getSavedFile())) { - // 文件切片上传没有完成 - return; - } - DatasetFileUploadCheckInfo checkInfo; - try { - ObjectMapper objectMapper = new ObjectMapper(); - checkInfo = objectMapper.readValue(fileUploadResult.getCheckInfo(), DatasetFileUploadCheckInfo.class); - if (!Objects.equals(checkInfo.getDatasetId(), datasetId)) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - } catch (IllegalArgumentException | JsonProcessingException e) { - log.warn("Failed to convert checkInfo to DatasetFileUploadCheckInfo", e); - throw BusinessException.of(CommonErrorCode.PRE_UPLOAD_REQUEST_NOT_EXIST); - } - List files; - if (checkInfo.isHasArchive() && AnalyzerUtils.isPackage(fileUploadResult.getSavedFile().getPath())) { - files = ArchiveAnalyzer.process(fileUploadResult); - } else { - files = Collections.singletonList(fileUploadResult); - } - addFileToDataset(datasetId, files); - } - - private void addFileToDataset(String datasetId, List unpacked) { - Dataset dataset = datasetRepository.getById(datasetId); - dataset.setFiles(datasetFileRepository.findAllByDatasetId(datasetId)); - for (FileUploadResult file : unpacked) { - File savedFile = file.getSavedFile(); - LocalDateTime currentTime = LocalDateTime.now(); - // 统一 fileName:无论是否通过文件夹/压缩包上传,都只保留纯文件名 - String originalFileName = file.getFileName(); - String baseFileName = originalFileName; - if (originalFileName != null) { - String normalized = originalFileName.replace("\\", "/"); - int lastSlash = 
normalized.lastIndexOf('/'); - if (lastSlash >= 0 && lastSlash + 1 < normalized.length()) { - baseFileName = normalized.substring(lastSlash + 1); - } - } - DatasetFile datasetFile = DatasetFile.builder() - .id(UUID.randomUUID().toString()) - .datasetId(datasetId) - .fileSize(savedFile.length()) - .uploadTime(currentTime) - .lastAccessTime(currentTime) - .fileName(baseFileName) - .filePath(savedFile.getPath()) - .fileType(AnalyzerUtils.getExtension(file.getFileName())) - .build(); - setDatasetFileId(datasetFile, dataset); - datasetFileRepository.saveOrUpdate(datasetFile); - dataset.addFile(datasetFile); - } - dataset.active(); - datasetRepository.updateById(dataset); - } - - /** - * 在数据集下创建子目录 - */ - @Transactional - public void createDirectory(String datasetId, CreateDirectoryRequest req) { - Dataset dataset = datasetRepository.getById(datasetId); - if (dataset == null) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - String datasetPath = dataset.getPath(); - String parentPrefix = Optional.ofNullable(req.getParentPrefix()).orElse("").trim(); - parentPrefix = parentPrefix.replace("\\", "/"); - while (parentPrefix.startsWith("/")) { - parentPrefix = parentPrefix.substring(1); - } - - String directoryName = Optional.ofNullable(req.getDirectoryName()).orElse("").trim(); - if (directoryName.isEmpty()) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - if (directoryName.contains("..") || directoryName.contains("/") || directoryName.contains("\\")) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - Path basePath = Paths.get(datasetPath); - Path targetPath = parentPrefix.isEmpty() - ? 
basePath.resolve(directoryName) - : basePath.resolve(parentPrefix).resolve(directoryName); - - Path normalized = targetPath.normalize(); - if (!normalized.startsWith(basePath)) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - try { - Files.createDirectories(normalized); - } catch (IOException e) { - log.error("Failed to create directory {} for dataset {}", normalized, datasetId, e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - - /** - * 下载目录为 ZIP 文件 - */ - @Transactional(readOnly = true) - public void downloadDirectory(String datasetId, String prefix, HttpServletResponse response) { - Dataset dataset = datasetRepository.getById(datasetId); - if (dataset == null) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - - String datasetPath = dataset.getPath(); - prefix = Optional.ofNullable(prefix).orElse("").trim(); - prefix = prefix.replace("\\", "/"); - while (prefix.startsWith("/")) { - prefix = prefix.substring(1); - } - while (prefix.endsWith("/")) { - prefix = prefix.substring(0, prefix.length() - 1); - } - - Path basePath = Paths.get(datasetPath); - Path targetPath = prefix.isEmpty() ? basePath : basePath.resolve(prefix); - Path normalized = targetPath.normalize(); - - if (!normalized.startsWith(basePath)) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - if (!Files.exists(normalized) || !Files.isDirectory(normalized)) { - throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); - } - - String zipFileName = prefix.isEmpty() ? 
dataset.getName() : prefix.replace("/", "_"); - zipFileName = zipFileName + "_" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")) + ".zip"; - - try { - response.setContentType("application/zip"); - response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + zipFileName + "\""); - - try (ZipArchiveOutputStream zipOut = new ZipArchiveOutputStream(response.getOutputStream())) { - zipDirectory(normalized, normalized, zipOut); - zipOut.finish(); - } - } catch (IOException e) { - log.error("Failed to download directory {} for dataset {}", normalized, datasetId, e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - - /** - * 递归压缩目录 - */ - private void zipDirectory(Path sourceDir, Path basePath, ZipArchiveOutputStream zipOut) throws IOException { - try (Stream paths = Files.walk(sourceDir)) { - paths.filter(path -> !Files.isDirectory(path)) - .forEach(path -> { - try { - Path relativePath = basePath.relativize(path); - ZipArchiveEntry zipEntry = new ZipArchiveEntry(relativePath.toString()); - zipOut.putArchiveEntry(zipEntry); - try (InputStream fis = Files.newInputStream(path)) { - IOUtils.copy(fis, zipOut); - } - zipOut.closeArchiveEntry(); - } catch (IOException e) { - log.error("Failed to add file to zip: {}", path, e); - } - }); - } - } - - /** - * 删除目录及其所有内容 - */ - @Transactional - public void deleteDirectory(String datasetId, String prefix) { - Dataset dataset = datasetRepository.getById(datasetId); - if (dataset == null) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - - prefix = Optional.ofNullable(prefix).orElse("").trim(); - prefix = prefix.replace("\\", "/"); - while (prefix.startsWith("/")) { - prefix = prefix.substring(1); - } - while (prefix.endsWith("/")) { - prefix = prefix.substring(0, prefix.length() - 1); - } - - if (prefix.isEmpty()) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - String datasetPath = dataset.getPath(); - Path 
basePath = Paths.get(datasetPath); - Path targetPath = basePath.resolve(prefix); - Path normalized = targetPath.normalize(); - - if (!normalized.startsWith(basePath)) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - if (!Files.exists(normalized) || !Files.isDirectory(normalized)) { - throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); - } - - // 删除数据库中该目录下的所有文件记录(基于数据集内相对路径判断) - String datasetPathNorm = datasetPath.replace("\\", "/"); - String logicalPrefix = prefix; // 已经去掉首尾斜杠 - List filesToDelete = datasetFileRepository.findAllByDatasetId(datasetId).stream() - .filter(file -> { - if (file.getFilePath() == null) { - return false; - } - String filePath = file.getFilePath().replace("\\", "/"); - if (!filePath.startsWith(datasetPathNorm)) { - return false; - } - String relative = filePath.substring(datasetPathNorm.length()); - while (relative.startsWith("/")) { - relative = relative.substring(1); - } - return relative.equals(logicalPrefix) || relative.startsWith(logicalPrefix + "/"); - }) - .collect(Collectors.toList()); - - for (DatasetFile file : filesToDelete) { - datasetFileRepository.removeById(file.getId()); - } - - // 删除文件系统中的目录 - try { - deleteDirectoryRecursively(normalized); - } catch (IOException e) { - log.error("Failed to delete directory {} for dataset {}", normalized, datasetId, e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - - // 更新数据集 - dataset.setFiles(filesToDelete); - for (DatasetFile file : filesToDelete) { - dataset.removeFile(file); - } - datasetRepository.updateById(dataset); - } - - /** - * 重命名数据集文件(仅允许修改主名称,文件后缀保持不变) - */ - @Transactional - public void renameFile(String datasetId, String fileId, RenameFileRequest request) { - Dataset dataset = datasetRepository.getById(datasetId); - if (dataset == null) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - - DatasetFile file = getDatasetFile(dataset, fileId, null); - String newName = 
Optional.ofNullable(request.getNewName()).orElse("").trim(); - if (newName.isEmpty()) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - String originalFileName = file.getFileName(); - String extension = ""; - int dotIndex = originalFileName.lastIndexOf('.'); - if (dotIndex > 0 && dotIndex < originalFileName.length() - 1) { - extension = originalFileName.substring(dotIndex); // 包含点号,如 .jpg - } - - // 只接收主名称,后缀始终使用原始后缀 - String finalFileName = newName + extension; - - Path oldPath = Paths.get(file.getFilePath()).normalize(); - Path basePath = Paths.get(dataset.getPath()).normalize(); - - // 仅允许重命名数据集自身目录下的文件 - if (!oldPath.startsWith(basePath)) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - Path parentDir = oldPath.getParent(); - if (parentDir == null) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - Path newPath = parentDir.resolve(finalFileName).normalize(); - - if (!newPath.startsWith(basePath)) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - if (Files.exists(newPath)) { - throw BusinessException.of(DataManagementErrorCode.DATASET_FILE_ALREADY_EXISTS); - } - - try { - Files.move(oldPath, newPath); - } catch (IOException e) { - log.error("Failed to rename file from {} to {}", oldPath, newPath, e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - - file.setFileName(finalFileName); - file.setFilePath(newPath.toString()); - file.setFileType(AnalyzerUtils.getExtension(finalFileName)); - file.setLastAccessTime(LocalDateTime.now()); - datasetFileRepository.updateById(file); - } - - /** - * 重命名目录 - */ - @Transactional - public void renameDirectory(String datasetId, RenameDirectoryRequest request) { - Dataset dataset = datasetRepository.getById(datasetId); - if (dataset == null) { - throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); - } - - String prefix = Optional.ofNullable(request.getPrefix()).orElse("").trim(); - prefix = prefix.replace("\\", 
"/"); - while (prefix.startsWith("/")) { - prefix = prefix.substring(1); - } - while (prefix.endsWith("/")) { - prefix = prefix.substring(0, prefix.length() - 1); - } - - if (prefix.isEmpty()) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - String newName = Optional.ofNullable(request.getNewName()).orElse("").trim(); - if (newName.isEmpty() || newName.contains("..") || newName.contains("/") || newName.contains("\\")) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - String datasetPath = dataset.getPath(); - Path basePath = Paths.get(datasetPath).normalize(); - Path oldDir = basePath.resolve(prefix).normalize(); - - if (!oldDir.startsWith(basePath)) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - if (!Files.exists(oldDir) || !Files.isDirectory(oldDir)) { - throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); - } - - Path parentDir = oldDir.getParent(); - if (parentDir == null) { - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - Path newDir = parentDir.resolve(newName).normalize(); - - if (!newDir.startsWith(basePath)) { - throw BusinessException.of(CommonErrorCode.PARAM_ERROR); - } - - if (Files.exists(newDir)) { - throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); - } - - try { - Files.move(oldDir, newDir); - } catch (IOException e) { - log.error("Failed to rename directory from {} to {}", oldDir, newDir, e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - - // 同步更新数据库中该目录下所有文件的 filePath - String oldDirPath = oldDir.toString().replace("\\", "/"); - String newDirPath = newDir.toString().replace("\\", "/"); - - List allFiles = datasetFileRepository.findAllByDatasetId(datasetId); - for (DatasetFile file : allFiles) { - String filePath = Optional.ofNullable(file.getFilePath()).orElse("").replace("\\", "/"); - if (filePath.startsWith(oldDirPath + "/")) { - String relative = filePath.substring(oldDirPath.length() + 1); - Path 
updatedPath = Paths.get(newDirPath).resolve(relative); - file.setFilePath(updatedPath.toString()); - file.setLastAccessTime(LocalDateTime.now()); - datasetFileRepository.updateById(file); - } - } - } - - /** - * 递归删除目录 - */ - private void deleteDirectoryRecursively(Path directory) throws IOException { - try (Stream paths = Files.walk(directory)) { - paths.sorted(Comparator.reverseOrder()) - .forEach(path -> { - try { - Files.delete(path); - } catch (IOException e) { - log.error("Failed to delete: {}", path, e); - } - }); - } - } - - /** - * 为数据集文件设置文件id - * - * @param datasetFile 要设置id的文件 - * @param dataset 数据集(包含文件列表) - */ - private void setDatasetFileId(DatasetFile datasetFile, Dataset dataset) { - Map existDatasetFilMap = dataset.getFiles().stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity())); - DatasetFile existDatasetFile = existDatasetFilMap.get(datasetFile.getFilePath()); - if (Objects.isNull(existDatasetFile)) { - return; - } - if (duplicateMethod == DuplicateMethod.ERROR) { - log.error("file {} already exists in dataset {}", datasetFile.getFileName(), datasetFile.getDatasetId()); - throw BusinessException.of(DataManagementErrorCode.DATASET_FILE_ALREADY_EXISTS); - } - if (duplicateMethod == DuplicateMethod.COVER) { - dataset.removeFile(existDatasetFile); - datasetFile.setId(existDatasetFile.getId()); - } - } - - /** - * 添加文件到数据集(仅创建数据库记录,不执行文件系统操作) - * - * @param datasetId 数据集id - * @param req 添加文件请求 - * @return 添加的文件列表 - */ - @Transactional - public List addFilesToDataset(String datasetId, AddFilesRequest req) { - if (!req.isValidPrefix()) { - throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); - } - Dataset dataset = datasetRepository.getById(datasetId); - BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND); - List addedFiles = new ArrayList<>(); - List existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId); - dataset.setFiles(existDatasetFiles); - try { - ObjectMapper 
objectMapper = new ObjectMapper(); - - for (AddFilesRequest.FileRequest file : req.getFiles()) { - DatasetFile datasetFile = getDatasetFileForAdd(req, file, dataset, objectMapper); - setDatasetFileId(datasetFile, dataset); - dataset.addFile(datasetFile); - addedFiles.add(datasetFile); - addFile(file.getFilePath(), datasetFile.getFilePath(), req.isSoftAdd()); - } - } catch (BusinessException e) { - throw e; - } catch (Exception e) { - log.error("Failed to add file to dataset {}", dataset.getName(), e); - throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR); - } - - datasetFileRepository.saveOrUpdateBatch(addedFiles, 100); - dataset.active(); - datasetRepository.updateById(dataset); - return addedFiles; - } - - private void addFile(String sourPath, String targetPath, boolean softAdd) { - if (StringUtils.isBlank(sourPath) || StringUtils.isBlank(targetPath)) { - return; - } - Path source = Paths.get(sourPath).normalize(); - Path target = Paths.get(targetPath).normalize(); - - // 检查源文件是否存在且为普通文件 - if (!Files.exists(source) || !Files.isRegularFile(source)) { - log.warn("Source file does not exist or is not a regular file: {}", sourPath); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - - try { - Path parent = target.getParent(); - // 创建目标目录(如果需要) - if (parent != null) { - Files.createDirectories(parent); - } - Files.deleteIfExists(target); - if (softAdd) { - // 优先尝试创建硬链接,失败后尝试创建符号链接;若均失败抛出异常 - try { - Files.createLink(target, source); - return; - } catch (Throwable hardEx) { - log.warn("create hard link failed from {} to {}: {}", source, target, hardEx.getMessage()); - } - Files.createSymbolicLink(target, source); - } else { - // 覆盖已存在的目标文件,保持与其他地方行为一致 - Files.copy(source, target); - } - } catch (IOException e) { - log.error("Failed to add file from {} to {}", source, target, e); - throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); - } - } - - private static DatasetFile getDatasetFileForAdd(AddFilesRequest req, 
AddFilesRequest.FileRequest file, - Dataset dataset, ObjectMapper objectMapper) throws JsonProcessingException { - Path sourcePath = Paths.get(file.getFilePath()); - File sourceFile = sourcePath.toFile(); - file.getMetadata().put("softAdd", req.isSoftAdd()); - LocalDateTime currentTime = LocalDateTime.now(); - String fileName = sourcePath.getFileName().toString(); - - return DatasetFile.builder() - .id(UUID.randomUUID().toString()) - .datasetId(dataset.getId()) - .fileName(fileName) - .fileType(AnalyzerUtils.getExtension(fileName)) - .fileSize(sourceFile.length()) - .filePath(Paths.get(dataset.getPath(), req.getPrefix(), fileName).toString()) - .uploadTime(currentTime) - .lastAccessTime(currentTime) - .metadata(objectMapper.writeValueAsString(file.getMetadata())) - .build(); - } -} +package com.datamate.datamanagement.application; + +import com.baomidou.mybatisplus.core.metadata.IPage; +import com.baomidou.mybatisplus.extension.plugins.pagination.Page; +import com.datamate.common.domain.model.ChunkUploadPreRequest; +import com.datamate.common.domain.model.FileUploadResult; +import com.datamate.common.domain.service.FileService; +import com.datamate.common.domain.utils.AnalyzerUtils; +import com.datamate.common.domain.utils.ArchiveAnalyzer; +import com.datamate.common.domain.utils.CommonUtils; +import com.datamate.common.infrastructure.exception.BusinessAssert; +import com.datamate.common.infrastructure.exception.BusinessException; +import com.datamate.common.infrastructure.exception.CommonErrorCode; +import com.datamate.common.infrastructure.exception.SystemErrorCode; +import com.datamate.common.interfaces.PagedResponse; +import com.datamate.common.interfaces.PagingQuery; +import com.datamate.datamanagement.common.enums.DuplicateMethod; +import com.datamate.datamanagement.domain.contants.DatasetConstant; +import com.datamate.datamanagement.domain.model.dataset.Dataset; +import com.datamate.datamanagement.domain.model.dataset.DatasetFile; +import 
com.datamate.datamanagement.domain.model.dataset.DatasetFileUploadCheckInfo; +import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode; +import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetFileRepository; +import com.datamate.datamanagement.infrastructure.persistence.repository.DatasetRepository; +import com.datamate.datamanagement.interfaces.converter.DatasetConverter; +import com.datamate.datamanagement.interfaces.dto.AddFilesRequest; +import com.datamate.datamanagement.interfaces.dto.CreateDirectoryRequest; +import com.datamate.datamanagement.interfaces.dto.UploadFileRequest; +import com.datamate.datamanagement.interfaces.dto.UploadFilesPreRequest; +import com.datamate.datamanagement.interfaces.dto.RenameFileRequest; +import com.datamate.datamanagement.interfaces.dto.RenameDirectoryRequest; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import jakarta.servlet.http.HttpServletResponse; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.compress.archivers.zip.ZipArchiveEntry; +import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream; +import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.StringUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.core.io.Resource; +import org.springframework.core.io.UrlResource; +import org.springframework.http.HttpHeaders; +import org.springframework.stereotype.Service; +import org.springframework.transaction.annotation.Transactional; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.attribute.BasicFileAttributes; +import java.time.LocalDateTime; +import java.time.ZoneId; +import 
java.time.format.DateTimeFormatter; +import java.util.*; +import java.util.function.Function; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * 数据集文件应用服务 + */ +@Slf4j +@Service +@Transactional +public class DatasetFileApplicationService { + + private final DatasetFileRepository datasetFileRepository; + private final DatasetRepository datasetRepository; + private final FileService fileService; + + @Value("${datamate.data-management.base-path:/dataset}") + private String datasetBasePath; + + @Value("${datamate.data-management.file.duplicate:COVER}") + private DuplicateMethod duplicateMethod; + + @Autowired + public DatasetFileApplicationService(DatasetFileRepository datasetFileRepository, + DatasetRepository datasetRepository, FileService fileService) { + this.datasetFileRepository = datasetFileRepository; + this.datasetRepository = datasetRepository; + this.fileService = fileService; + } + + /** + * 获取数据集文件列表 + */ + @Transactional(readOnly = true) + public PagedResponse getDatasetFiles(String datasetId, String fileType, String status, String name, PagingQuery pagingQuery) { + IPage page = new Page<>(pagingQuery.getPage(), pagingQuery.getSize()); + IPage files = datasetFileRepository.findByCriteria(datasetId, fileType, status, name, page); + return PagedResponse.of(files); + } + + /** + * 获取数据集文件列表 + */ + @Transactional(readOnly = true) + public PagedResponse getDatasetFilesWithDirectory(String datasetId, String prefix, PagingQuery pagingQuery) { + Dataset dataset = datasetRepository.getById(datasetId); + int page = Math.max(pagingQuery.getPage(), 1); + int size = pagingQuery.getSize() == null || pagingQuery.getSize() < 0 ? 
20 : pagingQuery.getSize(); + if (dataset == null) { + return PagedResponse.of(new Page<>(page, size)); + } + String datasetPath = dataset.getPath(); + Path queryPath = Path.of(dataset.getPath() + File.separator + prefix); + Map datasetFilesMap = datasetFileRepository.findAllByDatasetId(datasetId) + .stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity())); + try (Stream pathStream = Files.list(queryPath)) { + List allFiles = pathStream + .filter(path -> path.toString().startsWith(datasetPath)) + .sorted(Comparator + .comparing((Path path) -> !Files.isDirectory(path)) + .thenComparing(path -> path.getFileName().toString())) + .collect(Collectors.toList()); + + // 计算分页 + int total = allFiles.size(); + int totalPages = (int) Math.ceil((double) total / size); + + // 获取当前页数据 + int fromIndex = (page - 1) * size; + fromIndex = Math.max(fromIndex, 0); + int toIndex = Math.min(fromIndex + size, total); + + List pageData = new ArrayList<>(); + if (fromIndex < total) { + pageData = allFiles.subList(fromIndex, toIndex); + } + List datasetFiles = pageData.stream().map(path -> getDatasetFile(path, datasetFilesMap)).toList(); + + return PagedResponse.of(page, size, total, totalPages, datasetFiles); + } catch (IOException e) { + log.warn("list dataset path error"); + return PagedResponse.of(new Page<>(page, size)); + } + } + + private DatasetFile getDatasetFile(Path path, Map datasetFilesMap) { + DatasetFile datasetFile = new DatasetFile(); + LocalDateTime localDateTime = LocalDateTime.now(); + try { + localDateTime = Files.getLastModifiedTime(path).toInstant().atZone(ZoneId.systemDefault()).toLocalDateTime(); + } catch (IOException e) { + log.error("get last modified time error", e); + } + datasetFile.setFileName(path.getFileName().toString()); + datasetFile.setUploadTime(localDateTime); + + // 目录与普通文件区分处理 + if (Files.isDirectory(path)) { + datasetFile.setId("directory-" + datasetFile.getFileName()); + datasetFile.setDirectory(true); + + // 统计目录下文件数量和总大小 
+ try { + long fileCount; + long totalSize; + + try (Stream walk = Files.walk(path)) { + fileCount = walk.filter(Files::isRegularFile).count(); + } + + try (Stream walk = Files.walk(path)) { + totalSize = walk + .filter(Files::isRegularFile) + .mapToLong(p -> { + try { + return Files.size(p); + } catch (IOException e) { + log.error("get file size error", e); + return 0L; + } + }) + .sum(); + } + + datasetFile.setFileCount(fileCount); + datasetFile.setFileSize(totalSize); + } catch (IOException e) { + log.error("stat directory info error", e); + } + } else { + DatasetFile exist = datasetFilesMap.get(path.toString()); + if (exist == null) { + datasetFile.setId(datasetFile.getFileName()); + datasetFile.setFileSize(path.toFile().length()); + } else { + datasetFile = exist; + } + } + return datasetFile; + } + + /** + * 获取文件详情 + */ + @Transactional(readOnly = true) + public DatasetFile getDatasetFile(Dataset dataset, String fileId, String prefix) { + prefix = StringUtils.isBlank(prefix) ? "" : prefix; + if (dataset != null && !CommonUtils.isUUID(fileId) && !fileId.startsWith(".") && !prefix.startsWith(".")) { + DatasetFile file = new DatasetFile(); + file.setId(fileId); + file.setFileName(fileId); + file.setDatasetId(dataset.getId()); + file.setFileSize(0L); + file.setFilePath(dataset.getPath() + File.separator + prefix + fileId); + return file; + } + DatasetFile file = datasetFileRepository.getById(fileId); + if (file == null || dataset == null) { + throw new IllegalArgumentException("File not found: " + fileId); + } + if (!file.getDatasetId().equals(dataset.getId())) { + throw new IllegalArgumentException("File does not belong to the specified dataset"); + } + return file; + } + + /** + * 删除文件 + */ + @Transactional + public void deleteDatasetFile(String datasetId, String fileId, String prefix) { + Dataset dataset = datasetRepository.getById(datasetId); + DatasetFile file = getDatasetFile(dataset, fileId, prefix); + dataset.setFiles(new 
ArrayList<>(Collections.singleton(file))); + datasetFileRepository.removeById(fileId); + if (CommonUtils.isUUID(fileId)) { + dataset.removeFile(file); + } + datasetRepository.updateById(dataset); + // 删除文件时,上传到数据集中的文件会同时删除数据库中的记录和文件系统中的文件,归集过来的文件仅删除数据库中的记录 + if (file.getFilePath().startsWith(dataset.getPath())) { + try { + Path filePath = Paths.get(file.getFilePath()); + Files.deleteIfExists(filePath); + } catch (IOException ex) { + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + } + + /** + * 下载文件 + */ + @Transactional(readOnly = true) + public Resource downloadFile(DatasetFile file) { + try { + Path filePath = Paths.get(file.getFilePath()).normalize(); + log.info("start download file {}", file.getFilePath()); + Resource resource = new UrlResource(filePath.toUri()); + if (resource.exists()) { + return resource; + } else { + throw new RuntimeException("File not found: " + file.getFileName()); + } + } catch (MalformedURLException ex) { + throw new RuntimeException("File not found: " + file.getFileName(), ex); + } + } + + /** + * 下载数据集所有文件为 ZIP + */ + @Transactional(readOnly = true) + public void downloadDatasetFileAsZip(String datasetId, HttpServletResponse response) { + Dataset dataset = datasetRepository.getById(datasetId); + if (Objects.isNull(dataset)) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + String datasetPath = dataset.getPath(); + Path downloadPath = Paths.get(datasetPath).normalize(); + + // 检查路径是否存在 + if (!Files.exists(downloadPath) || !Files.isDirectory(downloadPath)) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + response.setContentType("application/zip"); + String zipName = String.format("dataset_%s_%s.zip", + dataset.getName() != null ? 
dataset.getName().replaceAll("[^a-zA-Z0-9_-]", "_") : "dataset", + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMddHHmmss"))); + response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + zipName + "\""); + + try (ZipArchiveOutputStream zos = new ZipArchiveOutputStream(response.getOutputStream())) { + try (Stream pathStream = Files.walk(downloadPath)) { + pathStream + .filter(path -> { + // 确保路径在数据集目录内,防止路径遍历攻击 + Path normalized = path.normalize(); + return normalized.startsWith(downloadPath); + }) + .forEach(path -> { + try { + addToZipFile(path, downloadPath, zos); + } catch (IOException e) { + log.error("Failed to add file to zip: {}", path, e); + } + }); + } + zos.finish(); + } catch (IOException e) { + log.error("Failed to download dataset files as zip for dataset {}", datasetId, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + + private void addToZipFile(Path path, Path basePath, ZipArchiveOutputStream zos) throws IOException { + String entryName = basePath.relativize(path) + .toString() + .replace(File.separator, "/"); + + // 处理目录 + if (Files.isDirectory(path)) { + if (!entryName.isEmpty()) { + entryName += "/"; + ZipArchiveEntry dirEntry = new ZipArchiveEntry(entryName); + zos.putArchiveEntry(dirEntry); + zos.closeArchiveEntry(); + } + } else { + // 处理文件 + ZipArchiveEntry fileEntry = new ZipArchiveEntry(path.toFile(), entryName); + + // 设置更多属性 + BasicFileAttributes attrs = Files.readAttributes(path, BasicFileAttributes.class); + fileEntry.setSize(attrs.size()); + fileEntry.setLastModifiedTime(attrs.lastModifiedTime()); + + zos.putArchiveEntry(fileEntry); + + try (InputStream is = Files.newInputStream(path)) { + IOUtils.copy(is, zos); + } + zos.closeArchiveEntry(); + } + } + + /** + * 预上传 + * + * @param chunkUploadRequest 上传请求 + * @param datasetId 数据集id + * @return 请求id + */ + @Transactional + public String preUpload(UploadFilesPreRequest chunkUploadRequest, String datasetId) { + if 
(Objects.isNull(datasetRepository.getById(datasetId))) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + // 构建上传路径,如果有 prefix 则追加到路径中 + String prefix = Optional.ofNullable(chunkUploadRequest.getPrefix()).orElse("").trim(); + prefix = prefix.replace("\\", "/"); + while (prefix.startsWith("/")) { + prefix = prefix.substring(1); + } + + String uploadPath = datasetBasePath + File.separator + datasetId; + if (!prefix.isEmpty()) { + uploadPath = uploadPath + File.separator + prefix.replace("/", File.separator); + } + + ChunkUploadPreRequest request = ChunkUploadPreRequest.builder().build(); + request.setUploadPath(uploadPath); + request.setTotalFileNum(chunkUploadRequest.getTotalFileNum()); + request.setServiceId(DatasetConstant.SERVICE_ID); + DatasetFileUploadCheckInfo checkInfo = new DatasetFileUploadCheckInfo(); + checkInfo.setDatasetId(datasetId); + checkInfo.setHasArchive(chunkUploadRequest.isHasArchive()); + checkInfo.setPrefix(prefix); + try { + ObjectMapper objectMapper = new ObjectMapper(); + String checkInfoJson = objectMapper.writeValueAsString(checkInfo); + request.setCheckInfo(checkInfoJson); + } catch (JsonProcessingException e) { + log.warn("Failed to serialize checkInfo to JSON", e); + } + return fileService.preUpload(request); + } + + /** + * 切片上传 + * + * @param uploadFileRequest 上传请求 + */ + @Transactional + public void chunkUpload(String datasetId, UploadFileRequest uploadFileRequest) { + FileUploadResult uploadResult = fileService.chunkUpload(DatasetConverter.INSTANCE.toChunkUploadRequest(uploadFileRequest)); + saveFileInfoToDb(uploadResult, datasetId); + } + + private void saveFileInfoToDb(FileUploadResult fileUploadResult, String datasetId) { + if (Objects.isNull(fileUploadResult.getSavedFile())) { + // 文件切片上传没有完成 + return; + } + DatasetFileUploadCheckInfo checkInfo; + try { + ObjectMapper objectMapper = new ObjectMapper(); + checkInfo = objectMapper.readValue(fileUploadResult.getCheckInfo(), 
DatasetFileUploadCheckInfo.class); + if (!Objects.equals(checkInfo.getDatasetId(), datasetId)) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + } catch (IllegalArgumentException | JsonProcessingException e) { + log.warn("Failed to convert checkInfo to DatasetFileUploadCheckInfo", e); + throw BusinessException.of(CommonErrorCode.PRE_UPLOAD_REQUEST_NOT_EXIST); + } + List files; + if (checkInfo.isHasArchive() && AnalyzerUtils.isPackage(fileUploadResult.getSavedFile().getPath())) { + files = ArchiveAnalyzer.process(fileUploadResult); + } else { + files = Collections.singletonList(fileUploadResult); + } + addFileToDataset(datasetId, files); + } + + private void addFileToDataset(String datasetId, List unpacked) { + Dataset dataset = datasetRepository.getById(datasetId); + dataset.setFiles(datasetFileRepository.findAllByDatasetId(datasetId)); + for (FileUploadResult file : unpacked) { + File savedFile = file.getSavedFile(); + LocalDateTime currentTime = LocalDateTime.now(); + // 统一 fileName:无论是否通过文件夹/压缩包上传,都只保留纯文件名 + String originalFileName = file.getFileName(); + String baseFileName = originalFileName; + if (originalFileName != null) { + String normalized = originalFileName.replace("\\", "/"); + int lastSlash = normalized.lastIndexOf('/'); + if (lastSlash >= 0 && lastSlash + 1 < normalized.length()) { + baseFileName = normalized.substring(lastSlash + 1); + } + } + DatasetFile datasetFile = DatasetFile.builder() + .id(UUID.randomUUID().toString()) + .datasetId(datasetId) + .fileSize(savedFile.length()) + .uploadTime(currentTime) + .lastAccessTime(currentTime) + .fileName(baseFileName) + .filePath(savedFile.getPath()) + .fileType(AnalyzerUtils.getExtension(file.getFileName())) + .build(); + setDatasetFileId(datasetFile, dataset); + datasetFileRepository.saveOrUpdate(datasetFile); + dataset.addFile(datasetFile); + } + dataset.active(); + datasetRepository.updateById(dataset); + } + + /** + * 在数据集下创建子目录 + */ + @Transactional + public void 
createDirectory(String datasetId, CreateDirectoryRequest req) { + Dataset dataset = datasetRepository.getById(datasetId); + if (dataset == null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + String datasetPath = dataset.getPath(); + String parentPrefix = Optional.ofNullable(req.getParentPrefix()).orElse("").trim(); + parentPrefix = parentPrefix.replace("\\", "/"); + while (parentPrefix.startsWith("/")) { + parentPrefix = parentPrefix.substring(1); + } + + String directoryName = Optional.ofNullable(req.getDirectoryName()).orElse("").trim(); + if (directoryName.isEmpty()) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + if (directoryName.contains("..") || directoryName.contains("/") || directoryName.contains("\\")) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + Path basePath = Paths.get(datasetPath); + Path targetPath = parentPrefix.isEmpty() + ? basePath.resolve(directoryName) + : basePath.resolve(parentPrefix).resolve(directoryName); + + Path normalized = targetPath.normalize(); + if (!normalized.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + try { + Files.createDirectories(normalized); + } catch (IOException e) { + log.error("Failed to create directory {} for dataset {}", normalized, datasetId, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + + /** + * 下载目录为 ZIP 文件 + */ + @Transactional(readOnly = true) + public void downloadDirectory(String datasetId, String prefix, HttpServletResponse response) { + Dataset dataset = datasetRepository.getById(datasetId); + if (dataset == null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + String datasetPath = dataset.getPath(); + prefix = Optional.ofNullable(prefix).orElse("").trim(); + prefix = prefix.replace("\\", "/"); + while (prefix.startsWith("/")) { + prefix = prefix.substring(1); + } + while (prefix.endsWith("/")) { + prefix = 
prefix.substring(0, prefix.length() - 1); + } + + Path basePath = Paths.get(datasetPath); + Path targetPath = prefix.isEmpty() ? basePath : basePath.resolve(prefix); + Path normalized = targetPath.normalize(); + + if (!normalized.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + if (!Files.exists(normalized) || !Files.isDirectory(normalized)) { + throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); + } + + String zipFileName = prefix.isEmpty() ? dataset.getName() : prefix.replace("/", "_"); + zipFileName = zipFileName + "_" + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")) + ".zip"; + + try { + response.setContentType("application/zip"); + response.setHeader(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=\"" + zipFileName + "\""); + + try (ZipArchiveOutputStream zipOut = new ZipArchiveOutputStream(response.getOutputStream())) { + zipDirectory(normalized, normalized, zipOut); + zipOut.finish(); + } + } catch (IOException e) { + log.error("Failed to download directory {} for dataset {}", normalized, datasetId, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + + /** + * 递归压缩目录 + */ + private void zipDirectory(Path sourceDir, Path basePath, ZipArchiveOutputStream zipOut) throws IOException { + try (Stream paths = Files.walk(sourceDir)) { + paths.filter(path -> !Files.isDirectory(path)) + .forEach(path -> { + try { + Path relativePath = basePath.relativize(path); + ZipArchiveEntry zipEntry = new ZipArchiveEntry(relativePath.toString()); + zipOut.putArchiveEntry(zipEntry); + try (InputStream fis = Files.newInputStream(path)) { + IOUtils.copy(fis, zipOut); + } + zipOut.closeArchiveEntry(); + } catch (IOException e) { + log.error("Failed to add file to zip: {}", path, e); + } + }); + } + } + + /** + * 删除目录及其所有内容 + */ + @Transactional + public void deleteDirectory(String datasetId, String prefix) { + Dataset dataset = 
datasetRepository.getById(datasetId); + if (dataset == null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + prefix = Optional.ofNullable(prefix).orElse("").trim(); + prefix = prefix.replace("\\", "/"); + while (prefix.startsWith("/")) { + prefix = prefix.substring(1); + } + while (prefix.endsWith("/")) { + prefix = prefix.substring(0, prefix.length() - 1); + } + + if (prefix.isEmpty()) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + String datasetPath = dataset.getPath(); + Path basePath = Paths.get(datasetPath); + Path targetPath = basePath.resolve(prefix); + Path normalized = targetPath.normalize(); + + if (!normalized.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + if (!Files.exists(normalized) || !Files.isDirectory(normalized)) { + throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); + } + + // 删除数据库中该目录下的所有文件记录(基于数据集内相对路径判断) + String datasetPathNorm = datasetPath.replace("\\", "/"); + String logicalPrefix = prefix; // 已经去掉首尾斜杠 + List filesToDelete = datasetFileRepository.findAllByDatasetId(datasetId).stream() + .filter(file -> { + if (file.getFilePath() == null) { + return false; + } + String filePath = file.getFilePath().replace("\\", "/"); + if (!filePath.startsWith(datasetPathNorm)) { + return false; + } + String relative = filePath.substring(datasetPathNorm.length()); + while (relative.startsWith("/")) { + relative = relative.substring(1); + } + return relative.equals(logicalPrefix) || relative.startsWith(logicalPrefix + "/"); + }) + .collect(Collectors.toList()); + + for (DatasetFile file : filesToDelete) { + datasetFileRepository.removeById(file.getId()); + } + + // 删除文件系统中的目录 + try { + deleteDirectoryRecursively(normalized); + } catch (IOException e) { + log.error("Failed to delete directory {} for dataset {}", normalized, datasetId, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + + // 更新数据集 + 
dataset.setFiles(filesToDelete); + for (DatasetFile file : filesToDelete) { + dataset.removeFile(file); + } + datasetRepository.updateById(dataset); + } + + /** + * 重命名数据集文件(仅允许修改主名称,文件后缀保持不变) + */ + @Transactional + public void renameFile(String datasetId, String fileId, RenameFileRequest request) { + Dataset dataset = datasetRepository.getById(datasetId); + if (dataset == null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + DatasetFile file = getDatasetFile(dataset, fileId, null); + String newName = Optional.ofNullable(request.getNewName()).orElse("").trim(); + if (newName.isEmpty()) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + String originalFileName = file.getFileName(); + String extension = ""; + int dotIndex = originalFileName.lastIndexOf('.'); + if (dotIndex > 0 && dotIndex < originalFileName.length() - 1) { + extension = originalFileName.substring(dotIndex); // 包含点号,如 .jpg + } + + // 只接收主名称,后缀始终使用原始后缀 + String finalFileName = newName + extension; + + Path oldPath = Paths.get(file.getFilePath()).normalize(); + Path basePath = Paths.get(dataset.getPath()).normalize(); + + // 仅允许重命名数据集自身目录下的文件 + if (!oldPath.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + Path parentDir = oldPath.getParent(); + if (parentDir == null) { + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + Path newPath = parentDir.resolve(finalFileName).normalize(); + + if (!newPath.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + if (Files.exists(newPath)) { + throw BusinessException.of(DataManagementErrorCode.DATASET_FILE_ALREADY_EXISTS); + } + + try { + Files.move(oldPath, newPath); + } catch (IOException e) { + log.error("Failed to rename file from {} to {}", oldPath, newPath, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + + file.setFileName(finalFileName); + file.setFilePath(newPath.toString()); + 
file.setFileType(AnalyzerUtils.getExtension(finalFileName)); + file.setLastAccessTime(LocalDateTime.now()); + datasetFileRepository.updateById(file); + } + + /** + * 重命名目录 + */ + @Transactional + public void renameDirectory(String datasetId, RenameDirectoryRequest request) { + Dataset dataset = datasetRepository.getById(datasetId); + if (dataset == null) { + throw BusinessException.of(DataManagementErrorCode.DATASET_NOT_FOUND); + } + + String prefix = Optional.ofNullable(request.getPrefix()).orElse("").trim(); + prefix = prefix.replace("\\", "/"); + while (prefix.startsWith("/")) { + prefix = prefix.substring(1); + } + while (prefix.endsWith("/")) { + prefix = prefix.substring(0, prefix.length() - 1); + } + + if (prefix.isEmpty()) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + String newName = Optional.ofNullable(request.getNewName()).orElse("").trim(); + if (newName.isEmpty() || newName.contains("..") || newName.contains("/") || newName.contains("\\")) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + String datasetPath = dataset.getPath(); + Path basePath = Paths.get(datasetPath).normalize(); + Path oldDir = basePath.resolve(prefix).normalize(); + + if (!oldDir.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + if (!Files.exists(oldDir) || !Files.isDirectory(oldDir)) { + throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); + } + + Path parentDir = oldDir.getParent(); + if (parentDir == null) { + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + Path newDir = parentDir.resolve(newName).normalize(); + + if (!newDir.startsWith(basePath)) { + throw BusinessException.of(CommonErrorCode.PARAM_ERROR); + } + + if (Files.exists(newDir)) { + throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); + } + + try { + Files.move(oldDir, newDir); + } catch (IOException e) { + log.error("Failed to rename directory from {} to {}", oldDir, newDir, 
e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + + // 同步更新数据库中该目录下所有文件的 filePath + String oldDirPath = oldDir.toString().replace("\\", "/"); + String newDirPath = newDir.toString().replace("\\", "/"); + + List allFiles = datasetFileRepository.findAllByDatasetId(datasetId); + for (DatasetFile file : allFiles) { + String filePath = Optional.ofNullable(file.getFilePath()).orElse("").replace("\\", "/"); + if (filePath.startsWith(oldDirPath + "/")) { + String relative = filePath.substring(oldDirPath.length() + 1); + Path updatedPath = Paths.get(newDirPath).resolve(relative); + file.setFilePath(updatedPath.toString()); + file.setLastAccessTime(LocalDateTime.now()); + datasetFileRepository.updateById(file); + } + } + } + + /** + * 递归删除目录 + */ + private void deleteDirectoryRecursively(Path directory) throws IOException { + try (Stream paths = Files.walk(directory)) { + paths.sorted(Comparator.reverseOrder()) + .forEach(path -> { + try { + Files.delete(path); + } catch (IOException e) { + log.error("Failed to delete: {}", path, e); + } + }); + } + } + + /** + * 为数据集文件设置文件id + * + * @param datasetFile 要设置id的文件 + * @param dataset 数据集(包含文件列表) + */ + private void setDatasetFileId(DatasetFile datasetFile, Dataset dataset) { + Map existDatasetFilMap = dataset.getFiles().stream().collect(Collectors.toMap(DatasetFile::getFilePath, Function.identity())); + DatasetFile existDatasetFile = existDatasetFilMap.get(datasetFile.getFilePath()); + if (Objects.isNull(existDatasetFile)) { + return; + } + if (duplicateMethod == DuplicateMethod.ERROR) { + log.error("file {} already exists in dataset {}", datasetFile.getFileName(), datasetFile.getDatasetId()); + throw BusinessException.of(DataManagementErrorCode.DATASET_FILE_ALREADY_EXISTS); + } + if (duplicateMethod == DuplicateMethod.COVER) { + dataset.removeFile(existDatasetFile); + datasetFile.setId(existDatasetFile.getId()); + } + } + + /** + * 添加文件到数据集(仅创建数据库记录,不执行文件系统操作) + * + * @param datasetId 数据集id + * @param 
req 添加文件请求 + * @return 添加的文件列表 + */ + @Transactional + public List addFilesToDataset(String datasetId, AddFilesRequest req) { + if (!req.isValidPrefix()) { + throw BusinessException.of(DataManagementErrorCode.DIRECTORY_NOT_FOUND); + } + Dataset dataset = datasetRepository.getById(datasetId); + BusinessAssert.notNull(dataset, SystemErrorCode.RESOURCE_NOT_FOUND); + List addedFiles = new ArrayList<>(); + List existDatasetFiles = datasetFileRepository.findAllByDatasetId(datasetId); + dataset.setFiles(existDatasetFiles); + try { + ObjectMapper objectMapper = new ObjectMapper(); + + for (AddFilesRequest.FileRequest file : req.getFiles()) { + DatasetFile datasetFile = getDatasetFileForAdd(req, file, dataset, objectMapper); + setDatasetFileId(datasetFile, dataset); + dataset.addFile(datasetFile); + addedFiles.add(datasetFile); + addFile(file.getFilePath(), datasetFile.getFilePath(), req.isSoftAdd()); + } + } catch (BusinessException e) { + throw e; + } catch (Exception e) { + log.error("Failed to add file to dataset {}", dataset.getName(), e); + throw BusinessException.of(SystemErrorCode.UNKNOWN_ERROR); + } + + datasetFileRepository.saveOrUpdateBatch(addedFiles, 100); + dataset.active(); + datasetRepository.updateById(dataset); + return addedFiles; + } + + private void addFile(String sourPath, String targetPath, boolean softAdd) { + if (StringUtils.isBlank(sourPath) || StringUtils.isBlank(targetPath)) { + return; + } + Path source = Paths.get(sourPath).normalize(); + Path target = Paths.get(targetPath).normalize(); + + // 检查源文件是否存在且为普通文件 + if (!Files.exists(source) || !Files.isRegularFile(source)) { + log.warn("Source file does not exist or is not a regular file: {}", sourPath); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + + try { + Path parent = target.getParent(); + // 创建目标目录(如果需要) + if (parent != null) { + Files.createDirectories(parent); + } + Files.deleteIfExists(target); + if (softAdd) { + // 优先尝试创建硬链接,失败后尝试创建符号链接;若均失败抛出异常 + try { + 
Files.createLink(target, source); + return; + } catch (Throwable hardEx) { + log.warn("create hard link failed from {} to {}: {}", source, target, hardEx.getMessage()); + } + Files.createSymbolicLink(target, source); + } else { + // 覆盖已存在的目标文件,保持与其他地方行为一致 + Files.copy(source, target); + } + } catch (IOException e) { + log.error("Failed to add file from {} to {}", source, target, e); + throw BusinessException.of(SystemErrorCode.FILE_SYSTEM_ERROR); + } + } + + private static DatasetFile getDatasetFileForAdd(AddFilesRequest req, AddFilesRequest.FileRequest file, + Dataset dataset, ObjectMapper objectMapper) throws JsonProcessingException { + Path sourcePath = Paths.get(file.getFilePath()); + File sourceFile = sourcePath.toFile(); + file.getMetadata().put("softAdd", req.isSoftAdd()); + LocalDateTime currentTime = LocalDateTime.now(); + String fileName = sourcePath.getFileName().toString(); + + return DatasetFile.builder() + .id(UUID.randomUUID().toString()) + .datasetId(dataset.getId()) + .fileName(fileName) + .fileType(AnalyzerUtils.getExtension(fileName)) + .fileSize(sourceFile.length()) + .filePath(Paths.get(dataset.getPath(), req.getPrefix(), fileName).toString()) + .uploadTime(currentTime) + .lastAccessTime(currentTime) + .metadata(objectMapper.writeValueAsString(file.getMetadata())) + .build(); + } +} diff --git a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java index 959401118..f5ce0c75a 100644 --- a/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java +++ b/backend/services/rag-indexer-service/src/main/java/com/datamate/rag/indexer/application/KnowledgeBaseService.java @@ -149,7 +149,7 @@ public PagedResponse list(KnowledgeBaseQueryReq request) { // 将 KnowledgeBase 转换为 KnowledgeBaseResp,并计算 fileCount 和 chunkCount List 
respList = page.getRecords().stream().map(this::getKnowledgeBaseResp).toList(); - return PagedResponse.of(respList, page.getCurrent(), page.getTotal(), page.getPages()); + return PagedResponse.of(page.getCurrent(), page.getSize(), page.getTotal(), page.getPages(), respList); } @Transactional(rollbackFor = Exception.class) @@ -173,7 +173,7 @@ public PagedResponse listFiles(String knowledgeBaseId, RagFileReq reque IPage page = new Page<>(request.getPage(), request.getSize()); request.setKnowledgeBaseId(knowledgeBaseId); page = ragFileRepository.page(page, request); - return PagedResponse.of(page.getRecords(), page.getCurrent(), page.getTotal(), page.getPages()); + return PagedResponse.of(page.getCurrent(), page.getSize(), page.getTotal(), page.getPages(), page.getRecords()); } @Transactional(rollbackFor = Exception.class) @@ -214,7 +214,7 @@ public PagedResponse getChunks(String knowledgeBaseId, String ragFileI .build()); long totalCount = Long.parseLong(countResults.getQueryResults().getFirst().getEntity().get("count(*)").toString()); - return PagedResponse.of(ragChunks, pagingQuery.getPage(), totalCount, (int) Math.ceil((double) totalCount / pagingQuery.getSize())); + return PagedResponse.of(pagingQuery.getPage(), pagingQuery.getSize(), totalCount, (int) Math.ceil((double) totalCount / pagingQuery.getSize()), ragChunks); } /** diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java index 17270d971..dca8db46b 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/interfaces/PagedResponse.java @@ -19,23 +19,7 @@ public class PagedResponse { private long totalPages; private List content; - public PagedResponse(List content) { - this.page = 0; - this.size = content.size(); - this.totalElements = 
content.size(); - this.totalPages = 1; - this.content = content; - } - - public PagedResponse(List content, long page, long totalElements, long totalPages) { - this.page = page; - this.size = content.size(); - this.totalElements = totalElements; - this.totalPages = totalPages; - this.content = content; - } - - public PagedResponse(long page, long size, long totalElements, long totalPages, List content) { + private PagedResponse(long page, long size, long totalElements, long totalPages, List content) { this.page = page; this.size = size; this.totalElements = totalElements; @@ -43,12 +27,8 @@ public PagedResponse(long page, long size, long totalElements, long totalPages, this.content = content; } - public static PagedResponse of(List content) { - return new PagedResponse<>(content); - } - - public static PagedResponse of(List content, long page, long totalElements, long totalPages) { - return new PagedResponse<>(content, page, totalElements, totalPages); + public static PagedResponse of(long page, long size, long totalElements, long totalPages, List content) { + return new PagedResponse<>(page, size, totalElements, totalPages, content); } public static PagedResponse of(IPage page) { diff --git a/backend/shared/domain-common/src/main/java/com/datamate/common/setting/application/ModelConfigApplicationService.java b/backend/shared/domain-common/src/main/java/com/datamate/common/setting/application/ModelConfigApplicationService.java index 3119472d5..0434f7677 100644 --- a/backend/shared/domain-common/src/main/java/com/datamate/common/setting/application/ModelConfigApplicationService.java +++ b/backend/shared/domain-common/src/main/java/com/datamate/common/setting/application/ModelConfigApplicationService.java @@ -43,7 +43,7 @@ public List getProviders() { public PagedResponse getModels(QueryModelRequest queryModelRequest) { // 从数据库查询模型配置 IPage page = modelConfigRepository.page(queryModelRequest); - return PagedResponse.of(page.getRecords(), page.getCurrent(), 
page.getTotal(), page.getPages()); + return PagedResponse.of(page); } public ModelConfig getModelDetail(String modelId) { From a766eb474beccb68f122380aed6d364917e64296 Mon Sep 17 00:00:00 2001 From: uname <2986773479@qq.com> Date: Tue, 24 Feb 2026 14:51:52 +0800 Subject: [PATCH 2/2] feature: add reasonable parameter verification to data management --- backend/openapi/specs/data-management.yaml | 2351 ++++++++++------- .../exception/DataManagementErrorCode.java | 134 +- .../interfaces/dto/AddFilesRequest.java | 11 + .../interfaces/dto/CopyFilesRequest.java | 6 +- .../interfaces/dto/CreateDatasetRequest.java | 11 +- .../dto/CreateDirectoryRequest.java | 46 +- .../interfaces/dto/CreateTagRequest.java | 10 + .../interfaces/dto/DatasetPagingQuery.java | 3 + .../dto/RenameDirectoryRequest.java | 48 +- .../interfaces/dto/RenameFileRequest.java | 38 +- .../interfaces/dto/UpdateDatasetRequest.java | 6 +- .../interfaces/dto/UpdateTagRequest.java | 7 + .../interfaces/dto/UploadFileRequest.java | 15 + .../interfaces/dto/UploadFilesPreRequest.java | 53 +- .../interfaces/rest/DatasetController.java | 29 +- .../interfaces/validation/ValidFileName.java | 26 + .../validation/ValidFileNameValidator.java | 43 + .../interfaces/validation/ValidHexColor.java | 26 + .../validation/ValidHexColorValidator.java | 43 + .../interfaces/validation/ValidPath.java | 31 + .../validation/ValidPathValidator.java | 66 + .../DataManagement/Create/CreateDataset.tsx | 2 +- 22 files changed, 1972 insertions(+), 1033 deletions(-) create mode 100644 backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileName.java create mode 100644 backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileNameValidator.java create mode 100644 backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColor.java create mode 100644 
backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColorValidator.java create mode 100644 backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPath.java create mode 100644 backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPathValidator.java diff --git a/backend/openapi/specs/data-management.yaml b/backend/openapi/specs/data-management.yaml index e505ee576..98babc4ac 100644 --- a/backend/openapi/specs/data-management.yaml +++ b/backend/openapi/specs/data-management.yaml @@ -1,890 +1,1461 @@ -openapi: 3.0.3 -info: - title: Data Management Service API - description: | - 数据管理服务API,提供数据集的创建、管理和文件操作功能。 - - 主要功能: - - 数据集的创建和管理 - - 多种数据集类型支持(图像、文本、音频、视频、多模态等) - - 数据集文件管理 - - 数据集标签和元数据管理 - - 数据集统计信息 - version: 1.0.0 - -servers: - - url: http://localhost:8092/api/v1/data-management - description: Development server - -tags: - - name: Dataset - description: 数据集管理 - - name: DatasetFile - description: 数据集文件管理 - - name: DatasetType - description: 数据集类型管理 - - name: Tag - description: 标签管理 - -paths: - /data-management/datasets: - get: - tags: [Dataset] - operationId: getDatasets - summary: 获取数据集列表 - description: 分页查询数据集列表,支持按类型、标签等条件筛选 - parameters: - - name: page - in: query - schema: - type: integer - default: 0 - description: 页码,从1开始 - - name: size - in: query - schema: - type: integer - default: 20 - description: 每页大小 - - name: type - in: query - schema: - type: string - description: 数据集类型过滤 - - name: tags - in: query - schema: - type: string - description: 标签过滤,多个标签用逗号分隔 - - name: keyword - in: query - schema: - type: string - description: 关键词搜索(名称、描述) - - name: status - in: query - schema: - type: string - enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] - description: 数据集状态过滤 - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: 
'#/components/schemas/PagedDatasetResponse' - '400': - description: 请求参数错误 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - post: - tags: [Dataset] - operationId: createDataset - summary: 创建数据集 - description: 创建新的数据集 - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateDatasetRequest' - responses: - '201': - description: 创建成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetResponse' - '400': - description: 请求参数错误 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - /data-management/datasets/{datasetId}: - get: - tags: [Dataset] - operationId: getDatasetById - summary: 获取数据集详情 - description: 根据ID获取数据集详细信息 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetResponse' - '404': - description: 数据集不存在 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - put: - tags: [Dataset] - summary: 更新数据集 - operationId: updateDataset - description: 更新数据集信息 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/UpdateDatasetRequest' - responses: - '200': - description: 更新成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetResponse' - '404': - description: 数据集不存在 - content: - application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - delete: - tags: [Dataset] - operationId: deleteDataset - summary: 删除数据集 - description: 删除指定的数据集 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '204': - description: 删除成功 - '404': - description: 数据集不存在 - content: - 
application/json: - schema: - $ref: '#/components/schemas/ErrorResponse' - - /data-management/datasets/{datasetId}/files: - get: - tags: [DatasetFile] - summary: 获取数据集文件列表 - operationId: getDatasetFiles - description: 分页获取数据集中的文件列表 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - - name: page - in: query - schema: - type: integer - default: 0 - description: 页码,从0开始 - - name: size - in: query - schema: - type: integer - default: 20 - description: 每页大小 - - name: fileType - in: query - schema: - type: string - description: 文件类型过滤 - - name: status - in: query - schema: - type: string - enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] - description: 文件状态过滤 - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/PagedDatasetFileResponse' - - /data-management/datasets/{datasetId}/files/directories: - post: - tags: [ DatasetFile ] - operationId: createDirectory - summary: 在数据集下创建子目录 - description: 在指定数据集下的某个前缀路径中创建一个新的子目录 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateDirectoryRequest' - responses: - '200': - description: 创建成功 - - /data-management/datasets/{datasetId}/files/{fileId}: - get: - tags: [DatasetFile] - summary: 获取文件详情 - description: 获取数据集中指定文件的详细信息 - operationId: getDatasetFileById - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - - name: fileId - in: path - required: true - schema: - type: string - description: 文件ID - responses: - '200': - description: 成功 - content: - application/json: - schema: - $ref: '#/components/schemas/DatasetFileResponse' - - delete: - tags: [DatasetFile] - summary: 删除文件 - operationId: deleteDatasetFile - description: 从数据集中删除指定文件 - parameters: - - name: datasetId - in: path - required: true - 
schema: - type: string - description: 数据集ID - - name: fileId - in: path - required: true - schema: - type: string - description: 文件ID - responses: - '204': - description: 删除成功 - - /data-management/datasets/{datasetId}/files/{fileId}/download: - get: - tags: [DatasetFile] - operationId: downloadDatasetFile - summary: 下载文件 - description: 下载数据集中的指定文件 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - - name: fileId - in: path - required: true - schema: - type: string - description: 文件ID - responses: - '200': - description: 文件内容 - content: - application/octet-stream: - schema: - type: string - format: binary - - /data-management/datasets/{datasetId}/files/download: - get: - tags: [ DatasetFile ] - operationId: downloadDatasetFileAsZip - summary: 下载文件 - description: 下载数据集中全部文件 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '200': - description: 文件内容 - content: - application/octet-stream: - schema: - type: string - format: binary - - /data-management/datasets/{datasetId}/files/upload/add: - post: - tags: [ DatasetFile ] - operationId: addFilesToDataset - summary: 添加文件到数据集(仅创建数据库记录) - description: 将指定源文件路径列表添加到数据集,仅在数据库中创建记录,不执行物理文件系统操作。 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/AddFilesRequest' - responses: - '200': - description: 添加成功,返回创建的文件记录列表 - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/DatasetFileResponse' - - /data-management/datasets/{datasetId}/files/upload/pre-upload: - post: - tags: [ DatasetFile ] - operationId: preUpload - summary: 切片上传预上传 - description: 预上传接口,返回后续分片上传所需的请求ID - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: 
true - content: - application/json: - schema: - $ref: '#/components/schemas/UploadFilesPreRequest' - responses: - '200': - description: 预上传成功,返回请求ID - content: - application/json: - schema: - type: string - - /data-management/datasets/{datasetId}/files/upload/chunk: - post: - tags: [ DatasetFile ] - operationId: chunkUpload - summary: 切片上传 - description: 使用预上传返回的请求ID进行分片上传 - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - requestBody: - required: true - content: - multipart/form-data: - schema: - $ref: '#/components/schemas/UploadFileRequest' - responses: - '200': - description: 上传成功 - - /data-management/dataset-types: - get: - operationId: getDatasetTypes - tags: [DatasetType] - summary: 获取数据集类型列表 - description: 获取所有支持的数据集类型 - responses: - '200': - description: 成功 - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/DatasetTypeResponse' - - /data-management/tags: - get: - tags: [Tag] - operationId: getTags - summary: 获取标签列表 - description: 获取所有可用的标签 - parameters: - - name: keyword - in: query - schema: - type: string - description: 标签名称关键词搜索 - responses: - '200': - description: 成功 - content: - application/json: - schema: - type: array - items: - $ref: '#/components/schemas/TagResponse' - - post: - tags: [Tag] - operationId: createTag - summary: 创建标签 - description: 创建新的标签 - requestBody: - required: true - content: - application/json: - schema: - $ref: '#/components/schemas/CreateTagRequest' - responses: - '201': - description: 创建成功 - content: - application/json: - schema: - $ref: '#/components/schemas/TagResponse' - - /data-management/datasets/{datasetId}/statistics: - get: - tags: [Dataset] - operationId: getDatasetStatistics - summary: 获取数据集统计信息 - description: 获取数据集的统计信息(文件数量、大小、完成度等) - parameters: - - name: datasetId - in: path - required: true - schema: - type: string - description: 数据集ID - responses: - '200': - description: 成功 - content: - application/json: - 
schema: - $ref: '#/components/schemas/DatasetStatisticsResponse' - -components: - schemas: - PagedDatasetResponse: - type: object - properties: - content: - type: array - items: - $ref: '#/components/schemas/DatasetResponse' - page: - type: integer - description: 当前页码 - size: - type: integer - description: 每页大小 - totalElements: - type: integer - description: 总元素数 - totalPages: - type: integer - description: 总页数 - first: - type: boolean - description: 是否为第一页 - last: - type: boolean - description: 是否为最后一页 - - DatasetResponse: - type: object - properties: - id: - type: string - description: 数据集ID - name: - type: string - description: 数据集名称 - description: - type: string - description: 数据集描述 - type: - $ref: '#/components/schemas/DatasetTypeResponse' - status: - type: string - enum: [ACTIVE, INACTIVE, PROCESSING] - description: 数据集状态 - tags: - type: array - items: - $ref: '#/components/schemas/TagResponse' - description: 标签列表 - dataSource: - type: string - description: 数据源 - targetLocation: - type: string - description: 目标位置 - fileCount: - type: integer - description: 文件数量 - totalSize: - type: integer - format: int64 - description: 总大小(字节) - completionRate: - type: number - format: float - description: 完成率(0-100) - createdAt: - type: string - format: date-time - description: 创建时间 - updatedAt: - type: string - format: date-time - description: 更新时间 - createdBy: - type: string - description: 创建者 - - CreateDatasetRequest: - type: object - required: - - name - - type - properties: - name: - type: string - description: 数据集名称 - minLength: 1 - maxLength: 100 - description: - type: string - description: 数据集描述 - maxLength: 500 - type: - type: string - description: 数据集类型 - tags: - type: array - items: - type: string - description: 标签列表 - dataSource: - type: string - description: 数据源 - targetLocation: - type: string - description: 目标位置 - - UpdateDatasetRequest: - type: object - properties: - name: - type: string - description: 数据集名称 - maxLength: 100 - description: - type: string - 
description: 数据集描述 - maxLength: 500 - tags: - type: array - items: - type: string - description: 标签列表 - status: - type: string - enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] - description: 数据集状态 - - UploadFilesPreRequest: - type: object - description: 切片上传预上传请求 - properties: - hasArchive: - type: boolean - description: 是否为压缩包上传 - default: false - totalFileNum: - type: integer - format: int32 - minimum: 1 - description: 总文件数量 - totalSize: - type: integer - format: int64 - description: 总文件大小(字节) - prefix: - type: string - description: 目标子目录前缀,例如 "images/",为空表示数据集根目录 - required: [ totalFileNum ] - - CreateDirectoryRequest: - type: object - description: 创建数据集子目录请求 - properties: - parentPrefix: - type: string - description: 父级前缀路径,例如 "images/",为空表示数据集根目录 - directoryName: - type: string - description: 新建目录名称 - required: [ directoryName ] - - UploadFileRequest: - type: object - description: 分片上传请求 - properties: - reqId: - type: string - description: 预上传返回的请求ID - fileNo: - type: integer - format: int32 - description: 文件编号(批量中的第几个) - fileName: - type: string - description: 文件名称 - totalChunkNum: - type: integer - format: int32 - description: 文件总分片数量 - chunkNo: - type: integer - format: int32 - description: 当前分片编号(从1开始) - file: - type: string - format: binary - description: 分片二进制内容 - checkSumHex: - type: string - description: 分片校验和(十六进制) - required: [ reqId, fileNo, fileName, totalChunkNum, chunkNo, file ] - - DatasetTypeResponse: - type: object - properties: - code: - type: string - description: 类型编码 - name: - type: string - description: 类型名称 - description: - type: string - description: 类型描述 - supportedFormats: - type: array - items: - type: string - description: 支持的文件格式 - icon: - type: string - description: 图标 - - PagedDatasetFileResponse: - type: object - properties: - content: - type: array - items: - $ref: '#/components/schemas/DatasetFileResponse' - page: - type: integer - description: 当前页码 - size: - type: integer - description: 每页大小 - 
totalElements: - type: integer - description: 总元素数 - totalPages: - type: integer - description: 总页数 - first: - type: boolean - description: 是否为第一页 - last: - type: boolean - description: 是否为最后一页 - - DatasetFileResponse: - type: object - properties: - id: - type: string - description: 文件ID - fileName: - type: string - description: 文件名 - originalName: - type: string - description: 原始文件名 - fileType: - type: string - description: 文件类型 - fileSize: - type: integer - format: int64 - description: 文件大小(字节) - status: - type: string - enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] - description: 文件状态 - description: - type: string - description: 文件描述 - filePath: - type: string - description: 文件路径 - uploadTime: - type: string - format: date-time - description: 上传时间 - uploadedBy: - type: string - description: 上传者 - - TagResponse: - type: object - properties: - id: - type: string - description: 标签ID - name: - type: string - description: 标签名称 - color: - type: string - description: 标签颜色 - description: - type: string - description: 标签描述 - usageCount: - type: integer - description: 使用次数 - - CreateTagRequest: - type: object - required: - - name - properties: - name: - type: string - description: 标签名称 - minLength: 1 - maxLength: 50 - color: - type: string - description: 标签颜色 - pattern: '^#[0-9A-Fa-f]{6}$' - description: - type: string - description: 标签描述 - maxLength: 200 - - DatasetStatisticsResponse: - type: object - properties: - totalFiles: - type: integer - description: 总文件数 - completedFiles: - type: integer - description: 已完成文件数 - totalSize: - type: integer - format: int64 - description: 总大小(字节) - completionRate: - type: number - format: float - description: 完成率(0-100) - fileTypeDistribution: - type: object - additionalProperties: - type: integer - description: 文件类型分布 - statusDistribution: - type: object - additionalProperties: - type: integer - description: 状态分布 - - ErrorResponse: - type: object - properties: - error: - type: string - description: 错误代码 - message: - type: string - 
description: 错误消息 - timestamp: - type: string - format: date-time - description: 错误时间 - path: - type: string - description: 请求路径 - - AddFilesRequest: - type: object - description: 将源文件路径添加到数据集的请求 - properties: - sourcePaths: - type: array - items: - type: string - description: 源文件路径列表(相对或绝对路径),每个元素表示一个要添加的文件或目录路径 - softAdd: - type: boolean - description: 如果为 true,则仅在数据库中创建记录(默认 false) - default: false - required: - - sourcePaths +openapi: 3.0.3 +info: + title: Data Management Service API + description: | + 数据管理服务API,提供数据集的创建、管理和文件操作功能。 + + 主要功能: + - 数据集的创建和管理 + - 多种数据集类型支持(图像、文本、音频、视频、多模态等) + - 数据集文件管理 + - 数据集标签和元数据管理 + - 数据集统计信息 + version: 1.0.0 + +servers: + - url: http://localhost:8092/api/data-management + description: Development server + +tags: + - name: Dataset + description: 数据集管理 + - name: DatasetFile + description: 数据集文件管理 + - name: DatasetType + description: 数据集类型管理 + - name: Tag + description: 标签管理 + +paths: + /api/data-management/datasets: + get: + tags: [Dataset] + operationId: getDatasets + summary: 获取数据集列表 + description: 分页查询数据集列表,支持按类型、标签等条件筛选 + parameters: + - name: page + in: query + schema: + type: integer + default: 0 + description: 页码,从1开始 + - name: size + in: query + schema: + type: integer + default: 20 + description: 每页大小 + - name: type + in: query + schema: + type: string + description: 数据集类型过滤 + - name: tags + in: query + schema: + type: string + description: 标签过滤,多个标签用逗号分隔 + - name: keyword + in: query + schema: + type: string + description: 关键词搜索(名称、描述) + - name: status + in: query + schema: + type: string + enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] + description: 数据集状态过滤 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/PagedDatasetResponse' + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + post: + tags: [Dataset] + operationId: createDataset + summary: 创建数据集 + description: 创建新的数据集 
+ requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateDatasetRequest' + responses: + '200': + description: 创建成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetResponse' + + /api/data-management/datasets/{datasetId}: + get: + tags: [Dataset] + operationId: getDatasetById + summary: 获取数据集详情 + description: 根据ID获取数据集详细信息 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetResponse' + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + put: + tags: [Dataset] + summary: 更新数据集 + operationId: updateDataset + description: 更新数据集信息 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateDatasetRequest' + responses: + '200': + description: 更新成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetResponse' + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + delete: + tags: [Dataset] + operationId: deleteDataset + summary: 删除数据集 + description: 删除指定的数据集 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '204': + description: 删除成功 + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /api/data-management/datasets/{datasetId}/files: + get: + tags: [DatasetFile] + summary: 获取数据集文件列表 + operationId: getDatasetFiles + description: 分页获取数据集中的文件列表 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: isWithDirectory + 
in: query + schema: + type: boolean + default: false + description: 是否包含目录结构 + - name: page + in: query + schema: + type: integer + default: 0 + description: 页码,从0开始 + - name: size + in: query + schema: + type: integer + default: 20 + description: 每页大小 + - name: prefix + in: query + schema: + type: string + default: "" + description: 路径前缀,用于过滤子目录下的文件 + - name: fileType + in: query + schema: + type: string + description: 文件类型过滤 + - name: status + in: query + schema: + type: string + enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] + description: 文件状态过滤 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/PagedDatasetFileResponseWrapper' + + /api/data-management/datasets/{datasetId}/files/directories: + post: + tags: [ DatasetFile ] + operationId: createDirectory + summary: 在数据集下创建子目录 + description: 在指定数据集下的某个前缀路径中创建一个新的子目录 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateDirectoryRequest' + responses: + '200': + description: 创建成功 + + /api/data-management/datasets/{datasetId}/files/{fileId}: + get: + tags: [DatasetFile] + summary: 获取文件详情 + description: 获取数据集中指定文件的详细信息 + operationId: getDatasetFileById + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + - name: prefix + in: query + schema: + type: string + default: "" + description: 路径前缀,用于定位文件 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetFileResponseWrapper' + '404': + description: 文件不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + delete: + tags: [DatasetFile] + summary: 删除文件 + operationId: deleteDatasetFile + description: 从数据集中删除指定文件 
+ parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + - name: prefix + in: query + schema: + type: string + default: "" + description: 路径前缀,用于定位文件 + responses: + '200': + description: 删除成功 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '404': + description: 文件不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /api/data-management/datasets/{datasetId}/files/{fileId}/download: + get: + tags: [DatasetFile] + operationId: downloadDatasetFile + summary: 下载文件 + description: 下载数据集中的指定文件 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + responses: + '200': + description: 文件内容 + content: + application/octet-stream: + schema: + type: string + format: binary + + /api/data-management/datasets/{datasetId}/files/download: + get: + tags: [ DatasetFile ] + operationId: downloadDatasetFileAsZip + summary: 下载文件 + description: 下载数据集中全部文件 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '200': + description: 文件内容 + content: + application/octet-stream: + schema: + type: string + format: binary + + /api/data-management/datasets/{datasetId}/files/directories/download: + get: + tags: [DatasetFile] + operationId: downloadDirectory + summary: 下载目录 + description: 下载指定目录(压缩为 ZIP) + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: prefix + in: query + schema: + type: string + default: "" + description: 目录前缀路径 + responses: + '200': + description: 成功 + content: + application/zip: + schema: + type: string + format: binary + + delete: + tags: [DatasetFile] + operationId: deleteDirectory 
+ summary: 删除目录 + description: 删除目录及其所有内容 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: prefix + in: query + schema: + type: string + default: "" + description: 目录前缀路径 + responses: + '200': + description: 删除成功 + + put: + tags: [DatasetFile] + operationId: renameDirectory + summary: 重命名目录 + description: 重命名数据集目录 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RenameDirectoryRequest' + responses: + '200': + description: 重命名成功 + + /api/data-management/datasets/{datasetId}/files/{fileId}/rename: + put: + tags: [DatasetFile] + operationId: renameFile + summary: 重命名文件 + description: 重命名数据集文件 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + - name: fileId + in: path + required: true + schema: + type: string + description: 文件ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/RenameFileRequest' + responses: + '200': + description: 重命名成功 + + /api/data-management/datasets/{datasetId}/files/upload/add: + post: + tags: [ DatasetFile ] + operationId: addFilesToDataset + summary: 添加文件到数据集(仅创建数据库记录) + description: 将指定源文件路径列表添加到数据集,仅在数据库中创建记录,不执行物理文件系统操作。 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/AddFilesRequest' + responses: + '200': + description: 添加成功,返回创建的文件记录列表 + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/DatasetFileResponse' + + /api/data-management/datasets/{datasetId}/files/upload/copy: + post: + tags: [DatasetFile] + operationId: copyFilesToDataset + summary: 拷贝文件到数据集 + description: 将指定路径中的文件拷贝到数据集目录下 + parameters: 
+ - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CopyFilesRequest' + responses: + '200': + description: 拷贝成功,返回创建的文件记录列表 + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/DatasetFileResponse' + + /api/data-management/datasets/{datasetId}/files/upload/pre-upload: + post: + tags: [ DatasetFile ] + operationId: preUpload + summary: 切片上传预上传 + description: 预上传接口,返回后续分片上传所需的请求ID + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UploadFilesPreRequest' + responses: + '200': + description: 预上传成功,返回请求ID + content: + application/json: + schema: + $ref: '#/components/schemas/StringResponseWrapper' + + /api/data-management/datasets/{datasetId}/files/upload/chunk: + post: + tags: [ DatasetFile ] + operationId: chunkUpload + summary: 切片上传 + description: 使用预上传返回的请求ID进行分片上传 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + requestBody: + required: true + content: + multipart/form-data: + schema: + $ref: '#/components/schemas/UploadFileRequest' + responses: + '200': + description: 上传成功 + + /api/data-management/dataset-types: + get: + operationId: getDatasetTypes + tags: [DatasetType] + summary: 获取数据集类型列表 + description: 获取所有支持的数据集类型 + responses: + '200': + description: 成功 + content: + application/json: + schema: + type: array + items: + $ref: '#/components/schemas/DatasetTypeResponse' + + /api/data-management/tags: + get: + tags: [Tag] + operationId: getTags + summary: 获取标签列表 + description: 获取所有可用的标签 + parameters: + - name: keyword + in: query + schema: + type: string + description: 标签名称关键词搜索 + responses: + '200': + description: 成功 + content: + application/json: + schema: + 
$ref: '#/components/schemas/TagListResponse' + + post: + tags: [Tag] + operationId: createTag + summary: 创建标签 + description: 创建新的标签 + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/CreateTagRequest' + responses: + '200': + description: 创建成功 + content: + application/json: + schema: + $ref: '#/components/schemas/TagResponseWrapper' + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + put: + tags: [Tag] + operationId: updateTag + summary: 更新标签 + description: 更新现有标签信息 + requestBody: + required: true + content: + application/json: + schema: + $ref: '#/components/schemas/UpdateTagRequest' + responses: + '200': + description: 更新成功 + content: + application/json: + schema: + $ref: '#/components/schemas/TagResponseWrapper' + + delete: + tags: [Tag] + operationId: deleteTag + summary: 删除标签 + description: 删除指定的标签(批量删除) + parameters: + - name: ids + in: query + required: true + schema: + type: array + items: + type: string + maxItems: 10 + description: 标签ID列表,最多10个 + style: form + explode: false + responses: + '200': + description: 删除成功 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + '400': + description: 请求参数错误 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /api/data-management/datasets/{datasetId}/lineage: + get: + tags: [Dataset] + operationId: getDatasetLineage + summary: 获取数据集血缘信息 + description: 获取数据集的血缘关系信息 + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + format: uuid + description: 数据集ID + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetLineage' + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /api/data-management/datasets/{datasetId}/statistics: + get: + tags: [Dataset] + operationId: 
getDatasetStatistics + summary: 获取数据集统计信息 + description: 获取数据集的统计信息(文件数量、大小、完成度等) + parameters: + - name: datasetId + in: path + required: true + schema: + type: string + description: 数据集ID + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/DatasetStatisticsResponseWrapper' + '404': + description: 数据集不存在 + content: + application/json: + schema: + $ref: '#/components/schemas/ErrorResponse' + + /api/data-management/datasets/statistics: + get: + tags: [Dataset] + operationId: getAllDatasetStatistics + summary: 获取所有数据集统计信息 + description: 获取所有数据集的统计信息汇总 + responses: + '200': + description: 成功 + content: + application/json: + schema: + $ref: '#/components/schemas/AllDatasetStatisticsResponseWrapper' + +components: + schemas: + PagedDatasetResponse: + type: object + properties: + content: + type: array + items: + $ref: '#/components/schemas/DatasetResponse' + page: + type: integer + description: 当前页码 + size: + type: integer + description: 每页大小 + totalElements: + type: integer + description: 总元素数 + totalPages: + type: integer + description: 总页数 + first: + type: boolean + description: 是否为第一页 + last: + type: boolean + description: 是否为最后一页 + number: + type: integer + description: 页码 + + DatasetResponse: + type: object + properties: + id: + type: string + description: 数据集ID + name: + type: string + description: 数据集名称 + description: + type: string + description: 数据集描述 + datasetType: + type: string + description: 数据集类型 + status: + type: string + description: 数据集状态 + tags: + type: array + items: + $ref: '#/components/schemas/TagResponse' + description: 标签列表 + retentionDays: + type: integer + description: 数据集保留天数 + dataSource: + type: string + description: 数据源 + targetLocation: + type: string + description: 目标位置 + fileCount: + type: integer + description: 文件数量 + totalSize: + type: integer + format: int64 + description: 总大小(字节) + completionRate: + type: number + format: float + description: 完成率(0-100) + createdAt: + type: 
string + format: date-time + description: 创建时间 + updatedAt: + type: string + format: date-time + description: 更新时间 + createdBy: + type: string + description: 创建者 + updatedBy: + type: string + description: 更新者 + distribution: + type: object + additionalProperties: + type: object + additionalProperties: + type: integer + format: int64 + description: 分布信息 + pvcName: + type: string + description: 数据集pvc名称 + + CreateDatasetRequest: + type: object + required: + - name + - datasetType + properties: + name: + type: string + description: 数据集名称 + minLength: 1 + maxLength: 255 + description: + type: string + description: 数据集描述 + maxLength: 500 + datasetType: + type: string + description: 数据集类型,取值范围为TEXT/IMAGE/VIDEO/AUDIO/OTHER + enum: [TEXT, IMAGE, VIDEO, AUDIO, OTHER] + tags: + type: array + items: + type: string + description: 标签列表 + maxItems: 20 + dataSource: + type: string + description: 数据源 + maxLength: 255 + retentionDays: + type: integer + description: 保留天数 + minimum: 0 + status: + type: string + enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] + description: 数据集状态 + + UpdateDatasetRequest: + type: object + required: + - name + properties: + name: + type: string + description: 数据集名称 + maxLength: 255 + description: + type: string + description: 数据集描述 + maxLength: 500 + dataSource: + type: string + description: 数据源 + maxLength: 255 + tags: + type: array + items: + type: string + description: 标签列表 + maxItems: 20 + status: + type: string + enum: [DRAFT, ACTIVE, PROCESSING, ARCHIVED, PUBLISHED, DEPRECATED] + description: 数据集状态 + + UploadFilesPreRequest: + type: object + description: 切片上传预上传请求 + properties: + hasArchive: + type: boolean + description: 是否为压缩包上传 + default: false + totalFileNum: + type: integer + format: int32 + minimum: 1 + description: 总文件数量 + totalSize: + type: integer + format: int64 + minimum: 1 + description: 总文件大小(字节) + prefix: + type: string + description: 目标子目录前缀,例如 "images/",为空表示数据集根目录 + maxLength: 500 + required: [ totalFileNum ] + 
+ AddFilesRequest: + type: object + description: 将源文件路径添加到数据集的请求 + properties: + files: + type: array + description: 文件列表 + items: + $ref: '#/components/schemas/FileRequest' + minItems: 1 + maxItems: 1000 + softAdd: + type: boolean + description: 如果为 true,则仅在数据库中创建记录(默认 false) + default: false + prefix: + type: string + description: 目标子目录前缀 + maxLength: 500 + default: "" + required: + - files + + FileRequest: + type: object + description: 文件请求对象 + properties: + filePath: + type: string + description: 文件路径 + maxLength: 1000 + metadata: + type: object + additionalProperties: true + description: 文件元数据 + required: + - filePath + + CreateDirectoryRequest: + type: object + description: 创建数据集子目录请求 + required: + - directoryName + properties: + parentPrefix: + type: string + description: 父级前缀路径,例如 "images/",为空表示数据集根目录 + directoryName: + type: string + description: 新建目录名称 + maxLength: 255 + + UploadFileRequest: + type: object + description: 分片上传请求 + properties: + reqId: + type: string + description: 预上传返回的请求ID + fileNo: + type: integer + format: int32 + description: 文件编号(批量中的第几个) + minimum: 0 + fileName: + type: string + description: 文件名称 + maxLength: 255 + totalChunkNum: + type: integer + format: int32 + description: 文件总分片数量 + minimum: 1 + chunkNo: + type: integer + format: int32 + description: 当前分片编号(从1开始) + minimum: 1 + file: + type: string + format: binary + description: 分片二进制内容 + checkSumHex: + type: string + description: 分片校验和(十六进制字符串,64位) + pattern: '^[0-9a-fA-F]{64}$' + required: [ reqId, fileNo, fileName, totalChunkNum, chunkNo, file ] + + DatasetTypeResponse: + type: object + properties: + code: + type: string + description: 类型编码 + name: + type: string + description: 类型名称 + description: + type: string + description: 类型描述 + supportedFormats: + type: array + items: + type: string + description: 支持的文件格式 + icon: + type: string + description: 图标 + + PagedDatasetFileResponse: + type: object + properties: + content: + type: array + items: + $ref: 
'#/components/schemas/DatasetFileResponse' + page: + type: integer + description: 当前页码 + size: + type: integer + description: 每页大小 + totalElements: + type: integer + description: 总元素数 + totalPages: + type: integer + description: 总页数 + first: + type: boolean + description: 是否为第一页 + last: + type: boolean + description: 是否为最后一页 + number: + type: integer + description: 页码 + + DatasetFileResponse: + type: object + properties: + id: + type: string + description: 文件ID + fileName: + type: string + description: 文件名 + originalName: + type: string + description: 原始文件名 + fileType: + type: string + description: 文件类型 + fileSize: + type: integer + format: int64 + description: 文件大小(字节) + status: + type: string + enum: [UPLOADED, PROCESSING, COMPLETED, ERROR] + description: 文件状态 + description: + type: string + description: 文件描述 + filePath: + type: string + description: 文件路径 + tags: + type: string + description: 文件标签(JSON 字符串) + tagsUpdatedAt: + type: string + format: date-time + description: 标签更新时间 + metadata: + type: string + description: 文件元数据(包含标注信息等,JSON 字符串) + uploadTime: + type: string + format: date-time + description: 上传时间 + lastAccessTime: + type: string + format: date-time + description: 最后更新时间 + uploadedBy: + type: string + description: 上传者 + directory: + type: boolean + description: 是否为目录 + fileCount: + type: integer + format: int64 + description: 目录文件数量 + + TagResponse: + type: object + properties: + id: + type: string + description: 标签ID + name: + type: string + description: 标签名称 + color: + type: string + description: 标签颜色 + description: + type: string + description: 标签描述 + usageCount: + type: integer + description: 使用次数 + category: + type: string + description: 标签分类 + + CreateTagRequest: + type: object + required: + - name + properties: + name: + type: string + description: 标签名称 + minLength: 1 + maxLength: 100 + color: + type: string + description: 标签颜色 + pattern: '^#[0-9A-Fa-f]{6}$' + description: + type: string + description: 标签描述 + maxLength: 500 + category: + 
type: string + description: 标签分类 + maxLength: 50 + + DatasetStatisticsResponse: + type: object + properties: + totalFiles: + type: integer + description: 总文件数 + completedFiles: + type: integer + description: 已完成文件数 + totalSize: + type: integer + format: int64 + description: 总大小(字节) + completionRate: + type: number + format: float + description: 完成率(0-100) + fileTypeDistribution: + type: object + additionalProperties: + type: integer + description: 文件类型分布 + statusDistribution: + type: object + additionalProperties: + type: integer + description: 状态分布 + + ErrorResponse: + type: object + properties: + error: + type: string + description: 错误代码 + message: + type: string + description: 错误消息 + timestamp: + type: string + format: date-time + description: 错误时间 + path: + type: string + description: 请求路径 + + CopyFilesRequest: + type: object + description: 拷贝文件到数据集的请求 + properties: + sourcePaths: + type: array + items: + type: string + description: 源文件路径列表(相对或绝对路径),每个元素表示一个要添加的文件或目录路径 + required: + - sourcePaths + + UpdateTagRequest: + type: object + description: 更新标签请求 + required: + - id + properties: + id: + type: string + description: 标签ID + name: + type: string + description: 标签名称 + maxLength: 100 + color: + type: string + description: 标签颜色,十六进制格式 + pattern: '^#[0-9A-Fa-f]{6}$' + description: + type: string + description: 标签描述 + maxLength: 500 + + RenameFileRequest: + type: object + description: 重命名数据集文件请求 + required: + - newName + properties: + newName: + type: string + description: 新的文件名称(不包含后缀) + maxLength: 255 + + RenameDirectoryRequest: + type: object + description: 重命名数据集目录请求 + required: + - prefix + - newName + properties: + prefix: + type: string + description: 目录前缀,例如 "images/",与列表/删除目录接口保持一致 + maxLength: 500 + newName: + type: string + description: 新的目录名称 + maxLength: 255 + + DatasetLineage: + type: object + description: 数据集血缘信息 + properties: + upstreamDatasets: + type: array + description: 上游数据集 + items: + $ref: '#/components/schemas/DatasetResponse' + 
downstreamDatasets: + type: array + description: 下游数据集 + items: + $ref: '#/components/schemas/DatasetResponse' + tasks: + type: array + description: 相关任务 + items: + type: object + properties: + id: + type: string + name: + type: string + type: + type: string + + AllDatasetStatisticsResponse: + type: object + description: 所有数据集统计信息汇总 + properties: + totalDatasets: + type: integer + description: 总数据集数量 + totalFiles: + type: integer + description: 总文件数量 + totalSize: + type: integer + format: int64 + description: 总大小(字节) + datasetsByType: + type: object + additionalProperties: + type: integer + description: 按类型分组的统计 + datasetsByStatus: + type: object + additionalProperties: + type: integer + description: 按状态分组的统计 + + TagListResponse: + type: object + description: 标签列表响应 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + type: array + items: + $ref: '#/components/schemas/TagResponse' + description: 标签列表 + + DatasetStatisticsResponseWrapper: + type: object + description: 数据集统计信息响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + $ref: '#/components/schemas/DatasetStatisticsResponse' + + AllDatasetStatisticsResponseWrapper: + type: object + description: 所有数据集统计信息响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + $ref: '#/components/schemas/AllDatasetStatisticsResponse' + + PagedDatasetFileResponseWrapper: + type: object + description: 数据集文件分页响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + $ref: '#/components/schemas/PagedDatasetFileResponse' + + DatasetFileResponseWrapper: + type: object + description: 数据集文件响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + $ref: '#/components/schemas/DatasetFileResponse' + + StringResponseWrapper: + 
type: object + description: 字符串响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + type: string + description: 响应数据 + + TagResponseWrapper: + type: object + description: 标签响应包装器 + properties: + code: + type: integer + description: 响应代码 + message: + type: string + description: 响应消息 + data: + $ref: '#/components/schemas/TagResponse' diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java index c1d941e86..9c5de7a5a 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/infrastructure/exception/DataManagementErrorCode.java @@ -1,47 +1,87 @@ -package com.datamate.datamanagement.infrastructure.exception; - -import com.datamate.common.infrastructure.exception.ErrorCode; -import lombok.AllArgsConstructor; -import lombok.Getter; - -/** - * 数据管理模块错误码 - * - * @author dallas - * @since 2025-10-20 - */ -@Getter -@AllArgsConstructor -public enum DataManagementErrorCode implements ErrorCode { - /** - * 数据集不存在 - */ - DATASET_NOT_FOUND("data_management.0001", "数据集不存在"), - /** - * 数据集已存在 - */ - DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"), - /** - * 数据集状态错误 - */ - DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"), - /** - * 数据集标签不存在 - */ - DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"), - /** - * 数据集标签已存在 - */ - DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在"), - /** - * 数据集文件已存在 - */ - DATASET_FILE_ALREADY_EXISTS("data_management.0006", "数据集文件已存在"), - /** - * 目录不存在 - */ - DIRECTORY_NOT_FOUND("data_management.0007", "目录不存在"); - - private final String code; - 
private final String message; -} +package com.datamate.datamanagement.infrastructure.exception; + +import com.datamate.common.infrastructure.exception.ErrorCode; +import lombok.AllArgsConstructor; +import lombok.Getter; + +/** + * 数据管理模块错误码 + * + * @author dallas + * @since 2025-10-20 + */ +@Getter +@AllArgsConstructor +public enum DataManagementErrorCode implements ErrorCode { + /** + * 数据集不存在 + */ + DATASET_NOT_FOUND("data_management.0001", "数据集不存在"), + /** + * 数据集已存在 + */ + DATASET_ALREADY_EXISTS("data_management.0002", "数据集已存在"), + /** + * 数据集状态错误 + */ + DATASET_STATUS_ERROR("data_management.0003", "数据集状态错误"), + /** + * 数据集标签不存在 + */ + DATASET_TAG_NOT_FOUND("data_management.0004", "数据集标签不存在"), + /** + * 数据集标签已存在 + */ + DATASET_TAG_ALREADY_EXISTS("data_management.0005", "数据集标签已存在"), + /** + * 数据集文件已存在 + */ + DATASET_FILE_ALREADY_EXISTS("data_management.0006", "数据集文件已存在"), + /** + * 目录不存在 + */ + DIRECTORY_NOT_FOUND("data_management.0007", "目录不存在"), + /** + * 数据集名称长度不能超过255个字符 + */ + DATASET_NAME_TOO_LONG("data_management.0008", "数据集名称长度不能超过255个字符"), + /** + * 数据集类型不合法 + */ + DATASET_TYPE_INVALID("data_management.0009", "数据集类型不合法"), + /** + * 数据集状态不合法 + */ + DATASET_STATUS_INVALID("data_management.0010", "数据集状态不合法"), + /** + * 标签名称长度不能超过100个字符 + */ + TAG_NAME_TOO_LONG("data_management.0011", "标签名称长度不能超过100个字符"), + /** + * 标签名称已存在 + */ + TAG_NAME_DUPLICATE("data_management.0012", "标签名称已存在"), + /** + * 标签颜色格式不正确,应为十六进制颜色代码 + */ + TAG_COLOR_INVALID("data_management.0013", "标签颜色格式不正确,应为十六进制颜色代码"), + /** + * 保留天数必须为非负整数 + */ + RETENTION_DAYS_INVALID("data_management.0014", "保留天数必须为非负整数"), + /** + * 路径长度不能超过限制 + */ + PATH_TOO_LONG("data_management.0015", "路径长度不能超过限制"), + /** + * 文件名包含非法字符 + */ + FILE_NAME_INVALID("data_management.0016", "文件名包含非法字符"), + /** + * 路径前缀不能以点开头 + */ + PREFIX_INVALID("data_management.0017", "路径前缀不能以点开头"); + + private final String code; + private final String message; +} diff --git 
a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/AddFilesRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/AddFilesRequest.java index 89390577f..094510bdb 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/AddFilesRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/AddFilesRequest.java @@ -1,5 +1,10 @@ package com.datamate.datamanagement.interfaces.dto; +import com.datamate.datamanagement.interfaces.validation.ValidPath; +import jakarta.validation.Valid; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.Size; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.NoArgsConstructor; @@ -34,6 +39,8 @@ public AddFilesRequest(List paths) { @Getter @Setter public static class FileRequest { + @NotBlank(message = "文件路径不能为空") + @Size(max = 1000, message = "文件路径长度不能超过1000个字符") private String filePath; private Map metadata; @@ -41,8 +48,12 @@ public static class FileRequest { private boolean softAdd; + @ValidPath() private String prefix = ""; + @NotEmpty(message = "文件列表不能为空") + @Size(max = 1000, message = "文件数量不能超过1000个") + @Valid private List files; public boolean isValidPrefix() { diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CopyFilesRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CopyFilesRequest.java index 83234bae4..c97c9d4eb 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CopyFilesRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CopyFilesRequest.java @@ -1,6 +1,7 @@ package 
com.datamate.datamanagement.interfaces.dto; import jakarta.validation.constraints.NotEmpty; +import jakarta.validation.constraints.Size; import java.util.List; @@ -10,5 +11,8 @@ * @author dallas * @since 2025-11-13 */ -public record CopyFilesRequest(@NotEmpty List sourcePaths) { +public record CopyFilesRequest( + @NotEmpty(message = "源文件路径列表不能为空") + @Size(max = 1000, message = "文件数量不能超过1000个") + List sourcePaths) { } diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java index 6e8534c18..3ff1ba6b7 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDatasetRequest.java @@ -1,6 +1,8 @@ package com.datamate.datamanagement.interfaces.dto; +import com.datamate.datamanagement.common.enums.DatasetStatusType; import com.datamate.datamanagement.common.enums.DatasetType; +import jakarta.validation.constraints.Min; import jakarta.validation.constraints.NotBlank; import jakarta.validation.constraints.NotNull; import jakarta.validation.constraints.Size; @@ -21,12 +23,12 @@ @AllArgsConstructor public class CreateDatasetRequest { /** 数据集名称 */ - @Size(min = 1, max = 100) + @Size(min = 1, max = 255, message = "数据集名称长度不能超过255个字符") @NotBlank(message = "数据集名称不能为空") @McpToolParam(description = "数据集名称") private String name; /** 数据集描述 */ - @Size(max = 500) + @Size(max = 500, message = "数据集描述长度不能超过500个字符") @McpToolParam(description = "数据集描述", required = false) private String description; /** 数据集类型 */ @@ -34,15 +36,18 @@ public class CreateDatasetRequest { @McpToolParam(description = "数据集类型,取值范围为TEXT/IMAGE/VIDEO/AUDIO/OTHER") private DatasetType datasetType; /** 标签列表 */ + @Size(max = 20, 
message = "标签数量不能超过20个") @McpToolParam(description = "标签列表", required = false) private List tags; /** 数据源 */ + @Size(max = 255, message = "数据源长度不能超过255个字符") @McpToolParam(description = "数据源", required = false) private String dataSource; /** 保留天数 */ + @Min(value = 0, message = "保留天数必须为非负整数") @McpToolParam(description = "保留天数", required = false) private Integer retentionDays; /** 数据集状态 */ @McpToolParam(description = "数据集状态", required = false) - private String status; + private DatasetStatusType status; } diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java index 441cc74a0..cc9b02d3b 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateDirectoryRequest.java @@ -1,20 +1,26 @@ -package com.datamate.datamanagement.interfaces.dto; - -import jakarta.validation.constraints.NotBlank; -import lombok.Getter; -import lombok.Setter; - -/** - * 创建数据集子目录请求 - */ -@Getter -@Setter -public class CreateDirectoryRequest { - - /** 父级前缀路径,例如 "images/",为空表示数据集根目录 */ - private String parentPrefix; - - /** 新建目录名称 */ - @NotBlank - private String directoryName; -} +package com.datamate.datamanagement.interfaces.dto; + +import com.datamate.datamanagement.interfaces.validation.ValidFileName; +import com.datamate.datamanagement.interfaces.validation.ValidPath; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; +import lombok.Getter; +import lombok.Setter; + +/** + * 创建数据集子目录请求 + */ +@Getter +@Setter +public class CreateDirectoryRequest { + + /** 父级前缀路径,例如 "images/",为空表示数据集根目录 */ + @ValidPath() + private String parentPrefix; + + /** 新建目录名称 */ + 
@NotBlank(message = "目录名称不能为空") + @ValidFileName + @Size(max = 255, message = "目录名称长度不能超过255个字符") + private String directoryName; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateTagRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateTagRequest.java index dca22bb94..2b35ff3a0 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateTagRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/CreateTagRequest.java @@ -1,5 +1,8 @@ package com.datamate.datamanagement.interfaces.dto; +import com.datamate.datamanagement.interfaces.validation.ValidHexColor; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; import lombok.Getter; import lombok.Setter; @@ -10,9 +13,16 @@ @Setter public class CreateTagRequest { /** 标签名称 */ + @NotBlank(message = "标签名称不能为空") + @Size(min = 1, max = 100, message = "标签名称长度不能超过100个字符") private String name; /** 标签颜色 */ + @ValidHexColor private String color; /** 标签描述 */ + @Size(max = 500, message = "标签描述长度不能超过500个字符") private String description; + /** 标签分类 */ + @Size(max = 50, message = "标签分类长度不能超过50个字符") + private String category; } diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetPagingQuery.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetPagingQuery.java index e9bbb4c77..22fd8eed2 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetPagingQuery.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/DatasetPagingQuery.java @@ -3,6 +3,7 @@ import com.datamate.common.interfaces.PagingQuery; import 
com.datamate.datamanagement.common.enums.DatasetStatusType; import com.datamate.datamanagement.common.enums.DatasetType; +import jakarta.validation.constraints.Size; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.NoArgsConstructor; @@ -30,12 +31,14 @@ public class DatasetPagingQuery extends PagingQuery { /** * 标签名过滤 */ + @Size(max = 10, message = "过滤标签数量不能超过10个") @McpToolParam(description = "标签名过滤", required = false) private List tags = new ArrayList<>(); /** * 关键词搜索(名称或描述) */ + @Size(max = 100, message = "关键词长度不能超过100个字符") @McpToolParam(description = "关键词搜索(名称或描述)", required = false) private String keyword; diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameDirectoryRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameDirectoryRequest.java index 76c07aec0..7fd44a2da 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameDirectoryRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameDirectoryRequest.java @@ -1,21 +1,27 @@ -package com.datamate.datamanagement.interfaces.dto; - -import jakarta.validation.constraints.NotBlank; -import lombok.Getter; -import lombok.Setter; - -/** - * 重命名数据集目录请求 - */ -@Getter -@Setter -public class RenameDirectoryRequest { - - /** 目录前缀,例如 "images/",与列表/删除目录接口保持一致 */ - @NotBlank - private String prefix; - - /** 新的目录名称 */ - @NotBlank - private String newName; -} +package com.datamate.datamanagement.interfaces.dto; + +import com.datamate.datamanagement.interfaces.validation.ValidFileName; +import com.datamate.datamanagement.interfaces.validation.ValidPath; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; +import lombok.Getter; +import lombok.Setter; + +/** + * 重命名数据集目录请求 + */ +@Getter +@Setter +public class 
RenameDirectoryRequest { + + /** 目录前缀,例如 "images/",与列表/删除目录接口保持一致 */ + @NotBlank(message = "目录前缀不能为空") + @ValidPath(maxLength = 500) + private String prefix; + + /** 新的目录名称 */ + @NotBlank(message = "新目录名称不能为空") + @ValidFileName + @Size(max = 255, message = "目录名称长度不能超过255个字符") + private String newName; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameFileRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameFileRequest.java index 602dc8652..ef4dbd9de 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameFileRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/RenameFileRequest.java @@ -1,17 +1,21 @@ -package com.datamate.datamanagement.interfaces.dto; - -import jakarta.validation.constraints.NotBlank; -import lombok.Getter; -import lombok.Setter; - -/** - * 重命名数据集文件请求 - */ -@Getter -@Setter -public class RenameFileRequest { - - /** 新的文件名称(不包含后缀) */ - @NotBlank - private String newName; -} +package com.datamate.datamanagement.interfaces.dto; + +import com.datamate.datamanagement.interfaces.validation.ValidFileName; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; +import lombok.Getter; +import lombok.Setter; + +/** + * 重命名数据集文件请求 + */ +@Getter +@Setter +public class RenameFileRequest { + + /** 新的文件名称(不包含后缀) */ + @NotBlank(message = "新文件名不能为空") + @ValidFileName + @Size(max = 255, message = "文件名称长度不能超过255个字符") + private String newName; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateDatasetRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateDatasetRequest.java index 91070f391..dc344df6a 100644 --- 
a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateDatasetRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateDatasetRequest.java @@ -15,15 +15,17 @@ @Setter public class UpdateDatasetRequest { /** 数据集名称 */ - @Size(min = 1, max = 100) + @Size(min = 1, max = 255, message = "数据集名称长度不能超过255个字符") @NotBlank(message = "数据集名称不能为空") private String name; /** 数据集描述 */ - @Size(max = 500) + @Size(max = 500, message = "数据集描述长度不能超过500个字符") private String description; /** 归集任务id */ + @Size(max = 255, message = "数据源长度不能超过255个字符") private String dataSource; /** 标签列表 */ + @Size(max = 20, message = "标签数量不能超过20个") private List tags; /** 数据集状态 */ private DatasetStatusType status; diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateTagRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateTagRequest.java index 1fb6d13c7..3348a4516 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateTagRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UpdateTagRequest.java @@ -1,5 +1,8 @@ package com.datamate.datamanagement.interfaces.dto; +import com.datamate.datamanagement.interfaces.validation.ValidHexColor; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.Size; import lombok.Getter; import lombok.Setter; @@ -10,11 +13,15 @@ @Setter public class UpdateTagRequest { /** 标签 ID */ + @NotBlank(message = "标签ID不能为空") private String id; /** 标签名称 */ + @Size(max = 100, message = "标签名称长度不能超过100个字符") private String name; /** 标签颜色 */ + @ValidHexColor private String color; /** 标签描述 */ + @Size(max = 500, message = "标签描述长度不能超过500个字符") private String description; } diff --git 
a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFileRequest.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFileRequest.java index e8c2b6984..6713ea5c3 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFileRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFileRequest.java @@ -1,5 +1,11 @@ package com.datamate.datamanagement.interfaces.dto; +import com.datamate.datamanagement.interfaces.validation.ValidFileName; +import jakarta.validation.constraints.Min; +import jakarta.validation.constraints.NotBlank; +import jakarta.validation.constraints.NotNull; +import jakarta.validation.constraints.Pattern; +import jakarta.validation.constraints.Size; import lombok.Getter; import lombok.Setter; import org.springframework.web.multipart.MultipartFile; @@ -12,23 +18,32 @@ @Setter public class UploadFileRequest { /** 预上传返回的id,用来确认同一个任务 */ + @NotBlank(message = "请求ID不能为空") private String reqId; /** 文件编号,用于标识批量上传中的第几个文件 */ + @Min(value = 0, message = "文件编号必须为非负整数") private int fileNo; /** 文件名称 */ + @NotBlank(message = "文件名称不能为空") + @ValidFileName + @Size(max = 255, message = "文件名称长度不能超过255个字符") private String fileName; /** 文件总分块数量 */ + @Min(value = 1, message = "总分块数量必须大于0") private int totalChunkNum; /** 当前分块编号,从1开始 */ + @Min(value = 1, message = "分块编号必须大于0") private int chunkNo; /** 上传的文件分块内容 */ + @NotNull(message = "文件内容不能为空") private MultipartFile file; /** 文件分块的校验和(十六进制字符串),用于验证文件完整性 */ + @Pattern(regexp = "^[0-9a-fA-F]{64}$", message = "校验和格式不正确,应为64位十六进制字符串") private String checkSumHex; } diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java 
b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java index 9b7ced05e..85d8372e5 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/dto/UploadFilesPreRequest.java @@ -1,25 +1,28 @@ -package com.datamate.datamanagement.interfaces.dto; - -import jakarta.validation.constraints.Min; -import lombok.Getter; -import lombok.Setter; - -/** - * 切片上传预上传请求 - */ -@Getter -@Setter -public class UploadFilesPreRequest { - /** 是否为压缩包上传 */ - private boolean hasArchive; - - /** 总文件数量 */ - @Min(1) - private int totalFileNum; - - /** 总文件大小 */ - private long totalSize; - - /** 目标子目录前缀,例如 "images/",为空表示数据集根目录 */ - private String prefix; -} +package com.datamate.datamanagement.interfaces.dto; + +import com.datamate.datamanagement.interfaces.validation.ValidPath; +import jakarta.validation.constraints.Min; +import lombok.Getter; +import lombok.Setter; + +/** + * 切片上传预上传请求 + */ +@Getter +@Setter +public class UploadFilesPreRequest { + /** 是否为压缩包上传 */ + private boolean hasArchive; + + /** 总文件数量 */ + @Min(value = 1, message = "总文件数量必须大于0") + private int totalFileNum; + + /** 总文件大小 */ + @Min(value = 1, message = "总文件大小必须大于0") + private long totalSize; + + /** 目标子目录前缀,例如 "images/",为空表示数据集根目录 */ + @ValidPath(maxLength = 500) + private String prefix; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetController.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetController.java index 364921438..5bf8800fe 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetController.java +++ 
b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/rest/DatasetController.java @@ -9,11 +9,13 @@ import com.datamate.datamanagement.interfaces.converter.DatasetConverter; import com.datamate.datamanagement.interfaces.dto.*; import jakarta.validation.Valid; +import jakarta.validation.constraints.Pattern; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.springaicommunity.mcp.annotation.McpTool; import org.springframework.http.HttpStatus; import org.springframework.http.ResponseEntity; +import org.springframework.validation.annotation.Validated; import org.springframework.web.bind.annotation.*; import java.util.Map; @@ -25,6 +27,7 @@ @RestController @RequiredArgsConstructor @RequestMapping("/data-management/datasets") +@Validated public class DatasetController { private final DatasetApplicationService datasetApplicationService; @@ -60,7 +63,10 @@ public DatasetResponse createDataset(@RequestBody @Valid CreateDatasetRequest cr * @return 数据集响应 */ @GetMapping("/{datasetId}") - public DatasetResponse getDatasetById(@PathVariable("datasetId") String datasetId) { + public DatasetResponse getDatasetById( + @PathVariable("datasetId") + @Pattern(regexp = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", message = "数据集ID格式不正确,应为UUID格式") + String datasetId) { DatasetResponse dataset = DatasetConverter.INSTANCE.convertToResponse(datasetApplicationService.getDataset(datasetId)); dataset.setPvcName(datasetApplicationService.getDatasetPvcName()); return dataset; @@ -74,14 +80,20 @@ public DatasetResponse getDatasetById(@PathVariable("datasetId") String datasetI * @return 更新后的数据集响应 */ @PutMapping("/{datasetId}") - public DatasetResponse updateDataset(@PathVariable("datasetId") String datasetId, - @RequestBody UpdateDatasetRequest updateDatasetRequest) { + public DatasetResponse updateDataset( + @PathVariable("datasetId") + @Pattern(regexp = 
"^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", message = "数据集ID格式不正确,应为UUID格式") + String datasetId, + @RequestBody UpdateDatasetRequest updateDatasetRequest) { Dataset dataset = datasetApplicationService.updateDataset(datasetId, updateDatasetRequest); return DatasetConverter.INSTANCE.convertToResponse(dataset); } @GetMapping("/{datasetId}/lineage") - public DatasetLineage getDatasetLineage(@PathVariable("datasetId") String datasetId) { + public DatasetLineage getDatasetLineage( + @PathVariable("datasetId") + @Pattern(regexp = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", message = "数据集ID格式不正确,应为UUID格式") + String datasetId) { return datasetApplicationService.getDatasetLineage(datasetId); } @@ -91,13 +103,18 @@ public DatasetLineage getDatasetLineage(@PathVariable("datasetId") String datase * @param datasetId 数据集ID */ @DeleteMapping("/{datasetId}") - public void deleteDataset(@PathVariable("datasetId") String datasetId) { + public void deleteDataset( + @PathVariable("datasetId") + @Pattern(regexp = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", message = "数据集ID格式不正确,应为UUID格式") + String datasetId) { datasetApplicationService.deleteDataset(datasetId); } @GetMapping("/{datasetId}/statistics") public ResponseEntity> getDatasetStatistics( - @PathVariable("datasetId") String datasetId) { + @PathVariable("datasetId") + @Pattern(regexp = "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$", message = "数据集ID格式不正确,应为UUID格式") + String datasetId) { try { Map stats = datasetApplicationService.getDatasetStatistics(datasetId); diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileName.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileName.java new file mode 100644 index 000000000..ad303acc4 --- /dev/null +++ 
b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileName.java @@ -0,0 +1,26 @@ +package com.datamate.datamanagement.interfaces.validation; + +import jakarta.validation.Constraint; +import jakarta.validation.Payload; + +import java.lang.annotation.*; + +/** + * 文件名校验注解 + * 验证文件名不包含非法字符 + * + * @author DataMate + * @since 2026/02/11 + */ +@Documented +@Constraint(validatedBy = ValidFileNameValidator.class) +@Target({ElementType.FIELD, ElementType.PARAMETER}) +@Retention(RetentionPolicy.RUNTIME) +public @interface ValidFileName { + + String message() default "文件名包含非法字符"; + + Class[] groups() default {}; + + Class[] payload() default {}; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileNameValidator.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileNameValidator.java new file mode 100644 index 000000000..ae1b27486 --- /dev/null +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidFileNameValidator.java @@ -0,0 +1,43 @@ +package com.datamate.datamanagement.interfaces.validation; + +import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode; +import jakarta.validation.ConstraintValidator; +import jakarta.validation.ConstraintValidatorContext; + +import java.util.regex.Pattern; + +/** + * 文件名校验器 + * + * @author DataMate + * @since 2026/02/11 + */ +public class ValidFileNameValidator implements ConstraintValidator { + + /** + * 文件名正则表达式 + * 不允许包含特殊字符: / \ : * ? " < > | \0 + * 允许字母、数字、中文、常见符号(- _ . 
space) + */ + private static final Pattern FILE_NAME_PATTERN = Pattern.compile( + "^[^/\\\\:*?\"<>|\\x00]+$" + ); + + @Override + public boolean isValid(String value, ConstraintValidatorContext context) { + if (value == null || value.isEmpty()) { + return true; // 空值由 @NotBlank 等其他注解处理 + } + + boolean isValid = FILE_NAME_PATTERN.matcher(value).matches(); + + if (!isValid) { + context.disableDefaultConstraintViolation(); + context.buildConstraintViolationWithTemplate( + DataManagementErrorCode.FILE_NAME_INVALID.getMessage() + ).addConstraintViolation(); + } + + return isValid; + } +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColor.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColor.java new file mode 100644 index 000000000..a978f3909 --- /dev/null +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColor.java @@ -0,0 +1,26 @@ +package com.datamate.datamanagement.interfaces.validation; + +import jakarta.validation.Constraint; +import jakarta.validation.Payload; + +import java.lang.annotation.*; + +/** + * 十六进制颜色格式校验注解 + * 验证字符串是否符合十六进制颜色格式 (#RRGGBB 或 #RGB) + * + * @author DataMate + * @since 2026/02/11 + */ +@Documented +@Constraint(validatedBy = ValidHexColorValidator.class) +@Target({ElementType.FIELD, ElementType.PARAMETER}) +@Retention(RetentionPolicy.RUNTIME) +public @interface ValidHexColor { + + String message() default "标签颜色格式不正确,应为十六进制颜色代码"; + + Class[] groups() default {}; + + Class[] payload() default {}; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColorValidator.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColorValidator.java new file mode 100644 index 000000000..f071b7e28 --- 
/dev/null +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidHexColorValidator.java @@ -0,0 +1,43 @@ +package com.datamate.datamanagement.interfaces.validation; + +import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode; +import com.datamate.common.infrastructure.exception.ErrorCode; // NOTE(review): unused import — remove before merge +import jakarta.validation.ConstraintValidator; +import jakarta.validation.ConstraintValidatorContext; +import org.hibernate.validator.internal.constraintvalidators.bv.size.SizeValidatorForCharSequence; // NOTE(review): unused AND a Hibernate internal class — must not be imported; remove before merge + +import java.util.regex.Pattern; + +/** + * 十六进制颜色格式校验器 + * + * @author DataMate + * @since 2026/02/11 + */ +public class ValidHexColorValidator implements ConstraintValidator { + + /** + * 十六进制颜色正则表达式 + * 仅支持 #RRGGBB 格式(当前正则不匹配 #RGB 简写;如需支持简写请同步调整正则) + */ + private static final Pattern HEX_COLOR_PATTERN = Pattern.compile("^#[0-9a-fA-F]{6}$"); + + @Override + public boolean isValid(String value, ConstraintValidatorContext context) { + if (value == null) { + return true; // null 值由 @NotBlank 等其他注解处理 + } + + boolean isValid = HEX_COLOR_PATTERN.matcher(value).matches(); + + if (!isValid) { + // 自定义错误消息和错误码 + context.disableDefaultConstraintViolation(); + context.buildConstraintViolationWithTemplate( + DataManagementErrorCode.TAG_COLOR_INVALID.getMessage() + ).addConstraintViolation(); + } + + return isValid; + } +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPath.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPath.java new file mode 100644 index 000000000..8eaee9449 --- /dev/null +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPath.java @@ -0,0 +1,31 @@ +package com.datamate.datamanagement.interfaces.validation; + +import jakarta.validation.Constraint; +import jakarta.validation.Payload; + +import 
java.lang.annotation.*; + +/** + * 路径格式校验注解 + * 验证路径格式和长度限制 + * + * @author DataMate + * @since 2026/02/11 + */ +@Documented +@Constraint(validatedBy = ValidPathValidator.class) +@Target({ElementType.FIELD, ElementType.PARAMETER}) +@Retention(RetentionPolicy.RUNTIME) +public @interface ValidPath { + + String message() default "路径格式不正确"; + + Class[] groups() default {}; + + Class[] payload() default {}; + + /** + * 最大路径长度,默认500 + */ + int maxLength() default 500; +} diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPathValidator.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPathValidator.java new file mode 100644 index 000000000..69bb297b9 --- /dev/null +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/interfaces/validation/ValidPathValidator.java @@ -0,0 +1,66 @@ +package com.datamate.datamanagement.interfaces.validation; + +import com.datamate.datamanagement.infrastructure.exception.DataManagementErrorCode; +import jakarta.validation.ConstraintValidator; +import jakarta.validation.ConstraintValidatorContext; + +import java.util.regex.Pattern; + +/** + * 路径格式校验器 + * + * @author DataMate + * @since 2026/02/11 + */ +public class ValidPathValidator implements ConstraintValidator { + + /** + * 路径正则表达式 + * 不允许包含特殊字符如 \0, <, >, :, ", |, ?, *(以点开头的检查由 isValid 中的 startsWith(".") 单独处理) + * NOTE(review): 该正则不拦截路径中间的 ".." 穿越片段(如 "a/../../b")— 需确认上层调用已对路径做 normalize 校验 + */ + private static final Pattern PATH_PATTERN = Pattern.compile( + "^[^<>:\"|?*\\x00]+" // 仅排除特殊字符;matches() 要求全串匹配,故无需尾部 $ 锚点 + ); + + private int maxLength = 500; + + @Override + public void initialize(ValidPath constraintAnnotation) { + this.maxLength = constraintAnnotation.maxLength(); + } + + @Override + public boolean isValid(String value, ConstraintValidatorContext context) { + if (value == null || value.isEmpty()) { + return true; // 空值由其他注解处理 + } + + // 检查长度 + if (value.length() > maxLength) { + context.disableDefaultConstraintViolation(); + 
context.buildConstraintViolationWithTemplate( + DataManagementErrorCode.PATH_TOO_LONG.getMessage() + ).addConstraintViolation(); + return false; + } + + // 检查是否以点开头 + if (value.startsWith(".")) { + context.disableDefaultConstraintViolation(); + context.buildConstraintViolationWithTemplate( + DataManagementErrorCode.PREFIX_INVALID.getMessage() + ).addConstraintViolation(); + return false; + } + + // 检查是否包含非法字符 + if (!PATH_PATTERN.matcher(value).matches()) { + context.disableDefaultConstraintViolation(); + context.buildConstraintViolationWithTemplate("路径包含非法字符").addConstraintViolation(); + return false; + } + + return true; + } +} diff --git a/frontend/src/pages/DataManagement/Create/CreateDataset.tsx b/frontend/src/pages/DataManagement/Create/CreateDataset.tsx index 2416dede7..7179fe9df 100644 --- a/frontend/src/pages/DataManagement/Create/CreateDataset.tsx +++ b/frontend/src/pages/DataManagement/Create/CreateDataset.tsx @@ -34,7 +34,7 @@ export default function DatasetCreate() { navigate("/data/management/detail/" + data.id); } catch (error) { console.error(error); - message.error(t("dataManagement.messages.createFailed")); + message.error(error instanceof Error && error.message ? error.message : t("dataManagement.messages.createFailed")); return; } };