diff --git a/README-zh.md b/README-zh.md index c051e6f1f..91e443d3a 100644 --- a/README-zh.md +++ b/README-zh.md @@ -35,6 +35,12 @@ - kubernetes (用于部署服务-k8s方式) - Helm (用于部署服务-k8s方式) +### Docker一键部署 +```shell +wget -qO docker-compose.yml https://raw.githubusercontent.com/ModelEngine-Group/DataMate/refs/heads/main/deployment/docker/datamate/docker-compose.yml \ + && REGISTRY=ghcr.io/modelengine-group/ docker compose up -d +``` + ### 拉取代码 ```bash diff --git a/README.md b/README.md index a890a786a..8b30c5973 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,12 @@ If you like this project, please give it a Star⭐️! - Kubernetes (for service deployment - k8s method) - Helm (for service deployment - k8s method) +### Docker Quick deploy +```shell +wget -qO docker-compose.yml https://raw.githubusercontent.com/ModelEngine-Group/DataMate/refs/heads/main/deployment/docker/datamate/docker-compose.yml \ + && REGISTRY=ghcr.io/modelengine-group/ docker compose up -d +``` + ### Clone the Code ```bash diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java index 99d237593..d95f3564f 100644 --- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java +++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java @@ -877,6 +877,10 @@ private void addFile(String sourPath, String targetPath, boolean softAdd) { } try { + if (Files.exists(target) && Files.isSameFile(source, target)) { + return; + } + Path parent = target.getParent(); // 创建目标目录(如果需要) if (parent != null) { diff --git a/frontend/public/config/error-code.json b/frontend/public/config/error-code.json index 8270f4db8..0a7bc6518 100644 --- a/frontend/public/config/error-code.json +++ b/frontend/public/config/error-code.json @@ -11,9 +11,9 @@ "cleaning.0009": "设置解析错误", "cleaning.0010": "任务ID不能为空", "operator.0001": "算子不存在", - "operator.0002": "算子正在使用中", + "operator.0002": "算子被编排于模版中或处在正在进行的任务中,无法删除", "operator.0003": "无法删除预置算子", - "operator.0004": "不支持的文件类型", + "operator.0004": "不支持的文件类型,当前仅支持tar和zip", "operator.0005": "解析算子包失败", "operator.0006": "缺少必要的字段", "400": "请求参数错误", diff --git a/frontend/src/components/SearchControls.tsx b/frontend/src/components/SearchControls.tsx index 878f7fcc0..b57f85ca5 100644 --- a/frontend/src/components/SearchControls.tsx +++ b/frontend/src/components/SearchControls.tsx @@ -196,7 +196,7 @@ export function SearchControls({
- {t('components.searchControls.selectedFilters')} + {t('components.searchControls.filters.label')} {Object.entries(selectedFilters).map(([filterKey, values]) => values.map((value) => { @@ -231,7 +231,7 @@ export function SearchControls({ onClick={handleClearAllFilters} className="text-gray-500 hover:text-gray-700" > - {t('components.searchControls.clearAll')} + {t('components.searchControls.filters.clearAll')}
diff --git a/frontend/src/pages/OperatorMarket/operator.const.tsx b/frontend/src/pages/OperatorMarket/operator.const.tsx index 75d99cb08..14ddfd85c 100644 --- a/frontend/src/pages/OperatorMarket/operator.const.tsx +++ b/frontend/src/pages/OperatorMarket/operator.const.tsx @@ -113,6 +113,7 @@ export const mapOperator = (op: OperatorI, t: (key: string) => string) => { const FUNCTION_CATEGORY_IDS = { cleaning: "8c09476a-a922-418f-a908-733f8a0de521", annotation: "cfa9d8e2-5b5f-4f1e-9f12-1234567890ab", + system: "96a3b07a-3439-4557-a835-525faad60ca3" } as const; const categories = op?.categories || []; @@ -142,7 +143,7 @@ export const mapOperator = (op: OperatorI, t: (key: string) => string) => { }, { label: t("operatorMarket.const.size"), - value: formatBytes(op?.fileSize), + value: categories?.includes(FUNCTION_CATEGORY_IDS.system) ? '-' : formatBytes(op?.fileSize), }, { label: t("operatorMarket.const.language"), diff --git a/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py b/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py index 9886a18d2..5ce0bb191 100644 --- a/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py +++ b/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py @@ -180,6 +180,11 @@ async def create_task( await self.operator_instance_repo.insert_instance(db, task_id, request.instance) + # Increment operator usage count + operator_ids = [inst.id for inst in request.instance if inst.id] + if operator_ids: + await self.operator_service.increment_usage_count(operator_ids, db) + all_operators = await self.operator_service.get_operators(db=db, page=0, size=1000, categories=[], keyword=None, is_star=None) operator_map = {op.id: op for op in all_operators} diff --git a/runtime/datamate-python/app/module/operator/constants.py b/runtime/datamate-python/app/module/operator/constants.py index e6d83ee9e..188e6a061 100644 --- a/runtime/datamate-python/app/module/operator/constants.py +++ b/runtime/datamate-python/app/module/operator/constants.py @@ -30,6 +30,8 @@ CATEGORY_DATAMATE_ID = "431e7798-5426-4e1a-aae6-b9905a836b34" CATEGORY_DATA_JUICER_ID = "79b385b4-fde8-4617-bcba-02a176938996" CATEGORY_OTHER_VENDOR_ID = "f00eaa3e-96c1-4de4-96cd-9848ef5429ec" +CATEGORY_CLEANING_ID = "8c09476a-a922-418f-a908-733f8a0de521" +CATEGORY_ANNOTATION_ID = "cfa9d8e2-5b5f-4f1e-9f12-1234567890ab" # Category mapping CATEGORY_MAP = { @@ -42,6 +44,8 @@ "all": CATEGORY_ALL_ID, "datamate": CATEGORY_DATAMATE_ID, "data-juicer": CATEGORY_DATA_JUICER_ID, + "cleaning": CATEGORY_CLEANING_ID, + "annotation": CATEGORY_ANNOTATION_ID, } # File paths diff --git a/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py b/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py index 50ee98cf0..e861f2f70 100644 --- a/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py +++ b/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py @@ -8,7 +8,8 @@ from typing import Dict, Any, Optional from app.module.operator.schema import OperatorDto, OperatorReleaseDto -from app.module.operator.constants import CATEGORY_MAP, CATEGORY_OTHER_VENDOR_ID, CATEGORY_CUSTOMIZED_ID +from app.module.operator.constants import CATEGORY_MAP, CATEGORY_OTHER_VENDOR_ID, CATEGORY_CUSTOMIZED_ID, \ + CATEGORY_CLEANING_ID from app.module.operator.exceptions import FieldNotFoundError @@ -86,12 +87,22 @@ def parse_yaml( operator.releases = [operator_release] # Build categories - categories = [ + categories = [] + types = content.get("types") + if isinstance(types, list): + for t in types: + if self._to_lower(t) in CATEGORY_MAP: + categories.append(CATEGORY_MAP[self._to_lower(t)]) + if len(categories) == 0: + categories.append(CATEGORY_CLEANING_ID) + + categories.extend([ CATEGORY_MAP.get(self._to_lower(content.get("language")), ""), CATEGORY_MAP.get(self._to_lower(content.get("modal")), ""), CATEGORY_MAP.get(self._to_lower(content.get("vendor")), CATEGORY_OTHER_VENDOR_ID), CATEGORY_CUSTOMIZED_ID, - ] + ]) + operator.categories = categories return operator diff --git a/runtime/datamate-python/app/module/operator/repository/operator_repository.py b/runtime/datamate-python/app/module/operator/repository/operator_repository.py index 990f7eb35..805f8cb01 100644 --- a/runtime/datamate-python/app/module/operator/repository/operator_repository.py +++ b/runtime/datamate-python/app/module/operator/repository/operator_repository.py @@ -6,7 +6,7 @@ from typing import List, Optional from datetime import datetime, timezone -from sqlalchemy import select, text, update +from sqlalchemy import select, text, update, func from sqlalchemy.ext.asyncio import AsyncSession from app.db.models.operator import Operator @@ -115,7 +115,7 @@ async def increment_usage_count( update(Operator) .where(Operator.id.in_(operator_ids)) .values( - usage_count=Operator.usage_count + 1, - updated_at=datetime.now(timezone.utc), + usage_count=func.coalesce(Operator.usage_count, 0) + 1, + updated_at=datetime.utcnow(), ) ) diff --git a/runtime/datamate-python/app/module/operator/service/operator_service.py b/runtime/datamate-python/app/module/operator/service/operator_service.py index 6314f221a..aced839d9 100644 --- a/runtime/datamate-python/app/module/operator/service/operator_service.py +++ b/runtime/datamate-python/app/module/operator/service/operator_service.py @@ -619,6 +619,14 @@ def _get_upload_path(self, file_name: str) -> str: """获取上传文件路径""" return os.path.join(OPERATOR_BASE_PATH, UPLOAD_DIR, file_name) + async def increment_usage_count( + self, + operator_ids: List[str], + db: AsyncSession + ) -> None: + """增加算子使用次数""" + await self.operator_repo.increment_usage_count(operator_ids, db) + def _get_extract_path(self, file_stem: str) -> str: """获取解压路径""" return os.path.join(OPERATOR_BASE_PATH, EXTRACT_DIR, file_stem) diff --git a/runtime/ops/README.md b/runtime/ops/README.md index 3be7c9560..40682f3dc 100644 --- a/runtime/ops/README.md +++ b/runtime/ops/README.md @@ -36,7 +36,17 @@ operator_package/ | `inputs` | 输入的数据模态 (text/image/audio/video) | text | | `outputs` | 输出的数据模态 (text/image/audio/video) | text | -### 2.2 算子版本更新日志 (release) +### 2.2 算子功能分类 + +定义算子功能分类,支持清洗与标注。 + +```yaml +types: + - 'cleaning' + - 'annotation' +``` + +### 2.3 算子版本更新日志 (release) 定义算子当前版本较上版本更新内容。 @@ -46,7 +56,7 @@ release: - '支持基本处理操作' ``` -### 2.2 运行时资源与指标 (runtime & metrics) +### 2.4 运行时资源与指标 (runtime & metrics) 定义算子运行时的资源配额及性能指标参考。 @@ -65,7 +75,7 @@ metrics: # 算子性能参考指标 metric: '99.5%' ``` -### 2.3 参数设置 (settings) - UI 组件规范 +### 2.5 参数设置 (settings) - UI 组件规范 通过 `settings` 字段,开发者可以自定义用户在前端界面配置算子时的交互组件。系统支持以下类型: diff --git a/runtime/ops/examples/test_operator/README.md b/runtime/ops/examples/test_operator/README.md index a40a4a832..8a33a93a4 100644 Binary files a/runtime/ops/examples/test_operator/README.md and b/runtime/ops/examples/test_operator/README.md differ diff --git a/runtime/ops/examples/test_operator/metadata.yml b/runtime/ops/examples/test_operator/metadata.yml index fb1b59b8c..725a86b41 100644 --- a/runtime/ops/examples/test_operator/metadata.yml +++ b/runtime/ops/examples/test_operator/metadata.yml @@ -7,6 +7,9 @@ version: '1.0.0' modal: 'text' # text/image/audio/video/multimodal inputs: 'text' # text/image/audio/video/multimodal outputs: 'text' # text/image/audio/video/multimodal +types: + - 'cleaning' + - 'annotation' release: - '首次发布' - '支持基本处理操作' diff --git a/runtime/ops/examples/test_operator/test_operator.tar b/runtime/ops/examples/test_operator/test_operator.tar index e14771ea6..1d32149f2 100644 Binary files a/runtime/ops/examples/test_operator/test_operator.tar and b/runtime/ops/examples/test_operator/test_operator.tar differ diff --git a/scripts/images/frontend/Dockerfile b/scripts/images/frontend/Dockerfile index d9d83f69f..3aaf639c7 100644 --- a/scripts/images/frontend/Dockerfile +++ b/scripts/images/frontend/Dockerfile @@ -12,7 +12,7 @@ FROM nginx:1.29 AS runner RUN --mount=type=cache,target=/var/cache/apt \ --mount=type=cache,target=/var/lib/apt \ apt update \ - && apt install -y dos2unix python3 python3-dev python3-venv libaugeas-dev gcc \ + && apt install -y dos2unix python3 python3-dev python3-venv libaugeas-dev gcc vim cron \ && python3 -m venv /opt/certbot/ \ && /opt/certbot/bin/pip install --upgrade pip \ && /opt/certbot/bin/pip install certbot certbot-nginx \ diff --git a/scripts/images/frontend/start.sh b/scripts/images/frontend/start.sh index fc3b4aed7..457e4a547 100644 --- a/scripts/images/frontend/start.sh +++ b/scripts/images/frontend/start.sh @@ -6,10 +6,14 @@ if [ -f "/cert/server.pem" ]; then fi if [ -f "/cert/server.key" ]; then - if openssl rsa -in /cert/server.key -passin pass:test_check -noout 2>/dev/null; then - cp /cert/server.key /etc/nginx/cert/server.key + # Check if key is encrypted and decrypt if needed + # Supports RSA, EC (Elliptic Curve), PKCS#8, and DSA keys + if grep -q "ENCRYPTED" /cert/server.key 2>/dev/null; then + # Key is encrypted, decrypt using generic pkey command (supports all key types) + echo "$CERT_PASS" | openssl pkey -in /cert/server.key -out /etc/nginx/cert/server.key -passin stdin else - echo "$CERT_PASS" | openssl rsa -in /cert/server.key -out /etc/nginx/cert/server.key -passin stdin + # Key is not encrypted, copy directly + cp /cert/server.key /etc/nginx/cert/server.key fi chown nginx:nginx /etc/nginx/cert/server.key fi