diff --git a/README-zh.md b/README-zh.md
index c051e6f1f..91e443d3a 100644
--- a/README-zh.md
+++ b/README-zh.md
@@ -35,6 +35,12 @@
- kubernetes (用于部署服务-k8s方式)
- Helm (用于部署服务-k8s方式)
+### Docker一键部署
+```shell
+wget -qO docker-compose.yml https://raw.githubusercontent.com/ModelEngine-Group/DataMate/refs/heads/main/deployment/docker/datamate/docker-compose.yml \
+ && REGISTRY=ghcr.io/modelengine-group/ docker compose up -d
+```
+
### 拉取代码
```bash
diff --git a/README.md b/README.md
index a890a786a..8b30c5973 100644
--- a/README.md
+++ b/README.md
@@ -38,6 +38,12 @@ If you like this project, please give it a Star⭐️!
- Kubernetes (for service deployment - k8s method)
- Helm (for service deployment - k8s method)
+### Docker Quick Deploy
+```shell
+wget -qO docker-compose.yml https://raw.githubusercontent.com/ModelEngine-Group/DataMate/refs/heads/main/deployment/docker/datamate/docker-compose.yml \
+ && REGISTRY=ghcr.io/modelengine-group/ docker compose up -d
+```
+
### Clone the Code
```bash
diff --git a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java
index 99d237593..d95f3564f 100644
--- a/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java
+++ b/backend/services/data-management-service/src/main/java/com/datamate/datamanagement/application/DatasetFileApplicationService.java
@@ -877,6 +877,10 @@ private void addFile(String sourPath, String targetPath, boolean softAdd) {
}
try {
+ if (Files.exists(target) && Files.isSameFile(source, target)) {
+ return;
+ }
+
Path parent = target.getParent();
// 创建目标目录(如果需要)
if (parent != null) {
diff --git a/frontend/public/config/error-code.json b/frontend/public/config/error-code.json
index 8270f4db8..0a7bc6518 100644
--- a/frontend/public/config/error-code.json
+++ b/frontend/public/config/error-code.json
@@ -11,9 +11,9 @@
"cleaning.0009": "设置解析错误",
"cleaning.0010": "任务ID不能为空",
"operator.0001": "算子不存在",
- "operator.0002": "算子正在使用中",
+ "operator.0002": "算子被编排于模版中或处在正在进行的任务中,无法删除",
"operator.0003": "无法删除预置算子",
- "operator.0004": "不支持的文件类型",
+ "operator.0004": "不支持的文件类型,当前仅支持tar和zip",
"operator.0005": "解析算子包失败",
"operator.0006": "缺少必要的字段",
"400": "请求参数错误",
diff --git a/frontend/src/components/SearchControls.tsx b/frontend/src/components/SearchControls.tsx
index 878f7fcc0..b57f85ca5 100644
--- a/frontend/src/components/SearchControls.tsx
+++ b/frontend/src/components/SearchControls.tsx
@@ -196,7 +196,7 @@ export function SearchControls({
- {t('components.searchControls.selectedFilters')}
+ {t('components.searchControls.filters.label')}
{Object.entries(selectedFilters).map(([filterKey, values]) =>
values.map((value) => {
@@ -231,7 +231,7 @@ export function SearchControls({
onClick={handleClearAllFilters}
className="text-gray-500 hover:text-gray-700"
>
- {t('components.searchControls.clearAll')}
+ {t('components.searchControls.filters.clearAll')}
diff --git a/frontend/src/pages/OperatorMarket/operator.const.tsx b/frontend/src/pages/OperatorMarket/operator.const.tsx
index 75d99cb08..14ddfd85c 100644
--- a/frontend/src/pages/OperatorMarket/operator.const.tsx
+++ b/frontend/src/pages/OperatorMarket/operator.const.tsx
@@ -113,6 +113,7 @@ export const mapOperator = (op: OperatorI, t: (key: string) => string) => {
const FUNCTION_CATEGORY_IDS = {
cleaning: "8c09476a-a922-418f-a908-733f8a0de521",
annotation: "cfa9d8e2-5b5f-4f1e-9f12-1234567890ab",
+ system: "96a3b07a-3439-4557-a835-525faad60ca3"
} as const;
const categories = op?.categories || [];
@@ -142,7 +143,7 @@ export const mapOperator = (op: OperatorI, t: (key: string) => string) => {
},
{
label: t("operatorMarket.const.size"),
- value: formatBytes(op?.fileSize),
+ value: categories?.includes(FUNCTION_CATEGORY_IDS.system) ? '-' : formatBytes(op?.fileSize),
},
{
label: t("operatorMarket.const.language"),
diff --git a/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py b/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py
index 9886a18d2..5ce0bb191 100644
--- a/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py
+++ b/runtime/datamate-python/app/module/cleaning/service/cleaning_task_service.py
@@ -180,6 +180,11 @@ async def create_task(
await self.operator_instance_repo.insert_instance(db, task_id, request.instance)
+ # Increment operator usage count
+ operator_ids = [inst.id for inst in request.instance if inst.id]
+ if operator_ids:
+ await self.operator_service.increment_usage_count(operator_ids, db)
+
all_operators = await self.operator_service.get_operators(db=db, page=0, size=1000, categories=[], keyword=None, is_star=None)
operator_map = {op.id: op for op in all_operators}
diff --git a/runtime/datamate-python/app/module/operator/constants.py b/runtime/datamate-python/app/module/operator/constants.py
index e6d83ee9e..188e6a061 100644
--- a/runtime/datamate-python/app/module/operator/constants.py
+++ b/runtime/datamate-python/app/module/operator/constants.py
@@ -30,6 +30,8 @@
CATEGORY_DATAMATE_ID = "431e7798-5426-4e1a-aae6-b9905a836b34"
CATEGORY_DATA_JUICER_ID = "79b385b4-fde8-4617-bcba-02a176938996"
CATEGORY_OTHER_VENDOR_ID = "f00eaa3e-96c1-4de4-96cd-9848ef5429ec"
+CATEGORY_CLEANING_ID = "8c09476a-a922-418f-a908-733f8a0de521"
+CATEGORY_ANNOTATION_ID = "cfa9d8e2-5b5f-4f1e-9f12-1234567890ab"
# Category mapping
CATEGORY_MAP = {
@@ -42,6 +44,8 @@
"all": CATEGORY_ALL_ID,
"datamate": CATEGORY_DATAMATE_ID,
"data-juicer": CATEGORY_DATA_JUICER_ID,
+ "cleaning": CATEGORY_CLEANING_ID,
+ "annotation": CATEGORY_ANNOTATION_ID,
}
# File paths
diff --git a/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py b/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py
index 50ee98cf0..e861f2f70 100644
--- a/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py
+++ b/runtime/datamate-python/app/module/operator/parsers/abstract_parser.py
@@ -8,7 +8,8 @@
from typing import Dict, Any, Optional
from app.module.operator.schema import OperatorDto, OperatorReleaseDto
-from app.module.operator.constants import CATEGORY_MAP, CATEGORY_OTHER_VENDOR_ID, CATEGORY_CUSTOMIZED_ID
+from app.module.operator.constants import CATEGORY_MAP, CATEGORY_OTHER_VENDOR_ID, CATEGORY_CUSTOMIZED_ID, \
+ CATEGORY_CLEANING_ID
from app.module.operator.exceptions import FieldNotFoundError
@@ -86,12 +87,22 @@ def parse_yaml(
operator.releases = [operator_release]
# Build categories
- categories = [
+ categories = []
+ types = content.get("types")
+ if isinstance(types, list):
+ for t in types:
+ if self._to_lower(t) in CATEGORY_MAP:
+ categories.append(CATEGORY_MAP[self._to_lower(t)])
+ if len(categories) == 0:
+ categories.append(CATEGORY_CLEANING_ID)
+
+ categories.extend([
CATEGORY_MAP.get(self._to_lower(content.get("language")), ""),
CATEGORY_MAP.get(self._to_lower(content.get("modal")), ""),
CATEGORY_MAP.get(self._to_lower(content.get("vendor")), CATEGORY_OTHER_VENDOR_ID),
CATEGORY_CUSTOMIZED_ID,
- ]
+ ])
+
operator.categories = categories
return operator
diff --git a/runtime/datamate-python/app/module/operator/repository/operator_repository.py b/runtime/datamate-python/app/module/operator/repository/operator_repository.py
index 990f7eb35..805f8cb01 100644
--- a/runtime/datamate-python/app/module/operator/repository/operator_repository.py
+++ b/runtime/datamate-python/app/module/operator/repository/operator_repository.py
@@ -6,7 +6,7 @@
from typing import List, Optional
from datetime import datetime, timezone
-from sqlalchemy import select, text, update
+from sqlalchemy import select, text, update, func
from sqlalchemy.ext.asyncio import AsyncSession
from app.db.models.operator import Operator
@@ -115,7 +115,7 @@ async def increment_usage_count(
update(Operator)
.where(Operator.id.in_(operator_ids))
.values(
- usage_count=Operator.usage_count + 1,
- updated_at=datetime.now(timezone.utc),
+ usage_count=func.coalesce(Operator.usage_count, 0) + 1,
+ updated_at=datetime.utcnow(),
)
)
diff --git a/runtime/datamate-python/app/module/operator/service/operator_service.py b/runtime/datamate-python/app/module/operator/service/operator_service.py
index 6314f221a..aced839d9 100644
--- a/runtime/datamate-python/app/module/operator/service/operator_service.py
+++ b/runtime/datamate-python/app/module/operator/service/operator_service.py
@@ -619,6 +619,14 @@ def _get_upload_path(self, file_name: str) -> str:
"""获取上传文件路径"""
return os.path.join(OPERATOR_BASE_PATH, UPLOAD_DIR, file_name)
+ async def increment_usage_count(
+ self,
+ operator_ids: List[str],
+ db: AsyncSession
+ ) -> None:
+ """增加算子使用次数"""
+ await self.operator_repo.increment_usage_count(operator_ids, db)
+
def _get_extract_path(self, file_stem: str) -> str:
"""获取解压路径"""
return os.path.join(OPERATOR_BASE_PATH, EXTRACT_DIR, file_stem)
diff --git a/runtime/ops/README.md b/runtime/ops/README.md
index 3be7c9560..40682f3dc 100644
--- a/runtime/ops/README.md
+++ b/runtime/ops/README.md
@@ -36,7 +36,17 @@ operator_package/
| `inputs` | 输入的数据模态 (text/image/audio/video) | text |
| `outputs` | 输出的数据模态 (text/image/audio/video) | text |
-### 2.2 算子版本更新日志 (release)
+### 2.2 算子功能分类
+
+定义算子功能分类,可选值为 `cleaning`(清洗)与 `annotation`(标注);未配置 `types` 或其中没有有效取值时,默认归类为清洗。
+
+```yaml
+types:
+ - 'cleaning'
+ - 'annotation'
+```
+
+### 2.3 算子版本更新日志 (release)
定义算子当前版本较上版本更新内容。
@@ -46,7 +56,7 @@ release:
- '支持基本处理操作'
```
-### 2.2 运行时资源与指标 (runtime & metrics)
+### 2.4 运行时资源与指标 (runtime & metrics)
定义算子运行时的资源配额及性能指标参考。
@@ -65,7 +75,7 @@ metrics: # 算子性能参考指标
metric: '99.5%'
```
-### 2.3 参数设置 (settings) - UI 组件规范
+### 2.5 参数设置 (settings) - UI 组件规范
通过 `settings` 字段,开发者可以自定义用户在前端界面配置算子时的交互组件。系统支持以下类型:
diff --git a/runtime/ops/examples/test_operator/README.md b/runtime/ops/examples/test_operator/README.md
index a40a4a832..8a33a93a4 100644
Binary files a/runtime/ops/examples/test_operator/README.md and b/runtime/ops/examples/test_operator/README.md differ
diff --git a/runtime/ops/examples/test_operator/metadata.yml b/runtime/ops/examples/test_operator/metadata.yml
index fb1b59b8c..725a86b41 100644
--- a/runtime/ops/examples/test_operator/metadata.yml
+++ b/runtime/ops/examples/test_operator/metadata.yml
@@ -7,6 +7,9 @@ version: '1.0.0'
modal: 'text' # text/image/audio/video/multimodal
inputs: 'text' # text/image/audio/video/multimodal
outputs: 'text' # text/image/audio/video/multimodal
+types:
+ - 'cleaning'
+ - 'annotation'
release:
- '首次发布'
- '支持基本处理操作'
diff --git a/runtime/ops/examples/test_operator/test_operator.tar b/runtime/ops/examples/test_operator/test_operator.tar
index e14771ea6..1d32149f2 100644
Binary files a/runtime/ops/examples/test_operator/test_operator.tar and b/runtime/ops/examples/test_operator/test_operator.tar differ
diff --git a/scripts/images/frontend/Dockerfile b/scripts/images/frontend/Dockerfile
index d9d83f69f..3aaf639c7 100644
--- a/scripts/images/frontend/Dockerfile
+++ b/scripts/images/frontend/Dockerfile
@@ -12,7 +12,7 @@ FROM nginx:1.29 AS runner
RUN --mount=type=cache,target=/var/cache/apt \
--mount=type=cache,target=/var/lib/apt \
apt update \
- && apt install -y dos2unix python3 python3-dev python3-venv libaugeas-dev gcc \
+ && apt install -y dos2unix python3 python3-dev python3-venv libaugeas-dev gcc vim cron \
&& python3 -m venv /opt/certbot/ \
&& /opt/certbot/bin/pip install --upgrade pip \
&& /opt/certbot/bin/pip install certbot certbot-nginx \
diff --git a/scripts/images/frontend/start.sh b/scripts/images/frontend/start.sh
index fc3b4aed7..457e4a547 100644
--- a/scripts/images/frontend/start.sh
+++ b/scripts/images/frontend/start.sh
@@ -6,10 +6,14 @@ if [ -f "/cert/server.pem" ]; then
fi
if [ -f "/cert/server.key" ]; then
- if openssl rsa -in /cert/server.key -passin pass:test_check -noout 2>/dev/null; then
- cp /cert/server.key /etc/nginx/cert/server.key
+ # Check if key is encrypted and decrypt if needed
+ # Supports RSA, EC (Elliptic Curve), PKCS#8, and DSA keys
+ if grep -q "ENCRYPTED" /cert/server.key 2>/dev/null; then
+ # Key is encrypted, decrypt using generic pkey command (supports all key types)
+ echo "$CERT_PASS" | openssl pkey -in /cert/server.key -out /etc/nginx/cert/server.key -passin stdin
else
- echo "$CERT_PASS" | openssl rsa -in /cert/server.key -out /etc/nginx/cert/server.key -passin stdin
+ # Key is not encrypted, copy directly
+ cp /cert/server.key /etc/nginx/cert/server.key
fi
chown nginx:nginx /etc/nginx/cert/server.key
fi