From 1586fcbb55c30da619dd30f92eb2099478e3afe8 Mon Sep 17 00:00:00 2001 From: viktorbeck98 Date: Sat, 7 Mar 2026 12:27:46 +0100 Subject: [PATCH 1/3] create logbatcher parser --- .../parsers/logbatcher/__init__.py | 29 ++ .../parsers/logbatcher/engine/LICENSE | 21 + .../parsers/logbatcher/engine/README.md | 266 +++++++++++ .../parsers/logbatcher/engine/__init__.py | 5 + .../logbatcher/engine/additional_cluster.py | 186 ++++++++ .../parsers/logbatcher/engine/cluster.py | 175 ++++++++ .../parsers/logbatcher/engine/matching.py | 110 +++++ .../parsers/logbatcher/engine/parser.py | 128 ++++++ .../parsers/logbatcher/engine/parsing_base.py | 220 +++++++++ .../logbatcher/engine/parsing_cache.py | 416 ++++++++++++++++++ .../parsers/logbatcher/engine/postprocess.py | 195 ++++++++ .../parsers/logbatcher/engine/sample.py | 140 ++++++ .../parsers/logbatcher/engine/util.py | 169 +++++++ .../parsers/logbatcher/engine/vars.py | 41 ++ .../parsers/logbatcher/parser.py | 65 +++ tests/test_parsers/test_logbatcher_parser.py | 82 ++++ 16 files changed, 2248 insertions(+) create mode 100644 src/detectmatelibrary/parsers/logbatcher/__init__.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/LICENSE create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/README.md create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/__init__.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/additional_cluster.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/cluster.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/matching.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/parser.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/parsing_base.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/parsing_cache.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/postprocess.py create mode 100644 
src/detectmatelibrary/parsers/logbatcher/engine/sample.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/util.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/engine/vars.py create mode 100644 src/detectmatelibrary/parsers/logbatcher/parser.py create mode 100644 tests/test_parsers/test_logbatcher_parser.py diff --git a/src/detectmatelibrary/parsers/logbatcher/__init__.py b/src/detectmatelibrary/parsers/logbatcher/__init__.py new file mode 100644 index 0000000..f3cfc57 --- /dev/null +++ b/src/detectmatelibrary/parsers/logbatcher/__init__.py @@ -0,0 +1,29 @@ +# MIT License +# +# Copyright (c) 2024 LogIntelligence +# +# Based on LogBatcher (https://github.com/LogIntelligence/LogBatcher) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +# import sys, os +# sys.path.append(os.path.join(os.getcwd(), "parsing", "parsers")) + +# flake8: noqa +from .parser import LogBatcherParserConfig, LogBatcherParser # noqa: F401 diff --git a/src/detectmatelibrary/parsers/logbatcher/engine/LICENSE b/src/detectmatelibrary/parsers/logbatcher/engine/LICENSE new file mode 100644 index 0000000..493952e --- /dev/null +++ b/src/detectmatelibrary/parsers/logbatcher/engine/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 LogIntelligence + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/src/detectmatelibrary/parsers/logbatcher/engine/README.md b/src/detectmatelibrary/parsers/logbatcher/engine/README.md new file mode 100644 index 0000000..feb62ad --- /dev/null +++ b/src/detectmatelibrary/parsers/logbatcher/engine/README.md @@ -0,0 +1,266 @@ +# LogBatcher +[![pypi package](https://img.shields.io/pypi/v/logbatcher.svg)](https://pypi.org/project/logbatcher/) +[![Build and test](https://github.com/LogIntelligence/LogBatcher/actions/workflows/build_and_test.yml/badge.svg)](https://github.com/LogIntelligence/LogBatcher/actions/workflows/build_and_test.yml) +[![Upload Python Package](https://github.com/LogIntelligence/LogBatcher/actions/workflows/python-publish.yml/badge.svg)](https://github.com/LogIntelligence/LogBatcher/actions/workflows/python-publish.yml) +[![Downloads](https://static.pepy.tech/badge/logbatcher)](https://pepy.tech/projects/logbatcher) + + +**LogBatcher** is a cost-effective LLM-based log parser that requires no training process or labeled data. This repository includes artifacts for reuse and reproduction of experimental results presented in our ASE'24 paper titled *"Demonstration-Free: Towards More Practical Log Parsing with Large Language Models"*. 
+ +## Work Flow +![workflow](outputs/figures/workflow.png) +Log Batcher contains three main components: **Partitioning, Caching and Batching - Querying** + + +**Table of Contents** + - [Setup](#setup) + - [Get start](#get-start) + - [Project Tree](#project-tree) + - [Usage](#usage) + - [Data format](#data-format) + - [Usage example](#usage-example) + - [Example Evaluation](#example-evaluation) + - [Benchmark](#benchmark) + - [Prepare datasets](#prepare-datasets) + - [Reproduce](#reproduce) + - [Benchmark Evaluation](#benchmark-evaluation) + + +## Setup + + +### Get start + +_To run at the local environment:_ + +Git Clone LogBatcher from github +```bash +git clone https://github.com/LogIntelligence/LogBatcher.git && cd LogBatcher +``` + +The code is implemented in **Python >= 3.9**. To install the required packages, run the following command (conda is optional): +```bash +conda create -n logbatcher python==3.9 +conda activate logbatcher +pip install -r requirements.txt +``` + +Install LogBatcher from PyPI +```bash +pip install logbatcher +``` + +OR, Install LogBatcher from source +```bash +pip install -e . +``` + +Set your **API Key** in `config.json` + +Note that if you find the access to specific API versions is lost, please refer to the following: + +To ensure the long-term reusability of LogBatcher, we recommend using OpenAI's latest released models. For example, as indicated on [Open AI](https://platform.openai.com/docs/deprecations), the GPT-3.5 series is soon to be deprecated, and it is recommended to switch to the newer gpt-4o-mini model. Additionally, we also support the open-source LLMs as the base model. You can use the API provided by [Together AI](https://www.together.ai/) to replace LogBatcher's base model with their commercially available open-source models (such as LLama 3.1, etc.).
+ +```json +"api_key_from_openai": "", +"api_key_from_together":"", +``` + +_To run with docker:_ + +Download the pre-installed docker image from our Zenodo repository, which also includes the source code, benchmarks and scripts. + +Zenodo repository DOI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.13752709.svg)](https://doi.org/10.5281/zenodo.13752709) + +Running the following command after downloading the pre-built Docker image: + +```bash +docker load -i logbatcher.tar +docker images +docker run -it logbatcher +``` + +Or you can build the docker image from the `Dockerfile` we provide: +```bash +docker build -t logbatcher . +docker images +docker run -it logbatcher +``` + +### Project Tree + +``` +๐Ÿ“ฆLogBatcher + โ”ฃ ๐Ÿ“‚datasets + โ”ƒ โ”ฃ ๐Ÿ“‚loghub-2k + โ”ƒ โ”ƒ โ”ฃ ๐Ÿ“‚Android + โ”ƒ โ”ƒ โ”ƒ โ”ฃ ๐Ÿ“œAndroid_2k.log + โ”ƒ โ”ƒ โ”ƒ โ”ฃ ๐Ÿ“œAndroid_2k.log_structured.csv + โ”ƒ โ”ƒ โ”ƒ โ”ฃ ๐Ÿ“œAndroid_2k.log_templates.csv + โ”ƒ โ”ƒ โ”ƒ โ”ฃ ๐Ÿ“œAndroid_2k.log_structured_corrected.csv + โ”ƒ โ”ƒ โ”ƒ โ”— ๐Ÿ“œAndroid_2k.log_templates_corrected.csv + โ”ƒ โ”ƒ โ”ฃ ... + โ”ƒ โ”— ๐Ÿ“‚loghub-2.0 + โ”ฃ ๐Ÿ“‚evaluation + โ”ƒ โ”ฃ ๐Ÿ“‚utils + โ”ƒ โ”ฃ ๐Ÿ“œlogbatcher_eval.py + โ”ƒ โ”— ๐Ÿ“œsettings.py + โ”ฃ ๐Ÿ“‚logbatcher + โ”ƒ โ”ฃ ๐Ÿ“œadditional_cluster.py + โ”ƒ โ”ฃ ๐Ÿ“œcluster.py + โ”ƒ โ”ฃ ๐Ÿ“œparser.py + โ”ƒ โ”ฃ ๐Ÿ“œmatching.py + โ”ƒ โ”ฃ ๐Ÿ“œparsing_base.py + โ”ƒ โ”ฃ ๐Ÿ“œpostprocess.py + โ”ƒ โ”ฃ ๐Ÿ“œsample.py + โ”ƒ โ”— ๐Ÿ“œutil.py + โ”ฃ ๐Ÿ“‚outputs + โ”ƒ โ”ฃ ๐Ÿ“‚figures + โ”ƒ โ”— ๐Ÿ“‚parser + โ”ฃ ๐Ÿ“œREADME.md + โ”ฃ ๐Ÿ“œbenchmark.py + โ”ฃ ๐Ÿ“œconfig.json + โ”ฃ ๐Ÿ“œrequirements.txt + โ”— ๐Ÿ“œdemo.py +``` + +## Usage + +### Data format + +LogBatcher mainly takes **a raw log file** (in plain text format) as input and outputs the **parsed log file** (in CSV format). A **raw log file** is a log file with each line representing a complete log. + +Following the data format from [LOGPAI](https://github.com/logpai/loghub), the data can also be a **structured log file**. 
A **structured log file** is a CSV file that includes at least the `LineID` and `Content` columns for parsing, with optional `EventID` and `EventTemplate` columns for evaluation. + +### Usage example + +We provide a usage example for more convenient reuse, which is presented as follows. The usage example can be found in file `demo.py`. The example provides a test on a specific dataset **Apache** from [LOGPAI](https://github.com/logpai/loghub). If you want to evaluate LogBatcher on your own dataset, please replace the arguments `file_name` and `dataset_format` with your own raw log file path to load log data and the corresponding dataset format to extract the contents. Run `python demo.py` and find the results in `outputs/parser/test` folder. + +```python +import json +from logbatcher.parsing_base import single_dataset_paring +from logbatcher.parser import Parser +from logbatcher.util import data_loader + +# load api key, dataset format and parser +model, dataset, folder_name ='gpt-3.5-turbo-0125', 'Apache', 'test' +config = json.load(open('config.json', 'r')) +parser = Parser(model, folder_name, config) + +# load contents from raw log file, structured log file or content list +contents = data_loader( + file_name=f"datasets/loghub-2k/{dataset}/{dataset}_2k.log", + dataset_format= config['datasets_format'][dataset], + file_format ='raw' +) + +# parse logs +single_dataset_paring( + dataset=dataset, + contents=contents, + output_dir= f'outputs/parser/{folder_name}/', + parser=parser, + debug=False +) +``` + +
+Expected output + +``` +python demo.py +Parsing 2000 logs in dataset Apache... +100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 2000/2000 [00:04<00:00, 420.55log/s] +parsing time: 4.756490230560303 +idetified templates: 6 +``` +
+ +### Example Evaluation + +To evaluate the output of the usage example, run the following command +```bash +cd evaluation && python logbatcher_eval.py --config test --dataset Apache +``` + +
+Expected output + + +``` +Calculating Edit Distance.... +100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 2000/2000 [00:00<00:00, 4029110.47it/s] +Normalized_Edit_distance (NED): 1.0000, ED: 0.0000, +Grouping Accuracy calculation done. [Time taken: 0.002] +Start compute grouping accuracy +100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 6/6 [00:00<00:00, 2084.64it/s] +Grouping_Accuracy (GA): 1.0000, FGA: 1.0000, +Grouping Accuracy calculation done. [Time taken: 0.006] +Parsing_Accuracy (PA): 1.0000 +Parsing Accuracy calculation done. [Time taken: 0.001] +100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 6/6 [00:00<00:00, 10677.06it/s] +PTA: 1.0000, RTA: 1.0000 FTA: 1.0000 +Identify : 6, Groundtruth : 6 +Template-level accuracy calculation done. [Time taken: 0.003] +``` +
+ +The results of evaluation metrics can be found in `outputs/parser/test` folder + +## Benchmark + +### Prepare datasets + +We have already provided _loghub-2k_ datasets in `datasets/loghub-2.0` folder. + +if you want to benchmark on _Loghub-2.0_ datasets, please Run `datasets/loghub-2.0/download.sh` or download the datasets: + + +1. Datasets DOI: [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.8275861.svg)](https://doi.org/10.5281/zenodo.8275861) +2. Datasets Homepage: [Loghub-2.0](https://zenodo.org/records/8275861) + +### Reproduce + +To benchmark on all datasets in loghub-2k or loghub-2.0, you can run the following commands: +```bash +python benchmark.py --data_type [DATATYPE] --model [MODEL] --batch_size [BATCHSIZE] --chunk_size [CHUNKSIZE] --sampling_method [SAMPLINGMETHOD] +``` + +The description of the arguments can be found in `benchmark.py` or below: + +```bash +--data_type + Datasets type, Options: ['2k', 'full'], default: '2k'. +--model + the Large Lauguage model used in LogBatcher, default: 'gpt-3.5-turbo-0125'. +--batch_size + size of a batch query, default: 10. +--chunk_size + size of a log chunk, default: 2000. +--clustering_method + clustering method used in the partitioning stage, Options: ['dbscan', 'meanshift', 'hierarchical'], default: 'dbscan'. +--sampling_method + sampling method used in the batching stage, Options: ['dpp', 'similar', 'random'], default: 'dpp'. +``` + +### Benchmark Evaluation + +To evaluate the output of benchmark, run the following command +```bash +cd evaluation && python logbatcher_eval.py --config logbatcher_2k +``` + + +The expected results will be similar with that presented in the paper, also see [experimental_results](docs/experimental_results.md). 
+ + +The description of the arguments: + +```bash +--config + The folder name of the outputs, Options: ['test', 'logbatcher_2k', 'logbatcher_full'] +--data_type + Datasets type, Options: ['2k', 'full'], default: '2k' +--dataset + To evaluate on a single dataset, default: 'null'. +``` diff --git a/src/detectmatelibrary/parsers/logbatcher/engine/__init__.py b/src/detectmatelibrary/parsers/logbatcher/engine/__init__.py new file mode 100644 index 0000000..02e47b9 --- /dev/null +++ b/src/detectmatelibrary/parsers/logbatcher/engine/__init__.py @@ -0,0 +1,5 @@ +# MIT License +# +# Copyright (c) 2024 LogIntelligence +# +# Based on LogBatcher (https://github.com/LogIntelligence/LogBatcher) diff --git a/src/detectmatelibrary/parsers/logbatcher/engine/additional_cluster.py b/src/detectmatelibrary/parsers/logbatcher/engine/additional_cluster.py new file mode 100644 index 0000000..564e599 --- /dev/null +++ b/src/detectmatelibrary/parsers/logbatcher/engine/additional_cluster.py @@ -0,0 +1,186 @@ +# MIT License +# +# Copyright (c) 2024 LogIntelligence +# +# Based on LogBatcher (https://github.com/LogIntelligence/LogBatcher) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from __future__ import annotations

import re
import heapq
from collections import Counter
from typing import Any, Dict, FrozenSet, List, Optional, Tuple

# NOTE(review): ENGLISH_STOP_WORDS, random and os are imported but not used in
# this module; kept as-is from upstream LogBatcher.
from sklearn.feature_extraction._stop_words import ENGLISH_STOP_WORDS
import time
import calendar
import random
import os
from sklearn.cluster import MeanShift
from sklearn.feature_extraction.text import TfidfVectorizer


class Vocab:
    """Global token-frequency vocabulary over tokenized log lines.

    Used by the hierarchical clustering below to pick each log line's most
    frequent tokens as a coarse-grained cluster key.
    """

    def __init__(self, stopwords: Optional[List[str]] = None) -> None:
        if stopwords is None:
            stopwords = ["<*>"]
        # NOTE(review): the caller-supplied/default stopword list is
        # unconditionally replaced here, so the `stopwords` parameter is
        # effectively unused. Kept as-is (upstream behavior).
        stopwords = [
            "a",
            "an",
            "and",
            "i",
            "ie",
            "so",
            "to",
            "the",

        ] + list(calendar.day_name) + list(calendar.day_abbr) \
            + list(calendar.month_name) + list(calendar.month_abbr)
        # token_counter: global frequency of every non-stopword token seen so far.
        self.token_counter: Counter[str] = Counter()
        self.stopwords: FrozenSet[str] = frozenset(set(stopwords))
        #print(self.__filter_stopwords(['LDAP', 'Built', 'with']))

    def build(self, sequences: List[List[str]]) -> None:
        """Count tokens over many tokenized log lines at once."""
        print("Build vocab with examples: ", len(sequences))
        for sequence in sequences:
            # Filtering here is redundant with update(), but harmless since the
            # filter is idempotent.
            sequence = self.__filter_stopwords(sequence)
            #print(sequence)
            self.update(sequence)

    def update(self, sequence: List[str]) -> None:
        """Add one tokenized log line to the frequency counts."""
        sequence = self.__filter_stopwords(sequence)
        self.token_counter.update(sequence)

    def topk_tokens(self, sequence: List[str], topk: int = 3) -> Tuple[str, ...]:
        """Return up to `topk` tokens of `sequence` with the highest global counts."""
        sequence = self.__filter_stopwords(sequence)
        token_count = [(token, self.token_counter[token]) for token in set(sequence)]
        topk_tuples = heapq.nlargest(topk, token_count, key=lambda x: x[1])
        topk_keys = tuple([t[0] for t in topk_tuples])
        return topk_keys

    def __len__(self) -> int:
        return len(self.token_counter)

    def __filter_stopwords(self, sequence: List[str]) -> List[str]:
        # Drop stopwords and very short tokens (length <= 2).
        return [
            token
            for token in sequence
            if (len(token) > 2) and (token not in self.stopwords)
        ]


def clean(s: str) -> Tuple[str, str]:
    """Normalize a raw log line.

    Returns a pair ``(content, format_signature)`` where ``content`` is the
    line with punctuation stripped and digit-bearing words removed, and
    ``format_signature`` is the sorted set of non-alphanumeric characters that
    occurred in the line (a cheap "log format" fingerprint).
    """
    log_format = re.sub(r'[0-9A-Za-z, ]+', '', s)
    unique_chars = list(set(log_format))
    sorted_string = ''.join(sorted(unique_chars))
    # NOTE(review): in this alternation `$` matches end-of-input (not a literal
    # dollar sign) and `\.?!` matches an optional dot followed by `!`;
    # preserved as-is from upstream.
    s = re.sub(':|\(|\)|=|,|"|\{|\}|@|$|\[|\]|\||;|\.?!', ' ', s)
    s = " ".join([word for word in s.strip().split() if not bool(re.search(r'\d', word))])
    # trantab = str.maketrans(dict.fromkeys(list(string.punctuation)))
    return s, sorted_string


def h_clustering(
    contents: Dict[int, Tuple[str, str]],
) -> Tuple[Dict[Tuple[str, ...], Dict[str, Any]], int, int]:
    """Two-level clustering of cleaned logs.

    ``contents`` maps log index -> (cleaned content, format signature).
    Coarse clusters key on each log's top-3 frequent tokens; inside each coarse
    cluster, logs are sub-grouped by format signature (fine clusters).
    Returns (clusters, coarse cluster count, fine cluster count).
    """
    t1 = time.time()
    vocab = Vocab()
    vocab.build([v[0].split() for v in contents.values()])
    t2 = time.time()
    # print("Build time: ", t2 - t1)

    # hierichical clustering
    hierichical_clusters = {}
    for k, v in contents.items():
        # Sorted tuple so the key is order-independent.
        frequent_token = tuple(sorted(vocab.topk_tokens(v[0].split(), 3)))
        log_format = v[1]
        if frequent_token not in hierichical_clusters:
            hierichical_clusters[frequent_token] = {"size": 1, "cluster": {log_format: [k]}}
        else:
            hierichical_clusters[frequent_token]["size"] = hierichical_clusters[frequent_token]["size"] + 1
            if log_format not in hierichical_clusters[frequent_token]["cluster"]:
                hierichical_clusters[frequent_token]["cluster"][log_format] = [k]
            else:
                hierichical_clusters[frequent_token]["cluster"][log_format].append(k)
    print("Number of coarse-grained clusters: ", len(hierichical_clusters.keys()))
    total_coarse_clusters = len(hierichical_clusters.keys())
    total_fine_clusters = 0
    for k, v in hierichical_clusters.items():
        total_fine_clusters += len(hierichical_clusters[k]["cluster"])
    print("Number of fine-grained clusters: ", total_fine_clusters)
    return hierichical_clusters, total_coarse_clusters, total_fine_clusters


def assign_labels(
    clusters: Dict[Tuple[str, ...], Dict[str, Any]], logs: List[str], granularity: str = "coarse"
) -> List[int]:
    """Flatten `clusters` into a per-log label list.

    Logs that were filtered out before clustering keep the label -1.
    """
    # Initialize the labels list with -1 for all logs
    labels = [-1] * len(logs)

    # Map each log ID to its cluster ID
    cluster_id = 0
    for frequent_tokens, cluster_info in clusters.items():
        if granularity == "coarse":
            # Assign cluster ID based on frequent tokens: one ID per coarse cluster.
            for log_format, log_ids in cluster_info["cluster"].items():
                for log_id in log_ids:
                    labels[log_id] = cluster_id
            cluster_id += 1
        elif granularity == "fine":
            # Assign unique cluster ID for each log format within frequent tokens.
            for log_format, log_ids in cluster_info["cluster"].items():
                for log_id in log_ids:
                    labels[log_id] = cluster_id
                cluster_id += 1

    return labels


def hierichical_clustering(
    logs: List[str], granularity: str = "fine"
) -> Tuple[List[int], int]:
    """Cluster raw log lines; returns (labels, number of clusters).

    Lines whose cleaned form has at most one word are skipped (label stays -1).
    """
    contents = {}
    for i, x in enumerate(logs):
        x, fx = clean(x)
        if len(x.split()) > 1:
            contents[i] = (x, fx)
    clusters, a, b = h_clustering(contents)
    labels = assign_labels(clusters, logs, granularity)
    if granularity == "coarse":
        return labels, a
    else:
        return labels, b


def replace_numbers_with_zero(text: str) -> str:
    """Collapse every integer/decimal literal in `text` to the digit 0."""
    return re.sub(r'\d+(\.\d+)?', '0', text)


def meanshift_clustering(logs: List[str]) -> Tuple[List[int], int]:
    """Cluster logs with MeanShift over TF-IDF vectors (numbers normalized to 0)."""

    text_column = [replace_numbers_with_zero(log) for log in logs]

    # Text preprocessing and vectorization
    vectorizer = TfidfVectorizer()
    data_matrix = vectorizer.fit_transform(text_column).toarray()

    # Mean Shift clustering
    mean_shift = MeanShift(bandwidth=0.5)
    labels = mean_shift.fit_predict(data_matrix).tolist()
    return labels, max(labels) + 1
(https://github.com/LogIntelligence/LogBatcher) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
from __future__ import annotations

from collections import OrderedDict
import re
from typing import List, Optional, Tuple

import numpy as np
from scipy.sparse import spmatrix
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.cluster import DBSCAN
from .sample import group_samples_clustering, dpp_sample
from .util import not_varibility
import random


class Cluster:
    """A partition of logs that are expected to share one template.

    Holds the member logs, their original indices, and the deduplicated
    sample batch (`batch_logs`) sent to the LLM.
    """

    def __init__(self) -> None:
        self.logs: List[str] = []          # member log lines (may contain duplicates)
        self.batch_logs: List[str] = []    # deduplicated + sampled subset for querying
        self.indexs: List[int] = []        # original positions of `logs` in the input
        self.size: int = 0                 # == len(self.logs)
        self.sample_log: str = ''          # representative log (first of the batch)

    def append_log(self, log: str, index: int) -> None:
        """Add one log line (with its original index) to the cluster."""
        self.logs.append(log)
        self.indexs.append(index)
        self.size += 1

    def varaible_sampling(self, batch_size: int = 5, sample_method: str = "dpp") -> None:
        """Sample `batch_size` logs after masking characters (digits->0, letters->a).

        NOTE(review): method name typo ("varaible") is part of the public
        interface upstream and is kept unchanged.
        """
        self.batch_logs = list(OrderedDict.fromkeys(self.logs))  # remove duplicates

        def _replacer(match: re.Match[str]) -> str:
            char = match.group()
            return '0' if char.isdigit() else 'a'

        # `vars` shadows the builtin of the same name; kept from upstream.
        vars = []
        for var in self.batch_logs:
            vars.append(re.sub(r'[0-9a-zA-Z]', _replacer, var))
        vectorizer = TfidfVectorizer()
        try:
            tfidf_matrix = vectorizer.fit_transform(vars)
            tfidf_matrix = tfidf_matrix.toarray()
        except Exception as e:
            print("VARS", vars)
            raise ValueError("Error during TF-IDF vectorization:", e)

        # sample
        if len(self.batch_logs) <= batch_size:
            result = range(len(self.batch_logs))
        elif sample_method == "dpp":
            similarity_matrix = cosine_similarity(tfidf_matrix)
            result = dpp_sample(similarity_matrix, batch_size)
        elif sample_method == "random":
            random.seed(0)  # deterministic sampling across runs
            result = random.sample(range(0, len(self.batch_logs)), batch_size)
        elif sample_method == "similar":
            result = group_samples_clustering(tfidf_matrix, batch_size)[0]
        else:
            raise ValueError("Invalid sample method")
        self.batch_logs = [self.batch_logs[i] for i in result]

    def batching(self, batch_size: int = 10, min_size: int = 3, sample_method: str = "dpp") -> None:
        """Build `batch_logs`: dedupe, sample down to `batch_size`, pick `sample_log`.

        If the batch shows no variability (per `not_varibility`), it is trimmed
        to at most `min_size` entries.
        """
        self.batch_logs = list(OrderedDict.fromkeys(self.logs))  # remove duplicates
        if len(self.batch_logs) > batch_size:
            self.sample(batch_size, sample_method)
        # Defensive: guard against batch_logs collapsing to a single string.
        if type(self.batch_logs) == str:
            self.batch_logs = [self.batch_logs]
        self.sample_log = self.batch_logs[0]
        if not_varibility(self.batch_logs):
            self.batch_logs = self.batch_logs[:min_size] if len(self.batch_logs) > min_size else self.batch_logs

    def sample(self, batch_size: int, sample_method: str) -> None:
        """Reduce `batch_logs` to `batch_size` entries via the chosen strategy."""
        # vetorize logs
        vectorizer = TfidfVectorizer()
        tfidf_matrix = vectorizer.fit_transform(self.batch_logs)
        tfidf_matrix = tfidf_matrix.toarray()

        # sample
        if sample_method == "dpp":
            similarity_matrix = cosine_similarity(tfidf_matrix)
            result = dpp_sample(similarity_matrix, batch_size)
        elif sample_method == "random":
            random.seed(0)
            result = random.sample(range(0, len(self.batch_logs)), batch_size)
        elif sample_method == "similar":
            result = group_samples_clustering(tfidf_matrix, batch_size)[0]
        else:
            raise ValueError("Invalid sample method")
        self.batch_logs = [self.batch_logs[i] for i in result]
        return


def tokenize(log_content: str, tokenize_pattern: str = r'[ ,|]', removeDight: bool = True) -> List[str]:
    """Split a log line into tokens suitable for clustering.

    Drops digit-bearing words (when `removeDight`), path-like words, key=value
    values (keeps the key), and parenthesized fragments. Falls back to the
    digit-normalized full line when nothing survives.
    """
    words = re.split(tokenize_pattern, log_content)
    new_words = []
    for word in words:
        if '=' in word:
            ws = word.split('=')
            if len(ws) <= 2:
                new_words.append(ws[0])
            else:
                # might be some parameters of a URL
                pass

        elif removeDight and re.search(r'\d', word):
            pass
        elif '/' in word.lower() or re.match(r"^[a-zA-Z][+-]$|^[+-][a-zA-Z]$", word):
            pass
        else:
            word = re.sub(r"\([^)]*\)", "", word)
            new_words.append(word)
    new_words = [word for word in new_words if word]  # remove null
    if new_words == []:
        new_words.append(re.sub(r'\d+(\.\d+)?', '0', log_content))
    return new_words


def vectorize(tokenized_logs: List[List[str]]) -> spmatrix:
    """TF-IDF vectorize pre-tokenized logs (identity tokenizer, case preserved)."""
    vectorizer = TfidfVectorizer(tokenizer=lambda x: x, lowercase=False, token_pattern=None)
    return vectorizer.fit_transform(tokenized_logs)


def cluster(vectorized_logs: spmatrix, eps: float = 0.5) -> Tuple[np.ndarray, int]:
    """DBSCAN over TF-IDF vectors; returns (labels, cluster count).

    Noise points get label -1 and are not included in the count.
    """
    cluster = DBSCAN(eps=eps, min_samples=5)
    cluster.fit(vectorized_logs)
    labels = cluster.labels_
    cluster_nums = max(labels) + 1
    return labels, cluster_nums


def reassign_clusters(
    labels: np.ndarray, cluster_nums: int, tokenized_logs: List[List[str]]
) -> Tuple[np.ndarray, int]:
    """Give every DBSCAN noise point (-1) a fresh cluster id, merging exact
    duplicates (same joined token sequence) into the same new cluster.

    NOTE(review): indentation reconstructed from a collapsed source; matches
    upstream LogBatcher semantics (one new id per distinct noise log).
    """
    mergerd_logs = []
    for tokenized_log in tokenized_logs:
        mergerd_logs.append(' '.join(tokenized_log))

    for i in range(len(labels)):
        if labels[i] == -1:
            for j in range(i+1, len(labels)):
                if labels[j] == -1 and mergerd_logs[i] == mergerd_logs[j]:
                    labels[j] = cluster_nums
            labels[i] = cluster_nums
            cluster_nums += 1
    return labels, cluster_nums


def process_new_cluster(
    new_cluster: Cluster, clusters: List[Optional[Cluster]], batch_size: int, min_size: int = 3
) -> int:
    """Batch and register `new_cluster` if non-empty; return 1 if added, else 0."""
    if new_cluster.size != 0:
        new_cluster.batching(batch_size, min_size)
        clusters.append(new_cluster)
        return 1
    return 0
class TimeoutException(Exception):
    """Raised when a regex search exceeds its time budget."""
    pass


def timeout_handler(signum: int, frame: Optional[FrameType]) -> None:
    """SIGALRM handler: abort the in-flight regex search."""
    raise TimeoutException()


def safe_search(pattern: str, string: str, timeout: float = 0.5) -> Optional[re.Match[str]]:
    """Run ``re.search`` with a wall-clock timeout (Unix only).

    Bug fix: ``signal.alarm`` accepts only whole seconds, so the original
    ``signal.alarm(timeout)`` raised TypeError for the float default of 0.5.
    ``signal.setitimer`` supports fractional timeouts.

    NOTE(review): CPython delivers signals between bytecode instructions, so
    a catastrophic backtracking search inside the C regex engine may not be
    interrupted until it returns — the timeout is best-effort. TODO confirm
    whether a hard timeout is required here.

    Returns the match object, or None on no match or timeout.
    """
    signal.signal(signal.SIGALRM, timeout_handler)
    signal.setitimer(signal.ITIMER_REAL, timeout)
    try:
        result = re.search(pattern, string)
    except TimeoutException:
        result = None
    finally:
        signal.setitimer(signal.ITIMER_REAL, 0)  # always cancel the timer
    return result


def extract_variables(log: str, template: str) -> Optional[Tuple[str, ...]]:
    """Extract the variable parts of `log` according to `template`.

    The template's ``<*>`` placeholders become non-greedy capture groups;
    returns the captured variable strings, or None if the log does not
    match the template.
    """
    log = re.sub(r'\s+', ' ', log.strip())  # DS: collapse whitespace first
    pattern_parts = template.split("<*>")
    pattern_parts_escaped = [re.escape(part) for part in pattern_parts]
    regex_pattern = "(.*?)".join(pattern_parts_escaped)
    regex = "^" + regex_pattern + "$"
    matches = safe_search(regex, log, 1)
    if matches:
        return matches.groups()
    return None


def matches_template(log: str, cached_pair: Tuple[str, str]) -> Optional[str]:
    """Check whether `log` fits a cached (reference_log, template) pair.

    Returns a refined template (placeholders that captured an empty string
    are dropped) or None when the log cannot belong to this template.
    """
    reference_log = cached_pair[0]
    template = cached_pair[1]

    # Token-count gap > 1 means the log cannot plausibly share the template.
    if abs(len(log.split()) - len(reference_log.split())) > 1:
        return None

    try:
        groups = extract_variables(log, template)
    except Exception:  # was a bare except; keep the best-effort fallback
        groups = None
    if groups is None:
        return None

    # Rebuild the template, dropping placeholders whose variable is empty.
    parts = []
    for index, part in enumerate(template.split("<*>")):
        parts.append(part)
        if index < len(groups):
            parts.append('' if groups[index] == '' else '<*>')

    return ''.join(parts)


def prune_from_cluster(template: str, cluster: Cluster) -> Tuple[Cluster, Cluster]:
    """Split `cluster` into logs matching `template` and logs that do not.

    Non-matching logs move into a new cluster; the original cluster keeps
    only the matching ones. Returns (pruned_cluster, new_cluster).
    """
    new_cluster = Cluster()
    logs, indexs = cluster.logs, cluster.indexs
    for log, index in zip(logs, indexs):
        if extract_variables(log, template) is None:
            new_cluster.append_log(log, index)
    if new_cluster.size != 0:
        # NOTE: membership tests also remove duplicates of pruned entries —
        # this matches the original behavior and is relied upon by callers.
        old_logs = [log for log in logs if log not in new_cluster.logs]
        old_indexs = [index for index in indexs if index not in new_cluster.indexs]
        cluster.logs = old_logs
        cluster.indexs = old_indexs
    return cluster, new_cluster
from __future__ import annotations

import time
from typing import Dict, List, Tuple

from openai import OpenAI
# from together import Together
from tenacity import retry, stop_after_attempt, wait_random_exponential
from tools.logging import logger
from .cluster import Cluster
from .postprocess import post_process
from .matching import prune_from_cluster
from .postprocess import correct_single_template
from .util import verify_template, count_message_tokens
from .parsing_cache import ParsingCache

class Parser:
    """LLM-backed template extractor for a batch of clustered logs.

    Wraps an OpenAI chat model and tracks per-run accounting:
    `token_list` is [invocation_count, prompt_token_total] and
    `time_consumption_llm` accumulates wall-clock seconds spent in the LLM.
    """

    def __init__(self, model: str, theme: str, config: Dict[str, str]) -> None:
        """Configure the LLM client.

        Args:
            model: chat model name; only OpenAI ('gpt*') models are supported.
            theme: free-form run label (stored, not interpreted here).
            config: must contain 'api_key_from_openai' and
                'api_key_from_together' keys (the latter may be empty).

        Raises:
            ValueError: when both API keys are empty, or when a non-GPT
                model is requested (Together support is commented out).
        """
        self.model: str = model
        self.theme: str = theme
        self.dataset: str = 'null'
        # [number of LLM calls, total prompt tokens sent]
        self.token_list: List[int] = [0, 0]
        self.time_consumption_llm: float = 0
        if config['api_key_from_openai'] == '' and config['api_key_from_together'] == '':
            raise ValueError("Please provide your OpenAI API key and Together API key in the config.json file.")
        if 'gpt' in self.model:
            self.api_key = config['api_key_from_openai']
            self.client = OpenAI(
                api_key=self.api_key
            )
        else:
            # self.api_key = config['api_key_from_together']
            # self.client = Together(
            #     api_key=self.api_key
            # )
            raise ValueError("Only OpenAI API is supported for now.")

    @retry(wait=wait_random_exponential(min=1, max=8), stop=stop_after_attempt(10))
    def chat(self, messages: List[Dict[str, str]]) -> str:
        """Send one chat completion request; retried with backoff on failure.

        Low temperature (0.05) keeps template extraction near-deterministic.
        """
        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=0.05,
        )
        return response.choices[0].message.content.strip('\n')

    # NOTE: 'responce' spelling is kept — it is the public interface callers use.
    def get_responce(self, cluster: Cluster, cache_base: ParsingCache) -> Tuple[str, Cluster, Cluster]:
        """Obtain a template for `cluster`, via cache match or LLM call.

        Returns (template, pruned_cluster, new_cluster) where new_cluster
        holds the logs that did not fit the chosen template and must be
        re-processed by the caller.
        """
        # initialize
        logs = cluster.batch_logs
        sample_log = cluster.sample_log

        # Matching and Pruning: try the cache first; if any log matches a
        # cached template, prune the cluster against it and short-circuit.
        new_cluster = Cluster()
        for log in cluster.logs:
            template, _, _ = cache_base.match_event(log)
            if template != "NoMatch":
                cluster, new_cluster = prune_from_cluster(
                    template, cluster)
                if new_cluster.size >= 0 and new_cluster.size < cluster.size:
                    return template, cluster, new_cluster
                elif new_cluster.size == cluster.size:
                    # Nothing matched after all: restore the cluster and keep looking.
                    cluster.logs, cluster.indexs = new_cluster.logs, new_cluster.indexs
                    new_cluster = Cluster()

        # historical variables: sample previously seen variable values to
        # prime the LLM prompt.
        variable_cluster = Cluster()
        variable_cluster.logs = cache_base.variable_candidates
        if variable_cluster.logs != []:
            variable_cluster.varaible_sampling(5)
        variables = variable_cluster.batch_logs

        variable_prompt = f' Historical variables: {variables}.' if variables != [] else ''
        instruction = "You will be provided with some log messages separated by line break. You must abstract variables with `{{placeholders}}` to extract the corresponding template. The variable type in log messages can be any of the following: ['url', 'IPv4_port', 'host_port', 'package_host', 'IPv6', 'Mac_address', 'time', 'path', 'id', 'date', 'duration', 'size', 'numerical', 'weekday_months', 'user_name']." + variable_prompt + " Constant text and strings should not be recognized as variables.\nPrint the input log's template delimited by backticks."

        # invoke LLM; on any failure fall back to the raw sample log, which
        # correct_single_template() below will turn into a usable template.
        messages = [
            {"role": "system", "content": instruction},
            {"role": "user", "content": '\n'.join(f'Log[{i+1}]: `{log}`' for i, log in enumerate(logs))}
        ]
        try:
            t0 = time.time()
            answer = self.chat(messages)
            self.token_list[0] += 1
            self.token_list[1] += count_message_tokens(messages, self.model)
            self.time_consumption_llm += (time.time() - t0)
        except Exception as e:
            logger.error(f"invoke LLM error: {e}")
            answer = sample_log

        template = post_process(answer)
        if not verify_template(template):
            template = correct_single_template(sample_log)

        # Prune against the LLM answer; if it matched nothing, fall back to a
        # rule-derived template from the sample log instead.
        cluster, new_cluster = prune_from_cluster(template, cluster)
        if new_cluster.size == cluster.size:
            cluster.logs, cluster.indexs = new_cluster.logs, new_cluster.indexs
            new_cluster = Cluster()
            template = correct_single_template(sample_log)
        return template, cluster, new_cluster
# Changes from original (parsing_base_old.py):
# - Returns a result dict (logs_df, templates_df, cache, metrics, template_samples)
#   instead of writing CSV/JSON files directly to disk.
# - Replaced print() calls with structured logger (tools.logging.logger).
# - Made `cache` an optional parameter to support reuse across calls.
# - Added _extract_template_samples() helper to extract template->sample-log mappings.
# - Default chunk_size raised from 10 000 to 30 000.

import time
import pandas as pd
from collections import Counter
from typing import List, Dict, Any, Optional
from tqdm import tqdm
from tools.logging import logger
from .vars import vars_update
from .cluster import Cluster,tokenize, vectorize, cluster, reassign_clusters, process_new_cluster
from .additional_cluster import hierichical_clustering,meanshift_clustering
from .util import verify_template
from .parsing_cache import ParsingCache

def _extract_template_samples(cache: ParsingCache) -> Dict[str, str]:
    """Extract template to sample log mapping from cache.

    Args:
        cache: ParsingCache instance containing template_tree

    Returns:
        Dictionary mapping template strings to their sample logs
    """
    template_samples = {}

    def traverse_tree(node):
        """Recursively traverse template tree to find all templates."""
        for key, value in node.items():
            if isinstance(value, tuple):
                # Tuple structure: (stat_len, wildcard_count, template, template_id, refer_log)
                template = value[2]  # event_template
                refer_log = value[4]  # sample log
                template_samples[template] = refer_log
            elif isinstance(value, dict):
                traverse_tree(value)

    traverse_tree(cache.template_tree)
    return template_samples

def single_dataset_parsing(
    dataset: str,
    contents: List[str],
    parser: Any,
    cache: Optional[ParsingCache] = None,
    batch_size: int = 10,  # number of logs that can be sent to LLM at once
    chunk_size: int = 30000,
    clustering_method: str = 'dbscan',
    debug: bool = True
) -> Dict[str, Any]:
    """Parse logs using clustering and LLM-based template extraction.

    Args:
        dataset: Name of the dataset being parsed
        contents: List of log messages to parse
        parser: Parser object with get_responce method
        cache: Optional ParsingCache instance for template caching
        batch_size: Size of batches for processing clusters
        chunk_size: Number of logs to process in each chunk
        clustering_method: Method for clustering ('dbscan', 'hierarchical', or 'meanshift')
        debug: Enable debug logging

    Returns:
        Dictionary containing:
            - logs_df: DataFrame with Content and EventTemplate columns
            - templates_df: DataFrame with EventId, EventTemplate, and Occurrence columns
            - cache: Updated ParsingCache instance
            - metrics: Dictionary with parsing statistics
            - template_samples: Dictionary mapping templates to sample logs
    """
    if cache is None:
        cache = ParsingCache()

    logs = contents
    log_chunk: List[str] = []
    log_chunk_index: List[int] = []

    logger.info(f'Parsing {len(logs)} logs in dataset {dataset}...')

    # outputs_index[i] holds the cache template id for log i; resolved to
    # template strings in one pass at the end.
    outputs: List[Optional[str]] = [None for _ in range(len(logs))]
    outputs_index: List[Optional[int]] = [None for _ in range(len(logs))]

    # Parsing
    t1 = time.time()
    iterable = tqdm(enumerate(logs), total=len(logs), unit="log")
    for index, log in iterable:

        # Cheap path: reuse a cached template when one matches.
        match_results = cache.match_event(log)
        if match_results[0] != "NoMatch":
            # outputs[index] = match_results[0]
            outputs_index[index] = match_results[1]
        else:
            log_chunk.append(log)
            log_chunk_index.append(index)


        # Parsing with LLM: flush the chunk when full, or at end of input.
        if len(log_chunk) == chunk_size or (len(log_chunk)!=0 and index == len(logs) - 1):
            # parsing start
            if debug:
                logger.debug(f'Parsing {len(log_chunk)} logs...')
            if clustering_method == 'dbscan':
                # tokenize -> vectorize -> cluster -> reassign_clusters
                tokenized_logs = [tokenize(log) for log in log_chunk]
                labels, cluster_nums = cluster(vectorize(tokenized_logs))
                labels, cluster_nums = reassign_clusters(labels, cluster_nums, tokenized_logs)
            elif clustering_method == 'hierarchical':
                labels, cluster_nums = hierichical_clustering(log_chunk)
            elif clustering_method == 'meanshift':
                labels, cluster_nums = meanshift_clustering(log_chunk)
            else:
                raise ValueError('Invalid clustering method')

            # create clusters
            clusters: List[Optional[Cluster]] = [None for _ in range(cluster_nums)]
            for i, label in enumerate(labels):
                if clusters[label] is None:
                    clusters[label] = Cluster()
                clusters[label].append_log(log_chunk[i], log_chunk_index[i])

            # sorting: largest clusters first
            clusters = sorted(clusters, key=lambda cluster: len(cluster.logs), reverse=True)

            # batching (comprehension used for its side effect on each cluster)
            [cluster.batching(batch_size) for cluster in clusters]

            # parsing: clusters may grow while iterating — process_new_cluster
            # appends pruned remainders to the end of `clusters`.
            # NOTE(review): the inner `index` shadows the outer loop variable;
            # harmless because the outer `for` rebinds it each iteration.
            for index, old_cluster in enumerate(clusters):
                template, old_cluster, new_cluster = parser.get_responce(old_cluster, cache_base = cache)
                # update clusters
                cluster_nums += process_new_cluster(new_cluster, clusters, batch_size)
                refer_log = old_cluster.logs[0]
                if template not in cache.template_list:
                    if verify_template(template):
                        if debug:
                            logger.debug('=' * 20)
                            logger.debug(f'New cluster processed, {len(set(cache.template_list))} templates identified till now:')
                            logger.debug(f'Refer Log: {refer_log}')
                            logger.debug(f'Output Template: {template}')
                        id, _, _ = cache.add_templates(event_template=template, insert=False, refer_log = refer_log)
                        cache.variable_candidates.extend(vars_update(refer_log, template, cache.variable_candidates))
                    else:
                        # Unverifiable template: fall back to caching the raw log.
                        id, _, _ = cache.add_templates(event_template=refer_log, insert=False, refer_log = refer_log)
                else:
                    id = cache.template_list.index(template)
                for index in old_cluster.indexs:
                    outputs_index[index] = id
            log_chunk = []
            log_chunk_index = []

    # Resolve template ids to template strings.
    outputs = [cache.template_list[i] for i in outputs_index]
    t2 = time.time()
    parsing_time = t2 - t1
    template_count = len(set(outputs))

    logger.info(f'Parsing complete: {parsing_time:.3f}s, {template_count} unique templates identified')

    # Create structured logs DataFrame
    logs_df = pd.DataFrame({'Content': logs, 'EventTemplate': outputs})

    # Create templates DataFrame, most frequent template first
    counter = Counter(outputs)
    items = list(counter.items())
    items.sort(key=lambda x: x[1], reverse=True)
    templates_df = pd.DataFrame(items, columns=['EventTemplate', 'Occurrence'])
    templates_df['EventId'] = [f"E{i + 1}" for i in range(len(templates_df))]
    templates_df = templates_df[['EventId', 'EventTemplate', 'Occurrence']]

    # Extract template-to-sample-log mapping
    template_samples = _extract_template_samples(cache)

    # Collect metrics
    metrics = {
        'dataset': dataset,
        'parsing_time': round(parsing_time, 3),
        'llm_invocation_time': round(parser.time_consumption_llm, 3),
        'cache_hit_num': cache.hit_num,
        'hash_table_size': len(cache.hashing_cache),
        'token_stats': parser.token_list,
        'template_count': template_count,
        'log_count': len(logs)
    }

    return {
        'logs_df': logs_df,
        'templates_df': templates_df,
        'cache': cache,
        'metrics': metrics,
        'template_samples': template_samples,
    }
from __future__ import annotations

from hashlib import sha256
import re
import sys
from types import FrameType
from typing import Any, Dict, List, Match, Optional, Tuple

# find_template() recurses once per token with backtracking; deep trees need
# a large recursion limit.
sys.setrecursionlimit(1000000)

import re
import signal

class TimeoutException(Exception):
    """Raised when a regex search exceeds its time budget."""
    pass

def timeout_handler(signum: int, frame: Optional[FrameType]) -> None:
    """SIGALRM handler: abort the in-flight regex search."""
    raise TimeoutException()

def safe_search(pattern: str, string: str, timeout: int = 1) -> Optional[Match[str]]:
    """re.search guarded by a SIGALRM timeout (whole seconds, Unix only)."""
    signal.signal(signal.SIGALRM, timeout_handler)
    signal.alarm(timeout)
    try:
        result = re.search(pattern, string)
    except TimeoutException:
        result = None
    finally:
        signal.alarm(0)
    return result

# _PATTERN = re.compile(r'(?:<\*>|\b\d+\b|[\s\/,:._-]+)')
# def old_standardize(log: str) -> str:
#     return _PATTERN.sub('', log)

# TODO: logb2 v3.1
_PATTERN1 = re.compile(r'/([^/]*)(?=/)')  # path components
_PATTERN2 = re.compile(r'\d')             # digits
_PATTERN3 = re.compile(r'[\/:,._-]+')     # / : , . _ -
_PATTERN4 = re.compile(r'\s')             # whitespace

def standardize(input_string: str) -> str:
    """Strip paths, digits, separators and whitespace to form a hash key.

    Two logs that differ only in variable content standardize to the same
    string, which backs the sha256 hashing cache below.
    """
    result = _PATTERN1.sub('', input_string)
    result = _PATTERN2.sub('', result)
    result = _PATTERN3.sub('', result)
    result = _PATTERN4.sub('', result)
    return result

def print_tree(move_tree: Dict[str, Any], indent: str = ' ') -> None:
    """Debug helper: pretty-print the nested template tree."""
    for key, value in move_tree.items():
        if isinstance(value, dict):
            print(f'{indent}|- {key}')
            print_tree(value, indent + '|  ')
        elif isinstance(value, tuple):
            print(f'{indent}|- {key}: tuple')
        else:
            print(f'{indent}|- {key}: {value}')


def lcs_similarity(X: List[str], Y: List[str]) -> float:
    """Longest-common-subsequence similarity of two token lists, in [0, 1]."""
    m, n = len(X), len(Y)
    c = [[0] * (n + 1) for _ in range(m + 1)]
    for i in range(1, m + 1):
        for j in range(1, n + 1):
            if X[i - 1] == Y[j - 1]:
                c[i][j] = c[i - 1][j - 1] + 1
            else:
                c[i][j] = max(c[i][j - 1], c[i - 1][j])
    return 2 * c[m][n] / (m + n)


class ParsingCache:
    """Prefix-tree cache of parsed templates with a sha256 fast path.

    Attributes:
        template_tree: nested dict keyed by template tokens; leaves are tuples
            (constant_token_count, wildcard_count, template, template_id, refer_log).
        template_list: templates indexed by template_id.
        hashing_cache: sha256(standardized log) -> (standardized, template, id).
        variable_candidates: variable values harvested for LLM prompting.
        hit_num: number of hash-cache hits (statistics only).
    """
    def __init__(self) -> None:
        self.template_tree: Dict[str, Any] = {}
        self.template_list: List[str] = []
        self.hashing_cache: Dict[str, Tuple[str, str, int]] = {}
        self.variable_candidates: List[str] = []
        self.hit_num: int = 0

    def add_templates(
        self,
        event_template: str,
        insert: bool = True,
        relevant_templates: Optional[List[str]] = None,
        refer_log: str = '',
    ) -> Tuple[int, Optional[str], Optional[bool]]:
        """Insert a template, or merge it with a similar existing one.

        With `insert=True` or no relevant templates, inserts directly.
        Otherwise looks for an equal-length relevant template with LCS
        similarity > 0.8 and merges into it via modify().

        Returns (template_id, merged_template_or_None, merge_success_or_None);
        id -1 means the template was rejected (empty or bare "<*>").
        """
        # if "<*>" not in event_template:
        #     self.template_tree["$CONSTANT_TEMPLATE$"][event_template] = event_template
        #     continue
        # original_template = event_template
        # event_template = self._preprocess_template(event_template)
        if relevant_templates is None:
            relevant_templates = []
        template_tokens = message_split(event_template)
        if not template_tokens or event_template == "<*>":
            return -1, None, None
        if insert or len(relevant_templates) == 0:
            id = self.insert(event_template, template_tokens, len(self.template_list), refer_log)
            self.template_list.append(event_template)
            return id,None,None
        # Find the most similar relevant template of the same token length.
        max_similarity = 0
        similar_template = None
        for rt in relevant_templates:
            splited_template1, splited_template2 = rt.split(), event_template.split()
            if len(splited_template1) != len(splited_template2):
                continue
            similarity = lcs_similarity(splited_template1, splited_template2)
            if similarity > max_similarity:
                max_similarity = similarity
                similar_template = rt
        if max_similarity > 0.8:
            success, id = self.modify(similar_template, event_template, refer_log)
            if not success:
                id = self.insert(event_template, template_tokens, len(self.template_list), refer_log)
                self.template_list.append(event_template)
            return id, similar_template, success
        else:
            id = self.insert(event_template, template_tokens, len(self.template_list), refer_log)
            self.template_list.append(event_template)
            return id,None,None

    def insert(self, event_template: str, template_tokens: List[str], template_id: int, refer_log: str = '') -> int:
        """Add a template to both the hashing cache and the token tree."""
        standardized = standardize(event_template)
        hash_key = sha256(standardized.encode()).hexdigest()
        self.hashing_cache[hash_key] = (standardized, event_template, template_id)

        start_token = template_tokens[0]
        if start_token not in self.template_tree:
            self.template_tree[start_token] = {}
        move_tree = self.template_tree[start_token]

        tidx = 1
        while tidx < len(template_tokens):
            token = template_tokens[tidx]
            if token not in move_tree:
                move_tree[token] = {}
            move_tree = move_tree[token]
            tidx += 1

        move_tree["".join(template_tokens)] = (
            sum(1 for s in template_tokens if s != "<*>"),
            template_tokens.count("<*>"),
            event_template,
            template_id,
            refer_log
        )  # statistic length, count of <*>, original_log, template_id
        return template_id

    def modify(self, similar_template: str, event_template: str, refer_log: str) -> Tuple[bool, int]:
        """Merge two templates token-wise (differences become <*>).

        Deletes the old template from the tree and re-inserts the merged one
        under the old id. Returns (success, id); (False, -1) if the old
        template could not be found for deletion.
        """
        merged_template = []
        similar_tokens = similar_template.split()
        event_tokens = event_template.split()
        i = 0
        for token in similar_tokens:
            if token == event_tokens[i]:
                merged_template.append(token)
            else:
                merged_template.append("<*>")
            i += 1
        merged_template = " ".join(merged_template)
        success, old_ids = self.delete(similar_template)
        if not success:
            return False, -1
        self.insert(merged_template, message_split(merged_template), old_ids, refer_log)
        self.template_list[old_ids] = merged_template
        return True, old_ids


    def delete(self, event_template: str) -> Tuple[bool, int | List[Any]]:
        """Remove a template leaf from the tree; returns (found, old_id)."""
        template_tokens = message_split(event_template)
        start_token = template_tokens[0]
        if start_token not in self.template_tree:
            return False, []
        move_tree = self.template_tree[start_token]

        tidx = 1
        while tidx < len(template_tokens):
            token = template_tokens[tidx]
            if token not in move_tree:
                return False, []
            move_tree = move_tree[token]
            tidx += 1
        old_id = move_tree["".join(template_tokens)][3]
        del move_tree["".join(template_tokens)]
        return True, old_id


    def match_event(self, log: str) -> Tuple[str, Any, List[str]]:
        """Match a log: hash cache first, then the token tree.

        Returns (template, template_id, relevant_templates); template is
        "NoMatch" when nothing fits. Tree hits are memoized into the hash
        cache for subsequent logs.
        """
        standardized = standardize(log)
        hash_key = sha256(standardized.encode()).hexdigest()
        if hash_key in self.hashing_cache:
            cached_str, template, id = self.hashing_cache[hash_key]
            if cached_str == standardized:
                self.hit_num += 1
                return template, id, []
        results = tree_match(self.template_tree, self.template_list, log)
        if results[0] != "NoMatch":
            standardized = standardize(log)
            hash_key = sha256(standardized.encode()).hexdigest()
            self.hashing_cache[hash_key] = (standardized, results[0], results[1])
        return results


    def _preprocess_template(self, template: str) -> str:
        # Currently a no-op; kept as an extension point.
        return template


def post_process_tokens(tokens: List[str], punc: str) -> List[str]:
    """Normalize tokens: collapse any <*>-bearing token to "<*>" and strip
    punctuation (except a small excluded set) from the rest."""
    excluded_str = ['=', '|', '(', ')', ";"]
    for i in range(len(tokens)):
        if tokens[i].find("<*>") != -1:
            tokens[i] = "<*>"
        else:
            new_str = ""
            for s in tokens[i]:
                if (s not in punc and s != ' ') or s in excluded_str:
                    new_str += s
            tokens[i] = new_str
    return tokens


def message_split(message: str) -> List[str]:
    """Tokenize a log/template on punctuation, dropping empty tokens and
    collapsing consecutive <*> placeholders into one."""
    punc = "!\"#$%&'()+,-/;:=?@.[\]^_`{|}~"
    splitters = "\s\\" + "\\".join(punc)
    splitter_regex = re.compile("([{}])".format(splitters))
    tokens = re.split(splitter_regex, message)

    tokens = list(filter(lambda x: x != "", tokens))

    tokens = post_process_tokens(tokens, punc)

    tokens = [
        token.strip()
        for token in tokens
        if token != "" and token != ' '
    ]
    # Merge runs of consecutive <*> tokens into a single placeholder.
    tokens = [
        token
        for idx, token in enumerate(tokens)
        if not (token == "<*>" and idx > 0 and tokens[idx - 1] == "<*>")
    ]
    return tokens



def tree_match(match_tree: Dict[str, Any], template_list: List[str], log_content: str) -> Tuple[str, Any, List[str]]:
    """Match a log against the template tree.

    Returns (template, template_id, relevant_templates) or
    ("NoMatch", "NoMatch", relevant_templates). A tree hit is only accepted
    when the log's whitespace token count is within 1 of the template's
    reference log ("length matters"); otherwise the top relevant template
    is tried with a full regex match.
    """
    log_tokens = message_split(log_content)
    template, template_id, refer_log, relevant_templates = match_template(match_tree, log_tokens)
    # length matters
    if template:
        if abs(len(log_content.split()) - len(refer_log.split())) <= 1:
            return (template, template_id, relevant_templates)
    elif len(relevant_templates) > 0:
        if match_log(log_content, relevant_templates[0]):
            return (relevant_templates[0], template_list.index(relevant_templates[0]), relevant_templates)
    return ("NoMatch", "NoMatch", relevant_templates)

def match_log(log: str, template: str) -> bool:
    """True when the log matches the template's regex (with <*> wildcards)."""
    pattern_parts = template.split("<*>")
    pattern_parts_escaped = [re.escape(part) for part in pattern_parts]
    regex_pattern = "(.*?)".join(pattern_parts_escaped)
    regex = "^" + regex_pattern + "$"
    matches = safe_search(regex, log)

    if matches == None:
        return False
    else:
        return True  # all(len(var.split()) == 1 for var in matches.groups())

def match_template(
    match_tree: Dict[str, Any], log_tokens: List[str]
) -> Tuple[Any, Any, str, List[str]]:
    """Pick the best candidate from find_template()'s results.

    Candidates are ranked by most constant tokens, then fewest wildcards.
    Returns (template, template_id, refer_log, relevant_templates), or
    (False, False, '', relevant_templates) when there is no candidate.
    """
    results = []
    find_results = find_template(match_tree, log_tokens, results, [], 1)
    relevant_templates = find_results[1]
    if len(results) > 1:
        new_results = []
        for result in results:
            # Drop sentinel entries like ("<*>", None, None).
            if result[0] is not None and result[1] is not None and result[2] is not None:
                new_results.append(result)
    else:
        new_results = results
    if len(new_results) > 0:
        if len(new_results) > 1:
            new_results.sort(key=lambda x: (-x[1][0], x[1][1]))
        return new_results[0][1][2], new_results[0][1][3], new_results[0][1][4], relevant_templates
    return False, False, '', relevant_templates


def get_all_templates(move_tree: Dict[str, Any]) -> List[str]:
    """Collect every template stored at or below this tree node."""
    result = []
    for key, value in move_tree.items():
        if isinstance(value, tuple):
            result.append(value[2])
        else:
            result = result + get_all_templates(value)
    return result


def find_template(
    move_tree: Dict[str, Any],
    log_tokens: List[str],
    result: List[Tuple[Any, ...]],
    parameter_list: List[str],
    depth: int,
) -> Tuple[bool, List[str]]:
    """Recursive tree walk with <*> backtracking.

    Appends (leaf_key, leaf_tuple, parameters) candidates to `result`.
    Returns (matched, relevant_templates); relevant templates are collected
    from subtrees when no exact match exists (only at depth >= 2).
    flag values: 0 = dead end, 1 = descended but no match, 2 = match.
    """
    flag = 0  # no further find
    if len(log_tokens) == 0:
        # Out of tokens: any leaf at this node is a full match.
        for key, value in move_tree.items():
            if isinstance(value, tuple):
                result.append((key, value, tuple(parameter_list)))
                flag = 2  # match
        # A trailing <*> may also consume the empty string.
        if "<*>" in move_tree:
            parameter_list.append("")
            move_tree = move_tree["<*>"]
            if isinstance(move_tree, tuple):
                result.append(("<*>", None, None))
                flag = 2  # match
            else:
                for key, value in move_tree.items():
                    if isinstance(value, tuple):
                        result.append((key, value, tuple(parameter_list)))
                        flag = 2  # match
        # return (True, [])
    else:
        token = log_tokens[0]

        relevant_templates = []
        # Exact-token branch.
        if token in move_tree:
            find_result = find_template(move_tree[token], log_tokens[1:], result, parameter_list,depth+1)
            if find_result[0]:
                flag = 2  # match
            elif flag != 2:
                flag = 1  # further find but no match
            relevant_templates = relevant_templates + find_result[1]
        # Wildcard branch: let <*> absorb a variable-length prefix of tokens,
        # resuming wherever a child key of the <*> node reappears in the log.
        if "<*>" in move_tree:
            if isinstance(move_tree["<*>"], dict):
                next_keys = move_tree["<*>"].keys()
                next_continue_keys = []
                for nk in next_keys:
                    nv = move_tree["<*>"][nk]
                    if not isinstance(nv, tuple):
                        next_continue_keys.append(nk)
                idx = 0
                while idx < len(log_tokens):
                    token = log_tokens[idx]
                    if token in next_continue_keys:
                        parameter_list.append("".join(log_tokens[0:idx]))
                        find_result = find_template(
                            move_tree["<*>"], log_tokens[idx:], result, parameter_list,depth+1
                        )
                        if find_result[0]:
                            flag = 2  # match
                        elif flag != 2:
                            flag = 1  # further find but no match
                        relevant_templates = relevant_templates + find_result[1]
                        if parameter_list:
                            parameter_list.pop()
                        next_continue_keys.remove(token)
                    idx += 1
                if idx == len(log_tokens):
                    # <*> swallows every remaining token.
                    parameter_list.append("".join(log_tokens[0:idx]))
                    find_result = find_template(
                        move_tree["<*>"], log_tokens[idx + 1 :], result, parameter_list,depth+1
                    )
                    if find_result[0]:
                        flag = 2  # match
                    else:
                        if flag != 2:
                            flag = 1
                    # relevant_templates = relevant_templates + find_result[1]
                    if parameter_list:
                        parameter_list.pop()
    if flag == 2:
        return (True, [])
    if flag == 1:
        return (False, relevant_templates)
    if flag == 0:
        if depth >= 2:
            return (False, get_all_templates(move_tree))
        else:
            return (False, [])
def post_process(response: str) -> str:
    """Extract and normalize a template from an LLM response.

    Takes the backtick-delimited span of the response, drops candidates that
    are empty or consist only of whitespace/placeholders, picks the longest
    remaining candidate, converts ``{{var}}``/``${var}`` placeholders to
    ``<*>``, and runs correct_single_template(). Returns '' when no usable
    template is found.

    Bug fix: the original removed items from `tmps` while iterating it
    (``for tmp in tmps: ... tmps.remove(tmp)``), which skips the element
    after each removal; filtering is now done with a comprehension.
    """
    response = response.replace('\n', '')
    first_backtick_index = response.find('`')
    last_backtick_index = response.rfind('`')
    if first_backtick_index == -1 or last_backtick_index == -1 or first_backtick_index == last_backtick_index:
        tmps = []
    else:
        tmps = response[first_backtick_index: last_backtick_index + 1].split('`')
    # Keep only candidates with real content (not just spaces and <*>).
    tmps = [t for t in tmps if t.replace(' ', '').replace('<*>', '') != '']
    tmp = ''
    if len(tmps) == 1:
        tmp = tmps[0]
    elif len(tmps) > 1:
        tmp = max(tmps, key=len)

    template = re.sub(r'\{\{.*?\}\}', '<*>', tmp)
    template = re.sub(r'\$\{.*?\}', '<*>', template)
    template = correct_single_template(template)
    if template.replace('<*>', '').replace(' ', '') == '':
        template = ''

    return template

def exclude_digits(string: str) -> bool:
    '''
    Decide whether a token is digit-dominated and should become a variable.

    Tokens that start with a letter or contain any uppercase letter are kept
    as constants; otherwise 4+ digits, or a digit ratio above 30%, marks the
    token as a variable.
    '''
    pattern = r'\d'
    digits = re.findall(pattern, string)
    if len(digits) == 0 or string[0].isalpha() or any(c.isupper() for c in string):
        return False
    elif len(digits) >= 4:
        return True
    else:
        return len(digits) / len(string) > 0.3

def correct_single_template(template: str, user_strings: Optional[Set[str]] = None) -> str:
    """Apply all rules to process a template.

    DS (Double Space)
    BL (Boolean)
    US (User String)
    DG (Digit)
    PS (Path-like String)
    WV (Word concatenated with Variable)
    DV (Dot-separated Variables)
    CV (Consecutive Variables)

    """

    boolean = {'true', 'false'}
    default_strings = {'null', 'root'}  # 'null', 'root', 'admin'
    path_delimiters = {  # reduced set of delimiters for tokenizing for checking the path-like strings
        r'\s', r'\,', r'\!', r'\;', r'\:',
        r'\=', r'\|', r'\"', r'\'', r'\+',
        r'\[', r'\]', r'\(', r'\)', r'\{', r'\}'
    }
    token_delimiters = path_delimiters.union({  # all delimiters for tokenizing the remaining rules
        r'\.', r'\-', r'\@', r'\#', r'\$', r'\%', r'\&', r'\/'
    })

    if user_strings:
        default_strings = default_strings.union(user_strings)

    # apply DS
    # Note: this is not necessary while postprocessing
    template = template.strip()
    template = re.sub(r'\s+', ' ', template)

    # apply PS: whole path-like tokens become variables
    p_tokens = re.split('(' + '|'.join(path_delimiters) + ')', template)
    new_p_tokens = []
    for p_token in p_tokens:
        if re.match(r'^(\/[^\/]+)+\/?$', p_token) or re.match(r'.*/.*\..*', p_token) or re.match(r'^([a-zA-Z0-9-]+\.){3,}[a-z]+$', p_token):
            p_token = '<*>'

        new_p_tokens.append(p_token)
    template = ''.join(new_p_tokens)
    # tokenize for the remaining rules
    tokens = re.split('(' + '|'.join(token_delimiters) + ')', template)  # tokenizing while keeping delimiters
    new_tokens = []
    for token in tokens:
        # apply BL, US
        for to_replace in boolean.union(default_strings):
            if token == to_replace:
                token = '<*>'

        # apply DG
        # Note: hexadecimal num also appears a lot in the logs
        if exclude_digits(token):
            token = '<*>'

        # apply WV
        if re.match(r'^[^\s\/]*<\*>[^\s\/]*$', token) or re.match(r'^<\*>.*<\*>$', token):
            token = '<*>'
        # collect the result
        new_tokens.append(token)

    # make the template using new_tokens
    template = ''.join(new_tokens)

    # Substitute consecutive variables only if separated with any delimiter including "." (DV)
    while True:
        prev = template
        template = re.sub(r'<\*>\.<\*>', '<*>', template)
        if prev == template:
            break

    # Substitute consecutive variables only if not separated with any delimiter including space (CV)
    # NOTE: this should be done at the end
    while True:
        prev = template
        template = re.sub(r'<\*><\*>', '<*>', template)
        if prev == template:
            break

    # NOTE(review): the repeated while-loops below are kept verbatim (including
    # duplicates) because later replacements can re-create earlier patterns —
    # the ordering is load-bearing.
    while "#<*>#" in template:
        template = template.replace("#<*>#", "<*>")

    while "<*>:<*>" in template:
        template = template.replace("<*>:<*>", "<*>")

    while "<*>/<*>" in template:
        template = template.replace("<*>/<*>", "<*>")

    while " #<*> " in template:
        template = template.replace(" #<*> ", " <*> ")

    while "<*>:<*>" in template:
        template = template.replace("<*>:<*>", "<*>")

    while "<*>#<*>" in template:
        template = template.replace("<*>#<*>", "<*>")

    while "<*>/<*>" in template:
        template = template.replace("<*>/<*>", "<*>")

    while "<*>@<*>" in template:
        template = template.replace("<*>@<*>", "<*>")

    while "<*>.<*>" in template:
        template = template.replace("<*>.<*>", "<*>")

    while ' "<*>" ' in template:
        template = template.replace(' "<*>" ', ' <*> ')

    while " '<*>' " in template:
        template = template.replace(" '<*>' ", " <*> ")

    while "<*><*>" in template:
        template = template.replace("<*><*>", "<*>")

    # Fold a size unit following a variable into the variable itself.
    template = re.sub(r'<\*> [KGTM]?B\b', '<*>', template)

    return template
a/src/detectmatelibrary/parsers/logbatcher/engine/sample.py b/src/detectmatelibrary/parsers/logbatcher/engine/sample.py new file mode 100644 index 0000000..d0444de --- /dev/null +++ b/src/detectmatelibrary/parsers/logbatcher/engine/sample.py @@ -0,0 +1,140 @@ +# MIT License +# +# Copyright (c) 2024 LogIntelligence +# +# Based on LogBatcher (https://github.com/LogIntelligence/LogBatcher) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import annotations + +from typing import Any, List, Tuple + +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.metrics.pairwise import cosine_similarity +import random +from sklearn.cluster import KMeans +import numpy as np + + +def dpp_sample(S: np.ndarray, k: int) -> List[int]: + # S: similarity matrix + # k: number of items to sample + n = S.shape[0] + + # Initialize empty set Y + Y = set() + for _ in range(k): + best_i = -1 + best_p = -1 + + for i in range(n): + if i not in Y: + # Compute determinant of submatrix + det_Yi = np.linalg.det(S[np.ix_(list(Y) + [i], list(Y) + [i])]) + + # Compute probability of adding i to Y + p_add = det_Yi / (1 + det_Yi) + + if p_add > best_p: + best_p = p_add + best_i = i + + # Add best item to Y + Y.add(best_i) + + return list(Y) + + +def sample_from_clusters(clusters: List[Any], shot: int = 32) -> List[Tuple[str, str]]: + clusters = sorted(clusters, key=lambda cluster: len(cluster.indexs), reverse=True) + # form a random list + random.seed(0) + random_int_list = [random.randint(0, 1000) for _ in range(10)] + + sample_clusters = [] + sample_pairs = [] + for cluster in clusters: + if len(sample_clusters) >= shot: + break + if cluster.oracle_template not in [pair[1] for pair in sample_clusters]: + sample_clusters.append((cluster, cluster.oracle_template)) + + for random_int in random_int_list: + if len(sample_pairs) >= shot: + break + for item in sample_clusters: + length = len(item[0].logs) + if len(sample_pairs) >= shot: + break + else: + sample_pairs.append((item[0].logs[random_int%length], item[1])) + return sample_pairs + + +def nearest_k_pairs_from_log( + log: str, sample_pairs: List[Tuple[str, str]], k: int +) -> List[Tuple[str, str]]: + vectorizer = TfidfVectorizer() + tfidf_matrix = vectorizer.fit_transform([log] + [pair[0] for pair in sample_pairs]) + similarity_matrix = cosine_similarity(tfidf_matrix) + similarity = similarity_matrix[0][1:] + nearest_k_indices = 
similarity.argsort()[-k:][::-1] + nearest_k_pairs = [sample_pairs[i] for i in nearest_k_indices] + return nearest_k_pairs + + + +def group_samples_clustering(embed_matrix: np.ndarray, num_in_batch: int) -> List[List[int]]: + def _calculate_cos_similarities(v1: np.ndarray, v2: np.ndarray) -> np.ndarray: + num = np.dot(v1, v2.T) + denom = np.linalg.norm(v1, axis=1).reshape(-1, 1) * \ + np.linalg.norm(v2, axis=1) + similarity_matrix = num / denom + similarity_matrix[np.isneginf(similarity_matrix)] = 0 + similarity_matrix = 0.5 + 0.5 * similarity_matrix + return similarity_matrix + + if embed_matrix.shape[0] % num_in_batch: + n_clusters = embed_matrix.shape[0] // num_in_batch + 1 + else: + n_clusters = embed_matrix.shape[0] // num_in_batch + + # K-means clustering + kmeans = KMeans(n_clusters=n_clusters, random_state=0, + n_init="auto").fit(embed_matrix) + similarity_matrix = _calculate_cos_similarities( + embed_matrix, kmeans.cluster_centers_) # [n_samples, n_clusters] + similarity_rankings = np.argsort(-similarity_matrix, axis=1) + groups = [[] for _ in range(n_clusters)] + for sample_idx, label in enumerate(kmeans.labels_): + groups[label].append(sample_idx) + # Reassign to equalize the number of samples in each cluster + for group_idx, group in enumerate(groups): + if len(group) > num_in_batch: + groups[group_idx] = sorted( + group, key=lambda x: similarity_matrix[x, group_idx], reverse=True) + samples_to_reassign = groups[group_idx][num_in_batch:] + groups[group_idx] = groups[group_idx][:num_in_batch] + for sample_idx in samples_to_reassign: + for candi_group_idx in similarity_rankings[sample_idx]: + if len(groups[candi_group_idx]) < num_in_batch: + groups[candi_group_idx].append(sample_idx) + break + return groups diff --git a/src/detectmatelibrary/parsers/logbatcher/engine/util.py b/src/detectmatelibrary/parsers/logbatcher/engine/util.py new file mode 100644 index 0000000..8feb8a5 --- /dev/null +++ b/src/detectmatelibrary/parsers/logbatcher/engine/util.py @@ 
-0,0 +1,169 @@ +# MIT License +# +# Copyright (c) 2024 LogIntelligence +# +# Based on LogBatcher (https://github.com/LogIntelligence/LogBatcher) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +from __future__ import annotations + +import re +import string +from typing import Dict, List, Tuple + +import pandas as pd +import tiktoken + +def data_loader(file_name: str, dataset_format: str, file_format: str) -> List[str]: + if file_format == 'structured': + df = pd.read_csv(file_name) + contents = df['Content'].tolist() + elif file_format == 'raw': + with open(file_name, 'r') as f: + log_raws = f.readlines() + print(f"Total log lines: {len(log_raws)}") + headers, regex = generate_logformat_regex(dataset_format) + contents = log_to_dataframe(file_name, regex, headers, len(log_raws)) + return contents + + +def count_prompt_tokens(prompt: str, model_name: str) -> int: + """ + Count the number of tokens in the prompt + Models supported: gpt-4o-mini, gpt-3.5-turbo + """ + if model_name == 'gpt-4o-mini': + encoder = tiktoken.encoding_for_model('gpt-4o-mini') + elif model_name == 'gpt-3.5-turbo': + encoder = tiktoken.encoding_for_model('gpt-3.5-turbo') + else: + raise ValueError("Unsupported model: {}".format(model_name)) + + # ่ฎก็ฎ—็ผ–็ ๅŽ็š„tokenๆ•ฐ + prompt_tokens = encoder.encode(prompt) + return len(prompt_tokens) + + +def count_message_tokens(messages: List[Dict[str, str]], model_name: str = "gpt-3.5-turbo") -> int: + """ + Count the number of tokens in the messages + Models supported: gpt-4o-mini, gpt-3.5-turbo + """ + if model_name == 'gpt-4o-mini': + encoder = tiktoken.encoding_for_model('gpt-4o-mini') + elif model_name == 'gpt-3.5-turbo': + encoder = tiktoken.encoding_for_model('gpt-3.5-turbo') + else: + raise ValueError("Unsupported model: {}".format(model_name)) + + token_count = 0 + + for message in messages: + role_tokens = encoder.encode(message['role']) + content_tokens = encoder.encode(message['content']) + token_count += len(role_tokens) + len(content_tokens) + 4 + return token_count + + +def generate_logformat_regex(logformat: str) -> Tuple[List[str], str]: + """ + Function to generate regular expression to split log messages + Args: + 
logformat: log format, a string + Returns: + headers: headers of log messages + regex: regular expression to split log messages + """ + headers = [] + splitters = re.split(r'(<[^<>]+>)', logformat) + regex = '' + for k in range(len(splitters)): + if k % 2 == 0: + splitter = re.sub(' +', '\\\s+', splitters[k]) + regex += splitter + else: + header = splitters[k].strip('<').strip('>') + regex += '(?P<%s>.*?)' % header + headers.append(header) + pattern = '^' + regex + '$' + return headers, pattern + + +def log_to_dataframe(log_file: str, regex: str, headers: List[str], size: int) -> List[str]: + """ + Function to transform log file to contents + Args: + log_file: log file path + regex: regular expression to split log messages + headers: headers of log messages + size: number of log messages to read + Returns: + log_messages: list of log contents + """ + log_contents = [] + with open(log_file, 'r') as file: + for line in [next(file) for _ in range(size)]: + try: + if not headers: # If no headers are defined + log_contents.append(line.strip()) + continue + match = regex.search(line.strip()) + message = [match.group(header) for header in headers] + log_contents.append(message[-1]) + except Exception as e: + pass + return log_contents + + +def not_varibility(logs: List[str]) -> bool: + a_logs = [re.sub(r'\d+', '', log) for log in logs] + if len(set(a_logs)) == 1: + return True + return False + +def verify_template(template: str) -> bool: + template = template.replace("<*>", "") + template = template.replace(" ", "") + return any(char not in string.punctuation for char in template) + +if __name__ == "__main__": + import json + import csv + + # LogBacther + with open('/root/LogBatcher/messages.json', 'r') as file: + messages_dict = json.load(file) + data = [] + datasets = ['BGL', 'HDFS', 'OpenStack', 'OpenSSH', 'HPC', 'Zookeeper', 'Spark', 'Proxifier', 'HealthApp', 'Mac', 'Hadoop', 'Apache', 'Linux', 'Thunderbird'] + all = 0 + for dataset in datasets: + messages = 
messages_dict[dataset] + count = 0 + for message in messages: + count += count_message_tokens(message) + print(f"{dataset}: [{count}, {len(messages)}] -> {count/len(messages).__round__(2)}") + data.append([dataset, count, len(messages), (count/len(messages)).__round__(2)]) + all += count + print(f"all: {all}") + with open('/root/LogBatcher/output_lilac_0.csv', 'w', newline='') as csvfile: + writer = csv.writer(csvfile) + writer.writerow(["Dataset", "Value1", "Value2", "Value3"]) # ๅ†™ๅ…ฅๆ ‡้ข˜ + for row in data: + writer.writerow([row[0], row[1], row[2], row[3]]) # ๅ†™ๅ…ฅๆ•ฐๆฎ diff --git a/src/detectmatelibrary/parsers/logbatcher/engine/vars.py b/src/detectmatelibrary/parsers/logbatcher/engine/vars.py new file mode 100644 index 0000000..201b703 --- /dev/null +++ b/src/detectmatelibrary/parsers/logbatcher/engine/vars.py @@ -0,0 +1,41 @@ +# MIT License +# +# Copyright (c) 2024 LogIntelligence +# +# Based on LogBatcher (https://github.com/LogIntelligence/LogBatcher) +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +from __future__ import annotations + +import re +from typing import List + +from .matching import extract_variables + +def vars_update(refer_log: str, template: str, candidates: List[str]) -> List[str]: + new_variables = extract_variables(refer_log, template) + extend_vars = [] + if not new_variables: + return extend_vars + for var in new_variables: + var = re.sub(r'^\((.*)\)$|^\[(.*)\]$', r'\1\2', var) + if var not in candidates and not var.isdigit() and not var.isalpha() and len(var.split()) <= 3: + extend_vars.append(var) + return extend_vars \ No newline at end of file diff --git a/src/detectmatelibrary/parsers/logbatcher/parser.py b/src/detectmatelibrary/parsers/logbatcher/parser.py new file mode 100644 index 0000000..3009696 --- /dev/null +++ b/src/detectmatelibrary/parsers/logbatcher/parser.py @@ -0,0 +1,65 @@ +from detectmatelibrary.common.parser import CoreParser, CoreParserConfig +from detectmatelibrary.parsers.logbatcher.engine.parser import Parser as LLMParser +from detectmatelibrary.parsers.logbatcher.engine.parsing_cache import ParsingCache +from detectmatelibrary.parsers.logbatcher.engine.cluster import Cluster +from detectmatelibrary.parsers.logbatcher.engine.matching import extract_variables +from detectmatelibrary import schemas + +from typing import Any + + +class LogBatcherParserConfig(CoreParserConfig): + """Configuration for LogBatcherParser.""" + method_type: str = "logbatcher_parser" + model: str = "gpt-4o-mini" + api_key: str = "" + batch_size: int = 10 + + +class LogBatcherParser(CoreParser): + """LLM-based log parser wrapping LogBatcher, integrated as a CoreParser.""" + + def __init__( + self, + name: str = "LogBatcherParser", + config: LogBatcherParserConfig | 
dict[str, Any] = LogBatcherParserConfig(), + ) -> None: + if isinstance(config, dict): + config = LogBatcherParserConfig.from_dict(config, name) + + super().__init__(name=name, config=config) + + llm_config = { + "api_key_from_openai": config.api_key, + "api_key_from_together": "", + } + self._llm_parser = LLMParser(model=config.model, theme="default", config=llm_config) + self._cache = ParsingCache() + self._batch_size = config.batch_size + + def parse( + self, + input_: schemas.LogSchema, + output_: schemas.ParserSchema, + ) -> None: + log_content = input_["log"] + + template, event_id, _ = self._cache.match_event(log_content) + + if template == "NoMatch": + cluster = Cluster() + cluster.append_log(log_content, 0) + cluster.batching(self._batch_size) + + template, cluster, _ = self._llm_parser.get_responce(cluster, cache_base=self._cache) + + if template not in self._cache.template_list: + event_id, _, _ = self._cache.add_templates(template, refer_log=log_content) + else: + event_id = self._cache.template_list.index(template) + + variables = extract_variables(log_content, template) or () + + output_["template"] = template + output_["variables"].extend(list(variables)) + output_["EventID"] = event_id diff --git a/tests/test_parsers/test_logbatcher_parser.py b/tests/test_parsers/test_logbatcher_parser.py new file mode 100644 index 0000000..24e09d5 --- /dev/null +++ b/tests/test_parsers/test_logbatcher_parser.py @@ -0,0 +1,82 @@ +"""Temporary tests for LogBatcherParser integration. + +These tests verify that LogBatcherParser correctly wraps LogBatcher into the +CoreParser interface without requiring real API calls. 
+""" + +from unittest.mock import MagicMock, patch + +import pytest + +import detectmatelibrary.schemas as schemas +from detectmatelibrary.parsers.logbatcher import LogBatcherParser, LogBatcherParserConfig +from detectmatelibrary.utils.aux import time_test_mode + +time_test_mode() + +LOG = "Connection from 192.168.1.1 port 22" +# LLM response format: wrapped in backticks, with {{placeholder}} variables +LLM_RESPONSE = "`Connection from {{ip}} port {{port}}`" +EXPECTED_TEMPLATE = "Connection from <*> port <*>" + + +def _make_parser(): + """Create a LogBatcherParser with a mocked OpenAI client.""" + with patch("detectmatelibrary.parsers.logbatcher.engine.parser.OpenAI"): + config = LogBatcherParserConfig(api_key="test-key") + parser = LogBatcherParser(name="TestLogBatcherParser", config=config) + # Replace the chat method so no real HTTP calls are made + parser._llm_parser.chat = MagicMock(return_value=LLM_RESPONSE) + return parser + + +class TestLogBatcherParserInit: + def test_is_core_parser(self): + from detectmatelibrary.common.parser import CoreParser + with patch("detectmatelibrary.parsers.logbatcher.engine.parser.OpenAI"): + parser = LogBatcherParser(config=LogBatcherParserConfig(api_key="k")) + assert isinstance(parser, CoreParser) + + def test_config_method_type(self): + config = LogBatcherParserConfig(api_key="k") + assert config.method_type == "logbatcher_parser" + + +class TestLogBatcherParserParse: + def test_template_extracted(self): + parser = _make_parser() + log_schema = schemas.LogSchema({"logID": "1", "log": LOG}) + + result = parser.process(log_schema) + + assert result["template"] == EXPECTED_TEMPLATE + + def test_variables_extracted(self): + parser = _make_parser() + log_schema = schemas.LogSchema({"logID": "1", "log": LOG}) + + result = parser.process(log_schema) + + assert "192.168.1.1" in result["variables"] + assert "22" in result["variables"] + + def test_event_id_is_int(self): + parser = _make_parser() + log_schema = 
schemas.LogSchema({"logID": "1", "log": LOG}) + + result = parser.process(log_schema) + + assert isinstance(result["EventID"], int) + + def test_second_call_hits_cache(self): + """Second identical log must not trigger a new LLM call.""" + parser = _make_parser() + + log_schema1 = schemas.LogSchema({"logID": "1", "log": LOG}) + parser.process(log_schema1) + llm_call_count = parser._llm_parser.chat.call_count + + log_schema2 = schemas.LogSchema({"logID": "2", "log": LOG}) + parser.process(log_schema2) + + assert parser._llm_parser.chat.call_count == llm_call_count From 51f464c318b77a9b5b7a5164b62f82a10dcca9ba Mon Sep 17 00:00:00 2001 From: viktorbeck98 Date: Sat, 7 Mar 2026 12:31:33 +0100 Subject: [PATCH 2/3] create logbatcher parser --- .../parsers/logbatcher/engine/additional_cluster.py | 3 +-- src/detectmatelibrary/parsers/logbatcher/engine/matching.py | 2 +- .../parsers/logbatcher/engine/parsing_cache.py | 6 +++--- src/detectmatelibrary/parsers/logbatcher/engine/util.py | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/detectmatelibrary/parsers/logbatcher/engine/additional_cluster.py b/src/detectmatelibrary/parsers/logbatcher/engine/additional_cluster.py index 564e599..af6d61d 100644 --- a/src/detectmatelibrary/parsers/logbatcher/engine/additional_cluster.py +++ b/src/detectmatelibrary/parsers/logbatcher/engine/additional_cluster.py @@ -29,7 +29,6 @@ from collections import Counter from typing import Any, Dict, FrozenSet, List, Optional, Tuple -from sklearn.feature_extraction._stop_words import ENGLISH_STOP_WORDS import time import calendar import random @@ -92,7 +91,7 @@ def clean(s: str) -> Tuple[str, str]: log_format = re.sub(r'[0-9A-Za-z, ]+', '', s) unique_chars = list(set(log_format)) sorted_string = ''.join(sorted(unique_chars)) - s = re.sub(':|\(|\)|=|,|"|\{|\}|@|$|\[|\]|\||;|\.?!', ' ', s) + s = re.sub(r':|\(|\)|=|,|"|\{|\}|@|$|\[|\]|\||;|\.?!', ' ', s) s = " ".join([word for word in s.strip().split() if not bool(re.search(r'\d', 
word))]) # trantab = str.maketrans(dict.fromkeys(list(string.punctuation))) return s, sorted_string diff --git a/src/detectmatelibrary/parsers/logbatcher/engine/matching.py b/src/detectmatelibrary/parsers/logbatcher/engine/matching.py index 1d2a0d8..1fd2a05 100644 --- a/src/detectmatelibrary/parsers/logbatcher/engine/matching.py +++ b/src/detectmatelibrary/parsers/logbatcher/engine/matching.py @@ -35,7 +35,7 @@ class TimeoutException(Exception): pass -def timeout_handler(signum: int, frame: Optional[FrameType]) -> None: +def timeout_handler(_signum: int, _frame: Optional[FrameType]) -> None: raise TimeoutException() def safe_search(pattern: str, string: str, timeout: float = 0.5) -> Optional[re.Match[str]]: diff --git a/src/detectmatelibrary/parsers/logbatcher/engine/parsing_cache.py b/src/detectmatelibrary/parsers/logbatcher/engine/parsing_cache.py index d642c22..6cd7a22 100644 --- a/src/detectmatelibrary/parsers/logbatcher/engine/parsing_cache.py +++ b/src/detectmatelibrary/parsers/logbatcher/engine/parsing_cache.py @@ -38,7 +38,7 @@ class TimeoutException(Exception): pass -def timeout_handler(signum: int, frame: Optional[FrameType]) -> None: +def timeout_handler(_signum: int, _frame: Optional[FrameType]) -> None: raise TimeoutException() def safe_search(pattern: str, string: str, timeout: int = 1) -> Optional[Match[str]]: @@ -248,8 +248,8 @@ def post_process_tokens(tokens: List[str], punc: str) -> List[str]: def message_split(message: str) -> List[str]: - punc = "!\"#$%&'()+,-/;:=?@.[\]^_`{|}~" - splitters = "\s\\" + "\\".join(punc) + punc = "!\"#$%&'()+,-/;:=?@.[\\]^_`{|}~" + splitters = "\\s\\" + "\\".join(punc) splitter_regex = re.compile("([{}])".format(splitters)) tokens = re.split(splitter_regex, message) diff --git a/src/detectmatelibrary/parsers/logbatcher/engine/util.py b/src/detectmatelibrary/parsers/logbatcher/engine/util.py index 8feb8a5..37bd6e8 100644 --- a/src/detectmatelibrary/parsers/logbatcher/engine/util.py +++ 
b/src/detectmatelibrary/parsers/logbatcher/engine/util.py @@ -96,7 +96,7 @@ def generate_logformat_regex(logformat: str) -> Tuple[List[str], str]: regex = '' for k in range(len(splitters)): if k % 2 == 0: - splitter = re.sub(' +', '\\\s+', splitters[k]) + splitter = re.sub(' +', r'\\s+', splitters[k]) regex += splitter else: header = splitters[k].strip('<').strip('>') From e5c00f37c2b77819c35eb69d14922d6b03be611a Mon Sep 17 00:00:00 2001 From: viktorbeck98 Date: Sat, 7 Mar 2026 12:34:26 +0100 Subject: [PATCH 3/3] update dependencies --- pyproject.toml | 1 + uv.lock | 200 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 196 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 286ccda..7d9d1bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ dependencies = [ "pyyaml>=6.0.3", "regex>=2025.11.3", "kafka-python>=2.3.0", + "openai>=2.26.0", ] [dependency-groups] diff --git a/uv.lock b/uv.lock index caf0ecc..ff963f5 100644 --- a/uv.lock +++ b/uv.lock @@ -11,6 +11,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, ] +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, +] + [[package]] name = "cachetools" version = "4.2.1" @@ -20,6 +33,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/bb/72/8df2e0dc991f1a1d2c6869404e7622e8ee50d80bff357dbb57c3df70305b/cachetools-4.2.1-py3-none-any.whl", hash = "sha256:1d9d5f567be80f7c07d765e21b814326d78c61eb0c3a637dffc0e5d1796cb2e2", size = 12003, upload-time = "2021-01-24T22:40:11.795Z" }, ] +[[package]] +name = "certifi" +version = "2026.2.25" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" }, +] + [[package]] name = "colorama" version = "0.4.6" @@ -110,6 +132,7 @@ source = { editable = "." 
} dependencies = [ { name = "drain3" }, { name = "kafka-python" }, + { name = "openai" }, { name = "pandas" }, { name = "protobuf" }, { name = "pydantic" }, @@ -117,7 +140,7 @@ dependencies = [ { name = "regex" }, ] -[package.optional-dependencies] +[package.dev-dependencies] dev = [ { name = "prek" }, { name = "pytest" }, @@ -128,16 +151,29 @@ dev = [ requires-dist = [ { name = "drain3", specifier = ">=0.9.11" }, { name = "kafka-python", specifier = ">=2.3.0" }, + { name = "openai", specifier = ">=2.26.0" }, { name = "pandas", specifier = ">=2.3.2" }, - { name = "prek", marker = "extra == 'dev'", specifier = ">=0.2.8" }, { name = "protobuf", specifier = ">=6.32.1" }, { name = "pydantic", specifier = ">=2.11.7" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.4.2" }, - { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=6.2.1" }, { name = "pyyaml", specifier = ">=6.0.3" }, { name = "regex", specifier = ">=2025.11.3" }, ] -provides-extras = ["dev"] + +[package.metadata.requires-dev] +dev = [ + { name = "prek", specifier = ">=0.2.8" }, + { name = "pytest", specifier = ">=8.4.2" }, + { name = "pytest-cov", specifier = ">=6.2.1" }, +] + +[[package]] +name = "distro" +version = "1.9.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722, upload-time = "2023-12-24T09:54:32.31Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" }, +] [[package]] name = "drain3" @@ -149,6 +185,52 @@ dependencies = [ ] sdist = { url = 
"https://files.pythonhosted.org/packages/dc/83/4da2d3a11b5e0edf1a4f4c0c2dd42126d2eb1f31c733967edd3dfac1af94/drain3-0.9.11.tar.gz", hash = "sha256:9ab4b1407fad74f56554ae371ef019c3c7985861631f4bab46a0e92585125f75", size = 27960, upload-time = "2022-07-17T06:40:11.433Z" } +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + [[package]] name = "iniconfig" version = "2.1.0" @@ -158,6 +240,74 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, ] +[[package]] +name = "jiter" +version = "0.13.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0d/5e/4ec91646aee381d01cdb9974e30882c9cd3b8c5d1079d6b5ff4af522439a/jiter-0.13.0.tar.gz", hash = "sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4", size = 164847, upload-time = 
"2026-02-02T12:37:56.441Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2e/30/7687e4f87086829955013ca12a9233523349767f69653ebc27036313def9/jiter-0.13.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:0a2bd69fc1d902e89925fc34d1da51b2128019423d7b339a45d9e99c894e0663", size = 307958, upload-time = "2026-02-02T12:35:57.165Z" }, + { url = "https://files.pythonhosted.org/packages/c3/27/e57f9a783246ed95481e6749cc5002a8a767a73177a83c63ea71f0528b90/jiter-0.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f917a04240ef31898182f76a332f508f2cc4b57d2b4d7ad2dbfebbfe167eb505", size = 318597, upload-time = "2026-02-02T12:35:58.591Z" }, + { url = "https://files.pythonhosted.org/packages/cf/52/e5719a60ac5d4d7c5995461a94ad5ef962a37c8bf5b088390e6fad59b2ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c1e2b199f446d3e82246b4fd9236d7cb502dc2222b18698ba0d986d2fecc6152", size = 348821, upload-time = "2026-02-02T12:36:00.093Z" }, + { url = "https://files.pythonhosted.org/packages/61/db/c1efc32b8ba4c740ab3fc2d037d8753f67685f475e26b9d6536a4322bcdd/jiter-0.13.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:04670992b576fa65bd056dbac0c39fe8bd67681c380cb2b48efa885711d9d726", size = 364163, upload-time = "2026-02-02T12:36:01.937Z" }, + { url = "https://files.pythonhosted.org/packages/55/8a/fb75556236047c8806995671a18e4a0ad646ed255276f51a20f32dceaeec/jiter-0.13.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a1aff1fbdb803a376d4d22a8f63f8e7ccbce0b4890c26cc7af9e501ab339ef0", size = 483709, upload-time = "2026-02-02T12:36:03.41Z" }, + { url = "https://files.pythonhosted.org/packages/7e/16/43512e6ee863875693a8e6f6d532e19d650779d6ba9a81593ae40a9088ff/jiter-0.13.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b3fb8c2053acaef8580809ac1d1f7481a0a0bdc012fd7f5d8b18fb696a5a089", size = 370480, upload-time = "2026-02-02T12:36:04.791Z" }, + { url 
= "https://files.pythonhosted.org/packages/f8/4c/09b93e30e984a187bc8aaa3510e1ec8dcbdcd71ca05d2f56aac0492453aa/jiter-0.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bdaba7d87e66f26a2c45d8cbadcbfc4bf7884182317907baf39cfe9775bb4d93", size = 360735, upload-time = "2026-02-02T12:36:06.994Z" }, + { url = "https://files.pythonhosted.org/packages/1a/1b/46c5e349019874ec5dfa508c14c37e29864ea108d376ae26d90bee238cd7/jiter-0.13.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7b88d649135aca526da172e48083da915ec086b54e8e73a425ba50999468cc08", size = 391814, upload-time = "2026-02-02T12:36:08.368Z" }, + { url = "https://files.pythonhosted.org/packages/15/9e/26184760e85baee7162ad37b7912797d2077718476bf91517641c92b3639/jiter-0.13.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:e404ea551d35438013c64b4f357b0474c7abf9f781c06d44fcaf7a14c69ff9e2", size = 513990, upload-time = "2026-02-02T12:36:09.993Z" }, + { url = "https://files.pythonhosted.org/packages/e9/34/2c9355247d6debad57a0a15e76ab1566ab799388042743656e566b3b7de1/jiter-0.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1f4748aad1b4a93c8bdd70f604d0f748cdc0e8744c5547798acfa52f10e79228", size = 548021, upload-time = "2026-02-02T12:36:11.376Z" }, + { url = "https://files.pythonhosted.org/packages/ac/4a/9f2c23255d04a834398b9c2e0e665382116911dc4d06b795710503cdad25/jiter-0.13.0-cp312-cp312-win32.whl", hash = "sha256:0bf670e3b1445fc4d31612199f1744f67f889ee1bbae703c4b54dc097e5dd394", size = 203024, upload-time = "2026-02-02T12:36:12.682Z" }, + { url = "https://files.pythonhosted.org/packages/09/ee/f0ae675a957ae5a8f160be3e87acea6b11dc7b89f6b7ab057e77b2d2b13a/jiter-0.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:15db60e121e11fe186c0b15236bd5d18381b9ddacdcf4e659feb96fc6c969c92", size = 205424, upload-time = "2026-02-02T12:36:13.93Z" }, + { url = 
"https://files.pythonhosted.org/packages/1b/02/ae611edf913d3cbf02c97cdb90374af2082c48d7190d74c1111dde08bcdd/jiter-0.13.0-cp312-cp312-win_arm64.whl", hash = "sha256:41f92313d17989102f3cb5dd533a02787cdb99454d494344b0361355da52fcb9", size = 186818, upload-time = "2026-02-02T12:36:15.308Z" }, + { url = "https://files.pythonhosted.org/packages/91/9c/7ee5a6ff4b9991e1a45263bfc46731634c4a2bde27dfda6c8251df2d958c/jiter-0.13.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1f8a55b848cbabf97d861495cd65f1e5c590246fabca8b48e1747c4dfc8f85bf", size = 306897, upload-time = "2026-02-02T12:36:16.748Z" }, + { url = "https://files.pythonhosted.org/packages/7c/02/be5b870d1d2be5dd6a91bdfb90f248fbb7dcbd21338f092c6b89817c3dbf/jiter-0.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f556aa591c00f2c45eb1b89f68f52441a016034d18b65da60e2d2875bbbf344a", size = 317507, upload-time = "2026-02-02T12:36:18.351Z" }, + { url = "https://files.pythonhosted.org/packages/da/92/b25d2ec333615f5f284f3a4024f7ce68cfa0604c322c6808b2344c7f5d2b/jiter-0.13.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7e1d61da332ec412350463891923f960c3073cf1aae93b538f0bb4c8cd46efb", size = 350560, upload-time = "2026-02-02T12:36:19.746Z" }, + { url = "https://files.pythonhosted.org/packages/be/ec/74dcb99fef0aca9fbe56b303bf79f6bd839010cb18ad41000bf6cc71eec0/jiter-0.13.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3097d665a27bc96fd9bbf7f86178037db139f319f785e4757ce7ccbf390db6c2", size = 363232, upload-time = "2026-02-02T12:36:21.243Z" }, + { url = "https://files.pythonhosted.org/packages/1b/37/f17375e0bb2f6a812d4dd92d7616e41917f740f3e71343627da9db2824ce/jiter-0.13.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d01ecc3a8cbdb6f25a37bd500510550b64ddf9f7d64a107d92f3ccb25035d0f", size = 483727, upload-time = "2026-02-02T12:36:22.688Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/d2/a71160a5ae1a1e66c1395b37ef77da67513b0adba73b993a27fbe47eb048/jiter-0.13.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ed9bbc30f5d60a3bdf63ae76beb3f9db280d7f195dfcfa61af792d6ce912d159", size = 370799, upload-time = "2026-02-02T12:36:24.106Z" }, + { url = "https://files.pythonhosted.org/packages/01/99/ed5e478ff0eb4e8aa5fd998f9d69603c9fd3f32de3bd16c2b1194f68361c/jiter-0.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:98fbafb6e88256f4454de33c1f40203d09fc33ed19162a68b3b257b29ca7f663", size = 359120, upload-time = "2026-02-02T12:36:25.519Z" }, + { url = "https://files.pythonhosted.org/packages/16/be/7ffd08203277a813f732ba897352797fa9493faf8dc7995b31f3d9cb9488/jiter-0.13.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:5467696f6b827f1116556cb0db620440380434591e93ecee7fd14d1a491b6daa", size = 390664, upload-time = "2026-02-02T12:36:26.866Z" }, + { url = "https://files.pythonhosted.org/packages/d1/84/e0787856196d6d346264d6dcccb01f741e5f0bd014c1d9a2ebe149caf4f3/jiter-0.13.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:2d08c9475d48b92892583df9da592a0e2ac49bcd41fae1fec4f39ba6cf107820", size = 513543, upload-time = "2026-02-02T12:36:28.217Z" }, + { url = "https://files.pythonhosted.org/packages/65/50/ecbd258181c4313cf79bca6c88fb63207d04d5bf5e4f65174114d072aa55/jiter-0.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:aed40e099404721d7fcaf5b89bd3b4568a4666358bcac7b6b15c09fb6252ab68", size = 547262, upload-time = "2026-02-02T12:36:29.678Z" }, + { url = "https://files.pythonhosted.org/packages/27/da/68f38d12e7111d2016cd198161b36e1f042bd115c169255bcb7ec823a3bf/jiter-0.13.0-cp313-cp313-win32.whl", hash = "sha256:36ebfbcffafb146d0e6ffb3e74d51e03d9c35ce7c625c8066cdbfc7b953bdc72", size = 200630, upload-time = "2026-02-02T12:36:31.808Z" }, + { url = 
"https://files.pythonhosted.org/packages/25/65/3bd1a972c9a08ecd22eb3b08a95d1941ebe6938aea620c246cf426ae09c2/jiter-0.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:8d76029f077379374cf0dbc78dbe45b38dec4a2eb78b08b5194ce836b2517afc", size = 202602, upload-time = "2026-02-02T12:36:33.679Z" }, + { url = "https://files.pythonhosted.org/packages/15/fe/13bd3678a311aa67686bb303654792c48206a112068f8b0b21426eb6851e/jiter-0.13.0-cp313-cp313-win_arm64.whl", hash = "sha256:bb7613e1a427cfcb6ea4544f9ac566b93d5bf67e0d48c787eca673ff9c9dff2b", size = 185939, upload-time = "2026-02-02T12:36:35.065Z" }, + { url = "https://files.pythonhosted.org/packages/49/19/a929ec002ad3228bc97ca01dbb14f7632fffdc84a95ec92ceaf4145688ae/jiter-0.13.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fa476ab5dd49f3bf3a168e05f89358c75a17608dbabb080ef65f96b27c19ab10", size = 316616, upload-time = "2026-02-02T12:36:36.579Z" }, + { url = "https://files.pythonhosted.org/packages/52/56/d19a9a194afa37c1728831e5fb81b7722c3de18a3109e8f282bfc23e587a/jiter-0.13.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ade8cb6ff5632a62b7dbd4757d8c5573f7a2e9ae285d6b5b841707d8363205ef", size = 346850, upload-time = "2026-02-02T12:36:38.058Z" }, + { url = "https://files.pythonhosted.org/packages/36/4a/94e831c6bf287754a8a019cb966ed39ff8be6ab78cadecf08df3bb02d505/jiter-0.13.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9950290340acc1adaded363edd94baebcee7dabdfa8bee4790794cd5cfad2af6", size = 358551, upload-time = "2026-02-02T12:36:39.417Z" }, + { url = "https://files.pythonhosted.org/packages/a2/ec/a4c72c822695fa80e55d2b4142b73f0012035d9fcf90eccc56bc060db37c/jiter-0.13.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2b4972c6df33731aac0742b64fd0d18e0a69bc7d6e03108ce7d40c85fd9e3e6d", size = 201950, upload-time = "2026-02-02T12:36:40.791Z" }, + { url = 
"https://files.pythonhosted.org/packages/b6/00/393553ec27b824fbc29047e9c7cd4a3951d7fbe4a76743f17e44034fa4e4/jiter-0.13.0-cp313-cp313t-win_arm64.whl", hash = "sha256:701a1e77d1e593c1b435315ff625fd071f0998c5f02792038a5ca98899261b7d", size = 185852, upload-time = "2026-02-02T12:36:42.077Z" }, + { url = "https://files.pythonhosted.org/packages/6e/f5/f1997e987211f6f9bd71b8083047b316208b4aca0b529bb5f8c96c89ef3e/jiter-0.13.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:cc5223ab19fe25e2f0bf2643204ad7318896fe3729bf12fde41b77bfc4fafff0", size = 308804, upload-time = "2026-02-02T12:36:43.496Z" }, + { url = "https://files.pythonhosted.org/packages/cd/8f/5482a7677731fd44881f0204981ce2d7175db271f82cba2085dd2212e095/jiter-0.13.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:9776ebe51713acf438fd9b4405fcd86893ae5d03487546dae7f34993217f8a91", size = 318787, upload-time = "2026-02-02T12:36:45.071Z" }, + { url = "https://files.pythonhosted.org/packages/f3/b9/7257ac59778f1cd025b26a23c5520a36a424f7f1b068f2442a5b499b7464/jiter-0.13.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:879e768938e7b49b5e90b7e3fecc0dbec01b8cb89595861fb39a8967c5220d09", size = 353880, upload-time = "2026-02-02T12:36:47.365Z" }, + { url = "https://files.pythonhosted.org/packages/c3/87/719eec4a3f0841dad99e3d3604ee4cba36af4419a76f3cb0b8e2e691ad67/jiter-0.13.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:682161a67adea11e3aae9038c06c8b4a9a71023228767477d683f69903ebc607", size = 366702, upload-time = "2026-02-02T12:36:48.871Z" }, + { url = "https://files.pythonhosted.org/packages/d2/65/415f0a75cf6921e43365a1bc227c565cb949caca8b7532776e430cbaa530/jiter-0.13.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a13b68cd1cd8cc9de8f244ebae18ccb3e4067ad205220ef324c39181e23bbf66", size = 486319, upload-time = "2026-02-02T12:36:53.006Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/a2/9e12b48e82c6bbc6081fd81abf915e1443add1b13d8fc586e1d90bb02bb8/jiter-0.13.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:87ce0f14c6c08892b610686ae8be350bf368467b6acd5085a5b65441e2bf36d2", size = 372289, upload-time = "2026-02-02T12:36:54.593Z" }, + { url = "https://files.pythonhosted.org/packages/4e/c1/e4693f107a1789a239c759a432e9afc592366f04e901470c2af89cfd28e1/jiter-0.13.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0c365005b05505a90d1c47856420980d0237adf82f70c4aff7aebd3c1cc143ad", size = 360165, upload-time = "2026-02-02T12:36:56.112Z" }, + { url = "https://files.pythonhosted.org/packages/17/08/91b9ea976c1c758240614bd88442681a87672eebc3d9a6dde476874e706b/jiter-0.13.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:1317fdffd16f5873e46ce27d0e0f7f4f90f0cdf1d86bf6abeaea9f63ca2c401d", size = 389634, upload-time = "2026-02-02T12:36:57.495Z" }, + { url = "https://files.pythonhosted.org/packages/18/23/58325ef99390d6d40427ed6005bf1ad54f2577866594bcf13ce55675f87d/jiter-0.13.0-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:c05b450d37ba0c9e21c77fef1f205f56bcee2330bddca68d344baebfc55ae0df", size = 514933, upload-time = "2026-02-02T12:36:58.909Z" }, + { url = "https://files.pythonhosted.org/packages/5b/25/69f1120c7c395fd276c3996bb8adefa9c6b84c12bb7111e5c6ccdcd8526d/jiter-0.13.0-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:775e10de3849d0631a97c603f996f518159272db00fdda0a780f81752255ee9d", size = 548842, upload-time = "2026-02-02T12:37:00.433Z" }, + { url = "https://files.pythonhosted.org/packages/18/05/981c9669d86850c5fbb0d9e62bba144787f9fba84546ba43d624ee27ef29/jiter-0.13.0-cp314-cp314-win32.whl", hash = "sha256:632bf7c1d28421c00dd8bbb8a3bac5663e1f57d5cd5ed962bce3c73bf62608e6", size = 202108, upload-time = "2026-02-02T12:37:01.718Z" }, + { url = 
"https://files.pythonhosted.org/packages/8d/96/cdcf54dd0b0341db7d25413229888a346c7130bd20820530905fdb65727b/jiter-0.13.0-cp314-cp314-win_amd64.whl", hash = "sha256:f22ef501c3f87ede88f23f9b11e608581c14f04db59b6a801f354397ae13739f", size = 204027, upload-time = "2026-02-02T12:37:03.075Z" }, + { url = "https://files.pythonhosted.org/packages/fb/f9/724bcaaab7a3cd727031fe4f6995cb86c4bd344909177c186699c8dec51a/jiter-0.13.0-cp314-cp314-win_arm64.whl", hash = "sha256:07b75fe09a4ee8e0c606200622e571e44943f47254f95e2436c8bdcaceb36d7d", size = 187199, upload-time = "2026-02-02T12:37:04.414Z" }, + { url = "https://files.pythonhosted.org/packages/62/92/1661d8b9fd6a3d7a2d89831db26fe3c1509a287d83ad7838831c7b7a5c7e/jiter-0.13.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:964538479359059a35fb400e769295d4b315ae61e4105396d355a12f7fef09f0", size = 318423, upload-time = "2026-02-02T12:37:05.806Z" }, + { url = "https://files.pythonhosted.org/packages/4f/3b/f77d342a54d4ebcd128e520fc58ec2f5b30a423b0fd26acdfc0c6fef8e26/jiter-0.13.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e104da1db1c0991b3eaed391ccd650ae8d947eab1480c733e5a3fb28d4313e40", size = 351438, upload-time = "2026-02-02T12:37:07.189Z" }, + { url = "https://files.pythonhosted.org/packages/76/b3/ba9a69f0e4209bd3331470c723c2f5509e6f0482e416b612431a5061ed71/jiter-0.13.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0e3a5f0cde8ff433b8e88e41aa40131455420fb3649a3c7abdda6145f8cb7202", size = 364774, upload-time = "2026-02-02T12:37:08.579Z" }, + { url = "https://files.pythonhosted.org/packages/b3/16/6cdb31fa342932602458dbb631bfbd47f601e03d2e4950740e0b2100b570/jiter-0.13.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:57aab48f40be1db920a582b30b116fe2435d184f77f0e4226f546794cedd9cf0", size = 487238, upload-time = "2026-02-02T12:37:10.066Z" }, + { url = 
"https://files.pythonhosted.org/packages/ed/b1/956cc7abaca8d95c13aa8d6c9b3f3797241c246cd6e792934cc4c8b250d2/jiter-0.13.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7772115877c53f62beeb8fd853cab692dbc04374ef623b30f997959a4c0e7e95", size = 372892, upload-time = "2026-02-02T12:37:11.656Z" }, + { url = "https://files.pythonhosted.org/packages/26/c4/97ecde8b1e74f67b8598c57c6fccf6df86ea7861ed29da84629cdbba76c4/jiter-0.13.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1211427574b17b633cfceba5040de8081e5abf114f7a7602f73d2e16f9fdaa59", size = 360309, upload-time = "2026-02-02T12:37:13.244Z" }, + { url = "https://files.pythonhosted.org/packages/4b/d7/eabe3cf46715854ccc80be2cd78dd4c36aedeb30751dbf85a1d08c14373c/jiter-0.13.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7beae3a3d3b5212d3a55d2961db3c292e02e302feb43fce6a3f7a31b90ea6dfe", size = 389607, upload-time = "2026-02-02T12:37:14.881Z" }, + { url = "https://files.pythonhosted.org/packages/df/2d/03963fc0804e6109b82decfb9974eb92df3797fe7222428cae12f8ccaa0c/jiter-0.13.0-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:e5562a0f0e90a6223b704163ea28e831bd3a9faa3512a711f031611e6b06c939", size = 514986, upload-time = "2026-02-02T12:37:16.326Z" }, + { url = "https://files.pythonhosted.org/packages/f6/6c/8c83b45eb3eb1c1e18d841fe30b4b5bc5619d781267ca9bc03e005d8fd0a/jiter-0.13.0-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:6c26a424569a59140fb51160a56df13f438a2b0967365e987889186d5fc2f6f9", size = 548756, upload-time = "2026-02-02T12:37:17.736Z" }, + { url = "https://files.pythonhosted.org/packages/47/66/eea81dfff765ed66c68fd2ed8c96245109e13c896c2a5015c7839c92367e/jiter-0.13.0-cp314-cp314t-win32.whl", hash = "sha256:24dc96eca9f84da4131cdf87a95e6ce36765c3b156fc9ae33280873b1c32d5f6", size = 201196, upload-time = "2026-02-02T12:37:19.101Z" }, + { url = 
"https://files.pythonhosted.org/packages/ff/32/4ac9c7a76402f8f00d00842a7f6b83b284d0cf7c1e9d4227bc95aa6d17fa/jiter-0.13.0-cp314-cp314t-win_amd64.whl", hash = "sha256:0a8d76c7524087272c8ae913f5d9d608bd839154b62c4322ef65723d2e5bb0b8", size = 204215, upload-time = "2026-02-02T12:37:20.495Z" }, + { url = "https://files.pythonhosted.org/packages/f9/8e/7def204fea9f9be8b3c21a6f2dd6c020cf56c7d5ff753e0e23ed7f9ea57e/jiter-0.13.0-cp314-cp314t-win_arm64.whl", hash = "sha256:2c26cf47e2cad140fa23b6d58d435a7c0161f5c514284802f25e87fddfe11024", size = 187152, upload-time = "2026-02-02T12:37:22.124Z" }, + { url = "https://files.pythonhosted.org/packages/80/60/e50fa45dd7e2eae049f0ce964663849e897300433921198aef94b6ffa23a/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:3d744a6061afba08dd7ae375dcde870cffb14429b7477e10f67e9e6d68772a0a", size = 305169, upload-time = "2026-02-02T12:37:50.376Z" }, + { url = "https://files.pythonhosted.org/packages/d2/73/a009f41c5eed71c49bec53036c4b33555afcdee70682a18c6f66e396c039/jiter-0.13.0-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:ff732bd0a0e778f43d5009840f20b935e79087b4dc65bd36f1cd0f9b04b8ff7f", size = 303808, upload-time = "2026-02-02T12:37:52.092Z" }, + { url = "https://files.pythonhosted.org/packages/c4/10/528b439290763bff3d939268085d03382471b442f212dca4ff5f12802d43/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ab44b178f7981fcaea7e0a5df20e773c663d06ffda0198f1a524e91b2fde7e59", size = 337384, upload-time = "2026-02-02T12:37:53.582Z" }, + { url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" }, +] + [[package]] name = "jsonpickle" 
version = "1.5.1" @@ -239,6 +389,25 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c1/9e/1652778bce745a67b5fe05adde60ed362d38eb17d919a540e813d30f6874/numpy-2.3.2-cp314-cp314t-win_arm64.whl", hash = "sha256:092aeb3449833ea9c0bf0089d70c29ae480685dd2377ec9cdbbb620257f84631", size = 10544226, upload-time = "2025-07-24T20:56:34.509Z" }, ] +[[package]] +name = "openai" +version = "2.26.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "distro" }, + { name = "httpx" }, + { name = "jiter" }, + { name = "pydantic" }, + { name = "sniffio" }, + { name = "tqdm" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d7/91/2a06c4e9597c338cac1e5e5a8dd6f29e1836fc229c4c523529dca387fda8/openai-2.26.0.tar.gz", hash = "sha256:b41f37c140ae0034a6e92b0c509376d907f3a66109935fba2c1b471a7c05a8fb", size = 666702, upload-time = "2026-03-05T23:17:35.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/2e/3f73e8ca53718952222cacd0cf7eecc9db439d020f0c1fe7ae717e4e199a/openai-2.26.0-py3-none-any.whl", hash = "sha256:6151bf8f83802f036117f06cc8a57b3a4da60da9926826cc96747888b57f394f", size = 1136409, upload-time = "2026-03-05T23:17:34.072Z" }, +] + [[package]] name = "packaging" version = "25.0" @@ -581,6 +750,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050, upload-time = "2024-12-04T17:35:26.475Z" }, ] +[[package]] +name = "sniffio" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372, upload-time = 
"2024-02-25T23:20:04.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235, upload-time = "2024-02-25T23:20:01.196Z" }, +] + +[[package]] +name = "tqdm" +version = "4.67.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/09/a9/6ba95a270c6f1fbcd8dac228323f2777d886cb206987444e4bce66338dd4/tqdm-4.67.3.tar.gz", hash = "sha256:7d825f03f89244ef73f1d4ce193cb1774a8179fd96f31d7e1dcde62092b960bb", size = 169598, upload-time = "2026-02-03T17:35:53.048Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/e1/3079a9ff9b8e11b846c6ac5c8b5bfb7ff225eee721825310c91b3b50304f/tqdm-4.67.3-py3-none-any.whl", hash = "sha256:ee1e4c0e59148062281c49d80b25b67771a127c85fc9676d3be5f243206826bf", size = 78374, upload-time = "2026-02-03T17:35:50.982Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0"