Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 12 additions & 16 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,22 @@ jobs:
test:
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: 3.12
python-version: "3.12"
- name: Install dependencies
run: |
pip install poetry
cd training && poetry install --with dev --no-root
- name: Run formatter, linter and type checker
run: |
cd training && poetry run ruff check .
# mypy --explicit-package-bases .
# flake8 .
# interrogate -vv --ignore-init-module --exclude sigma_api .
- name: Run tests
- name: Lint training code
run: cd training && poetry run ruff check .
- name: Lint API code
run: cd training && poetry run ruff check ../api/
- name: Run training tests
run: cd training && poetry run pytest --cov=training --cov-report term --cov-report lcov:coverage.lcov -vv
# - name: Submit coverage report to Coveralls
# if: ${{ success() }}
# uses: coverallsapp/github-action@1.1.3
# with:
# github-token: ${{ secrets.GITHUB_TOKEN }}
# path-to-lcov: ./coverage.lcov
- name: Install API dependencies
run: cd training && poetry run pip install flask gunicorn
- name: Run API tests
run: cd training && poetry run pytest ../api/test_api.py -vv
15 changes: 7 additions & 8 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
FROM tensorflow/tensorflow:latest
FROM tensorflow/tensorflow:2.16.1

ENV dataset=sqli_dataset2.csv
ENV KMP_AFFINITY=noverbose
ENV TF_CPP_MIN_LOG_LEVEL=3
ENV DATASET_PATH=/app/${dataset}
ENV VOCAB_PATH=/app/sql_tokenizer_vocab.json
ENV MODEL_PATH=/app/sqli_model/3/
ENV WORKERS=4
ENV HOST=0.0.0.0
ENV PORT=8000

WORKDIR /app
COPY api/api.py /app
COPY api/pyproject.toml /app
COPY api/poetry.lock /app
COPY dataset/${dataset} /app
COPY api/api.py /app/
COPY api/pyproject.toml /app/
COPY api/poetry.lock /app/
COPY training/sql_tokenizer.py /app/
COPY training/sql_tokenizer_vocab.json /app/
COPY sqli_model/ /app/sqli_model/
COPY sqli_model/3/ /app/sqli_model/3/
RUN pip install --disable-pip-version-check poetry
RUN poetry install --no-root

Expand Down
54 changes: 29 additions & 25 deletions api/api.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,45 @@
from flask import Flask, jsonify, request
import tensorflow as tf
import pandas as pd
import logging
import os
from sql_tokenizer import SQLTokenizer # Import SQLTokenizer

import tensorflow as tf
from flask import Flask, jsonify, request

from sql_tokenizer import SQLTokenizer

logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(message)s",
)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Constants and configurations
MAX_WORDS = 10000
MAX_LEN = 100
DATASET_PATH = os.getenv("DATASET_PATH", "dataset/sqli_dataset1.csv")
VOCAB_PATH = os.getenv("VOCAB_PATH", "sql_tokenizer_vocab.json")
MODEL_PATH = os.getenv("MODEL_PATH", "/app/sqli_model/3/")

# Load dataset and initialize SQLTokenizer
DATASET = pd.read_csv(DATASET_PATH)
sql_tokenizer = SQLTokenizer(max_words=MAX_WORDS, max_len=MAX_LEN)
sql_tokenizer.fit_on_texts(DATASET["Query"]) # Fit tokenizer on dataset
sql_tokenizer.load_token_index(VOCAB_PATH)
logger.info("Loaded tokenizer vocabulary from %s (%d tokens)", VOCAB_PATH, len(sql_tokenizer.token_index))

# Load the model using tf.saved_model.load and get the serving signature
loaded_model = tf.saved_model.load(MODEL_PATH)
model_predict = loaded_model.signatures["serving_default"]
logger.info("Loaded model from %s", MODEL_PATH)


def warm_up_model():
"""Sends a dummy request to the model to 'warm it up'."""
"""Sends a dummy request to the model to initialize it."""
dummy_query = "SELECT * FROM users WHERE id = 1"
query_seq = sql_tokenizer.texts_to_sequences([dummy_query])
input_tensor = tf.convert_to_tensor(query_seq, dtype=tf.float32)
_ = model_predict(input_tensor) # Make a dummy prediction to initialize the model
print("Model warmed up and ready to serve requests.")
_ = model_predict(input_tensor)
logger.info("Model warmed up and ready to serve requests.")


@app.route("/health", methods=["GET"])
def health():
    """Return a static OK status as JSON (liveness check)."""
    payload = {"status": "ok"}
    return jsonify(payload)


@app.route("/predict", methods=["POST"])
Expand All @@ -37,27 +48,20 @@ def predict():
return jsonify({"error": "No query provided"}), 400

try:
# Tokenize and pad the input query using SQLTokenizer
query = request.json["query"]
query_seq = sql_tokenizer.texts_to_sequences([query])
input_tensor = tf.convert_to_tensor(query_seq, dtype=tf.float32)

# Use the loaded model's serving signature to make the prediction
prediction = model_predict(input_tensor)

# Check for valid output and extract the result
if "output_0" not in prediction or prediction["output_0"].get_shape() != [1, 1]:
return jsonify({"error": "Invalid model output"}), 500

# Extract confidence and return the response
return jsonify(
{
"confidence": float("%.4f" % prediction["output_0"].numpy()[0][0]),
}
)
except Exception as e:
# Log the error and return a proper error message
return jsonify({"error": str(e)}), 500
confidence = float("%.4f" % prediction["output_0"].numpy()[0][0])
return jsonify({"confidence": confidence})
except Exception:
logger.exception("Prediction failed")
return jsonify({"error": "Internal server error"}), 500


if __name__ == "__main__":
Expand Down
6 changes: 3 additions & 3 deletions api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,12 @@ authors = ["Mostafa Moradian <mostafa@gatewayd.io>"]
readme = "README.md"

[tool.poetry.dependencies]
python = ">=3.10,<3.11"
python = "^3.12"
Flask = "^3.0.2"
gunicorn = "^21.2.0"
pandas = "^2.2.1"
pandas = "^2.2.2"
numpy = "^1.26.4"
tensorflow = "^2.15.0"
tensorflow = "^2.16.1"

[build-system]
requires = ["poetry-core"]
Expand Down
74 changes: 74 additions & 0 deletions api/test_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
"""Bootstrap for API tests: load ``api.py`` as a module with test paths.

``api.py`` reads VOCAB_PATH/MODEL_PATH and imports ``sql_tokenizer`` at
import time, so the environment and ``sys.path`` must be prepared before
the module is executed.
"""
# NOTE: ``import importlib`` alone does not reliably bind the
# ``importlib.util`` submodule; it must be imported explicitly.
import importlib.util
import os
import sys

import pytest

# Make the training package (sql_tokenizer) importable by api.py.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "training"))

# Point the API at repo-local artifacts unless the caller overrides them.
os.environ.setdefault("VOCAB_PATH", os.path.join(
    os.path.dirname(__file__), "..", "training", "sql_tokenizer_vocab.json"))
os.environ.setdefault("MODEL_PATH", os.path.join(
    os.path.dirname(__file__), "..", "sqli_model", "3"))

# Load api.py under a private module name so it does not clash with any
# installed package named "api".
spec = importlib.util.spec_from_file_location(
    "api_module", os.path.join(os.path.dirname(__file__), "api.py"))
api_module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(api_module)
app = api_module.app


@pytest.fixture
def client():
    """Yield a Flask test client with TESTING mode enabled."""
    app.config["TESTING"] = True
    with app.test_client() as test_client:
        yield test_client


def test_health(client):
    """GET /health responds 200 with the static OK payload."""
    response = client.get("/health")
    assert response.get_json() == {"status": "ok"}
    assert response.status_code == 200


def test_predict_missing_body(client):
    """POST /predict with no JSON body is rejected with 400."""
    response = client.post("/predict", content_type="application/json")
    assert response.status_code == 400


def test_predict_missing_query_key(client):
    """A JSON body lacking the "query" key yields 400 plus an error field."""
    response = client.post("/predict", json={"foo": "bar"})
    assert response.status_code == 400
    body = response.get_json()
    assert "error" in body


def test_predict_sqli(client):
    """An injection-style query is scored and returns a float confidence."""
    payload = {"query": "SELECT * FROM users WHERE id=1 OR 1=1"}
    response = client.post("/predict", json=payload)
    assert response.status_code == 200
    body = response.get_json()
    assert "confidence" in body
    assert isinstance(body["confidence"], float)


def test_predict_legitimate(client):
    """A benign query is scored and returns a float confidence."""
    payload = {"query": "SELECT name FROM products"}
    response = client.post("/predict", json=payload)
    assert response.status_code == 200
    body = response.get_json()
    assert "confidence" in body
    assert isinstance(body["confidence"], float)


def test_predict_empty_query(client):
    """An empty query string is still scored (200 with a confidence key)."""
    response = client.post("/predict", json={"query": ""})
    assert response.status_code == 200
    assert "confidence" in response.get_json()


def test_predict_error_not_leaked(client):
    """Ensure internal error details are not exposed to the client.

    The original version asserted nothing when the endpoint returned 200,
    so the test could pass without exercising any check. Now every outcome
    is asserted: the endpoint must return 200 or 500, and a 500 response
    must carry only the generic error message with no exception details.
    """
    resp = client.post("/predict", json={"query": ""})
    assert resp.status_code in (200, 500)
    if resp.status_code == 500:
        data = resp.get_json()
        # Exactly the generic payload — any extra keys or a different
        # message would indicate leaked internals.
        assert data == {"error": "Internal server error"}
2 changes: 1 addition & 1 deletion sqli_model/3/fingerprint.pb
Original file line number Diff line number Diff line change
@@ -1 +1 @@
���־���鿶������月�� �����Ϗ�(�������2
�����������ݺ���Y���月�� ��������(���վ����2:'306335063828443668507412436166038701185
Binary file modified sqli_model/3/saved_model.pb
Binary file not shown.
Binary file modified sqli_model/3/variables/variables.data-00000-of-00001
Binary file not shown.
Binary file modified sqli_model/3/variables/variables.index
Binary file not shown.
8 changes: 0 additions & 8 deletions training/requirements.txt

This file was deleted.

4 changes: 2 additions & 2 deletions training/sql_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def fit_on_texts(self, queries):
for query in queries:
tokens = self.tokenize(query)
all_tokens.update(tokens)
# Limit to max_words
all_tokens = list(all_tokens)[: self.max_words]
# Sort for deterministic ordering, then limit to max_words
all_tokens = sorted(all_tokens)[: self.max_words]
self.token_index = {token: i + 1 for i, token in enumerate(all_tokens)}

def texts_to_sequences(self, queries):
Expand Down
2 changes: 1 addition & 1 deletion training/sql_tokenizer_vocab.json

Large diffs are not rendered by default.

17 changes: 10 additions & 7 deletions training/test_train.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,24 @@ def model(request):
}


# Model v3 was retrained with the deterministic (sorted) tokenizer.
# Known false negative: "or 1=1;" with trailing semicolon scores low (~0.002).
# This likely needs dataset enrichment with more semicolon-terminated patterns.
@pytest.mark.parametrize(
"sample",
[
("select * from users where id=1 or 1=1;", [0.9202, 0.974, 0.3179]),
("select * from users where id='1' or 1=1--", [0.9202, 0.974, 0.3179]),
("select * from users", [0.00077, 0.0015, 0.0231]),
("select * from users where id=10000", [0.1483, 0.8893, 0.7307]),
("select '1' union select 'a'; -- -'", [0.9999, 0.9732, 0.0139]),
("select * from users where id=1 or 1=1;", [0.9202, 0.974, 0.0019]),
("select * from users where id='1' or 1=1--", [0.9202, 0.974, 0.9592]),
("select * from users", [0.00077, 0.0015, 0.0018]),
("select * from users where id=10000", [0.1483, 0.8893, 0.0011]),
("select '1' union select 'a'; -- -'", [0.9999, 0.9732, 0.9999]),
(
"select '' union select 'malicious php code' \\g /var/www/test.php; -- -';",
[0.9999, 0.8065, 0.0424],
[0.9999, 0.8065, 0.8984],
),
(
"select '' || pg_sleep((ascii((select 'a' limit 1)) - 32) / 2); -- -';",
[0.9999, 0.9999, 0.01543],
[0.9999, 0.9999, 0.8479],
),
],
)
Expand Down
3 changes: 1 addition & 2 deletions training/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
f1_score,
confusion_matrix,
)
import numpy as np

# Check if the input file and output directory are provided
if len(sys.argv) != 3:
Expand Down Expand Up @@ -62,7 +61,7 @@

# Predict test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_pred_classes = (y_pred > 0.5).astype(int).flatten()

# Calculate model performance indicators
accuracy = accuracy_score(y_test, y_pred_classes)
Expand Down
14 changes: 10 additions & 4 deletions training/train_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,12 @@ def plot_history(history):
X, tokenizer = preprocess_text(data, max_words=MAX_WORDS)
y = data["Label"].values # Convert to NumPy array for compatibility with KFold

# Save the deterministic vocabulary for inference
script_dir = os.path.dirname(os.path.abspath(__file__))
vocab_path = os.path.join(script_dir, "sql_tokenizer_vocab.json")
tokenizer.save_token_index(vocab_path)
print(f"Saved tokenizer vocabulary ({len(tokenizer.token_index)} tokens) to {vocab_path}")

# Initialize cross-validation
k_folds = 5
kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)
Expand Down Expand Up @@ -148,15 +154,15 @@ def plot_history(history):
accuracy = accuracy_score(y_val, y_val_pred)
precision = precision_score(y_val, y_val_pred)
recall = recall_score(y_val, y_val_pred)
f1_score = calculate_f1_f2(precision, recall, beta=1)
f2_score = calculate_f1_f2(precision, recall, beta=2)
f1 = calculate_f1_f2(precision, recall, beta=1)
f2 = calculate_f1_f2(precision, recall, beta=2)

# Collect fold metrics
fold_metrics["accuracy"].append(accuracy)
fold_metrics["precision"].append(precision)
fold_metrics["recall"].append(recall)
fold_metrics["f1"].append(f1_score)
fold_metrics["f2"].append(f2_score)
fold_metrics["f1"].append(f1)
fold_metrics["f2"].append(f2)

# Calculate and display average metrics across folds
avg_metrics = {metric: np.mean(scores) for metric, scores in fold_metrics.items()}
Expand Down
Binary file modified training/training_history.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.