mirror of https://github.com/immich-app/immich.git
feat(ml): ML on Rockchip NPUs (#15241)
parent 1e184a70f1
commit 14c3b99c0f
@ -0,0 +1,76 @@
from __future__ import annotations

from pathlib import Path
from typing import Any, NamedTuple

import numpy as np
from numpy.typing import NDArray

from app.config import log, settings
from app.schemas import SessionNode

from .rknnpool import RknnPoolExecutor, is_available, soc_name

is_available = is_available and settings.rknn
model_prefix = Path("rknpu") / soc_name if is_available and soc_name is not None else None


def run_inference(rknn_lite: Any, input: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
    outputs: list[NDArray[np.float32]] = rknn_lite.inference(inputs=input, data_format="nchw")
    return outputs


input_output_mapping: dict[str, dict[str, Any]] = {
    "detection": {
        "input": {"norm_tensor:0": (1, 3, 640, 640)},
        "output": {
            "norm_tensor:1": (12800, 1),
            "norm_tensor:2": (3200, 1),
            "norm_tensor:3": (800, 1),
            "norm_tensor:4": (12800, 4),
            "norm_tensor:5": (3200, 4),
            "norm_tensor:6": (800, 4),
            "norm_tensor:7": (12800, 10),
            "norm_tensor:8": (3200, 10),
            "norm_tensor:9": (800, 10),
        },
    },
    "recognition": {"input": {"norm_tensor:0": (1, 3, 112, 112)}, "output": {"norm_tensor:1": (1, 512)}},
}


class RknnSession:
    def __init__(self, model_path: Path) -> None:
        self.model_type = "detection" if "detection" in model_path.parts else "recognition"
        self.tpe = settings.rknn_threads

        log.info(f"Loading RKNN model from {model_path} with {self.tpe} threads.")
        self.rknnpool = RknnPoolExecutor(model_path=model_path.as_posix(), tpes=self.tpe, func=run_inference)
        log.info(f"Loaded RKNN model from {model_path} with {self.tpe} threads.")

    def get_inputs(self) -> list[SessionNode]:
        return [RknnNode(name=k, shape=v) for k, v in input_output_mapping[self.model_type]["input"].items()]

    def get_outputs(self) -> list[SessionNode]:
        return [RknnNode(name=k, shape=v) for k, v in input_output_mapping[self.model_type]["output"].items()]

    def run(
        self,
        output_names: list[str] | None,
        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
        run_options: Any = None,
    ) -> list[NDArray[np.float32]]:
        input_data: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
        self.rknnpool.put(input_data)
        res = self.rknnpool.get()
        if res is None:
            raise RuntimeError("RKNN inference failed!")
        return res


class RknnNode(NamedTuple):
    name: str | None
    shape: tuple[int, ...]


__all__ = ["RknnSession", "RknnNode", "is_available", "soc_name", "model_prefix"]
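The session above mirrors the onnxruntime InferenceSession interface (get_inputs/get_outputs/run), so callers can swap it in transparently. A minimal usage sketch, assuming an RKNN-capable device; the model path is illustrative and the module path is inferred from the imports above:

import numpy as np
from pathlib import Path

from app.sessions.rknn import RknnSession  # assumed module path

# "detection" appearing in the path selects the detection input/output mapping
session = RknnSession(Path("detection/model.rknn"))
image = np.zeros((1, 3, 640, 640), dtype=np.float32)  # matches the detection input shape
outputs = session.run(None, {"norm_tensor:0": image})
print([o.shape for o in outputs])  # nine score/box/landmark tensors per input_output_mapping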
@ -0,0 +1,91 @@
# This code is from leafqycc/rknn-multi-threaded
# Following Apache License 2.0

import logging
from concurrent.futures import Future, ThreadPoolExecutor
from pathlib import Path
from queue import Queue
from typing import Callable

import numpy as np
from numpy.typing import NDArray

from app.config import log
from app.models.constants import RKNN_COREMASK_SUPPORTED_SOCS, RKNN_SUPPORTED_SOCS


def get_soc(device_tree_path: Path | str) -> str | None:
    try:
        with Path(device_tree_path).open() as f:
            device_compatible_str = f.read()
        for soc in RKNN_SUPPORTED_SOCS:
            if soc in device_compatible_str:
                return soc
        log.warning("Device is not supported for RKNN")
    except OSError as e:
        log.warning("Could not read %s. Reason: %s", device_tree_path, e)
    return None


soc_name = None
is_available = False
try:
    from rknnlite.api import RKNNLite

    soc_name = get_soc("/proc/device-tree/compatible")
    is_available = soc_name is not None
except ImportError:
    log.debug("RKNN is not available")


def init_rknn(model_path: str) -> "RKNNLite":
    if not is_available:
        raise RuntimeError("rknn is not available!")
    rknn_lite = RKNNLite()
    rknn_lite.rknn_log.logger.setLevel(logging.ERROR)
    ret = rknn_lite.load_rknn(model_path)
    if ret != 0:
        raise RuntimeError("Failed to load RKNN model")

    if soc_name in RKNN_COREMASK_SUPPORTED_SOCS:
        ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO)
    else:
        ret = rknn_lite.init_runtime()  # Please do not set this parameter on other platforms.

    if ret != 0:
        raise RuntimeError("Failed to initialize RKNN runtime environment")

    return rknn_lite


class RknnPoolExecutor:
    def __init__(
        self,
        model_path: str,
        tpes: int,
        func: Callable[["RKNNLite", list[NDArray[np.float32]]], list[NDArray[np.float32]]],
    ) -> None:
        self.tpes = tpes
        self.queue: Queue[Future[list[NDArray[np.float32]]]] = Queue()
        self.rknn_pool = [init_rknn(model_path) for _ in range(tpes)]
        self.pool = ThreadPoolExecutor(max_workers=tpes)
        self.func = func
        self.num = 0

    def put(self, inputs: list[NDArray[np.float32]]) -> None:
        self.queue.put(self.pool.submit(self.func, self.rknn_pool[self.num % self.tpes], inputs))
        self.num += 1

    def get(self) -> list[NDArray[np.float32]] | None:
        if self.queue.empty():
            return None
        fut = self.queue.get()
        return fut.result()

    def release(self) -> None:
        self.pool.shutdown()
        for rknn_lite in self.rknn_pool:
            rknn_lite.release()

    def __del__(self) -> None:
        self.release()
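RknnPoolExecutor round-robins submissions across tpes RKNNLite runtimes and hands results back through a FIFO queue: each put enqueues a future, and each get blocks on the oldest one, so results come back in submission order. A sketch of the SoC detection it gates on; /proc/device-tree/compatible holds NUL-separated compatible strings (for example "rockchip,rk3588" on RK3588 boards), and the module path below is assumed:

from app.sessions.rknn.rknnpool import get_soc  # assumed module path

soc = get_soc("/proc/device-tree/compatible")
print(soc)  # e.g. "rk3588"; None on non-Rockchip hosts or unsupported SoCs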
@ -0,0 +1 @@
3.12
@ -1,20 +0,0 @@
FROM mambaorg/micromamba:bookworm-slim@sha256:e3797091302382ea841498bc93a7b0a50f7c1448333d5e946d2d1608d0c5f43d AS builder

ENV TRANSFORMERS_CACHE=/cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PATH="/opt/venv/bin:$PATH" \
    PYTHONPATH=/usr/src

COPY --chown=$MAMBA_USER:$MAMBA_USER conda-lock.yml /tmp/conda-lock.yml
RUN micromamba install -y -n base -f /tmp/conda-lock.yml && \
    micromamba remove -y -n base cxx-compiler && \
    micromamba clean --all --yes

WORKDIR /usr/src/app

COPY --chown=$MAMBA_USER:$MAMBA_USER start.sh .
COPY --chown=$MAMBA_USER:$MAMBA_USER app .

ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
CMD ["./start.sh"]
File diff suppressed because it is too large
@ -1,15 +0,0 @@
name: base
channels:
  - conda-forge
platforms:
  - linux-64
  - linux-aarch64
dependencies:
  - black
  - conda-lock
  - mypy
  - pytest
  - pytest-cov
  - pytest-mock
  - ruff
category: dev
@ -1,25 +0,0 @@
name: base
channels:
  - conda-forge
  - nvidia
  - pytorch
platforms:
  - linux-64
dependencies:
  - cxx-compiler
  - onnx==1.*
  - onnxruntime==1.*
  - open-clip-torch==2.*
  - orjson==3.*
  - pip
  - python==3.11.*
  - pytorch>=2.3
  - rich==13.*
  - safetensors==0.*
  - setuptools==68.*
  - torchvision
  - transformers==4.*
  - pip:
      - multilingual-clip
      - onnxsim
category: main
@ -0,0 +1,98 @@
from pathlib import Path

import typer
from tenacity import retry, stop_after_attempt, wait_fixed
from typing_extensions import Annotated

from .exporters.constants import DELETE_PATTERNS, SOURCE_TO_METADATA, ModelSource
from .exporters.onnx import export as onnx_export
from .exporters.rknn import export as rknn_export

app = typer.Typer(pretty_exceptions_show_locals=False)


def generate_readme(model_name: str, model_source: ModelSource) -> str:
    (name, link, type) = SOURCE_TO_METADATA[model_source]
    match model_source:
        case ModelSource.MCLIP:
            tags = ["immich", "clip", "multilingual"]
        case ModelSource.OPENCLIP:
            tags = ["immich", "clip"]
            lowered = model_name.lower()
            if "xlm" in lowered or "nllb" in lowered:
                tags.append("multilingual")
        case ModelSource.INSIGHTFACE:
            tags = ["immich", "facial-recognition"]
        case _:
            raise ValueError(f"Unsupported model source {model_source}")

    return f"""---
tags:
{" - " + "\n - ".join(tags)}
---
# Model Description

This repo contains ONNX exports for the associated {type} model by {name}. See the [{name}]({link}) repo for more info.

This repo is specifically intended for use with [Immich](https://immich.app/), a self-hosted photo library.
"""


@app.command()
def main(
    model_name: str,
    model_source: ModelSource,
    output_dir: Path = Path("./models"),
    no_cache: bool = False,
    hf_organization: str = "immich-app",
    hf_auth_token: Annotated[str | None, typer.Option(envvar="HF_AUTH_TOKEN")] = None,
) -> None:
    hf_model_name = model_name.split("/")[-1]
    hf_model_name = hf_model_name.replace("xlm-roberta-large", "XLM-Roberta-Large")
    hf_model_name = hf_model_name.replace("xlm-roberta-base", "XLM-Roberta-Base")
    output_dir = output_dir / hf_model_name
    match model_source:
        case ModelSource.MCLIP | ModelSource.OPENCLIP:
            output_dir.mkdir(parents=True, exist_ok=True)
            onnx_export(model_name, model_source, output_dir, no_cache=no_cache)
        case ModelSource.INSIGHTFACE:
            from huggingface_hub import snapshot_download

            # TODO: start from insightface dump instead of downloading from HF
            snapshot_download(f"immich-app/{hf_model_name}", local_dir=output_dir)
        case _:
            raise ValueError(f"Unsupported model source {model_source}")

    try:
        rknn_export(output_dir, no_cache=no_cache)
    except Exception as e:
        print(f"Failed to export model {model_name} to rknn: {e}")
        (output_dir / "rknpu").unlink(missing_ok=True)

    readme_path = output_dir / "README.md"
    if no_cache or not readme_path.exists():
        with open(readme_path, "w") as f:
            f.write(generate_readme(model_name, model_source))

    if hf_auth_token is not None:
        from huggingface_hub import create_repo, upload_folder

        repo_id = f"{hf_organization}/{hf_model_name}"

        @retry(stop=stop_after_attempt(5), wait=wait_fixed(5))
        def upload_model() -> None:
            create_repo(repo_id, exist_ok=True, token=hf_auth_token)
            upload_folder(
                repo_id=repo_id,
                folder_path=output_dir,
                # remote repo files to be deleted before uploading
                # deletion is in the same commit as the upload, so it's atomic
                delete_patterns=DELETE_PATTERNS,
                token=hf_auth_token,
            )

        upload_model()


if __name__ == "__main__":
    typer.run(main)
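A hedged sketch of driving the Typer app above programmatically with typer.testing.CliRunner; the module path matches the "python -m immich_model_exporter.export" invocation used by the batch script later in this diff. Note this performs a real export (including model downloads) into ./models, so it is shown for illustration only:

from typer.testing import CliRunner

from immich_model_exporter.export import app  # assumed import location of the Typer app

runner = CliRunner()
result = runner.invoke(app, ["ViT-B-32__openai", "openclip"])
print(result.exit_code)  # 0 on success; outputs land in ./models/ViT-B-32__openai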
@ -0,0 +1,42 @@
from enum import StrEnum
from typing import NamedTuple


class ModelSource(StrEnum):
    INSIGHTFACE = "insightface"
    MCLIP = "mclip"
    OPENCLIP = "openclip"


class SourceMetadata(NamedTuple):
    name: str
    link: str
    type: str


SOURCE_TO_METADATA = {
    ModelSource.MCLIP: SourceMetadata("M-CLIP", "https://huggingface.co/M-CLIP", "CLIP"),
    ModelSource.OPENCLIP: SourceMetadata("OpenCLIP", "https://github.com/mlfoundations/open_clip", "CLIP"),
    ModelSource.INSIGHTFACE: SourceMetadata(
        "InsightFace", "https://github.com/deepinsight/insightface/tree/master", "facial recognition"
    ),
}

RKNN_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]


# glob to delete old UUID blobs when reuploading models
_uuid_char = "[a-fA-F0-9]"
_uuid_glob = _uuid_char * 8 + "-" + _uuid_char * 4 + "-" + _uuid_char * 4 + "-" + _uuid_char * 4 + "-" + _uuid_char * 12
DELETE_PATTERNS = [
    "**/*onnx*",
    "**/Constant*",
    "**/*.weight",
    "**/*.bias",
    "**/*.proj",
    "**/*in_proj_bias",
    "**/*.npy",
    "**/*.latent",
    "**/*.pos_embed",
    f"**/{_uuid_glob}",
]
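Per the comment above, the UUID glob clears old UUID-named blobs when a model is reuploaded; DELETE_PATTERNS removes them in the same commit as the upload. An illustrative check of the glob with Python's fnmatch (the Hugging Face Hub applies its own glob matching, so this only approximates that behavior):

from fnmatch import fnmatch

_uuid_char = "[a-fA-F0-9]"
_uuid_glob = _uuid_char * 8 + "-" + _uuid_char * 4 + "-" + _uuid_char * 4 + "-" + _uuid_char * 4 + "-" + _uuid_char * 12

print(fnmatch("visual/1b20f8c3-9d4e-4b6a-8f3a-2c1d0e9f8a7b", f"**/{_uuid_glob}"))  # True: UUID blob matches
print(fnmatch("visual/model.rknn", f"**/{_uuid_glob}"))  # False: regular model files are kept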
@ -0,0 +1,20 @@
from pathlib import Path

from ..constants import ModelSource
from .models import mclip, openclip


def export(
    model_name: str, model_source: ModelSource, output_dir: Path, opset_version: int = 19, no_cache: bool = False
) -> None:
    visual_dir = output_dir / "visual"
    textual_dir = output_dir / "textual"
    match model_source:
        case ModelSource.MCLIP:
            mclip.to_onnx(model_name, opset_version, visual_dir, textual_dir, no_cache=no_cache)
        case ModelSource.OPENCLIP:
            name, _, pretrained = model_name.partition("__")
            config = openclip.OpenCLIPModelConfig(name, pretrained)
            openclip.to_onnx(config, opset_version, visual_dir, textual_dir, no_cache=no_cache)
        case _:
            raise ValueError(f"Unsupported model source {model_source}")
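The "__" separator encodes OpenCLIP model ids as <architecture>__<pretrained tag>, which partition splits cleanly:

name, _, pretrained = "ViT-B-32__openai".partition("__")
assert (name, pretrained) == ("ViT-B-32", "openai")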
@ -0,0 +1,153 @@
import warnings
from dataclasses import dataclass
from functools import cached_property
from pathlib import Path
from typing import Any

from .util import get_model_path, save_config


@dataclass
class OpenCLIPModelConfig:
    name: str
    pretrained: str

    @cached_property
    def model_config(self) -> dict[str, Any]:
        import open_clip

        config: dict[str, Any] | None = open_clip.get_model_config(self.name)
        if config is None:
            raise ValueError(f"Unknown model {self.name}")
        return config

    @property
    def image_size(self) -> int:
        image_size: int = self.model_config["vision_cfg"]["image_size"]
        return image_size

    @property
    def sequence_length(self) -> int:
        context_length: int = self.model_config["text_cfg"].get("context_length", 77)
        return context_length


def to_onnx(
    model_cfg: OpenCLIPModelConfig,
    opset_version: int,
    output_dir_visual: Path | str | None = None,
    output_dir_textual: Path | str | None = None,
    no_cache: bool = False,
) -> tuple[Path | None, Path | None]:
    visual_path = None
    textual_path = None
    if output_dir_visual is not None:
        output_dir_visual = Path(output_dir_visual)
        visual_path = get_model_path(output_dir_visual)

    if output_dir_textual is not None:
        output_dir_textual = Path(output_dir_textual)
        textual_path = get_model_path(output_dir_textual)

    if not no_cache and (
        (textual_path is None or textual_path.exists()) and (visual_path is None or visual_path.exists())
    ):
        print(f"Models {textual_path} and {visual_path} already exist, skipping")
        return visual_path, textual_path

    import open_clip
    import torch
    from transformers import AutoTokenizer

    torch.backends.mha.set_fastpath_enabled(False)

    model = open_clip.create_model(
        model_cfg.name,
        pretrained=model_cfg.pretrained,
        jit=False,
        require_pretrained=True,
    )

    text_vision_cfg = open_clip.get_model_config(model_cfg.name)

    model.eval()
    for param in model.parameters():
        param.requires_grad_(False)

    if visual_path is not None and output_dir_visual is not None:
        if no_cache or not visual_path.exists():
            save_config(
                open_clip.get_model_preprocess_cfg(model),
                output_dir_visual / "preprocess_cfg.json",
            )
            save_config(text_vision_cfg, output_dir_visual.parent / "config.json")
            _export_image_encoder(model, model_cfg, visual_path, opset_version)
        else:
            print(f"Model {visual_path} already exists, skipping")

    if textual_path is not None and output_dir_textual is not None:
        if no_cache or not textual_path.exists():
            tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32")
            AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(output_dir_textual)
            _export_text_encoder(model, model_cfg, textual_path, opset_version)
        else:
            print(f"Model {textual_path} already exists, skipping")
    return visual_path, textual_path


def _export_image_encoder(
    model: Any, model_cfg: OpenCLIPModelConfig, output_path: Path | str, opset_version: int
) -> None:
    import torch

    output_path = Path(output_path)

    def encode_image(image: torch.Tensor) -> torch.Tensor:
        output = model.encode_image(image, normalize=True)
        assert isinstance(output, torch.Tensor)
        return output

    model.forward = encode_image

    args = (torch.randn(1, 3, model_cfg.image_size, model_cfg.image_size),)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        torch.onnx.export(
            model,
            args,
            output_path.as_posix(),
            input_names=["image"],
            output_names=["embedding"],
            opset_version=opset_version,
            # dynamic_axes={"image": {0: "batch_size"}},
        )


def _export_text_encoder(
    model: Any, model_cfg: OpenCLIPModelConfig, output_path: Path | str, opset_version: int
) -> None:
    import torch

    output_path = Path(output_path)

    def encode_text(text: torch.Tensor) -> torch.Tensor:
        output = model.encode_text(text, normalize=True)
        assert isinstance(output, torch.Tensor)
        return output

    model.forward = encode_text

    args = (torch.ones(1, model_cfg.sequence_length, dtype=torch.int32),)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        torch.onnx.export(
            model,
            args,
            output_path.as_posix(),
            input_names=["text"],
            output_names=["embedding"],
            opset_version=opset_version,
            # dynamic_axes={"text": {0: "batch_size"}},
        )
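A minimal sketch for sanity-checking an exported visual encoder with onnxruntime; the path is illustrative, and the input/output names follow the torch.onnx.export calls above:

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession("models/ViT-B-32__openai/visual/model.onnx", providers=["CPUExecutionProvider"])
image = np.random.rand(1, 3, 224, 224).astype(np.float32)  # 224 is ViT-B-32's image_size
(embedding,) = sess.run(None, {"image": image})
print(embedding.shape)  # (1, 512) for ViT-B-32; embeddings are L2-normalized (normalize=True above)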
@ -0,0 +1,96 @@
from pathlib import Path

from .constants import RKNN_SOCS


def _export_platform(
    model_dir: Path,
    target_platform: str,
    inputs: list[str] | None = None,
    input_size_list: list[list[int]] | None = None,
    fuse_matmul_softmax_matmul_to_sdpa: bool = True,
    no_cache: bool = False,
) -> None:
    from rknn.api import RKNN

    input_path = model_dir / "model.onnx"
    output_path = model_dir / "rknpu" / target_platform / "model.rknn"
    if not no_cache and output_path.exists():
        print(f"Model {input_path} already exists at {output_path}, skipping")
        return

    print(f"Exporting model {input_path} to {output_path}")

    rknn = RKNN(verbose=False)

    rknn.config(
        target_platform=target_platform,
        disable_rules=["fuse_matmul_softmax_matmul_to_sdpa"] if not fuse_matmul_softmax_matmul_to_sdpa else [],
        enable_flash_attention=False,
        model_pruning=True,
    )
    ret = rknn.load_onnx(model=input_path.as_posix(), inputs=inputs, input_size_list=input_size_list)

    if ret != 0:
        raise RuntimeError("Load failed!")

    ret = rknn.build(do_quantization=False)

    if ret != 0:
        raise RuntimeError("Build failed!")

    output_path.parent.mkdir(parents=True, exist_ok=True)
    ret = rknn.export_rknn(output_path.as_posix())
    if ret != 0:
        raise RuntimeError("Export rknn model failed!")


def _export_platforms(
    model_dir: Path,
    inputs: list[str] | None = None,
    input_size_list: list[list[int]] | None = None,
    no_cache: bool = False,
) -> None:
    fuse_matmul_softmax_matmul_to_sdpa = True
    for soc in RKNN_SOCS:
        try:
            _export_platform(
                model_dir,
                soc,
                inputs=inputs,
                input_size_list=input_size_list,
                fuse_matmul_softmax_matmul_to_sdpa=fuse_matmul_softmax_matmul_to_sdpa,
                no_cache=no_cache,
            )
        except Exception as e:
            print(f"Failed to export model for {soc}: {e}")
            if "inputs or 'outputs' must be set" in str(e):
                print("Retrying without fuse_matmul_softmax_matmul_to_sdpa")
                fuse_matmul_softmax_matmul_to_sdpa = False
                _export_platform(
                    model_dir,
                    soc,
                    inputs=inputs,
                    input_size_list=input_size_list,
                    fuse_matmul_softmax_matmul_to_sdpa=fuse_matmul_softmax_matmul_to_sdpa,
                    no_cache=no_cache,
                )


def export(model_dir: Path, no_cache: bool = False) -> None:
    textual = model_dir / "textual"
    visual = model_dir / "visual"
    detection = model_dir / "detection"
    recognition = model_dir / "recognition"

    if textual.is_dir():
        _export_platforms(textual, no_cache=no_cache)

    if visual.is_dir():
        _export_platforms(visual, no_cache=no_cache)

    if detection.is_dir():
        _export_platforms(detection, inputs=["input.1"], input_size_list=[[1, 3, 640, 640]], no_cache=no_cache)

    if recognition.is_dir():
        _export_platforms(recognition, inputs=["input.1"], input_size_list=[[1, 3, 112, 112]], no_cache=no_cache)
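For reference, a hedged example of converting an already-exported model directory (requires the rknn-toolkit2 conversion package; the directory name and module path are illustrative assumptions):

from pathlib import Path

from immich_model_exporter.exporters.rknn import export as rknn_export  # assumed module path

rknn_export(Path("models/buffalo_l"), no_cache=False)
# writes e.g. models/buffalo_l/detection/rknpu/rk3588/model.rknn, one file per SoC in RKNN_SOCS

The per-SoC "rknpu/<soc>/model.rknn" layout matches model_prefix = Path("rknpu") / soc_name in the runtime session earlier in this diff, so a server picks up the binary compiled for its own SoC.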
@ -0,0 +1,88 @@
import subprocess

from exporters.constants import ModelSource

mclip = [
    "M-CLIP/LABSE-Vit-L-14",
    "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
    "M-CLIP/XLM-Roberta-Large-Vit-B-32",
    "M-CLIP/XLM-Roberta-Large-Vit-L-14",
]

openclip = [
    "RN101__openai",
    "RN101__yfcc15m",
    "RN50__cc12m",
    "RN50__openai",
    "RN50__yfcc15m",
    "RN50x16__openai",
    "RN50x4__openai",
    "RN50x64__openai",
    "ViT-B-16-SigLIP-256__webli",
    "ViT-B-16-SigLIP-384__webli",
    "ViT-B-16-SigLIP-512__webli",
    "ViT-B-16-SigLIP-i18n-256__webli",
    "ViT-B-16-SigLIP2__webli",
    "ViT-B-16-SigLIP__webli",
    "ViT-B-16-plus-240__laion400m_e31",
    "ViT-B-16-plus-240__laion400m_e32",
    "ViT-B-16__laion400m_e31",
    "ViT-B-16__laion400m_e32",
    "ViT-B-16__openai",
    "ViT-B-32-SigLIP2-256__webli",
    "ViT-B-32__laion2b-s34b-b79k",
    "ViT-B-32__laion2b_e16",
    "ViT-B-32__laion400m_e31",
    "ViT-B-32__laion400m_e32",
    "ViT-B-32__openai",
    "ViT-H-14-378-quickgelu__dfn5b",
    "ViT-H-14-quickgelu__dfn5b",
    "ViT-H-14__laion2b-s32b-b79k",
    "ViT-L-14-336__openai",
    "ViT-L-14-quickgelu__dfn2b",
    "ViT-L-14__laion2b-s32b-b82k",
    "ViT-L-14__laion400m_e31",
    "ViT-L-14__laion400m_e32",
    "ViT-L-14__openai",
    "ViT-L-16-SigLIP-256__webli",
    "ViT-L-16-SigLIP-384__webli",
    "ViT-L-16-SigLIP2-256__webli",
    "ViT-L-16-SigLIP2-384__webli",
    "ViT-L-16-SigLIP2-512__webli",
    "ViT-SO400M-14-SigLIP-384__webli",
    "ViT-SO400M-14-SigLIP2-378__webli",
    "ViT-SO400M-14-SigLIP2__webli",
    "ViT-SO400M-16-SigLIP2-256__webli",
    "ViT-SO400M-16-SigLIP2-384__webli",
    "ViT-SO400M-16-SigLIP2-512__webli",
    "ViT-gopt-16-SigLIP2-256__webli",
    "ViT-gopt-16-SigLIP2-384__webli",
    "nllb-clip-base-siglip__mrl",
    "nllb-clip-base-siglip__v1",
    "nllb-clip-large-siglip__mrl",
    "nllb-clip-large-siglip__v1",
    "xlm-roberta-base-ViT-B-32__laion5b_s13b_b90k",
    "xlm-roberta-large-ViT-H-14__frozen_laion5b_s13b_b90k",
]

insightface = [
    "antelopev2",
    "buffalo_l",
    "buffalo_m",
    "buffalo_s",
]


def export_models(models: list[str], source: ModelSource) -> None:
    for model in models:
        try:
            print(f"Exporting model {model}")
            subprocess.check_call(["python", "-m", "immich_model_exporter.export", model, source])
        except Exception as e:
            print(f"Failed to export model {model}: {e}")


if __name__ == "__main__":
    export_models(mclip, ModelSource.MCLIP)
    export_models(openclip, ModelSource.OPENCLIP)
    export_models(insightface, ModelSource.INSIGHTFACE)
@ -1,114 +0,0 @@
import os
import tempfile
import warnings
from dataclasses import dataclass, field
from pathlib import Path

import open_clip
import torch
from transformers import AutoTokenizer

from .util import get_model_path, save_config


@dataclass
class OpenCLIPModelConfig:
    name: str
    pretrained: str
    image_size: int = field(init=False)
    sequence_length: int = field(init=False)

    def __post_init__(self) -> None:
        open_clip_cfg = open_clip.get_model_config(self.name)
        if open_clip_cfg is None:
            raise ValueError(f"Unknown model {self.name}")
        self.image_size = open_clip_cfg["vision_cfg"]["image_size"]
        self.sequence_length = open_clip_cfg["text_cfg"].get("context_length", 77)


def to_onnx(
    model_cfg: OpenCLIPModelConfig,
    output_dir_visual: Path | str | None = None,
    output_dir_textual: Path | str | None = None,
) -> tuple[Path | None, Path | None]:
    visual_path = None
    textual_path = None
    with tempfile.TemporaryDirectory() as tmpdir:
        model = open_clip.create_model(
            model_cfg.name,
            pretrained=model_cfg.pretrained,
            jit=False,
            cache_dir=os.environ.get("CACHE_DIR", tmpdir),
            require_pretrained=True,
        )

        text_vision_cfg = open_clip.get_model_config(model_cfg.name)

        model.eval()
        for param in model.parameters():
            param.requires_grad_(False)

        if output_dir_visual is not None:
            output_dir_visual = Path(output_dir_visual)
            visual_path = get_model_path(output_dir_visual)

            save_config(open_clip.get_model_preprocess_cfg(model), output_dir_visual / "preprocess_cfg.json")
            save_config(text_vision_cfg, output_dir_visual.parent / "config.json")
            export_image_encoder(model, model_cfg, visual_path)

        if output_dir_textual is not None:
            output_dir_textual = Path(output_dir_textual)
            textual_path = get_model_path(output_dir_textual)

            tokenizer_name = text_vision_cfg["text_cfg"].get("hf_tokenizer_name", "openai/clip-vit-base-patch32")
            AutoTokenizer.from_pretrained(tokenizer_name).save_pretrained(output_dir_textual)
            export_text_encoder(model, model_cfg, textual_path)
    return visual_path, textual_path


def export_image_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, output_path: Path | str) -> None:
    output_path = Path(output_path)

    def encode_image(image: torch.Tensor) -> torch.Tensor:
        output = model.encode_image(image, normalize=True)
        assert isinstance(output, torch.Tensor)
        return output

    args = (torch.randn(1, 3, model_cfg.image_size, model_cfg.image_size),)
    traced = torch.jit.trace(encode_image, args)  # type: ignore[no-untyped-call]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        torch.onnx.export(
            traced,
            args,
            output_path.as_posix(),
            input_names=["image"],
            output_names=["embedding"],
            opset_version=17,
            # dynamic_axes={"image": {0: "batch_size"}},
        )


def export_text_encoder(model: open_clip.CLIP, model_cfg: OpenCLIPModelConfig, output_path: Path | str) -> None:
    output_path = Path(output_path)

    def encode_text(text: torch.Tensor) -> torch.Tensor:
        output = model.encode_text(text, normalize=True)
        assert isinstance(output, torch.Tensor)
        return output

    args = (torch.ones(1, model_cfg.sequence_length, dtype=torch.int32),)
    traced = torch.jit.trace(encode_text, args)  # type: ignore[no-untyped-call]

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        torch.onnx.export(
            traced,
            args,
            output_path.as_posix(),
            input_names=["text"],
            output_names=["embedding"],
            opset_version=17,
            # dynamic_axes={"text": {0: "batch_size"}},
        )
@ -1,49 +0,0 @@
from pathlib import Path

import onnx
import onnxruntime as ort
import onnxsim


def save_onnx(model: onnx.ModelProto, output_path: Path | str) -> None:
    try:
        onnx.save(model, output_path)
    except ValueError as e:
        if "The proto size is larger than the 2 GB limit." in str(e):
            onnx.save(model, output_path, save_as_external_data=True, size_threshold=1_000_000)
        else:
            raise e


def optimize_onnxsim(model_path: Path | str, output_path: Path | str) -> None:
    model_path = Path(model_path)
    output_path = Path(output_path)
    model = onnx.load(model_path.as_posix())
    model, check = onnxsim.simplify(model)
    assert check, "Simplified ONNX model could not be validated"
    for file in model_path.parent.iterdir():
        if file.name.startswith("Constant") or "onnx" in file.name or file.suffix == ".weight":
            file.unlink()
    save_onnx(model, output_path)


def optimize_ort(
    model_path: Path | str,
    output_path: Path | str,
    level: ort.GraphOptimizationLevel = ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
) -> None:
    model_path = Path(model_path)
    output_path = Path(output_path)

    sess_options = ort.SessionOptions()
    sess_options.graph_optimization_level = level
    sess_options.optimized_model_filepath = output_path.as_posix()

    ort.InferenceSession(model_path.as_posix(), providers=["CPUExecutionProvider"], sess_options=sess_options)


def optimize(model_path: Path | str) -> None:
    model_path = Path(model_path)

    optimize_ort(model_path, model_path)
    optimize_onnxsim(model_path, model_path)
@ -0,0 +1,67 @@
[project]
name = "immich_model_exporter"
version = "0.1.0"
description = "Add your description here"
readme = "README.md"
requires-python = ">=3.10, <4.0"
dependencies = [
    "huggingface-hub>=0.29.3",
    "multilingual-clip>=1.0.10",
    "onnx>=1.14.1",
    "onnxruntime>=1.16.0",
    "open-clip-torch>=2.31.0",
    "typer>=0.15.2",
    "rknn-toolkit2>=2.3.0",
    "transformers>=4.49.0",
    "tenacity>=9.0.0",
]

[dependency-groups]
dev = ["black>=23.3.0", "mypy>=1.3.0", "ruff>=0.0.272"]

[tool.uv]
override-dependencies = [
    "onnx>=1.16.0,<2",
    "onnxruntime>=1.18.2,<2",
    "torch>=2.4",
    "torchvision>=0.21",
]

[tool.uv.sources]
torch = [{ index = "pytorch-cpu" }]
torchvision = [{ index = "pytorch-cpu" }]

[[tool.uv.index]]
name = "pytorch-cpu"
url = "https://download.pytorch.org/whl/cpu"
explicit = true

[tool.hatch.build.targets.sdist]
include = ["immich_model_exporter"]

[tool.hatch.build.targets.wheel]
include = ["immich_model_exporter"]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.mypy]
python_version = "3.12"
follow_imports = "silent"
warn_redundant_casts = true
disallow_any_generics = true
check_untyped_defs = true
disallow_untyped_defs = true
ignore_missing_imports = true

[tool.ruff]
line-length = 120
target-version = "py312"

[tool.ruff.lint]
select = ["E", "F", "I"]

[tool.black]
line-length = 120
target-version = ['py312']
@ -1,113 +0,0 @@
import gc
import os
from pathlib import Path
from tempfile import TemporaryDirectory

import torch
from huggingface_hub import create_repo, upload_folder
from models import mclip, openclip
from models.optimize import optimize
from rich.progress import Progress

models = [
    "M-CLIP/LABSE-Vit-L-14",
    "M-CLIP/XLM-Roberta-Large-Vit-B-16Plus",
    "M-CLIP/XLM-Roberta-Large-Vit-B-32",
    "M-CLIP/XLM-Roberta-Large-Vit-L-14",
    "RN101::openai",
    "RN101::yfcc15m",
    "RN50::cc12m",
    "RN50::openai",
    "RN50::yfcc15m",
    "RN50x16::openai",
    "RN50x4::openai",
    "RN50x64::openai",
    "ViT-B-16-SigLIP-256::webli",
    "ViT-B-16-SigLIP-384::webli",
    "ViT-B-16-SigLIP-512::webli",
    "ViT-B-16-SigLIP-i18n-256::webli",
    "ViT-B-16-SigLIP::webli",
    "ViT-B-16-plus-240::laion400m_e31",
    "ViT-B-16-plus-240::laion400m_e32",
    "ViT-B-16::laion400m_e31",
    "ViT-B-16::laion400m_e32",
    "ViT-B-16::openai",
    "ViT-B-32::laion2b-s34b-b79k",
    "ViT-B-32::laion2b_e16",
    "ViT-B-32::laion400m_e31",
    "ViT-B-32::laion400m_e32",
    "ViT-B-32::openai",
    "ViT-H-14-378-quickgelu::dfn5b",
    "ViT-H-14-quickgelu::dfn5b",
    "ViT-H-14::laion2b-s32b-b79k",
    "ViT-L-14-336::openai",
    "ViT-L-14-quickgelu::dfn2b",
    "ViT-L-14::laion2b-s32b-b82k",
    "ViT-L-14::laion400m_e31",
    "ViT-L-14::laion400m_e32",
    "ViT-L-14::openai",
    "ViT-L-16-SigLIP-256::webli",
    "ViT-L-16-SigLIP-384::webli",
    "ViT-SO400M-14-SigLIP-384::webli",
    "ViT-g-14::laion2b-s12b-b42k",
    "nllb-clip-base-siglip::mrl",
    "nllb-clip-base-siglip::v1",
    "nllb-clip-large-siglip::mrl",
    "nllb-clip-large-siglip::v1",
    "xlm-roberta-base-ViT-B-32::laion5b_s13b_b90k",
    "xlm-roberta-large-ViT-H-14::frozen_laion5b_s13b_b90k",
]

# glob to delete old UUID blobs when reuploading models
uuid_char = "[a-fA-F0-9]"
uuid_glob = uuid_char * 8 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 4 + "-" + uuid_char * 12

# remote repo files to be deleted before uploading
# deletion is in the same commit as the upload, so it's atomic
delete_patterns = ["**/*onnx*", "**/Constant*", "**/*.weight", "**/*.bias", f"**/{uuid_glob}"]

with Progress() as progress:
    task = progress.add_task("[green]Exporting models...", total=len(models))
    token = os.environ.get("HF_AUTH_TOKEN")
    torch.backends.mha.set_fastpath_enabled(False)
    with TemporaryDirectory() as tmp:
        tmpdir = Path(tmp)
        for model in models:
            model_name = model.split("/")[-1].replace("::", "__")
            hf_model_name = model_name.replace("xlm-roberta-large", "XLM-Roberta-Large")
            hf_model_name = model_name.replace("xlm-roberta-base", "XLM-Roberta-Base")
            config_path = tmpdir / model_name / "config.json"

            def export() -> None:
                progress.update(task, description=f"[green]Exporting {hf_model_name}")
                visual_dir = tmpdir / hf_model_name / "visual"
                textual_dir = tmpdir / hf_model_name / "textual"
                if model.startswith("M-CLIP"):
                    visual_path, textual_path = mclip.to_onnx(model, visual_dir, textual_dir)
                else:
                    name, _, pretrained = model_name.partition("__")
                    config = openclip.OpenCLIPModelConfig(name, pretrained)
                    visual_path, textual_path = openclip.to_onnx(config, visual_dir, textual_dir)
                progress.update(task, description=f"[green]Optimizing {hf_model_name} (visual)")
                optimize(visual_path)
                progress.update(task, description=f"[green]Optimizing {hf_model_name} (textual)")
                optimize(textual_path)

                gc.collect()

            def upload() -> None:
                progress.update(task, description=f"[yellow]Uploading {hf_model_name}")
                repo_id = f"immich-app/{hf_model_name}"

                create_repo(repo_id, exist_ok=True)
                upload_folder(
                    repo_id=repo_id,
                    folder_path=tmpdir / hf_model_name,
                    delete_patterns=delete_patterns,
                    token=token,
                )

            export()
            if token is not None:
                upload()
            progress.update(task, advance=1)
File diff suppressed because it is too large