Savely Krasovsky 2025-12-10 18:13:11 +07:00 committed by GitHub
commit 585ccfb736
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 748 additions and 1194 deletions

@ -8,6 +8,9 @@ indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true
[*.py]
indent_size = 4
[*.{ts,js}]
quote_type = single

@ -574,9 +574,9 @@ jobs:
uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
- uses: actions/setup-python@83679a892e2d95755f2dac6acb0bfd1e9ac5d548 # v6.1.0
# TODO: add caching when supported (https://github.com/actions/setup-python/pull/818)
# with:
# python-version: 3.11
# cache: 'uv'
with:
python-version: 3.11
#cache: 'uv'
- name: Install dependencies
run: |
uv sync --extra cpu

@ -174,6 +174,7 @@ Redis (Sentinel) URL example JSON before encoding:
| `MACHINE_LEARNING_RKNN_THREADS` | How many threads of RKNN runtime should be spun up while inferencing. | `1` | machine learning |
| `MACHINE_LEARNING_MODEL_ARENA` | Pre-allocates CPU memory to avoid memory fragmentation | `true` | machine learning |
| `MACHINE_LEARNING_OPENVINO_PRECISION` | If set to FP16, uses half-precision floating-point operations for faster inference with reduced accuracy (one of [`FP16`, `FP32`], applies only to OpenVINO) | `FP32` | machine learning |
| `MACHINE_LEARNING_OPENVINO_CACHE_CAPACITY` | The maximum number of distinct image dimensions for which optimized models are kept in the cache (applies only to OpenVINO) | `20` | machine learning |
\*1: It is recommended to begin with this parameter when changing the concurrency levels of the machine learning service and then tune the other ones.

@ -2,7 +2,7 @@ ARG DEVICE=cpu
FROM python:3.11-bookworm@sha256:e39286476f84ffedf7c3564b0b74e32c9e1193ec9ca32ee8a11f8c09dbf6aafe AS builder-cpu
FROM builder-cpu AS builder-openvino
FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS builder-openvino
FROM builder-cpu AS builder-cuda
@ -22,20 +22,18 @@ FROM builder-cpu AS builder-rknn
# Warning: 25GiB+ disk space required to pull this image
# TODO: find a way to reduce the image size
FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:6cda50e312f3aac068cea9ec06c560ca1f522ad546bc8b3d2cf06da0fe8e8a76 AS builder-rocm
FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS builder-rocm
# renovate: datasource=github-releases depName=Microsoft/onnxruntime
ARG ONNXRUNTIME_VERSION="v1.22.1"
WORKDIR /code
RUN apt-get update && apt-get install -y --no-install-recommends wget git python3.10-venv
RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.30.1/cmake-3.30.1-linux-x86_64.sh && \
chmod +x cmake-3.30.1-linux-x86_64.sh && \
mkdir -p /code/cmake-3.30.1-linux-x86_64 && \
./cmake-3.30.1-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.30.1-linux-x86_64 && \
rm cmake-3.30.1-linux-x86_64.sh
ENV PATH=/code/cmake-3.30.1-linux-x86_64/bin:${PATH}
RUN apt-get update && apt-get install -y --no-install-recommends wget git
RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.31.9/cmake-3.31.9-linux-x86_64.sh && \
chmod +x cmake-3.31.9-linux-x86_64.sh && \
mkdir -p /code/cmake-3.31.9-linux-x86_64 && \
./cmake-3.31.9-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.31.9-linux-x86_64 && \
rm cmake-3.31.9-linux-x86_64.sh
RUN git clone --single-branch --branch "${ONNXRUNTIME_VERSION}" --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
WORKDIR /code/onnxruntime
@ -45,9 +43,26 @@ COPY ./patches/* /tmp/
RUN git apply /tmp/*.patch
RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
ENV PATH=/opt/rocm-venv/bin:/code/cmake-3.31.9-linux-x86_64/bin:${PATH}
ENV CCACHE_DIR="/ccache"
# Note: the `parallel` setting uses a substantial amount of RAM
RUN ./build.sh --allow_running_as_root --config Release --build_wheel --update --build --parallel 17 --cmake_extra_defines\
ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" --skip_tests --use_rocm --rocm_home=/opt/rocm
# Build the ONNX Runtime Python wheel with ROCm enabled (Release config, tests skipped).
# A BuildKit cache mount is attached at /ccache, the same path CCACHE_DIR points to,
# so its contents persist between image builds instead of being stored in a layer.
# NOTE(review): --use_cache presumably tells build.sh to compile through ccache — confirm
# against the ONNX Runtime build script for the pinned ONNXRUNTIME_VERSION.
# CMAKE_HIP_ARCHITECTURES is passed as an extra CMake define to list the GPU targets
# to compile for; every added target presumably lengthens the build — confirm.
# --compile_no_warning_as_error: presumably keeps compiler warnings from failing the build.
RUN --mount=type=cache,target=/ccache \
./build.sh \
--allow_running_as_root \
--config Release \
--build_wheel \
--update \
--build \
--parallel 17 \
--cmake_extra_defines \
ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" \
CMAKE_HIP_ARCHITECTURES="gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" \
--skip_tests \
--use_rocm \
--rocm_home=/opt/rocm \
--use_cache \
--compile_no_warning_as_error
RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/
FROM builder-${DEVICE} AS builder
@ -73,15 +88,18 @@ FROM python:3.11-slim-bookworm@sha256:2c5bc243b1cc47985ee4fb768bb0bbd4490481c5d0
ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
MACHINE_LEARNING_MODEL_ARENA=false
FROM python:3.11-slim-bookworm@sha256:2c5bc243b1cc47985ee4fb768bb0bbd4490481c5d0897a62da31b7f30b7304a7 AS prod-openvino
FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS prod-openvino
RUN apt-get update && \
apt-get install --no-install-recommends -yqq ocl-icd-libopencl1 wget && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-core_1.0.17384.11_amd64.deb && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17384.11/intel-igc-opencl_1.0.17384.11_amd64.deb && \
wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/intel-opencl-icd_24.31.30508.7_amd64.deb && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.22.2/intel-igc-core-2_2.22.2+20121_amd64.deb && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.22.2/intel-igc-opencl-2_2.22.2+20121_amd64.deb && \
wget -nv https://github.com/intel/compute-runtime/releases/download/25.44.36015.5/intel-opencl-icd_25.44.36015.5-0_amd64.deb && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-core_1.0.17537.24_amd64.deb && \
wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-opencl_1.0.17537.24_amd64.deb && \
wget -nv https://github.com/intel/compute-runtime/releases/download/24.35.30872.36/intel-opencl-icd-legacy1_24.35.30872.36_amd64.deb && \
# TODO: Figure out how to get renovate to manage this differently versioned libigdgmm file
wget -nv https://github.com/intel/compute-runtime/releases/download/24.31.30508.7/libigdgmm12_22.4.1_amd64.deb && \
wget -nv https://github.com/intel/compute-runtime/releases/download/25.44.36015.5/libigdgmm12_22.8.2_amd64.deb && \
dpkg -i *.deb && \
rm *.deb && \
apt-get remove wget -yqq && \
@ -102,7 +120,7 @@ COPY --from=builder-cuda /usr/local/bin/python3 /usr/local/bin/python3
COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11
COPY --from=builder-cuda /usr/local/lib/libpython3.11.so /usr/local/lib/libpython3.11.so
FROM rocm/dev-ubuntu-22.04:6.4.3-complete@sha256:6cda50e312f3aac068cea9ec06c560ca1f522ad546bc8b3d2cf06da0fe8e8a76 AS prod-rocm
FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS prod-rocm
FROM prod-cpu AS prod-armnn

@ -79,6 +79,7 @@ class Settings(BaseSettings):
preload: PreloadModelData | None = None
max_batch_size: MaxBatchSize | None = None
openvino_precision: ModelPrecision = ModelPrecision.FP32
openvino_cache_capacity: int = 20
@property
def device_id(self) -> str:

@ -5,6 +5,7 @@ from typing import Any
import numpy as np
import onnxruntime as ort
import orjson
from numpy.typing import NDArray
from immich_ml.models.constants import SUPPORTED_PROVIDERS
@ -99,6 +100,11 @@ class OrtSession:
"device_type": device,
"precision": settings.openvino_precision.value,
"cache_dir": openvino_dir.as_posix(),
"load_config": orjson.dumps(
{
"CPU": {"CPU_RUNTIME_CACHE_CAPACITY": str(settings.openvino_cache_capacity)},
}
).decode(),
}
case "CoreMLExecutionProvider":
options = {

@ -0,0 +1,33 @@
diff --git a/dockerfiles/scripts/install_common_deps.sh b/dockerfiles/scripts/install_common_deps.sh
index bbb672a99e..0dc652fbda 100644
--- a/dockerfiles/scripts/install_common_deps.sh
+++ b/dockerfiles/scripts/install_common_deps.sh
@@ -8,16 +8,23 @@ apt-get update && apt-get install -y --no-install-recommends \
curl \
libcurl4-openssl-dev \
libssl-dev \
- python3-dev
+ python3-dev \
+ ccache
# Dependencies: conda
-wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh --no-check-certificate && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
+wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py312_25.9.1-1-Linux-x86_64.sh -O ~/miniconda.sh && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
rm ~/miniconda.sh
/opt/miniconda/bin/conda clean -ya
-pip install numpy
-pip install packaging
-pip install "wheel>=0.35.1"
+# Dependencies: venv and packages
+/opt/miniconda/bin/python3 -m venv /opt/rocm-venv
+/opt/rocm-venv/bin/pip install --no-cache-dir --upgrade pip
+/opt/rocm-venv/bin/pip install --no-cache-dir \
+ "numpy==2.3.4" \
+ "packaging==25.0" \
+ "wheel==0.45.1" \
+ "setuptools==80.9.0"
+
rm -rf /opt/miniconda/pkgs
# Dependencies: cmake

@ -1,13 +0,0 @@
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 2714e6f59..a69da76b4 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -338,7 +338,7 @@ if (onnxruntime_USE_ROCM)
if (ROCM_VERSION_DEV VERSION_LESS "6.2")
message(FATAL_ERROR "CMAKE_HIP_ARCHITECTURES is not set when ROCm version < 6.2")
else()
- set(CMAKE_HIP_ARCHITECTURES "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx940;gfx941;gfx942;gfx1200;gfx1201")
+ set(CMAKE_HIP_ARCHITECTURES "gfx900;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102;gfx940;gfx941;gfx942;gfx1200;gfx1201")
endif()
endif()

@ -3,7 +3,7 @@ name = "immich-ml"
version = "2.3.1"
description = ""
authors = [{ name = "Hau Tran", email = "alex.tran1502@gmail.com" }]
requires-python = ">=3.10,<4.0"
requires-python = ">=3.11,<4"
readme = "README.md"
dependencies = [
"aiocache>=0.12.1,<1.0",
@ -12,7 +12,7 @@ dependencies = [
"gunicorn>=21.1.0",
"huggingface-hub>=0.20.1,<1.0",
"insightface>=0.7.3,<1.0",
"numpy<2",
"numpy>=2.3.4",
"opencv-python-headless>=4.7.0.72,<5.0",
"orjson>=3.9.5",
"pillow>=9.5.0,<11.0",
@ -49,24 +49,16 @@ lint = [
dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }]
[project.optional-dependencies]
cpu = ["onnxruntime>=1.15.0,<2"]
cuda = ["onnxruntime-gpu>=1.17.0,<2"]
openvino = ["onnxruntime-openvino>=1.17.1,<1.19.0"]
armnn = ["onnxruntime>=1.15.0,<2"]
rknn = ["onnxruntime>=1.15.0,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
cpu = ["onnxruntime>=1.23.2,<2"]
cuda = ["onnxruntime-gpu>=1.23.2,<2"]
openvino = ["onnxruntime-openvino>=1.23.0,<2"]
armnn = ["onnxruntime>=1.23.2,<2"]
rknn = ["onnxruntime>=1.23.2,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
rocm = []
[tool.uv]
compile-bytecode = true
[[tool.uv.index]]
name = "cuda12"
url = "https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/onnxruntime-cuda-12/pypi/simple/"
explicit = true
[tool.uv.sources]
onnxruntime-gpu = { index = "cuda12" }
[tool.hatch.build.targets.sdist]
include = ["immich_ml"]

@ -249,13 +249,15 @@ class TestOrtSession:
"device_type": "GPU.0",
"precision": "FP32",
"cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
"load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"20\"}}",
},
{"arena_extend_strategy": "kSameAsRequested"},
]
def test_sets_provider_options_for_openvino(self) -> None:
def test_sets_provider_options_for_openvino(self, mocker: MockerFixture) -> None:
model_path = "/cache/ViT-B-32__openai/textual/model.onnx"
os.environ["MACHINE_LEARNING_DEVICE_ID"] = "1"
mocker.patch.object(settings, "openvino_cache_capacity", 10)
session = OrtSession(model_path, providers=["OpenVINOExecutionProvider"])
@ -264,6 +266,7 @@ class TestOrtSession:
"device_type": "GPU.1",
"precision": "FP32",
"cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
"load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"10\"}}"
}
]
@ -279,6 +282,7 @@ class TestOrtSession:
"device_type": "GPU.1",
"precision": "FP16",
"cache_dir": "/cache/ViT-B-32__openai/textual/openvino",
"load_config": "{\"CPU\":{\"CPU_RUNTIME_CACHE_CAPACITY\":\"20\"}}",
}
]

File diff suppressed because it is too large Load Diff