Source Code added

2026-02-02 15:06:40 +01:00 · 2026-02-02 15:06:40 +01:00 · 9efa9bc6dd
commit 9efa9bc6dd
parent 800376eafd
3912 changed files with 754770 additions and 2 deletions
--- a/machine-learning/.dockerignore
+++ b/machine-learning/.dockerignore
@ -0,0 +1,3 @@
+venv/
+*.zip
+*.onnx
--- a/machine-learning/.gitignore
+++ b/machine-learning/.gitignore
@ -0,0 +1,198 @@
+*.zip
+*.onnx
+*.rknn
+*.npy
+*_attr__value
+*.weight
+*.bias
+onnx__*
+*in_proj_bias
+*.proj
+*.latent
+*.pos_embed
+vocab.txt
+export/immich_model_exporter/models/**/README.md
+export/**/results/*.json
+export/**/root
+*.armnn
+tokenizer.json
+tokenizer_config.json
+special_tokens_map.json
+preprocess_cfg.json
+config.json
+merges.txt
+vocab.json
+upload/
+venv/
+__pycache__/
+model-cache/
+
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+
+# VS Code
+.vscode
+
+*.onnx
+*.zip
+
+core
--- a/machine-learning/Dockerfile
+++ b/machine-learning/Dockerfile
@ -0,0 +1,206 @@
+# Build argument that selects which builder-*/prod-* stage pair is used
+# (see `FROM builder-${DEVICE}` and `FROM prod-${DEVICE}` further down).
+ARG DEVICE=cpu
+
+# Default builder base; builder-cuda, builder-armnn and builder-rknn derive from it.
+FROM python:3.11-bookworm@sha256:667cf70698924920f29ebdb8d749ab665811503b87093d4f11826d114fd7255e AS builder-cpu
+
+# OpenVINO builds on a different Python / Debian release than the other devices.
+FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS builder-openvino
+
+FROM builder-cpu AS builder-cuda
+
+# ArmNN builder: downloads the prebuilt aarch64 ArmNN release into /opt/armnn
+# and runs ann/build.sh against it from /opt/ann.
+FROM builder-cpu AS builder-armnn
+
+# renovate: datasource=github-releases depName=ARM-software/armnn
+ARG ARMNN_VERSION="v24.05"
+
+# ARMNN_PATH is read by build.sh for its include (-I) and library (-L) paths.
+ENV ARMNN_PATH=/opt/armnn
+COPY ann /opt/ann
+# NOTE(review): no SHELL/pipefail is set in this stage, so only tar's exit status
+# decides whether the download step of this pipeline counts as successful.
+RUN mkdir /opt/armnn && \
+    curl -SL "https://github.com/ARM-software/armnn/releases/download/${ARMNN_VERSION}/ArmNN-linux-aarch64.tar.gz" | tar -zx -C /opt/armnn && \
+    cd /opt/ann && \
+    sh build.sh
+
+# RKNN needs nothing beyond the CPU builder at build time.
+FROM builder-cpu AS builder-rknn
+
+# ROCm builder: compiles an onnxruntime wheel from source with ROCm enabled and
+# leaves it in /opt/ for the generic `builder` stage to install.
+# Warning: 25GiB+ disk space required to pull this image
+# TODO: find a way to reduce the image size
+FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS builder-rocm
+
+# renovate: datasource=github-releases depName=Microsoft/onnxruntime
+ARG ONNXRUNTIME_VERSION="v1.22.1"
+WORKDIR /code
+
+RUN apt-get update && apt-get install -y --no-install-recommends wget git
+# Install a pinned CMake release under /code; it is put on PATH further down.
+RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.31.9/cmake-3.31.9-linux-x86_64.sh && \
+    chmod +x cmake-3.31.9-linux-x86_64.sh && \
+    mkdir -p /code/cmake-3.31.9-linux-x86_64 && \
+    ./cmake-3.31.9-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.31.9-linux-x86_64 && \
+    rm cmake-3.31.9-linux-x86_64.sh
+
+# Check out the onnxruntime tag named by ONNXRUNTIME_VERSION, including submodules.
+RUN git clone --single-branch --branch "${ONNXRUNTIME_VERSION}" --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime
+WORKDIR /code/onnxruntime
+# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567
+# TODO: find a way to fix this without disabling algo caching
+COPY ./patches/* /tmp/
+RUN git apply /tmp/*.patch
+
+RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh
+
+ENV PATH=/opt/rocm-venv/bin:/code/cmake-3.31.9-linux-x86_64/bin:${PATH}
+# CCACHE_DIR matches the cache mount target used by the build step below.
+ENV CCACHE_DIR="/ccache"
+# Note: the `parallel` setting uses a substantial amount of RAM
+RUN --mount=type=cache,target=/ccache \
+    ./build.sh \
+    --allow_running_as_root \
+    --config Release \
+    --build_wheel \
+    --update \
+    --build \
+    --parallel 17 \
+    --cmake_extra_defines \
+    ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" \
+    CMAKE_HIP_ARCHITECTURES="gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" \
+    --skip_tests \
+    --use_rocm \
+    --rocm_home=/opt/rocm \
+    --use_cache \
+    --compile_no_warning_as_error
+# Move the built wheel to /opt/, where the `builder` stage looks for onnxruntime_rocm-*.whl.
+RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/
+
+# Generic builder: extends the device-specific builder and installs the locked
+# Python dependencies for that device into /opt/venv.
+FROM builder-${DEVICE} AS builder
+
+# Re-declared so the global DEVICE build argument is visible inside this stage.
+ARG DEVICE
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    VIRTUAL_ENV=/opt/venv
+
+RUN apt-get update && apt-get install -y --no-install-recommends g++
+
+# uv / uvx binaries are taken from the pinned upstream uv image.
+COPY --from=ghcr.io/astral-sh/uv:0.8.15@sha256:a5727064a0de127bdb7c9d3c1383f3a9ac307d9f2d8a391edc7896c54289ced0 /uv /uvx /bin/
+# uv.lock and pyproject.toml are bind-mounted for this step only, and the uv
+# download cache lives in a cache mount, so none of them end up in a layer.
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    uv sync --frozen --extra ${DEVICE} --no-dev --no-editable --no-install-project --compile-bytecode --no-progress --active --link-mode copy
+# Only the rocm builder produces /opt/onnxruntime_rocm-*.whl, so install it just for that device.
+RUN if [ "$DEVICE" = "rocm" ]; then \
+    uv pip install /opt/onnxruntime_rocm-*.whl; \
+    fi
+
+# CPU runtime base; prod-armnn and prod-rknn are derived from it.
+FROM python:3.11-slim-bookworm@sha256:917ec0e42cd6af87657a768449c2f604a6b67c7ab8e10ff917b8724799f816d3 AS prod-cpu
+
+# /usr/lib/libmimalloc.so.2 is the symlink created in the final `prod` stage.
+ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false
+
+# OpenVINO runtime base: no LD_PRELOAD is set here.
+FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS prod-openvino
+
+# Installs the OpenCL ICD loader from apt plus Intel graphics-compiler and
+# compute-runtime .deb packages fetched from GitHub releases (two release lines,
+# one of them the "legacy1" ICD). wget and the .deb files are removed again in
+# the same layer.
+RUN apt-get update && \
+    apt-get install --no-install-recommends -yqq ocl-icd-libopencl1 wget && \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.27.10/intel-igc-core-2_2.27.10+20617_amd64.deb && \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.27.10/intel-igc-opencl-2_2.27.10+20617_amd64.deb && \
+    wget -nv https://github.com/intel/compute-runtime/releases/download/26.01.36711.4/intel-opencl-icd_26.01.36711.4-0_amd64.deb &&  \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-core_1.0.17537.24_amd64.deb && \
+    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-opencl_1.0.17537.24_amd64.deb && \
+    wget -nv https://github.com/intel/compute-runtime/releases/download/24.35.30872.36/intel-opencl-icd-legacy1_24.35.30872.36_amd64.deb && \
+    # TODO: Figure out how to get renovate to manage this differently versioned libigdgmm file
+    wget -nv https://github.com/intel/compute-runtime/releases/download/26.01.36711.4/libigdgmm12_22.9.0_amd64.deb && \
+    dpkg -i *.deb && \
+    rm *.deb && \
+    apt-get remove wget -yqq && \
+    rm -rf /var/lib/apt/lists/*
+
+# CUDA runtime base: NVIDIA's CUDA runtime image plus a pinned cuDNN 9 package.
+FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04@sha256:94c1577b2cd9dd6c0312dc04dff9cb2fdce2b268018abc3d7c2dbcacf1155000 AS prod-cuda
+
+# /usr/lib/libmimalloc.so.2 is the symlink created in the final `prod` stage.
+ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false
+
+RUN apt-get update && \
+    # Pascal support was dropped in 9.11
+    apt-get install --no-install-recommends -yqq libcudnn9-cuda-12=9.10.2.21-1 && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Python 3.11 (interpreter, standard library and shared libpython) is copied in
+# from the builder-cuda stage rather than installed in this image.
+COPY --from=builder-cuda /usr/local/bin/python3 /usr/local/bin/python3
+COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11
+COPY --from=builder-cuda /usr/local/lib/libpython3.11.so /usr/local/lib/libpython3.11.so
+
+# ROCm runtime base: the same pinned image that builder-rocm starts from.
+FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS prod-rocm
+
+# ArmNN runtime: CPU runtime plus OpenCL loader packages and the ArmNN / libann
+# shared libraries produced in builder-armnn.
+FROM prod-cpu AS prod-armnn
+
+# LD_LIBRARY_PATH lets the dynamic loader find the libraries copied to /opt/armnn below.
+ENV LD_LIBRARY_PATH=/opt/armnn \
+    LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false
+
+# Registers /usr/lib/libmali.so as an OpenCL vendor ICD. That library is not
+# installed by this Dockerfile — presumably it is supplied at run time; confirm.
+RUN apt-get update && apt-get install -y --no-install-recommends ocl-icd-libopencl1 mesa-opencl-icd libgomp1 && \
+    rm -rf /var/lib/apt/lists/* && \
+    mkdir --parents /etc/OpenCL/vendors && \
+    echo "/usr/lib/libmali.so" > /etc/OpenCL/vendors/mali.icd && \
+    mkdir /opt/armnn
+
+# The `?` wildcards match the version suffixes of the shared objects.
+# NOTE(review): `libann.s[o]` is written as a glob, presumably so that a missing
+# file does not fail the COPY — confirm.
+COPY --from=builder-armnn \
+    /opt/armnn/libarmnn.so.?? \
+    /opt/armnn/libarmnnOnnxParser.so.?? \
+    /opt/armnn/libarmnnDeserializer.so.?? \
+    /opt/armnn/libarmnnTfLiteParser.so.?? \
+    /opt/armnn/libprotobuf.so.?.??.?.? \
+    /opt/ann/libann.s[o] \
+    /opt/ann/build.sh \
+    /opt/armnn/
+
+# RKNN runtime: CPU runtime plus the Rockchip runtime library.
+FROM prod-cpu AS prod-rknn
+
+# renovate: datasource=github-tags depName=airockchip/rknn-toolkit2
+ARG RKNN_TOOLKIT_VERSION="v2.3.0"
+
+ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
+    MACHINE_LEARNING_MODEL_ARENA=false
+
+# The download is verified against a pinned sha256, so the checksum has to be
+# updated together with RKNN_TOOLKIT_VERSION.
+ADD --checksum=sha256:73993ed4b440460825f21611731564503cc1d5a0c123746477da6cd574f34885 "https://github.com/airockchip/rknn-toolkit2/raw/refs/tags/${RKNN_TOOLKIT_VERSION}/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so" /usr/lib/
+
+# Final image: device-specific runtime base + shared runtime packages + the
+# virtual environment from the `builder` stage + the application code.
+FROM prod-${DEVICE} AS prod
+
+# Re-declared so the global DEVICE build argument is visible inside this stage.
+ARG DEVICE
+
+# libmimalloc is skipped for openvino and rocm; every other device preloads it.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends tini ccache libgl1 libglib2.0-0 libgomp1 $(if ! [ "$DEVICE" = "openvino" ] && ! [ "$DEVICE" = "rocm" ]; then echo "libmimalloc2.0"; fi) && \
+    apt-get autoremove -yqq && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Architecture-independent path for the LD_PRELOAD values set in the prod-* stages.
+# For openvino and rocm the package is not installed above, so the link dangles;
+# this Dockerfile sets no LD_PRELOAD for those two stages.
+RUN ln -s "/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2" /usr/lib/libmimalloc.so.2
+
+WORKDIR /usr/src
+ENV TRANSFORMERS_CACHE=/cache \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PATH="/opt/venv/bin:$PATH" \
+    PYTHONPATH=/usr/src \
+    DEVICE=${DEVICE} \
+    VIRTUAL_ENV=/opt/venv \
+    MACHINE_LEARNING_CACHE_FOLDER=/cache
+
+# prevent core dumps
+# Entries use the documented file formats: limits.conf is
+# "<domain> <type> <item> <value>" and sysctl.conf is "token = value".
+# NOTE(review): these files are normally consumed by PAM sessions, sysctl
+# loading and login shells; confirm they apply to the tini-launched process
+# below, or additionally limit core size at run time.
+RUN echo "* hard core 0" >> /etc/security/limits.conf && \
+    echo "fs.suid_dumpable = 0" >> /etc/sysctl.conf && \
+    echo 'ulimit -S -c 0 > /dev/null 2>&1' >> /etc/profile
+
+COPY --from=builder /opt/venv /opt/venv
+COPY scripts/healthcheck.py .
+COPY immich_ml immich_ml
+
+# Build metadata is declared after the dependency and code layers so that
+# changing it does not invalidate them.
+ARG BUILD_ID
+ARG BUILD_IMAGE
+ARG BUILD_SOURCE_REF
+ARG BUILD_SOURCE_COMMIT
+
+ENV IMMICH_BUILD=${BUILD_ID}
+ENV IMMICH_BUILD_URL=https://github.com/immich-app/immich/actions/runs/${BUILD_ID}
+ENV IMMICH_BUILD_IMAGE=${BUILD_IMAGE}
+ENV IMMICH_BUILD_IMAGE_URL=https://github.com/immich-app/immich/pkgs/container/immich-machine-learning
+ENV IMMICH_REPOSITORY=immich-app/immich
+ENV IMMICH_REPOSITORY_URL=https://github.com/immich-app/immich
+ENV IMMICH_SOURCE_REF=${BUILD_SOURCE_REF}
+ENV IMMICH_SOURCE_COMMIT=${BUILD_SOURCE_COMMIT}
+ENV IMMICH_SOURCE_URL=https://github.com/immich-app/immich/commit/${BUILD_SOURCE_COMMIT}
+
+# tini runs as the entrypoint and launches the Python service given by CMD.
+ENTRYPOINT ["tini", "--"]
+CMD ["python", "-m", "immich_ml"]
+
+# healthcheck.py was copied into WORKDIR (/usr/src) above.
+HEALTHCHECK CMD python3 healthcheck.py
--- a/machine-learning/README.md
+++ b/machine-learning/README.md
@ -0,0 +1,42 @@
+# Immich Machine Learning
+
+- CLIP embeddings
+- Facial recognition
+
+# Setup
+
+This project uses [uv](https://docs.astral.sh/uv/getting-started/installation/), so be sure to install it first.
+Running `uv sync --extra cpu` will install everything you need in an isolated virtual environment.
+CUDA, ROCM and OpenVINO are supported as acceleration APIs. To use them, you can replace `--extra cpu` with either of `--extra cuda`, `--extra rocm` or `--extra openvino`. In the case of CUDA, a [compute capability](https://developer.nvidia.com/cuda-gpus) of 5.2 or higher is required.
+
+To add or remove dependencies, you can use the commands `uv add $PACKAGE_NAME` and `uv remove $PACKAGE_NAME`, respectively.
+Be sure to run `uv lock` and commit the resulting `uv.lock` along with `pyproject.toml` to reflect any changes in dependencies.
+
+# Load Testing
+
+To measure inference throughput and latency, you can use [Locust](https://locust.io/) using the provided `locustfile.py`.
+Locust works by querying the model endpoints and aggregating their statistics, meaning the app must be deployed.
+You can change the models or adjust options like score thresholds through the Locust UI.
+
+To get started, you can simply run `locust --web-host 127.0.0.1` and open `localhost:8089` in a browser to access the UI. See the [Locust documentation](https://docs.locust.io/en/stable/index.html) for more info on running Locust.
+
+Note that in Locust's jargon, concurrency is measured in `users`, and each user runs one task at a time. To achieve a particular per-endpoint concurrency, multiply that number by the number of endpoints to be queried. For example, if there are 3 endpoints and you want each of them to receive 8 requests at a time, you should set the number of users to 24.
+
+# Facial Recognition
+
+## Acknowledgements
+
+This project utilizes facial recognition models from the [InsightFace](https://github.com/deepinsight/insightface/tree/master/model_zoo) project. We appreciate the work put into developing these models, which have been beneficial to the machine learning part of this project.
+
+### Used Models
+
+- antelopev2
+- buffalo_l
+- buffalo_m
+- buffalo_s
+
+## License and Use Restrictions
+
+We have received permission to use the InsightFace facial recognition models in our project, as granted via email by Jia Guo (guojia@insightface.ai) on 18th March 2023. However, it's important to note that this permission does not extend to the redistribution or commercial use of their models by third parties. Users and developers interested in using these models should review the licensing terms provided in the InsightFace GitHub repository.
+
+For more information on the capabilities of the InsightFace models and to ensure compliance with their license, please refer to their [official repository](https://github.com/deepinsight/insightface). Adhering to the specified licensing terms is crucial for the respectful and lawful use of their work.
--- a/machine-learning/README_es_ES.md
+++ b/machine-learning/README_es_ES.md
@ -0,0 +1,21 @@
+# Immich Machine Learning
+
+- Clasificación de imágenes
+- Incorporación de CLIP
+- Reconocimiento facial
+
+# Configuración
+
+Este proyecto utiliza [Poetry](https://python-poetry.org/docs/#installation), así que asegúrate de instalarlo primero.
+Ejecutar `poetry install --no-root --with dev` instalará todo lo necesario en un entorno virtual aislado.
+
+Para agregar o eliminar dependencias, puedes utilizar los comandos `poetry add $PACKAGE_NAME` y `poetry remove $PACKAGE_NAME`, respectivamente.
+Asegúrate de hacer commit de los archivos `poetry.lock` y `pyproject.toml` para reflejar cualquier cambio en las dependencias.
+
+# Pruebas de carga
+
+Para medir la velocidad y latencia de inferencia, puedes utilizar [Locust](https://locust.io/) con el archivo `locustfile.py` proporcionado.
+Locust funciona haciendo consultas a los puntos finales del modelo y agregando estadísticas, lo que significa que la aplicación debe estar desplegada.
+Puedes ejecutar `load_test.sh` para implementar automáticamente la aplicación localmente e iniciar Locust, ajustando opcionalmente sus variables de entorno según sea necesario.
+
+Alternativamente, para pruebas más personalizadas, también puedes ejecutar `locust` directamente: consulta la [documentación](https://docs.locust.io/en/stable/index.html). Ten en cuenta que, en la jerga de Locust, la concurrencia se mide en `usuarios`, y cada usuario ejecuta una tarea a la vez. Para lograr una concurrencia específica por punto final, multiplica ese número por la cantidad de puntos finales que se desean consultar. Por ejemplo, si hay 3 puntos finales y deseas que cada uno de ellos reciba 8 solicitudes al mismo tiempo, debes configurar el número de usuarios en 24.
--- a/machine-learning/README_fr_FR.md
+++ b/machine-learning/README_fr_FR.md
@ -0,0 +1,22 @@
+# Immich Apprentissage machine
+
+- Classification d'images
+- Embarquement de CLIP
+- Reconnaissance faciale
+
+# Mise en place
+
+Ce projet utilise [Poetry](https://python-poetry.org/docs/#installation), donc soyez certain de l'installer en premier.
+Exécuter `poetry install --no-root --with dev` installera tout ce dont vous avez besoin dans un environnement virtuel isolé.
+
+Pour ajouter ou supprimer des dépendances, vous pouvez utiliser les commandes `poetry add $PACKAGE_NAME` et `poetry remove $PACKAGE_NAME` respectivement.
+Soyez sûr de commit les fichiers `poetry.lock` et `pyproject.toml` pour refléter les changements de dépendances.
+
+
+# Test de charge
+
+Pour mesurer le débit d'inférence et la latence, vous pouvez utiliser [Locust](https://locust.io/) avec le fichier fourni `locustfile.py`.
+Locust fonctionne en interrogeant les endpoints des modèles et en agrégeant leurs statistiques, ce qui signifie que l'application doit être déployée.
+Vous pouvez exécuter `load_test.sh` pour automatiquement déployer l'application localement et démarrer Locust, en ajustant si besoin ses variables d'environnement.
+
+En alternative, pour réaliser plus de tests customisés, vous pourriez aussi exécuter `locust` directement : voir la [documentation](https://docs.locust.io/en/stable/index.html). Notez que dans le jargon de Locust, la concurrence est mesurée en `users` et que chaque user exécute une tâche après l'autre. Pour parvenir à une concurrence par endpoint, multipliez ce nombre par le nombre d'endpoints à interroger. Par exemple, s'il y a 3 endpoints et que vous voulez que chacun d'entre eux reçoive 8 requêtes à la fois, vous devrez mettre ce nombre d'users à 24.
--- a/machine-learning/ann/init.py
+++ b/machine-learning/ann/init.py
--- a/machine-learning/ann/ann.cpp
+++ b/machine-learning/ann/ann.cpp
@ -0,0 +1,310 @@
+#include <fstream>
+#include <mutex>
+#include <atomic>
+
+#include "armnn/IRuntime.hpp"
+#include "armnn/INetwork.hpp"
+#include "armnn/Types.hpp"
+#include "armnnDeserializer/IDeserializer.hpp"
+#include "armnnTfLiteParser/ITfLiteParser.hpp"
+#include "armnnOnnxParser/IOnnxParser.hpp"
+
+using namespace armnn;
+
+// Binding information for the inputs and outputs of one loaded network.
+// Filled in by Ann::getIOInfos() and stored per network id in Ann::ioInfos.
+struct IOInfos
+{
+    std::vector<BindingPointInfo> inputInfos;  // one entry per Input layer
+    std::vector<BindingPointInfo> outputInfos; // one entry per Output layer
+};
+
+// Busy-waiting lock built on a single atomic flag.
+// Adapted from https://rigtorp.se/spinlock/
+struct SpinLock
+{
+    std::atomic<bool> lock_ = {false};
+
+    // Spins until the flag has been acquired by the calling thread.
+    void lock()
+    {
+        // exchange() returns the previous value: `false` means we now own the lock.
+        while (lock_.exchange(true, std::memory_order_acquire))
+        {
+            // Contended: wait on plain loads until the flag reads clear, then retry.
+            while (lock_.load(std::memory_order_relaxed))
+            {
+            }
+        }
+    }
+
+    // Clears the flag so another thread can acquire the lock.
+    void unlock()
+    {
+        lock_.store(false, std::memory_order_release);
+    }
+};
+
+// Wraps one ArmNN runtime configured for the GpuAcc backend and keeps, per
+// loaded network, its input/output binding infos and an execution mutex.
+class Ann
+{
+
+public:
+    // Loads the model at `modelPath` and returns its network id, or -1 on failure.
+    // If loading throws InvalidArgumentException while `fp16` is set, the load is
+    // retried once with `fp16` disabled.
+    int load(const char *modelPath,
+             bool fastMath,
+             bool fp16,
+             bool saveCachedNetwork,
+             const char *cachedNetworkPath)
+    {
+        NetworkId netId = -2; // -2 = "not decided yet", keeps the retry loop running
+        while (netId == -2)
+        {
+            try
+            {
+                netId = loadInternal(modelPath, fastMath, fp16, saveCachedNetwork, cachedNetworkPath);
+            }
+            catch (const InvalidArgumentException &)
+            {
+                // fp16 models do not support the forced fp16-turbo (runtime fp32->fp16 conversion)
+                if (fp16)
+                    fp16 = false;
+                else
+                    netId = -1;
+            }
+        }
+        return netId;
+    }
+
+    // Runs network `netId`. `inputData` / `outputData` are arrays of buffers,
+    // consumed in the order of the network's recorded input / output infos.
+    void execute(NetworkId netId, const void **inputData, void **outputData)
+    {
+        const IOInfos *infos = nullptr;
+        std::mutex *networkMutex = nullptr;
+        {
+            std::lock_guard<SpinLock> mapGuard(spinLock);
+            infos = &ioInfos[netId];
+            networkMutex = mutexes[netId].get();
+        }
+        InputTensors inputTensors;
+        inputTensors.reserve(infos->inputInfos.size());
+        size_t i = 0;
+        for (const BindingPointInfo &info : infos->inputInfos)
+            inputTensors.emplace_back(info.first, ConstTensor(info.second, inputData[i++]));
+        OutputTensors outputTensors;
+        outputTensors.reserve(infos->outputInfos.size());
+        i = 0;
+        for (const BindingPointInfo &info : infos->outputInfos)
+            outputTensors.emplace_back(info.first, Tensor(info.second, outputData[i++]));
+        // RAII guard: the per-network mutex is released even if the runtime throws.
+        std::lock_guard<std::mutex> runGuard(*networkMutex);
+        runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
+    }
+
+    // Unloads network `netId` from the runtime.
+    void unload(NetworkId netId)
+    {
+        std::lock_guard<std::mutex> guard(mutex);
+        runtime->UnloadNetwork(netId);
+    }
+
+    // Returns the number of input (isInput == true) or output tensors of `netId`.
+    int tensors(NetworkId netId, bool isInput = false)
+    {
+        const IOInfos *infos = nullptr;
+        {
+            std::lock_guard<SpinLock> mapGuard(spinLock);
+            infos = &ioInfos[netId];
+        }
+        return (int)(isInput ? infos->inputInfos.size() : infos->outputInfos.size());
+    }
+
+    // Returns the shape of one input/output tensor packed into a single integer:
+    // dimension d occupies bits [16*d, 16*d + 16).
+    unsigned long shape(NetworkId netId, bool isInput = false, int index = 0)
+    {
+        const IOInfos *infos = nullptr;
+        {
+            std::lock_guard<SpinLock> mapGuard(spinLock);
+            infos = &ioInfos[netId];
+        }
+        const TensorShape shape = (isInput ? infos->inputInfos : infos->outputInfos)[index].second.GetShape();
+        // Only as many 16-bit slots as fit into the return type can be stored (4 for a
+        // 64-bit unsigned long); shifting further would be undefined behaviour, so any
+        // additional dimensions are left out.
+        const unsigned int maxDims = sizeof(unsigned long) * 8 / 16;
+        unsigned long s = 0;
+        for (unsigned int d = 0; d < shape.GetNumDimensions() && d < maxDims; d++)
+            s |= ((unsigned long)shape[d]) << (d * 16);
+        return s;
+    }
+
+    // Creates the runtime with profiling disabled and GpuAcc backend options:
+    // the given tuning level, a fixed memory optimizer strategy and, when
+    // `tuningFile` is non-null, the tuning file path.
+    Ann(int tuningLevel, const char *tuningFile)
+    {
+        IRuntime::CreationOptions runtimeOptions;
+        runtimeOptions.m_ProfilingOptions.m_EnableProfiling = false;
+        runtimeOptions.m_ProfilingOptions.m_TimelineEnabled = false;
+        BackendOptions backendOptions{"GpuAcc",
+                                      {
+                                          {"TuningLevel", tuningLevel},
+                                          {"MemoryOptimizerStrategy", "ConstantMemoryStrategy"}, // SingleAxisPriorityList or ConstantMemoryStrategy
+                                      }};
+        if (tuningFile)
+            backendOptions.AddOption({"TuningFile", tuningFile});
+        runtimeOptions.m_BackendOptions.emplace_back(backendOptions);
+        runtime = IRuntime::CreateRaw(runtimeOptions);
+    }
+
+    // Destroys the runtime created in the constructor.
+    ~Ann()
+    {
+        IRuntime::Destroy(runtime);
+    }
+
+private:
+    // Parses, optimizes and loads one model. Returns the network id assigned by
+    // the runtime, or -1 if the runtime reports a failure. Exceptions thrown by
+    // the parser/optimizer propagate to load().
+    int loadInternal(const char *modelPath,
+                     bool fastMath,
+                     bool fp16,
+                     bool saveCachedNetwork,
+                     const char *cachedNetworkPath)
+    {
+        NetworkId netId = -1;
+        INetworkPtr network = loadModel(modelPath);
+        IOptimizedNetworkPtr optNet = OptimizeNetwork(network.get(), fastMath, fp16, saveCachedNetwork, cachedNetworkPath);
+        const IOInfos infos = getIOInfos(optNet.get());
+        Status status;
+        {
+            // Scoped so the global mutex is released on every path, including exceptions.
+            std::lock_guard<std::mutex> guard(mutex);
+            status = runtime->LoadNetwork(netId, std::move(optNet));
+        }
+        if (status != Status::Success)
+        {
+            return -1;
+        }
+        std::lock_guard<SpinLock> mapGuard(spinLock);
+        ioInfos[netId] = infos;
+        mutexes.emplace(netId, std::make_unique<std::mutex>());
+        return netId;
+    }
+
+    // True when `value` ends with `suffix`. Safe for values shorter than the suffix.
+    static bool endsWith(const std::string &value, const std::string &suffix)
+    {
+        return value.size() >= suffix.size() &&
+               value.compare(value.size() - suffix.size(), suffix.size(), suffix) == 0;
+    }
+
+    // Picks the parser from the file extension: ".tflite", ".onnx", otherwise the
+    // file is read as a serialized ArmNN network.
+    // NOTE(review): the parsers come from CreateRaw() and are never destroyed here;
+    // confirm whether the returned network may outlive its parser before changing this.
+    INetworkPtr loadModel(const char *modelPath)
+    {
+        const auto path = std::string(modelPath);
+        if (endsWith(path, ".tflite"))
+        {
+            auto parser = armnnTfLiteParser::ITfLiteParser::CreateRaw();
+            return parser->CreateNetworkFromBinaryFile(modelPath);
+        }
+        else if (endsWith(path, ".onnx"))
+        {
+            auto parser = armnnOnnxParser::IOnnxParser::CreateRaw();
+            return parser->CreateNetworkFromBinaryFile(modelPath);
+        }
+        else
+        {
+            std::ifstream ifs(path, std::ifstream::in | std::ifstream::binary);
+            auto parser = armnnDeserializer::IDeserializer::CreateRaw();
+            return parser->CreateNetworkFromBinary(ifs);
+        }
+    }
+
+    // Rebuilds the tensor info with the trailing constructor flag set to true and
+    // pairs it with the binding id.
+    // NOTE(review): the flag presumably marks the tensor as constant, as ConstTensor
+    // inputs require — confirm against the TensorInfo constructor.
+    static BindingPointInfo getInputTensorInfo(LayerBindingId inputBindingId, TensorInfo info)
+    {
+        const auto newInfo = TensorInfo{info.GetShape(), info.GetDataType(),
+                                        info.GetQuantizationScale(),
+                                        info.GetQuantizationOffset(),
+                                        true};
+        return {inputBindingId, newInfo};
+    }
+
+    // Optimizes `network` for the GpuAcc backend only, applying the fast-math, fp16
+    // and (when a path is given) cached-network options.
+    IOptimizedNetworkPtr OptimizeNetwork(INetwork *network, bool fastMath, bool fp16, bool saveCachedNetwork, const char *cachedNetworkPath)
+    {
+        const bool allowExpandedDims = false;
+        const ShapeInferenceMethod shapeInferenceMethod = ShapeInferenceMethod::ValidateOnly;
+
+        OptimizerOptionsOpaque options;
+        options.SetReduceFp32ToFp16(fp16);
+        options.SetShapeInferenceMethod(shapeInferenceMethod);
+        options.SetAllowExpandedDims(allowExpandedDims);
+        options.SetDebugToFileEnabled(false);
+        options.SetProfilingEnabled(false);
+
+        BackendOptions gpuAcc("GpuAcc", {{"FastMathEnabled", fastMath}});
+        if (cachedNetworkPath)
+        {
+            gpuAcc.AddOption({"SaveCachedNetwork", saveCachedNetwork});
+            gpuAcc.AddOption({"CachedNetworkFilePath", cachedNetworkPath});
+        }
+        options.AddModelOption(gpuAcc);
+
+        // No point in using ARMNN for CPU, use ONNX (quantized) instead.
+        // BackendOptions cpuAcc("CpuAcc",
+        //                       {
+        //                           {"FastMathEnabled", fastMath},
+        //                           {"NumberOfThreads", 0},
+        //                       });
+        // options.AddModelOption(cpuAcc);
+
+        BackendOptions allowExDimOpt("AllowExpandedDims",
+                                     {{"AllowExpandedDims", allowExpandedDims}});
+        options.AddModelOption(allowExDimOpt);
+        BackendOptions shapeInferOpt("ShapeInferenceMethod",
+                                     {{"InferAndValidate", shapeInferenceMethod == ShapeInferenceMethod::InferAndValidate}});
+        options.AddModelOption(shapeInferOpt);
+
+        std::vector<BackendId> backends = {
+            BackendId("GpuAcc"),
+            // BackendId("CpuAcc"),
+            // BackendId("CpuRef"),
+        };
+        return Optimize(*network, backends, runtime->GetDeviceSpec(), options);
+    }
+
+    // Visits every layer of the optimized network and records the binding infos of
+    // its Input and Output layers.
+    IOInfos getIOInfos(IOptimizedNetwork *optNet)
+    {
+        struct InfoStrategy : IStrategy
+        {
+            void ExecuteStrategy(const IConnectableLayer *layer,
+                                 const BaseDescriptor &descriptor,
+                                 const std::vector<ConstTensor> &constants,
+                                 const char *name,
+                                 const LayerBindingId id = 0) override
+            {
+                IgnoreUnused(descriptor, constants, id);
+                const LayerType lt = layer->GetType();
+                if (lt == LayerType::Input)
+                    ioInfos.inputInfos.push_back(getInputTensorInfo(id, layer->GetOutputSlot(0).GetTensorInfo()));
+                else if (lt == LayerType::Output)
+                    ioInfos.outputInfos.push_back({id, layer->GetInputSlot(0).GetTensorInfo()});
+            }
+            IOInfos ioInfos;
+        };
+
+        InfoStrategy infoStrategy;
+        optNet->ExecuteStrategy(infoStrategy);
+        return infoStrategy.ioInfos;
+    }
+
+    IRuntime *runtime;
+    std::map<NetworkId, IOInfos> ioInfos;
+    std::map<NetworkId, std::unique_ptr<std::mutex>> mutexes; // mutex per network to not execute the same network concurrently
+    std::mutex mutex;                                         // global mutex for load/unload calls to the runtime
+    SpinLock spinLock;                                        // fast spin lock to guard access to the ioInfos and mutexes maps
+};
+
+// C entry point: configures ArmNN logging with the given severity and returns a
+// newly allocated Ann instance as an opaque handle (release it with destroy()).
+extern "C" void *init(int logLevel, int tuningLevel, const char *tuningFile)
+{
+    ConfigureLogging(true, true, static_cast<LogSeverity>(logLevel));
+    return new Ann(tuningLevel, tuningFile);
+}
+
+// C entry point: deletes the Ann instance behind a handle returned by init().
+extern "C" void destroy(void *ann)
+{
+    Ann *instance = static_cast<Ann *>(ann);
+    delete instance;
+}
+
+// C entry point: forwards to Ann::load() and returns its result
+// (the network id, or -1 on failure).
+extern "C" int load(void *ann,
+                    const char *path,
+                    bool fastMath,
+                    bool fp16,
+                    bool saveCachedNetwork,
+                    const char *cachedNetworkPath)
+{
+    Ann *instance = static_cast<Ann *>(ann);
+    return instance->load(path, fastMath, fp16, saveCachedNetwork, cachedNetworkPath);
+}
+
+// C entry point: forwards to Ann::unload() for the given network id.
+extern "C" void unload(void *ann, NetworkId netId)
+{
+    Ann *instance = static_cast<Ann *>(ann);
+    instance->unload(netId);
+}
+
+// C entry point: forwards to Ann::execute() with the caller's input and output
+// buffer arrays.
+extern "C" void execute(void *ann, NetworkId netId, const void **inputData, void **outputData)
+{
+    Ann *instance = static_cast<Ann *>(ann);
+    instance->execute(netId, inputData, outputData);
+}
+
+// C entry point: forwards to Ann::shape() and returns the packed tensor shape.
+extern "C" unsigned long shape(void *ann, NetworkId netId, bool isInput, int index)
+{
+    Ann *instance = static_cast<Ann *>(ann);
+    return instance->shape(netId, isInput, index);
+}
+
+// C entry point: forwards to Ann::tensors() and returns the tensor count.
+extern "C" int tensors(void *ann, NetworkId netId, bool isInput)
+{
+    Ann *instance = static_cast<Ann *>(ann);
+    return instance->tensors(netId, isInput);
+}
--- a/machine-learning/ann/build.sh
+++ b/machine-learning/ann/build.sh
@ -0,0 +1,3 @@
+#!/usr/bin/env sh
+
+# Compiles ann.cpp into the shared library libann.so, using the ArmNN headers
+# and libraries found under $ARMNN_PATH.
+g++ -shared -O3 -o libann.so -fuse-ld=gold -std=c++17 \
+    -I"$ARMNN_PATH"/include \
+    -larmnn -larmnnDeserializer -larmnnTfLiteParser -larmnnOnnxParser \
+    -L"$ARMNN_PATH" \
+    ann.cpp
--- a/machine-learning/ann/export/.gitignore
+++ b/machine-learning/ann/export/.gitignore
@ -0,0 +1,2 @@
+armnn*
+output/
--- a/machine-learning/ann/export/build-converter.sh
+++ b/machine-learning/ann/export/build-converter.sh
@ -0,0 +1,4 @@
+#!/usr/bin/env sh
+
+# Builds the ArmnnConverter tool from the armnn-23.11 source tree and links it
+# against the ArmNN libraries in ../armnn. Exits if the source tree is missing.
+cd armnn-23.11/ || exit
+g++ -o ../armnnconverter -O1 \
+    -DARMNN_ONNX_PARSER -DARMNN_SERIALIZER -DARMNN_TF_LITE_PARSER \
+    -fuse-ld=gold -std=c++17 \
+    -Iinclude -Isrc/armnnUtils -Ithird-party \
+    -larmnn -larmnnDeserializer -larmnnTfLiteParser -larmnnOnnxParser -larmnnSerializer \
+    -L../armnn \
+    src/armnnConverter/ArmnnConverter.cpp
--- a/machine-learning/ann/export/download-armnn.sh
+++ b/machine-learning/ann/export/download-armnn.sh
@ -0,0 +1,8 @@
+#!/bin/sh
+
+# binaries
+mkdir armnn
+curl -SL "https://github.com/ARM-software/armnn/releases/download/v23.11/ArmNN-linux-x86_64.tar.gz" | tar -zx -C armnn
+
+# source to build ArmnnConverter
+curl -SL "https://github.com/ARM-software/armnn/archive/refs/tags/v23.11.tar.gz" | tar -zx
--- a/machine-learning/ann/export/env.yaml
+++ b/machine-learning/ann/export/env.yaml
@ -0,0 +1,201 @@
+name: annexport
+channels:
+  - pytorch
+  - nvidia
+  - conda-forge
+dependencies:
+  - _libgcc_mutex=0.1=conda_forge
+  - _openmp_mutex=4.5=2_kmp_llvm
+  - aiohttp=3.9.1=py310h2372a71_0
+  - aiosignal=1.3.1=pyhd8ed1ab_0
+  - arpack=3.8.0=nompi_h0baa96a_101
+  - async-timeout=4.0.3=pyhd8ed1ab_0
+  - attrs=23.1.0=pyh71513ae_1
+  - aws-c-auth=0.7.3=h28f7589_1
+  - aws-c-cal=0.6.1=hc309b26_1
+  - aws-c-common=0.9.0=hd590300_0
+  - aws-c-compression=0.2.17=h4d4d85c_2
+  - aws-c-event-stream=0.3.1=h2e3709c_4
+  - aws-c-http=0.7.11=h00aa349_4
+  - aws-c-io=0.13.32=he9a53bd_1
+  - aws-c-mqtt=0.9.3=hb447be9_1
+  - aws-c-s3=0.3.14=hf3aad02_1
+  - aws-c-sdkutils=0.1.12=h4d4d85c_1
+  - aws-checksums=0.1.17=h4d4d85c_1
+  - aws-crt-cpp=0.21.0=hb942446_5
+  - aws-sdk-cpp=1.10.57=h85b1a90_19
+  - blas=2.120=openblas
+  - blas-devel=3.9.0=20_linux64_openblas
+  - brotli-python=1.0.9=py310hd8f1fbe_9
+  - bzip2=1.0.8=hd590300_5
+  - c-ares=1.23.0=hd590300_0
+  - ca-certificates=2023.11.17=hbcca054_0
+  - certifi=2023.11.17=pyhd8ed1ab_0
+  - charset-normalizer=3.3.2=pyhd8ed1ab_0
+  - click=8.1.7=unix_pyh707e725_0
+  - colorama=0.4.6=pyhd8ed1ab_0
+  - coloredlogs=15.0.1=pyhd8ed1ab_3
+  - cuda-cudart=11.7.99=0
+  - cuda-cupti=11.7.101=0
+  - cuda-libraries=11.7.1=0
+  - cuda-nvrtc=11.7.99=0
+  - cuda-nvtx=11.7.91=0
+  - cuda-runtime=11.7.1=0
+  - dataclasses=0.8=pyhc8e2a94_3
+  - datasets=2.14.7=pyhd8ed1ab_0
+  - dill=0.3.7=pyhd8ed1ab_0
+  - filelock=3.13.1=pyhd8ed1ab_0
+  - flatbuffers=23.5.26=h59595ed_1
+  - freetype=2.12.1=h267a509_2
+  - frozenlist=1.4.0=py310h2372a71_1
+  - fsspec=2023.10.0=pyhca7485f_0
+  - ftfy=6.1.3=pyhd8ed1ab_0
+  - gflags=2.2.2=he1b5a44_1004
+  - glog=0.6.0=h6f12383_0
+  - glpk=5.0=h445213a_0
+  - gmp=6.3.0=h59595ed_0
+  - gmpy2=2.1.2=py310h3ec546c_1
+  - huggingface_hub=0.17.3=pyhd8ed1ab_0
+  - humanfriendly=10.0=pyhd8ed1ab_6
+  - icu=73.2=h59595ed_0
+  - idna=3.6=pyhd8ed1ab_0
+  - importlib-metadata=7.0.0=pyha770c72_0
+  - importlib_metadata=7.0.0=hd8ed1ab_0
+  - joblib=1.3.2=pyhd8ed1ab_0
+  - keyutils=1.6.1=h166bdaf_0
+  - krb5=1.21.2=h659d440_0
+  - lcms2=2.15=h7f713cb_2
+  - ld_impl_linux-64=2.40=h41732ed_0
+  - lerc=4.0.0=h27087fc_0
+  - libabseil=20230125.3=cxx17_h59595ed_0
+  - libarrow=12.0.1=hb87d912_8_cpu
+  - libblas=3.9.0=20_linux64_openblas
+  - libbrotlicommon=1.0.9=h166bdaf_9
+  - libbrotlidec=1.0.9=h166bdaf_9
+  - libbrotlienc=1.0.9=h166bdaf_9
+  - libcblas=3.9.0=20_linux64_openblas
+  - libcrc32c=1.1.2=h9c3ff4c_0
+  - libcublas=11.10.3.66=0
+  - libcufft=10.7.2.124=h4fbf590_0
+  - libcufile=1.8.1.2=0
+  - libcurand=10.3.4.101=0
+  - libcurl=8.5.0=hca28451_0
+  - libcusolver=11.4.0.1=0
+  - libcusparse=11.7.4.91=0
+  - libdeflate=1.19=hd590300_0
+  - libedit=3.1.20191231=he28a2e2_2
+  - libev=4.33=hd590300_2
+  - libevent=2.1.12=hf998b51_1
+  - libffi=3.4.2=h7f98852_5
+  - libgcc-ng=13.2.0=h807b86a_3
+  - libgfortran-ng=13.2.0=h69a702a_3
+  - libgfortran5=13.2.0=ha4646dd_3
+  - libgoogle-cloud=2.12.0=hac9eb74_1
+  - libgrpc=1.54.3=hb20ce57_0
+  - libhwloc=2.9.3=default_h554bfaf_1009
+  - libiconv=1.17=hd590300_1
+  - libjpeg-turbo=2.1.5.1=hd590300_1
+  - liblapack=3.9.0=20_linux64_openblas
+  - liblapacke=3.9.0=20_linux64_openblas
+  - libnghttp2=1.58.0=h47da74e_1
+  - libnpp=11.7.4.75=0
+  - libnsl=2.0.1=hd590300_0
+  - libnuma=2.0.16=h0b41bf4_1
+  - libnvjpeg=11.8.0.2=0
+  - libopenblas=0.3.25=pthreads_h413a1c8_0
+  - libpng=1.6.39=h753d276_0
+  - libprotobuf=3.21.12=hfc55251_2
+  - libsentencepiece=0.1.99=h180e1df_0
+  - libsqlite=3.44.2=h2797004_0
+  - libssh2=1.11.0=h0841786_0
+  - libstdcxx-ng=13.2.0=h7e041cc_3
+  - libthrift=0.18.1=h8fd135c_2
+  - libtiff=4.6.0=h29866fb_1
+  - libutf8proc=2.8.0=h166bdaf_0
+  - libuuid=2.38.1=h0b41bf4_0
+  - libwebp-base=1.3.2=hd590300_0
+  - libxcb=1.15=h0b41bf4_0
+  - libxml2=2.11.6=h232c23b_0
+  - libzlib=1.2.13=hd590300_5
+  - llvm-openmp=17.0.6=h4dfa4b3_0
+  - lz4-c=1.9.4=hcb278e6_0
+  - mkl=2022.2.1=h84fe81f_16997
+  - mkl-devel=2022.2.1=ha770c72_16998
+  - mkl-include=2022.2.1=h84fe81f_16997
+  - mpc=1.3.1=hfe3b2da_0
+  - mpfr=4.2.1=h9458935_0
+  - mpmath=1.3.0=pyhd8ed1ab_0
+  - multidict=6.0.4=py310h2372a71_1
+  - multiprocess=0.70.15=py310h2372a71_1
+  - ncurses=6.4=h59595ed_2
+  - numpy=1.26.2=py310hb13e2d6_0
+  - onnx=1.14.0=py310ha3deec4_1
+  - onnx2torch=1.5.13=pyhd8ed1ab_0
+  - onnxruntime=1.16.3=py310hd4b7fbc_1_cpu
+  - open-clip-torch=2.23.0=pyhd8ed1ab_1
+  - openblas=0.3.25=pthreads_h7a3da1a_0
+  - openjpeg=2.5.0=h488ebb8_3
+  - openssl=3.2.0=hd590300_1
+  - orc=1.9.0=h2f23424_1
+  - packaging=23.2=pyhd8ed1ab_0
+  - pandas=2.1.4=py310hcc13569_0
+  - pillow=10.0.1=py310h29da1c1_1
+  - pip=23.3.1=pyhd8ed1ab_0
+  - protobuf=4.21.12=py310heca2aa9_0
+  - pthread-stubs=0.4=h36c2ea0_1001
+  - pyarrow=12.0.1=py310h0576679_8_cpu
+  - pyarrow-hotfix=0.6=pyhd8ed1ab_0
+  - pysocks=1.7.1=pyha2e5f31_6
+  - python=3.10.13=hd12c33a_0_cpython
+  - python-dateutil=2.8.2=pyhd8ed1ab_0
+  - python-flatbuffers=23.5.26=pyhd8ed1ab_0
+  - python-tzdata=2023.3=pyhd8ed1ab_0
+  - python-xxhash=3.4.1=py310h2372a71_0
+  - python_abi=3.10=4_cp310
+  - pytorch=1.13.1=cpu_py310hd11e9c7_1
+  - pytorch-cuda=11.7=h778d358_5
+  - pytorch-mutex=1.0=cuda
+  - pytz=2023.3.post1=pyhd8ed1ab_0
+  - pyyaml=6.0.1=py310h2372a71_1
+  - rdma-core=28.9=h59595ed_1
+  - re2=2023.03.02=h8c504da_0
+  - readline=8.2=h8228510_1
+  - regex=2023.10.3=py310h2372a71_0
+  - requests=2.31.0=pyhd8ed1ab_0
+  - s2n=1.3.49=h06160fa_0
+  - sacremoses=0.0.53=pyhd8ed1ab_0
+  - safetensors=0.3.3=py310hcb5633a_1
+  - sentencepiece=0.1.99=hff52083_0
+  - sentencepiece-python=0.1.99=py310hebdb9f0_0
+  - sentencepiece-spm=0.1.99=h180e1df_0
+  - setuptools=68.2.2=pyhd8ed1ab_0
+  - six=1.16.0=pyh6c4a22f_0
+  - sleef=3.5.1=h9b69904_2
+  - snappy=1.1.10=h9fff704_0
+  - sympy=1.12=pypyh9d50eac_103
+  - tbb=2021.11.0=h00ab1b0_0
+  - texttable=1.7.0=pyhd8ed1ab_0
+  - timm=0.9.12=pyhd8ed1ab_0
+  - tk=8.6.13=noxft_h4845f30_101
+  - tokenizers=0.14.1=py310h320607d_2
+  - torchvision=0.14.1=cpu_py310hd3d2ac3_1
+  - tqdm=4.66.1=pyhd8ed1ab_0
+  - transformers=4.35.2=pyhd8ed1ab_0
+  - typing-extensions=4.9.0=hd8ed1ab_0
+  - typing_extensions=4.9.0=pyha770c72_0
+  - tzdata=2023c=h71feb2d_0
+  - ucx=1.14.1=h64cca9d_5
+  - urllib3=2.1.0=pyhd8ed1ab_0
+  - wcwidth=0.2.12=pyhd8ed1ab_0
+  - wheel=0.42.0=pyhd8ed1ab_0
+  - xorg-libxau=1.0.11=hd590300_0
+  - xorg-libxdmcp=1.1.3=h7f98852_0
+  - xxhash=0.8.2=hd590300_0
+  - xz=5.2.6=h166bdaf_0
+  - yaml=0.2.5=h7f98852_2
+  - yarl=1.9.3=py310h2372a71_0
+  - zipp=3.17.0=pyhd8ed1ab_0
+  - zlib=1.2.13=hd590300_5
+  - zstd=1.5.5=hfc55251_0
+  - pip:
+      - git+https://github.com/fyfrey/TinyNeuralNetwork.git
--- a/machine-learning/ann/export/run.py
+++ b/machine-learning/ann/export/run.py
@ -0,0 +1,157 @@
+import logging
+import os
+import platform
+import subprocess
+from abc import abstractmethod
+
+import onnx
+import open_clip
+import torch
+from onnx2torch import convert
+from onnxruntime.tools.onnx_model_utils import fix_output_shapes, make_input_shape_fixed
+from tinynn.converter import TFLiteConverter
+
+
+class ExportBase(torch.nn.Module):
+    input_shape: tuple[int, ...]
+
+    def __init__(self, device: torch.device, name: str):
+        super().__init__()
+        self.device = device
+        self.name = name
+        self.optimize = 5
+        self.nchw_transpose = False
+
+    @abstractmethod
+    def forward(self, input_tensor: torch.Tensor) -> torch.Tensor | tuple[torch.Tensor]:
+        pass
+
+    def dummy_input(self) -> torch.FloatTensor:
+        return torch.rand((1, 3, 224, 224), device=self.device)
+
+
+class ArcFace(ExportBase):
+    input_shape = (1, 3, 112, 112)
+
+    def __init__(self, onnx_model_path: str, device: torch.device):
+        name, _ = os.path.splitext(os.path.basename(onnx_model_path))
+        super().__init__(device, name)
+        onnx_model = onnx.load_model(onnx_model_path)
+        make_input_shape_fixed(onnx_model.graph, onnx_model.graph.input[0].name, self.input_shape)
+        fix_output_shapes(onnx_model)
+        self.model = convert(onnx_model).to(device)
+        if self.device.type == "cuda":
+            self.model = self.model.half()
+
+    def forward(self, input_tensor: torch.Tensor) -> torch.FloatTensor:
+        embedding: torch.FloatTensor = self.model(
+            input_tensor.half() if self.device.type == "cuda" else input_tensor
+        ).float()
+        assert isinstance(embedding, torch.FloatTensor)
+        return embedding
+
+    def dummy_input(self) -> torch.FloatTensor:
+        return torch.rand(self.input_shape, device=self.device)
+
+
+class RetinaFace(ExportBase):
+    input_shape = (1, 3, 640, 640)
+
+    def __init__(self, onnx_model_path: str, device: torch.device):
+        name, _ = os.path.splitext(os.path.basename(onnx_model_path))
+        super().__init__(device, name)
+        self.optimize = 3
+        self.model = convert(onnx_model_path).eval().to(device)
+        if self.device.type == "cuda":
+            self.model = self.model.half()
+
+    def forward(self, input_tensor: torch.Tensor) -> tuple[torch.FloatTensor]:
+        out: torch.Tensor = self.model(input_tensor.half() if self.device.type == "cuda" else input_tensor)
+        return tuple(o.float() for o in out)
+
+    def dummy_input(self) -> torch.FloatTensor:
+        return torch.rand(self.input_shape, device=self.device)
+
+
+class ClipVision(ExportBase):
+    input_shape = (1, 3, 224, 224)
+
+    def __init__(self, model_name: str, weights: str, device: torch.device):
+        super().__init__(device, model_name + "__" + weights)
+        self.model = open_clip.create_model(
+            model_name,
+            weights,
+            precision="fp16" if device.type == "cuda" else "fp32",
+            jit=False,
+            require_pretrained=True,
+            device=device,
+        )
+
+    def forward(self, input_tensor: torch.Tensor) -> torch.FloatTensor:
+        embedding: torch.Tensor = self.model.encode_image(
+            input_tensor.half() if self.device.type == "cuda" else input_tensor,
+            normalize=True,
+        ).float()
+        return embedding
+
+
+def export(model: ExportBase) -> None:
+    model.eval()
+    for param in model.parameters():
+        param.requires_grad = False
+    dummy_input = model.dummy_input()
+    model(dummy_input)
+    jit = torch.jit.trace(model, dummy_input)  # type: ignore[no-untyped-call,attr-defined]
+    tflite_model_path = f"output/{model.name}.tflite"
+    os.makedirs("output", exist_ok=True)
+
+    converter = TFLiteConverter(
+        jit,
+        dummy_input,
+        tflite_model_path,
+        optimize=model.optimize,
+        nchw_transpose=model.nchw_transpose,
+    )
+    # segfaults on ARM, must run on x86_64 / AMD64
+    converter.convert()
+
+    armnn_model_path = f"output/{model.name}.armnn"
+    os.environ["LD_LIBRARY_PATH"] = "armnn"
+    subprocess.run(
+        [
+            "./armnnconverter",
+            "-f",
+            "tflite-binary",
+            "-m",
+            tflite_model_path,
+            "-i",
+            "input_tensor",
+            "-o",
+            "output_tensor",
+            "-p",
+            armnn_model_path,
+        ]
+    )
+
+
+def main() -> None:
+    if platform.machine() not in ("x86_64", "AMD64"):
+        raise RuntimeError(f"Can only run on x86_64 / AMD64, not {platform.machine()}")
+
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    if device.type != "cuda":
+        logging.warning(
+            "No CUDA available, cannot create fp16 model! proceeding to create a fp32 model (use only for testing)"
+        )
+    models = [
+        ClipVision("ViT-B-32", "openai", device),
+        ArcFace("buffalo_l_rec.onnx", device),
+        RetinaFace("buffalo_l_det.onnx", device),
+    ]
+    for model in models:
+        export(model)
+
+
+if __name__ == "__main__":
+    with torch.no_grad():
+        main()
--- a/machine-learning/conftest.py
+++ b/machine-learning/conftest.py
@ -0,0 +1,185 @@
+import json
+from typing import Any, Iterator
+from unittest import mock
+
+import numpy as np
+import pytest
+from fastapi.testclient import TestClient
+from numpy.typing import NDArray
+from PIL import Image
+
+from immich_ml.config import log
+from immich_ml.main import app
+
+
+@pytest.fixture
+def pil_image() -> Image.Image:
+    return Image.new("RGB", (600, 800))
+
+
+@pytest.fixture
+def cv_image(pil_image: Image.Image) -> NDArray[np.float32]:
+    return np.asarray(pil_image)[:, :, ::-1]  # PIL uses RGB while cv2 uses BGR
+
+
+@pytest.fixture
+def mock_get_model() -> Iterator[mock.Mock]:
+    with mock.patch("immich_ml.models.cache.from_model_type", autospec=True) as mocked:
+        yield mocked
+
+
+@pytest.fixture(scope="session")
+def deployed_app() -> Iterator[TestClient]:
+    with TestClient(app) as client:
+        yield client
+
+
+@pytest.fixture(scope="session")
+def responses() -> dict[str, Any]:
+    responses: dict[str, Any] = json.load(open("responses.json", "r"))
+    return responses
+
+
+@pytest.fixture(scope="session")
+def clip_model_cfg() -> dict[str, Any]:
+    return {
+        "embed_dim": 512,
+        "vision_cfg": {"image_size": 224, "layers": 12, "width": 768, "patch_size": 32},
+        "text_cfg": {"context_length": 77, "vocab_size": 49408, "width": 512, "heads": 8, "layers": 12},
+    }
+
+
+@pytest.fixture(scope="session")
+def clip_preprocess_cfg() -> dict[str, Any]:
+    return {
+        "size": [224, 224],
+        "mode": "RGB",
+        "mean": [0.48145466, 0.4578275, 0.40821073],
+        "std": [0.26862954, 0.26130258, 0.27577711],
+        "interpolation": "bicubic",
+        "resize_mode": "shortest",
+        "fill_color": 0,
+    }
+
+
+@pytest.fixture(scope="session")
+def clip_tokenizer_cfg() -> dict[str, Any]:
+    return {
+        "add_prefix_space": False,
+        "added_tokens_decoder": {
+            "49406": {
+                "content": "<|startoftext|>",
+                "lstrip": False,
+                "normalized": True,
+                "rstrip": False,
+                "single_word": False,
+                "special": True,
+            },
+            "49407": {
+                "content": "<|endoftext|>",
+                "lstrip": False,
+                "normalized": True,
+                "rstrip": False,
+                "single_word": False,
+                "special": True,
+            },
+        },
+        "bos_token": "<|startoftext|>",
+        "clean_up_tokenization_spaces": True,
+        "do_lower_case": True,
+        "eos_token": "<|endoftext|>",
+        "errors": "replace",
+        "model_max_length": 77,
+        "pad_token": "<|endoftext|>",
+        "tokenizer_class": "CLIPTokenizer",
+        "unk_token": "<|endoftext|>",
+    }
+
+
+@pytest.fixture(scope="function")
+def providers(request: pytest.FixtureRequest) -> Iterator[mock.Mock]:
+    marker = request.node.get_closest_marker("providers")
+    if marker is None:
+        raise ValueError("Missing marker 'providers'")
+
+    providers = marker.args[0]
+    with mock.patch("immich_ml.sessions.ort.ort.get_available_providers") as mocked:
+        mocked.return_value = providers
+        yield providers
+
+
+@pytest.fixture(scope="function")
+def ort_pybind() -> Iterator[mock.Mock]:
+    with mock.patch("immich_ml.sessions.ort.ort.capi._pybind_state") as mocked:
+        yield mocked
+
+
+@pytest.fixture(scope="function")
+def ov_device_ids(request: pytest.FixtureRequest, ort_pybind: mock.Mock) -> Iterator[mock.Mock]:
+    marker = request.node.get_closest_marker("ov_device_ids")
+    if marker is None:
+        raise ValueError("Missing marker 'ov_device_ids'")
+    ort_pybind.get_available_openvino_device_ids.return_value = marker.args[0]
+    return ort_pybind
+
+
+@pytest.fixture(scope="function")
+def ort_session() -> Iterator[mock.Mock]:
+    with mock.patch("immich_ml.sessions.ort.ort.InferenceSession") as mocked:
+        yield mocked
+
+
+@pytest.fixture(scope="function")
+def ann_session() -> Iterator[mock.Mock]:
+    with mock.patch("immich_ml.sessions.ann.Ann") as mocked:
+        yield mocked
+
+
+@pytest.fixture(scope="function")
+def rknn_session() -> Iterator[mock.Mock]:
+    with mock.patch("immich_ml.sessions.rknn.RknnPoolExecutor") as mocked:
+        yield mocked
+
+
+@pytest.fixture(scope="function")
+def rmtree() -> Iterator[mock.Mock]:
+    with mock.patch("immich_ml.models.base.rmtree", autospec=True) as mocked:
+        mocked.avoids_symlink_attacks = True
+        yield mocked
+
+
+@pytest.fixture(scope="function")
+def path() -> Iterator[mock.Mock]:
+    path = mock.MagicMock()
+    path.exists.return_value = True
+    path.is_dir.return_value = True
+    path.is_file.return_value = True
+    path.with_suffix.return_value = path
+    path.return_value = path
+
+    with mock.patch("immich_ml.models.base.Path", return_value=path) as mocked:
+        yield mocked
+
+
+@pytest.fixture(scope="function")
+def info() -> Iterator[mock.Mock]:
+    with mock.patch.object(log, "info") as mocked:
+        yield mocked
+
+
+@pytest.fixture(scope="function")
+def warning() -> Iterator[mock.Mock]:
+    with mock.patch.object(log, "warning") as mocked:
+        yield mocked
+
+
+@pytest.fixture(scope="function")
+def exception() -> Iterator[mock.Mock]:
+    with mock.patch.object(log, "exception") as mocked:
+        yield mocked
+
+
+@pytest.fixture(scope="function")
+def snapshot_download() -> Iterator[mock.Mock]:
+    with mock.patch("immich_ml.models.base.snapshot_download") as mocked:
+        yield mocked
--- a/machine-learning/immich_ml/__init__.py
+++ b/machine-learning/immich_ml/__init__.py
--- a/machine-learning/immich_ml/__main__.py
+++ b/machine-learning/immich_ml/__main__.py
@ -0,0 +1,57 @@
+import os
+import signal
+import subprocess
+from ipaddress import ip_address
+from pathlib import Path
+
+from .config import log, non_prefixed_settings, settings
+
+# Include the value of IMMICH_SOURCE_REF in the startup message when it is set.
+if source_ref := os.getenv("IMMICH_SOURCE_REF"):
+    log.info(f"Initializing Immich ML [{source_ref}]")
+else:
+    log.info("Initializing Immich ML")
+
+# Directory containing this file; the gunicorn and logging config files are
+# resolved relative to it.
+module_dir = Path(__file__).parent
+
+
+def is_ipv6(host: str) -> bool:
+    try:
+        return ip_address(host).version == 6
+    except ValueError:
+        return False
+
+
+# Build the host:port string passed to gunicorn. A bare IPv6 literal is wrapped
+# in brackets so the trailing ":port" is unambiguous.
+bind_host = non_prefixed_settings.immich_host
+if is_ipv6(bind_host):
+    bind_host = f"[{bind_host}]"
+bind_address = f"{bind_host}:{non_prefixed_settings.immich_port}"
+
+# Run gunicorn as a child process and block until it exits.
+try:
+    with subprocess.Popen(
+        [
+            "python",
+            "-m",
+            "gunicorn",
+            "immich_ml.main:app",
+            "-k",
+            "immich_ml.config.CustomUvicornWorker",
+            "-c",
+            module_dir / "gunicorn_conf.py",
+            "-b",
+            bind_address,
+            "-w",
+            str(settings.workers),
+            "-t",
+            str(settings.worker_timeout),
+            "--log-config-json",
+            module_dir / "log_conf.json",
+            "--keep-alive",
+            str(settings.http_keepalive_timeout_s),
+            "--graceful-timeout",
+            "10",
+        ],
+    ) as cmd:
+        cmd.wait()
+except KeyboardInterrupt:
+    # Forward the interrupt to the gunicorn process.
+    cmd.send_signal(signal.SIGINT)
+# Exit with the child's return code.
+# NOTE(review): returncode is None if the child has not exited yet - confirm
+# that exiting with status 0 in that case is intended.
+exit(cmd.returncode)
--- a/machine-learning/immich_ml/config.py
+++ b/machine-learning/immich_ml/config.py
@ -0,0 +1,165 @@
+import concurrent.futures
+import logging
+import os
+import sys
+from pathlib import Path
+from socket import socket
+
+from gunicorn.arbiter import Arbiter
+from pydantic import BaseModel
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from rich.console import Console
+from rich.logging import RichHandler
+from uvicorn import Server
+from uvicorn.workers import UvicornWorker
+
+from .schemas import ModelPrecision
+
+
+class ClipSettings(BaseModel):
+    """Optional model name strings for the textual and visual CLIP encoders."""
+
+    textual: str | None = None
+    visual: str | None = None
+
+
+class FacialRecognitionSettings(BaseModel):
+    """Optional model name strings for facial recognition (recognition and detection)."""
+
+    recognition: str | None = None
+    detection: str | None = None
+
+
+class OcrSettings(BaseModel):
+    """Optional model name strings for OCR (recognition and detection)."""
+
+    recognition: str | None = None
+    detection: str | None = None
+
+
+class PreloadModelData(BaseModel):
+    """Model names to preload, grouped by task.
+
+    The statements in this class body run once, when the class is defined.
+    They translate the single-variable forms ``MACHINE_LEARNING_PRELOAD__CLIP``
+    and ``MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION`` into the per-type
+    environment variables and remove the original variable from the environment.
+    """
+
+    # Values of the single-variable forms as seen at class-definition time (None if unset).
+    clip_fallback: str | None = os.getenv("MACHINE_LEARNING_PRELOAD__CLIP", None)
+    facial_recognition_fallback: str | None = os.getenv("MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION", None)
+    if clip_fallback is not None:
+        # Copy the value to both CLIP variables, overwriting any existing value.
+        os.environ["MACHINE_LEARNING_PRELOAD__CLIP__TEXTUAL"] = clip_fallback
+        os.environ["MACHINE_LEARNING_PRELOAD__CLIP__VISUAL"] = clip_fallback
+        del os.environ["MACHINE_LEARNING_PRELOAD__CLIP"]
+    if facial_recognition_fallback is not None:
+        # Copy the value to both facial recognition variables, overwriting any existing value.
+        os.environ["MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION__RECOGNITION"] = facial_recognition_fallback
+        os.environ["MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION__DETECTION"] = facial_recognition_fallback
+        del os.environ["MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION"]
+    clip: ClipSettings = ClipSettings()
+    facial_recognition: FacialRecognitionSettings = FacialRecognitionSettings()
+    ocr: OcrSettings = OcrSettings()
+
+
+class MaxBatchSize(BaseModel):
+    """Optional per-task batch size limits; None leaves a task's limit unset."""
+
+    facial_recognition: int | None = None
+    text_recognition: int | None = None
+
+
+class Settings(BaseSettings):
+    """Service settings read from ``MACHINE_LEARNING_``-prefixed environment variables.
+
+    Variable names are case-insensitive and nested fields use ``__`` as the delimiter.
+    """
+
+    model_config = SettingsConfigDict(
+        env_prefix="MACHINE_LEARNING_",
+        case_sensitive=False,
+        env_nested_delimiter="__",
+        protected_namespaces=("settings_",),
+    )
+
+    # Defaults to ~/.cache/immich_ml, resolved to an absolute path.
+    cache_folder: Path = (Path.home() / ".cache" / "immich_ml").resolve()
+    # Seconds of inactivity used as the model TTL; values <= 0 disable it.
+    model_ttl: int = 300
+    # Seconds between inactivity checks; values <= 0 disable the check.
+    model_ttl_poll_s: int = 10
+    # Passed to gunicorn as -w, -t and --keep-alive respectively.
+    workers: int = 1
+    worker_timeout: int = 300
+    http_keepalive_timeout_s: int = 2
+    test_full: bool = False
+    # Size of the request thread pool; values <= 0 disable the pool.
+    request_threads: int = os.cpu_count() or 4
+    model_inter_op_threads: int = 0
+    model_intra_op_threads: int = 0
+    model_arena: bool = True
+    ann: bool = True
+    ann_fp16_turbo: bool = False
+    ann_tuning_level: int = 2
+    rknn: bool = True
+    rknn_threads: int = 1
+    preload: PreloadModelData | None = None
+    max_batch_size: MaxBatchSize | None = None
+    openvino_precision: ModelPrecision = ModelPrecision.FP32
+
+    @property
+    def device_id(self) -> str:
+        """Value of ``MACHINE_LEARNING_DEVICE_ID`` at call time, "0" when unset."""
+        return os.environ.get("MACHINE_LEARNING_DEVICE_ID", "0")
+
+
+class NonPrefixedSettings(BaseSettings):
+    """Settings read from environment variables without a prefix (case-insensitive)."""
+
+    model_config = SettingsConfigDict(case_sensitive=False)
+
+    # Host and port the server binds to.
+    immich_host: str = "[::]"
+    immich_port: int = 3003
+    # Looked up in LOG_LEVELS; unknown names fall back to INFO.
+    immich_log_level: str = "info"
+    # Passed to the rich Console to disable colored output.
+    no_color: bool = False
+
+
+_clean_name = str.maketrans(":\\/", "___", ".")
+
+
+def clean_name(model_name: str) -> str:
+    return model_name.split("/")[-1].translate(_clean_name)
+
+
+# Accepted log level names mapped to logging levels. Several names are aliases;
+# note that "critical" maps to ERROR, not CRITICAL.
+LOG_LEVELS: dict[str, int] = {
+    "critical": logging.ERROR,
+    "error": logging.ERROR,
+    "warning": logging.WARNING,
+    "warn": logging.WARNING,
+    "info": logging.INFO,
+    "log": logging.INFO,
+    "debug": logging.DEBUG,
+    "verbose": logging.DEBUG,
+}
+
+# Instantiated once at import time.
+settings = Settings()
+non_prefixed_settings = NonPrefixedSettings()
+
+# Unknown level names fall back to INFO.
+LOG_LEVEL = LOG_LEVELS.get(non_prefixed_settings.immich_log_level.lower(), logging.INFO)
+
+
+class CustomRichHandler(RichHandler):
+    def __init__(self) -> None:
+        console = Console(color_system="standard", no_color=non_prefixed_settings.no_color)
+        self.excluded = ["uvicorn", "starlette", "fastapi"]
+        super().__init__(
+            show_path=False,
+            omit_repeated_times=False,
+            console=console,
+            rich_tracebacks=True,
+            tracebacks_suppress=[*self.excluded, concurrent.futures],
+            tracebacks_show_locals=LOG_LEVEL == logging.DEBUG,
+        )
+
+    # hack to exclude certain modules from rich tracebacks
+    def emit(self, record: logging.LogRecord) -> None:
+        if record.exc_info is not None:
+            tb = record.exc_info[2]
+            while tb is not None:
+                if any(excluded in tb.tb_frame.f_code.co_filename for excluded in self.excluded):
+                    tb.tb_frame.f_locals["_rich_traceback_omit"] = True
+                tb = tb.tb_next
+
+        return super().emit(record)
+
+
+log = logging.getLogger("ml.log")
+log.setLevel(LOG_LEVEL)
+
+
+# patches this issue https://github.com/encode/uvicorn/discussions/1803
+class CustomUvicornServer(Server):
+    async def shutdown(self, sockets: list[socket] | None = None) -> None:
+        for sock in sockets or []:
+            sock.close()
+        await super().shutdown()
+
+
+class CustomUvicornWorker(UvicornWorker):
+    """Gunicorn worker that serves the app with ``CustomUvicornServer``."""
+
+    async def _serve(self) -> None:
+        self.config.app = self.wsgi
+        server = CustomUvicornServer(config=self.config)
+        self._install_sigquit_handler()
+        await server.serve(sockets=self.sockets)
+        # Exit with gunicorn's worker boot error code if the server never started.
+        if not server.started:
+            sys.exit(Arbiter.WORKER_BOOT_ERROR)
--- a/machine-learning/immich_ml/gunicorn_conf.py
+++ b/machine-learning/immich_ml/gunicorn_conf.py
@ -0,0 +1,12 @@
+import os
+
+from gunicorn.arbiter import Arbiter
+from gunicorn.workers.base import Worker
+
+device_ids = os.environ.get("MACHINE_LEARNING_DEVICE_IDS", "0").replace(" ", "").split(",")
+env = os.environ
+
+
+# Round-robin device assignment for each worker
+def pre_fork(arbiter: Arbiter, _: Worker) -> None:
+    env["MACHINE_LEARNING_DEVICE_ID"] = device_ids[len(arbiter.WORKERS) % len(device_ids)]
--- a/machine-learning/immich_ml/log_conf.json
+++ b/machine-learning/immich_ml/log_conf.json
@ -0,0 +1,21 @@
+{
+  "version": 1,
+  "disable_existing_loggers": false,
+  "handlers": {
+    "console": {
+      "class": "immich_ml.config.CustomRichHandler"
+    }
+  },
+  "loggers": {
+    "gunicorn.error": {
+      "handlers": [
+        "console"
+      ]
+    }
+  },
+  "root": {
+    "handlers": [
+      "console"
+    ]
+  }
+}
--- a/machine-learning/immich_ml/main.py
+++ b/machine-learning/immich_ml/main.py
@ -0,0 +1,272 @@
+import asyncio
+import gc
+import os
+import signal
+import threading
+import time
+from concurrent.futures import ThreadPoolExecutor
+from contextlib import asynccontextmanager
+from functools import partial
+from typing import Any, AsyncGenerator, Callable, Iterator
+from zipfile import BadZipFile
+
+import orjson
+from fastapi import Depends, FastAPI, File, Form, HTTPException
+from fastapi.responses import ORJSONResponse, PlainTextResponse
+from onnxruntime.capi.onnxruntime_pybind11_state import InvalidProtobuf, NoSuchFile
+from PIL.Image import Image
+from pydantic import ValidationError
+from starlette.formparsers import MultiPartParser
+
+from immich_ml.models import get_model_deps
+from immich_ml.models.base import InferenceModel
+from immich_ml.models.transforms import decode_pil
+
+from .config import PreloadModelData, log, settings
+from .models.cache import ModelCache
+from .schemas import (
+    InferenceEntries,
+    InferenceEntry,
+    InferenceResponse,
+    ModelFormat,
+    ModelIdentity,
+    ModelTask,
+    ModelType,
+    PipelineRequest,
+    T,
+)
+
+MultiPartParser.spool_max_size = 2**26  # spools to disk if payload is 64 MiB or larger
+
+# Module-level state shared by the request handlers in this file.
+model_cache = ModelCache(revalidate=settings.model_ttl > 0)
+# Created in lifespan() when settings.request_threads > 0; run() calls inline while it is None.
+thread_pool: ThreadPoolExecutor | None = None
+# Held while a model is being loaded (see load()).
+lock = threading.Lock()
+# Number of requests currently inside the update_state() dependency.
+active_requests = 0
+# time.time() of the most recent request; None until the first request arrives.
+last_called: float | None = None
+
+
+@asynccontextmanager
+async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
+    global thread_pool
+    log.info(
+        (
+            "Created in-memory cache with unloading "
+            f"{f'after {settings.model_ttl}s of inactivity' if settings.model_ttl > 0 else 'disabled'}."
+        )
+    )
+
+    try:
+        if settings.request_threads > 0:
+            # asyncio is a huge bottleneck for performance, so we use a thread pool to run blocking code
+            thread_pool = ThreadPoolExecutor(settings.request_threads) if settings.request_threads > 0 else None
+            log.info(f"Initialized request thread pool with {settings.request_threads} threads.")
+        if settings.model_ttl > 0 and settings.model_ttl_poll_s > 0:
+            asyncio.ensure_future(idle_shutdown_task())
+        if settings.preload is not None:
+            await preload_models(settings.preload)
+        yield
+    finally:
+        log.handlers.clear()
+        for model in model_cache.cache._cache.values():
+            del model
+        if thread_pool is not None:
+            thread_pool.shutdown()
+        gc.collect()
+
+
+async def preload_models(preload: PreloadModelData) -> None:
+    log.info(f"Preloading models: clip:{preload.clip} facial_recognition:{preload.facial_recognition}")
+
+    async def load_models(model_string: str, model_type: ModelType, model_task: ModelTask) -> None:
+        for model_name in model_string.split(","):
+            model_name = model_name.strip()
+            model = await model_cache.get(model_name, model_type, model_task)
+            await load(model)
+
+    if preload.clip.textual is not None:
+        await load_models(preload.clip.textual, ModelType.TEXTUAL, ModelTask.SEARCH)
+
+    if preload.clip.visual is not None:
+        await load_models(preload.clip.visual, ModelType.VISUAL, ModelTask.SEARCH)
+
+    if preload.facial_recognition.detection is not None:
+        await load_models(
+            preload.facial_recognition.detection,
+            ModelType.DETECTION,
+            ModelTask.FACIAL_RECOGNITION,
+        )
+
+    if preload.facial_recognition.recognition is not None:
+        await load_models(
+            preload.facial_recognition.recognition,
+            ModelType.RECOGNITION,
+            ModelTask.FACIAL_RECOGNITION,
+        )
+
+    if preload.ocr.detection is not None:
+        await load_models(
+            preload.ocr.detection,
+            ModelType.DETECTION,
+            ModelTask.OCR,
+        )
+
+    if preload.ocr.recognition is not None:
+        await load_models(
+            preload.ocr.recognition,
+            ModelType.RECOGNITION,
+            ModelTask.OCR,
+        )
+
+    if preload.clip_fallback is not None:
+        log.warning(
+            "Deprecated env variable: 'MACHINE_LEARNING_PRELOAD__CLIP'. "
+            "Use 'MACHINE_LEARNING_PRELOAD__CLIP__TEXTUAL' and "
+            "'MACHINE_LEARNING_PRELOAD__CLIP__VISUAL' instead."
+        )
+
+    if preload.facial_recognition_fallback is not None:
+        log.warning(
+            "Deprecated env variable: 'MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION'. "
+            "Use 'MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION__DETECTION' and "
+            "'MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION__RECOGNITION' instead."
+        )
+
+
+def update_state() -> Iterator[None]:
+    global active_requests, last_called
+    active_requests += 1
+    last_called = time.time()
+    try:
+        yield
+    finally:
+        active_requests -= 1
+
+
+def get_entries(entries: str = Form()) -> InferenceEntries:
+    try:
+        request: PipelineRequest = orjson.loads(entries)
+        without_deps: list[InferenceEntry] = []
+        with_deps: list[InferenceEntry] = []
+        for task, types in request.items():
+            for type, entry in types.items():
+                parsed: InferenceEntry = {
+                    "name": entry["modelName"],
+                    "task": task,
+                    "type": type,
+                    "options": entry.get("options", {}),
+                }
+                dep = get_model_deps(parsed["name"], type, task)
+                (with_deps if dep else without_deps).append(parsed)
+        return without_deps, with_deps
+    except (orjson.JSONDecodeError, ValidationError, KeyError, AttributeError) as e:
+        log.error(f"Invalid request format: {e}")
+        raise HTTPException(422, "Invalid request format.")
+
+
+app = FastAPI(lifespan=lifespan)
+
+
+@app.get("/")
+async def root() -> ORJSONResponse:
+    return ORJSONResponse({"message": "Immich ML"})
+
+
+@app.get("/ping")
+def ping() -> PlainTextResponse:
+    return PlainTextResponse("pong")
+
+
+@app.post("/predict", dependencies=[Depends(update_state)])
+async def predict(
+    entries: InferenceEntries = Depends(get_entries),
+    image: bytes | None = File(default=None),
+    text: str | None = Form(default=None),
+) -> Any:
+    if image is not None:
+        inputs: Image | str = await run(lambda: decode_pil(image))
+    elif text is not None:
+        inputs = text
+    else:
+        raise HTTPException(400, "Either image or text must be provided")
+    response = await run_inference(inputs, entries)
+    return ORJSONResponse(response)
+
+
+async def run_inference(payload: Image | str, entries: InferenceEntries) -> InferenceResponse:
+    """Run every entry against ``payload`` and collect the outputs by task.
+
+    Entries without dependencies run first (concurrently); entries with
+    dependencies run afterwards and receive the outputs they depend on as
+    extra inputs. For image payloads the image height and width are added
+    to the response.
+
+    Raises:
+        HTTPException: 400 when an entry depends on an output that was not produced.
+    """
+    # Outputs keyed by model identity, used to resolve dependencies.
+    outputs: dict[ModelIdentity, Any] = {}
+    response: InferenceResponse = {}
+
+    async def _run_inference(entry: InferenceEntry) -> None:
+        model = await model_cache.get(
+            entry["name"], entry["type"], entry["task"], ttl=settings.model_ttl, **entry["options"]
+        )
+        # The payload is always the first input; dependency outputs follow in order.
+        inputs = [payload]
+        for dep in model.depends:
+            try:
+                inputs.append(outputs[dep])
+            except KeyError:
+                message = f"Task {entry['task']} of type {entry['type']} depends on output of {dep}"
+                raise HTTPException(400, message)
+        model = await load(model)
+        output = await run(model.predict, *inputs, **entry["options"])
+        outputs[model.identity] = output
+        # Keyed by task only, so a later entry of the same task overwrites an earlier one.
+        response[entry["task"]] = output
+
+    without_deps, with_deps = entries
+    await asyncio.gather(*[_run_inference(entry) for entry in without_deps])
+    if with_deps:
+        await asyncio.gather(*[_run_inference(entry) for entry in with_deps])
+    if isinstance(payload, Image):
+        response["imageHeight"], response["imageWidth"] = payload.height, payload.width
+
+    return response
+
+
+async def run(func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
+    if thread_pool is None:
+        return func(*args, **kwargs)
+    partial_func = partial(func, *args, **kwargs)
+    return await asyncio.get_running_loop().run_in_executor(thread_pool, partial_func)
+
+
+async def load(model: InferenceModel) -> InferenceModel:
+    """Ensure ``model`` is loaded and return it.
+
+    Loading runs through ``run()`` under the module-level lock. If it fails
+    with one of the handled errors, the model's cache is cleared and loading
+    is attempted once more.
+
+    Raises:
+        HTTPException: 500 when the model already has more than one load attempt.
+    """
+    if model.loaded:
+        return model
+
+    def _load(model: InferenceModel) -> InferenceModel:
+        # presumably load_attempts is incremented by model.load() - verify in InferenceModel
+        if model.load_attempts > 1:
+            raise HTTPException(500, f"Failed to load model '{model.model_name}'")
+        # Only one model is loaded at a time across request threads.
+        with lock:
+            try:
+                model.load()
+            except FileNotFoundError as e:
+                # A missing file is fatal for ONNX; any other format falls back
+                # to ONNX and loads again.
+                if model.model_format == ModelFormat.ONNX:
+                    raise e
+                log.warning(
+                    f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it.",
+                    exc_info=e,
+                )
+                model.model_format = ModelFormat.ONNX
+                model.load()
+        return model
+
+    try:
+        return await run(_load, model)
+    except (OSError, InvalidProtobuf, BadZipFile, NoSuchFile):
+        # These errors are treated as a bad cached model: clear the cache and retry once.
+        log.warning(f"Failed to load {model.model_type.replace('_', ' ')} model '{model.model_name}'. Clearing cache.")
+        model.clear_cache()
+        return await run(_load, model)
+
+
+async def idle_shutdown_task() -> None:
+    while True:
+        if (
+            last_called is not None
+            and not active_requests
+            and not lock.locked()
+            and time.time() - last_called > settings.model_ttl
+        ):
+            log.info("Shutting down due to inactivity.")
+            os.kill(os.getpid(), signal.SIGINT)
+            break
+        await asyncio.sleep(settings.model_ttl_poll_s)
--- a/machine-learning/immich_ml/models/__init__.py
+++ b/machine-learning/immich_ml/models/__init__.py
@ -0,0 +1,48 @@
+from typing import Any
+
+from immich_ml.models.base import InferenceModel
+from immich_ml.models.clip.textual import MClipTextualEncoder, OpenClipTextualEncoder
+from immich_ml.models.clip.visual import OpenClipVisualEncoder
+from immich_ml.models.ocr.detection import TextDetector
+from immich_ml.models.ocr.recognition import TextRecognizer
+from immich_ml.schemas import ModelSource, ModelTask, ModelType
+
+from .constants import get_model_source
+from .facial_recognition.detection import FaceDetector
+from .facial_recognition.recognition import FaceRecognizer
+
+
+def get_model_class(model_name: str, model_type: ModelType, model_task: ModelTask) -> type[InferenceModel]:
+    source = get_model_source(model_name)
+    match source, model_type, model_task:
+        case ModelSource.OPENCLIP | ModelSource.MCLIP, ModelType.VISUAL, ModelTask.SEARCH:
+            return OpenClipVisualEncoder
+
+        case ModelSource.OPENCLIP, ModelType.TEXTUAL, ModelTask.SEARCH:
+            return OpenClipTextualEncoder
+
+        case ModelSource.MCLIP, ModelType.TEXTUAL, ModelTask.SEARCH:
+            return MClipTextualEncoder
+
+        case ModelSource.INSIGHTFACE, ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION:
+            return FaceDetector
+
+        case ModelSource.INSIGHTFACE, ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION:
+            return FaceRecognizer
+
+        case ModelSource.PADDLE, ModelType.DETECTION, ModelTask.OCR:
+            return TextDetector
+
+        case ModelSource.PADDLE, ModelType.RECOGNITION, ModelTask.OCR:
+            return TextRecognizer
+
+        case _:
+            raise ValueError(f"Unknown model combination: {source}, {model_type}, {model_task}")
+
+
+def from_model_type(model_name: str, model_type: ModelType, model_task: ModelTask, **kwargs: Any) -> InferenceModel:
+    return get_model_class(model_name, model_type, model_task)(model_name, **kwargs)
+
+
+def get_model_deps(model_name: str, model_type: ModelType, model_task: ModelTask) -> list[tuple[ModelType, ModelTask]]:
+    return get_model_class(model_name, model_type, model_task).depends
--- a/machine-learning/immich_ml/models/base.py
+++ b/machine-learning/immich_ml/models/base.py
@ -0,0 +1,176 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from pathlib import Path
+from shutil import rmtree
+from typing import Any, ClassVar
+
+from huggingface_hub import snapshot_download
+
+import immich_ml.sessions.ann.loader
+import immich_ml.sessions.rknn as rknn
+from immich_ml.sessions.ort import OrtSession
+
+from ..config import clean_name, log, settings
+from ..schemas import ModelFormat, ModelIdentity, ModelSession, ModelTask, ModelType
+from ..sessions.ann import AnnSession
+
+
+class InferenceModel(ABC):
+    depends: ClassVar[list[ModelIdentity]]
+    identity: ClassVar[ModelIdentity]
+
+    def __init__(
+        self,
+        model_name: str,
+        cache_dir: Path | str | None = None,
+        model_format: ModelFormat | None = None,
+        session: ModelSession | None = None,
+        **model_kwargs: Any,
+    ) -> None:
+        self.loaded = session is not None
+        self.load_attempts = 0
+        self.model_name = clean_name(model_name)
+        self.cache_dir = Path(cache_dir) if cache_dir is not None else self._cache_dir_default
+        self.model_format = model_format if model_format is not None else self._model_format_default
+        if session is not None:
+            self.session = session
+
+    def download(self) -> None:
+        if not self.cached:
+            model_type = self.model_type.replace("-", " ")
+            log.info(f"Downloading {model_type} model '{self.model_name}' to {self.model_path}. This may take a while.")
+            self._download()
+
+    def load(self) -> None:
+        if self.loaded:
+            return
+        self.load_attempts += 1
+
+        self.download()
+        attempt = f"Attempt #{self.load_attempts} to load" if self.load_attempts > 1 else "Loading"
+        log.info(f"{attempt} {self.model_type.replace('-', ' ')} model '{self.model_name}' to memory")
+        self.session = self._load()
+        self.loaded = True
+
+    def predict(self, *inputs: Any, **model_kwargs: Any) -> Any:
+        self.load()
+        if model_kwargs:
+            self.configure(**model_kwargs)
+        return self._predict(*inputs)
+
+    @abstractmethod
+    def _predict(self, *inputs: Any, **model_kwargs: Any) -> Any: ...
+
+    def configure(self, **kwargs: Any) -> None:
+        pass
+
+    def _download(self) -> None:
+        ignored_patterns: dict[ModelFormat, list[str]] = {
+            ModelFormat.ONNX: ["*.armnn", "*.rknn"],
+            ModelFormat.ARMNN: ["*.rknn"],
+            ModelFormat.RKNN: ["*.armnn"],
+        }
+
+        snapshot_download(
+            f"immich-app/{clean_name(self.model_name)}",
+            cache_dir=self.cache_dir,
+            local_dir=self.cache_dir,
+            ignore_patterns=ignored_patterns.get(self.model_format, []),
+        )
+
+    def _load(self) -> ModelSession:
+        return self._make_session(self.model_path)
+
+    def clear_cache(self) -> None:
+        if not self.cache_dir.exists():
+            log.warning(
+                f"Attempted to clear cache for model '{self.model_name}', but cache directory does not exist",
+            )
+            return
+        if not rmtree.avoids_symlink_attacks:
+            raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform")
+
+        if self.cache_dir.is_dir():
+            log.info(f"Cleared cache directory for model '{self.model_name}'.")
+            rmtree(self.cache_dir)
+        else:
+            log.warning(
+                (
+                    f"Encountered file instead of directory at cache path "
+                    f"for '{self.model_name}'. Removing file and replacing with a directory."
+                ),
+            )
+            self.cache_dir.unlink()
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+    def _make_session(self, model_path: Path) -> ModelSession:
+        if not model_path.is_file():
+            raise FileNotFoundError(f"Model file not found: {model_path}")
+
+        match model_path.suffix:
+            case ".armnn":
+                session: ModelSession = AnnSession(model_path)
+            case ".onnx":
+                session = OrtSession(model_path)
+            case ".rknn":
+                session = rknn.RknnSession(model_path)
+            case _:
+                raise ValueError(f"Unsupported model file type: {model_path.suffix}")
+        return session
+
+    def model_path_for_format(self, model_format: ModelFormat) -> Path:
+        model_path_prefix = rknn.model_prefix if model_format == ModelFormat.RKNN else None
+        if model_path_prefix:
+            return self.model_dir / model_path_prefix / f"model.{model_format}"
+        return self.model_dir / f"model.{model_format}"
+
+    @property
+    def model_dir(self) -> Path:
+        return self.cache_dir / self.model_type.value
+
+    @property
+    def model_path(self) -> Path:
+        return self.model_path_for_format(self.model_format)
+
+    @property
+    def model_task(self) -> ModelTask:
+        return self.identity[1]
+
+    @property
+    def model_type(self) -> ModelType:
+        return self.identity[0]
+
+    @property
+    def cache_dir(self) -> Path:
+        return self._cache_dir
+
+    @cache_dir.setter
+    def cache_dir(self, cache_dir: Path) -> None:
+        self._cache_dir = cache_dir
+
+    @property
+    def _cache_dir_default(self) -> Path:
+        return settings.cache_folder / self.model_task.value / self.model_name
+
+    @property
+    def cached(self) -> bool:
+        return self.model_path.is_file()
+
+    @property
+    def model_format(self) -> ModelFormat:
+        return self._model_format
+
+    @model_format.setter
+    def model_format(self, model_format: ModelFormat) -> None:
+        log.debug(f"Setting model format to {model_format}")
+        self._model_format = model_format
+
+    @property
+    def _model_format_default(self) -> ModelFormat:
+        if rknn.is_available:
+            return ModelFormat.RKNN
+        elif immich_ml.sessions.ann.loader.is_available and settings.ann:
+            return ModelFormat.ARMNN
+        else:
+            return ModelFormat.ONNX
--- a/machine-learning/immich_ml/models/cache.py
+++ b/machine-learning/immich_ml/models/cache.py
@ -0,0 +1,60 @@
+from typing import Any
+
+from aiocache.backends.memory import SimpleMemoryCache
+from aiocache.lock import OptimisticLock
+from aiocache.plugins import TimingPlugin
+
+from immich_ml.models import from_model_type
+from immich_ml.models.base import InferenceModel
+
+from ..schemas import ModelTask, ModelType, has_profiling
+
+
+class ModelCache:
+    """Fetches a model from an in-memory cache, instantiating it if it's missing."""
+
+    def __init__(
+        self,
+        revalidate: bool = False,
+        timeout: int | None = None,
+        profiling: bool = False,
+    ) -> None:
+        """
+        Args:
+            revalidate: Resets TTL on cache hit. Useful to keep models in memory while active. Defaults to False.
+            timeout: Maximum allowed time for model to load. Disabled if None. Defaults to None.
+            profiling: Collects metrics for cache operations, adding slight overhead. Defaults to False.
+        """
+
+        plugins = []
+
+        if profiling:
+            plugins.append(TimingPlugin())
+
+        self.should_revalidate = revalidate
+
+        self.cache = SimpleMemoryCache(timeout=timeout, plugins=plugins, namespace=None)
+
+    async def get(
+        self, model_name: str, model_type: ModelType, model_task: ModelTask, **model_kwargs: Any
+    ) -> InferenceModel:
+        key = f"{model_name}{model_type}{model_task}"
+
+        async with OptimisticLock(self.cache, key) as lock:
+            model: InferenceModel | None = await self.cache.get(key)
+            if model is None:
+                model = from_model_type(model_name, model_type, model_task, **model_kwargs)
+                await lock.cas(model, ttl=model_kwargs.get("ttl", None))
+            elif self.should_revalidate:
+                await self.revalidate(key, model_kwargs.get("ttl", None))
+        return model
+
+    async def get_profiling(self) -> dict[str, float] | None:
+        if not has_profiling(self.cache):
+            return None
+
+        return self.cache.profiling
+
+    async def revalidate(self, key: str, ttl: int | None) -> None:
+        if ttl is not None and key in self.cache._handlers:
+            await self.cache.expire(key, ttl)
--- a/machine-learning/immich_ml/models/clip/textual.py
+++ b/machine-learning/immich_ml/models/clip/textual.py
@ -0,0 +1,120 @@
+import json
+from abc import abstractmethod
+from functools import cached_property
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+from tokenizers import Encoding, Tokenizer
+
+from immich_ml.config import log
+from immich_ml.models.base import InferenceModel
+from immich_ml.models.constants import WEBLATE_TO_FLORES200
+from immich_ml.models.transforms import clean_text, serialize_np_array
+from immich_ml.schemas import ModelSession, ModelTask, ModelType
+
+
+class BaseCLIPTextualEncoder(InferenceModel):
+    depends = []
+    identity = (ModelType.TEXTUAL, ModelTask.SEARCH)
+
+    def _predict(self, inputs: str, language: str | None = None) -> str:
+        tokens = self.tokenize(inputs, language=language)
+        res: NDArray[np.float32] = self.session.run(None, tokens)[0][0]
+        return serialize_np_array(res)
+
+    def _load(self) -> ModelSession:
+        session = super()._load()
+        log.debug(f"Loading tokenizer for CLIP model '{self.model_name}'")
+        self.tokenizer = self._load_tokenizer()
+        tokenizer_kwargs: dict[str, Any] | None = self.text_cfg.get("tokenizer_kwargs")
+        self.canonicalize = tokenizer_kwargs is not None and tokenizer_kwargs.get("clean") == "canonicalize"
+        self.is_nllb = self.model_name.startswith("nllb")
+        log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'")
+
+        return session
+
+    @abstractmethod
+    def _load_tokenizer(self) -> Tokenizer:
+        pass
+
+    @abstractmethod
+    def tokenize(self, text: str, language: str | None = None) -> dict[str, NDArray[np.int32]]:
+        pass
+
+    @property
+    def model_cfg_path(self) -> Path:
+        return self.cache_dir / "config.json"
+
+    @property
+    def tokenizer_file_path(self) -> Path:
+        return self.model_dir / "tokenizer.json"
+
+    @property
+    def tokenizer_cfg_path(self) -> Path:
+        return self.model_dir / "tokenizer_config.json"
+
+    @cached_property
+    def model_cfg(self) -> dict[str, Any]:
+        log.debug(f"Loading model config for CLIP model '{self.model_name}'")
+        model_cfg: dict[str, Any] = json.load(self.model_cfg_path.open())
+        log.debug(f"Loaded model config for CLIP model '{self.model_name}'")
+        return model_cfg
+
+    @property
+    def text_cfg(self) -> dict[str, Any]:
+        text_cfg: dict[str, Any] = self.model_cfg["text_cfg"]
+        return text_cfg
+
+    @cached_property
+    def tokenizer_file(self) -> dict[str, Any]:
+        log.debug(f"Loading tokenizer file for CLIP model '{self.model_name}'")
+        tokenizer_file: dict[str, Any] = json.load(self.tokenizer_file_path.open())
+        log.debug(f"Loaded tokenizer file for CLIP model '{self.model_name}'")
+        return tokenizer_file
+
+    @cached_property
+    def tokenizer_cfg(self) -> dict[str, Any]:
+        log.debug(f"Loading tokenizer config for CLIP model '{self.model_name}'")
+        tokenizer_cfg: dict[str, Any] = json.load(self.tokenizer_cfg_path.open())
+        log.debug(f"Loaded tokenizer config for CLIP model '{self.model_name}'")
+        return tokenizer_cfg
+
+
+class OpenClipTextualEncoder(BaseCLIPTextualEncoder):
+    def _load_tokenizer(self) -> Tokenizer:
+        context_length: int = self.text_cfg.get("context_length", 77)
+        pad_token: str = self.tokenizer_cfg["pad_token"]
+
+        tokenizer: Tokenizer = Tokenizer.from_file(self.tokenizer_file_path.as_posix())
+
+        pad_id: int = tokenizer.token_to_id(pad_token)
+        tokenizer.enable_padding(length=context_length, pad_token=pad_token, pad_id=pad_id)
+        tokenizer.enable_truncation(max_length=context_length)
+
+        return tokenizer
+
+    def tokenize(self, text: str, language: str | None = None) -> dict[str, NDArray[np.int32]]:
+        text = clean_text(text, canonicalize=self.canonicalize)
+        if self.is_nllb and language is not None:
+            flores_code = WEBLATE_TO_FLORES200.get(language)
+            if flores_code is None:
+                no_country = language.split("-")[0]
+                flores_code = WEBLATE_TO_FLORES200.get(no_country)
+                if flores_code is None:
+                    log.warning(f"Language '{language}' not found, defaulting to 'en'")
+                    flores_code = "eng_Latn"
+            text = f"{flores_code}{text}"
+        tokens: Encoding = self.tokenizer.encode(text)
+        return {"text": np.array([tokens.ids], dtype=np.int32)}
+
+
+class MClipTextualEncoder(OpenClipTextualEncoder):
+    def tokenize(self, text: str, language: str | None = None) -> dict[str, NDArray[np.int32]]:
+        text = clean_text(text, canonicalize=self.canonicalize)
+        tokens: Encoding = self.tokenizer.encode(text)
+        return {
+            "input_ids": np.array([tokens.ids], dtype=np.int32),
+            "attention_mask": np.array([tokens.attention_mask], dtype=np.int32),
+        }
--- a/machine-learning/immich_ml/models/clip/visual.py
+++ b/machine-learning/immich_ml/models/clip/visual.py
@ -0,0 +1,77 @@
+import json
+from abc import abstractmethod
+from functools import cached_property
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+from PIL import Image
+
+from immich_ml.config import log
+from immich_ml.models.base import InferenceModel
+from immich_ml.models.transforms import (
+    crop_pil,
+    decode_pil,
+    get_pil_resampling,
+    normalize,
+    resize_pil,
+    serialize_np_array,
+    to_numpy,
+)
+from immich_ml.schemas import ModelSession, ModelTask, ModelType
+
+
+class BaseCLIPVisualEncoder(InferenceModel):
+    depends = []
+    identity = (ModelType.VISUAL, ModelTask.SEARCH)
+
+    def _predict(self, inputs: Image.Image | bytes) -> str:
+        image = decode_pil(inputs)
+        res: NDArray[np.float32] = self.session.run(None, self.transform(image))[0][0]
+        return serialize_np_array(res)
+
+    @abstractmethod
+    def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:
+        pass
+
+    @property
+    def model_cfg_path(self) -> Path:
+        return self.cache_dir / "config.json"
+
+    @property
+    def preprocess_cfg_path(self) -> Path:
+        return self.model_dir / "preprocess_cfg.json"
+
+    @cached_property
+    def model_cfg(self) -> dict[str, Any]:
+        log.debug(f"Loading model config for CLIP model '{self.model_name}'")
+        model_cfg: dict[str, Any] = json.load(self.model_cfg_path.open())
+        log.debug(f"Loaded model config for CLIP model '{self.model_name}'")
+        return model_cfg
+
+    @cached_property
+    def preprocess_cfg(self) -> dict[str, Any]:
+        log.debug(f"Loading visual preprocessing config for CLIP model '{self.model_name}'")
+        preprocess_cfg: dict[str, Any] = json.load(self.preprocess_cfg_path.open())
+        log.debug(f"Loaded visual preprocessing config for CLIP model '{self.model_name}'")
+        return preprocess_cfg
+
+
+class OpenClipVisualEncoder(BaseCLIPVisualEncoder):
+    def _load(self) -> ModelSession:
+        size: list[int] | int = self.preprocess_cfg["size"]
+        self.size = size[0] if isinstance(size, list) else size
+
+        self.resampling = get_pil_resampling(self.preprocess_cfg["interpolation"])
+        self.mean = np.array(self.preprocess_cfg["mean"], dtype=np.float32)
+        self.std = np.array(self.preprocess_cfg["std"], dtype=np.float32)
+
+        return super()._load()
+
+    def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:
+        image = resize_pil(image, self.size)
+        image = crop_pil(image, self.size)
+        image_np = to_numpy(image)
+        image_np = normalize(image_np, self.mean, self.std)
+        return {"image": np.expand_dims(image_np.transpose(2, 0, 1), 0)}
--- a/machine-learning/immich_ml/models/constants.py
+++ b/machine-learning/immich_ml/models/constants.py
@ -0,0 +1,178 @@
+from immich_ml.config import clean_name
+from immich_ml.schemas import ModelSource
+
+_OPENCLIP_MODELS = {
+    "RN101__openai",
+    "RN101__yfcc15m",
+    "RN50__cc12m",
+    "RN50__openai",
+    "RN50__yfcc15m",
+    "RN50x16__openai",
+    "RN50x4__openai",
+    "RN50x64__openai",
+    "ViT-B-16-SigLIP-256__webli",
+    "ViT-B-16-SigLIP-384__webli",
+    "ViT-B-16-SigLIP-512__webli",
+    "ViT-B-16-SigLIP-i18n-256__webli",
+    "ViT-B-16-SigLIP__webli",
+    "ViT-B-16-plus-240__laion400m_e31",
+    "ViT-B-16-plus-240__laion400m_e32",
+    "ViT-B-16__laion400m_e31",
+    "ViT-B-16__laion400m_e32",
+    "ViT-B-16__openai",
+    "ViT-B-32__laion2b-s34b-b79k",
+    "ViT-B-32__laion2b_e16",
+    "ViT-B-32__laion400m_e31",
+    "ViT-B-32__laion400m_e32",
+    "ViT-B-32__openai",
+    "ViT-H-14-378-quickgelu__dfn5b",
+    "ViT-H-14-quickgelu__dfn5b",
+    "ViT-H-14__laion2b-s32b-b79k",
+    "ViT-L-14-336__openai",
+    "ViT-L-14-quickgelu__dfn2b",
+    "ViT-L-14__laion2b-s32b-b82k",
+    "ViT-L-14__laion400m_e31",
+    "ViT-L-14__laion400m_e32",
+    "ViT-L-14__openai",
+    "ViT-L-16-SigLIP-256__webli",
+    "ViT-L-16-SigLIP-384__webli",
+    "ViT-SO400M-14-SigLIP-384__webli",
+    "ViT-g-14__laion2b-s12b-b42k",
+    "XLM-Roberta-Base-ViT-B-32__laion5b_s13b_b90k",
+    "XLM-Roberta-Large-ViT-H-14__frozen_laion5b_s13b_b90k",
+    "nllb-clip-base-siglip__mrl",
+    "nllb-clip-base-siglip__v1",
+    "nllb-clip-large-siglip__mrl",
+    "nllb-clip-large-siglip__v1",
+    "ViT-B-16-SigLIP2__webli",
+    "ViT-B-32-SigLIP2-256__webli",
+    "ViT-L-16-SigLIP2-256__webli",
+    "ViT-L-16-SigLIP2-384__webli",
+    "ViT-L-16-SigLIP2-512__webli",
+    "ViT-SO400M-14-SigLIP2-378__webli",
+    "ViT-SO400M-14-SigLIP2__webli",
+    "ViT-SO400M-16-SigLIP2-256__webli",
+    "ViT-SO400M-16-SigLIP2-384__webli",
+    "ViT-SO400M-16-SigLIP2-512__webli",
+    "ViT-gopt-16-SigLIP2-256__webli",
+    "ViT-gopt-16-SigLIP2-384__webli",
+}
+
+
+_MCLIP_MODELS = {
+    "LABSE-Vit-L-14",
+    "XLM-Roberta-Large-Vit-B-16Plus",
+    "XLM-Roberta-Large-Vit-B-32",
+    "XLM-Roberta-Large-Vit-L-14",
+}
+
+
+_INSIGHTFACE_MODELS = {
+    "antelopev2",
+    "buffalo_s",
+    "buffalo_m",
+    "buffalo_l",
+}
+
+
+_PADDLE_MODELS = {
+    "PP-OCRv5_server",
+    "PP-OCRv5_mobile",
+    "CH__PP-OCRv5_server",
+    "CH__PP-OCRv5_mobile",
+    "EL__PP-OCRv5_mobile",
+    "EN__PP-OCRv5_mobile",
+    "ESLAV__PP-OCRv5_mobile",
+    "KOREAN__PP-OCRv5_mobile",
+    "LATIN__PP-OCRv5_mobile",
+    "TH__PP-OCRv5_mobile",
+}
+
+SUPPORTED_PROVIDERS = [
+    "CUDAExecutionProvider",
+    "ROCMExecutionProvider",
+    "OpenVINOExecutionProvider",
+    "CoreMLExecutionProvider",
+    "CPUExecutionProvider",
+]
+
+RKNN_SUPPORTED_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
+RKNN_COREMASK_SUPPORTED_SOCS = ["rk3576", "rk3588"]
+
+
+WEBLATE_TO_FLORES200 = {
+    "af": "afr_Latn",
+    "ar": "arb_Arab",
+    "az": "azj_Latn",
+    "be": "bel_Cyrl",
+    "bg": "bul_Cyrl",
+    "ca": "cat_Latn",
+    "cs": "ces_Latn",
+    "da": "dan_Latn",
+    "de": "deu_Latn",
+    "el": "ell_Grek",
+    "en": "eng_Latn",
+    "es": "spa_Latn",
+    "et": "est_Latn",
+    "fa": "pes_Arab",
+    "fi": "fin_Latn",
+    "fr": "fra_Latn",
+    "he": "heb_Hebr",
+    "hi": "hin_Deva",
+    "hr": "hrv_Latn",
+    "hu": "hun_Latn",
+    "hy": "hye_Armn",
+    "id": "ind_Latn",
+    "it": "ita_Latn",
+    "ja": "jpn_Hira",
+    "kmr": "kmr_Latn",
+    "ko": "kor_Hang",
+    "lb": "ltz_Latn",
+    "lt": "lit_Latn",
+    "lv": "lav_Latn",
+    "mfa": "zsm_Latn",
+    "mk": "mkd_Cyrl",
+    "mn": "khk_Cyrl",
+    "mr": "mar_Deva",
+    "ms": "zsm_Latn",
+    "nb-NO": "nob_Latn",
+    "nn": "nno_Latn",
+    "nl": "nld_Latn",
+    "pl": "pol_Latn",
+    "pt-BR": "por_Latn",
+    "pt": "por_Latn",
+    "ro": "ron_Latn",
+    "ru": "rus_Cyrl",
+    "sk": "slk_Latn",
+    "sl": "slv_Latn",
+    "sr-Cyrl": "srp_Cyrl",
+    "sv": "swe_Latn",
+    "ta": "tam_Taml",
+    "te": "tel_Telu",
+    "th": "tha_Thai",
+    "tr": "tur_Latn",
+    "uk": "ukr_Cyrl",
+    "ur": "urd_Arab",
+    "vi": "vie_Latn",
+    "zh-CN": "zho_Hans",
+    "zh-Hans": "zho_Hans",
+    "zh-TW": "zho_Hant",
+}
+
+
+def get_model_source(model_name: str) -> ModelSource | None:
+    cleaned_name = clean_name(model_name)
+
+    if cleaned_name in _INSIGHTFACE_MODELS:
+        return ModelSource.INSIGHTFACE
+
+    if cleaned_name in _MCLIP_MODELS:
+        return ModelSource.MCLIP
+
+    if cleaned_name in _OPENCLIP_MODELS:
+        return ModelSource.OPENCLIP
+
+    if cleaned_name in _PADDLE_MODELS:
+        return ModelSource.PADDLE
+
+    return None
--- a/machine-learning/immich_ml/models/facial_recognition/detection.py
+++ b/machine-learning/immich_ml/models/facial_recognition/detection.py
@ -0,0 +1,41 @@
+from typing import Any
+
+import numpy as np
+from insightface.model_zoo import RetinaFace
+from numpy.typing import NDArray
+
+from immich_ml.models.base import InferenceModel
+from immich_ml.models.transforms import decode_cv2
+from immich_ml.schemas import FaceDetectionOutput, ModelSession, ModelTask, ModelType
+
+
+class FaceDetector(InferenceModel):
+    depends = []
+    identity = (ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION)
+
+    def __init__(self, model_name: str, min_score: float = 0.7, **model_kwargs: Any) -> None:
+        self.min_score = model_kwargs.pop("minScore", min_score)
+        super().__init__(model_name, **model_kwargs)
+
+    def _load(self) -> ModelSession:
+        session = self._make_session(self.model_path)
+        self.model = RetinaFace(session=session)
+        self.model.prepare(ctx_id=0, det_thresh=self.min_score, input_size=(640, 640))
+
+        return session
+
+    def _predict(self, inputs: NDArray[np.uint8] | bytes) -> FaceDetectionOutput:
+        inputs = decode_cv2(inputs)
+
+        bboxes, landmarks = self._detect(inputs)
+        return {
+            "boxes": bboxes[:, :4].round(),
+            "scores": bboxes[:, 4],
+            "landmarks": landmarks,
+        }
+
+    def _detect(self, inputs: NDArray[np.uint8] | bytes) -> tuple[NDArray[np.float32], NDArray[np.float32]]:
+        return self.model.detect(inputs)  # type: ignore
+
+    def configure(self, **kwargs: Any) -> None:
+        self.model.det_thresh = kwargs.pop("minScore", self.model.det_thresh)
--- a/machine-learning/immich_ml/models/facial_recognition/recognition.py
+++ b/machine-learning/immich_ml/models/facial_recognition/recognition.py
@ -0,0 +1,92 @@
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import onnx
+import onnxruntime as ort
+from insightface.model_zoo import ArcFaceONNX
+from insightface.utils.face_align import norm_crop
+from numpy.typing import NDArray
+from onnx.tools.update_model_dims import update_inputs_outputs_dims
+from PIL import Image
+
+from immich_ml.config import log, settings
+from immich_ml.models.base import InferenceModel
+from immich_ml.models.transforms import decode_cv2, serialize_np_array
+from immich_ml.schemas import (
+    FaceDetectionOutput,
+    FacialRecognitionOutput,
+    ModelFormat,
+    ModelSession,
+    ModelTask,
+    ModelType,
+)
+
+
+class FaceRecognizer(InferenceModel):
+    depends = [(ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION)]
+    identity = (ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION)
+
+    def __init__(self, model_name: str, **model_kwargs: Any) -> None:
+        super().__init__(model_name, **model_kwargs)
+        max_batch_size = settings.max_batch_size.facial_recognition if settings.max_batch_size else None
+        self.batch_size = max_batch_size if max_batch_size else self._batch_size_default
+
+    def _load(self) -> ModelSession:
+        session = self._make_session(self.model_path)
+        if (not self.batch_size or self.batch_size > 1) and str(session.get_inputs()[0].shape[0]) != "batch":
+            self._add_batch_axis(self.model_path)
+            session = self._make_session(self.model_path)
+        self.model = ArcFaceONNX(
+            self.model_path_for_format(ModelFormat.ONNX).as_posix(),
+            session=session,
+        )
+        return session
+
+    def _predict(
+        self, inputs: NDArray[np.uint8] | bytes | Image.Image, faces: FaceDetectionOutput
+    ) -> FacialRecognitionOutput:
+        if faces["boxes"].shape[0] == 0:
+            return []
+        inputs = decode_cv2(inputs)
+        cropped_faces = self._crop(inputs, faces)
+        embeddings = self._predict_batch(cropped_faces)
+        return self.postprocess(faces, embeddings)
+
+    def _predict_batch(self, cropped_faces: list[NDArray[np.uint8]]) -> NDArray[np.float32]:
+        if not self.batch_size or len(cropped_faces) <= self.batch_size:
+            embeddings: NDArray[np.float32] = self.model.get_feat(cropped_faces)
+            return embeddings
+
+        batch_embeddings: list[NDArray[np.float32]] = []
+        for i in range(0, len(cropped_faces), self.batch_size):
+            batch_embeddings.append(self.model.get_feat(cropped_faces[i : i + self.batch_size]))
+        return np.concatenate(batch_embeddings, axis=0)
+
+    def postprocess(self, faces: FaceDetectionOutput, embeddings: NDArray[np.float32]) -> FacialRecognitionOutput:
+        return [
+            {
+                "boundingBox": {"x1": x1, "y1": y1, "x2": x2, "y2": y2},
+                "embedding": serialize_np_array(embedding),
+                "score": score,
+            }
+            for (x1, y1, x2, y2), embedding, score in zip(faces["boxes"], embeddings, faces["scores"])
+        ]
+
+    def _crop(self, image: NDArray[np.uint8], faces: FaceDetectionOutput) -> list[NDArray[np.uint8]]:
+        return [norm_crop(image, landmark) for landmark in faces["landmarks"]]
+
+    def _add_batch_axis(self, model_path: Path) -> None:
+        log.debug(f"Adding batch axis to model {model_path}")
+        proto = onnx.load(model_path)
+        static_input_dims = [shape.dim_value for shape in proto.graph.input[0].type.tensor_type.shape.dim[1:]]
+        static_output_dims = [shape.dim_value for shape in proto.graph.output[0].type.tensor_type.shape.dim[1:]]
+        input_dims = {proto.graph.input[0].name: ["batch"] + static_input_dims}
+        output_dims = {proto.graph.output[0].name: ["batch"] + static_output_dims}
+        updated_proto = update_inputs_outputs_dims(proto, input_dims, output_dims)
+        onnx.save(updated_proto, model_path)
+
+    @property
+    def _batch_size_default(self) -> int | None:
+        providers = ort.get_available_providers()
+        return None if self.model_format == ModelFormat.ONNX and "OpenVINOExecutionProvider" not in providers else 1
--- a/machine-learning/immich_ml/models/ocr/detection.py
+++ b/machine-learning/immich_ml/models/ocr/detection.py
@ -0,0 +1,125 @@
+from typing import Any
+
+import cv2
+import numpy as np
+from numpy.typing import NDArray
+from PIL import Image
+from rapidocr.ch_ppocr_det.utils import DBPostProcess
+from rapidocr.inference_engine.base import FileInfo, InferSession
+from rapidocr.utils.download_file import DownloadFile, DownloadFileInput
+from rapidocr.utils.typings import EngineType, LangDet, OCRVersion, TaskType
+from rapidocr.utils.typings import ModelType as RapidModelType
+
+from immich_ml.config import log
+from immich_ml.models.base import InferenceModel
+from immich_ml.schemas import ModelFormat, ModelSession, ModelTask, ModelType
+from immich_ml.sessions.ort import OrtSession
+
+from .schemas import TextDetectionOutput
+
+
+class TextDetector(InferenceModel):
+    """Text-detection stage of the OCR pipeline.
+
+    Runs an ONNX model through `OrtSession` and turns its output into boxes and scores with RapidOCR's
+    `DBPostProcess`. Boxes are returned in reading order (lines top-to-bottom, left-to-right within a
+    line) and `scores[i]` always belongs to `boxes[i]`.
+    """
+
+    depends = []
+    identity = (ModelType.DETECTION, ModelTask.OCR)
+
+    def __init__(self, model_name: str, **model_kwargs: Any) -> None:
+        # only the segment after the last "__" is forwarded as the model name
+        super().__init__(model_name.split("__")[-1], **model_kwargs, model_format=ModelFormat.ONNX)
+        # target length of the shorter image side; images are only ever scaled down (see `_transform`)
+        self.max_resolution = 736
+        self.mean = np.array([0.5, 0.5, 0.5], dtype=np.float32)
+        self.std_inv = np.float32(1.0) / (np.array([0.5, 0.5, 0.5], dtype=np.float32) * 255.0)
+        # shared result object returned whenever nothing is detected
+        self._empty: TextDetectionOutput = {
+            "boxes": np.empty(0, dtype=np.float32),
+            "scores": np.empty(0, dtype=np.float32),
+        }
+        self.postprocess = DBPostProcess(
+            thresh=0.3,
+            box_thresh=model_kwargs.get("minScore", 0.5),
+            max_candidates=1000,
+            unclip_ratio=1.6,
+            use_dilation=True,
+            score_mode="fast",
+        )
+
+    def _download(self) -> None:
+        """Resolve the PP-OCRv5 detection model through RapidOCR and download it to `self.model_path`.
+
+        The mobile variant is chosen when the model name contains "mobile", the server variant otherwise.
+        """
+        model_info = InferSession.get_model_url(
+            FileInfo(
+                engine_type=EngineType.ONNXRUNTIME,
+                ocr_version=OCRVersion.PPOCRV5,
+                task_type=TaskType.DET,
+                lang_type=LangDet.CH,
+                model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER,
+            )
+        )
+        download_params = DownloadFileInput(
+            file_url=model_info["model_dir"],
+            sha256=model_info["SHA256"],
+            save_path=self.model_path,
+            logger=log,
+        )
+        DownloadFile.run(download_params)
+
+    def _load(self) -> ModelSession:
+        """Create the ONNX Runtime session for the downloaded model."""
+        # TODO: support other runtime sessions
+        return OrtSession(self.model_path)
+
+    # partly adapted from RapidOCR
+    def _predict(self, inputs: Image.Image) -> TextDetectionOutput:
+        """Detect text regions in `inputs`.
+
+        Images narrower or shorter than 32 pixels are not processed and yield the empty result.
+        Boxes and scores are reordered with the same permutation so they stay index-aligned.
+        """
+        w, h = inputs.size
+        if w < 32 or h < 32:
+            return self._empty
+        out = self.session.run(None, {"x": self._transform(inputs)})[0]
+        boxes, scores = self.postprocess(out, (h, w))
+        if len(boxes) == 0:
+            return self._empty
+        boxes = np.asarray(boxes)
+        # one permutation for both arrays: sorting only the boxes would detach every score from its box
+        order = self._reading_order(boxes)
+        return {
+            "boxes": boxes[order],
+            "scores": np.array(scores, dtype=np.float32)[order],
+        }
+
+    # adapted from RapidOCR
+    def _transform(self, img: Image.Image) -> NDArray[np.float32]:
+        """Resize and normalize `img` into a (1, 3, H, W) float32 BGR tensor.
+
+        The shorter side is scaled down to `max_resolution` (never up), then both sides are rounded
+        to the nearest multiple of 32.
+        """
+        if img.height < img.width:
+            ratio = float(self.max_resolution) / img.height
+        else:
+            ratio = float(self.max_resolution) / img.width
+        ratio = min(ratio, 1.0)
+
+        resize_h = int(img.height * ratio)
+        resize_w = int(img.width * ratio)
+
+        resize_h = int(round(resize_h / 32) * 32)
+        resize_w = int(round(resize_w / 32) * 32)
+        resized_img = img.resize((int(resize_w), int(resize_h)), resample=Image.Resampling.LANCZOS)
+
+        img_np: NDArray[np.float32] = cv2.cvtColor(np.array(resized_img, dtype=np.float32), cv2.COLOR_RGB2BGR)  # type: ignore
+        # NOTE(review): on 0-255 pixel values this computes (pixel - 0.5) / 127.5, whereas RapidOCR's
+        # reference preprocessing is (pixel / 255 - mean) / std. Confirm that `self.mean` is
+        # intentionally not scaled by 255 before changing it.
+        img_np -= self.mean
+        img_np *= self.std_inv
+        img_np = np.transpose(img_np, (2, 0, 1))
+        return np.expand_dims(img_np, axis=0)
+
+    def sorted_boxes(self, dt_boxes: NDArray[np.float32]) -> NDArray[np.float32]:
+        """Return `dt_boxes` in reading order; an empty input is returned unchanged."""
+        if len(dt_boxes) == 0:
+            return dt_boxes
+        sorted_boxes: NDArray[np.float32] = dt_boxes[self._reading_order(dt_boxes)]
+        return sorted_boxes
+
+    def _reading_order(self, dt_boxes: NDArray[np.float32]) -> NDArray[np.intp]:
+        """Compute the permutation that puts a non-empty `dt_boxes` into reading order.
+
+        Boxes are sorted by the y of their first point; a new line starts wherever two consecutive
+        y values differ by 10 or more. Within a line boxes are ordered by the x of their first point.
+        """
+        y_order = np.argsort(dt_boxes[:, 0, 1], kind="stable")
+        sorted_y = dt_boxes[y_order, 0, 1]
+
+        # line_ids[k] is the line of the k-th box *in y-sorted order*
+        line_ids = np.empty(len(dt_boxes), dtype=np.int32)
+        line_ids[0] = 0
+        np.cumsum(np.abs(np.diff(sorted_y)) >= 10, out=line_ids[1:])
+
+        # Both keys are in y-sorted order already, so they must not be indexed with `y_order` again.
+        # lexsort is stable and treats the last key as the primary one: line first, then x.
+        x_in_y_order = dt_boxes[y_order, 0, 0]
+        final_order = np.lexsort((x_in_y_order, line_ids))
+        order: NDArray[np.intp] = y_order[final_order]
+        return order
+
+    def configure(self, **kwargs: Any) -> None:
+        """Apply the optional `maxResolution`, `minScore` and `scoreMode` settings; `None` values are ignored."""
+        if (max_resolution := kwargs.get("maxResolution")) is not None:
+            self.max_resolution = max_resolution
+        if (min_score := kwargs.get("minScore")) is not None:
+            self.postprocess.box_thresh = min_score
+        if (score_mode := kwargs.get("scoreMode")) is not None:
+            self.postprocess.score_mode = score_mode
--- a/machine-learning/immich_ml/models/ocr/recognition.py
+++ b/machine-learning/immich_ml/models/ocr/recognition.py
@ -0,0 +1,153 @@
+from typing import Any
+
+import numpy as np
+from numpy.typing import NDArray
+from PIL import Image
+from rapidocr.ch_ppocr_rec import TextRecInput
+from rapidocr.ch_ppocr_rec import TextRecognizer as RapidTextRecognizer
+from rapidocr.inference_engine.base import FileInfo, InferSession
+from rapidocr.utils.download_file import DownloadFile, DownloadFileInput
+from rapidocr.utils.typings import EngineType, LangRec, OCRVersion, TaskType
+from rapidocr.utils.typings import ModelType as RapidModelType
+from rapidocr.utils.vis_res import VisRes
+
+from immich_ml.config import log, settings
+from immich_ml.models.base import InferenceModel
+from immich_ml.models.transforms import pil_to_cv2
+from immich_ml.schemas import ModelFormat, ModelSession, ModelTask, ModelType
+from immich_ml.sessions.ort import OrtSession
+
+from .schemas import OcrOptions, TextDetectionOutput, TextRecognitionOutput
+
+
+class TextRecognizer(InferenceModel):
+    """Text-recognition stage of the OCR pipeline.
+
+    Cuts every detected box out of the image with a perspective transform, runs RapidOCR's text
+    recognizer on the crops and keeps the results whose text score is above `min_score`.
+    """
+
+    depends = [(ModelType.DETECTION, ModelTask.OCR)]
+    identity = (ModelType.RECOGNITION, ModelTask.OCR)
+
+    def __init__(self, model_name: str, **model_kwargs: Any) -> None:
+        # a "<LANG>__<name>" model name selects the recognition language; CH is used otherwise
+        self.language = LangRec[model_name.split("__")[0]] if "__" in model_name else LangRec.CH
+        # an explicit `minScore=None` falls back to the default; `_predict` compares scores against this value
+        min_score = model_kwargs.get("minScore")
+        self.min_score = 0.9 if min_score is None else min_score
+        # shared result object returned whenever there is nothing to report
+        self._empty: TextRecognitionOutput = {
+            "box": np.empty(0, dtype=np.float32),
+            "boxScore": np.empty(0, dtype=np.float32),
+            "text": [],
+            "textScore": np.empty(0, dtype=np.float32),
+        }
+        # replaces `VisRes.__init__` with a no-op for the whole process
+        # NOTE(review): presumably avoids RapidOCR's visualization setup - confirm
+        VisRes.__init__ = lambda self, **kwargs: None  # pyright: ignore[reportAttributeAccessIssue]
+        super().__init__(model_name, **model_kwargs, model_format=ModelFormat.ONNX)
+
+    def _download(self) -> None:
+        """Resolve the PP-OCRv5 recognition model for `self.language` and download it to `self.model_path`.
+
+        The mobile variant is chosen when the model name contains "mobile", the server variant otherwise.
+        """
+        model_info = InferSession.get_model_url(
+            FileInfo(
+                engine_type=EngineType.ONNXRUNTIME,
+                ocr_version=OCRVersion.PPOCRV5,
+                task_type=TaskType.REC,
+                lang_type=self.language,
+                model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER,
+            )
+        )
+        download_params = DownloadFileInput(
+            file_url=model_info["model_dir"],
+            sha256=model_info["SHA256"],
+            save_path=self.model_path,
+            logger=log,
+        )
+        DownloadFile.run(download_params)
+
+    def _load(self) -> ModelSession:
+        """Create the ONNX Runtime session and the RapidOCR recognizer that runs on top of it."""
+        # TODO: support other runtimes
+        session = OrtSession(self.model_path)
+        self.model = RapidTextRecognizer(
+            OcrOptions(
+                session=session.session,
+                rec_batch_num=settings.max_batch_size.text_recognition if settings.max_batch_size is not None else 6,
+                rec_img_shape=(3, 48, 320),
+                lang_type=self.language,
+            )
+        )
+        return session
+
+    def _predict(self, img: Image.Image, texts: TextDetectionOutput) -> TextRecognitionOutput:
+        """Recognize the text inside every detected box.
+
+        Returns only the entries whose text score is strictly greater than `min_score`. Box
+        coordinates are divided by the image width/height and flattened to eight values per box.
+        The arrays in `texts` are not modified.
+        """
+        boxes, box_scores = texts["boxes"], texts["scores"]
+        if boxes.shape[0] == 0:
+            return self._empty
+        rec = self.model(TextRecInput(img=self.get_crop_img_list(img, boxes)))
+        if rec.txts is None:
+            return self._empty
+
+        # normalize a copy so the detector's output array is left untouched for any other consumer
+        normalized = boxes.copy()
+        normalized[:, :, 0] /= img.width
+        normalized[:, :, 1] /= img.height
+
+        text_scores = np.array(rec.scores)
+        valid_text_score_idx = text_scores > self.min_score
+        return {
+            "box": normalized.reshape(-1, 8)[valid_text_score_idx].reshape(-1),
+            "text": [txt for txt, keep in zip(rec.txts, valid_text_score_idx.tolist()) if keep],
+            "boxScore": box_scores[valid_text_score_idx],
+            "textScore": text_scores[valid_text_score_idx],
+        }
+
+    def get_crop_img_list(self, img: Image.Image, boxes: NDArray[np.float32]) -> list[NDArray[np.uint8]]:
+        """Cut every box out of `img` as an upright rectangle and return the crops as BGR arrays.
+
+        `boxes` is indexed as (N, 4, 2). The crop size is the longer of the two opposing edges in
+        each direction, truncated to an integer. Crops at least 1.5 times taller than wide are
+        rotated by 90 degrees.
+        """
+        img_crop_width = np.maximum(
+            np.linalg.norm(boxes[:, 1] - boxes[:, 0], axis=1), np.linalg.norm(boxes[:, 2] - boxes[:, 3], axis=1)
+        ).astype(np.int32)
+        img_crop_height = np.maximum(
+            np.linalg.norm(boxes[:, 0] - boxes[:, 3], axis=1), np.linalg.norm(boxes[:, 1] - boxes[:, 2], axis=1)
+        ).astype(np.int32)
+        # corners of the output rectangle: (0, 0), (w, 0), (w, h), (0, h)
+        pts_std = np.zeros((img_crop_width.shape[0], 4, 2), dtype=np.float32)
+        pts_std[:, 1:3, 0] = img_crop_width[:, None]
+        pts_std[:, 2:4, 1] = img_crop_height[:, None]
+
+        img_crop_sizes = np.stack([img_crop_width, img_crop_height], axis=1)
+        # the transform maps output-rectangle points onto box points
+        all_coeffs = self._get_perspective_transform(pts_std, boxes)
+        imgs: list[NDArray[np.uint8]] = []
+        for coeffs, dst_size in zip(all_coeffs, img_crop_sizes):
+            dst_img = img.transform(
+                size=tuple(dst_size),
+                method=Image.Transform.PERSPECTIVE,
+                data=tuple(coeffs),
+                resample=Image.Resampling.BICUBIC,
+            )
+
+            dst_width, dst_height = dst_img.size
+            if dst_height * 1.0 / dst_width >= 1.5:
+                dst_img = dst_img.rotate(90, expand=True)
+            imgs.append(pil_to_cv2(dst_img))
+
+        return imgs
+
+    def _get_perspective_transform(self, src: NDArray[np.float32], dst: NDArray[np.float32]) -> NDArray[np.float32]:
+        """Solve N perspective transforms at once.
+
+        `src` and `dst` are indexed as (N, 4, 2). For each pair an 8x9 system is built from the four
+        point correspondences and solved with SVD. The result has shape (N, 8): the first eight
+        entries of each 3x3 matrix after scaling so that its last entry is 1.
+        """
+        N = src.shape[0]
+        x, y = src[:, :, 0], src[:, :, 1]
+        u, v = dst[:, :, 0], dst[:, :, 1]
+        A = np.zeros((N, 8, 9), dtype=np.float32)
+
+        # Fill even rows (0, 2, 4, 6): [x, y, 1, 0, 0, 0, -u*x, -u*y, -u]
+        A[:, ::2, 0] = x
+        A[:, ::2, 1] = y
+        A[:, ::2, 2] = 1
+        A[:, ::2, 6] = -u * x
+        A[:, ::2, 7] = -u * y
+        A[:, ::2, 8] = -u
+
+        # Fill odd rows (1, 3, 5, 7): [0, 0, 0, x, y, 1, -v*x, -v*y, -v]
+        A[:, 1::2, 3] = x
+        A[:, 1::2, 4] = y
+        A[:, 1::2, 5] = 1
+        A[:, 1::2, 6] = -v * x
+        A[:, 1::2, 7] = -v * y
+        A[:, 1::2, 8] = -v
+
+        # Solve using SVD for all matrices at once; the last row of Vt is taken as the solution
+        _, _, Vt = np.linalg.svd(A)
+        H = Vt[:, -1, :].reshape(N, 3, 3)
+        H = H / H[:, 2:3, 2:3]
+
+        # Extract the 8 coefficients for each transformation
+        return np.column_stack(
+            [H[:, 0, 0], H[:, 0, 1], H[:, 0, 2], H[:, 1, 0], H[:, 1, 1], H[:, 1, 2], H[:, 2, 0], H[:, 2, 1]]
+        )  # pyright: ignore[reportReturnType]
+
+    def configure(self, **kwargs: Any) -> None:
+        """Apply the optional `minScore` setting; a missing or `None` value keeps the current threshold."""
+        if (min_score := kwargs.get("minScore")) is not None:
+            self.min_score = min_score
--- a/machine-learning/immich_ml/models/ocr/schemas.py
+++ b/machine-learning/immich_ml/models/ocr/schemas.py
@ -0,0 +1,27 @@
+from typing import Any, Iterable
+
+import numpy as np
+import numpy.typing as npt
+from rapidocr.utils.typings import EngineType, LangRec
+from typing_extensions import TypedDict
+
+
+# Result of the text-detection stage; `boxes[i]` and `scores[i]` describe the same region.
+# The recognizer in this change set indexes `boxes` as (N, 4, 2).
+class TextDetectionOutput(TypedDict):
+    boxes: npt.NDArray[np.float32]
+    scores: npt.NDArray[np.float32]
+
+
+# Result of the text-recognition stage.
+# The recognizer in this change set fills `box` with a flat array of eight values per kept text,
+# divided by the image width/height, and filters all four fields with the same mask.
+class TextRecognitionOutput(TypedDict):
+    box: npt.NDArray[np.float32]
+    boxScore: npt.NDArray[np.float32]
+    text: Iterable[str]
+    textScore: npt.NDArray[np.float32]
+
+
+# RapidOCR expects `engine_type`, `lang_type`, and `font_path` to be attributes
+class OcrOptions(dict[str, Any]):
+    # Options object that is both a dict (all extra keyword options become items)
+    # and a holder of three plain attributes set below.
+    def __init__(self, lang_type: LangRec | None = None, **options: Any) -> None:
+        super().__init__(**options)
+        # always ONNX Runtime; `lang_type` is stored as an attribute only, not as a dict item
+        self.engine_type = EngineType.ONNXRUNTIME
+        self.lang_type = lang_type
+        self.font_path = None
--- a/machine-learning/immich_ml/models/transforms.py
+++ b/machine-learning/immich_ml/models/transforms.py
@ -0,0 +1,80 @@
+import string
+from io import BytesIO
+from typing import IO
+
+import cv2
+import numpy as np
+import orjson
+from numpy.typing import NDArray
+from PIL import Image
+
+# lower-cased resampling name -> Pillow resampling enum member (used by `get_pil_resampling`)
+_PIL_RESAMPLING_METHODS = {resampling.name.lower(): resampling for resampling in Image.Resampling}
+# translation table that deletes every character in `string.punctuation` (used by `clean_text`)
+_PUNCTUATION_TRANS = str.maketrans("", "", string.punctuation)
+
+
+def resize_pil(img: Image.Image, size: int) -> Image.Image:
+    """Resize `img` with bicubic resampling so that its shorter side becomes `size`.
+
+    The longer side is scaled by the same factor and truncated to an integer.
+    """
+    width, height = img.width, img.height
+    if width < height:
+        target = (size, int((height / width) * size))
+    else:
+        target = (int((width / height) * size), size)
+    return img.resize(target, resample=Image.Resampling.BICUBIC)
+
+
+# https://stackoverflow.com/a/60883103
+def crop_pil(img: Image.Image, size: int) -> Image.Image:
+    """Return the `size` x `size` square taken around the center of `img`."""
+    width, height = img.size
+    left = int((width / 2) - (size / 2))
+    upper = int((height / 2) - (size / 2))
+    box = (left, upper, left + size, upper + size)
+    return img.crop(box)
+
+
+def to_numpy(img: Image.Image) -> NDArray[np.float32]:
+    """Convert `img` to a float32 array divided by 255, converting to RGB first when needed."""
+    if img.mode == "RGB":
+        rgb = img
+    else:
+        rgb = img.convert("RGB")
+    return np.asarray(rgb, dtype=np.float32) / 255.0
+
+
+def normalize(
+    img: NDArray[np.float32], mean: float | NDArray[np.float32], std: float | NDArray[np.float32]
+) -> NDArray[np.float32]:
+    """Subtract `mean` from `img` and divide the result by `std`."""
+    centered = img - mean
+    return centered / std
+
+
+def get_pil_resampling(resample: str) -> Image.Resampling:
+    """Look up a Pillow resampling method by name, ignoring case; unknown names raise `KeyError`."""
+    key = resample.lower()
+    return _PIL_RESAMPLING_METHODS[key]
+
+
+def pil_to_cv2(image: Image.Image) -> NDArray[np.uint8]:
+    """Copy `image` into a numpy array and swap its channels from RGB to BGR order."""
+    rgb = np.array(image)
+    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)  # type: ignore
+
+
+def decode_pil(image_bytes: bytes | IO[bytes] | Image.Image) -> Image.Image:
+    """Decode `image_bytes` into a fully loaded RGB Pillow image.
+
+    An `Image.Image` is returned as-is (without mode conversion). Raw buffers (`bytes`, `bytearray`,
+    `memoryview`) are wrapped in a `BytesIO`; anything else is handed to `Image.open` as a file object.
+    """
+    if isinstance(image_bytes, Image.Image):
+        return image_bytes
+    # `Image.open` needs a path or file object; `decode_cv2` also forwards bytearray/memoryview,
+    # so every bytes-like buffer has to be wrapped, not just `bytes`
+    if isinstance(image_bytes, (bytes, bytearray, memoryview)):
+        image: Image.Image = Image.open(BytesIO(image_bytes))
+    else:
+        image = Image.open(image_bytes)
+    # force decoding now so errors surface here rather than on first pixel access
+    image.load()
+    if image.mode != "RGB":
+        image = image.convert("RGB")
+    return image
+
+
+def decode_cv2(image_bytes: NDArray[np.uint8] | bytes | Image.Image) -> NDArray[np.uint8]:
+    """Return a BGR array for `image_bytes`.
+
+    Byte buffers are decoded through Pillow, Pillow images are converted directly, and any other
+    value is returned unchanged.
+    """
+    if isinstance(image_bytes, (bytes, memoryview, bytearray)):
+        return pil_to_cv2(decode_pil(image_bytes))  # pillow is much faster than cv2
+    if isinstance(image_bytes, Image.Image):
+        return pil_to_cv2(image_bytes)
+    return image_bytes
+
+
+def clean_text(text: str, canonicalize: bool = False) -> str:
+    """Collapse every run of whitespace in `text` into a single space and trim both ends.
+
+    With `canonicalize=True` the punctuation characters are removed and the result is lower-cased as well.
+    """
+    collapsed = " ".join(text.split())
+    if not canonicalize:
+        return collapsed
+    return collapsed.translate(_PUNCTUATION_TRANS).lower()
+
+
+# this allows the client to use the array as a string without deserializing only to serialize back to a string
+# TODO: use this in a less invasive way
+def serialize_np_array(arr: NDArray[np.float32]) -> str:
+    """Serialize `arr` to JSON with orjson's numpy support and return it as a `str`."""
+    encoded: bytes = orjson.dumps(arr, option=orjson.OPT_SERIALIZE_NUMPY)
+    return encoded.decode()
--- a/machine-learning/immich_ml/schemas.py
+++ b/machine-learning/immich_ml/schemas.py
@ -0,0 +1,122 @@
+from enum import Enum
+from typing import Any, Literal, Protocol, TypeGuard, TypeVar
+
+import numpy as np
+import numpy.typing as npt
+from typing_extensions import TypedDict
+
+
+# Base class for the string-valued enums below: members are `str` instances
+# and `str(member)` yields the member's value instead of "ClassName.MEMBER".
+class StrEnum(str, Enum):
+    value: str
+
+    def __str__(self) -> str:
+        return self.value
+
+
+# Four integer coordinates of a box.
+# NOTE(review): presumably (x1, y1) and (x2, y2) are opposite corners - confirm against the producer.
+class BoundingBox(TypedDict):
+    x1: int
+    y1: int
+    x2: int
+    y2: int
+
+
+# Tasks a model can belong to; note that SEARCH has the string value "clip".
+class ModelTask(StrEnum):
+    FACIAL_RECOGNITION = "facial-recognition"
+    SEARCH = "clip"
+    OCR = "ocr"
+
+
+# Kind of model within a task; combined with a `ModelTask` it forms a model identity.
+class ModelType(StrEnum):
+    DETECTION = "detection"
+    RECOGNITION = "recognition"
+    TEXTUAL = "textual"
+    VISUAL = "visual"
+
+
+# Model file formats known to the service.
+class ModelFormat(StrEnum):
+    ARMNN = "armnn"
+    ONNX = "onnx"
+    RKNN = "rknn"
+
+
+# Identifiers of model origins; how they are used is not visible in this part of the change set.
+class ModelSource(StrEnum):
+    INSIGHTFACE = "insightface"
+    MCLIP = "mclip"
+    OPENCLIP = "openclip"
+    PADDLE = "paddle"
+
+
+# Numeric precision names; unlike the other enums the values are upper-case.
+class ModelPrecision(StrEnum):
+    FP16 = "FP16"
+    FP32 = "FP32"
+
+
+# (model type, task) pair, the shape of a model class's `identity` attribute
+ModelIdentity = tuple[ModelType, ModelTask]
+
+
+# Structural type for one input or output of a session: an optional name and a shape tuple.
+class SessionNode(Protocol):
+    @property
+    def name(self) -> str | None: ...
+
+    @property
+    def shape(self) -> tuple[int, ...]: ...
+
+
+# Structural type shared by the inference session wrappers: run a feed of named arrays
+# and describe the session's inputs and outputs.
+class ModelSession(Protocol):
+    def run(
+        self,
+        output_names: list[str] | None,
+        input_feed: dict[str, npt.NDArray[np.float32]] | dict[str, npt.NDArray[np.int32]],
+        run_options: Any = None,
+    ) -> list[npt.NDArray[np.float32]]: ...
+
+    def get_inputs(self) -> list[SessionNode]: ...
+
+    def get_outputs(self) -> list[SessionNode]: ...
+
+
+# Structural type for objects carrying a `profiling` dict of float values; see `has_profiling`.
+class HasProfiling(Protocol):
+    profiling: dict[str, float]
+
+
+# Result of face detection: three arrays meant to be read index-by-index per face.
+class FaceDetectionOutput(TypedDict):
+    boxes: npt.NDArray[np.float32]
+    scores: npt.NDArray[np.float32]
+    landmarks: npt.NDArray[np.float32]
+
+
+# One face in a facial-recognition response; `embedding` is a string, not an array.
+class DetectedFace(TypedDict):
+    boundingBox: BoundingBox
+    embedding: str
+    score: float
+
+
+# full facial-recognition result: one entry per face
+FacialRecognitionOutput = list[DetectedFace]
+
+
+# One requested model: its name plus free-form options.
+class PipelineEntry(TypedDict):
+    modelName: str
+    options: dict[str, Any]
+
+
+# task -> model type -> requested model entry
+PipelineRequest = dict[ModelTask, dict[ModelType, PipelineEntry]]
+
+
+# A model request with task and type stored alongside the name and options.
+class InferenceEntry(TypedDict):
+    name: str
+    task: ModelTask
+    type: ModelType
+    options: dict[str, Any]
+
+
+# two lists of entries; what distinguishes the first list from the second is defined by the code that builds them
+InferenceEntries = tuple[list[InferenceEntry], list[InferenceEntry]]
+
+
+# response mapping keyed by task, plus the two literal keys "imageHeight" and "imageWidth"
+InferenceResponse = dict[ModelTask | Literal["imageHeight"] | Literal["imageWidth"], Any]
+
+
+def has_profiling(obj: Any) -> TypeGuard[HasProfiling]:
+    """Tell whether `obj` has a `profiling` attribute that is a `dict`."""
+    if not hasattr(obj, "profiling"):
+        return False
+    return isinstance(obj.profiling, dict)
+
+
+# unconstrained type variable for generic helpers that import it from this module
+T = TypeVar("T")
--- a/machine-learning/immich_ml/sessions/init.py
+++ b/machine-learning/immich_ml/sessions/init.py
--- a/machine-learning/immich_ml/sessions/ann/init.py
+++ b/machine-learning/immich_ml/sessions/ann/init.py
@ -0,0 +1,58 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, NamedTuple
+
+import numpy as np
+from numpy.typing import NDArray
+
+from immich_ml.config import log, settings
+from immich_ml.schemas import SessionNode
+
+from .loader import Ann
+
+
+class AnnSession:
+    """
+    Wrapper for ANN to be drop-in replacement for ONNX session.
+
+    Creating a session takes a reference on the shared `Ann` object and loads the model into it;
+    the finalizer unloads the model and gives that reference back.
+    """
+
+    def __init__(self, model_path: Path, cache_dir: Path = settings.cache_folder) -> None:
+        self.model_path = model_path
+        self.cache_dir = cache_dir
+        self.ann = Ann(tuning_level=settings.ann_tuning_level, tuning_file=(cache_dir / "gpu-tuning.ann").as_posix())
+
+        log.info("Loading ANN model %s ...", model_path)
+        # the compiled-network cache lives next to the model, with the suffix replaced by ".anncache"
+        self.model = self.ann.load(
+            model_path.as_posix(),
+            cached_network_path=model_path.with_suffix(".anncache").as_posix(),
+            fp16=settings.ann_fp16_turbo,
+        )
+        log.info("Loaded ANN model with ID %d", self.model)
+
+    def __del__(self) -> None:
+        # `__init__` can raise before `ann` or `model` is assigned; the finalizer still runs then,
+        # so only clean up what actually exists
+        ann = getattr(self, "ann", None)
+        if ann is None:
+            return
+        model = getattr(self, "model", None)
+        if model is not None:
+            ann.unload(model)
+            log.info("Unloaded ANN model %d", model)
+        # give back the reference taken by `Ann(...)` in `__init__`
+        ann.destroy()
+
+    def get_inputs(self) -> list[SessionNode]:
+        """Describe the model inputs; ANN exposes shapes only, so every node name is `None`."""
+        shapes = self.ann.input_shapes[self.model]
+        return [AnnNode(None, s) for s in shapes]
+
+    def get_outputs(self) -> list[SessionNode]:
+        """Describe the model outputs; ANN exposes shapes only, so every node name is `None`."""
+        shapes = self.ann.output_shapes[self.model]
+        return [AnnNode(None, s) for s in shapes]
+
+    def run(
+        self,
+        output_names: list[str] | None,
+        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
+        run_options: Any = None,
+    ) -> list[NDArray[np.float32]]:
+        """Execute the model.
+
+        `output_names` and `run_options` are accepted for interface compatibility and ignored.
+        Inputs are passed positionally in the iteration order of `input_feed`; the keys are not used.
+        """
+        inputs: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
+        return self.ann.execute(self.model, inputs)
+
+
+# Minimal node description returned by `AnnSession.get_inputs` / `get_outputs`; the name is always `None` there.
+class AnnNode(NamedTuple):
+    name: str | None
+    shape: tuple[int, ...]
--- a/machine-learning/immich_ml/sessions/ann/loader.py
+++ b/machine-learning/immich_ml/sessions/ann/loader.py
@ -0,0 +1,169 @@
+from __future__ import annotations
+
+from ctypes import CDLL, Array, c_bool, c_char_p, c_int, c_ulong, c_void_p
+from os.path import exists
+from typing import Any, Protocol, TypeVar
+
+import numpy as np
+from numpy.typing import NDArray
+
+from immich_ml.config import log
+
+# Load the native ANN library and declare the C signatures used by the `Ann` wrapper.
+# A failure to load either library is not fatal: it is logged and recorded in `is_available`.
+try:
+    CDLL("libmali.so")  # fail if libmali.so is not mounted into container
+    libann = CDLL("libann.so")
+    # init(int, int, char*) -> handle
+    libann.init.argtypes = c_int, c_int, c_char_p
+    libann.init.restype = c_void_p
+    # load(handle, char*, bool, bool, bool, char*) -> network id
+    libann.load.argtypes = c_void_p, c_char_p, c_bool, c_bool, c_bool, c_char_p
+    libann.load.restype = c_int
+    # execute(handle, network id, input pointers, output pointers)
+    libann.execute.argtypes = c_void_p, c_int, Array[c_void_p], Array[c_void_p]
+    libann.unload.argtypes = c_void_p, c_int
+    libann.destroy.argtypes = (c_void_p,)
+    # shape(handle, network id, is-input flag, tensor index) -> packed dimensions (decoded in `Ann.shape`)
+    libann.shape.argtypes = c_void_p, c_int, c_bool, c_int
+    libann.shape.restype = c_ulong
+    # tensors(handle, network id, is-input flag) -> tensor count
+    libann.tensors.argtypes = c_void_p, c_int, c_bool
+    libann.tensors.restype = c_int
+    is_available = True
+except OSError as e:
+    log.debug("Could not load ANN shared libraries, using ONNX: %s", e)
+    is_available = False
+
+# covariant type variable used by `Newable` and `_Singleton` below
+T = TypeVar("T", covariant=True)
+
+
+# Structural type for objects with a `new()` hook; `_Singleton` calls it on the cached instance.
+class Newable(Protocol[T]):
+    def new(self) -> None: ...
+
+
+# Metaclass that keeps a single instance per class.
+# The first call constructs the instance normally; every later call ignores its arguments,
+# returns the cached instance and calls that instance's `new()`.
+class _Singleton(type, Newable[T]):
+    # shared by all classes that use this metaclass, keyed by class
+    _instances: dict[_Singleton[T], Newable[T]] = {}
+
+    def __call__(cls, *args: Any, **kwargs: Any) -> Newable[T]:
+        if cls not in cls._instances:
+            obj: Newable[T] = super(_Singleton, cls).__call__(*args, **kwargs)
+            cls._instances[cls] = obj
+        else:
+            obj = cls._instances[cls]
+            # arguments of repeated calls are not applied; the instance is only notified
+            obj.new()
+        return obj
+
+
+class Ann(metaclass=_Singleton):
+    """Process-wide wrapper around the native `libann` handle.
+
+    The metaclass hands out one shared instance. Every `Ann(...)` call takes a reference through
+    `new()` and `destroy()` gives one back; the native handle is released when the count reaches zero
+    and re-created by the next `new()`.
+    """
+
+    def __init__(self, log_level: int = 3, tuning_level: int = 1, tuning_file: str | None = None) -> None:
+        if not is_available:
+            raise RuntimeError("libann is not available!")
+        if tuning_level == 0 and tuning_file is None:
+            raise ValueError("tuning_level == 0 reads existing tuning information and requires a tuning_file")
+        if tuning_level < 0 or tuning_level > 3:
+            raise ValueError("tuning_level must be 0 (load from tuning_file), 1, 2 or 3.")
+        if log_level < 0 or log_level > 5:
+            raise ValueError("log_level must be 0 (trace), 1 (debug), 2 (info), 3 (warning), 4 (error) or 5 (fatal)")
+        self.log_level = log_level
+        self.tuning_level = tuning_level
+        self.tuning_file = tuning_file
+        # per-network tensor shapes, filled by `load` and cleared by `unload`
+        self.output_shapes: dict[int, tuple[tuple[int, ...], ...]] = {}
+        self.input_shapes: dict[int, tuple[tuple[int, ...], ...]] = {}
+        self.ann: int | None = None
+        self.new()
+
+        if self.tuning_file is not None:
+            # make sure tuning file exists (without clearing contents)
+            # once filled, the tuning file reduces the cost/time of the first
+            # inference after model load by 10s of seconds
+            open(self.tuning_file, "a").close()
+
+    def new(self) -> None:
+        """Take a reference, creating the native handle first if there is none."""
+        if self.ann is None:
+            self.ann = libann.init(
+                self.log_level,
+                self.tuning_level,
+                self.tuning_file.encode() if self.tuning_file is not None else None,
+            )
+            self.ref_count = 0
+
+        self.ref_count += 1
+
+    def destroy(self) -> None:
+        """Give a reference back and release the native handle once none are left."""
+        self.ref_count -= 1
+        if self.ref_count <= 0 and self.ann is not None:
+            libann.destroy(self.ann)
+            self.ann = None
+
+    def __del__(self) -> None:
+        # `__init__` may have raised before `self.ann` was assigned, hence the getattr
+        if getattr(self, "ann", None) is not None:
+            libann.destroy(self.ann)
+            self.ann = None
+
+    def load(
+        self,
+        model_path: str,
+        fast_math: bool = True,
+        fp16: bool = False,
+        cached_network_path: str | None = None,
+    ) -> int:
+        """Load a model file and return its network id.
+
+        When `cached_network_path` is given but does not exist yet, an empty file is created there
+        and the library is asked to save the network cache into it.
+
+        Raises:
+            ValueError: unsupported extension, missing file, or the library reported a load failure.
+        """
+        if not model_path.endswith((".armnn", ".tflite", ".onnx")):
+            raise ValueError("model_path must be a file with extension .armnn, .tflite or .onnx")
+        if not exists(model_path):
+            raise ValueError("model_path must point to an existing file!")
+
+        save_cached_network = False
+        if cached_network_path is not None and not exists(cached_network_path):
+            save_cached_network = True
+            # create empty model cache file
+            open(cached_network_path, "a").close()
+
+        net_id: int = libann.load(
+            self.ann,
+            model_path.encode(),
+            fast_math,
+            fp16,
+            save_cached_network,
+            cached_network_path.encode() if cached_network_path is not None else None,
+        )
+        if net_id < 0:
+            raise ValueError("Cannot load model!")
+
+        self.input_shapes[net_id] = tuple(
+            self.shape(net_id, input=True, index=i) for i in range(self.tensors(net_id, input=True))
+        )
+        self.output_shapes[net_id] = tuple(
+            self.shape(net_id, input=False, index=i) for i in range(self.tensors(net_id, input=False))
+        )
+        return net_id
+
+    def unload(self, network_id: int) -> None:
+        """Unload a network and forget both of its recorded shape entries."""
+        libann.unload(self.ann, network_id)
+        # drop the input shapes too, otherwise a stale entry outlives the network
+        self.output_shapes.pop(network_id, None)
+        self.input_shapes.pop(network_id, None)
+
+    def execute(self, network_id: int, input_tensors: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
+        """Run a loaded network on `input_tensors` and return freshly allocated float32 outputs.
+
+        Raises:
+            ValueError: `input_tensors` is not a list, has the wrong length, or contains a tensor
+                whose shape differs from the network input or that is not C-contiguous.
+        """
+        if not isinstance(input_tensors, list):
+            raise ValueError("input_tensors needs to be a list!")
+        net_input_shapes = self.input_shapes[network_id]
+        if len(input_tensors) != len(net_input_shapes):
+            raise ValueError(f"input_tensors lengths {len(input_tensors)} != network inputs {len(net_input_shapes)}")
+        for net_input_shape, input_tensor in zip(net_input_shapes, input_tensors):
+            if net_input_shape != input_tensor.shape:
+                raise ValueError(f"input_tensor shape {input_tensor.shape} != network input shape {net_input_shape}")
+            if not input_tensor.flags.c_contiguous:
+                raise ValueError("input_tensors must be c_contiguous numpy ndarrays")
+        # uninitialized buffers handed to the library by pointer
+        output_tensors: list[NDArray[np.float32]] = [
+            np.empty(s, dtype=np.float32) for s in self.output_shapes[network_id]
+        ]
+        input_type = c_void_p * len(input_tensors)
+        inputs = input_type(*[t.ctypes.data_as(c_void_p) for t in input_tensors])
+        output_type = c_void_p * len(output_tensors)
+        outputs = output_type(*[t.ctypes.data_as(c_void_p) for t in output_tensors])
+        libann.execute(self.ann, network_id, inputs, outputs)
+        return output_tensors
+
+    def shape(self, network_id: int, input: bool = False, index: int = 0) -> tuple[int, ...]:
+        """Return the shape of one tensor, decoded from the packed integer the library returns.
+
+        Dimensions are read 16 bits at a time, lowest bits first, until the remaining value is zero.
+        """
+        s = libann.shape(self.ann, network_id, input, index)
+        a = []
+        while s != 0:
+            a.append(s & 0xFFFF)
+            s >>= 16
+        return tuple(a)
+
+    def tensors(self, network_id: int, input: bool = False) -> int:
+        """Return the number of input (`input=True`) or output tensors of a network."""
+        tensors: int = libann.tensors(self.ann, network_id, input)
+        return tensors
--- a/machine-learning/immich_ml/sessions/ort.py
+++ b/machine-learning/immich_ml/sessions/ort.py
@ -0,0 +1,147 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+import onnxruntime as ort
+from numpy.typing import NDArray
+
+from immich_ml.models.constants import SUPPORTED_PROVIDERS
+from immich_ml.schemas import SessionNode
+
+from ..config import log, settings
+
+
+class OrtSession:
+    """Wrapper around `ort.InferenceSession` that fills in defaults for anything the caller leaves out.
+
+    Providers, provider options and session options are each exposed as a property whose setter logs
+    the value; the matching `_*_default` property computes the fallback.
+    """
+
+    session: ort.InferenceSession
+
+    def __init__(
+        self,
+        model_path: Path | str,
+        providers: list[str] | None = None,
+        provider_options: list[dict[str, Any]] | None = None,
+        sess_options: ort.SessionOptions | None = None,
+    ):
+        self.model_path = Path(model_path)
+        # assignment order matters: the two later defaults are derived from `self.providers`
+        if providers is None:
+            providers = self._providers_default
+        self.providers = providers
+        if provider_options is None:
+            provider_options = self._provider_options_default
+        self.provider_options = provider_options
+        if sess_options is None:
+            sess_options = self._sess_options_default
+        self.sess_options = sess_options
+        self.session = ort.InferenceSession(
+            self.model_path.as_posix(),
+            providers=self.providers,
+            provider_options=self.provider_options,
+            sess_options=self.sess_options,
+        )
+
+    def get_inputs(self) -> list[SessionNode]:
+        """Return the input nodes reported by the underlying session."""
+        nodes: list[SessionNode] = self.session.get_inputs()
+        return nodes
+
+    def get_outputs(self) -> list[SessionNode]:
+        """Return the output nodes reported by the underlying session."""
+        nodes: list[SessionNode] = self.session.get_outputs()
+        return nodes
+
+    def run(
+        self,
+        output_names: list[str] | None,
+        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
+        run_options: Any = None,
+    ) -> list[NDArray[np.float32]]:
+        """Forward the call unchanged to the underlying session."""
+        results: list[NDArray[np.float32]] = self.session.run(output_names, input_feed, run_options)
+        return results
+
+    @property
+    def providers(self) -> list[str]:
+        return self._providers
+
+    @providers.setter
+    def providers(self, providers: list[str]) -> None:
+        log.info(f"Setting execution providers to {providers}, in descending order of preference")
+        self._providers = providers
+
+    @property
+    def _providers_default(self) -> list[str]:
+        """Supported providers that are available, in `SUPPORTED_PROVIDERS` order.
+
+        OpenVINO is dropped when it reports no device id starting with "GPU".
+        """
+        available_providers = set(ort.get_available_providers())
+        log.debug(f"Available ORT providers: {available_providers}")
+        openvino = "OpenVINOExecutionProvider"
+        if openvino in available_providers:
+            device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids()
+            log.debug(f"Available OpenVINO devices: {device_ids}")
+
+            gpu_devices = list(filter(lambda device_id: device_id.startswith("GPU"), device_ids))
+            if len(gpu_devices) == 0:
+                log.warning("No GPU device found in OpenVINO. Falling back to CPU.")
+                available_providers.remove(openvino)
+        return [provider for provider in SUPPORTED_PROVIDERS if provider in available_providers]
+
+    @property
+    def provider_options(self) -> list[dict[str, Any]]:
+        return self._provider_options
+
+    @provider_options.setter
+    def provider_options(self, provider_options: list[dict[str, Any]]) -> None:
+        log.debug(f"Setting execution provider options to {provider_options}")
+        self._provider_options = provider_options
+
+    @property
+    def _provider_options_default(self) -> list[dict[str, Any]]:
+        """One options dict per entry of `self.providers`, in the same order; unknown providers get `{}`."""
+        provider_options = []
+        for provider in self.providers:
+            options: dict[str, Any]
+            if provider == "CPUExecutionProvider":
+                options = {"arena_extend_strategy": "kSameAsRequested"}
+            elif provider in ("CUDAExecutionProvider", "ROCMExecutionProvider"):
+                options = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id}
+            elif provider == "OpenVINOExecutionProvider":
+                # compiled-model cache lives in an "openvino" folder next to the model
+                openvino_dir = self.model_path.parent / "openvino"
+                device = f"GPU.{settings.device_id}"
+                options = {
+                    "device_type": device,
+                    "precision": settings.openvino_precision.value,
+                    "cache_dir": openvino_dir.as_posix(),
+                }
+            elif provider == "CoreMLExecutionProvider":
+                options = {
+                    "ModelFormat": "MLProgram",
+                    "MLComputeUnits": "ALL",
+                    "SpecializationStrategy": "FastPrediction",
+                    "AllowLowPrecisionAccumulationOnGPU": "1",
+                    "ModelCacheDirectory": (self.model_path.parent / "coreml").as_posix(),
+                }
+            else:
+                options = {}
+            provider_options.append(options)
+        return provider_options
+
+    @property
+    def sess_options(self) -> ort.SessionOptions:
+        return self._sess_options
+
+    @sess_options.setter
+    def sess_options(self, sess_options: ort.SessionOptions) -> None:
+        log.debug(f"Setting execution_mode to {sess_options.execution_mode.name}")
+        log.debug(f"Setting inter_op_num_threads to {sess_options.inter_op_num_threads}")
+        log.debug(f"Setting intra_op_num_threads to {sess_options.intra_op_num_threads}")
+        self._sess_options = sess_options
+
+    @property
+    def _sess_options_default(self) -> ort.SessionOptions:
+        """Session options built from `settings`.
+
+        A positive thread setting is applied as-is. A setting of 0 becomes 1 inter-op / 2 intra-op
+        threads only when CPU is the sole provider. Parallel execution is enabled when more than one
+        inter-op thread ends up configured.
+        """
+        sess_options = ort.SessionOptions()
+        sess_options.enable_cpu_mem_arena = settings.model_arena
+        cpu_only = self.providers == ["CPUExecutionProvider"]
+
+        # avoid thread contention between models
+        inter_op_threads = settings.model_inter_op_threads
+        if inter_op_threads > 0:
+            sess_options.inter_op_num_threads = inter_op_threads
+        # these defaults work well for CPU, but bottleneck GPU
+        elif inter_op_threads == 0 and cpu_only:
+            sess_options.inter_op_num_threads = 1
+
+        intra_op_threads = settings.model_intra_op_threads
+        if intra_op_threads > 0:
+            sess_options.intra_op_num_threads = intra_op_threads
+        elif intra_op_threads == 0 and cpu_only:
+            sess_options.intra_op_num_threads = 2
+
+        if sess_options.inter_op_num_threads > 1:
+            sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL
+
+        return sess_options
--- a/machine-learning/immich_ml/sessions/rknn/init.py
+++ b/machine-learning/immich_ml/sessions/rknn/init.py
@ -0,0 +1,76 @@
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any, NamedTuple
+
+import numpy as np
+from numpy.typing import NDArray
+
+from immich_ml.config import log, settings
+from immich_ml.schemas import SessionNode
+
+from .rknnpool import RknnPoolExecutor, is_available, soc_name
+
+# RKNN is used only when the probe in rknnpool succeeded and the `settings.rknn` value is truthy.
+is_available = is_available and settings.rknn
+# Relative model directory "rknpu/<soc_name>"; None when RKNN is unavailable or no SoC name was detected.
+model_prefix = Path("rknpu") / soc_name if is_available and soc_name is not None else None
+
+
+def run_inference(rknn_lite: Any, input: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
+    """Run ``input`` through ``rknn_lite.inference`` with the "nchw" data format and return its outputs."""
+    # the annotated local gives the untyped runtime call a concrete return type
+    result: list[NDArray[np.float32]] = rknn_lite.inference(inputs=input, data_format="nchw")
+    return result
+
+
+# Node names and shapes that RknnSession.get_inputs()/get_outputs() report, keyed by model type
+# ("detection" or "recognition") and then by direction ("input" or "output").
+input_output_mapping: dict[str, dict[str, Any]] = {
+    "detection": {
+        "input": {"norm_tensor:0": (1, 3, 640, 640)},
+        "output": {
+            "norm_tensor:1": (12800, 1),
+            "norm_tensor:2": (3200, 1),
+            "norm_tensor:3": (800, 1),
+            "norm_tensor:4": (12800, 4),
+            "norm_tensor:5": (3200, 4),
+            "norm_tensor:6": (800, 4),
+            "norm_tensor:7": (12800, 10),
+            "norm_tensor:8": (3200, 10),
+            "norm_tensor:9": (800, 10),
+        },
+    },
+    "recognition": {"input": {"norm_tensor:0": (1, 3, 112, 112)}, "output": {"norm_tensor:1": (1, 512)}},
+}
+
+
+class RknnSession:
+    """Session that runs a model through a pool of RKNN runtimes.
+
+    The model type is "detection" when that word is one of the components of
+    ``model_path`` and "recognition" otherwise; it selects which entry of
+    ``input_output_mapping`` is reported by ``get_inputs`` and ``get_outputs``.
+    """
+
+    def __init__(self, model_path: Path) -> None:
+        is_detection = "detection" in model_path.parts
+        self.model_type = "detection" if is_detection else "recognition"
+        self.tpe = settings.rknn_threads
+
+        log.info(f"Loading RKNN model from {model_path} with {self.tpe} threads.")
+        self.rknnpool = RknnPoolExecutor(model_path=model_path.as_posix(), tpes=self.tpe, func=run_inference)
+        log.info(f"Loaded RKNN model from {model_path} with {self.tpe} threads.")
+
+    def _nodes(self, direction: str) -> list[SessionNode]:
+        """Build one node per entry of the mapping for this model type and ``direction``."""
+        shapes = input_output_mapping[self.model_type][direction]
+        return [RknnNode(name=node_name, shape=node_shape) for node_name, node_shape in shapes.items()]
+
+    def get_inputs(self) -> list[SessionNode]:
+        """Describe the input nodes of this model type."""
+        return self._nodes("input")
+
+    def get_outputs(self) -> list[SessionNode]:
+        """Describe the output nodes of this model type."""
+        return self._nodes("output")
+
+    def run(
+        self,
+        output_names: list[str] | None,
+        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
+        run_options: Any = None,
+    ) -> list[NDArray[np.float32]]:
+        """Submit the values of ``input_feed`` to the pool and return the inference outputs.
+
+        ``output_names`` and ``run_options`` are accepted but not used.
+
+        Raises:
+            RuntimeError: if the pool hands back ``None`` instead of a result.
+        """
+        contiguous: list[NDArray[np.float32]] = []
+        for tensor in input_feed.values():
+            contiguous.append(np.ascontiguousarray(tensor))
+        # NOTE(review): put() and get() are separate calls on a shared queue; presumably callers
+        # do not run one session concurrently, otherwise results could be swapped — confirm.
+        self.rknnpool.put(contiguous)
+        outputs = self.rknnpool.get()
+        if outputs is None:
+            raise RuntimeError("RKNN inference failed!")
+        return outputs
+
+
+class RknnNode(NamedTuple):
+    """Name and shape of one model input or output node, as built by ``RknnSession``."""
+
+    name: str | None
+    shape: tuple[int, ...]
+
+
+__all__ = ["RknnSession", "RknnNode", "is_available", "soc_name", "model_prefix"]
--- a/machine-learning/immich_ml/sessions/rknn/rknnpool.py
+++ b/machine-learning/immich_ml/sessions/rknn/rknnpool.py
@ -0,0 +1,91 @@
+# This code is from leafqycc/rknn-multi-threaded
+# Following Apache License 2.0
+
+import logging
+from concurrent.futures import Future, ThreadPoolExecutor
+from pathlib import Path
+from queue import Queue
+from typing import Callable
+
+import numpy as np
+from numpy.typing import NDArray
+
+from immich_ml.config import log
+from immich_ml.models.constants import RKNN_COREMASK_SUPPORTED_SOCS, RKNN_SUPPORTED_SOCS
+
+
+def get_soc(device_tree_path: Path | str) -> str | None:
+    """Return the first supported SoC whose name occurs in the file at ``device_tree_path``.
+
+    Args:
+        device_tree_path: file to read; the module passes "/proc/device-tree/compatible".
+
+    Returns:
+        The first entry of ``RKNN_SUPPORTED_SOCS`` found as a substring of the file
+        contents, or ``None`` when the file cannot be read or names no supported SoC.
+        Both ``None`` cases log a warning.
+    """
+    try:
+        with Path(device_tree_path).open() as f:
+            device_compatible_str = f.read()
+    except OSError as e:
+        # Pass the path as a logging argument rather than interpolating it into the format
+        # string: a "%" inside the path would otherwise be parsed as a format directive.
+        log.warning("Could not read %s. Reason: %s", device_tree_path, e)
+        return None
+
+    for soc in RKNN_SUPPORTED_SOCS:
+        if soc in device_compatible_str:
+            return soc
+    log.warning("Device is not supported for RKNN")
+    return None
+
+
+# Import-time probe: RKNN counts as available only when `rknnlite` imports successfully
+# and get_soc() finds a supported SoC in the device tree.
+soc_name = None
+is_available = False
+try:
+    from rknnlite.api import RKNNLite
+
+    soc_name = get_soc("/proc/device-tree/compatible")
+    is_available = soc_name is not None
+except ImportError:
+    # the rknnlite import failed; keep the "unavailable" defaults set above
+    log.debug("RKNN is not available")
+
+
+def init_rknn(model_path: str) -> "RKNNLite":
+    """Create an ``RKNNLite`` handle with ``model_path`` loaded and its runtime initialized.
+
+    Args:
+        model_path: path of the RKNN model file handed to ``load_rknn``.
+
+    Returns:
+        The ready-to-use ``RKNNLite`` handle; the caller is responsible for releasing it.
+
+    Raises:
+        RuntimeError: if RKNN is not available, the model fails to load, or the
+            runtime fails to initialize.
+    """
+    if not is_available:
+        raise RuntimeError("rknn is not available!")
+    rknn_lite = RKNNLite()
+    try:
+        rknn_lite.rknn_log.logger.setLevel(logging.ERROR)
+        if rknn_lite.load_rknn(model_path) != 0:
+            raise RuntimeError("Failed to load RKNN model")
+
+        if soc_name in RKNN_COREMASK_SUPPORTED_SOCS:
+            ret = rknn_lite.init_runtime(core_mask=RKNNLite.NPU_CORE_AUTO)
+        else:
+            ret = rknn_lite.init_runtime()  # Please do not set this parameter on other platforms.
+
+        if ret != 0:
+            raise RuntimeError("Failed to initialize RKNN runtime environment")
+    except Exception:
+        # Free the half-initialized handle before propagating; a failing cleanup must not
+        # hide the error that brought us here.
+        try:
+            rknn_lite.release()
+        except Exception as release_error:
+            log.debug("Could not release RKNN handle after failed initialization: %s", release_error)
+        raise
+
+    return rknn_lite
+
+
+class RknnPoolExecutor:
+    """Pool of ``tpes`` RKNN runtimes used round-robin from ``tpes`` worker threads.
+
+    ``put`` submits ``func(runtime, inputs)`` to the thread pool and queues the resulting
+    future; ``get`` returns the result of the oldest queued future.
+    """
+
+    def __init__(
+        self,
+        model_path: str,
+        tpes: int,
+        func: Callable[["RKNNLite", list[NDArray[np.float32]]], list[NDArray[np.float32]]],
+    ) -> None:
+        """Load ``model_path`` into ``tpes`` runtimes.
+
+        If any runtime fails to load, the ones already created are released and the
+        error is re-raised.
+        """
+        self.tpes = tpes
+        self.queue: Queue[Future[list[NDArray[np.float32]]]] = Queue()
+        self.func = func
+        self.num = 0
+        # Assigned before anything that can fail so release()/__del__ never meet a missing attribute.
+        self._released = False
+        self.rknn_pool: list["RKNNLite"] = []
+        self.pool = ThreadPoolExecutor(max_workers=tpes)
+        try:
+            for _ in range(tpes):
+                self.rknn_pool.append(init_rknn(model_path))
+        except Exception:
+            # Free the runtimes that did load instead of leaking them.
+            self.release()
+            raise
+
+    def put(self, inputs: list[NDArray[np.float32]]) -> None:
+        """Schedule ``func`` on the next runtime in round-robin order and queue its future."""
+        self.queue.put(self.pool.submit(self.func, self.rknn_pool[self.num % self.tpes], inputs))
+        self.num += 1
+
+    def get(self) -> list[NDArray[np.float32]] | None:
+        """Return the result of the oldest queued future, or ``None`` when nothing is queued.
+
+        Waits for the future to finish; an exception raised by ``func`` is re-raised here.
+        """
+        if self.queue.empty():
+            return None
+        fut = self.queue.get()
+        return fut.result()
+
+    def release(self) -> None:
+        """Shut down the worker threads and release every runtime; repeated calls are no-ops."""
+        # getattr keeps this safe for __del__ on an instance whose __init__ did not get far.
+        if getattr(self, "_released", False):
+            return
+        self._released = True
+        pool = getattr(self, "pool", None)
+        if pool is not None:
+            pool.shutdown()
+        for rknn_lite in getattr(self, "rknn_pool", ()):
+            rknn_lite.release()
+
+    def __del__(self) -> None:
+        self.release()
--- a/machine-learning/locustfile.py
+++ b/machine-learning/locustfile.py
@ -0,0 +1,81 @@
+import json
+from argparse import ArgumentParser
+from io import BytesIO
+from typing import Any
+
+from locust import HttpUser, events, task
+from locust.env import Environment
+from PIL import Image
+
+byte_image = BytesIO()
+
+
+@events.init_command_line_parser.add_listener
+def _(parser: ArgumentParser) -> None:
+    """Register this load test's custom command-line options on the parser locust passes in."""
+    parser.add_argument("--clip-model", type=str, default="ViT-B-32::openai")
+    parser.add_argument("--face-model", type=str, default="buffalo_l")
+    parser.add_argument(
+        "--face-min-score",
+        # the score is fractional (see the default), so it must be parsed as a float;
+        # an int type rejects command-line values such as "0.5"
+        type=float,
+        default=0.034,
+        help=(
+            "Returns all faces at or above this score. The default returns 1 face per request; "
+            "setting this to 0 blows up the number of faces to the thousands."
+        ),
+    )
+    parser.add_argument("--image-size", type=int, default=1000)
+
+
+@events.test_start.add_listener
+def on_test_start(environment: Environment, **kwargs: Any) -> None:
+    """Render the blank square JPEG the users upload, sized by the ``--image-size`` option.
+
+    The encoded image is stored in the module-level ``byte_image`` buffer.
+    """
+    global byte_image
+    assert environment.parsed_options is not None
+    side = environment.parsed_options.image_size
+    image = Image.new("RGB", (side, side))
+    # Save into a fresh buffer and rebind the global: writing into the existing buffer again
+    # would append a second JPEG after the one left by an earlier test start in this process.
+    buffer = BytesIO()
+    image.save(buffer, format="jpeg")
+    byte_image = buffer
+
+
+class InferenceLoadTest(HttpUser):
+    """Base user for the inference load tests; holds the image payload the subclasses upload."""
+
+    # locust attribute; presumably keeps this base class from being run as a user itself — confirm
+    abstract: bool = True
+    host = "http://127.0.0.1:3003"
+    data: bytes
+
+    # re-use the image across all instances in a process
+    def on_start(self) -> None:
+        """Store the current contents of the module-level ``byte_image`` buffer in ``self.data``."""
+        self.data = byte_image.getvalue()
+
+
+class CLIPTextFormDataLoadTest(InferenceLoadTest):
+    """Load test that posts a CLIP textual request to ``/predict`` as form data."""
+
+    @task
+    def encode_text(self) -> None:
+        """Send a fixed search query for the model chosen with ``--clip-model``."""
+        model_name = self.environment.parsed_options.clip_model
+        entries = json.dumps({"clip": {"textual": {"modelName": model_name}}})
+        form_fields = [("entries", entries), ("text", "test search query")]
+        self.client.post("/predict", data=form_fields)
+
+
+class CLIPVisionFormDataLoadTest(InferenceLoadTest):
+    """Load test that posts a CLIP visual request with the shared image to ``/predict``."""
+
+    @task
+    def encode_image(self) -> None:
+        """Upload ``self.data`` for the model chosen with ``--clip-model``."""
+        model_name = self.environment.parsed_options.clip_model
+        visual = {"modelName": model_name, "options": {}}
+        entries = json.dumps({"clip": {"visual": visual}})
+        self.client.post("/predict", data=[("entries", entries)], files={"image": self.data})
+
+
+class RecognitionFormDataLoadTest(InferenceLoadTest):
+    """Load test that posts a facial-recognition request with the shared image to ``/predict``."""
+
+    @task
+    def recognize(self) -> None:
+        """Upload ``self.data`` using ``--face-model`` for both stages and ``--face-min-score`` for detection."""
+        options = self.environment.parsed_options
+        recognition = {"modelName": options.face_model}
+        detection = {
+            "modelName": options.face_model,
+            "options": {"minScore": options.face_min_score},
+        }
+        # recognition stays the first key so the serialized JSON keeps its key order
+        payload = {"facial-recognition": {"recognition": recognition, "detection": detection}}
+        form_fields = [("entries", json.dumps(payload))]
+        upload = {"image": self.data}
+
+        self.client.post("/predict", data=form_fields, files=upload)
--- a/machine-learning/patches/0001-disable-rocm-conv-algo-caching.patch
+++ b/machine-learning/patches/0001-disable-rocm-conv-algo-caching.patch
@ -0,0 +1,179 @@
+commit 16839b58d9b3c3162a67ce5d776b36d4d24e801f
+Author: mertalev <101130780+mertalev@users.noreply.github.com>
+Date:   Wed Mar 5 11:25:38 2025 -0500
+
+    disable algo caching (attributed to @dmnieto in https://github.com/microsoft/onnxruntime/pull/19567)
+
+diff --git a/onnxruntime/core/providers/rocm/nn/conv.cc b/onnxruntime/core/providers/rocm/nn/conv.cc
+index d7f47d07a8..4060a2af52 100644
+--- a/onnxruntime/core/providers/rocm/nn/conv.cc
+++ b/onnxruntime/core/providers/rocm/nn/conv.cc
+@@ -127,7 +127,6 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
+
+     if (w_dims_changed) {
+       s_.last_w_dims = gsl::make_span(w_dims);
+-      s_.cached_benchmark_fwd_results.clear();
+     }
+
+     ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X->Shape(), W->Shape(), channels_last, channels_last));
+@@ -277,35 +276,6 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
+       HIP_CALL_THROW(hipMalloc(&s_.b_zero, malloc_size));
+       HIP_CALL_THROW(hipMemsetAsync(s_.b_zero, 0, malloc_size, Stream(context)));
+     }
+-
+-    if (!s_.cached_benchmark_fwd_results.contains(x_dims_miopen)) {
+-      miopenConvAlgoPerf_t perf;
+-      int algo_count = 1;
+-      const ROCMExecutionProvider* rocm_ep = static_cast<const ROCMExecutionProvider*>(this->Info().GetExecutionProvider());
+-      static constexpr int num_algos = MIOPEN_CONVOLUTION_FWD_ALGO_COUNT;
+-      size_t max_ws_size = rocm_ep->GetMiopenConvUseMaxWorkspace() ? GetMaxWorkspaceSize(GetMiopenHandle(context), s_, kAllAlgos, num_algos, rocm_ep->GetDeviceId())
+-                                                                   : AlgoSearchWorkspaceSize;
+-      IAllocatorUniquePtr<void> algo_search_workspace = GetTransientScratchBuffer<void>(max_ws_size);
+-      MIOPEN_RETURN_IF_ERROR(miopenFindConvolutionForwardAlgorithm(
+-          GetMiopenHandle(context),
+-          s_.x_tensor,
+-          s_.x_data,
+-          s_.w_desc,
+-          s_.w_data,
+-          s_.conv_desc,
+-          s_.y_tensor,
+-          s_.y_data,
+-          1,            // requestedAlgoCount
+-          &algo_count,  // returnedAlgoCount
+-          &perf,
+-          algo_search_workspace.get(),
+-          max_ws_size,
+-          false));  // Do not do exhaustive algo search.
+-      s_.cached_benchmark_fwd_results.insert(x_dims_miopen, {perf.fwd_algo, perf.memory});
+-    }
+-    const auto& perf = s_.cached_benchmark_fwd_results.at(x_dims_miopen);
+-    s_.fwd_algo = perf.fwd_algo;
+-    s_.workspace_bytes = perf.memory;
+   } else {
+     // set Y
+     s_.Y = context->Output(0, TensorShape(s_.y_dims));
+@@ -319,6 +289,31 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
+       s_.y_data = reinterpret_cast<HipT*>(s_.Y->MutableData<T>());
+     }
+   }
+
+  miopenConvAlgoPerf_t perf;
+  int algo_count = 1;
+  const ROCMExecutionProvider* rocm_ep = static_cast<const ROCMExecutionProvider*>(this->Info().GetExecutionProvider());
+  static constexpr int num_algos = MIOPEN_CONVOLUTION_FWD_ALGO_COUNT;
+  size_t max_ws_size = rocm_ep->GetMiopenConvUseMaxWorkspace() ? GetMaxWorkspaceSize(GetMiopenHandle(context), s_, kAllAlgos, num_algos, rocm_ep->GetDeviceId())
+                                                                : AlgoSearchWorkspaceSize;
+  IAllocatorUniquePtr<void> algo_search_workspace = GetTransientScratchBuffer<void>(max_ws_size);
+  MIOPEN_RETURN_IF_ERROR(miopenFindConvolutionForwardAlgorithm(
+      GetMiopenHandle(context),
+      s_.x_tensor,
+      s_.x_data,
+      s_.w_desc,
+      s_.w_data,
+      s_.conv_desc,
+      s_.y_tensor,
+      s_.y_data,
+      1,            // requestedAlgoCount
+      &algo_count,  // returnedAlgoCount
+      &perf,
+      algo_search_workspace.get(),
+      max_ws_size,
+      false));  // Do not do exhaustive algo search.
+  s_.fwd_algo = perf.fwd_algo;
+  s_.workspace_bytes = perf.memory;
+   return Status::OK();
+ }
+
+diff --git a/onnxruntime/core/providers/rocm/nn/conv.h b/onnxruntime/core/providers/rocm/nn/conv.h
+index bc9846203e..d54218f258 100644
+--- a/onnxruntime/core/providers/rocm/nn/conv.h
+++ b/onnxruntime/core/providers/rocm/nn/conv.h
+@@ -108,9 +108,6 @@ class lru_unordered_map {
+   list_type lru_list_;
+ };
+
+-// cached miopen descriptors
+-constexpr size_t MAX_CACHED_ALGO_PERF_RESULTS = 10000;
+-
+ template <typename AlgoPerfType>
+ struct MiopenConvState {
+   // if x/w dims changed, update algo and miopenTensors
+@@ -148,9 +145,6 @@ struct MiopenConvState {
+     decltype(AlgoPerfType().memory) memory;
+   };
+
+-  lru_unordered_map<TensorShapeVector, PerfFwdResultParams, vector_hash> cached_benchmark_fwd_results{MAX_CACHED_ALGO_PERF_RESULTS};
+-  lru_unordered_map<TensorShapeVector, PerfBwdResultParams, vector_hash> cached_benchmark_bwd_results{MAX_CACHED_ALGO_PERF_RESULTS};
+-
+   // Some properties needed to support asymmetric padded Conv nodes
+   bool post_slicing_required;
+   TensorShapeVector slice_starts;
+diff --git a/onnxruntime/core/providers/rocm/nn/conv_transpose.cc b/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
+index 7447113fdf..a662e35b2e 100644
+--- a/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
+++ b/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
+@@ -76,7 +76,6 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
+
+       if (w_dims_changed) {
+         s_.last_w_dims = gsl::make_span(w_dims);
+-        s_.cached_benchmark_bwd_results.clear();
+       }
+
+       ConvTransposeAttributes::Prepare p;
+@@ -126,35 +125,29 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
+       }
+
+       y_data = reinterpret_cast<HipT*>(p.Y->MutableData<T>());
+-
+-      if (!s_.cached_benchmark_bwd_results.contains(x_dims)) {
+-        IAllocatorUniquePtr<void> algo_search_workspace = GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());
+-
+-        miopenConvAlgoPerf_t perf;
+-        int algo_count = 1;
+-        MIOPEN_RETURN_IF_ERROR(miopenFindConvolutionBackwardDataAlgorithm(
+-            GetMiopenHandle(context),
+-            s_.x_tensor,
+-            x_data,
+-            s_.w_desc,
+-            w_data,
+-            s_.conv_desc,
+-            s_.y_tensor,
+-            y_data,
+-            1,
+-            &algo_count,
+-            &perf,
+-            algo_search_workspace.get(),
+-            AlgoSearchWorkspaceSize,
+-            false));
+-        s_.cached_benchmark_bwd_results.insert(x_dims, {perf.bwd_data_algo, perf.memory});
+-      }
+-
+-      const auto& perf = s_.cached_benchmark_bwd_results.at(x_dims);
+-      s_.bwd_data_algo = perf.bwd_data_algo;
+-      s_.workspace_bytes = perf.memory;
+     }
+
+    IAllocatorUniquePtr<void> algo_search_workspace = GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());
+    miopenConvAlgoPerf_t perf;
+    int algo_count = 1;
+    MIOPEN_RETURN_IF_ERROR(miopenFindConvolutionBackwardDataAlgorithm(
+        GetMiopenHandle(context),
+        s_.x_tensor,
+        x_data,
+        s_.w_desc,
+        w_data,
+        s_.conv_desc,
+        s_.y_tensor,
+        y_data,
+        1,
+        &algo_count,
+        &perf,
+        algo_search_workspace.get(),
+        AlgoSearchWorkspaceSize,
+        false));
+    s_.bwd_data_algo = perf.bwd_data_algo;
+    s_.workspace_bytes = perf.memory;
+
+     // The following block will be executed in case there has been no change in the shapes of the
+     // input and the filter compared to the previous run
+     if (!y_data) {
--- a/machine-learning/patches/0002-install-system-deps.patch
+++ b/machine-learning/patches/0002-install-system-deps.patch
@ -0,0 +1,33 @@
+diff --git a/dockerfiles/scripts/install_common_deps.sh b/dockerfiles/scripts/install_common_deps.sh
+index bbb672a99e..0dc652fbda 100644
+--- a/dockerfiles/scripts/install_common_deps.sh
+++ b/dockerfiles/scripts/install_common_deps.sh
+@@ -8,16 +8,23 @@ apt-get update && apt-get install -y --no-install-recommends \
+         curl \
+         libcurl4-openssl-dev \
+         libssl-dev \
+-        python3-dev
+        python3-dev \
+        ccache
+ 
+ # Dependencies: conda
+-wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh --no-check-certificate && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
+wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py312_25.9.1-1-Linux-x86_64.sh -O ~/miniconda.sh && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
+ rm ~/miniconda.sh
+ /opt/miniconda/bin/conda clean -ya
+ 
+-pip install numpy
+-pip install packaging
+-pip install "wheel>=0.35.1"
+# Dependencies: venv and packages
+/opt/miniconda/bin/python3 -m venv /opt/rocm-venv
+/opt/rocm-venv/bin/pip install --no-cache-dir --upgrade pip
+/opt/rocm-venv/bin/pip install --no-cache-dir \
+  "numpy==2.3.4" \
+  "packaging==25.0" \
+  "wheel==0.45.1" \
+  "setuptools==80.9.0"
+
+ rm -rf /opt/miniconda/pkgs
+ 
+ # Dependencies: cmake
--- a/machine-learning/pyproject.toml
+++ b/machine-learning/pyproject.toml
@ -0,0 +1,101 @@
+[project]
+name = "immich-ml"
+version = "2.5.2"
+description = ""
+authors = [{ name = "Hau Tran", email = "alex.tran1502@gmail.com" }]
+requires-python = ">=3.11,<4.0"
+readme = "README.md"
+dependencies = [
+    "aiocache>=0.12.1,<1.0",
+    "fastapi>=0.95.2,<1.0",
+    "ftfy>=6.1.1",
+    "gunicorn>=21.1.0",
+    "huggingface-hub>=0.20.1,<1.0",
+    "insightface>=0.7.3,<1.0",
+    "numpy>=2.3.4",
+    "opencv-python-headless>=4.7.0.72,<5.0",
+    "orjson>=3.9.5",
+    "pillow>=9.5.0,<11.0",
+    "pydantic>=2.0.0,<3",
+    "pydantic-settings>=2.5.2,<3",
+    "python-multipart>=0.0.6,<1.0",
+    "rich>=13.4.2",
+    "tokenizers>=0.15.0,<1.0",
+    "uvicorn[standard]>=0.22.0,<1.0",
+    "rapidocr>=3.1.0",
+]
+
+[dependency-groups]
+test = [
+    "httpx>=0.24.1",
+    "pytest>=7.3.1",
+    "pytest-asyncio>=0.21.0",
+    "pytest-cov>=4.1.0",
+    "pytest-mock>=3.11.1",
+]
+types = [
+    "types-pyyaml>=6.0.12.20241230",
+    "types-requests>=2.32.0.20250306",
+    "types-setuptools>=75.8.2.20250305",
+    "types-simplejson>=3.20.0.20250218",
+    "types-ujson>=5.10.0.20240515",
+]
+lint = [
+    "black>=23.3.0",
+    "mypy>=1.3.0",
+    "ruff>=0.0.272",
+    { include-group = "types" },
+]
+dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }]
+
+[project.optional-dependencies]
+cpu = ["onnxruntime>=1.23.2,<2"]
+cuda = ["onnxruntime-gpu>=1.23.2,<2"]
+openvino = ["onnxruntime-openvino>=1.23.0,<2"]
+armnn = ["onnxruntime>=1.23.2,<2"]
+rknn = ["onnxruntime>=1.23.2,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
+rocm = []
+
+[tool.uv]
+compile-bytecode = true
+
+[tool.hatch.build.targets.sdist]
+include = ["immich_ml"]
+
+[tool.hatch.build.targets.wheel]
+include = ["immich_ml"]
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.mypy]
+python_version = "3.11"
+plugins = "pydantic.mypy"
+follow_imports = "silent"
+warn_redundant_casts = true
+disallow_any_generics = true
+check_untyped_defs = true
+disallow_untyped_defs = true
+ignore_missing_imports = true
+
+[tool.pydantic-mypy]
+init_forbid_extra = true
+init_typed = true
+warn_required_dynamic_aliases = true
+warn_untyped_fields = true
+
+[tool.ruff]
+line-length = 120
+target-version = "py311"
+
+[tool.ruff.lint]
+select = ["E", "F", "I"]
+per-file-ignores = { "test_main.py" = ["F403"] }
+
+[tool.black]
+line-length = 120
+target-version = ['py311']
+
+[tool.pytest.ini_options]
+markers = ["providers", "ov_device_ids"]
--- a/machine-learning/responses.json
+++ b/machine-learning/responses.json
@ -0,0 +1,329 @@
+{
+  "clip": {
+    "image": [
+      -0.013126737, -0.022995953, -0.0493738, -0.0063057775, 0.013601424,
+      -0.003761688, -0.03379882, 0.11106285, 0.024760082, 0.023903701,
+      0.04418207, -0.013594999, 0.030850016, 0.0012876489, -0.012471005,
+      0.009750715, 0.0095717255, 0.013320666, 0.0027921356, 0.03240264,
+      0.033538498, 0.013624318, -0.0069946186, -0.0036184592, -0.009846507,
+      -0.017311024, -0.036686428, -0.0041808123, 0.030871637, -0.028624479,
+      -0.016515259, 0.014418001, -0.024542322, -0.0025438748, -0.049111884,
+      -0.023928944, 0.012270045, -0.016418075, 0.004895335, -0.15801854,
+      -0.026325515, 0.03166467, -0.017224329, 0.0411128, -0.022944424,
+      0.015693054, -0.020919899, -0.010764121, -0.008499815, -0.020263294,
+      -0.009743323, -0.035395622, 0.03474742, 0.003049183, 0.009424564,
+      0.010707678, 0.01664117, -0.0059027374, -0.013055344, 0.0018035833,
+      -0.003976456, -0.04325922, 0.014407317, 0.035728276, 0.002226939,
+      -0.006235411, -0.0073032235, -0.035129357, 0.001095443, -0.028552389,
+      -0.044300288, -0.012959393, 0.02259977, 0.017141517, -0.029432472,
+      -0.017583484, 0.010974336, 0.018141218, 0.0015389329, -0.008220305,
+      -0.0060965014, 0.004929384, 0.019477025, -0.033071984, 0.025183259,
+      0.013607688, 0.01836233, 0.04586782, 0.0103442725, -0.036077496,
+      -0.029715508, 0.007203621, -0.7949153, 0.046866275, 0.026910711,
+      -0.0047834567, 0.033243995, 0.009379981, -0.03749048, -0.055274535,
+      -0.01955359, 0.012887587, 0.00922838, -0.0032776103, -0.011456734,
+      0.0045412215, -0.11506394, 0.0348558, 0.029478835, -0.011811103,
+      -0.00483158, -0.010586126, -0.018853206, -0.01591496, -0.019360982,
+      -0.03211199, -0.013473663, -0.019630248, -0.017012835, 0.059128772,
+      -0.03396129, 0.0045991736, -0.015158291, 0.008241974, 0.004403056,
+      -0.007536049, -0.023821214, -0.0059521003, 0.015564905, 0.020600233,
+      0.008175, 0.02100119, -0.0034459834, 0.1058016, 0.008383205, 0.03100292,
+      -0.023814196, -0.016157096, -0.008210107, -0.004146204, 0.016350364,
+      -0.056028433, 0.013261071, 0.034839876, -0.03236049, 0.026573967,
+      -0.018140865, 0.018515658, 0.013251766, 0.007693613, 0.0067239976,
+      -0.0013857568, 0.038114607, 0.0068016117, 0.036603037, 0.0040935865,
+      0.010394745, -0.00082285365, -0.009811308, 0.020343611, -0.012164189,
+      -0.012208623, 0.0005465415, -0.015394064, 0.02499845, 0.021941017,
+      -0.016571017, -0.011810332, 0.017864, -0.010639794, -0.008609091,
+      -0.0007239709, 0.015229945, -0.0035874692, 0.018922612, -0.011209458,
+      -0.013052865, -0.009626533, -0.004419959, 0.007915186, 0.01094836,
+      0.005509159, -0.0034862005, 0.01012292, -0.0059307595, -0.029599648,
+      0.032845, -0.007011692, -0.014218981, 0.00790071, 0.017027052,
+      -0.022314077, -0.03041719, 0.015665755, 0.036747217, -0.018942915,
+      0.008623111, 0.02179961, -0.022312569, 0.007024427, 0.016751591,
+      -0.0034192575, 0.024101255, -0.0046198783, 0.022274924, -0.015562676,
+      -0.0092551885, -0.0063787713, 0.045996074, 0.026235346, 0.009622556,
+      0.05728027, 0.03168525, -0.017600676, 0.029278612, 0.01467962,
+      0.032169178, 0.022459356, -0.012175933, -0.009438608, 0.027234165,
+      0.013514767, -0.008831029, 0.010888894, 0.004518216, 0.009855367,
+      0.012112431, -0.0073178695, 0.0072642234, 4.877679e-5, -0.01221576,
+      0.023542404, -0.009026452, -0.055442516, 0.006579068, 0.033202186,
+      -0.007669379, 0.0010604112, -0.04271919, -0.029112164, 0.021844024,
+      0.029739635, -0.026083348, 0.008940292, -0.039301652, -0.047215454,
+      0.0018794702, -0.008740231, 0.029195482, 0.0037629968, -0.024923965,
+      -0.021407248, 0.009952853, -0.0055059143, 0.0044912454, 0.016966008,
+      -0.00081178773, -0.022250004, -0.014063889, -0.006170697, -0.0008208651,
+      -0.036218595, -0.0029040456, 0.03943083, -0.021814227, 0.017567957,
+      0.035849728, -0.049075987, 0.0040634805, 0.009878297, 0.028557112,
+      0.02336673, 0.010714448, 0.020129073, -0.030503238, 0.009350441,
+      0.039086528, -0.0037483997, -0.0034365985, 0.019824414, 0.014027232,
+      0.030565958, 0.0036307913, 0.0030920429, -0.009908996, 0.0027933475,
+      -7.140754e-5, -0.027733125, 0.0022445584, -0.032248124, 0.050226185,
+      0.030529078, -0.040353864, 0.031086015, -0.0063569676, 0.031343475,
+      -0.020244656, -0.011442288, 0.018035123, -0.005479394, 0.01783419,
+      -0.036066547, -0.0106600635, 0.044636995, -0.030209303, -0.07192714,
+      0.0128155155, 0.003505818, -0.0005725083, -0.01584388, -0.025725754,
+      0.025868604, 0.10576061, -0.012738124, 0.0012224225, 0.0472961,
+      0.021650923, 0.0061313445, 0.014010678, 0.016864019, 0.004049639,
+      0.10989465, 0.011927816, 0.013589654, 0.011258818, 0.022496557,
+      -0.018828733, 0.021635532, 0.0116777215, 0.11320542, -0.0011280471,
+      0.018990291, 0.001824643, -0.03793715, 0.0206918, -0.0050228164,
+      -0.013865701, 0.022277884, 0.019400347, 0.028610364, -0.023974052,
+      0.0030309444, 0.027177742, 0.024541136, 0.023737634, 0.0012539584,
+      0.0187086, -0.015451178, 0.015066189, -0.019812824, 0.050507285,
+      -0.0021846944, 0.041644007, -0.0070109894, -0.014599777, -0.05985813,
+      -0.036328156, -0.02293525, -0.0065515027, 0.016792618, -0.0059018973,
+      -0.008319917, 0.008072106, 0.0073447954, -0.052924518, -0.037344936,
+      0.015524772, -0.0012835241, 0.014405327, 0.0057144985, 0.004945561,
+      -0.024654018, 0.011967616, 0.01832056, 0.019411784, 0.019788045,
+      -0.0006405928, -0.0015148119, -0.05064218, -0.031875107, -0.03803604,
+      -0.0096240705, 0.012371131, -0.019090319, 0.0075365147, -0.024229601,
+      0.014469528, -0.004786435, 0.0011314518, 0.009256282, -0.04957284,
+      -0.0068631344, -0.010091242, -0.023295002, 0.03268865, 0.022269772,
+      0.037733294, -0.015292435, -0.06330943, -0.00854154, 0.0027765913,
+      0.0015374947, 0.0377278, 0.008772586, -0.01810512, -0.0025668603,
+      0.014428339, 0.0027927365, 0.07493676, -0.022829408, -0.028912589,
+      0.008928177, 0.011323267, 0.008405796, 0.016925976, 0.001739356,
+      -0.021090876, -0.0062678503, 0.010898773, -0.010470923, 0.015523946,
+      -0.027888289, 0.023872118, -0.048326068, 0.025968319, 0.0047795917,
+      -0.016123952, 0.00698612, -0.05154045, -0.003691712, -0.0101406425,
+      -0.0241034, 0.004006022, 0.0021649078, 0.0019942294, -0.009274028,
+      -0.006467623, -0.0010948133, -0.012350769, -0.0060371486, -0.0006392645,
+      0.031422533, 0.015165475, -0.012650007, -0.005918423, 0.005781174,
+      -0.023262534, -0.0043034274, -0.010881872, -0.015937665, -0.0043740096,
+      -0.02981798, -0.0037422387, -0.029688178, 0.022320364, -0.0014900378,
+      -0.026122924, -0.04360404, 0.016354023, -0.02447563, 0.0205314,
+      0.0042775236, -0.020184014, -0.0017819501, 0.036122557, 0.0036566693,
+      0.07459051, -0.0035548757, 0.004874807, -0.028627345, -0.023153499,
+      0.03710664, -0.000639956, -0.030509725, -0.005146651, -0.010251552,
+      0.028408762, -0.008056198, -0.018420909, 0.02850364, -0.0075958185,
+      -0.008918139, 0.002778187, 0.06220242, -0.016280292, -0.026200369,
+      0.05900717, -0.013802131, 0.005442568, -0.033114687, 0.010976371,
+      0.008192846, 0.0031891295, 0.024811232, 0.009066575, -0.026441244,
+      0.030676885, -0.014591597, -0.024314625, -0.037472498, -0.015021544,
+      -0.016501956, -0.0069196, 0.013831272, 0.056646723, 0.007946148,
+      -0.002477574, -0.030496774, -0.011770325, 0.06742689, -0.03180974,
+      -0.025615396
+    ],
+    "text": [
+      -0.0040579583, -0.00084722764, -0.008696951, -0.006850008, -0.010870523,
+      0.014495447, -0.010678498, -0.09618138, 0.016697474, -0.014809047,
+      -0.0035991871, 0.020752821, 0.0020757387, 0.0018064519, 0.02969283,
+      -0.0040159826, 0.02335311, 0.015918557, -0.0015919582, 0.013545261,
+      0.011341818, -0.006991808, 0.0020565446, -0.016662853, -0.0064206184,
+      0.011536576, -0.01144098, 0.015054818, -0.013258694, 0.0046747606,
+      -0.00681864, -0.012852865, 0.012708946, 0.006093663, 0.0029938417,
+      0.015458671, 0.0040865405, -0.0004354532, 0.0037405093, -0.015085074,
+      0.0007998808, -0.021485215, -0.0066235093, 0.015721628, -0.002462181,
+      -0.0049815965, -0.011028703, 0.0041498104, -0.00070322485, 0.0031991813,
+      -0.0075132507, -0.008273014, 0.0125206, 0.019671565, 0.02124969,
+      0.0076838327, 0.0015366874, 0.0004413452, -0.0027475145, 0.031049952,
+      0.01782742, -0.01759819, 0.0040917504, -0.011803108, 0.0051114787,
+      -0.0075210207, -0.0062834355, 0.010283767, 0.02023746, -0.020258851,
+      0.004795256, -0.011313993, -0.014636256, 0.004900588, -0.0026439666,
+      -0.0004062344, -0.040196564, -0.014539185, 0.014600707, 0.004162044,
+      -0.0155277, -0.016300475, 0.0039491425, 0.022403471, 0.0055926195,
+      0.03543051, 0.01047029, -0.03262415, 0.02942466, -0.011440199,
+      -0.012713757, 0.0062652803, -0.15837249, 0.028388312, 7.452041e-5,
+      0.007136255, -0.010753105, 0.016393846, 0.004432782, -0.010688704,
+      0.015814407, 0.01654759, 0.0008900756, -0.007162077, -6.0264443e-5,
+      6.894444e-5, -0.034889467, -0.026710762, 0.005202752, -0.012675916,
+      0.010903986, 0.017916093, 0.005404525, -0.003624909, -0.012261727,
+      -0.021104869, -0.01593513, -0.009227664, -0.022844192, 0.008606035,
+      0.0013098373, 0.00637147, 0.0027185818, -0.0032128745, 0.018255865,
+      -0.002964337, -0.006976183, -0.0063263937, 0.0075582284, 0.014295236,
+      0.00485664, -0.029005948, -0.014672015, 0.61821824, -0.010903263,
+      0.022125997, -0.018154604, -0.007414954, -0.012344926, 0.0029751004,
+      -0.010787629, -0.0056861844, -0.025484746, -0.0004887071, -0.036681578,
+      0.010114145, -0.009012449, -0.00048479583, -0.011162415, -0.0057421126,
+      -0.0019520421, -0.0013580753, 0.0037870558, 0.0012404326, -0.00089634134,
+      -0.022112457, 0.0034537334, 0.014985147, -0.010455136, 0.018100852,
+      -0.010999219, -0.027524924, -0.009551776, -0.0047603208, -0.001092369,
+      0.008849578, 0.021949856, -0.034437556, -0.0051499153, -0.0006772509,
+      -0.011200381, -0.009206776, 0.021897016, -0.0013931778, 0.0041396013,
+      -0.025534542, 0.0074160174, 0.00039215147, 0.025992293, 0.0069832364,
+      -0.0175616, 0.01272807, -0.020147255, 0.02455081, -0.01236127,
+      -0.011840565, 0.011820177, 0.018173985, 0.017230362, -0.016969377,
+      0.0010091222, -0.04185319, -0.030467693, -0.012564729, 0.030740628,
+      -0.004086395, 0.0013223978, -0.0013041743, -0.01975558, -0.014959637,
+      0.033446018, -0.014724724, -0.028613493, 0.010436393, -0.009841343,
+      -0.013723956, -0.0010025625, 0.016992576, 0.0056477, -0.026704773,
+      0.018927934, 0.013758461, 0.016924908, -0.026889605, -0.01496036,
+      0.02078507, -0.0149594685, -0.021289647, 0.027369255, -0.00205557,
+      0.0129268635, -0.014446633, 0.0039108247, 0.014774828, 0.004396043,
+      0.0038431762, 0.012223014, 0.0061016707, -0.006525442, -0.018426975,
+      0.03081795, 0.024269402, -0.020132616, -0.008118887, 0.025062446,
+      -0.0033954307, -0.019662865, -0.0032548332, -0.008575233, -0.003158561,
+      0.0012930515, 0.02213235, 0.017646195, -0.016638828, -0.0154889,
+      0.031743307, 0.001081875, -0.0019133464, 0.034760594, -0.008569126,
+      -0.019119555, 0.020908207, -0.0047135833, 0.00984879, -0.016712308,
+      0.028532412, 0.0038664932, -0.0071539935, 0.0013488994, 0.0060503725,
+      0.0021401793, -0.032594826, 0.010918716, 0.0075080344, 0.00020341178,
+      -0.030393362, 0.014375046, 0.018798219, -0.0040685013, 0.020957684,
+      -0.012454064, 0.014856742, 0.0017268835, -0.008762698, 0.007062434,
+      0.024501909, 0.0011791736, -0.023002177, -0.012701125, -0.0053904364,
+      0.015551624, 0.018748082, 0.00704452, 0.0047835982, 0.0013530678,
+      0.0033350172, 0.0056562345, 0.009079597, 0.0059383595, 0.011405316,
+      -0.004795079, 0.007274586, -0.0011659514, -0.0001364172, -0.0050517535,
+      -0.010681983, -0.023743946, -0.020241234, 0.0009631201, 0.014073974,
+      -0.00665422, 0.011845411, -0.0001289105, -0.024006248, -0.0009306585,
+      0.0139923245, 0.020409467, -0.017118154, -0.0151973255, -0.016737074,
+      -0.002157259, -0.0060298163, 0.61768156, 0.01275426, -0.023746304,
+      -0.010786622, -0.00050265377, -0.010761652, -0.013012264, 0.013700237,
+      0.019240098, 0.049388826, -0.007853694, -0.014966961, -0.026477624,
+      0.02809707, 0.009291939, -0.028884491, 0.015102742, -0.27225503,
+      0.01782282, -0.016989257, -0.0051341387, 0.0037056766, -0.004537146,
+      -0.026184445, 0.020256622, 0.0073146136, -0.0070027146, -0.009025792,
+      -0.015298917, -0.0052380697, -0.0005596046, -0.0041900063, 0.015934054,
+      0.008158574, -0.0038807616, -0.0048019756, 0.0061978237, 0.022159556,
+      -0.02619826, 0.0013973896, -0.00012341494, 0.030957809, -0.009596324,
+      0.008263321, 0.0017040323, -0.0010236687, 0.017982712, -0.012567677,
+      0.007361281, 0.0028631007, 0.032613713, 0.035072606, -0.045417674,
+      0.016303446, -0.009096281, 0.012163677, -0.008316459, 0.006423764,
+      -0.008586175, -0.0009862242, 0.009973197, 0.020825483, -0.005682246,
+      0.0066081304, 0.0061441967, -0.00670868, -0.024878936, -0.024288971,
+      -0.009822955, -0.011659227, -0.0067634145, -0.0011930552, 0.017096667,
+      0.01974797, -0.020388834, -0.008245143, -0.0071634515, 0.0012492571,
+      -0.010288493, -0.0025248309, -0.0039965925, 0.037344053, -0.019459987,
+      0.022098366, -0.021084892, -0.014823354, -0.010007409, -0.005560381,
+      -0.012292843, 0.0132691385, 0.0066421456, 0.0045196814, 0.0044144704,
+      0.0062646614, 0.0050272197, 0.020296281, 0.011412983, -0.0040745772,
+      0.00542041, 0.0021500897, 0.005183101, 0.00985178, 0.014477596,
+      -0.0131016085, -0.0064126155, -0.004809687, -0.016441243, 0.010445765,
+      0.0013761928, -0.0135576585, 0.0003352349, -0.010797083, -0.0058007324,
+      0.021649584, 0.012650062, -0.009740497, -0.025809184, -0.026720846,
+      0.029149767, 0.014593344, -0.0134959705, -0.004710099, -0.0062580137,
+      -0.0047687683, -0.029818097, -0.004622532, 0.02532894, 0.0051457905,
+      -0.0046252706, -0.02905562, -0.019097809, -0.035888474, -0.006897086,
+      -0.0035953831, -0.0013759647, 0.0027531807, -0.002395984, -0.040570017,
+      -0.02462688, 0.009387292, 0.0025142033, 0.02404064, -0.0014443685,
+      1.0727288e-5, -0.024033979, 0.011659959, -0.016820917, 0.018782362,
+      -0.019061793, 0.0043488434, 0.00040266776, -0.0022744886, 0.0024185092,
+      -0.0041366024, 0.0028075825, -0.0085624885, -0.012087987, 0.013551666,
+      0.0019014167, 0.007896904, 0.031102024, 0.0091334, -0.0030707342,
+      0.0066130627, -0.002711352, -0.012097188, 0.017067473, 0.021030908,
+      0.0014250687, -0.092848144, -0.0034704215, 0.013624546, 0.013779425,
+      -0.0025326884, -0.0018633928, 0.00014903376, 0.01547092, 0.008385425,
+      -0.0033495796, -0.015248458, -0.0356735, 0.005223496, -0.018293105,
+      -0.043073945, 0.016345823, -0.0050947615, 0.023554962, 0.034400985,
+      0.0045644785, -0.00011241743, 0.0060564913, 0.0021182992, 0.01914424,
+      0.019295372, -0.00551076, -0.00017086207, 0.0032044165, 0.010140755,
+      -0.022354674, 0.026089797
+    ]
+  },
+  "facial-recognition": [
+    {
+      "boundingBox": {
+        "x1": 690.0,
+        "y1": -89.0,
+        "x2": 833.0,
+        "y2": 96.0
+      },
+      "score": 0.03575617074966431,
+      "embedding": [
+        -0.43665668, -0.59305364, -0.12699714, 0.3985032, 0.1878969,
+        -0.25987914, 0.14818184, -0.542229, -0.06710237, -0.1319032,
+        0.056408346, 0.046093762, -0.14984925, 0.043225512, 0.023826078,
+        -0.09063442, 0.07891726, -0.29357076, -0.6277133, -0.29042292,
+        0.18038993, 0.21837695, 0.17909442, -0.040304773, -0.035560638,
+        -0.07568607, 0.1277122, -0.13466191, -0.2368693, 0.3642968, 0.29558533,
+        0.20867407, 0.11252518, 0.47691494, -0.054775044, 0.030100197,
+        -0.049531147, 0.04045874, 0.23517768, 0.17130391, 0.17269331,
+        0.08591308, 0.046999797, -0.17151847, -0.2443775, 0.3110528,
+        -0.23971468, -0.31744513, -0.026422635, -0.26203394, -0.18553479,
+        -0.31044272, 0.6385251, 0.27497086, 0.006674953, 0.053785797,
+        -0.20257844, -0.48399794, 0.21708605, -0.4781224, -0.12367296,
+        -0.099010885, 0.18633766, 0.31143454, -0.12165704, 0.13010044,
+        0.12534627, 0.107288495, 0.37471777, -0.123026475, -0.1263274,
+        -0.15621608, 0.26027548, 0.15841314, 0.5164254, -0.31015784, 0.24754328,
+        0.10240883, -0.1181829, -0.14073256, 0.027111322, 0.09927598,
+        -0.10066943, 0.4808423, -0.042361684, -0.08512197, 0.13695274,
+        0.30378994, 0.11138052, -0.318214, -0.5708592, -0.14786953, 0.49985552,
+        -0.23231967, 0.13856675, -0.5383139, -0.059954256, 0.2796868,
+        -0.32447946, 0.16510965, 0.57146084, -0.15120608, 0.20110571,
+        -0.49805385, -0.2008879, -0.046678245, 0.24653266, 0.022508677,
+        -0.14091778, 0.38075653, 0.33811444, 0.05011098, -0.2371835,
+        -0.20052075, -0.14081016, -0.3422103, 0.11998144, 0.24423985,
+        0.13769919, -0.25340003, -0.41080874, -0.28673622, -0.20673269,
+        0.4604351, 0.4178845, 0.105202496, -0.1446912, 0.0807363, -0.37372503,
+        0.13030809, -0.08456054, 0.21937889, -0.22700784, 0.3039499,
+        0.009784861, -0.07245704, 0.50291365, -0.24968931, 0.3178813,
+        0.12665558, -0.036484346, 0.21702805, -0.09277919, 0.17766781,
+        -0.12018812, 0.008044228, -0.26986086, 0.29888278, -0.28485933,
+        0.30066437, -0.14316985, 0.53800535, 0.030840248, 0.023039162,
+        0.73862207, 0.0034680888, 0.23797399, -0.11183337, 0.067846656,
+        -0.23546576, 0.39354736, 0.0053778216, 0.13494004, 0.1370637,
+        -0.029445097, 0.14705376, -0.48120612, 0.27262342, -0.05196667,
+        -0.3097266, 0.08714986, 0.10841283, -0.11757159, -0.5010461,
+        -0.32369986, -0.21964747, -0.19810468, 0.14780998, -0.04624281,
+        0.24638015, -0.06710279, -0.31719172, 0.26955876, 0.37117082,
+        -0.3964724, 0.21541706, -0.12243534, -0.5392555, 0.04640211, 0.3657012,
+        -0.042127043, -0.030638859, 0.21909437, 0.16005577, -0.03320134,
+        -0.0949998, 0.33176076, 0.22538322, -0.016216129, -0.42417043,
+        0.52940613, -0.011592716, -0.21875188, -0.06394625, 0.24449442,
+        -0.05658462, -0.09727913, -0.3978734, -0.11175068, 0.085142605,
+        -0.057618782, -0.0498557, 0.17287247, 0.41813853, -0.30433404,
+        0.3087585, -0.6604493, -0.13869359, 0.072916515, -0.043251924,
+        0.37401634, 0.17014223, -0.26469553, -0.34653437, 0.13010754,
+        0.21517499, 0.74030113, 0.3460628, -0.5115478, 0.4696753, -0.009848075,
+        -0.1330159, -0.0061842054, 0.013667986, 0.16993025, -0.3161455,
+        0.29015008, 0.65197945, 0.13776428, 0.5275149, 0.1472181, -0.114682674,
+        -0.05685012, 0.21696919, -0.34107065, 0.09352806, -0.03968816,
+        -0.13109599, 0.07406853, 0.15091223, 0.18835881, 0.19146737, -0.3898828,
+        0.469747, -0.11145213, 0.039727956, 0.8268787, -0.09761663,
+        -0.043320894, 0.27001414, 0.12079324, 0.05877747, 0.028245524,
+        0.20692128, 0.68440485, -0.34984088, -0.119763374, -0.39637753,
+        0.23799005, 0.057573274, 0.07855352, 0.37982583, -0.0365879,
+        0.068318695, 0.10845077, -0.18650186, 0.08927679, -0.27789003,
+        0.31810492, 0.4251458, -0.03525705, -0.28072172, 0.07316002, 0.13499324,
+        -0.11333761, -0.0008841604, 0.10874095, 0.29681873, 0.008288942,
+        0.24116173, 0.011309357, -0.3009541, -0.4752865, 0.19921738,
+        -0.16108191, 0.017838746, 0.51260126, -0.086799264, 0.34165853,
+        0.32359147, 0.25770876, 0.21442738, -0.15971375, -0.26682994,
+        0.22788364, -0.38956794, 0.084580205, -0.15929273, 0.24211408,
+        -0.24793725, -0.31528267, 0.15945697, -0.16866091, 0.19472758, 0.408394,
+        0.24238603, -0.23643477, 0.29852632, 0.12915722, 0.327068, 0.501809,
+        -0.40538347, -0.023235738, -0.11315605, 0.007632144, -0.22626217,
+        0.28817925, -0.5816528, 0.1551521, -0.016097836, -0.01634605,
+        0.095855944, -0.010664792, 0.1402924, -0.22450349, -0.13961065,
+        -0.40732136, -0.24776831, 0.12040292, -0.06779129, 0.44510496,
+        0.33206633, 0.19807269, -0.06460787, -0.2524265, -0.12726343,
+        0.44656014, -0.09844789, -0.18762295, 0.16189753, 0.23589599,
+        -0.44798508, 0.2135099, -0.33205217, 0.28407755, -0.0951985,
+        0.035582896, -0.51807857, -0.27382392, 0.03172898, 0.22928514,
+        0.47157723, -0.48383215, 0.014225766, -0.08102345, 0.19384615,
+        -0.060681015, -0.037799604, -0.2875836, 0.024652202, 0.052712113,
+        -0.22610298, 0.46830428, 0.29616976, 0.14641494, -0.24234764,
+        0.30126396, -0.011165038, 0.38622355, -0.12484505, 0.33650652,
+        0.17399745, -0.2703057, 0.36919123, 0.26170117, -0.1537327, -0.43157104,
+        0.35697, 0.043892622, -0.065475196, 0.5542902, 0.019970104, 0.43128124,
+        -0.014292087, -0.33983213, 0.3250854, 0.21585244, -0.34458104,
+        0.23752448, 0.18115376, -0.2586738, -0.16033548, -0.16151018,
+        -0.23306333, 0.14865296, -0.31790328, 0.27215546, -0.059920013,
+        0.16193654, 0.075943366, -0.16281635, 0.4489306, -0.43052202,
+        -0.038787995, -0.11722573, 0.07254093, -0.2997051, 0.16540596,
+        -0.15089649, 0.12507877, 0.43725327, 0.13540109, 0.13391787,
+        0.013777234, 0.26951605, -0.2999856, -0.08645636, 0.12768297,
+        0.23375636, -0.07325045, -0.04433371, 0.04709586, 0.09582621,
+        0.23509142, 0.18061984, 0.35379466, 0.12938409, 0.33010754, 0.18966632,
+        0.07585195, 0.0059688687, -0.13233723, 0.17105722, -0.020040989,
+        -0.2805646, -0.091034755, 0.1950869, -0.21115655, -0.16249251,
+        0.07147664, -0.20138165, 0.15193966, 0.041464765, 0.01074836,
+        0.029091328, -0.22078216, 0.06446775, -0.27403125, -0.51904315,
+        -0.20539844, 0.176225, -0.28688902, 0.030568387, 0.2964594,
+        -0.088931546, -0.4425866, 0.09070322, 0.08005672, 0.009866249,
+        -0.07386999, 0.06683251, -0.34370828, 0.23668535, -0.0847823,
+        -0.27400133, -0.31668398, -0.116622224, 0.20027944, 0.33772525,
+        -0.3041445, -0.61801887, 0.043022886, -0.24733649, -0.20657904,
+        -0.37058303, 0.00644885, 0.2548513, 0.029221226, -0.41749227,
+        0.065117866, -0.3745206, 0.22699282, 0.22139677, 0.28097618, 0.10008535,
+        -0.039953396, -0.33505437, 0.28511694, 0.18131426, -0.879614,
+        -0.041319087, -0.62370497, 0.05170501, 0.23541749, -0.0033701807,
+        0.15842043, 0.020002551, -0.22027364, -0.2730838, -0.23035137,
+        -0.077056274, 0.002099529
+      ]
+    }
+  ],
+  "imageWidth": 600,
+  "imageHeight": 800
+}
--- a/machine-learning/scripts/configure-apt.sh
+++ b/machine-learning/scripts/configure-apt.sh
@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+#
+# Adjusts the APT configuration in place:
+#   * every " main" in the Debian sources file gains the contrib, non-free and
+#     non-free-firmware components;
+#   * every " bookworm-updates" is followed by the sid suite;
+#   * a preferences file pins the unstable archive at priority 450.
+
+set -e
+
+readonly apt_sources=/etc/apt/sources.list.d/debian.sources
+readonly apt_pins=/etc/apt/preferences.d/preferences
+
+# Both substitutions run, in this order, during a single in-place pass.
+sed -i \
+    -e's/ main/ main contrib non-free non-free-firmware/g' \
+    -e's/ bookworm-updates/ bookworm-updates sid/g' \
+    "$apt_sources"
+
+# default priority is 500, so unstable is pinned at 450 to prefer stable packages
+printf '%s\n' \
+    'Package: *' \
+    'Pin: release a=unstable' \
+    'Pin-Priority: 450' \
+    > "$apt_pins"
--- a/machine-learning/scripts/healthcheck.py
+++ b/machine-learning/scripts/healthcheck.py
@ -0,0 +1,27 @@
+import os
+import sys
+from ipaddress import ip_address
+
+import requests
+
+port = os.getenv("IMMICH_PORT", 3003)
+host = os.getenv("IMMICH_HOST", "0.0.0.0")
+
+
+def is_ipv6(host: str) -> bool:
+    try:
+        return ip_address(host).version == 6
+    except ValueError:
+        return False
+
+
+host = "localhost" if host == "0.0.0.0" else host
+host = f"[{host}]" if is_ipv6(host) else host
+
+try:
+    response = requests.get(f"http://{host}:{port}/ping", timeout=2)
+    if response.status_code == 200:
+        sys.exit(0)
+    sys.exit(1)
+except requests.RequestException:
+    sys.exit(1)
--- a/machine-learning/test_main.py
+++ b/machine-learning/test_main.py
--- a/machine-learning/uv.lock
+++ b/machine-learning/uv.lock