Source Code added

This commit is contained in:
Fr4nz D13trich 2026-02-02 15:06:40 +01:00
parent 800376eafd
commit 9efa9bc6dd
3912 changed files with 754770 additions and 2 deletions

View file

@ -0,0 +1,3 @@
venv/
*.zip
*.onnx

198
machine-learning/.gitignore vendored Normal file
View file

@ -0,0 +1,198 @@
*.zip
*.onnx
*.rknn
*.npy
*_attr__value
*.weight
*.bias
onnx__*
*in_proj_bias
*.proj
*.latent
*.pos_embed
vocab.txt
export/immich_model_exporter/models/**/README.md
export/**/results/*.json
export/**/root
*.armnn
tokenizer.json
tokenizer_config.json
special_tokens_map.json
preprocess_cfg.json
config.json
merges.txt
vocab.json
upload/
venv/
__pycache__/
model-cache/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
.idea/
# VS Code
.vscode
*.onnx
*.zip
core

206
machine-learning/Dockerfile Normal file
View file

@ -0,0 +1,206 @@
# Selects which builder-* and prod-* stages the final image is assembled from (CPU unless overridden).
ARG DEVICE=cpu
# Base builder image; the cuda, armnn and rknn builder stages derive from it.
FROM python:3.11-bookworm@sha256:667cf70698924920f29ebdb8d749ab665811503b87093d4f11826d114fd7255e AS builder-cpu
# The OpenVINO builder uses its own Python 3.13 base image.
FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS builder-openvino
# CUDA needs no extra build-time steps on top of the CPU builder.
FROM builder-cpu AS builder-cuda
# ArmNN builder: unpacks the prebuilt aarch64 ArmNN release into /opt/armnn and
# runs the build script shipped in the local `ann` directory against it.
FROM builder-cpu AS builder-armnn

# renovate: datasource=github-releases depName=ARM-software/armnn
ARG ARMNN_VERSION="v24.05"

# Read by build.sh to locate the ArmNN headers and libraries.
ENV ARMNN_PATH=/opt/armnn

COPY ann /opt/ann

RUN mkdir /opt/armnn && \
    curl -SL "https://github.com/ARM-software/armnn/releases/download/${ARMNN_VERSION}/ArmNN-linux-aarch64.tar.gz" | tar -zx -C /opt/armnn && \
    cd /opt/ann && \
    sh build.sh
# RKNN needs no extra build-time steps on top of the CPU builder.
FROM builder-cpu AS builder-rknn
# Warning: 25GiB+ disk space required to pull this image
# TODO: find a way to reduce the image size
FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS builder-rocm

# renovate: datasource=github-releases depName=Microsoft/onnxruntime
ARG ONNXRUNTIME_VERSION="v1.22.1"

WORKDIR /code

RUN apt-get update && apt-get install -y --no-install-recommends wget git

# Install a pinned CMake release into /code (added to PATH further down).
RUN wget -nv https://github.com/Kitware/CMake/releases/download/v3.31.9/cmake-3.31.9-linux-x86_64.sh && \
    chmod +x cmake-3.31.9-linux-x86_64.sh && \
    mkdir -p /code/cmake-3.31.9-linux-x86_64 && \
    ./cmake-3.31.9-linux-x86_64.sh --skip-license --prefix=/code/cmake-3.31.9-linux-x86_64 && \
    rm cmake-3.31.9-linux-x86_64.sh

# Check out the onnxruntime sources at the pinned release tag, including submodules.
RUN git clone --single-branch --branch "${ONNXRUNTIME_VERSION}" --recursive "https://github.com/Microsoft/onnxruntime" onnxruntime

WORKDIR /code/onnxruntime

# Fix for multi-threading based on comments in https://github.com/microsoft/onnxruntime/pull/19567
# TODO: find a way to fix this without disabling algo caching
COPY ./patches/* /tmp/
RUN git apply /tmp/*.patch

RUN /bin/sh ./dockerfiles/scripts/install_common_deps.sh

ENV PATH=/opt/rocm-venv/bin:/code/cmake-3.31.9-linux-x86_64/bin:${PATH}
ENV CCACHE_DIR="/ccache"

# Note: the `parallel` setting uses a substantial amount of RAM
RUN --mount=type=cache,target=/ccache \
    ./build.sh \
        --allow_running_as_root \
        --config Release \
        --build_wheel \
        --update \
        --build \
        --parallel 17 \
        --cmake_extra_defines \
            ONNXRUNTIME_VERSION="${ONNXRUNTIME_VERSION}" \
            CMAKE_HIP_ARCHITECTURES="gfx900;gfx906;gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" \
        --skip_tests \
        --use_rocm \
        --rocm_home=/opt/rocm \
        --use_cache \
        --compile_no_warning_as_error

# Park the built wheel in /opt, where the shared builder stage installs it from.
RUN mv /code/onnxruntime/build/Linux/Release/dist/*.whl /opt/
# Shared builder: installs the locked Python dependencies for the selected DEVICE
# into the virtual environment at /opt/venv.
FROM builder-${DEVICE} AS builder

# Re-declared so the value is visible inside this stage.
ARG DEVICE

ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    VIRTUAL_ENV=/opt/venv

RUN apt-get update && apt-get install -y --no-install-recommends g++

COPY --from=ghcr.io/astral-sh/uv:0.8.15@sha256:a5727064a0de127bdb7c9d3c1383f3a9ac307d9f2d8a391edc7896c54289ced0 /uv /uvx /bin/

# uv.lock and pyproject.toml are bind-mounted for this step only, not copied into a layer.
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    uv sync --frozen --extra ${DEVICE} --no-dev --no-editable --no-install-project --compile-bytecode --no-progress --active --link-mode copy

# The ROCm variant additionally installs the onnxruntime wheel built in builder-rocm.
RUN if [ "$DEVICE" = "rocm" ]; then \
        uv pip install /opt/onnxruntime_rocm-*.whl; \
    fi
# CPU runtime base; also the parent of the prod-armnn and prod-rknn stages.
FROM python:3.11-slim-bookworm@sha256:917ec0e42cd6af87657a768449c2f604a6b67c7ab8e10ff917b8724799f816d3 AS prod-cpu

# The preloaded library path is created by the symlink step in the final prod stage.
ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
    MACHINE_LEARNING_MODEL_ARENA=false
# OpenVINO runtime base: installs the Intel graphics compiler and OpenCL runtime
# packages from their GitHub releases.
FROM python:3.13-slim-trixie@sha256:0222b795db95bf7412cede36ab46a266cfb31f632e64051aac9806dabf840a61 AS prod-openvino

# wget is only needed to fetch the .deb files; it is removed again in the same layer.
RUN apt-get update && \
    apt-get install --no-install-recommends -yqq ocl-icd-libopencl1 wget && \
    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.27.10/intel-igc-core-2_2.27.10+20617_amd64.deb && \
    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/v2.27.10/intel-igc-opencl-2_2.27.10+20617_amd64.deb && \
    wget -nv https://github.com/intel/compute-runtime/releases/download/26.01.36711.4/intel-opencl-icd_26.01.36711.4-0_amd64.deb && \
    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-core_1.0.17537.24_amd64.deb && \
    wget -nv https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.17537.24/intel-igc-opencl_1.0.17537.24_amd64.deb && \
    wget -nv https://github.com/intel/compute-runtime/releases/download/24.35.30872.36/intel-opencl-icd-legacy1_24.35.30872.36_amd64.deb && \
    # TODO: Figure out how to get renovate to manage this differently versioned libigdgmm file
    wget -nv https://github.com/intel/compute-runtime/releases/download/26.01.36711.4/libigdgmm12_22.9.0_amd64.deb && \
    dpkg -i *.deb && \
    rm *.deb && \
    apt-get remove wget -yqq && \
    rm -rf /var/lib/apt/lists/*
# CUDA runtime base: NVIDIA's CUDA image plus cuDNN, with the Python 3.11
# interpreter and standard library copied over from the builder-cuda stage.
FROM nvidia/cuda:12.2.2-runtime-ubuntu22.04@sha256:94c1577b2cd9dd6c0312dc04dff9cb2fdce2b268018abc3d7c2dbcacf1155000 AS prod-cuda

# The preloaded library path is created by the symlink step in the final prod stage.
ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
    MACHINE_LEARNING_MODEL_ARENA=false

RUN apt-get update && \
    # Pascal support was dropped in 9.11
    apt-get install --no-install-recommends -yqq libcudnn9-cuda-12=9.10.2.21-1 && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

COPY --from=builder-cuda /usr/local/bin/python3 /usr/local/bin/python3
COPY --from=builder-cuda /usr/local/lib/python3.11 /usr/local/lib/python3.11
COPY --from=builder-cuda /usr/local/lib/libpython3.11.so /usr/local/lib/libpython3.11.so
# ROCm runtime base: the same image (and digest) that the builder-rocm stage starts from.
FROM rocm/dev-ubuntu-24.04:6.4.4-complete@sha256:31418ac10a3769a71eaef330c07280d1d999d7074621339b8f93c484c35f6078 AS prod-rocm
# ArmNN runtime base: CPU image plus OpenCL loader packages and the ArmNN
# libraries produced by the builder-armnn stage.
FROM prod-cpu AS prod-armnn

ENV LD_LIBRARY_PATH=/opt/armnn \
    LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
    MACHINE_LEARNING_MODEL_ARENA=false

# Registers /usr/lib/libmali.so as an OpenCL vendor driver; the library itself is not installed here.
RUN apt-get update && apt-get install -y --no-install-recommends ocl-icd-libopencl1 mesa-opencl-icd libgomp1 && \
    rm -rf /var/lib/apt/lists/* && \
    mkdir --parents /etc/OpenCL/vendors && \
    echo "/usr/lib/libmali.so" > /etc/OpenCL/vendors/mali.icd && \
    mkdir /opt/armnn

# All sources are wildcard patterns except build.sh, which is named literally.
COPY --from=builder-armnn \
    /opt/armnn/libarmnn.so.?? \
    /opt/armnn/libarmnnOnnxParser.so.?? \
    /opt/armnn/libarmnnDeserializer.so.?? \
    /opt/armnn/libarmnnTfLiteParser.so.?? \
    /opt/armnn/libprotobuf.so.?.??.?.? \
    /opt/ann/libann.s[o] \
    /opt/ann/build.sh \
    /opt/armnn/
# RKNN runtime base: CPU image plus the checksum-verified librknnrt.so runtime library.
FROM prod-cpu AS prod-rknn

# renovate: datasource=github-tags depName=airockchip/rknn-toolkit2
ARG RKNN_TOOLKIT_VERSION="v2.3.0"

ENV LD_PRELOAD=/usr/lib/libmimalloc.so.2 \
    MACHINE_LEARNING_MODEL_ARENA=false

# The checksum pins the exact file, so it has to be updated together with RKNN_TOOLKIT_VERSION.
ADD --checksum=sha256:73993ed4b440460825f21611731564503cc1d5a0c123746477da6cd574f34885 "https://github.com/airockchip/rknn-toolkit2/raw/refs/tags/${RKNN_TOOLKIT_VERSION}/rknpu2/runtime/Linux/librknn_api/aarch64/librknnrt.so" /usr/lib/
# Final runtime image, layered on the prod-* stage selected by DEVICE.
FROM prod-${DEVICE} AS prod

# Re-declared so the value is visible inside this stage.
ARG DEVICE

# libmimalloc2.0 is installed for every variant except openvino and rocm.
RUN apt-get update && \
    apt-get install -y --no-install-recommends tini ccache libgl1 libglib2.0-0 libgomp1 $(if ! [ "$DEVICE" = "openvino" ] && ! [ "$DEVICE" = "rocm" ]; then echo "libmimalloc2.0"; fi) && \
    apt-get autoremove -yqq && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Architecture-independent path for the library named in LD_PRELOAD by the prod-* stages.
RUN ln -s "/usr/lib/$(arch)-linux-gnu/libmimalloc.so.2" /usr/lib/libmimalloc.so.2

WORKDIR /usr/src

ENV TRANSFORMERS_CACHE=/cache \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PATH="/opt/venv/bin:$PATH" \
    PYTHONPATH=/usr/src \
    DEVICE=${DEVICE} \
    VIRTUAL_ENV=/opt/venv \
    MACHINE_LEARNING_CACHE_FOLDER=/cache

# prevent core dumps
# limits.conf entries have the form "<domain> <type> <item> <value>" and sysctl.conf
# entries the form "key = value"; the domain ("*" = every user) and the "=" are
# required for the lines to be valid entries.
RUN echo "* hard core 0" >> /etc/security/limits.conf && \
    echo "fs.suid_dumpable = 0" >> /etc/sysctl.conf && \
    echo 'ulimit -S -c 0 > /dev/null 2>&1' >> /etc/profile

# Virtual environment assembled by the shared builder stage.
COPY --from=builder /opt/venv /opt/venv
COPY scripts/healthcheck.py .
COPY immich_ml immich_ml

# Build metadata is declared last so that changing it does not invalidate the layers above.
ARG BUILD_ID
ARG BUILD_IMAGE
ARG BUILD_SOURCE_REF
ARG BUILD_SOURCE_COMMIT

ENV IMMICH_BUILD=${BUILD_ID}
ENV IMMICH_BUILD_URL=https://github.com/immich-app/immich/actions/runs/${BUILD_ID}
ENV IMMICH_BUILD_IMAGE=${BUILD_IMAGE}
ENV IMMICH_BUILD_IMAGE_URL=https://github.com/immich-app/immich/pkgs/container/immich-machine-learning
ENV IMMICH_REPOSITORY=immich-app/immich
ENV IMMICH_REPOSITORY_URL=https://github.com/immich-app/immich
ENV IMMICH_SOURCE_REF=${BUILD_SOURCE_REF}
ENV IMMICH_SOURCE_COMMIT=${BUILD_SOURCE_COMMIT}
ENV IMMICH_SOURCE_URL=https://github.com/immich-app/immich/commit/${BUILD_SOURCE_COMMIT}

# tini runs as the entrypoint and starts the Python service as its child.
ENTRYPOINT ["tini", "--"]
CMD ["python", "-m", "immich_ml"]

HEALTHCHECK CMD python3 healthcheck.py

View file

@ -0,0 +1,42 @@
# Immich Machine Learning
- CLIP embeddings
- Facial recognition
# Setup
This project uses [uv](https://docs.astral.sh/uv/getting-started/installation/), so be sure to install it first.
Running `uv sync --extra cpu` will install everything you need in an isolated virtual environment.
CUDA, ROCm and OpenVINO are supported as acceleration APIs. To use them, replace `--extra cpu` with one of `--extra cuda`, `--extra rocm` or `--extra openvino`. In the case of CUDA, a [compute capability](https://developer.nvidia.com/cuda-gpus) of 5.2 or higher is required.
To add or remove dependencies, you can use the commands `uv add $PACKAGE_NAME` and `uv remove $PACKAGE_NAME`, respectively.
Be sure to run `uv lock` and commit the resulting `uv.lock` and `pyproject.toml` files to reflect any changes in dependencies.
# Load Testing
To measure inference throughput and latency, you can use [Locust](https://locust.io/) using the provided `locustfile.py`.
Locust works by querying the model endpoints and aggregating their statistics, meaning the app must be deployed.
You can change the models or adjust options like score thresholds through the Locust UI.
To get started, you can simply run `locust --web-host 127.0.0.1` and open `localhost:8089` in a browser to access the UI. See the [Locust documentation](https://docs.locust.io/en/stable/index.html) for more info on running Locust.
Note that in Locust's jargon, concurrency is measured in `users`, and each user runs one task at a time. To achieve a particular per-endpoint concurrency, multiply that number by the number of endpoints to be queried. For example, if there are 3 endpoints and you want each of them to receive 8 requests at a time, you should set the number of users to 24.
# Facial Recognition
## Acknowledgements
This project utilizes facial recognition models from the [InsightFace](https://github.com/deepinsight/insightface/tree/master/model_zoo) project. We appreciate the work put into developing these models, which have been beneficial to the machine learning part of this project.
### Used Models
- antelopev2
- buffalo_l
- buffalo_m
- buffalo_s
## License and Use Restrictions
We have received permission to use the InsightFace facial recognition models in our project, as granted via email by Jia Guo (guojia@insightface.ai) on 18th March 2023. However, it's important to note that this permission does not extend to the redistribution or commercial use of their models by third parties. Users and developers interested in using these models should review the licensing terms provided in the InsightFace GitHub repository.
For more information on the capabilities of the InsightFace models and to ensure compliance with their license, please refer to their [official repository](https://github.com/deepinsight/insightface). Adhering to the specified licensing terms is crucial for the respectful and lawful use of their work.

View file

@ -0,0 +1,21 @@
# Immich Machine Learning
- Clasificación de imágenes
- Incorporación de CLIP
- Reconocimiento facial
# Configuración
Este proyecto utiliza [Poetry](https://python-poetry.org/docs/#installation), así que asegúrate de instalarlo primero.
Ejecutar `poetry install --no-root --with dev` instalará todo lo necesario en un entorno virtual aislado.
Para agregar o eliminar dependencias, puedes utilizar los comandos `poetry add $PACKAGE_NAME` y `poetry remove $PACKAGE_NAME`, respectivamente.
Asegúrate de hacer commit de los archivos `poetry.lock` y `pyproject.toml` para reflejar cualquier cambio en las dependencias.
# Pruebas de carga
Para medir la velocidad y latencia de inferencia, puedes utilizar [Locust](https://locust.io/) con el archivo `locustfile.py` proporcionado.
Locust funciona haciendo consultas a los puntos finales del modelo y agregando estadísticas, lo que significa que la aplicación debe estar desplegada.
Puedes ejecutar `load_test.sh` para implementar automáticamente la aplicación localmente e iniciar Locust, ajustando opcionalmente sus variables de entorno según sea necesario.
Alternativamente, para pruebas más personalizadas, también puedes ejecutar `locust` directamente: consulta la [documentación](https://docs.locust.io/en/stable/index.html). Ten en cuenta que, en la jerga de Locust, la concurrencia se mide en `usuarios`, y cada usuario ejecuta una tarea a la vez. Para lograr una concurrencia específica por punto final, multiplica ese número por la cantidad de puntos finales que se desean consultar. Por ejemplo, si hay 3 puntos finales y deseas que cada uno de ellos reciba 8 solicitudes al mismo tiempo, debes configurar el número de usuarios en 24.

View file

@ -0,0 +1,22 @@
# Immich Apprentissage machine
- Classification d'images
- Embarquement de CLIP
- Reconnaissance faciale
# Mise en place
Ce projet utilise [Poetry](https://python-poetry.org/docs/#installation), donc soyez certain de l'installer en premier.
Exécuter `poetry install --no-root --with dev` installera tout ce dont vous avez besoin dans un environnement virtuel isolé.
Pour ajouter ou supprimer des dépendances, vous pouvez utiliser les commandes `poetry add $PACKAGE_NAME` et `poetry remove $PACKAGE_NAME` respectivement.
Soyez sûr de commit les fichiers `poetry.lock` et `pyproject.toml` pour refléter les changements de dépendances.
# Test de charge
Pour mesurer le débit d'inférence et la latence, vous pouvez utiliser [Locust](https://locust.io/) avec le fichier fourni `locustfile.py`.
Locust fonctionne en interrogeant les endpoints des modèles et en agrégeant leurs statistiques, ce qui signifie que l'application doit être déployée.
Vous pouvez exécuter `load_test.sh` pour automatiquement déployer l'application localement et démarrer Locust, en ajustant si besoin ses variables d'environnement.
En alternative, pour réaliser plus de tests customisés, vous pourriez aussi exécuter `locust` directement : voir la [documentation](https://docs.locust.io/en/stable/index.html). Notez que dans le jargon de Locust, la concurrence est mesurée en `users` et que chaque user exécute une tâche après l'autre. Pour parvenir à une concurrence par endpoint, multipliez ce nombre par le nombre d'endpoints à interroger. Par exemple, s'il y a 3 endpoints et que vous voulez que chacun d'entre eux reçoive 8 requêtes à la fois, vous devrez mettre ce nombre d'users à 24.

View file

View file

@ -0,0 +1,310 @@
#include <fstream>
#include <mutex>
#include <atomic>
#include "armnn/IRuntime.hpp"
#include "armnn/INetwork.hpp"
#include "armnn/Types.hpp"
#include "armnnDeserializer/IDeserializer.hpp"
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#include "armnnOnnxParser/IOnnxParser.hpp"
using namespace armnn;
// Binding information for the inputs and outputs of one loaded network.
// Filled by Ann::getIOInfos() and stored per network id in Ann::ioInfos.
struct IOInfos
{
// one entry per Input layer found in the optimized network
std::vector<BindingPointInfo> inputInfos;
// one entry per Output layer found in the optimized network
std::vector<BindingPointInfo> outputInfos;
};
// from https://rigtorp.se/spinlock/
// Busy-waiting lock built on a single atomic flag. It exposes lock()/unlock(),
// so it can be used wherever a mutex-like object with those two members is expected.
struct SpinLock
{
    std::atomic<bool> lock_ = {false};

    // Spins until the flag could be switched from false to true by this caller.
    void lock()
    {
        // exchange() returns the previous value: `true` means somebody else holds the lock
        while (lock_.exchange(true, std::memory_order_acquire))
        {
            // wait with plain loads until the flag reads false, then retry the exchange
            while (lock_.load(std::memory_order_relaxed))
            {
            }
        }
    }

    // Releases the lock by clearing the flag.
    void unlock()
    {
        lock_.store(false, std::memory_order_release);
    }
};
class Ann
{
public:
int load(const char *modelPath,
bool fastMath,
bool fp16,
bool saveCachedNetwork,
const char *cachedNetworkPath)
{
NetworkId netId = -2;
while (netId == -2)
{
try
{
netId = loadInternal(modelPath, fastMath, fp16, saveCachedNetwork, cachedNetworkPath);
}
catch (InvalidArgumentException e)
{
// fp16 models do not support the forced fp16-turbo (runtime fp32->fp16 conversion)
if (fp16)
fp16 = false;
else
netId = -1;
}
}
return netId;
}
void execute(NetworkId netId, const void **inputData, void **outputData)
{
spinLock.lock();
const IOInfos *infos = &ioInfos[netId];
auto m = mutexes[netId].get();
spinLock.unlock();
InputTensors inputTensors;
inputTensors.reserve(infos->inputInfos.size());
size_t i = 0;
for (const BindingPointInfo &info : infos->inputInfos)
inputTensors.emplace_back(info.first, ConstTensor(info.second, inputData[i++]));
OutputTensors outputTensors;
outputTensors.reserve(infos->outputInfos.size());
i = 0;
for (const BindingPointInfo &info : infos->outputInfos)
outputTensors.emplace_back(info.first, Tensor(info.second, outputData[i++]));
m->lock();
runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
m->unlock();
}
void unload(NetworkId netId)
{
mutex.lock();
runtime->UnloadNetwork(netId);
mutex.unlock();
}
int tensors(NetworkId netId, bool isInput = false)
{
spinLock.lock();
const IOInfos *infos = &ioInfos[netId];
spinLock.unlock();
return (int)(isInput ? infos->inputInfos.size() : infos->outputInfos.size());
}
unsigned long shape(NetworkId netId, bool isInput = false, int index = 0)
{
spinLock.lock();
const IOInfos *infos = &ioInfos[netId];
spinLock.unlock();
const TensorShape shape = (isInput ? infos->inputInfos : infos->outputInfos)[index].second.GetShape();
unsigned long s = 0;
for (unsigned int d = 0; d < shape.GetNumDimensions(); d++)
s |= ((unsigned long)shape[d]) << (d * 16); // stores up to 4 16-bit values in a 64-bit value
return s;
}
Ann(int tuningLevel, const char *tuningFile)
{
IRuntime::CreationOptions runtimeOptions;
runtimeOptions.m_ProfilingOptions.m_EnableProfiling = false;
runtimeOptions.m_ProfilingOptions.m_TimelineEnabled = false;
BackendOptions backendOptions{"GpuAcc",
{
{"TuningLevel", tuningLevel},
{"MemoryOptimizerStrategy", "ConstantMemoryStrategy"}, // SingleAxisPriorityList or ConstantMemoryStrategy
}};
if (tuningFile)
backendOptions.AddOption({"TuningFile", tuningFile});
runtimeOptions.m_BackendOptions.emplace_back(backendOptions);
runtime = IRuntime::CreateRaw(runtimeOptions);
};
~Ann()
{
IRuntime::Destroy(runtime);
};
private:
int loadInternal(const char *modelPath,
bool fastMath,
bool fp16,
bool saveCachedNetwork,
const char *cachedNetworkPath)
{
NetworkId netId = -1;
INetworkPtr network = loadModel(modelPath);
IOptimizedNetworkPtr optNet = OptimizeNetwork(network.get(), fastMath, fp16, saveCachedNetwork, cachedNetworkPath);
const IOInfos infos = getIOInfos(optNet.get());
mutex.lock();
Status status = runtime->LoadNetwork(netId, std::move(optNet));
mutex.unlock();
if (status != Status::Success)
{
return -1;
}
spinLock.lock();
ioInfos[netId] = infos;
mutexes.emplace(netId, std::make_unique<std::mutex>());
spinLock.unlock();
return netId;
}
INetworkPtr loadModel(const char *modelPath)
{
const auto path = std::string(modelPath);
if (path.rfind(".tflite") == path.length() - 7) // endsWith()
{
auto parser = armnnTfLiteParser::ITfLiteParser::CreateRaw();
return parser->CreateNetworkFromBinaryFile(modelPath);
}
else if (path.rfind(".onnx") == path.length() - 5) // endsWith()
{
auto parser = armnnOnnxParser::IOnnxParser::CreateRaw();
return parser->CreateNetworkFromBinaryFile(modelPath);
}
else
{
std::ifstream ifs(path, std::ifstream::in | std::ifstream::binary);
auto parser = armnnDeserializer::IDeserializer::CreateRaw();
return parser->CreateNetworkFromBinary(ifs);
}
}
static BindingPointInfo getInputTensorInfo(LayerBindingId inputBindingId, TensorInfo info)
{
const auto newInfo = TensorInfo{info.GetShape(), info.GetDataType(),
info.GetQuantizationScale(),
info.GetQuantizationOffset(),
true};
return {inputBindingId, newInfo};
}
IOptimizedNetworkPtr OptimizeNetwork(INetwork *network, bool fastMath, bool fp16, bool saveCachedNetwork, const char *cachedNetworkPath)
{
const bool allowExpandedDims = false;
const ShapeInferenceMethod shapeInferenceMethod = ShapeInferenceMethod::ValidateOnly;
OptimizerOptionsOpaque options;
options.SetReduceFp32ToFp16(fp16);
options.SetShapeInferenceMethod(shapeInferenceMethod);
options.SetAllowExpandedDims(allowExpandedDims);
options.SetDebugToFileEnabled(false);
options.SetProfilingEnabled(false);
BackendOptions gpuAcc("GpuAcc", {{"FastMathEnabled", fastMath}});
if (cachedNetworkPath)
{
gpuAcc.AddOption({"SaveCachedNetwork", saveCachedNetwork});
gpuAcc.AddOption({"CachedNetworkFilePath", cachedNetworkPath});
}
options.AddModelOption(gpuAcc);
// No point in using ARMNN for CPU, use ONNX (quantized) instead.
// BackendOptions cpuAcc("CpuAcc",
// {
// {"FastMathEnabled", fastMath},
// {"NumberOfThreads", 0},
// });
// options.AddModelOption(cpuAcc);
BackendOptions allowExDimOpt("AllowExpandedDims",
{{"AllowExpandedDims", allowExpandedDims}});
options.AddModelOption(allowExDimOpt);
BackendOptions shapeInferOpt("ShapeInferenceMethod",
{{"InferAndValidate", shapeInferenceMethod == ShapeInferenceMethod::InferAndValidate}});
options.AddModelOption(shapeInferOpt);
std::vector<BackendId> backends = {
BackendId("GpuAcc"),
// BackendId("CpuAcc"),
// BackendId("CpuRef"),
};
return Optimize(*network, backends, runtime->GetDeviceSpec(), options);
}
IOInfos getIOInfos(IOptimizedNetwork *optNet)
{
struct InfoStrategy : IStrategy
{
void ExecuteStrategy(const IConnectableLayer *layer,
const BaseDescriptor &descriptor,
const std::vector<ConstTensor> &constants,
const char *name,
const LayerBindingId id = 0) override
{
IgnoreUnused(descriptor, constants, id);
const LayerType lt = layer->GetType();
if (lt == LayerType::Input)
ioInfos.inputInfos.push_back(getInputTensorInfo(id, layer->GetOutputSlot(0).GetTensorInfo()));
else if (lt == LayerType::Output)
ioInfos.outputInfos.push_back({id, layer->GetInputSlot(0).GetTensorInfo()});
}
IOInfos ioInfos;
};
InfoStrategy infoStrategy;
optNet->ExecuteStrategy(infoStrategy);
return infoStrategy.ioInfos;
}
IRuntime *runtime;
std::map<NetworkId, IOInfos> ioInfos;
std::map<NetworkId, std::unique_ptr<std::mutex>> mutexes; // mutex per network to not execute the same the same network concurrently
std::mutex mutex; // global mutex for load/unload calls to the runtime
SpinLock spinLock; // fast spin lock to guard access to the ioInfos and mutexes maps
};
extern "C" void *init(int logLevel, int tuningLevel, const char *tuningFile)
{
LogSeverity level = static_cast<LogSeverity>(logLevel);
ConfigureLogging(true, true, level);
Ann *ann = new Ann(tuningLevel, tuningFile);
return ann;
}
extern "C" void destroy(void *ann)
{
delete ((Ann *)ann);
}
extern "C" int load(void *ann,
const char *path,
bool fastMath,
bool fp16,
bool saveCachedNetwork,
const char *cachedNetworkPath)
{
return ((Ann *)ann)->load(path, fastMath, fp16, saveCachedNetwork, cachedNetworkPath);
}
extern "C" void unload(void *ann, NetworkId netId)
{
((Ann *)ann)->unload(netId);
}
extern "C" void execute(void *ann, NetworkId netId, const void **inputData, void **outputData)
{
((Ann *)ann)->execute(netId, inputData, outputData);
}
extern "C" unsigned long shape(void *ann, NetworkId netId, bool isInput, int index)
{
return ((Ann *)ann)->shape(netId, isInput, index);
}
extern "C" int tensors(void *ann, NetworkId netId, bool isInput)
{
return ((Ann *)ann)->tensors(netId, isInput);
}

View file

@ -0,0 +1,3 @@
#!/usr/bin/env sh
# Builds libann.so from ann.cpp against the ArmNN installation at $ARMNN_PATH.
#
# The source file is listed before the libraries: the linker processes its inputs
# in command-line order, and with --as-needed (the default on some distributions)
# a library named before the code that uses it is dropped.
g++ -shared -O3 -o libann.so -fuse-ld=gold -std=c++17 \
    -I"$ARMNN_PATH"/include \
    ann.cpp \
    -L"$ARMNN_PATH" \
    -larmnn -larmnnDeserializer -larmnnTfLiteParser -larmnnOnnxParser

View file

@ -0,0 +1,2 @@
armnn*
output/

View file

@ -0,0 +1,4 @@
#!/usr/bin/env sh
# Builds the armnnconverter binary from the ArmNN 23.11 source tree, linking
# against the prebuilt libraries in ../armnn.
cd armnn-23.11/ || exit

# The source file is listed before the libraries: the linker processes its inputs
# in command-line order, and with --as-needed (the default on some distributions)
# a library named before the code that uses it is dropped, which breaks the link.
g++ -o ../armnnconverter -O1 \
    -DARMNN_ONNX_PARSER -DARMNN_SERIALIZER -DARMNN_TF_LITE_PARSER \
    -fuse-ld=gold -std=c++17 \
    -Iinclude -Isrc/armnnUtils -Ithird-party \
    src/armnnConverter/ArmnnConverter.cpp \
    -L../armnn \
    -larmnn -larmnnDeserializer -larmnnTfLiteParser -larmnnOnnxParser -larmnnSerializer

View file

@ -0,0 +1,8 @@
#!/bin/sh
# Downloads the prebuilt ArmNN binaries into ./armnn and unpacks the matching
# ArmNN source tree into the current directory.

# Stop at the first failing step instead of continuing with a partial download.
set -eu

armnn_version="v23.11"

# binaries
# -p: re-running the script with an existing armnn/ directory is not an error
mkdir -p armnn
# -f: an HTTP error makes curl fail instead of piping the error page into tar
curl -fSL "https://github.com/ARM-software/armnn/releases/download/${armnn_version}/ArmNN-linux-x86_64.tar.gz" | tar -zx -C armnn

# source to build ArmnnConverter
curl -fSL "https://github.com/ARM-software/armnn/archive/refs/tags/${armnn_version}.tar.gz" | tar -zx

View file

@ -0,0 +1,201 @@
name: annexport
channels:
- pytorch
- nvidia
- conda-forge
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_kmp_llvm
- aiohttp=3.9.1=py310h2372a71_0
- aiosignal=1.3.1=pyhd8ed1ab_0
- arpack=3.8.0=nompi_h0baa96a_101
- async-timeout=4.0.3=pyhd8ed1ab_0
- attrs=23.1.0=pyh71513ae_1
- aws-c-auth=0.7.3=h28f7589_1
- aws-c-cal=0.6.1=hc309b26_1
- aws-c-common=0.9.0=hd590300_0
- aws-c-compression=0.2.17=h4d4d85c_2
- aws-c-event-stream=0.3.1=h2e3709c_4
- aws-c-http=0.7.11=h00aa349_4
- aws-c-io=0.13.32=he9a53bd_1
- aws-c-mqtt=0.9.3=hb447be9_1
- aws-c-s3=0.3.14=hf3aad02_1
- aws-c-sdkutils=0.1.12=h4d4d85c_1
- aws-checksums=0.1.17=h4d4d85c_1
- aws-crt-cpp=0.21.0=hb942446_5
- aws-sdk-cpp=1.10.57=h85b1a90_19
- blas=2.120=openblas
- blas-devel=3.9.0=20_linux64_openblas
- brotli-python=1.0.9=py310hd8f1fbe_9
- bzip2=1.0.8=hd590300_5
- c-ares=1.23.0=hd590300_0
- ca-certificates=2023.11.17=hbcca054_0
- certifi=2023.11.17=pyhd8ed1ab_0
- charset-normalizer=3.3.2=pyhd8ed1ab_0
- click=8.1.7=unix_pyh707e725_0
- colorama=0.4.6=pyhd8ed1ab_0
- coloredlogs=15.0.1=pyhd8ed1ab_3
- cuda-cudart=11.7.99=0
- cuda-cupti=11.7.101=0
- cuda-libraries=11.7.1=0
- cuda-nvrtc=11.7.99=0
- cuda-nvtx=11.7.91=0
- cuda-runtime=11.7.1=0
- dataclasses=0.8=pyhc8e2a94_3
- datasets=2.14.7=pyhd8ed1ab_0
- dill=0.3.7=pyhd8ed1ab_0
- filelock=3.13.1=pyhd8ed1ab_0
- flatbuffers=23.5.26=h59595ed_1
- freetype=2.12.1=h267a509_2
- frozenlist=1.4.0=py310h2372a71_1
- fsspec=2023.10.0=pyhca7485f_0
- ftfy=6.1.3=pyhd8ed1ab_0
- gflags=2.2.2=he1b5a44_1004
- glog=0.6.0=h6f12383_0
- glpk=5.0=h445213a_0
- gmp=6.3.0=h59595ed_0
- gmpy2=2.1.2=py310h3ec546c_1
- huggingface_hub=0.17.3=pyhd8ed1ab_0
- humanfriendly=10.0=pyhd8ed1ab_6
- icu=73.2=h59595ed_0
- idna=3.6=pyhd8ed1ab_0
- importlib-metadata=7.0.0=pyha770c72_0
- importlib_metadata=7.0.0=hd8ed1ab_0
- joblib=1.3.2=pyhd8ed1ab_0
- keyutils=1.6.1=h166bdaf_0
- krb5=1.21.2=h659d440_0
- lcms2=2.15=h7f713cb_2
- ld_impl_linux-64=2.40=h41732ed_0
- lerc=4.0.0=h27087fc_0
- libabseil=20230125.3=cxx17_h59595ed_0
- libarrow=12.0.1=hb87d912_8_cpu
- libblas=3.9.0=20_linux64_openblas
- libbrotlicommon=1.0.9=h166bdaf_9
- libbrotlidec=1.0.9=h166bdaf_9
- libbrotlienc=1.0.9=h166bdaf_9
- libcblas=3.9.0=20_linux64_openblas
- libcrc32c=1.1.2=h9c3ff4c_0
- libcublas=11.10.3.66=0
- libcufft=10.7.2.124=h4fbf590_0
- libcufile=1.8.1.2=0
- libcurand=10.3.4.101=0
- libcurl=8.5.0=hca28451_0
- libcusolver=11.4.0.1=0
- libcusparse=11.7.4.91=0
- libdeflate=1.19=hd590300_0
- libedit=3.1.20191231=he28a2e2_2
- libev=4.33=hd590300_2
- libevent=2.1.12=hf998b51_1
- libffi=3.4.2=h7f98852_5
- libgcc-ng=13.2.0=h807b86a_3
- libgfortran-ng=13.2.0=h69a702a_3
- libgfortran5=13.2.0=ha4646dd_3
- libgoogle-cloud=2.12.0=hac9eb74_1
- libgrpc=1.54.3=hb20ce57_0
- libhwloc=2.9.3=default_h554bfaf_1009
- libiconv=1.17=hd590300_1
- libjpeg-turbo=2.1.5.1=hd590300_1
- liblapack=3.9.0=20_linux64_openblas
- liblapacke=3.9.0=20_linux64_openblas
- libnghttp2=1.58.0=h47da74e_1
- libnpp=11.7.4.75=0
- libnsl=2.0.1=hd590300_0
- libnuma=2.0.16=h0b41bf4_1
- libnvjpeg=11.8.0.2=0
- libopenblas=0.3.25=pthreads_h413a1c8_0
- libpng=1.6.39=h753d276_0
- libprotobuf=3.21.12=hfc55251_2
- libsentencepiece=0.1.99=h180e1df_0
- libsqlite=3.44.2=h2797004_0
- libssh2=1.11.0=h0841786_0
- libstdcxx-ng=13.2.0=h7e041cc_3
- libthrift=0.18.1=h8fd135c_2
- libtiff=4.6.0=h29866fb_1
- libutf8proc=2.8.0=h166bdaf_0
- libuuid=2.38.1=h0b41bf4_0
- libwebp-base=1.3.2=hd590300_0
- libxcb=1.15=h0b41bf4_0
- libxml2=2.11.6=h232c23b_0
- libzlib=1.2.13=hd590300_5
- llvm-openmp=17.0.6=h4dfa4b3_0
- lz4-c=1.9.4=hcb278e6_0
- mkl=2022.2.1=h84fe81f_16997
- mkl-devel=2022.2.1=ha770c72_16998
- mkl-include=2022.2.1=h84fe81f_16997
- mpc=1.3.1=hfe3b2da_0
- mpfr=4.2.1=h9458935_0
- mpmath=1.3.0=pyhd8ed1ab_0
- multidict=6.0.4=py310h2372a71_1
- multiprocess=0.70.15=py310h2372a71_1
- ncurses=6.4=h59595ed_2
- numpy=1.26.2=py310hb13e2d6_0
- onnx=1.14.0=py310ha3deec4_1
- onnx2torch=1.5.13=pyhd8ed1ab_0
- onnxruntime=1.16.3=py310hd4b7fbc_1_cpu
- open-clip-torch=2.23.0=pyhd8ed1ab_1
- openblas=0.3.25=pthreads_h7a3da1a_0
- openjpeg=2.5.0=h488ebb8_3
- openssl=3.2.0=hd590300_1
- orc=1.9.0=h2f23424_1
- packaging=23.2=pyhd8ed1ab_0
- pandas=2.1.4=py310hcc13569_0
- pillow=10.0.1=py310h29da1c1_1
- pip=23.3.1=pyhd8ed1ab_0
- protobuf=4.21.12=py310heca2aa9_0
- pthread-stubs=0.4=h36c2ea0_1001
- pyarrow=12.0.1=py310h0576679_8_cpu
- pyarrow-hotfix=0.6=pyhd8ed1ab_0
- pysocks=1.7.1=pyha2e5f31_6
- python=3.10.13=hd12c33a_0_cpython
- python-dateutil=2.8.2=pyhd8ed1ab_0
- python-flatbuffers=23.5.26=pyhd8ed1ab_0
- python-tzdata=2023.3=pyhd8ed1ab_0
- python-xxhash=3.4.1=py310h2372a71_0
- python_abi=3.10=4_cp310
- pytorch=1.13.1=cpu_py310hd11e9c7_1
- pytorch-cuda=11.7=h778d358_5
- pytorch-mutex=1.0=cuda
- pytz=2023.3.post1=pyhd8ed1ab_0
- pyyaml=6.0.1=py310h2372a71_1
- rdma-core=28.9=h59595ed_1
- re2=2023.03.02=h8c504da_0
- readline=8.2=h8228510_1
- regex=2023.10.3=py310h2372a71_0
- requests=2.31.0=pyhd8ed1ab_0
- s2n=1.3.49=h06160fa_0
- sacremoses=0.0.53=pyhd8ed1ab_0
- safetensors=0.3.3=py310hcb5633a_1
- sentencepiece=0.1.99=hff52083_0
- sentencepiece-python=0.1.99=py310hebdb9f0_0
- sentencepiece-spm=0.1.99=h180e1df_0
- setuptools=68.2.2=pyhd8ed1ab_0
- six=1.16.0=pyh6c4a22f_0
- sleef=3.5.1=h9b69904_2
- snappy=1.1.10=h9fff704_0
- sympy=1.12=pypyh9d50eac_103
- tbb=2021.11.0=h00ab1b0_0
- texttable=1.7.0=pyhd8ed1ab_0
- timm=0.9.12=pyhd8ed1ab_0
- tk=8.6.13=noxft_h4845f30_101
- tokenizers=0.14.1=py310h320607d_2
- torchvision=0.14.1=cpu_py310hd3d2ac3_1
- tqdm=4.66.1=pyhd8ed1ab_0
- transformers=4.35.2=pyhd8ed1ab_0
- typing-extensions=4.9.0=hd8ed1ab_0
- typing_extensions=4.9.0=pyha770c72_0
- tzdata=2023c=h71feb2d_0
- ucx=1.14.1=h64cca9d_5
- urllib3=2.1.0=pyhd8ed1ab_0
- wcwidth=0.2.12=pyhd8ed1ab_0
- wheel=0.42.0=pyhd8ed1ab_0
- xorg-libxau=1.0.11=hd590300_0
- xorg-libxdmcp=1.1.3=h7f98852_0
- xxhash=0.8.2=hd590300_0
- xz=5.2.6=h166bdaf_0
- yaml=0.2.5=h7f98852_2
- yarl=1.9.3=py310h2372a71_0
- zipp=3.17.0=pyhd8ed1ab_0
- zlib=1.2.13=hd590300_5
- zstd=1.5.5=hfc55251_0
- pip:
- git+https://github.com/fyfrey/TinyNeuralNetwork.git

View file

@ -0,0 +1,157 @@
import logging
import os
import platform
import subprocess
from abc import abstractmethod
import onnx
import open_clip
import torch
from onnx2torch import convert
from onnxruntime.tools.onnx_model_utils import fix_output_shapes, make_input_shape_fixed
from tinynn.converter import TFLiteConverter
class ExportBase(torch.nn.Module):
input_shape: tuple[int, ...]
def __init__(self, device: torch.device, name: str):
super().__init__()
self.device = device
self.name = name
self.optimize = 5
self.nchw_transpose = False
@abstractmethod
def forward(self, input_tensor: torch.Tensor) -> torch.Tensor | tuple[torch.Tensor]:
pass
def dummy_input(self) -> torch.FloatTensor:
return torch.rand((1, 3, 224, 224), device=self.device)
class ArcFace(ExportBase):
    """Wraps an ONNX model, converted to torch with a fixed input shape, for export."""

    input_shape = (1, 3, 112, 112)

    def __init__(self, onnx_model_path: str, device: torch.device):
        """
        Args:
            onnx_model_path: Path to the ONNX file; its stem becomes the export name.
            device: Target device. On CUDA the converted model is cast to half precision.
        """
        name, _ = os.path.splitext(os.path.basename(onnx_model_path))
        super().__init__(device, name)
        onnx_model = onnx.load_model(onnx_model_path)
        # Pin the first graph input to ``input_shape`` and refresh the output shapes
        # before handing the graph to onnx2torch.
        make_input_shape_fixed(onnx_model.graph, onnx_model.graph.input[0].name, self.input_shape)
        fix_output_shapes(onnx_model)
        self.model = convert(onnx_model).to(device)
        if self.device.type == "cuda":
            self.model = self.model.half()

    def forward(self, input_tensor: torch.Tensor) -> torch.FloatTensor:
        """Run the wrapped model and return its output cast to float32."""
        embedding: torch.FloatTensor = self.model(
            input_tensor.half() if self.device.type == "cuda" else input_tensor
        ).float()
        # ``isinstance(x, torch.FloatTensor)`` only matches CPU tensors, so it would
        # fail on the CUDA path; checking the dtype works on every device.
        assert embedding.dtype == torch.float32
        return embedding

    def dummy_input(self) -> torch.FloatTensor:
        """Return a random tensor of ``input_shape`` on ``self.device``."""
        return torch.rand(self.input_shape, device=self.device)
class RetinaFace(ExportBase):
    """Wraps an ONNX model with multiple outputs, converted to torch, for export."""

    input_shape = (1, 3, 640, 640)

    def __init__(self, onnx_model_path: str, device: torch.device):
        """
        Args:
            onnx_model_path: Path to the ONNX file; its stem becomes the export name.
            device: Target device. On CUDA the converted model is cast to half precision.
        """
        stem = os.path.splitext(os.path.basename(onnx_model_path))[0]
        super().__init__(device, stem)
        self.optimize = 3
        self.model = convert(onnx_model_path).eval().to(device)
        if self.device.type == "cuda":
            self.model = self.model.half()

    def forward(self, input_tensor: torch.Tensor) -> tuple[torch.FloatTensor]:
        """Run the wrapped model and return every output cast to float32."""
        if self.device.type == "cuda":
            model_input = input_tensor.half()
        else:
            model_input = input_tensor
        out: torch.Tensor = self.model(model_input)
        return tuple(o.float() for o in out)

    def dummy_input(self) -> torch.FloatTensor:
        """Return a random tensor of ``input_shape`` on ``self.device``."""
        return torch.rand(self.input_shape, device=self.device)
class ClipVision(ExportBase):
    """Wraps the image encoder of an open_clip model for export."""

    input_shape = (1, 3, 224, 224)

    def __init__(self, model_name: str, weights: str, device: torch.device):
        """
        Args:
            model_name: open_clip model name.
            weights: Pretrained weights tag passed to open_clip.
            device: Target device. fp16 precision is requested on CUDA, fp32 otherwise.
        """
        super().__init__(device, model_name + "__" + weights)
        precision = "fp16" if device.type == "cuda" else "fp32"
        self.model = open_clip.create_model(
            model_name,
            weights,
            precision=precision,
            jit=False,
            require_pretrained=True,
            device=device,
        )

    def forward(self, input_tensor: torch.Tensor) -> torch.FloatTensor:
        """Encode the input with ``normalize=True`` and return the result as float32."""
        if self.device.type == "cuda":
            model_input = input_tensor.half()
        else:
            model_input = input_tensor
        embedding: torch.Tensor = self.model.encode_image(model_input, normalize=True).float()
        return embedding
def export(model: ExportBase) -> None:
    """Trace ``model``, convert it to TFLite, then convert the TFLite file to ArmNN.

    Writes ``output/<model.name>.tflite`` and ``output/<model.name>.armnn``.

    Raises:
        subprocess.CalledProcessError: If ``armnnconverter`` exits with a non-zero status.
    """
    model.eval()
    for param in model.parameters():
        param.requires_grad = False
    dummy_input = model.dummy_input()
    # Run the model once eagerly before tracing.
    model(dummy_input)
    jit = torch.jit.trace(model, dummy_input)  # type: ignore[no-untyped-call,attr-defined]
    tflite_model_path = f"output/{model.name}.tflite"
    os.makedirs("output", exist_ok=True)

    converter = TFLiteConverter(
        jit,
        dummy_input,
        tflite_model_path,
        optimize=model.optimize,
        nchw_transpose=model.nchw_transpose,
    )
    # segfaults on ARM, must run on x86_64 / AMD64
    converter.convert()

    armnn_model_path = f"output/{model.name}.armnn"
    # The converter resolves its shared libraries through LD_LIBRARY_PATH.
    os.environ["LD_LIBRARY_PATH"] = "armnn"
    subprocess.run(
        [
            "./armnnconverter",
            "-f",
            "tflite-binary",
            "-m",
            tflite_model_path,
            "-i",
            "input_tensor",
            "-o",
            "output_tensor",
            "-p",
            armnn_model_path,
        ],
        # Fail loudly instead of silently leaving a missing or partial .armnn file.
        check=True,
    )
def main() -> None:
    """Export every configured model, using CUDA when it is available.

    Raises:
        RuntimeError: When not running on an x86_64 / AMD64 machine.
    """
    machine = platform.machine()
    if machine not in ("x86_64", "AMD64"):
        raise RuntimeError(f"Can only run on x86_64 / AMD64, not {machine}")

    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    if device.type != "cuda":
        logging.warning(
            "No CUDA available, cannot create fp16 model! proceeding to create a fp32 model (use only for testing)"
        )

    models = [
        ClipVision("ViT-B-32", "openai", device),
        ArcFace("buffalo_l_rec.onnx", device),
        RetinaFace("buffalo_l_det.onnx", device),
    ]
    for exportable in models:
        export(exportable)


if __name__ == "__main__":
    # Gradients are never needed while exporting.
    with torch.no_grad():
        main()

View file

@ -0,0 +1,185 @@
import json
from typing import Any, Iterator
from unittest import mock
import numpy as np
import pytest
from fastapi.testclient import TestClient
from numpy.typing import NDArray
from PIL import Image
from immich_ml.config import log
from immich_ml.main import app
@pytest.fixture
def pil_image() -> Image.Image:
    """Provide a new RGB image created with size (600, 800)."""
    size = (600, 800)
    return Image.new("RGB", size)


@pytest.fixture
def cv_image(pil_image: Image.Image) -> NDArray[np.float32]:
    """Provide the ``pil_image`` pixels as an array with its last axis reversed."""
    # NOTE(review): the annotation says float32; confirm the dtype np.asarray yields here.
    pixels = np.asarray(pil_image)
    return pixels[:, :, ::-1]  # PIL uses RGB while cv2 uses BGR
@pytest.fixture
def mock_get_model() -> Iterator[mock.Mock]:
    """Patch ``immich_ml.models.cache.from_model_type`` (autospec) for one test."""
    target = "immich_ml.models.cache.from_model_type"
    with mock.patch(target, autospec=True) as patched:
        yield patched


@pytest.fixture(scope="session")
def deployed_app() -> Iterator[TestClient]:
    """Provide a session-wide ``TestClient`` bound to the application."""
    with TestClient(app) as test_client:
        yield test_client
@pytest.fixture(scope="session")
def responses() -> dict[str, Any]:
    """Load the parsed contents of ``responses.json`` from the current working directory."""
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open("responses.json", "r") as file:
        responses: dict[str, Any] = json.load(file)
    return responses
@pytest.fixture(scope="session")
def clip_model_cfg() -> dict[str, Any]:
    """Provide a CLIP model configuration dictionary for tests."""
    vision_cfg = {"image_size": 224, "layers": 12, "width": 768, "patch_size": 32}
    text_cfg = {"context_length": 77, "vocab_size": 49408, "width": 512, "heads": 8, "layers": 12}
    return {
        "embed_dim": 512,
        "vision_cfg": vision_cfg,
        "text_cfg": text_cfg,
    }


@pytest.fixture(scope="session")
def clip_preprocess_cfg() -> dict[str, Any]:
    """Provide a CLIP image preprocessing configuration dictionary for tests."""
    cfg: dict[str, Any] = {
        "size": [224, 224],
        "mode": "RGB",
        "mean": [0.48145466, 0.4578275, 0.40821073],
        "std": [0.26862954, 0.26130258, 0.27577711],
        "interpolation": "bicubic",
        "resize_mode": "shortest",
        "fill_color": 0,
    }
    return cfg


@pytest.fixture(scope="session")
def clip_tokenizer_cfg() -> dict[str, Any]:
    """Provide a CLIP tokenizer configuration dictionary for tests."""
    start_token = {
        "content": "<|startoftext|>",
        "lstrip": False,
        "normalized": True,
        "rstrip": False,
        "single_word": False,
        "special": True,
    }
    end_token = {
        "content": "<|endoftext|>",
        "lstrip": False,
        "normalized": True,
        "rstrip": False,
        "single_word": False,
        "special": True,
    }
    return {
        "add_prefix_space": False,
        "added_tokens_decoder": {
            "49406": start_token,
            "49407": end_token,
        },
        "bos_token": "<|startoftext|>",
        "clean_up_tokenization_spaces": True,
        "do_lower_case": True,
        "eos_token": "<|endoftext|>",
        "errors": "replace",
        "model_max_length": 77,
        "pad_token": "<|endoftext|>",
        "tokenizer_class": "CLIPTokenizer",
        "unk_token": "<|endoftext|>",
    }
@pytest.fixture(scope="function")
def providers(request: pytest.FixtureRequest) -> Iterator[mock.Mock]:
    """Make onnxruntime report the providers given by the test's ``providers`` marker.

    Yields the provider list itself (the marker's first argument), not the mock.

    Raises:
        ValueError: If the requesting test has no ``providers`` marker.
    """
    marker = request.node.get_closest_marker("providers")
    if marker is None:
        raise ValueError("Missing marker 'providers'")

    available = marker.args[0]
    target = "immich_ml.sessions.ort.ort.get_available_providers"
    with mock.patch(target) as patched:
        patched.return_value = available
        yield available
@pytest.fixture(scope="function")
def ort_pybind() -> Iterator[mock.Mock]:
    """Patch ``immich_ml.sessions.ort.ort.capi._pybind_state`` for one test."""
    target = "immich_ml.sessions.ort.ort.capi._pybind_state"
    with mock.patch(target) as patched:
        yield patched


@pytest.fixture(scope="function")
def ov_device_ids(request: pytest.FixtureRequest, ort_pybind: mock.Mock) -> Iterator[mock.Mock]:
    """Make the patched pybind state report the ids given by the ``ov_device_ids`` marker.

    Raises:
        ValueError: If the requesting test has no ``ov_device_ids`` marker.
    """
    # NOTE(review): annotated as an iterator, but the fixture returns the mock directly.
    marker = request.node.get_closest_marker("ov_device_ids")
    if marker is None:
        raise ValueError("Missing marker 'ov_device_ids'")

    device_ids = marker.args[0]
    ort_pybind.get_available_openvino_device_ids.return_value = device_ids
    return ort_pybind
@pytest.fixture(scope="function")
def ort_session() -> Iterator[mock.Mock]:
    """Patch ``immich_ml.sessions.ort.ort.InferenceSession`` for one test."""
    target = "immich_ml.sessions.ort.ort.InferenceSession"
    with mock.patch(target) as patched:
        yield patched


@pytest.fixture(scope="function")
def ann_session() -> Iterator[mock.Mock]:
    """Patch ``immich_ml.sessions.ann.Ann`` for one test."""
    target = "immich_ml.sessions.ann.Ann"
    with mock.patch(target) as patched:
        yield patched


@pytest.fixture(scope="function")
def rknn_session() -> Iterator[mock.Mock]:
    """Patch ``immich_ml.sessions.rknn.RknnPoolExecutor`` for one test."""
    target = "immich_ml.sessions.rknn.RknnPoolExecutor"
    with mock.patch(target) as patched:
        yield patched
@pytest.fixture(scope="function")
def rmtree() -> Iterator[mock.Mock]:
    """Patch ``immich_ml.models.base.rmtree`` (autospec) and mark it as symlink-safe."""
    target = "immich_ml.models.base.rmtree"
    with mock.patch(target, autospec=True) as patched:
        patched.avoids_symlink_attacks = True
        yield patched


@pytest.fixture(scope="function")
def path() -> Iterator[mock.Mock]:
    """Patch ``immich_ml.models.base.Path`` so every path looks like an existing file and directory."""
    fake_path = mock.MagicMock()
    for probe in ("exists", "is_dir", "is_file"):
        getattr(fake_path, probe).return_value = True
    # Derived paths and calls resolve back to the same mock.
    fake_path.with_suffix.return_value = fake_path
    fake_path.return_value = fake_path

    with mock.patch("immich_ml.models.base.Path", return_value=fake_path) as patched:
        yield patched
@pytest.fixture(scope="function")
def info() -> Iterator[mock.Mock]:
    """Patch ``log.info`` for one test."""
    with mock.patch.object(log, "info") as patched:
        yield patched


@pytest.fixture(scope="function")
def warning() -> Iterator[mock.Mock]:
    """Patch ``log.warning`` for one test."""
    with mock.patch.object(log, "warning") as patched:
        yield patched


@pytest.fixture(scope="function")
def exception() -> Iterator[mock.Mock]:
    """Patch ``log.exception`` for one test."""
    with mock.patch.object(log, "exception") as patched:
        yield patched


@pytest.fixture(scope="function")
def snapshot_download() -> Iterator[mock.Mock]:
    """Patch ``immich_ml.models.base.snapshot_download`` for one test."""
    target = "immich_ml.models.base.snapshot_download"
    with mock.patch(target) as patched:
        yield patched

View file

View file

@ -0,0 +1,57 @@
import os
import signal
import subprocess
from ipaddress import ip_address
from pathlib import Path
from .config import log, non_prefixed_settings, settings
# Announce start-up, including the source ref when IMMICH_SOURCE_REF is set and non-empty.
source_ref = os.getenv("IMMICH_SOURCE_REF")
if source_ref:
    log.info(f"Initializing Immich ML [{source_ref}]")
else:
    log.info("Initializing Immich ML")

# Directory of this package; the gunicorn and logging config files are resolved against it.
module_dir = Path(__file__).parent
def is_ipv6(host: str) -> bool:
    """Return True when ``host`` parses as an IPv6 address literal.

    Anything ``ip_address`` rejects (host names, bracketed addresses, empty
    strings) yields False.
    """
    try:
        parsed = ip_address(host)
    except ValueError:
        return False
    return parsed.version == 6
# Bare IPv6 literals must be wrapped in brackets before a port can be appended.
bind_host = non_prefixed_settings.immich_host
if is_ipv6(bind_host):
    bind_host = f"[{bind_host}]"
bind_address = f"{bind_host}:{non_prefixed_settings.immich_port}"

# Run gunicorn as a child process and mirror its exit status.
with subprocess.Popen(
    [
        "python",
        "-m",
        "gunicorn",
        "immich_ml.main:app",
        "-k",
        "immich_ml.config.CustomUvicornWorker",
        "-c",
        module_dir / "gunicorn_conf.py",
        "-b",
        bind_address,
        "-w",
        str(settings.workers),
        "-t",
        str(settings.worker_timeout),
        "--log-config-json",
        module_dir / "log_conf.json",
        "--keep-alive",
        str(settings.http_keepalive_timeout_s),
        "--graceful-timeout",
        "10",
    ],
) as cmd:
    try:
        cmd.wait()
    except KeyboardInterrupt:
        # Forward the interrupt, then wait for the child so that ``returncode``
        # is populated; otherwise it may still be None and we would exit with 0.
        cmd.send_signal(signal.SIGINT)
        cmd.wait()

# ``exit`` is only injected by the ``site`` module; SystemExit is always available.
raise SystemExit(cmd.returncode)

View file

@ -0,0 +1,165 @@
import concurrent.futures
import logging
import os
import sys
from pathlib import Path
from socket import socket
from gunicorn.arbiter import Arbiter
from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict
from rich.console import Console
from rich.logging import RichHandler
from uvicorn import Server
from uvicorn.workers import UvicornWorker
from .schemas import ModelPrecision
# CLIP models to preload, one optional entry per encoder. The preload code splits
# each value on "," so an entry may name several models.
class ClipSettings(BaseModel):
textual: str | None = None
visual: str | None = None
# Facial recognition models to preload, one optional entry per model type.
class FacialRecognitionSettings(BaseModel):
recognition: str | None = None
detection: str | None = None
# OCR models to preload, one optional entry per model type.
class OcrSettings(BaseModel):
recognition: str | None = None
detection: str | None = None
# Models to preload at start-up, grouped by task.
# The statements between the fields run once, while the class body is executed:
# they translate the deprecated single-value variables into the nested ones.
class PreloadModelData(BaseModel):
# Values of the deprecated variables, captured when the class is defined.
clip_fallback: str | None = os.getenv("MACHINE_LEARNING_PRELOAD__CLIP", None)
facial_recognition_fallback: str | None = os.getenv("MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION", None)
# Copy the deprecated CLIP value to both nested variables, then remove the old one.
if clip_fallback is not None:
os.environ["MACHINE_LEARNING_PRELOAD__CLIP__TEXTUAL"] = clip_fallback
os.environ["MACHINE_LEARNING_PRELOAD__CLIP__VISUAL"] = clip_fallback
del os.environ["MACHINE_LEARNING_PRELOAD__CLIP"]
# Same translation for the deprecated facial recognition variable.
if facial_recognition_fallback is not None:
os.environ["MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION__RECOGNITION"] = facial_recognition_fallback
os.environ["MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION__DETECTION"] = facial_recognition_fallback
del os.environ["MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION"]
# Per-task settings; each defaults to an instance with no models selected.
clip: ClipSettings = ClipSettings()
facial_recognition: FacialRecognitionSettings = FacialRecognitionSettings()
ocr: OcrSettings = OcrSettings()
# Optional per-task batch size limits; None leaves the corresponding limit unset.
class MaxBatchSize(BaseModel):
facial_recognition: int | None = None
text_recognition: int | None = None
# Service settings read from environment variables prefixed with MACHINE_LEARNING_.
# Variable names are case-insensitive and "__" separates nested fields.
class Settings(BaseSettings):
model_config = SettingsConfigDict(
env_prefix="MACHINE_LEARNING_",
case_sensitive=False,
env_nested_delimiter="__",
protected_namespaces=("settings_",),
)
# Default cache location: ~/.cache/immich_ml, resolved to an absolute path.
cache_folder: Path = (Path.home() / ".cache" / "immich_ml").resolve()
model_ttl: int = 300
model_ttl_poll_s: int = 10
workers: int = 1
worker_timeout: int = 300
http_keepalive_timeout_s: int = 2
test_full: bool = False
# Falls back to 4 when os.cpu_count() returns None.
request_threads: int = os.cpu_count() or 4
model_inter_op_threads: int = 0
model_intra_op_threads: int = 0
model_arena: bool = True
ann: bool = True
ann_fp16_turbo: bool = False
ann_tuning_level: int = 2
rknn: bool = True
rknn_threads: int = 1
preload: PreloadModelData | None = None
max_batch_size: MaxBatchSize | None = None
openvino_precision: ModelPrecision = ModelPrecision.FP32
# Read from the environment on every access rather than stored as a field;
# returns "0" when MACHINE_LEARNING_DEVICE_ID is not set.
@property
def device_id(self) -> str:
return os.environ.get("MACHINE_LEARNING_DEVICE_ID", "0")
# Settings read from environment variables without the MACHINE_LEARNING_ prefix
# (no env_prefix is configured); names are case-insensitive.
class NonPrefixedSettings(BaseSettings):
model_config = SettingsConfigDict(case_sensitive=False)
immich_host: str = "[::]"
immich_port: int = 3003
immich_log_level: str = "info"
no_color: bool = False
# Translation table: ":", "\" and "/" become "_", and "." is removed.
_clean_name = str.maketrans(":\\/", "___", ".")


def clean_name(model_name: str) -> str:
    """Return the last "/"-separated segment of ``model_name`` with separators normalised.

    In that segment ":" and "\\" are replaced by "_" and "." is dropped.
    """
    last_segment = model_name.rsplit("/", 1)[-1]
    return last_segment.translate(_clean_name)
# Maps accepted log level names to logging module levels.
# NOTE(review): "critical" maps to logging.ERROR, not logging.CRITICAL; confirm this is intended.
LOG_LEVELS: dict[str, int] = {
"critical": logging.ERROR,
"error": logging.ERROR,
"warning": logging.WARNING,
"warn": logging.WARNING,
"info": logging.INFO,
"log": logging.INFO,
"debug": logging.DEBUG,
"verbose": logging.DEBUG,
}
# Module-level singletons, built once at import time.
settings = Settings()
non_prefixed_settings = NonPrefixedSettings()
# Unrecognised level names fall back to INFO; the lookup is case-insensitive.
LOG_LEVEL = LOG_LEVELS.get(non_prefixed_settings.immich_log_level.lower(), logging.INFO)
class CustomRichHandler(RichHandler):
    """Rich log handler that hides frames of selected packages from rendered tracebacks."""

    def __init__(self) -> None:
        # Substrings matched against frame file names in ``emit``.
        self.excluded = ["uvicorn", "starlette", "fastapi"]
        rich_console = Console(color_system="standard", no_color=non_prefixed_settings.no_color)
        super().__init__(
            show_path=False,
            omit_repeated_times=False,
            console=rich_console,
            rich_tracebacks=True,
            tracebacks_suppress=[*self.excluded, concurrent.futures],
            tracebacks_show_locals=LOG_LEVEL == logging.DEBUG,
        )

    # hack to exclude certain modules from rich tracebacks
    def emit(self, record: logging.LogRecord) -> None:
        """Flag frames from excluded packages with ``_rich_traceback_omit``, then emit."""
        if record.exc_info is not None:
            current = record.exc_info[2]
            while current is not None:
                filename = current.tb_frame.f_code.co_filename
                if any(name in filename for name in self.excluded):
                    current.tb_frame.f_locals["_rich_traceback_omit"] = True
                current = current.tb_next

        return super().emit(record)
# Shared application logger, set to the configured level.
log = logging.getLogger("ml.log")
log.setLevel(LOG_LEVEL)
# patches this issue https://github.com/encode/uvicorn/discussions/1803
# Server subclass that closes the sockets passed to shutdown() itself and then
# calls the parent shutdown without them.
class CustomUvicornServer(Server):
async def shutdown(self, sockets: list[socket] | None = None) -> None:
# A missing socket list is treated as empty.
for sock in sockets or []:
sock.close()
await super().shutdown()
# Gunicorn worker that serves the app through CustomUvicornServer.
# NOTE(review): presumably mirrors UvicornWorker._serve apart from the server class; confirm against the installed uvicorn version.
class CustomUvicornWorker(UvicornWorker):
async def _serve(self) -> None:
self.config.app = self.wsgi
server = CustomUvicornServer(config=self.config)
self._install_sigquit_handler()
await server.serve(sockets=self.sockets)
# Exit with gunicorn's boot-error code if the server never reached the started state.
if not server.started:
sys.exit(Arbiter.WORKER_BOOT_ERROR)

View file

@ -0,0 +1,12 @@
import os
from gunicorn.arbiter import Arbiter
from gunicorn.workers.base import Worker
# Comma-separated device ids from MACHINE_LEARNING_DEVICE_IDS with spaces removed; defaults to ["0"].
device_ids = os.environ.get("MACHINE_LEARNING_DEVICE_IDS", "0").replace(" ", "").split(",")
env = os.environ


# Round-robin device assignment for each worker
def pre_fork(arbiter: Arbiter, _: Worker) -> None:
    """Set MACHINE_LEARNING_DEVICE_ID to the device id chosen for the next worker.

    The id is selected by the arbiter's current worker count modulo the number
    of configured device ids.
    """
    slot = len(arbiter.WORKERS) % len(device_ids)
    env["MACHINE_LEARNING_DEVICE_ID"] = device_ids[slot]

View file

@ -0,0 +1,21 @@
{
"version": 1,
"disable_existing_loggers": false,
"handlers": {
"console": {
"class": "immich_ml.config.CustomRichHandler"
}
},
"loggers": {
"gunicorn.error": {
"handlers": [
"console"
]
}
},
"root": {
"handlers": [
"console"
]
}
}

View file

@ -0,0 +1,272 @@
import asyncio
import gc
import os
import signal
import threading
import time
from concurrent.futures import ThreadPoolExecutor
from contextlib import asynccontextmanager
from functools import partial
from typing import Any, AsyncGenerator, Callable, Iterator
from zipfile import BadZipFile
import orjson
from fastapi import Depends, FastAPI, File, Form, HTTPException
from fastapi.responses import ORJSONResponse, PlainTextResponse
from onnxruntime.capi.onnxruntime_pybind11_state import InvalidProtobuf, NoSuchFile
from PIL.Image import Image
from pydantic import ValidationError
from starlette.formparsers import MultiPartParser
from immich_ml.models import get_model_deps
from immich_ml.models.base import InferenceModel
from immich_ml.models.transforms import decode_pil
from .config import PreloadModelData, log, settings
from .models.cache import ModelCache
from .schemas import (
InferenceEntries,
InferenceEntry,
InferenceResponse,
ModelFormat,
ModelIdentity,
ModelTask,
ModelType,
PipelineRequest,
T,
)
# Set on the parser class itself, so the limit is not per-instance.
MultiPartParser.spool_max_size = 2**26 # spools to disk if payload is 64 MiB or larger
# Cache entries are revalidated on hit only when a positive model TTL is configured.
model_cache = ModelCache(revalidate=settings.model_ttl > 0)
# Created in lifespan() when request_threads > 0; None means callables run inline.
thread_pool: ThreadPoolExecutor | None = None
# Held while a model is being loaded (see load()).
lock = threading.Lock()
# Request bookkeeping written by update_state() and read by idle_shutdown_task().
active_requests = 0
last_called: float | None = None
@asynccontextmanager
async def lifespan(_: FastAPI) -> AsyncGenerator[None, None]:
    """Application lifespan: set up the thread pool, idle watchdog and preloaded models.

    On shutdown the watchdog is cancelled, log handlers are cleared, cached
    models are released and the thread pool is shut down.
    """
    global thread_pool
    log.info(
        (
            "Created in-memory cache with unloading "
            f"{f'after {settings.model_ttl}s of inactivity' if settings.model_ttl > 0 else 'disabled'}."
        )
    )

    # The event loop only keeps weak references to tasks, so hold one here to
    # keep the watchdog alive for as long as the application runs.
    idle_task: asyncio.Future[None] | None = None
    try:
        if settings.request_threads > 0:
            # asyncio is a huge bottleneck for performance, so we use a thread pool to run blocking code
            thread_pool = ThreadPoolExecutor(settings.request_threads)
            log.info(f"Initialized request thread pool with {settings.request_threads} threads.")
        if settings.model_ttl > 0 and settings.model_ttl_poll_s > 0:
            idle_task = asyncio.ensure_future(idle_shutdown_task())
        if settings.preload is not None:
            await preload_models(settings.preload)
        yield
    finally:
        if idle_task is not None:
            idle_task.cancel()
        log.handlers.clear()
        # Drop the cache's references so the models can actually be collected;
        # deleting only a loop variable would leave them referenced by the cache.
        model_cache.cache._cache.clear()
        if thread_pool is not None:
            thread_pool.shutdown()
        gc.collect()
async def preload_models(preload: PreloadModelData) -> None:
    """Load every model named in ``preload`` and warn about deprecated variables.

    Each configured value may hold several comma-separated model names.
    """
    log.info(f"Preloading models: clip:{preload.clip} facial_recognition:{preload.facial_recognition}")

    async def load_models(model_string: str, model_type: ModelType, model_task: ModelTask) -> None:
        for raw_name in model_string.split(","):
            model = await model_cache.get(raw_name.strip(), model_type, model_task)
            await load(model)

    # (configured names, model type, task), in loading order.
    targets = (
        (preload.clip.textual, ModelType.TEXTUAL, ModelTask.SEARCH),
        (preload.clip.visual, ModelType.VISUAL, ModelTask.SEARCH),
        (preload.facial_recognition.detection, ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION),
        (preload.facial_recognition.recognition, ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION),
        (preload.ocr.detection, ModelType.DETECTION, ModelTask.OCR),
        (preload.ocr.recognition, ModelType.RECOGNITION, ModelTask.OCR),
    )
    for names, model_type, model_task in targets:
        if names is not None:
            await load_models(names, model_type, model_task)

    if preload.clip_fallback is not None:
        log.warning(
            "Deprecated env variable: 'MACHINE_LEARNING_PRELOAD__CLIP'. "
            "Use 'MACHINE_LEARNING_PRELOAD__CLIP__TEXTUAL' and "
            "'MACHINE_LEARNING_PRELOAD__CLIP__VISUAL' instead."
        )

    if preload.facial_recognition_fallback is not None:
        log.warning(
            "Deprecated env variable: 'MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION'. "
            "Use 'MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION__DETECTION' and "
            "'MACHINE_LEARNING_PRELOAD__FACIAL_RECOGNITION__RECOGNITION' instead."
        )
# Generator used as a dependency of /predict: counts the request as active and
# stamps the time of the call, then decrements the counter once the generator is resumed.
def update_state() -> Iterator[None]:
global active_requests, last_called
active_requests += 1
last_called = time.time()
try:
yield
finally:
# Runs even when an error is raised at the yield, so the counter cannot leak.
active_requests -= 1
def get_entries(entries: str = Form()) -> InferenceEntries:
    """Parse the ``entries`` form field into inference entries.

    Args:
        entries: JSON object mapping task -> model type -> entry, where each entry
            has a ``modelName`` and optional ``options``.

    Returns:
        A pair ``(without_deps, with_deps)``: entries whose model class has no
        dependencies, and entries whose model class has some.

    Raises:
        HTTPException: 422 when the payload cannot be parsed into entries.
    """
    try:
        request: PipelineRequest = orjson.loads(entries)
        without_deps: list[InferenceEntry] = []
        with_deps: list[InferenceEntry] = []
        for task, types in request.items():
            for model_type, entry in types.items():
                parsed: InferenceEntry = {
                    "name": entry["modelName"],
                    "task": task,
                    "type": model_type,
                    "options": entry.get("options", {}),
                }
                dep = get_model_deps(parsed["name"], model_type, task)
                (with_deps if dep else without_deps).append(parsed)
        return without_deps, with_deps
    # TypeError covers entries that are not mappings (e.g. a string or a list);
    # ValueError covers unknown model/task/type combinations. Both are client
    # errors and must not surface as a 500.
    except (orjson.JSONDecodeError, ValidationError, KeyError, AttributeError, TypeError, ValueError) as e:
        log.error(f"Invalid request format: {e}")
        raise HTTPException(422, "Invalid request format.") from e
# The ASGI application; start-up and shutdown are handled by lifespan().
app = FastAPI(lifespan=lifespan)
# Returns a fixed JSON message.
@app.get("/")
async def root() -> ORJSONResponse:
return ORJSONResponse({"message": "Immich ML"})
# Returns the plain text "pong".
@app.get("/ping")
def ping() -> PlainTextResponse:
return PlainTextResponse("pong")
# Inference endpoint. Accepts either an uploaded image or a text form field,
# runs the requested models on it and returns their outputs as JSON.
# Responds with 400 when neither image nor text is supplied.
@app.post("/predict", dependencies=[Depends(update_state)])
async def predict(
entries: InferenceEntries = Depends(get_entries),
image: bytes | None = File(default=None),
text: str | None = Form(default=None),
) -> Any:
# An image takes precedence over text when both are supplied.
if image is not None:
# Decoding goes through run(), i.e. the request thread pool when one is configured.
inputs: Image | str = await run(lambda: decode_pil(image))
elif text is not None:
inputs = text
else:
raise HTTPException(400, "Either image or text must be provided")
response = await run_inference(inputs, entries)
return ORJSONResponse(response)
# Run every requested model on the payload and collect the outputs keyed by task.
# Entries without dependencies are gathered first; entries with dependencies are
# gathered afterwards so they can read earlier results from `outputs`.
# Raises HTTPException(400) when an entry depends on an output that was not produced.
async def run_inference(payload: Image | str, entries: InferenceEntries) -> InferenceResponse:
# Results keyed by model identity, used to feed dependent models.
outputs: dict[ModelIdentity, Any] = {}
response: InferenceResponse = {}
async def _run_inference(entry: InferenceEntry) -> None:
model = await model_cache.get(
entry["name"], entry["type"], entry["task"], ttl=settings.model_ttl, **entry["options"]
)
# The payload always comes first, followed by the output of each dependency.
inputs = [payload]
for dep in model.depends:
try:
inputs.append(outputs[dep])
except KeyError:
message = f"Task {entry['task']} of type {entry['type']} depends on output of {dep}"
raise HTTPException(400, message)
model = await load(model)
output = await run(model.predict, *inputs, **entry["options"])
outputs[model.identity] = output
# Entries sharing a task overwrite each other here; the last one to finish wins.
response[entry["task"]] = output
without_deps, with_deps = entries
await asyncio.gather(*[_run_inference(entry) for entry in without_deps])
if with_deps:
await asyncio.gather(*[_run_inference(entry) for entry in with_deps])
# Image payloads additionally report their dimensions.
if isinstance(payload, Image):
response["imageHeight"], response["imageWidth"] = payload.height, payload.width
return response
async def run(func: Callable[..., T], *args: Any, **kwargs: Any) -> T:
    """Call ``func(*args, **kwargs)`` in the request thread pool and return its result.

    When no thread pool has been created, the call is made directly on the
    calling coroutine instead.
    """
    if thread_pool is not None:
        bound_call = partial(func, *args, **kwargs)
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(thread_pool, bound_call)
    return func(*args, **kwargs)
# Ensure `model` is loaded and return it.
# A load that fails with one of the listed file/format errors clears the model's
# cache and is retried once; _load() refuses to run after more than one attempt.
async def load(model: InferenceModel) -> InferenceModel:
if model.loaded:
return model
def _load(model: InferenceModel) -> InferenceModel:
# load_attempts is incremented by model.load(), so this trips on the third call.
if model.load_attempts > 1:
raise HTTPException(500, f"Failed to load model '{model.model_name}'")
# Loading is serialised through the module-level lock.
with lock:
try:
model.load()
except FileNotFoundError as e:
# A missing file for the ONNX format is a real error; for any other
# format, fall back to ONNX and try again.
if model.model_format == ModelFormat.ONNX:
raise e
log.warning(
f"{model.model_format.upper()} is available, but model '{model.model_name}' does not support it.",
exc_info=e,
)
model.model_format = ModelFormat.ONNX
model.load()
return model
try:
return await run(_load, model)
except (OSError, InvalidProtobuf, BadZipFile, NoSuchFile):
log.warning(f"Failed to load {model.model_type.replace('_', ' ')} model '{model.model_name}'. Clearing cache.")
model.clear_cache()
return await run(_load, model)
async def idle_shutdown_task() -> None:
    """Poll for inactivity and send SIGINT to this process once it has been idle too long.

    The process counts as idle when a request has been seen before, none is
    active, the model lock is free and more than ``settings.model_ttl`` seconds
    have passed since the last call.
    """
    while True:
        is_idle = (
            last_called is not None
            and not active_requests
            and not lock.locked()
            and time.time() - last_called > settings.model_ttl
        )
        if is_idle:
            log.info("Shutting down due to inactivity.")
            os.kill(os.getpid(), signal.SIGINT)
            return
        await asyncio.sleep(settings.model_ttl_poll_s)

View file

@ -0,0 +1,48 @@
from typing import Any
from immich_ml.models.base import InferenceModel
from immich_ml.models.clip.textual import MClipTextualEncoder, OpenClipTextualEncoder
from immich_ml.models.clip.visual import OpenClipVisualEncoder
from immich_ml.models.ocr.detection import TextDetector
from immich_ml.models.ocr.recognition import TextRecognizer
from immich_ml.schemas import ModelSource, ModelTask, ModelType
from .constants import get_model_source
from .facial_recognition.detection import FaceDetector
from .facial_recognition.recognition import FaceRecognizer
# Resolve the InferenceModel subclass for a model name, type and task.
# The model source is derived from the name via get_model_source(); the
# (source, type, task) triple then selects the class.
# Raises ValueError when no case matches the triple.
def get_model_class(model_name: str, model_type: ModelType, model_task: ModelTask) -> type[InferenceModel]:
source = get_model_source(model_name)
match source, model_type, model_task:
# Both CLIP sources share the same visual encoder class.
case ModelSource.OPENCLIP | ModelSource.MCLIP, ModelType.VISUAL, ModelTask.SEARCH:
return OpenClipVisualEncoder
case ModelSource.OPENCLIP, ModelType.TEXTUAL, ModelTask.SEARCH:
return OpenClipTextualEncoder
case ModelSource.MCLIP, ModelType.TEXTUAL, ModelTask.SEARCH:
return MClipTextualEncoder
case ModelSource.INSIGHTFACE, ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION:
return FaceDetector
case ModelSource.INSIGHTFACE, ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION:
return FaceRecognizer
case ModelSource.PADDLE, ModelType.DETECTION, ModelTask.OCR:
return TextDetector
case ModelSource.PADDLE, ModelType.RECOGNITION, ModelTask.OCR:
return TextRecognizer
case _:
raise ValueError(f"Unknown model combination: {source}, {model_type}, {model_task}")
def from_model_type(model_name: str, model_type: ModelType, model_task: ModelTask, **kwargs: Any) -> InferenceModel:
    """Instantiate the model class matching the name, type and task.

    ``kwargs`` are passed through to the model constructor.
    """
    model_class = get_model_class(model_name, model_type, model_task)
    return model_class(model_name, **kwargs)


def get_model_deps(model_name: str, model_type: ModelType, model_task: ModelTask) -> list[tuple[ModelType, ModelTask]]:
    """Return the ``depends`` list declared by the matching model class."""
    model_class = get_model_class(model_name, model_type, model_task)
    return model_class.depends

View file

@ -0,0 +1,176 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from pathlib import Path
from shutil import rmtree
from typing import Any, ClassVar
from huggingface_hub import snapshot_download
import immich_ml.sessions.ann.loader
import immich_ml.sessions.rknn as rknn
from immich_ml.sessions.ort import OrtSession
from ..config import clean_name, log, settings
from ..schemas import ModelFormat, ModelIdentity, ModelSession, ModelTask, ModelType
from ..sessions.ann import AnnSession
class InferenceModel(ABC):
depends: ClassVar[list[ModelIdentity]]
identity: ClassVar[ModelIdentity]
def __init__(
self,
model_name: str,
cache_dir: Path | str | None = None,
model_format: ModelFormat | None = None,
session: ModelSession | None = None,
**model_kwargs: Any,
) -> None:
self.loaded = session is not None
self.load_attempts = 0
self.model_name = clean_name(model_name)
self.cache_dir = Path(cache_dir) if cache_dir is not None else self._cache_dir_default
self.model_format = model_format if model_format is not None else self._model_format_default
if session is not None:
self.session = session
def download(self) -> None:
if not self.cached:
model_type = self.model_type.replace("-", " ")
log.info(f"Downloading {model_type} model '{self.model_name}' to {self.model_path}. This may take a while.")
self._download()
def load(self) -> None:
if self.loaded:
return
self.load_attempts += 1
self.download()
attempt = f"Attempt #{self.load_attempts} to load" if self.load_attempts > 1 else "Loading"
log.info(f"{attempt} {self.model_type.replace('-', ' ')} model '{self.model_name}' to memory")
self.session = self._load()
self.loaded = True
def predict(self, *inputs: Any, **model_kwargs: Any) -> Any:
self.load()
if model_kwargs:
self.configure(**model_kwargs)
return self._predict(*inputs)
@abstractmethod
def _predict(self, *inputs: Any, **model_kwargs: Any) -> Any: ...
def configure(self, **kwargs: Any) -> None:
pass
def _download(self) -> None:
ignored_patterns: dict[ModelFormat, list[str]] = {
ModelFormat.ONNX: ["*.armnn", "*.rknn"],
ModelFormat.ARMNN: ["*.rknn"],
ModelFormat.RKNN: ["*.armnn"],
}
snapshot_download(
f"immich-app/{clean_name(self.model_name)}",
cache_dir=self.cache_dir,
local_dir=self.cache_dir,
ignore_patterns=ignored_patterns.get(self.model_format, []),
)
def _load(self) -> ModelSession:
return self._make_session(self.model_path)
def clear_cache(self) -> None:
if not self.cache_dir.exists():
log.warning(
f"Attempted to clear cache for model '{self.model_name}', but cache directory does not exist",
)
return
if not rmtree.avoids_symlink_attacks:
raise RuntimeError("Attempted to clear cache, but rmtree is not safe on this platform")
if self.cache_dir.is_dir():
log.info(f"Cleared cache directory for model '{self.model_name}'.")
rmtree(self.cache_dir)
else:
log.warning(
(
f"Encountered file instead of directory at cache path "
f"for '{self.model_name}'. Removing file and replacing with a directory."
),
)
self.cache_dir.unlink()
self.cache_dir.mkdir(parents=True, exist_ok=True)
def _make_session(self, model_path: Path) -> ModelSession:
if not model_path.is_file():
raise FileNotFoundError(f"Model file not found: {model_path}")
match model_path.suffix:
case ".armnn":
session: ModelSession = AnnSession(model_path)
case ".onnx":
session = OrtSession(model_path)
case ".rknn":
session = rknn.RknnSession(model_path)
case _:
raise ValueError(f"Unsupported model file type: {model_path.suffix}")
return session
def model_path_for_format(self, model_format: ModelFormat) -> Path:
model_path_prefix = rknn.model_prefix if model_format == ModelFormat.RKNN else None
if model_path_prefix:
return self.model_dir / model_path_prefix / f"model.{model_format}"
return self.model_dir / f"model.{model_format}"
@property
def model_dir(self) -> Path:
return self.cache_dir / self.model_type.value
@property
def model_path(self) -> Path:
return self.model_path_for_format(self.model_format)
@property
def model_task(self) -> ModelTask:
return self.identity[1]
@property
def model_type(self) -> ModelType:
return self.identity[0]
@property
def cache_dir(self) -> Path:
return self._cache_dir
@cache_dir.setter
def cache_dir(self, cache_dir: Path) -> None:
self._cache_dir = cache_dir
@property
def _cache_dir_default(self) -> Path:
return settings.cache_folder / self.model_task.value / self.model_name
@property
def cached(self) -> bool:
return self.model_path.is_file()
@property
def model_format(self) -> ModelFormat:
return self._model_format
@model_format.setter
def model_format(self, model_format: ModelFormat) -> None:
log.debug(f"Setting model format to {model_format}")
self._model_format = model_format
@property
def _model_format_default(self) -> ModelFormat:
if rknn.is_available:
return ModelFormat.RKNN
elif immich_ml.sessions.ann.loader.is_available and settings.ann:
return ModelFormat.ARMNN
else:
return ModelFormat.ONNX

View file

@ -0,0 +1,60 @@
from typing import Any
from aiocache.backends.memory import SimpleMemoryCache
from aiocache.lock import OptimisticLock
from aiocache.plugins import TimingPlugin
from immich_ml.models import from_model_type
from immich_ml.models.base import InferenceModel
from ..schemas import ModelTask, ModelType, has_profiling
class ModelCache:
    """Fetches a model from an in-memory cache, instantiating it if it's missing."""

    def __init__(
        self,
        revalidate: bool = False,
        timeout: int | None = None,
        profiling: bool = False,
    ) -> None:
        """
        Args:
            revalidate: Resets TTL on cache hit. Useful to keep models in memory while active. Defaults to False.
            timeout: Maximum allowed time for model to load. Disabled if None. Defaults to None.
            profiling: Collects metrics for cache operations, adding slight overhead. Defaults to False.
        """

        plugins = [TimingPlugin()] if profiling else []
        self.should_revalidate = revalidate
        self.cache = SimpleMemoryCache(timeout=timeout, plugins=plugins, namespace=None)

    async def get(
        self, model_name: str, model_type: ModelType, model_task: ModelTask, **model_kwargs: Any
    ) -> InferenceModel:
        """Return the cached model for this name, type and task, creating it on a miss.

        ``model_kwargs`` are passed to the model constructor on a miss; its
        optional ``ttl`` entry is also used as the cache entry's time to live.
        """
        key = f"{model_name}{model_type}{model_task}"
        ttl = model_kwargs.get("ttl", None)

        async with OptimisticLock(self.cache, key) as lock:
            model: InferenceModel | None = await self.cache.get(key)
            if model is not None:
                if self.should_revalidate:
                    await self.revalidate(key, ttl)
            else:
                model = from_model_type(model_name, model_type, model_task, **model_kwargs)
                await lock.cas(model, ttl=ttl)
        return model

    async def get_profiling(self) -> dict[str, float] | None:
        """Return the cache's profiling data, or None when profiling is not enabled."""
        if has_profiling(self.cache):
            return self.cache.profiling
        return None

    async def revalidate(self, key: str, ttl: int | None) -> None:
        """Reset the expiry of ``key`` to ``ttl`` when it already has an expiry handler."""
        if ttl is None or key not in self.cache._handlers:
            return
        await self.cache.expire(key, ttl)

View file

@ -0,0 +1,120 @@
import json
from abc import abstractmethod
from functools import cached_property
from pathlib import Path
from typing import Any
import numpy as np
from numpy.typing import NDArray
from tokenizers import Encoding, Tokenizer
from immich_ml.config import log
from immich_ml.models.base import InferenceModel
from immich_ml.models.constants import WEBLATE_TO_FLORES200
from immich_ml.models.transforms import clean_text, serialize_np_array
from immich_ml.schemas import ModelSession, ModelTask, ModelType
class BaseCLIPTextualEncoder(InferenceModel):
    """
    Common behaviour for CLIP text encoders: config/tokenizer loading and inference.

    Subclasses provide `_load_tokenizer` and `tokenize`.
    """

    depends = []
    identity = (ModelType.TEXTUAL, ModelTask.SEARCH)

    def _predict(self, inputs: str, language: str | None = None) -> str:
        """Tokenize `inputs`, run the session and return the first row of the first output, serialized."""
        tokens = self.tokenize(inputs, language=language)
        res: NDArray[np.float32] = self.session.run(None, tokens)[0][0]
        return serialize_np_array(res)

    def _load(self) -> ModelSession:
        """Load the session via the base class, then the tokenizer and the flags derived from the config."""
        session = super()._load()
        log.debug(f"Loading tokenizer for CLIP model '{self.model_name}'")
        self.tokenizer = self._load_tokenizer()
        tokenizer_kwargs: dict[str, Any] | None = self.text_cfg.get("tokenizer_kwargs")
        # Text is canonicalized only when the config explicitly asks for the "canonicalize" cleaner.
        self.canonicalize = tokenizer_kwargs is not None and tokenizer_kwargs.get("clean") == "canonicalize"
        self.is_nllb = self.model_name.startswith("nllb")
        log.debug(f"Loaded tokenizer for CLIP model '{self.model_name}'")
        return session

    @abstractmethod
    def _load_tokenizer(self) -> Tokenizer:
        """Build and return the tokenizer used by `tokenize`."""
        pass

    @abstractmethod
    def tokenize(self, text: str, language: str | None = None) -> dict[str, NDArray[np.int32]]:
        """Convert `text` into the named input arrays fed to the session."""
        pass

    @property
    def model_cfg_path(self) -> Path:
        """Path of the model's `config.json` (under `cache_dir`)."""
        return self.cache_dir / "config.json"

    @property
    def tokenizer_file_path(self) -> Path:
        """Path of `tokenizer.json` (under `model_dir`)."""
        return self.model_dir / "tokenizer.json"

    @property
    def tokenizer_cfg_path(self) -> Path:
        """Path of `tokenizer_config.json` (under `model_dir`)."""
        return self.model_dir / "tokenizer_config.json"

    @cached_property
    def model_cfg(self) -> dict[str, Any]:
        """Parsed contents of `model_cfg_path`; read once and cached."""
        log.debug(f"Loading model config for CLIP model '{self.model_name}'")
        # Use a context manager so the file handle is closed deterministically.
        with self.model_cfg_path.open() as cfg_file:
            model_cfg: dict[str, Any] = json.load(cfg_file)
        log.debug(f"Loaded model config for CLIP model '{self.model_name}'")
        return model_cfg

    @property
    def text_cfg(self) -> dict[str, Any]:
        """The "text_cfg" section of the model config."""
        text_cfg: dict[str, Any] = self.model_cfg["text_cfg"]
        return text_cfg

    @cached_property
    def tokenizer_file(self) -> dict[str, Any]:
        """Parsed contents of `tokenizer_file_path`; read once and cached."""
        log.debug(f"Loading tokenizer file for CLIP model '{self.model_name}'")
        with self.tokenizer_file_path.open() as tokenizer_fp:
            tokenizer_file: dict[str, Any] = json.load(tokenizer_fp)
        log.debug(f"Loaded tokenizer file for CLIP model '{self.model_name}'")
        return tokenizer_file

    @cached_property
    def tokenizer_cfg(self) -> dict[str, Any]:
        """Parsed contents of `tokenizer_cfg_path`; read once and cached."""
        log.debug(f"Loading tokenizer config for CLIP model '{self.model_name}'")
        with self.tokenizer_cfg_path.open() as cfg_file:
            tokenizer_cfg: dict[str, Any] = json.load(cfg_file)
        log.debug(f"Loaded tokenizer config for CLIP model '{self.model_name}'")
        return tokenizer_cfg
class OpenClipTextualEncoder(BaseCLIPTextualEncoder):
    """Text encoder for OpenCLIP models, including the language-prefixed "nllb" variants."""

    def _load_tokenizer(self) -> Tokenizer:
        """Load `tokenizer.json` and configure fixed-length padding and truncation."""
        max_tokens: int = self.text_cfg.get("context_length", 77)
        padding_token: str = self.tokenizer_cfg["pad_token"]

        loaded: Tokenizer = Tokenizer.from_file(self.tokenizer_file_path.as_posix())
        padding_id: int = loaded.token_to_id(padding_token)
        # Every encoding is padded and truncated to exactly the model's context length.
        loaded.enable_padding(length=max_tokens, pad_token=padding_token, pad_id=padding_id)
        loaded.enable_truncation(max_length=max_tokens)
        return loaded

    def tokenize(self, text: str, language: str | None = None) -> dict[str, NDArray[np.int32]]:
        """
        Clean and encode `text` into a single-row "text" id array.

        For nllb models with a language given, the matching FLORES-200 code is prepended to the text,
        trying the full language tag first, then the part before "-", then falling back to English.
        """
        cleaned = clean_text(text, canonicalize=self.canonicalize)
        if self.is_nllb and language is not None:
            prefix = WEBLATE_TO_FLORES200.get(language)
            if prefix is None:
                prefix = WEBLATE_TO_FLORES200.get(language.split("-")[0])
            if prefix is None:
                log.warning(f"Language '{language}' not found, defaulting to 'en'")
                prefix = "eng_Latn"
            cleaned = f"{prefix}{cleaned}"
        encoding: Encoding = self.tokenizer.encode(cleaned)
        return {"text": np.array([encoding.ids], dtype=np.int32)}
class MClipTextualEncoder(OpenClipTextualEncoder):
    """Text encoder for M-CLIP models, which take token ids together with an attention mask."""

    def tokenize(self, text: str, language: str | None = None) -> dict[str, NDArray[np.int32]]:
        """Clean and encode `text`; `language` is accepted for interface parity but not used."""
        encoding: Encoding = self.tokenizer.encode(clean_text(text, canonicalize=self.canonicalize))
        input_ids = np.array([encoding.ids], dtype=np.int32)
        attention_mask = np.array([encoding.attention_mask], dtype=np.int32)
        # Key order is kept as in the original mapping: ids first, then mask.
        return {"input_ids": input_ids, "attention_mask": attention_mask}

View file

@ -0,0 +1,77 @@
import json
from abc import abstractmethod
from functools import cached_property
from pathlib import Path
from typing import Any
import numpy as np
from numpy.typing import NDArray
from PIL import Image
from immich_ml.config import log
from immich_ml.models.base import InferenceModel
from immich_ml.models.transforms import (
crop_pil,
decode_pil,
get_pil_resampling,
normalize,
resize_pil,
serialize_np_array,
to_numpy,
)
from immich_ml.schemas import ModelSession, ModelTask, ModelType
class BaseCLIPVisualEncoder(InferenceModel):
    """
    Common behaviour for CLIP image encoders: config loading and inference.

    Subclasses provide `transform`, which turns a PIL image into the session's input feed.
    """

    depends = []
    identity = (ModelType.VISUAL, ModelTask.SEARCH)

    def _predict(self, inputs: Image.Image | bytes) -> str:
        """Decode `inputs`, run the session and return the first row of the first output, serialized."""
        image = decode_pil(inputs)
        res: NDArray[np.float32] = self.session.run(None, self.transform(image))[0][0]
        return serialize_np_array(res)

    @abstractmethod
    def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:
        """Preprocess `image` into the named input arrays fed to the session."""
        pass

    @property
    def model_cfg_path(self) -> Path:
        """Path of the model's `config.json` (under `cache_dir`)."""
        return self.cache_dir / "config.json"

    @property
    def preprocess_cfg_path(self) -> Path:
        """Path of `preprocess_cfg.json` (under `model_dir`)."""
        return self.model_dir / "preprocess_cfg.json"

    @cached_property
    def model_cfg(self) -> dict[str, Any]:
        """Parsed contents of `model_cfg_path`; read once and cached."""
        log.debug(f"Loading model config for CLIP model '{self.model_name}'")
        # Use a context manager so the file handle is closed deterministically.
        with self.model_cfg_path.open() as cfg_file:
            model_cfg: dict[str, Any] = json.load(cfg_file)
        log.debug(f"Loaded model config for CLIP model '{self.model_name}'")
        return model_cfg

    @cached_property
    def preprocess_cfg(self) -> dict[str, Any]:
        """Parsed contents of `preprocess_cfg_path`; read once and cached."""
        log.debug(f"Loading visual preprocessing config for CLIP model '{self.model_name}'")
        with self.preprocess_cfg_path.open() as cfg_file:
            preprocess_cfg: dict[str, Any] = json.load(cfg_file)
        log.debug(f"Loaded visual preprocessing config for CLIP model '{self.model_name}'")
        return preprocess_cfg
class OpenClipVisualEncoder(BaseCLIPVisualEncoder):
    """Image encoder for OpenCLIP models: resize, center-crop, normalize, then NCHW layout."""

    def _load(self) -> ModelSession:
        """Read the preprocessing parameters from `preprocess_cfg`, then load the session."""
        cfg = self.preprocess_cfg
        configured_size: list[int] | int = cfg["size"]
        # "size" may be a single int or a list; when it is a list only its first entry is used.
        self.size = configured_size if not isinstance(configured_size, list) else configured_size[0]
        # NOTE(review): `resampling` is stored here but `transform` does not pass it to `resize_pil` — confirm intent.
        self.resampling = get_pil_resampling(cfg["interpolation"])
        self.mean = np.array(cfg["mean"], dtype=np.float32)
        self.std = np.array(cfg["std"], dtype=np.float32)
        return super()._load()

    def transform(self, image: Image.Image) -> dict[str, NDArray[np.float32]]:
        """Return {"image": array} where the array is the preprocessed image with a leading batch axis."""
        resized = resize_pil(image, self.size)
        cropped = crop_pil(resized, self.size)
        pixels = normalize(to_numpy(cropped), self.mean, self.std)
        # Move channels first (HWC -> CHW) and prepend the batch dimension.
        channels_first = pixels.transpose(2, 0, 1)
        return {"image": np.expand_dims(channels_first, 0)}

View file

@ -0,0 +1,178 @@
from immich_ml.config import clean_name
from immich_ml.schemas import ModelSource
# Model names that `get_model_source` classifies as ModelSource.OPENCLIP.
# The name passed to `get_model_source` is run through `clean_name` before the membership test.
_OPENCLIP_MODELS = {
    "RN101__openai",
    "RN101__yfcc15m",
    "RN50__cc12m",
    "RN50__openai",
    "RN50__yfcc15m",
    "RN50x16__openai",
    "RN50x4__openai",
    "RN50x64__openai",
    "ViT-B-16-SigLIP-256__webli",
    "ViT-B-16-SigLIP-384__webli",
    "ViT-B-16-SigLIP-512__webli",
    "ViT-B-16-SigLIP-i18n-256__webli",
    "ViT-B-16-SigLIP__webli",
    "ViT-B-16-plus-240__laion400m_e31",
    "ViT-B-16-plus-240__laion400m_e32",
    "ViT-B-16__laion400m_e31",
    "ViT-B-16__laion400m_e32",
    "ViT-B-16__openai",
    "ViT-B-32__laion2b-s34b-b79k",
    "ViT-B-32__laion2b_e16",
    "ViT-B-32__laion400m_e31",
    "ViT-B-32__laion400m_e32",
    "ViT-B-32__openai",
    "ViT-H-14-378-quickgelu__dfn5b",
    "ViT-H-14-quickgelu__dfn5b",
    "ViT-H-14__laion2b-s32b-b79k",
    "ViT-L-14-336__openai",
    "ViT-L-14-quickgelu__dfn2b",
    "ViT-L-14__laion2b-s32b-b82k",
    "ViT-L-14__laion400m_e31",
    "ViT-L-14__laion400m_e32",
    "ViT-L-14__openai",
    "ViT-L-16-SigLIP-256__webli",
    "ViT-L-16-SigLIP-384__webli",
    "ViT-SO400M-14-SigLIP-384__webli",
    "ViT-g-14__laion2b-s12b-b42k",
    "XLM-Roberta-Base-ViT-B-32__laion5b_s13b_b90k",
    "XLM-Roberta-Large-ViT-H-14__frozen_laion5b_s13b_b90k",
    "nllb-clip-base-siglip__mrl",
    "nllb-clip-base-siglip__v1",
    "nllb-clip-large-siglip__mrl",
    "nllb-clip-large-siglip__v1",
    "ViT-B-16-SigLIP2__webli",
    "ViT-B-32-SigLIP2-256__webli",
    "ViT-L-16-SigLIP2-256__webli",
    "ViT-L-16-SigLIP2-384__webli",
    "ViT-L-16-SigLIP2-512__webli",
    "ViT-SO400M-14-SigLIP2-378__webli",
    "ViT-SO400M-14-SigLIP2__webli",
    "ViT-SO400M-16-SigLIP2-256__webli",
    "ViT-SO400M-16-SigLIP2-384__webli",
    "ViT-SO400M-16-SigLIP2-512__webli",
    "ViT-gopt-16-SigLIP2-256__webli",
    "ViT-gopt-16-SigLIP2-384__webli",
}
# Model names that `get_model_source` classifies as ModelSource.MCLIP.
_MCLIP_MODELS = {
    "LABSE-Vit-L-14",
    "XLM-Roberta-Large-Vit-B-16Plus",
    "XLM-Roberta-Large-Vit-B-32",
    "XLM-Roberta-Large-Vit-L-14",
}
# Model names that `get_model_source` classifies as ModelSource.INSIGHTFACE.
_INSIGHTFACE_MODELS = {
    "antelopev2",
    "buffalo_s",
    "buffalo_m",
    "buffalo_l",
}
# Model names that `get_model_source` classifies as ModelSource.PADDLE.
# Some entries carry a "<PREFIX>__" part in front of the model name.
# NOTE(review): the prefix looks like a language code (the OCR recognizer looks it up in LangRec) — confirm.
_PADDLE_MODELS = {
    "PP-OCRv5_server",
    "PP-OCRv5_mobile",
    "CH__PP-OCRv5_server",
    "CH__PP-OCRv5_mobile",
    "EL__PP-OCRv5_mobile",
    "EN__PP-OCRv5_mobile",
    "ESLAV__PP-OCRv5_mobile",
    "KOREAN__PP-OCRv5_mobile",
    "LATIN__PP-OCRv5_mobile",
    "TH__PP-OCRv5_mobile",
}
# Execution provider names recognised by this package.
# NOTE(review): the list order presumably expresses preference, with the CPU provider as last resort — confirm at the use site.
SUPPORTED_PROVIDERS = [
    "CUDAExecutionProvider",
    "ROCMExecutionProvider",
    "OpenVINOExecutionProvider",
    "CoreMLExecutionProvider",
    "CPUExecutionProvider",
]
# SoC identifiers for which RKNN is considered supported.
RKNN_SUPPORTED_SOCS = ["rk3566", "rk3568", "rk3576", "rk3588"]
# Subset of the SoCs above flagged as supporting a core mask — TODO confirm exact meaning at the use site.
RKNN_COREMASK_SUPPORTED_SOCS = ["rk3576", "rk3588"]
# Maps a UI language code (keys such as "de", "pt-BR", "zh-Hans") to the language/script code that is
# prepended to the text by the nllb text encoder. Callers that miss on a full tag may retry with the
# part before "-", so region-specific keys are only needed where the bare language is ambiguous or absent.
WEBLATE_TO_FLORES200 = {
    "af": "afr_Latn",
    "ar": "arb_Arab",
    "az": "azj_Latn",
    "be": "bel_Cyrl",
    "bg": "bul_Cyrl",
    "ca": "cat_Latn",
    "cs": "ces_Latn",
    "da": "dan_Latn",
    "de": "deu_Latn",
    "el": "ell_Grek",
    "en": "eng_Latn",
    "es": "spa_Latn",
    "et": "est_Latn",
    "fa": "pes_Arab",
    "fi": "fin_Latn",
    "fr": "fra_Latn",
    "he": "heb_Hebr",
    "hi": "hin_Deva",
    "hr": "hrv_Latn",
    "hu": "hun_Latn",
    "hy": "hye_Armn",
    "id": "ind_Latn",
    "it": "ita_Latn",
    "ja": "jpn_Hira",
    "kmr": "kmr_Latn",
    "ko": "kor_Hang",
    "lb": "ltz_Latn",
    "lt": "lit_Latn",
    "lv": "lav_Latn",
    "mfa": "zsm_Latn",
    "mk": "mkd_Cyrl",
    "mn": "khk_Cyrl",
    "mr": "mar_Deva",
    "ms": "zsm_Latn",
    "nb-NO": "nob_Latn",
    "nn": "nno_Latn",
    "nl": "nld_Latn",
    "pl": "pol_Latn",
    "pt-BR": "por_Latn",
    "pt": "por_Latn",
    "ro": "ron_Latn",
    "ru": "rus_Cyrl",
    "sk": "slk_Latn",
    "sl": "slv_Latn",
    "sr-Cyrl": "srp_Cyrl",
    "sv": "swe_Latn",
    "ta": "tam_Taml",
    "te": "tel_Telu",
    "th": "tha_Thai",
    "tr": "tur_Latn",
    "uk": "ukr_Cyrl",
    "ur": "urd_Arab",
    "vi": "vie_Latn",
    "zh-CN": "zho_Hans",
    "zh-Hans": "zho_Hans",
    "zh-TW": "zho_Hant",
}
def get_model_source(model_name: str) -> ModelSource | None:
    """
    Classify a model name by the registry it appears in.

    The name is normalised with `clean_name` first. Registries are checked in a fixed order
    (InsightFace, M-CLIP, OpenCLIP, Paddle); None is returned when no registry contains the name.
    """
    cleaned = clean_name(model_name)
    registries = (
        (_INSIGHTFACE_MODELS, ModelSource.INSIGHTFACE),
        (_MCLIP_MODELS, ModelSource.MCLIP),
        (_OPENCLIP_MODELS, ModelSource.OPENCLIP),
        (_PADDLE_MODELS, ModelSource.PADDLE),
    )
    for known_names, source in registries:
        if cleaned in known_names:
            return source
    return None

View file

@ -0,0 +1,41 @@
from typing import Any
import numpy as np
from insightface.model_zoo import RetinaFace
from numpy.typing import NDArray
from immich_ml.models.base import InferenceModel
from immich_ml.models.transforms import decode_cv2
from immich_ml.schemas import FaceDetectionOutput, ModelSession, ModelTask, ModelType
class FaceDetector(InferenceModel):
    """Face detection model wrapping InsightFace's RetinaFace."""

    depends = []
    identity = (ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION)

    def __init__(self, model_name: str, min_score: float = 0.7, **model_kwargs: Any) -> None:
        """
        Args:
            model_name: Name of the detection model.
            min_score: Detection threshold used unless "minScore" is present in `model_kwargs`.
            **model_kwargs: Remaining options forwarded to the base class ("minScore" is removed first).
        """
        threshold = model_kwargs.pop("minScore", min_score)
        self.min_score = threshold
        super().__init__(model_name, **model_kwargs)

    def _load(self) -> ModelSession:
        """Create the session and prepare a RetinaFace detector on top of it."""
        session = self._make_session(self.model_path)
        detector = RetinaFace(session=session)
        detector.prepare(ctx_id=0, det_thresh=self.min_score, input_size=(640, 640))
        self.model = detector
        return session

    def _predict(self, inputs: NDArray[np.uint8] | bytes) -> FaceDetectionOutput:
        """Detect faces and split the detector output into rounded boxes, scores and landmarks."""
        image = decode_cv2(inputs)
        detections, landmarks = self._detect(image)
        # Columns 0-3 of each detection row are the box, column 4 is its score.
        boxes = detections[:, :4].round()
        scores = detections[:, 4]
        return {"boxes": boxes, "scores": scores, "landmarks": landmarks}

    def _detect(self, inputs: NDArray[np.uint8] | bytes) -> tuple[NDArray[np.float32], NDArray[np.float32]]:
        """Thin wrapper around the underlying detector's `detect`."""
        return self.model.detect(inputs)  # type: ignore

    def configure(self, **kwargs: Any) -> None:
        """Update the detection threshold from "minScore" when given; otherwise keep the current one."""
        new_threshold = kwargs.pop("minScore", self.model.det_thresh)
        self.model.det_thresh = new_threshold

View file

@ -0,0 +1,92 @@
from pathlib import Path
from typing import Any
import numpy as np
import onnx
import onnxruntime as ort
from insightface.model_zoo import ArcFaceONNX
from insightface.utils.face_align import norm_crop
from numpy.typing import NDArray
from onnx.tools.update_model_dims import update_inputs_outputs_dims
from PIL import Image
from immich_ml.config import log, settings
from immich_ml.models.base import InferenceModel
from immich_ml.models.transforms import decode_cv2, serialize_np_array
from immich_ml.schemas import (
FaceDetectionOutput,
FacialRecognitionOutput,
ModelFormat,
ModelSession,
ModelTask,
ModelType,
)
class FaceRecognizer(InferenceModel):
    """Face embedding model wrapping InsightFace's ArcFaceONNX; consumes the face detector's output."""

    depends = [(ModelType.DETECTION, ModelTask.FACIAL_RECOGNITION)]
    identity = (ModelType.RECOGNITION, ModelTask.FACIAL_RECOGNITION)

    def __init__(self, model_name: str, **model_kwargs: Any) -> None:
        """Initialise the base model and resolve the batch size (settings override, else the default)."""
        super().__init__(model_name, **model_kwargs)
        limits = settings.max_batch_size
        configured = limits.facial_recognition if limits else None
        self.batch_size = configured or self._batch_size_default

    def _load(self) -> ModelSession:
        """Create the session, rewriting the model file with a dynamic batch axis first if needed."""
        session = self._make_session(self.model_path)
        batching_enabled = not self.batch_size or self.batch_size > 1
        if batching_enabled and str(session.get_inputs()[0].shape[0]) != "batch":
            # The model on disk has a fixed first dimension: patch it and reload.
            self._add_batch_axis(self.model_path)
            session = self._make_session(self.model_path)
        onnx_path = self.model_path_for_format(ModelFormat.ONNX).as_posix()
        self.model = ArcFaceONNX(onnx_path, session=session)
        return session

    def _predict(
        self, inputs: NDArray[np.uint8] | bytes | Image.Image, faces: FaceDetectionOutput
    ) -> FacialRecognitionOutput:
        """Return one entry (box, embedding, score) per detected face; empty list when there are none."""
        if faces["boxes"].shape[0] == 0:
            return []
        image = decode_cv2(inputs)
        aligned = self._crop(image, faces)
        return self.postprocess(faces, self._predict_batch(aligned))

    def _predict_batch(self, cropped_faces: list[NDArray[np.uint8]]) -> NDArray[np.float32]:
        """Embed all crops, in chunks of at most `batch_size` when a batch size is set."""
        chunk = self.batch_size
        if not chunk or len(cropped_faces) <= chunk:
            single_pass: NDArray[np.float32] = self.model.get_feat(cropped_faces)
            return single_pass

        parts: list[NDArray[np.float32]] = [
            self.model.get_feat(cropped_faces[start : start + chunk])
            for start in range(0, len(cropped_faces), chunk)
        ]
        return np.concatenate(parts, axis=0)

    def postprocess(self, faces: FaceDetectionOutput, embeddings: NDArray[np.float32]) -> FacialRecognitionOutput:
        """Zip boxes, embeddings and scores into the response entries."""
        results: FacialRecognitionOutput = []
        for box, embedding, score in zip(faces["boxes"], embeddings, faces["scores"]):
            x1, y1, x2, y2 = box
            results.append(
                {
                    "boundingBox": {"x1": x1, "y1": y1, "x2": x2, "y2": y2},
                    "embedding": serialize_np_array(embedding),
                    "score": score,
                }
            )
        return results

    def _crop(self, image: NDArray[np.uint8], faces: FaceDetectionOutput) -> list[NDArray[np.uint8]]:
        """Produce one aligned crop per face from its landmarks."""
        crops: list[NDArray[np.uint8]] = []
        for landmark in faces["landmarks"]:
            crops.append(norm_crop(image, landmark))
        return crops

    def _add_batch_axis(self, model_path: Path) -> None:
        """Rewrite the ONNX file in place so the first dim of its first input/output is named "batch"."""
        log.debug(f"Adding batch axis to model {model_path}")
        proto = onnx.load(model_path)
        first_input = proto.graph.input[0]
        first_output = proto.graph.output[0]
        # Keep every dimension after the first as-is; only the leading one becomes symbolic.
        static_input_dims = [dim.dim_value for dim in first_input.type.tensor_type.shape.dim[1:]]
        static_output_dims = [dim.dim_value for dim in first_output.type.tensor_type.shape.dim[1:]]
        updated_proto = update_inputs_outputs_dims(
            proto,
            {first_input.name: ["batch"] + static_input_dims},
            {first_output.name: ["batch"] + static_output_dims},
        )
        onnx.save(updated_proto, model_path)

    @property
    def _batch_size_default(self) -> int | None:
        """None (no limit) for ONNX models when OpenVINO is not among the available providers; otherwise 1."""
        providers = ort.get_available_providers()
        if self.model_format == ModelFormat.ONNX and "OpenVINOExecutionProvider" not in providers:
            return None
        return 1

View file

@ -0,0 +1,125 @@
from typing import Any
import cv2
import numpy as np
from numpy.typing import NDArray
from PIL import Image
from rapidocr.ch_ppocr_det.utils import DBPostProcess
from rapidocr.inference_engine.base import FileInfo, InferSession
from rapidocr.utils.download_file import DownloadFile, DownloadFileInput
from rapidocr.utils.typings import EngineType, LangDet, OCRVersion, TaskType
from rapidocr.utils.typings import ModelType as RapidModelType
from immich_ml.config import log
from immich_ml.models.base import InferenceModel
from immich_ml.schemas import ModelFormat, ModelSession, ModelTask, ModelType
from immich_ml.sessions.ort import OrtSession
from .schemas import TextDetectionOutput
class TextDetector(InferenceModel):
    """Text detection stage of OCR: finds text boxes with a PP-OCRv5 detection model run via OrtSession."""

    depends = []
    identity = (ModelType.DETECTION, ModelTask.OCR)

    def __init__(self, model_name: str, **model_kwargs: Any) -> None:
        """
        Args:
            model_name: Model name; only the part after the last "__" is passed to the base class.
            **model_kwargs: Forwarded to the base class. "minScore" (default 0.5) is additionally
                used as the post-processor's box threshold.
        """
        super().__init__(model_name.split("__")[-1], **model_kwargs, model_format=ModelFormat.ONNX)
        self.max_resolution = 736
        # NOTE(review): `std_inv` folds in the 255 pixel scale but `mean` does not, although `_transform`
        # applies both to 0-255 pixel values. Left unchanged here — confirm against the reference preprocessing.
        self.mean = np.array([0.5, 0.5, 0.5], dtype=np.float32)
        self.std_inv = np.float32(1.0) / (np.array([0.5, 0.5, 0.5], dtype=np.float32) * 255.0)
        # Shared result returned whenever nothing is detected.
        self._empty: TextDetectionOutput = {
            "boxes": np.empty(0, dtype=np.float32),
            "scores": np.empty(0, dtype=np.float32),
        }
        self.postprocess = DBPostProcess(
            thresh=0.3,
            box_thresh=model_kwargs.get("minScore", 0.5),
            max_candidates=1000,
            unclip_ratio=1.6,
            use_dilation=True,
            score_mode="fast",
        )

    def _download(self) -> None:
        """Look up the model's URL and SHA256 through RapidOCR and download it to `model_path`."""
        model_info = InferSession.get_model_url(
            FileInfo(
                engine_type=EngineType.ONNXRUNTIME,
                ocr_version=OCRVersion.PPOCRV5,
                task_type=TaskType.DET,
                lang_type=LangDet.CH,
                model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER,
            )
        )
        download_params = DownloadFileInput(
            file_url=model_info["model_dir"],
            sha256=model_info["SHA256"],
            save_path=self.model_path,
            logger=log,
        )
        DownloadFile.run(download_params)

    def _load(self) -> ModelSession:
        """Create the ONNX Runtime session for the model file."""
        # TODO: support other runtime sessions
        return OrtSession(self.model_path)

    # partly adapted from RapidOCR
    def _predict(self, inputs: Image.Image) -> TextDetectionOutput:
        """
        Detect text boxes in `inputs`.

        Returns the shared empty result for images narrower or shorter than 32 pixels and when the
        post-processor yields no boxes; otherwise boxes in reading order and the post-processor's scores.
        """
        w, h = inputs.size
        if w < 32 or h < 32:
            return self._empty
        out = self.session.run(None, {"x": self._transform(inputs)})[0]
        boxes, scores = self.postprocess(out, (h, w))
        if len(boxes) == 0:
            return self._empty
        return {
            "boxes": self.sorted_boxes(boxes),
            "scores": np.array(scores, dtype=np.float32),
        }

    # adapted from RapidOCR
    def _transform(self, img: Image.Image) -> NDArray[np.float32]:
        """Resize (never upscaling), convert to BGR float32, normalize and return a (1, C, H, W) array."""
        # Scale so the shorter side is at most `max_resolution`.
        if img.height < img.width:
            ratio = float(self.max_resolution) / img.height
        else:
            ratio = float(self.max_resolution) / img.width
        ratio = min(ratio, 1.0)

        resize_h = int(img.height * ratio)
        resize_w = int(img.width * ratio)

        # Snap both sides to the nearest multiple of 32.
        resize_h = int(round(resize_h / 32) * 32)
        resize_w = int(round(resize_w / 32) * 32)
        resized_img = img.resize((int(resize_w), int(resize_h)), resample=Image.Resampling.LANCZOS)

        img_np: NDArray[np.float32] = cv2.cvtColor(np.array(resized_img, dtype=np.float32), cv2.COLOR_RGB2BGR)  # type: ignore
        img_np -= self.mean
        img_np *= self.std_inv
        img_np = np.transpose(img_np, (2, 0, 1))
        return np.expand_dims(img_np, axis=0)

    def sorted_boxes(self, dt_boxes: NDArray[np.float32]) -> NDArray[np.float32]:
        """
        Order boxes top-to-bottom by line, then left-to-right within a line.

        Boxes are indexed as [box, point, (x, y)]; the first point of each box is the sort anchor.
        Walking the boxes in ascending y, a new line starts whenever the y gap to the previous
        box is at least 10.

        Args:
            dt_boxes: Detected boxes; returned unchanged when empty.

        Returns:
            The same boxes, reordered.
        """
        if len(dt_boxes) == 0:
            return dt_boxes

        # Sort by y, then identify lines, then sort by (line, x)
        y_order = np.argsort(dt_boxes[:, 0, 1], kind="stable")
        sorted_y = dt_boxes[y_order, 0, 1]

        # line_ids[k] is the line number of the k-th box in y-sorted order.
        line_ids = np.empty(len(dt_boxes), dtype=np.int32)
        line_ids[0] = 0
        np.cumsum(np.abs(np.diff(sorted_y)) >= 10, out=line_ids[1:])

        # Create composite sort key for final ordering
        # Shift line_ids by large factor, add x for tie-breaking.
        # `line_ids` is already aligned with the y-sorted order, so it must NOT be re-indexed with
        # `y_order`; doing so pairs each box with another box's line number whenever the input is
        # not already sorted by y (e.g. y = [100, 0, 50] came back in its original order).
        sort_key = line_ids * 1e6 + dt_boxes[y_order, 0, 0]
        final_order = np.argsort(sort_key, kind="stable")
        sorted_boxes: NDArray[np.float32] = dt_boxes[y_order[final_order]]
        return sorted_boxes

    def configure(self, **kwargs: Any) -> None:
        """Apply the optional runtime settings "maxResolution", "minScore" and "scoreMode" when present."""
        if (max_resolution := kwargs.get("maxResolution")) is not None:
            self.max_resolution = max_resolution
        if (min_score := kwargs.get("minScore")) is not None:
            self.postprocess.box_thresh = min_score
        if (score_mode := kwargs.get("scoreMode")) is not None:
            self.postprocess.score_mode = score_mode

View file

@ -0,0 +1,153 @@
from typing import Any
import numpy as np
from numpy.typing import NDArray
from PIL import Image
from rapidocr.ch_ppocr_rec import TextRecInput
from rapidocr.ch_ppocr_rec import TextRecognizer as RapidTextRecognizer
from rapidocr.inference_engine.base import FileInfo, InferSession
from rapidocr.utils.download_file import DownloadFile, DownloadFileInput
from rapidocr.utils.typings import EngineType, LangRec, OCRVersion, TaskType
from rapidocr.utils.typings import ModelType as RapidModelType
from rapidocr.utils.vis_res import VisRes
from immich_ml.config import log, settings
from immich_ml.models.base import InferenceModel
from immich_ml.models.transforms import pil_to_cv2
from immich_ml.schemas import ModelFormat, ModelSession, ModelTask, ModelType
from immich_ml.sessions.ort import OrtSession
from .schemas import OcrOptions, TextDetectionOutput, TextRecognitionOutput
class TextRecognizer(InferenceModel):
    """Text recognition stage of OCR: reads the text inside the boxes produced by the text detector."""

    depends = [(ModelType.DETECTION, ModelTask.OCR)]
    identity = (ModelType.RECOGNITION, ModelTask.OCR)

    def __init__(self, model_name: str, **model_kwargs: Any) -> None:
        """
        Args:
            model_name: Model name. When it contains "__", the part before it is looked up in LangRec
                as the recognition language; otherwise LangRec.CH is used.
            **model_kwargs: Forwarded to the base class. "minScore" (default 0.9) is the minimum
                text score a result must exceed to be kept.
        """
        self.language = LangRec[model_name.split("__")[0]] if "__" in model_name else LangRec.CH
        self.min_score = model_kwargs.get("minScore", 0.9)
        # Shared result returned whenever there is nothing to recognise.
        self._empty: TextRecognitionOutput = {
            "box": np.empty(0, dtype=np.float32),
            "boxScore": np.empty(0, dtype=np.float32),
            "text": [],
            "textScore": np.empty(0, dtype=np.float32),
        }
        # Replaces VisRes.__init__ with a no-op for the whole process (class-level monkeypatch).
        VisRes.__init__ = lambda self, **kwargs: None  # pyright: ignore[reportAttributeAccessIssue]
        super().__init__(model_name, **model_kwargs, model_format=ModelFormat.ONNX)

    def _download(self) -> None:
        """Look up the model's URL and SHA256 through RapidOCR and download it to `model_path`."""
        model_info = InferSession.get_model_url(
            FileInfo(
                engine_type=EngineType.ONNXRUNTIME,
                ocr_version=OCRVersion.PPOCRV5,
                task_type=TaskType.REC,
                lang_type=self.language,
                model_type=RapidModelType.MOBILE if "mobile" in self.model_name else RapidModelType.SERVER,
            )
        )
        download_params = DownloadFileInput(
            file_url=model_info["model_dir"],
            sha256=model_info["SHA256"],
            save_path=self.model_path,
            logger=log,
        )
        DownloadFile.run(download_params)

    def _load(self) -> ModelSession:
        """Create the ONNX Runtime session and wrap it in RapidOCR's text recognizer."""
        # TODO: support other runtimes
        session = OrtSession(self.model_path)
        self.model = RapidTextRecognizer(
            OcrOptions(
                session=session.session,
                # Batch size comes from settings when `max_batch_size` is configured, else 6.
                rec_batch_num=settings.max_batch_size.text_recognition if settings.max_batch_size is not None else 6,
                rec_img_shape=(3, 48, 320),
                lang_type=self.language,
            )
        )
        return session

    def _predict(self, img: Image.Image, texts: TextDetectionOutput) -> TextRecognitionOutput:
        """
        Recognise the text in each detected box of `img`.

        Returns the shared empty result when there are no boxes or the recognizer yields no texts.
        Otherwise keeps only entries whose text score is strictly greater than `min_score`; box
        coordinates are divided by the image width/height and flattened into one 1-D array
        (8 values per kept box).
        """
        boxes, box_scores = texts["boxes"], texts["scores"]
        if boxes.shape[0] == 0:
            return self._empty
        rec = self.model(TextRecInput(img=self.get_crop_img_list(img, boxes)))
        if rec.txts is None:
            return self._empty

        # NOTE(review): these divisions modify the caller's `texts["boxes"]` array in place.
        boxes[:, :, 0] /= img.width
        boxes[:, :, 1] /= img.height

        text_scores = np.array(rec.scores)
        valid_text_score_idx = text_scores > self.min_score
        valid_score_idx_list = valid_text_score_idx.tolist()
        return {
            "box": boxes.reshape(-1, 8)[valid_text_score_idx].reshape(-1),
            "text": [rec.txts[i] for i in range(len(rec.txts)) if valid_score_idx_list[i]],
            "boxScore": box_scores[valid_text_score_idx],
            "textScore": text_scores[valid_text_score_idx],
        }

    def get_crop_img_list(self, img: Image.Image, boxes: NDArray[np.float32]) -> list[NDArray[np.uint8]]:
        """
        Cut one rectified crop out of `img` per box.

        Each box is treated as four (x, y) points. The crop size is the longer of the two opposite
        edges in each direction (truncated to int); crops at least 1.5x taller than wide are rotated
        by 90 degrees. Crops are returned converted with `pil_to_cv2`.
        """
        # Crop width: longer of edge p0->p1 and edge p3->p2.
        img_crop_width = np.maximum(
            np.linalg.norm(boxes[:, 1] - boxes[:, 0], axis=1), np.linalg.norm(boxes[:, 2] - boxes[:, 3], axis=1)
        ).astype(np.int32)
        # Crop height: longer of edge p3->p0 and edge p2->p1.
        img_crop_height = np.maximum(
            np.linalg.norm(boxes[:, 0] - boxes[:, 3], axis=1), np.linalg.norm(boxes[:, 1] - boxes[:, 2], axis=1)
        ).astype(np.int32)
        # Corners of each target rectangle: (0, 0), (w, 0), (w, h), (0, h).
        pts_std = np.zeros((img_crop_width.shape[0], 4, 2), dtype=np.float32)
        pts_std[:, 1:3, 0] = img_crop_width[:, None]
        pts_std[:, 2:4, 1] = img_crop_height[:, None]

        img_crop_sizes = np.stack([img_crop_width, img_crop_height], axis=1)
        # Coefficients map crop-rectangle corners onto the box corners in the source image.
        all_coeffs = self._get_perspective_transform(pts_std, boxes)
        imgs: list[NDArray[np.uint8]] = []
        for coeffs, dst_size in zip(all_coeffs, img_crop_sizes):
            dst_img = img.transform(
                size=tuple(dst_size),
                method=Image.Transform.PERSPECTIVE,
                data=tuple(coeffs),
                resample=Image.Resampling.BICUBIC,
            )
            dst_width, dst_height = dst_img.size
            if dst_height * 1.0 / dst_width >= 1.5:
                dst_img = dst_img.rotate(90, expand=True)
            imgs.append(pil_to_cv2(dst_img))

        return imgs

    def _get_perspective_transform(self, src: NDArray[np.float32], dst: NDArray[np.float32]) -> NDArray[np.float32]:
        """
        Compute, for N point sets at once, the perspective coefficients taking `src` points to `dst` points.

        Args:
            src: Array indexed [n, point, (x, y)] with four points per set.
            dst: Array of the same layout holding the corresponding target points.

        Returns:
            An (N, 8) array: the first eight entries of each 3x3 matrix (row-major) after scaling the
            matrix so its last entry is 1.
        """
        N = src.shape[0]
        x, y = src[:, :, 0], src[:, :, 1]
        u, v = dst[:, :, 0], dst[:, :, 1]
        # One 8x9 homogeneous system per set: two rows per point correspondence.
        A = np.zeros((N, 8, 9), dtype=np.float32)

        # Fill even rows (0, 2, 4, 6): [x, y, 1, 0, 0, 0, -u*x, -u*y, -u]
        A[:, ::2, 0] = x
        A[:, ::2, 1] = y
        A[:, ::2, 2] = 1
        A[:, ::2, 6] = -u * x
        A[:, ::2, 7] = -u * y
        A[:, ::2, 8] = -u

        # Fill odd rows (1, 3, 5, 7): [0, 0, 0, x, y, 1, -v*x, -v*y, -v]
        A[:, 1::2, 3] = x
        A[:, 1::2, 4] = y
        A[:, 1::2, 5] = 1
        A[:, 1::2, 6] = -v * x
        A[:, 1::2, 7] = -v * y
        A[:, 1::2, 8] = -v

        # Solve using SVD for all matrices at once
        # The last row of Vt is taken as the solution vector of each system.
        _, _, Vt = np.linalg.svd(A)
        H = Vt[:, -1, :].reshape(N, 3, 3)
        H = H / H[:, 2:3, 2:3]

        # Extract the 8 coefficients for each transformation
        return np.column_stack(
            [H[:, 0, 0], H[:, 0, 1], H[:, 0, 2], H[:, 1, 0], H[:, 1, 1], H[:, 1, 2], H[:, 2, 0], H[:, 2, 1]]
        )  # pyright: ignore[reportReturnType]

    def configure(self, **kwargs: Any) -> None:
        """Update `min_score` from "minScore" when given; otherwise keep the current value."""
        self.min_score = kwargs.get("minScore", self.min_score)

View file

@ -0,0 +1,27 @@
from typing import Any, Iterable
import numpy as np
import numpy.typing as npt
from rapidocr.utils.typings import EngineType, LangRec
from typing_extensions import TypedDict
class TextDetectionOutput(TypedDict):
    """Output of the text detection stage and input to the text recognition stage."""

    # Detected boxes; consumers index them as [box, point, (x, y)]. A flat empty array means "no detections".
    boxes: npt.NDArray[np.float32]
    # One score per detected box.
    scores: npt.NDArray[np.float32]
class TextRecognitionOutput(TypedDict):
    """Output of the text recognition stage; all fields describe the same sequence of kept results."""

    # Box coordinates of the kept results, flattened into one 1-D array.
    box: npt.NDArray[np.float32]
    # Detection score of each kept box.
    boxScore: npt.NDArray[np.float32]
    # Recognised text of each kept box.
    text: Iterable[str]
    # Recognition score of each kept text.
    textScore: npt.NDArray[np.float32]
# RapidOCR reads `engine_type`, `lang_type`, and `font_path` as attributes rather than as dict keys,
# so this dict subclass carries them alongside the regular key/value options.
class OcrOptions(dict[str, Any]):
    """Options mapping for RapidOCR that additionally exposes the attributes it looks up."""

    def __init__(self, lang_type: LangRec | None = None, **options: Any) -> None:
        """Store `options` as dict items and set the three attributes (engine is always ONNX Runtime)."""
        dict.__init__(self, **options)
        self.engine_type = EngineType.ONNXRUNTIME
        self.lang_type = lang_type
        self.font_path = None

View file

@ -0,0 +1,80 @@
import string
from io import BytesIO
from typing import IO
import cv2
import numpy as np
import orjson
from numpy.typing import NDArray
from PIL import Image
# Lower-cased resampling name -> PIL resampling enum member, for lookups by `get_pil_resampling`.
_PIL_RESAMPLING_METHODS = {resampling.name.lower(): resampling for resampling in Image.Resampling}
# Translation table that deletes every ASCII punctuation character; used by `clean_text`.
_PUNCTUATION_TRANS = str.maketrans("", "", string.punctuation)
def resize_pil(img: Image.Image, size: int) -> Image.Image:
    """Resize with bicubic resampling so the shorter side equals `size`, keeping the aspect ratio."""
    width, height = img.width, img.height
    if width < height:
        target = (size, int((height / width) * size))
    else:
        target = (int((width / height) * size), size)
    return img.resize(target, resample=Image.Resampling.BICUBIC)
# https://stackoverflow.com/a/60883103
def crop_pil(img: Image.Image, size: int) -> Image.Image:
    """Return the `size` x `size` crop centered on the image."""
    width, height = img.size
    left = int((width / 2) - (size / 2))
    upper = int((height / 2) - (size / 2))
    box = (left, upper, left + size, upper + size)
    return img.crop(box)
def to_numpy(img: Image.Image) -> NDArray[np.float32]:
    """Convert the image (as RGB) to a float32 array scaled by 1/255."""
    rgb = img if img.mode == "RGB" else img.convert("RGB")
    return np.asarray(rgb, dtype=np.float32) / 255.0
def normalize(
img: NDArray[np.float32], mean: float | NDArray[np.float32], std: float | NDArray[np.float32]
) -> NDArray[np.float32]:
return (img - mean) / std
def get_pil_resampling(resample: str) -> Image.Resampling:
    """Look up a PIL resampling method by name, case-insensitively; unknown names raise KeyError."""
    method_name = resample.lower()
    return _PIL_RESAMPLING_METHODS[method_name]
def pil_to_cv2(image: Image.Image) -> NDArray[np.uint8]:
    """Convert a PIL image to an array with the channel order swapped from RGB to BGR."""
    rgb_pixels = np.array(image)
    return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)  # type: ignore
def decode_pil(image_bytes: bytes | bytearray | memoryview | IO[bytes] | Image.Image) -> Image.Image:
    """
    Decode an image into a fully loaded RGB PIL image.

    Args:
        image_bytes: Encoded image data as a bytes-like object or a binary file object, or an
            already-decoded PIL image.

    Returns:
        The given PIL image unchanged when one is passed in; otherwise the decoded image,
        converted to RGB if it has another mode.
    """
    if isinstance(image_bytes, Image.Image):
        return image_bytes
    # `decode_cv2` forwards bytes, bytearray and memoryview here. All three must be wrapped in a
    # BytesIO: passed as-is they are not file objects and `Image.open` cannot read from them.
    if isinstance(image_bytes, (bytes, bytearray, memoryview)):
        source: IO[bytes] = BytesIO(image_bytes)
    else:
        source = image_bytes
    image: Image.Image = Image.open(source)
    # Force the pixel data to be read now rather than lazily.
    image.load()
    if not image.mode == "RGB":
        image = image.convert("RGB")
    return image
def decode_cv2(image_bytes: NDArray[np.uint8] | bytes | Image.Image) -> NDArray[np.uint8]:
    """Return the image as a BGR array; anything that is neither bytes-like nor a PIL image passes through."""
    if isinstance(image_bytes, (bytes, memoryview, bytearray)):
        return pil_to_cv2(decode_pil(image_bytes))  # pillow is much faster than cv2
    if isinstance(image_bytes, Image.Image):
        return pil_to_cv2(image_bytes)
    return image_bytes
def clean_text(text: str, canonicalize: bool = False) -> str:
    """
    Collapse every run of whitespace to a single space and trim both ends.

    With `canonicalize`, punctuation is additionally removed and the text lower-cased.
    """
    collapsed = " ".join(text.split())
    if not canonicalize:
        return collapsed
    return collapsed.translate(_PUNCTUATION_TRANS).lower()
# Returning a string lets the client pass the array along as-is instead of deserializing it
# only to serialize it back to a string.
# TODO: use this in a less invasive way
def serialize_np_array(arr: NDArray[np.float32]) -> str:
    """Serialize a NumPy array to a JSON string via orjson's NumPy support."""
    encoded = orjson.dumps(arr, option=orjson.OPT_SERIALIZE_NUMPY)
    return encoded.decode()

View file

@ -0,0 +1,122 @@
from enum import Enum
from typing import Any, Literal, Protocol, TypeGuard, TypeVar
import numpy as np
import numpy.typing as npt
from typing_extensions import TypedDict
class StrEnum(str, Enum):
    """Enum whose members are strings and whose `str()` is the member's value."""

    value: str

    def __str__(self) -> str:
        member_value: str = self.value
        return member_value
class BoundingBox(TypedDict):
    """Box described by two points, (x1, y1) and (x2, y2)."""

    x1: int
    y1: int
    x2: int
    y2: int
class ModelTask(StrEnum):
    """Tasks a model can serve. Together with a ModelType it forms a ModelIdentity."""

    FACIAL_RECOGNITION = "facial-recognition"
    SEARCH = "clip"  # the search task's string value is "clip", not "search"
    OCR = "ocr"
class ModelType(StrEnum):
    """Role of a model within a task. Together with a ModelTask it forms a ModelIdentity."""

    DETECTION = "detection"
    RECOGNITION = "recognition"
    TEXTUAL = "textual"
    VISUAL = "visual"
class ModelFormat(StrEnum):
    """Identifiers of the supported model formats."""

    ARMNN = "armnn"
    ONNX = "onnx"
    RKNN = "rknn"
class ModelSource(StrEnum):
    """Identifiers of the model families a model name can belong to."""

    INSIGHTFACE = "insightface"
    MCLIP = "mclip"
    OPENCLIP = "openclip"
    PADDLE = "paddle"
class ModelPrecision(StrEnum):
    """Identifiers of the supported numeric precisions."""

    FP16 = "FP16"
    FP32 = "FP32"
ModelIdentity = tuple[ModelType, ModelTask]
class SessionNode(Protocol):
    """Structural type describing one input or output of a model session."""

    # Name of the node; may be None when the backend does not provide one.
    @property
    def name(self) -> str | None: ...

    # Shape of the node's tensor.
    @property
    def shape(self) -> tuple[int, ...]: ...
class ModelSession(Protocol):
    """Structural type every inference session backend must satisfy."""

    # Run inference on `input_feed` (input name -> array) and return the list of output arrays.
    def run(
        self,
        output_names: list[str] | None,
        input_feed: dict[str, npt.NDArray[np.float32]] | dict[str, npt.NDArray[np.int32]],
        run_options: Any = None,
    ) -> list[npt.NDArray[np.float32]]: ...

    # Describe the session's inputs.
    def get_inputs(self) -> list[SessionNode]: ...

    # Describe the session's outputs.
    def get_outputs(self) -> list[SessionNode]: ...
class HasProfiling(Protocol):
    """Structural type for objects exposing a `profiling` dict; see `has_profiling` for the runtime check."""

    profiling: dict[str, float]
class FaceDetectionOutput(TypedDict):
    """Output of face detection; the three arrays are parallel, one row per detected face."""

    # Four box coordinates per face.
    boxes: npt.NDArray[np.float32]
    # One detection score per face.
    scores: npt.NDArray[np.float32]
    # Landmark points per face, used to align the face crops.
    landmarks: npt.NDArray[np.float32]
class DetectedFace(TypedDict):
    """One recognised face in a facial recognition response."""

    boundingBox: BoundingBox
    # The embedding is carried as an already-serialized string, not as an array.
    embedding: str
    score: float
FacialRecognitionOutput = list[DetectedFace]
class PipelineEntry(TypedDict):
    """One model selection inside a PipelineRequest: which model to use and its options."""

    modelName: str
    options: dict[str, Any]
PipelineRequest = dict[ModelTask, dict[ModelType, PipelineEntry]]
class InferenceEntry(TypedDict):
    """A single model invocation: the model's name, its task and type, and its options."""

    name: str
    task: ModelTask
    type: ModelType
    options: dict[str, Any]
InferenceEntries = tuple[list[InferenceEntry], list[InferenceEntry]]
InferenceResponse = dict[ModelTask | Literal["imageHeight"] | Literal["imageWidth"], Any]
def has_profiling(obj: Any) -> TypeGuard[HasProfiling]:
return hasattr(obj, "profiling") and isinstance(obj.profiling, dict)
T = TypeVar("T")

View file

@ -0,0 +1,58 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, NamedTuple
import numpy as np
from numpy.typing import NDArray
from immich_ml.config import log, settings
from immich_ml.schemas import SessionNode
from .loader import Ann
class AnnSession:
    """
    Wrapper for ANN to be drop-in replacement for ONNX session.
    """

    def __init__(self, model_path: Path, cache_dir: Path = settings.cache_folder) -> None:
        """
        Obtain the ANN runtime and load the model into it.

        Args:
            model_path: Path of the model file. The cached network is stored next to it with the
                ".anncache" suffix.
            cache_dir: Directory holding the "gpu-tuning.ann" tuning file.
        """
        self.model_path = model_path
        self.cache_dir = cache_dir
        self.ann = Ann(tuning_level=settings.ann_tuning_level, tuning_file=(cache_dir / "gpu-tuning.ann").as_posix())

        log.info("Loading ANN model %s ...", model_path)
        self.model = self.ann.load(
            model_path.as_posix(),
            cached_network_path=model_path.with_suffix(".anncache").as_posix(),
            fp16=settings.ann_fp16_turbo,
        )
        log.info("Loaded ANN model with ID %d", self.model)

    def __del__(self) -> None:
        """
        Unload the model and release the ANN runtime.

        `__del__` also runs for objects whose `__init__` raised, so `ann` and/or `model` may never
        have been set; each step is skipped when its attribute is missing instead of raising
        AttributeError during finalization.
        """
        ann = getattr(self, "ann", None)
        if ann is None:
            # Construction failed before the runtime was obtained: nothing to release.
            return
        model = getattr(self, "model", None)
        if model is not None:
            ann.unload(model)
            log.info("Unloaded ANN model %d", model)
        # NOTE(review): assumes `destroy()` is the counterpart of each `Ann(...)` construction and is
        # therefore still due when only the model load failed — confirm against `Ann`.
        ann.destroy()

    def get_inputs(self) -> list[SessionNode]:
        """Return one unnamed node per input shape reported by the runtime for this model."""
        shapes = self.ann.input_shapes[self.model]
        return [AnnNode(None, s) for s in shapes]

    def get_outputs(self) -> list[SessionNode]:
        """Return one unnamed node per output shape reported by the runtime for this model."""
        shapes = self.ann.output_shapes[self.model]
        return [AnnNode(None, s) for s in shapes]

    def run(
        self,
        output_names: list[str] | None,
        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
        run_options: Any = None,
    ) -> list[NDArray[np.float32]]:
        """
        Execute the model.

        Inputs are passed positionally in the dict's iteration order (the keys are not used), each
        made C-contiguous first. `output_names` and `run_options` are accepted for interface
        compatibility and ignored.
        """
        inputs: list[NDArray[np.float32]] = [np.ascontiguousarray(v) for v in input_feed.values()]
        return self.ann.execute(self.model, inputs)
class AnnNode(NamedTuple):
    """Minimal (name, shape) record returned by `AnnSession.get_inputs` / `get_outputs`."""

    # Always None for nodes created by AnnSession.
    name: str | None
    shape: tuple[int, ...]

View file

@ -0,0 +1,169 @@
from __future__ import annotations
from ctypes import CDLL, Array, c_bool, c_char_p, c_int, c_ulong, c_void_p
from os.path import exists
from typing import Any, Protocol, TypeVar
import numpy as np
from numpy.typing import NDArray
from immich_ml.config import log
# Load the native libraries and declare the C signatures of the libann entry points
# used by Ann below. is_available records whether loading succeeded.
try:
    CDLL("libmali.so")  # fail if libmali.so is not mounted into container
    libann = CDLL("libann.so")
    # init(log_level, tuning_level, tuning_file) -> opaque runtime handle
    libann.init.argtypes = c_int, c_int, c_char_p
    libann.init.restype = c_void_p
    # load(handle, model_path, fast_math, fp16, save_cached_network, cached_network_path) -> network id
    libann.load.argtypes = c_void_p, c_char_p, c_bool, c_bool, c_bool, c_char_p
    libann.load.restype = c_int
    # execute(handle, network_id, input_pointers, output_pointers)
    libann.execute.argtypes = c_void_p, c_int, Array[c_void_p], Array[c_void_p]
    # unload(handle, network_id)
    libann.unload.argtypes = c_void_p, c_int
    # destroy(handle)
    libann.destroy.argtypes = (c_void_p,)
    # shape(handle, network_id, is_input, index) -> packed dimensions (decoded in Ann.shape)
    libann.shape.argtypes = c_void_p, c_int, c_bool, c_int
    libann.shape.restype = c_ulong
    # tensors(handle, network_id, is_input) -> number of tensors
    libann.tensors.argtypes = c_void_p, c_int, c_bool
    libann.tensors.restype = c_int
    is_available = True
except OSError as e:
    # Either shared library failed to load.
    log.debug("Could not load ANN shared libraries, using ONNX: %s", e)
    is_available = False
T = TypeVar("T", covariant=True)
class Newable(Protocol[T]):
def new(self) -> None: ...
class _Singleton(type, Newable[T]):
_instances: dict[_Singleton[T], Newable[T]] = {}
def __call__(cls, *args: Any, **kwargs: Any) -> Newable[T]:
if cls not in cls._instances:
obj: Newable[T] = super(_Singleton, cls).__call__(*args, **kwargs)
cls._instances[cls] = obj
else:
obj = cls._instances[cls]
obj.new()
return obj
class Ann(metaclass=_Singleton):
    """
    Python binding for the native ``libann`` runtime.

    The native runtime handle is reference counted: ``new()`` takes a reference
    (creating the handle when there is none) and ``destroy()`` gives one back,
    tearing the handle down when the count reaches zero.
    """

    def __init__(self, log_level: int = 3, tuning_level: int = 1, tuning_file: str | None = None) -> None:
        """
        Validate the settings and create the native runtime.

        Args:
            log_level: 0 (trace) to 5 (fatal).
            tuning_level: 0 (load from ``tuning_file``), 1, 2 or 3.
            tuning_file: Path of the tuning file; required when ``tuning_level`` is 0.

        Raises:
            RuntimeError: If the libann shared libraries could not be loaded.
            ValueError: If an argument is out of range.
        """
        if not is_available:
            raise RuntimeError("libann is not available!")
        if tuning_level == 0 and tuning_file is None:
            raise ValueError("tuning_level == 0 reads existing tuning information and requires a tuning_file")
        if tuning_level < 0 or tuning_level > 3:
            raise ValueError("tuning_level must be 0 (load from tuning_file), 1, 2 or 3.")
        if log_level < 0 or log_level > 5:
            raise ValueError("log_level must be 0 (trace), 1 (debug), 2 (info), 3 (warning), 4 (error) or 5 (fatal)")
        self.log_level = log_level
        self.tuning_level = tuning_level
        self.tuning_file = tuning_file
        # Shapes of each loaded network's tensors, keyed by network id.
        self.output_shapes: dict[int, tuple[tuple[int, ...], ...]] = {}
        self.input_shapes: dict[int, tuple[tuple[int, ...], ...]] = {}
        self.ann: int | None = None
        self.new()

        if self.tuning_file is not None:
            # make sure tuning file exists (without clearing contents)
            # once filled, the tuning file reduces the cost/time of the first
            # inference after model load by 10s of seconds
            open(self.tuning_file, "a").close()

    def new(self) -> None:
        """Take a reference on the native runtime, creating it first if necessary."""
        if self.ann is None:
            self.ann = libann.init(
                self.log_level,
                self.tuning_level,
                self.tuning_file.encode() if self.tuning_file is not None else None,
            )
            self.ref_count = 0

        self.ref_count += 1

    def destroy(self) -> None:
        """Give back one reference; destroy the native runtime once none remain."""
        self.ref_count -= 1
        if self.ref_count <= 0 and self.ann is not None:
            libann.destroy(self.ann)
            self.ann = None

    def __del__(self) -> None:
        # __init__ can raise before self.ann is assigned (e.g. libann unavailable);
        # the finalizer still runs then, so do not assume the attribute exists.
        if getattr(self, "ann", None) is not None:
            libann.destroy(self.ann)
            self.ann = None

    def load(
        self,
        model_path: str,
        fast_math: bool = True,
        fp16: bool = False,
        cached_network_path: str | None = None,
    ) -> int:
        """
        Load a model and record the shapes of its inputs and outputs.

        Args:
            model_path: Existing ``.armnn``, ``.tflite`` or ``.onnx`` file.
            fast_math: Forwarded to ``libann.load``.
            fp16: Forwarded to ``libann.load``.
            cached_network_path: Optional network cache file. When it does not exist
                yet it is created empty and libann is asked to save the cache.

        Returns:
            The network id assigned by libann.

        Raises:
            ValueError: If the path is invalid or libann fails to load the model.
        """
        if not model_path.endswith((".armnn", ".tflite", ".onnx")):
            raise ValueError("model_path must be a file with extension .armnn, .tflite or .onnx")
        if not exists(model_path):
            raise ValueError("model_path must point to an existing file!")

        save_cached_network = False
        if cached_network_path is not None and not exists(cached_network_path):
            save_cached_network = True
            # create empty model cache file
            open(cached_network_path, "a").close()

        net_id: int = libann.load(
            self.ann,
            model_path.encode(),
            fast_math,
            fp16,
            save_cached_network,
            cached_network_path.encode() if cached_network_path is not None else None,
        )
        if net_id < 0:
            raise ValueError("Cannot load model!")

        self.input_shapes[net_id] = tuple(
            self.shape(net_id, input=True, index=i) for i in range(self.tensors(net_id, input=True))
        )
        self.output_shapes[net_id] = tuple(
            self.shape(net_id, input=False, index=i) for i in range(self.tensors(net_id, input=False))
        )
        return net_id

    def unload(self, network_id: int) -> None:
        """Unload a network and forget the shapes recorded for it by ``load``."""
        libann.unload(self.ann, network_id)
        # Drop both shape tables; leaving the input shapes behind would keep stale
        # entries around for ids that are no longer loaded.
        self.input_shapes.pop(network_id, None)
        del self.output_shapes[network_id]

    def execute(self, network_id: int, input_tensors: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
        """
        Run a loaded network.

        Args:
            network_id: Id returned by ``load``.
            input_tensors: One C-contiguous array per network input, each matching
                the recorded input shape.

        Returns:
            Newly allocated float32 arrays, one per network output.

        Raises:
            ValueError: If the inputs are not a list, or their count, shapes or
                memory layout do not match what the network expects.
        """
        if not isinstance(input_tensors, list):
            raise ValueError("input_tensors needs to be a list!")
        net_input_shapes = self.input_shapes[network_id]
        if len(input_tensors) != len(net_input_shapes):
            raise ValueError(f"input_tensors lengths {len(input_tensors)} != network inputs {len(net_input_shapes)}")
        for net_input_shape, input_tensor in zip(net_input_shapes, input_tensors):
            if net_input_shape != input_tensor.shape:
                raise ValueError(f"input_tensor shape {input_tensor.shape} != network input shape {net_input_shape}")
            if not input_tensor.flags.c_contiguous:
                raise ValueError("input_tensors must be c_contiguous numpy ndarrays")

        # Uninitialized output buffers; libann.execute receives their addresses.
        output_tensors: list[NDArray[np.float32]] = [
            np.ndarray(s, dtype=np.float32) for s in self.output_shapes[network_id]
        ]
        # Pass the raw data pointers as C arrays of void*.
        input_type = c_void_p * len(input_tensors)
        inputs = input_type(*[t.ctypes.data_as(c_void_p) for t in input_tensors])
        output_type = c_void_p * len(output_tensors)
        outputs = output_type(*[t.ctypes.data_as(c_void_p) for t in output_tensors])
        libann.execute(self.ann, network_id, inputs, outputs)
        return output_tensors

    def shape(self, network_id: int, input: bool = False, index: int = 0) -> tuple[int, ...]:
        """Return the shape of one input (``input=True``) or output tensor of a network."""
        s = libann.shape(self.ann, network_id, input, index)
        # The value is decoded 16 bits at a time starting from the least significant
        # bits; decoding stops once the remaining value is zero.
        a = []
        while s != 0:
            a.append(s & 0xFFFF)
            s >>= 16
        return tuple(a)

    def tensors(self, network_id: int, input: bool = False) -> int:
        """Return the number of input (``input=True``) or output tensors of a network."""
        tensors: int = libann.tensors(self.ann, network_id, input)
        return tensors

View file

@ -0,0 +1,147 @@
from __future__ import annotations
from pathlib import Path
from typing import Any
import numpy as np
import onnxruntime as ort
from numpy.typing import NDArray
from immich_ml.models.constants import SUPPORTED_PROVIDERS
from immich_ml.schemas import SessionNode
from ..config import log, settings
class OrtSession:
    """
    Inference session backed by ONNX Runtime.

    Execution providers, provider options and session options may be supplied
    explicitly; anything left as ``None`` is derived from the available providers
    and the application settings.
    """

    session: ort.InferenceSession

    def __init__(
        self,
        model_path: Path | str,
        providers: list[str] | None = None,
        provider_options: list[dict[str, Any]] | None = None,
        sess_options: ort.SessionOptions | None = None,
    ):
        self.model_path = Path(model_path)

        # Order matters: the provider-option and session-option defaults both read
        # self.providers, so providers have to be settled first.
        if providers is None:
            providers = self._providers_default
        self.providers = providers

        if provider_options is None:
            provider_options = self._provider_options_default
        self.provider_options = provider_options

        if sess_options is None:
            sess_options = self._sess_options_default
        self.sess_options = sess_options

        self.session = ort.InferenceSession(
            self.model_path.as_posix(),
            providers=self.providers,
            provider_options=self.provider_options,
            sess_options=self.sess_options,
        )

    def get_inputs(self) -> list[SessionNode]:
        """Return the input nodes reported by the underlying session."""
        return self.session.get_inputs()

    def get_outputs(self) -> list[SessionNode]:
        """Return the output nodes reported by the underlying session."""
        return self.session.get_outputs()

    def run(
        self,
        output_names: list[str] | None,
        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
        run_options: Any = None,
    ) -> list[NDArray[np.float32]]:
        """Forward the call to the underlying session and return its outputs."""
        return self.session.run(output_names, input_feed, run_options)

    @property
    def providers(self) -> list[str]:
        return self._providers

    @providers.setter
    def providers(self, providers: list[str]) -> None:
        log.info(f"Setting execution providers to {providers}, in descending order of preference")
        self._providers = providers

    @property
    def _providers_default(self) -> list[str]:
        """Supported providers that are available, in ``SUPPORTED_PROVIDERS`` order."""
        available_providers = set(ort.get_available_providers())
        log.debug(f"Available ORT providers: {available_providers}")

        openvino = "OpenVINOExecutionProvider"
        if openvino in available_providers:
            device_ids: list[str] = ort.capi._pybind_state.get_available_openvino_device_ids()
            log.debug(f"Available OpenVINO devices: {device_ids}")
            # OpenVINO is only kept when it exposes at least one GPU device.
            if not any(device_id.startswith("GPU") for device_id in device_ids):
                log.warning("No GPU device found in OpenVINO. Falling back to CPU.")
                available_providers.remove(openvino)

        return [candidate for candidate in SUPPORTED_PROVIDERS if candidate in available_providers]

    @property
    def provider_options(self) -> list[dict[str, Any]]:
        return self._provider_options

    @provider_options.setter
    def provider_options(self, provider_options: list[dict[str, Any]]) -> None:
        log.debug(f"Setting execution provider options to {provider_options}")
        self._provider_options = provider_options

    @property
    def _provider_options_default(self) -> list[dict[str, Any]]:
        """One options dict per configured provider, in the same order as ``providers``."""
        collected: list[dict[str, Any]] = []
        for provider in self.providers:
            entry: dict[str, Any]
            if provider == "CPUExecutionProvider":
                entry = {"arena_extend_strategy": "kSameAsRequested"}
            elif provider in ("CUDAExecutionProvider", "ROCMExecutionProvider"):
                entry = {"arena_extend_strategy": "kSameAsRequested", "device_id": settings.device_id}
            elif provider == "OpenVINOExecutionProvider":
                # cache directory sits next to the model file
                openvino_dir = self.model_path.parent / "openvino"
                device = f"GPU.{settings.device_id}"
                entry = {
                    "device_type": device,
                    "precision": settings.openvino_precision.value,
                    "cache_dir": openvino_dir.as_posix(),
                }
            elif provider == "CoreMLExecutionProvider":
                entry = {
                    "ModelFormat": "MLProgram",
                    "MLComputeUnits": "ALL",
                    "SpecializationStrategy": "FastPrediction",
                    "AllowLowPrecisionAccumulationOnGPU": "1",
                    "ModelCacheDirectory": (self.model_path.parent / "coreml").as_posix(),
                }
            else:
                # providers without special handling get no options
                entry = {}
            collected.append(entry)
        return collected

    @property
    def sess_options(self) -> ort.SessionOptions:
        return self._sess_options

    @sess_options.setter
    def sess_options(self, sess_options: ort.SessionOptions) -> None:
        log.debug(f"Setting execution_mode to {sess_options.execution_mode.name}")
        log.debug(f"Setting inter_op_num_threads to {sess_options.inter_op_num_threads}")
        log.debug(f"Setting intra_op_num_threads to {sess_options.intra_op_num_threads}")
        self._sess_options = sess_options

    @property
    def _sess_options_default(self) -> ort.SessionOptions:
        """Session options derived from the settings and the configured providers."""
        sess_options = ort.SessionOptions()
        sess_options.enable_cpu_mem_arena = settings.model_arena

        inter_op_threads = settings.model_inter_op_threads
        intra_op_threads = settings.model_intra_op_threads
        cpu_only = self.providers == ["CPUExecutionProvider"]

        # avoid thread contention between models
        if inter_op_threads > 0:
            sess_options.inter_op_num_threads = inter_op_threads
        # these defaults work well for CPU, but bottleneck GPU
        elif inter_op_threads == 0 and cpu_only:
            sess_options.inter_op_num_threads = 1

        if intra_op_threads > 0:
            sess_options.intra_op_num_threads = intra_op_threads
        elif intra_op_threads == 0 and cpu_only:
            sess_options.intra_op_num_threads = 2

        # more than one inter-op thread is only useful in parallel execution mode
        if sess_options.inter_op_num_threads > 1:
            sess_options.execution_mode = ort.ExecutionMode.ORT_PARALLEL

        return sess_options

View file

@ -0,0 +1,76 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, NamedTuple
import numpy as np
from numpy.typing import NDArray
from immich_ml.config import log, settings
from immich_ml.schemas import SessionNode
from .rknnpool import RknnPoolExecutor, is_available, soc_name
# RKNN counts as available only when the rknnpool probe succeeded and settings.rknn is truthy.
is_available = is_available and settings.rknn
# "rknpu/<soc_name>" when RKNN is available and the SoC is known, otherwise None.
# NOTE(review): presumably the sub-directory that holds the models built for this SoC - confirm.
model_prefix = Path("rknpu") / soc_name if is_available and soc_name is not None else None
def run_inference(rknn_lite: Any, input: list[NDArray[np.float32]]) -> list[NDArray[np.float32]]:
    """Run ``input`` through ``rknn_lite.inference`` in ``nchw`` data format and return its outputs."""
    return rknn_lite.inference(inputs=input, data_format="nchw")
# Static node names and shapes per model type ("detection" / "recognition").
# RknnSession.get_inputs/get_outputs build their node lists from these tables.
input_output_mapping: dict[str, dict[str, Any]] = {
    "detection": {
        "input": {"norm_tensor:0": (1, 3, 640, 640)},
        "output": {
            "norm_tensor:1": (12800, 1),
            "norm_tensor:2": (3200, 1),
            "norm_tensor:3": (800, 1),
            "norm_tensor:4": (12800, 4),
            "norm_tensor:5": (3200, 4),
            "norm_tensor:6": (800, 4),
            "norm_tensor:7": (12800, 10),
            "norm_tensor:8": (3200, 10),
            "norm_tensor:9": (800, 10),
        },
    },
    "recognition": {"input": {"norm_tensor:0": (1, 3, 112, 112)}, "output": {"norm_tensor:1": (1, 512)}},
}
class RknnSession:
    """Session that runs a model through a pool of RKNN runtimes."""

    def __init__(self, model_path: Path) -> None:
        """Create the runtime pool for ``model_path`` using ``settings.rknn_threads`` workers."""
        # Any path without a "detection" component is treated as a recognition model.
        if "detection" in model_path.parts:
            self.model_type = "detection"
        else:
            self.model_type = "recognition"
        self.tpe = settings.rknn_threads

        log.info(f"Loading RKNN model from {model_path} with {self.tpe} threads.")
        self.rknnpool = RknnPoolExecutor(model_path=model_path.as_posix(), tpes=self.tpe, func=run_inference)
        log.info(f"Loaded RKNN model from {model_path} with {self.tpe} threads.")

    def get_inputs(self) -> list[SessionNode]:
        """Return the input nodes listed for this model type in ``input_output_mapping``."""
        nodes = input_output_mapping[self.model_type]["input"]
        return [RknnNode(name=node_name, shape=node_shape) for node_name, node_shape in nodes.items()]

    def get_outputs(self) -> list[SessionNode]:
        """Return the output nodes listed for this model type in ``input_output_mapping``."""
        nodes = input_output_mapping[self.model_type]["output"]
        return [RknnNode(name=node_name, shape=node_shape) for node_name, node_shape in nodes.items()]

    def run(
        self,
        output_names: list[str] | None,
        input_feed: dict[str, NDArray[np.float32]] | dict[str, NDArray[np.int32]],
        run_options: Any = None,
    ) -> list[NDArray[np.float32]]:
        """
        Submit the inputs to the pool and return the result.

        Only the values of ``input_feed`` are used; ``output_names`` and
        ``run_options`` are ignored.

        Raises:
            RuntimeError: If the pool yields no result.
        """
        input_data: list[NDArray[np.float32]] = list(map(np.ascontiguousarray, input_feed.values()))
        self.rknnpool.put(input_data)
        result = self.rknnpool.get()
        if result is not None:
            return result
        raise RuntimeError("RKNN inference failed!")
class RknnNode(NamedTuple):
    """Input/output node description returned by ``RknnSession.get_inputs``/``get_outputs``."""

    # Node name, taken from the keys of input_output_mapping.
    name: str | None
    # Tensor shape, taken from the values of input_output_mapping.
    shape: tuple[int, ...]
__all__ = ["RknnSession", "RknnNode", "is_available", "soc_name", "model_prefix"]

View file

@ -0,0 +1,91 @@
# This code is from leafqycc/rknn-multi-threaded
# Following Apache License 2.0
import logging
from concurrent.futures import Future, ThreadPoolExecutor
from pathlib import Path
from queue import Queue
from typing import Callable
import numpy as np
from numpy.typing import NDArray
from immich_ml.config import log
from immich_ml.models.constants import RKNN_COREMASK_SUPPORTED_SOCS, RKNN_SUPPORTED_SOCS
def get_soc(device_tree_path: Path | str) -> str | None:
    """
    Identify the SoC named in a device-tree ``compatible`` file.

    Args:
        device_tree_path: File to read, e.g. ``/proc/device-tree/compatible``.

    Returns:
        The first entry of ``RKNN_SUPPORTED_SOCS`` that occurs in the file's
        contents, or ``None`` when the file cannot be read or names no supported SoC.
    """
    try:
        with Path(device_tree_path).open() as f:
            device_compatible_str = f.read()
    except OSError as e:
        # Pass the path as a logging argument rather than interpolating it into the
        # format string: a "%" in the path would otherwise break message formatting.
        log.warning("Could not read %s. Reason: %s", device_tree_path, e)
        return None

    for soc in RKNN_SUPPORTED_SOCS:
        if soc in device_compatible_str:
            return soc
    log.warning("Device is not supported for RKNN")
    return None
# Values used when rknnlite cannot be imported.
soc_name = None
is_available = False
try:
    from rknnlite.api import RKNNLite

    # RKNN is available only when the device tree names a supported SoC.
    soc_name = get_soc("/proc/device-tree/compatible")
    is_available = soc_name is not None
except ImportError:
    log.debug("RKNN is not available")
def init_rknn(model_path: str) -> "RKNNLite":
    """
    Create an ``RKNNLite`` runtime with ``model_path`` loaded and initialized.

    Raises:
        RuntimeError: If RKNN is unavailable, or loading the model or initializing
            the runtime reports a non-zero status.
    """
    if not is_available:
        raise RuntimeError("rknn is not available!")

    runtime = RKNNLite()
    runtime.rknn_log.logger.setLevel(logging.ERROR)

    if runtime.load_rknn(model_path) != 0:
        raise RuntimeError("Failed to load RKNN model")

    # Please do not set this parameter on other platforms.
    runtime_kwargs = {"core_mask": RKNNLite.NPU_CORE_AUTO} if soc_name in RKNN_COREMASK_SUPPORTED_SOCS else {}
    if runtime.init_runtime(**runtime_kwargs) != 0:
        raise RuntimeError("Failed to initialize RKNN runtime environment")

    return runtime
class RknnPoolExecutor:
def __init__(
self,
model_path: str,
tpes: int,
func: Callable[["RKNNLite", list[NDArray[np.float32]]], list[NDArray[np.float32]]],
) -> None:
self.tpes = tpes
self.queue: Queue[Future[list[NDArray[np.float32]]]] = Queue()
self.rknn_pool = [init_rknn(model_path) for _ in range(tpes)]
self.pool = ThreadPoolExecutor(max_workers=tpes)
self.func = func
self.num = 0
def put(self, inputs: list[NDArray[np.float32]]) -> None:
self.queue.put(self.pool.submit(self.func, self.rknn_pool[self.num % self.tpes], inputs))
self.num += 1
def get(self) -> list[NDArray[np.float32]] | None:
if self.queue.empty():
return None
fut = self.queue.get()
return fut.result()
def release(self) -> None:
self.pool.shutdown()
for rknn_lite in self.rknn_pool:
rknn_lite.release()
def __del__(self) -> None:
self.release()

View file

@ -0,0 +1,81 @@
import json
from argparse import ArgumentParser
from io import BytesIO
from typing import Any
from locust import HttpUser, events, task
from locust.env import Environment
from PIL import Image
byte_image = BytesIO()
@events.init_command_line_parser.add_listener
def _(parser: ArgumentParser) -> None:
    """Register the command-line options used by the inference load tests."""
    parser.add_argument("--clip-model", type=str, default="ViT-B-32::openai")
    parser.add_argument("--face-model", type=str, default="buffalo_l")
    parser.add_argument(
        "--face-min-score",
        # Scores are fractional (the default is 0.034); with type=int any fractional
        # value given on the command line would be rejected by argparse.
        type=float,
        default=0.034,
        help=(
            "Returns all faces at or above this score. The default returns 1 face per request; "
            "setting this to 0 blows up the number of faces to the thousands."
        ),
    )
    parser.add_argument("--image-size", type=int, default=1000)
@events.test_start.add_listener
def on_test_start(environment: Environment, **kwargs: Any) -> None:
    """Render the square JPEG test image of ``--image-size`` pixels into the shared buffer."""
    assert environment.parsed_options is not None
    side = environment.parsed_options.image_size
    image = Image.new("RGB", (side, side))
    # Start from an empty buffer: this listener can fire more than once per process,
    # and without the reset every start would append another JPEG to the payload.
    byte_image.seek(0)
    byte_image.truncate(0)
    image.save(byte_image, format="jpeg")
class InferenceLoadTest(HttpUser):
    """Base class for the load-test users below; holds the target host and the test image bytes."""

    # Marked abstract; the concrete task classes below derive from it.
    abstract: bool = True
    host = "http://127.0.0.1:3003"
    # JPEG bytes sent as the "image" file by the image-based tasks.
    data: bytes

    # re-use the image across all instances in a process
    def on_start(self) -> None:
        self.data = byte_image.getvalue()
class CLIPTextFormDataLoadTest(InferenceLoadTest):
    """Load test that posts a CLIP textual request with a fixed query to ``/predict``."""

    @task
    def encode_text(self) -> None:
        model_name = self.environment.parsed_options.clip_model
        entries = json.dumps({"clip": {"textual": {"modelName": model_name}}})
        form = [("entries", entries), ("text", "test search query")]
        self.client.post("/predict", data=form)
class CLIPVisionFormDataLoadTest(InferenceLoadTest):
    """Load test that posts a CLIP visual request with the shared test image to ``/predict``."""

    @task
    def encode_image(self) -> None:
        model_name = self.environment.parsed_options.clip_model
        entries = json.dumps({"clip": {"visual": {"modelName": model_name, "options": {}}}})
        self.client.post("/predict", data=[("entries", entries)], files={"image": self.data})
class RecognitionFormDataLoadTest(InferenceLoadTest):
    """Load test that posts a facial-recognition request with the shared test image to ``/predict``."""

    @task
    def recognize(self) -> None:
        options = self.environment.parsed_options
        # The same model name is sent for both the recognition and the detection entry.
        recognition = {"modelName": options.face_model}
        detection = {
            "modelName": options.face_model,
            "options": {"minScore": options.face_min_score},
        }
        payload = {"facial-recognition": {"recognition": recognition, "detection": detection}}
        form = [("entries", json.dumps(payload))]
        self.client.post("/predict", data=form, files={"image": self.data})

View file

@ -0,0 +1,179 @@
commit 16839b58d9b3c3162a67ce5d776b36d4d24e801f
Author: mertalev <101130780+mertalev@users.noreply.github.com>
Date: Wed Mar 5 11:25:38 2025 -0500
disable algo caching (attributed to @dmnieto in https://github.com/microsoft/onnxruntime/pull/19567)
diff --git a/onnxruntime/core/providers/rocm/nn/conv.cc b/onnxruntime/core/providers/rocm/nn/conv.cc
index d7f47d07a8..4060a2af52 100644
--- a/onnxruntime/core/providers/rocm/nn/conv.cc
+++ b/onnxruntime/core/providers/rocm/nn/conv.cc
@@ -127,7 +127,6 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
if (w_dims_changed) {
s_.last_w_dims = gsl::make_span(w_dims);
- s_.cached_benchmark_fwd_results.clear();
}
ORT_RETURN_IF_ERROR(conv_attrs_.ValidateInputShape(X->Shape(), W->Shape(), channels_last, channels_last));
@@ -277,35 +276,6 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
HIP_CALL_THROW(hipMalloc(&s_.b_zero, malloc_size));
HIP_CALL_THROW(hipMemsetAsync(s_.b_zero, 0, malloc_size, Stream(context)));
}
-
- if (!s_.cached_benchmark_fwd_results.contains(x_dims_miopen)) {
- miopenConvAlgoPerf_t perf;
- int algo_count = 1;
- const ROCMExecutionProvider* rocm_ep = static_cast<const ROCMExecutionProvider*>(this->Info().GetExecutionProvider());
- static constexpr int num_algos = MIOPEN_CONVOLUTION_FWD_ALGO_COUNT;
- size_t max_ws_size = rocm_ep->GetMiopenConvUseMaxWorkspace() ? GetMaxWorkspaceSize(GetMiopenHandle(context), s_, kAllAlgos, num_algos, rocm_ep->GetDeviceId())
- : AlgoSearchWorkspaceSize;
- IAllocatorUniquePtr<void> algo_search_workspace = GetTransientScratchBuffer<void>(max_ws_size);
- MIOPEN_RETURN_IF_ERROR(miopenFindConvolutionForwardAlgorithm(
- GetMiopenHandle(context),
- s_.x_tensor,
- s_.x_data,
- s_.w_desc,
- s_.w_data,
- s_.conv_desc,
- s_.y_tensor,
- s_.y_data,
- 1, // requestedAlgoCount
- &algo_count, // returnedAlgoCount
- &perf,
- algo_search_workspace.get(),
- max_ws_size,
- false)); // Do not do exhaustive algo search.
- s_.cached_benchmark_fwd_results.insert(x_dims_miopen, {perf.fwd_algo, perf.memory});
- }
- const auto& perf = s_.cached_benchmark_fwd_results.at(x_dims_miopen);
- s_.fwd_algo = perf.fwd_algo;
- s_.workspace_bytes = perf.memory;
} else {
// set Y
s_.Y = context->Output(0, TensorShape(s_.y_dims));
@@ -319,6 +289,31 @@ Status Conv<T, NHWC>::UpdateState(OpKernelContext* context, bool bias_expected)
s_.y_data = reinterpret_cast<HipT*>(s_.Y->MutableData<T>());
}
}
+
+ miopenConvAlgoPerf_t perf;
+ int algo_count = 1;
+ const ROCMExecutionProvider* rocm_ep = static_cast<const ROCMExecutionProvider*>(this->Info().GetExecutionProvider());
+ static constexpr int num_algos = MIOPEN_CONVOLUTION_FWD_ALGO_COUNT;
+ size_t max_ws_size = rocm_ep->GetMiopenConvUseMaxWorkspace() ? GetMaxWorkspaceSize(GetMiopenHandle(context), s_, kAllAlgos, num_algos, rocm_ep->GetDeviceId())
+ : AlgoSearchWorkspaceSize;
+ IAllocatorUniquePtr<void> algo_search_workspace = GetTransientScratchBuffer<void>(max_ws_size);
+ MIOPEN_RETURN_IF_ERROR(miopenFindConvolutionForwardAlgorithm(
+ GetMiopenHandle(context),
+ s_.x_tensor,
+ s_.x_data,
+ s_.w_desc,
+ s_.w_data,
+ s_.conv_desc,
+ s_.y_tensor,
+ s_.y_data,
+ 1, // requestedAlgoCount
+ &algo_count, // returnedAlgoCount
+ &perf,
+ algo_search_workspace.get(),
+ max_ws_size,
+ false)); // Do not do exhaustive algo search.
+ s_.fwd_algo = perf.fwd_algo;
+ s_.workspace_bytes = perf.memory;
return Status::OK();
}
diff --git a/onnxruntime/core/providers/rocm/nn/conv.h b/onnxruntime/core/providers/rocm/nn/conv.h
index bc9846203e..d54218f258 100644
--- a/onnxruntime/core/providers/rocm/nn/conv.h
+++ b/onnxruntime/core/providers/rocm/nn/conv.h
@@ -108,9 +108,6 @@ class lru_unordered_map {
list_type lru_list_;
};
-// cached miopen descriptors
-constexpr size_t MAX_CACHED_ALGO_PERF_RESULTS = 10000;
-
template <typename AlgoPerfType>
struct MiopenConvState {
// if x/w dims changed, update algo and miopenTensors
@@ -148,9 +145,6 @@ struct MiopenConvState {
decltype(AlgoPerfType().memory) memory;
};
- lru_unordered_map<TensorShapeVector, PerfFwdResultParams, vector_hash> cached_benchmark_fwd_results{MAX_CACHED_ALGO_PERF_RESULTS};
- lru_unordered_map<TensorShapeVector, PerfBwdResultParams, vector_hash> cached_benchmark_bwd_results{MAX_CACHED_ALGO_PERF_RESULTS};
-
// Some properties needed to support asymmetric padded Conv nodes
bool post_slicing_required;
TensorShapeVector slice_starts;
diff --git a/onnxruntime/core/providers/rocm/nn/conv_transpose.cc b/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
index 7447113fdf..a662e35b2e 100644
--- a/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
+++ b/onnxruntime/core/providers/rocm/nn/conv_transpose.cc
@@ -76,7 +76,6 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
if (w_dims_changed) {
s_.last_w_dims = gsl::make_span(w_dims);
- s_.cached_benchmark_bwd_results.clear();
}
ConvTransposeAttributes::Prepare p;
@@ -126,35 +125,29 @@ Status ConvTranspose<T, NHWC>::DoConvTranspose(OpKernelContext* context, bool dy
}
y_data = reinterpret_cast<HipT*>(p.Y->MutableData<T>());
-
- if (!s_.cached_benchmark_bwd_results.contains(x_dims)) {
- IAllocatorUniquePtr<void> algo_search_workspace = GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());
-
- miopenConvAlgoPerf_t perf;
- int algo_count = 1;
- MIOPEN_RETURN_IF_ERROR(miopenFindConvolutionBackwardDataAlgorithm(
- GetMiopenHandle(context),
- s_.x_tensor,
- x_data,
- s_.w_desc,
- w_data,
- s_.conv_desc,
- s_.y_tensor,
- y_data,
- 1,
- &algo_count,
- &perf,
- algo_search_workspace.get(),
- AlgoSearchWorkspaceSize,
- false));
- s_.cached_benchmark_bwd_results.insert(x_dims, {perf.bwd_data_algo, perf.memory});
- }
-
- const auto& perf = s_.cached_benchmark_bwd_results.at(x_dims);
- s_.bwd_data_algo = perf.bwd_data_algo;
- s_.workspace_bytes = perf.memory;
}
+ IAllocatorUniquePtr<void> algo_search_workspace = GetScratchBuffer<void>(AlgoSearchWorkspaceSize, context->GetComputeStream());
+ miopenConvAlgoPerf_t perf;
+ int algo_count = 1;
+ MIOPEN_RETURN_IF_ERROR(miopenFindConvolutionBackwardDataAlgorithm(
+ GetMiopenHandle(context),
+ s_.x_tensor,
+ x_data,
+ s_.w_desc,
+ w_data,
+ s_.conv_desc,
+ s_.y_tensor,
+ y_data,
+ 1,
+ &algo_count,
+ &perf,
+ algo_search_workspace.get(),
+ AlgoSearchWorkspaceSize,
+ false));
+ s_.bwd_data_algo = perf.bwd_data_algo;
+ s_.workspace_bytes = perf.memory;
+
// The following block will be executed in case there has been no change in the shapes of the
// input and the filter compared to the previous run
if (!y_data) {

View file

@ -0,0 +1,33 @@
diff --git a/dockerfiles/scripts/install_common_deps.sh b/dockerfiles/scripts/install_common_deps.sh
index bbb672a99e..0dc652fbda 100644
--- a/dockerfiles/scripts/install_common_deps.sh
+++ b/dockerfiles/scripts/install_common_deps.sh
@@ -8,16 +8,23 @@ apt-get update && apt-get install -y --no-install-recommends \
curl \
libcurl4-openssl-dev \
libssl-dev \
- python3-dev
+ python3-dev \
+ ccache
# Dependencies: conda
-wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-4.5.11-Linux-x86_64.sh -O ~/miniconda.sh --no-check-certificate && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
+wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-py312_25.9.1-1-Linux-x86_64.sh -O ~/miniconda.sh && /bin/bash ~/miniconda.sh -b -p /opt/miniconda
rm ~/miniconda.sh
/opt/miniconda/bin/conda clean -ya
-pip install numpy
-pip install packaging
-pip install "wheel>=0.35.1"
+# Dependencies: venv and packages
+/opt/miniconda/bin/python3 -m venv /opt/rocm-venv
+/opt/rocm-venv/bin/pip install --no-cache-dir --upgrade pip
+/opt/rocm-venv/bin/pip install --no-cache-dir \
+ "numpy==2.3.4" \
+ "packaging==25.0" \
+ "wheel==0.45.1" \
+ "setuptools==80.9.0"
+
rm -rf /opt/miniconda/pkgs
# Dependencies: cmake

View file

@ -0,0 +1,101 @@
[project]
name = "immich-ml"
version = "2.5.2"
description = ""
authors = [{ name = "Hau Tran", email = "alex.tran1502@gmail.com" }]
requires-python = ">=3.11,<4.0"
readme = "README.md"
dependencies = [
"aiocache>=0.12.1,<1.0",
"fastapi>=0.95.2,<1.0",
"ftfy>=6.1.1",
"gunicorn>=21.1.0",
"huggingface-hub>=0.20.1,<1.0",
"insightface>=0.7.3,<1.0",
"numpy>=2.3.4",
"opencv-python-headless>=4.7.0.72,<5.0",
"orjson>=3.9.5",
"pillow>=9.5.0,<11.0",
"pydantic>=2.0.0,<3",
"pydantic-settings>=2.5.2,<3",
"python-multipart>=0.0.6,<1.0",
"rich>=13.4.2",
"tokenizers>=0.15.0,<1.0",
"uvicorn[standard]>=0.22.0,<1.0",
"rapidocr>=3.1.0",
]
[dependency-groups]
test = [
"httpx>=0.24.1",
"pytest>=7.3.1",
"pytest-asyncio>=0.21.0",
"pytest-cov>=4.1.0",
"pytest-mock>=3.11.1",
]
types = [
"types-pyyaml>=6.0.12.20241230",
"types-requests>=2.32.0.20250306",
"types-setuptools>=75.8.2.20250305",
"types-simplejson>=3.20.0.20250218",
"types-ujson>=5.10.0.20240515",
]
lint = [
"black>=23.3.0",
"mypy>=1.3.0",
"ruff>=0.0.272",
{ include-group = "types" },
]
dev = ["locust>=2.15.1", { include-group = "test" }, { include-group = "lint" }]
[project.optional-dependencies]
cpu = ["onnxruntime>=1.23.2,<2"]
cuda = ["onnxruntime-gpu>=1.23.2,<2"]
openvino = ["onnxruntime-openvino>=1.23.0,<2"]
armnn = ["onnxruntime>=1.23.2,<2"]
rknn = ["onnxruntime>=1.23.2,<2", "rknn-toolkit-lite2>=2.3.0,<3"]
rocm = []
[tool.uv]
compile-bytecode = true
[tool.hatch.build.targets.sdist]
include = ["immich_ml"]
[tool.hatch.build.targets.wheel]
include = ["immich_ml"]
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
[tool.mypy]
python_version = "3.11"
plugins = "pydantic.mypy"
follow_imports = "silent"
warn_redundant_casts = true
disallow_any_generics = true
check_untyped_defs = true
disallow_untyped_defs = true
ignore_missing_imports = true
[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
warn_required_dynamic_aliases = true
warn_untyped_fields = true
[tool.ruff]
line-length = 120
target-version = "py311"
[tool.ruff.lint]
select = ["E", "F", "I"]
per-file-ignores = { "test_main.py" = ["F403"] }
[tool.black]
line-length = 120
target-version = ['py311']
[tool.pytest.ini_options]
markers = ["providers", "ov_device_ids"]

View file

@ -0,0 +1,329 @@
{
"clip": {
"image": [
-0.013126737, -0.022995953, -0.0493738, -0.0063057775, 0.013601424,
-0.003761688, -0.03379882, 0.11106285, 0.024760082, 0.023903701,
0.04418207, -0.013594999, 0.030850016, 0.0012876489, -0.012471005,
0.009750715, 0.0095717255, 0.013320666, 0.0027921356, 0.03240264,
0.033538498, 0.013624318, -0.0069946186, -0.0036184592, -0.009846507,
-0.017311024, -0.036686428, -0.0041808123, 0.030871637, -0.028624479,
-0.016515259, 0.014418001, -0.024542322, -0.0025438748, -0.049111884,
-0.023928944, 0.012270045, -0.016418075, 0.004895335, -0.15801854,
-0.026325515, 0.03166467, -0.017224329, 0.0411128, -0.022944424,
0.015693054, -0.020919899, -0.010764121, -0.008499815, -0.020263294,
-0.009743323, -0.035395622, 0.03474742, 0.003049183, 0.009424564,
0.010707678, 0.01664117, -0.0059027374, -0.013055344, 0.0018035833,
-0.003976456, -0.04325922, 0.014407317, 0.035728276, 0.002226939,
-0.006235411, -0.0073032235, -0.035129357, 0.001095443, -0.028552389,
-0.044300288, -0.012959393, 0.02259977, 0.017141517, -0.029432472,
-0.017583484, 0.010974336, 0.018141218, 0.0015389329, -0.008220305,
-0.0060965014, 0.004929384, 0.019477025, -0.033071984, 0.025183259,
0.013607688, 0.01836233, 0.04586782, 0.0103442725, -0.036077496,
-0.029715508, 0.007203621, -0.7949153, 0.046866275, 0.026910711,
-0.0047834567, 0.033243995, 0.009379981, -0.03749048, -0.055274535,
-0.01955359, 0.012887587, 0.00922838, -0.0032776103, -0.011456734,
0.0045412215, -0.11506394, 0.0348558, 0.029478835, -0.011811103,
-0.00483158, -0.010586126, -0.018853206, -0.01591496, -0.019360982,
-0.03211199, -0.013473663, -0.019630248, -0.017012835, 0.059128772,
-0.03396129, 0.0045991736, -0.015158291, 0.008241974, 0.004403056,
-0.007536049, -0.023821214, -0.0059521003, 0.015564905, 0.020600233,
0.008175, 0.02100119, -0.0034459834, 0.1058016, 0.008383205, 0.03100292,
-0.023814196, -0.016157096, -0.008210107, -0.004146204, 0.016350364,
-0.056028433, 0.013261071, 0.034839876, -0.03236049, 0.026573967,
-0.018140865, 0.018515658, 0.013251766, 0.007693613, 0.0067239976,
-0.0013857568, 0.038114607, 0.0068016117, 0.036603037, 0.0040935865,
0.010394745, -0.00082285365, -0.009811308, 0.020343611, -0.012164189,
-0.012208623, 0.0005465415, -0.015394064, 0.02499845, 0.021941017,
-0.016571017, -0.011810332, 0.017864, -0.010639794, -0.008609091,
-0.0007239709, 0.015229945, -0.0035874692, 0.018922612, -0.011209458,
-0.013052865, -0.009626533, -0.004419959, 0.007915186, 0.01094836,
0.005509159, -0.0034862005, 0.01012292, -0.0059307595, -0.029599648,
0.032845, -0.007011692, -0.014218981, 0.00790071, 0.017027052,
-0.022314077, -0.03041719, 0.015665755, 0.036747217, -0.018942915,
0.008623111, 0.02179961, -0.022312569, 0.007024427, 0.016751591,
-0.0034192575, 0.024101255, -0.0046198783, 0.022274924, -0.015562676,
-0.0092551885, -0.0063787713, 0.045996074, 0.026235346, 0.009622556,
0.05728027, 0.03168525, -0.017600676, 0.029278612, 0.01467962,
0.032169178, 0.022459356, -0.012175933, -0.009438608, 0.027234165,
0.013514767, -0.008831029, 0.010888894, 0.004518216, 0.009855367,
0.012112431, -0.0073178695, 0.0072642234, 4.877679e-5, -0.01221576,
0.023542404, -0.009026452, -0.055442516, 0.006579068, 0.033202186,
-0.007669379, 0.0010604112, -0.04271919, -0.029112164, 0.021844024,
0.029739635, -0.026083348, 0.008940292, -0.039301652, -0.047215454,
0.0018794702, -0.008740231, 0.029195482, 0.0037629968, -0.024923965,
-0.021407248, 0.009952853, -0.0055059143, 0.0044912454, 0.016966008,
-0.00081178773, -0.022250004, -0.014063889, -0.006170697, -0.0008208651,
-0.036218595, -0.0029040456, 0.03943083, -0.021814227, 0.017567957,
0.035849728, -0.049075987, 0.0040634805, 0.009878297, 0.028557112,
0.02336673, 0.010714448, 0.020129073, -0.030503238, 0.009350441,
0.039086528, -0.0037483997, -0.0034365985, 0.019824414, 0.014027232,
0.030565958, 0.0036307913, 0.0030920429, -0.009908996, 0.0027933475,
-7.140754e-5, -0.027733125, 0.0022445584, -0.032248124, 0.050226185,
0.030529078, -0.040353864, 0.031086015, -0.0063569676, 0.031343475,
-0.020244656, -0.011442288, 0.018035123, -0.005479394, 0.01783419,
-0.036066547, -0.0106600635, 0.044636995, -0.030209303, -0.07192714,
0.0128155155, 0.003505818, -0.0005725083, -0.01584388, -0.025725754,
0.025868604, 0.10576061, -0.012738124, 0.0012224225, 0.0472961,
0.021650923, 0.0061313445, 0.014010678, 0.016864019, 0.004049639,
0.10989465, 0.011927816, 0.013589654, 0.011258818, 0.022496557,
-0.018828733, 0.021635532, 0.0116777215, 0.11320542, -0.0011280471,
0.018990291, 0.001824643, -0.03793715, 0.0206918, -0.0050228164,
-0.013865701, 0.022277884, 0.019400347, 0.028610364, -0.023974052,
0.0030309444, 0.027177742, 0.024541136, 0.023737634, 0.0012539584,
0.0187086, -0.015451178, 0.015066189, -0.019812824, 0.050507285,
-0.0021846944, 0.041644007, -0.0070109894, -0.014599777, -0.05985813,
-0.036328156, -0.02293525, -0.0065515027, 0.016792618, -0.0059018973,
-0.008319917, 0.008072106, 0.0073447954, -0.052924518, -0.037344936,
0.015524772, -0.0012835241, 0.014405327, 0.0057144985, 0.004945561,
-0.024654018, 0.011967616, 0.01832056, 0.019411784, 0.019788045,
-0.0006405928, -0.0015148119, -0.05064218, -0.031875107, -0.03803604,
-0.0096240705, 0.012371131, -0.019090319, 0.0075365147, -0.024229601,
0.014469528, -0.004786435, 0.0011314518, 0.009256282, -0.04957284,
-0.0068631344, -0.010091242, -0.023295002, 0.03268865, 0.022269772,
0.037733294, -0.015292435, -0.06330943, -0.00854154, 0.0027765913,
0.0015374947, 0.0377278, 0.008772586, -0.01810512, -0.0025668603,
0.014428339, 0.0027927365, 0.07493676, -0.022829408, -0.028912589,
0.008928177, 0.011323267, 0.008405796, 0.016925976, 0.001739356,
-0.021090876, -0.0062678503, 0.010898773, -0.010470923, 0.015523946,
-0.027888289, 0.023872118, -0.048326068, 0.025968319, 0.0047795917,
-0.016123952, 0.00698612, -0.05154045, -0.003691712, -0.0101406425,
-0.0241034, 0.004006022, 0.0021649078, 0.0019942294, -0.009274028,
-0.006467623, -0.0010948133, -0.012350769, -0.0060371486, -0.0006392645,
0.031422533, 0.015165475, -0.012650007, -0.005918423, 0.005781174,
-0.023262534, -0.0043034274, -0.010881872, -0.015937665, -0.0043740096,
-0.02981798, -0.0037422387, -0.029688178, 0.022320364, -0.0014900378,
-0.026122924, -0.04360404, 0.016354023, -0.02447563, 0.0205314,
0.0042775236, -0.020184014, -0.0017819501, 0.036122557, 0.0036566693,
0.07459051, -0.0035548757, 0.004874807, -0.028627345, -0.023153499,
0.03710664, -0.000639956, -0.030509725, -0.005146651, -0.010251552,
0.028408762, -0.008056198, -0.018420909, 0.02850364, -0.0075958185,
-0.008918139, 0.002778187, 0.06220242, -0.016280292, -0.026200369,
0.05900717, -0.013802131, 0.005442568, -0.033114687, 0.010976371,
0.008192846, 0.0031891295, 0.024811232, 0.009066575, -0.026441244,
0.030676885, -0.014591597, -0.024314625, -0.037472498, -0.015021544,
-0.016501956, -0.0069196, 0.013831272, 0.056646723, 0.007946148,
-0.002477574, -0.030496774, -0.011770325, 0.06742689, -0.03180974,
-0.025615396
],
"text": [
-0.0040579583, -0.00084722764, -0.008696951, -0.006850008, -0.010870523,
0.014495447, -0.010678498, -0.09618138, 0.016697474, -0.014809047,
-0.0035991871, 0.020752821, 0.0020757387, 0.0018064519, 0.02969283,
-0.0040159826, 0.02335311, 0.015918557, -0.0015919582, 0.013545261,
0.011341818, -0.006991808, 0.0020565446, -0.016662853, -0.0064206184,
0.011536576, -0.01144098, 0.015054818, -0.013258694, 0.0046747606,
-0.00681864, -0.012852865, 0.012708946, 0.006093663, 0.0029938417,
0.015458671, 0.0040865405, -0.0004354532, 0.0037405093, -0.015085074,
0.0007998808, -0.021485215, -0.0066235093, 0.015721628, -0.002462181,
-0.0049815965, -0.011028703, 0.0041498104, -0.00070322485, 0.0031991813,
-0.0075132507, -0.008273014, 0.0125206, 0.019671565, 0.02124969,
0.0076838327, 0.0015366874, 0.0004413452, -0.0027475145, 0.031049952,
0.01782742, -0.01759819, 0.0040917504, -0.011803108, 0.0051114787,
-0.0075210207, -0.0062834355, 0.010283767, 0.02023746, -0.020258851,
0.004795256, -0.011313993, -0.014636256, 0.004900588, -0.0026439666,
-0.0004062344, -0.040196564, -0.014539185, 0.014600707, 0.004162044,
-0.0155277, -0.016300475, 0.0039491425, 0.022403471, 0.0055926195,
0.03543051, 0.01047029, -0.03262415, 0.02942466, -0.011440199,
-0.012713757, 0.0062652803, -0.15837249, 0.028388312, 7.452041e-5,
0.007136255, -0.010753105, 0.016393846, 0.004432782, -0.010688704,
0.015814407, 0.01654759, 0.0008900756, -0.007162077, -6.0264443e-5,
6.894444e-5, -0.034889467, -0.026710762, 0.005202752, -0.012675916,
0.010903986, 0.017916093, 0.005404525, -0.003624909, -0.012261727,
-0.021104869, -0.01593513, -0.009227664, -0.022844192, 0.008606035,
0.0013098373, 0.00637147, 0.0027185818, -0.0032128745, 0.018255865,
-0.002964337, -0.006976183, -0.0063263937, 0.0075582284, 0.014295236,
0.00485664, -0.029005948, -0.014672015, 0.61821824, -0.010903263,
0.022125997, -0.018154604, -0.007414954, -0.012344926, 0.0029751004,
-0.010787629, -0.0056861844, -0.025484746, -0.0004887071, -0.036681578,
0.010114145, -0.009012449, -0.00048479583, -0.011162415, -0.0057421126,
-0.0019520421, -0.0013580753, 0.0037870558, 0.0012404326, -0.00089634134,
-0.022112457, 0.0034537334, 0.014985147, -0.010455136, 0.018100852,
-0.010999219, -0.027524924, -0.009551776, -0.0047603208, -0.001092369,
0.008849578, 0.021949856, -0.034437556, -0.0051499153, -0.0006772509,
-0.011200381, -0.009206776, 0.021897016, -0.0013931778, 0.0041396013,
-0.025534542, 0.0074160174, 0.00039215147, 0.025992293, 0.0069832364,
-0.0175616, 0.01272807, -0.020147255, 0.02455081, -0.01236127,
-0.011840565, 0.011820177, 0.018173985, 0.017230362, -0.016969377,
0.0010091222, -0.04185319, -0.030467693, -0.012564729, 0.030740628,
-0.004086395, 0.0013223978, -0.0013041743, -0.01975558, -0.014959637,
0.033446018, -0.014724724, -0.028613493, 0.010436393, -0.009841343,
-0.013723956, -0.0010025625, 0.016992576, 0.0056477, -0.026704773,
0.018927934, 0.013758461, 0.016924908, -0.026889605, -0.01496036,
0.02078507, -0.0149594685, -0.021289647, 0.027369255, -0.00205557,
0.0129268635, -0.014446633, 0.0039108247, 0.014774828, 0.004396043,
0.0038431762, 0.012223014, 0.0061016707, -0.006525442, -0.018426975,
0.03081795, 0.024269402, -0.020132616, -0.008118887, 0.025062446,
-0.0033954307, -0.019662865, -0.0032548332, -0.008575233, -0.003158561,
0.0012930515, 0.02213235, 0.017646195, -0.016638828, -0.0154889,
0.031743307, 0.001081875, -0.0019133464, 0.034760594, -0.008569126,
-0.019119555, 0.020908207, -0.0047135833, 0.00984879, -0.016712308,
0.028532412, 0.0038664932, -0.0071539935, 0.0013488994, 0.0060503725,
0.0021401793, -0.032594826, 0.010918716, 0.0075080344, 0.00020341178,
-0.030393362, 0.014375046, 0.018798219, -0.0040685013, 0.020957684,
-0.012454064, 0.014856742, 0.0017268835, -0.008762698, 0.007062434,
0.024501909, 0.0011791736, -0.023002177, -0.012701125, -0.0053904364,
0.015551624, 0.018748082, 0.00704452, 0.0047835982, 0.0013530678,
0.0033350172, 0.0056562345, 0.009079597, 0.0059383595, 0.011405316,
-0.004795079, 0.007274586, -0.0011659514, -0.0001364172, -0.0050517535,
-0.010681983, -0.023743946, -0.020241234, 0.0009631201, 0.014073974,
-0.00665422, 0.011845411, -0.0001289105, -0.024006248, -0.0009306585,
0.0139923245, 0.020409467, -0.017118154, -0.0151973255, -0.016737074,
-0.002157259, -0.0060298163, 0.61768156, 0.01275426, -0.023746304,
-0.010786622, -0.00050265377, -0.010761652, -0.013012264, 0.013700237,
0.019240098, 0.049388826, -0.007853694, -0.014966961, -0.026477624,
0.02809707, 0.009291939, -0.028884491, 0.015102742, -0.27225503,
0.01782282, -0.016989257, -0.0051341387, 0.0037056766, -0.004537146,
-0.026184445, 0.020256622, 0.0073146136, -0.0070027146, -0.009025792,
-0.015298917, -0.0052380697, -0.0005596046, -0.0041900063, 0.015934054,
0.008158574, -0.0038807616, -0.0048019756, 0.0061978237, 0.022159556,
-0.02619826, 0.0013973896, -0.00012341494, 0.030957809, -0.009596324,
0.008263321, 0.0017040323, -0.0010236687, 0.017982712, -0.012567677,
0.007361281, 0.0028631007, 0.032613713, 0.035072606, -0.045417674,
0.016303446, -0.009096281, 0.012163677, -0.008316459, 0.006423764,
-0.008586175, -0.0009862242, 0.009973197, 0.020825483, -0.005682246,
0.0066081304, 0.0061441967, -0.00670868, -0.024878936, -0.024288971,
-0.009822955, -0.011659227, -0.0067634145, -0.0011930552, 0.017096667,
0.01974797, -0.020388834, -0.008245143, -0.0071634515, 0.0012492571,
-0.010288493, -0.0025248309, -0.0039965925, 0.037344053, -0.019459987,
0.022098366, -0.021084892, -0.014823354, -0.010007409, -0.005560381,
-0.012292843, 0.0132691385, 0.0066421456, 0.0045196814, 0.0044144704,
0.0062646614, 0.0050272197, 0.020296281, 0.011412983, -0.0040745772,
0.00542041, 0.0021500897, 0.005183101, 0.00985178, 0.014477596,
-0.0131016085, -0.0064126155, -0.004809687, -0.016441243, 0.010445765,
0.0013761928, -0.0135576585, 0.0003352349, -0.010797083, -0.0058007324,
0.021649584, 0.012650062, -0.009740497, -0.025809184, -0.026720846,
0.029149767, 0.014593344, -0.0134959705, -0.004710099, -0.0062580137,
-0.0047687683, -0.029818097, -0.004622532, 0.02532894, 0.0051457905,
-0.0046252706, -0.02905562, -0.019097809, -0.035888474, -0.006897086,
-0.0035953831, -0.0013759647, 0.0027531807, -0.002395984, -0.040570017,
-0.02462688, 0.009387292, 0.0025142033, 0.02404064, -0.0014443685,
1.0727288e-5, -0.024033979, 0.011659959, -0.016820917, 0.018782362,
-0.019061793, 0.0043488434, 0.00040266776, -0.0022744886, 0.0024185092,
-0.0041366024, 0.0028075825, -0.0085624885, -0.012087987, 0.013551666,
0.0019014167, 0.007896904, 0.031102024, 0.0091334, -0.0030707342,
0.0066130627, -0.002711352, -0.012097188, 0.017067473, 0.021030908,
0.0014250687, -0.092848144, -0.0034704215, 0.013624546, 0.013779425,
-0.0025326884, -0.0018633928, 0.00014903376, 0.01547092, 0.008385425,
-0.0033495796, -0.015248458, -0.0356735, 0.005223496, -0.018293105,
-0.043073945, 0.016345823, -0.0050947615, 0.023554962, 0.034400985,
0.0045644785, -0.00011241743, 0.0060564913, 0.0021182992, 0.01914424,
0.019295372, -0.00551076, -0.00017086207, 0.0032044165, 0.010140755,
-0.022354674, 0.026089797
]
},
"facial-recognition": [
{
"boundingBox": {
"x1": 690.0,
"y1": -89.0,
"x2": 833.0,
"y2": 96.0
},
"score": 0.03575617074966431,
"embedding": [
-0.43665668, -0.59305364, -0.12699714, 0.3985032, 0.1878969,
-0.25987914, 0.14818184, -0.542229, -0.06710237, -0.1319032,
0.056408346, 0.046093762, -0.14984925, 0.043225512, 0.023826078,
-0.09063442, 0.07891726, -0.29357076, -0.6277133, -0.29042292,
0.18038993, 0.21837695, 0.17909442, -0.040304773, -0.035560638,
-0.07568607, 0.1277122, -0.13466191, -0.2368693, 0.3642968, 0.29558533,
0.20867407, 0.11252518, 0.47691494, -0.054775044, 0.030100197,
-0.049531147, 0.04045874, 0.23517768, 0.17130391, 0.17269331,
0.08591308, 0.046999797, -0.17151847, -0.2443775, 0.3110528,
-0.23971468, -0.31744513, -0.026422635, -0.26203394, -0.18553479,
-0.31044272, 0.6385251, 0.27497086, 0.006674953, 0.053785797,
-0.20257844, -0.48399794, 0.21708605, -0.4781224, -0.12367296,
-0.099010885, 0.18633766, 0.31143454, -0.12165704, 0.13010044,
0.12534627, 0.107288495, 0.37471777, -0.123026475, -0.1263274,
-0.15621608, 0.26027548, 0.15841314, 0.5164254, -0.31015784, 0.24754328,
0.10240883, -0.1181829, -0.14073256, 0.027111322, 0.09927598,
-0.10066943, 0.4808423, -0.042361684, -0.08512197, 0.13695274,
0.30378994, 0.11138052, -0.318214, -0.5708592, -0.14786953, 0.49985552,
-0.23231967, 0.13856675, -0.5383139, -0.059954256, 0.2796868,
-0.32447946, 0.16510965, 0.57146084, -0.15120608, 0.20110571,
-0.49805385, -0.2008879, -0.046678245, 0.24653266, 0.022508677,
-0.14091778, 0.38075653, 0.33811444, 0.05011098, -0.2371835,
-0.20052075, -0.14081016, -0.3422103, 0.11998144, 0.24423985,
0.13769919, -0.25340003, -0.41080874, -0.28673622, -0.20673269,
0.4604351, 0.4178845, 0.105202496, -0.1446912, 0.0807363, -0.37372503,
0.13030809, -0.08456054, 0.21937889, -0.22700784, 0.3039499,
0.009784861, -0.07245704, 0.50291365, -0.24968931, 0.3178813,
0.12665558, -0.036484346, 0.21702805, -0.09277919, 0.17766781,
-0.12018812, 0.008044228, -0.26986086, 0.29888278, -0.28485933,
0.30066437, -0.14316985, 0.53800535, 0.030840248, 0.023039162,
0.73862207, 0.0034680888, 0.23797399, -0.11183337, 0.067846656,
-0.23546576, 0.39354736, 0.0053778216, 0.13494004, 0.1370637,
-0.029445097, 0.14705376, -0.48120612, 0.27262342, -0.05196667,
-0.3097266, 0.08714986, 0.10841283, -0.11757159, -0.5010461,
-0.32369986, -0.21964747, -0.19810468, 0.14780998, -0.04624281,
0.24638015, -0.06710279, -0.31719172, 0.26955876, 0.37117082,
-0.3964724, 0.21541706, -0.12243534, -0.5392555, 0.04640211, 0.3657012,
-0.042127043, -0.030638859, 0.21909437, 0.16005577, -0.03320134,
-0.0949998, 0.33176076, 0.22538322, -0.016216129, -0.42417043,
0.52940613, -0.011592716, -0.21875188, -0.06394625, 0.24449442,
-0.05658462, -0.09727913, -0.3978734, -0.11175068, 0.085142605,
-0.057618782, -0.0498557, 0.17287247, 0.41813853, -0.30433404,
0.3087585, -0.6604493, -0.13869359, 0.072916515, -0.043251924,
0.37401634, 0.17014223, -0.26469553, -0.34653437, 0.13010754,
0.21517499, 0.74030113, 0.3460628, -0.5115478, 0.4696753, -0.009848075,
-0.1330159, -0.0061842054, 0.013667986, 0.16993025, -0.3161455,
0.29015008, 0.65197945, 0.13776428, 0.5275149, 0.1472181, -0.114682674,
-0.05685012, 0.21696919, -0.34107065, 0.09352806, -0.03968816,
-0.13109599, 0.07406853, 0.15091223, 0.18835881, 0.19146737, -0.3898828,
0.469747, -0.11145213, 0.039727956, 0.8268787, -0.09761663,
-0.043320894, 0.27001414, 0.12079324, 0.05877747, 0.028245524,
0.20692128, 0.68440485, -0.34984088, -0.119763374, -0.39637753,
0.23799005, 0.057573274, 0.07855352, 0.37982583, -0.0365879,
0.068318695, 0.10845077, -0.18650186, 0.08927679, -0.27789003,
0.31810492, 0.4251458, -0.03525705, -0.28072172, 0.07316002, 0.13499324,
-0.11333761, -0.0008841604, 0.10874095, 0.29681873, 0.008288942,
0.24116173, 0.011309357, -0.3009541, -0.4752865, 0.19921738,
-0.16108191, 0.017838746, 0.51260126, -0.086799264, 0.34165853,
0.32359147, 0.25770876, 0.21442738, -0.15971375, -0.26682994,
0.22788364, -0.38956794, 0.084580205, -0.15929273, 0.24211408,
-0.24793725, -0.31528267, 0.15945697, -0.16866091, 0.19472758, 0.408394,
0.24238603, -0.23643477, 0.29852632, 0.12915722, 0.327068, 0.501809,
-0.40538347, -0.023235738, -0.11315605, 0.007632144, -0.22626217,
0.28817925, -0.5816528, 0.1551521, -0.016097836, -0.01634605,
0.095855944, -0.010664792, 0.1402924, -0.22450349, -0.13961065,
-0.40732136, -0.24776831, 0.12040292, -0.06779129, 0.44510496,
0.33206633, 0.19807269, -0.06460787, -0.2524265, -0.12726343,
0.44656014, -0.09844789, -0.18762295, 0.16189753, 0.23589599,
-0.44798508, 0.2135099, -0.33205217, 0.28407755, -0.0951985,
0.035582896, -0.51807857, -0.27382392, 0.03172898, 0.22928514,
0.47157723, -0.48383215, 0.014225766, -0.08102345, 0.19384615,
-0.060681015, -0.037799604, -0.2875836, 0.024652202, 0.052712113,
-0.22610298, 0.46830428, 0.29616976, 0.14641494, -0.24234764,
0.30126396, -0.011165038, 0.38622355, -0.12484505, 0.33650652,
0.17399745, -0.2703057, 0.36919123, 0.26170117, -0.1537327, -0.43157104,
0.35697, 0.043892622, -0.065475196, 0.5542902, 0.019970104, 0.43128124,
-0.014292087, -0.33983213, 0.3250854, 0.21585244, -0.34458104,
0.23752448, 0.18115376, -0.2586738, -0.16033548, -0.16151018,
-0.23306333, 0.14865296, -0.31790328, 0.27215546, -0.059920013,
0.16193654, 0.075943366, -0.16281635, 0.4489306, -0.43052202,
-0.038787995, -0.11722573, 0.07254093, -0.2997051, 0.16540596,
-0.15089649, 0.12507877, 0.43725327, 0.13540109, 0.13391787,
0.013777234, 0.26951605, -0.2999856, -0.08645636, 0.12768297,
0.23375636, -0.07325045, -0.04433371, 0.04709586, 0.09582621,
0.23509142, 0.18061984, 0.35379466, 0.12938409, 0.33010754, 0.18966632,
0.07585195, 0.0059688687, -0.13233723, 0.17105722, -0.020040989,
-0.2805646, -0.091034755, 0.1950869, -0.21115655, -0.16249251,
0.07147664, -0.20138165, 0.15193966, 0.041464765, 0.01074836,
0.029091328, -0.22078216, 0.06446775, -0.27403125, -0.51904315,
-0.20539844, 0.176225, -0.28688902, 0.030568387, 0.2964594,
-0.088931546, -0.4425866, 0.09070322, 0.08005672, 0.009866249,
-0.07386999, 0.06683251, -0.34370828, 0.23668535, -0.0847823,
-0.27400133, -0.31668398, -0.116622224, 0.20027944, 0.33772525,
-0.3041445, -0.61801887, 0.043022886, -0.24733649, -0.20657904,
-0.37058303, 0.00644885, 0.2548513, 0.029221226, -0.41749227,
0.065117866, -0.3745206, 0.22699282, 0.22139677, 0.28097618, 0.10008535,
-0.039953396, -0.33505437, 0.28511694, 0.18131426, -0.879614,
-0.041319087, -0.62370497, 0.05170501, 0.23541749, -0.0033701807,
0.15842043, 0.020002551, -0.22027364, -0.2730838, -0.23035137,
-0.077056274, 0.002099529
]
}
],
"imageWidth": 600,
"imageHeight": 800
}

View file

@ -0,0 +1,13 @@
#!/usr/bin/env bash
# Rewrites the Debian apt sources to enable extra components and the sid
# suite, then pins unstable below the default priority.
set -e

sources_file=/etc/apt/sources.list.d/debian.sources

# First expression: add contrib, non-free and non-free-firmware after " main".
# Second expression: add sid after " bookworm-updates".
sed -i \
  -e 's/ main/ main contrib non-free non-free-firmware/g' \
  -e 's/ bookworm-updates/ bookworm-updates sid/g' \
  "$sources_file"

# default priority is 500, so we set unstable to 450 to prefer stable packages
printf '%s\n' \
  'Package: *' \
  'Pin: release a=unstable' \
  'Pin-Priority: 450' \
  > /etc/apt/preferences.d/preferences

View file

@ -0,0 +1,27 @@
import os
import sys
from ipaddress import ip_address
import requests
# Port and host to probe, read from the environment; the literal defaults are
# used when the variables are unset. The port default is an int while an
# environment override is a str.
port = os.environ.get("IMMICH_PORT", 3003)
host = os.environ.get("IMMICH_HOST", "0.0.0.0")
def is_ipv6(host: str) -> bool:
    """Return True when ``host`` parses as an IPv6 address literal.

    Anything that ``ip_address`` rejects (hostnames, malformed input) yields
    False instead of raising.
    """
    try:
        parsed = ip_address(host)
    except ValueError:
        return False
    return parsed.version == 6
# The wildcard bind address is probed through localhost instead.
if host == "0.0.0.0":
    host = "localhost"
# IPv6 literals must be wrapped in brackets inside a URL.
if is_ipv6(host):
    host = f"[{host}]"

# Any request-level failure (including the 2 second timeout) exits with 1.
try:
    response = requests.get(f"http://{host}:{port}/ping", timeout=2)
except requests.RequestException:
    sys.exit(1)

# Exit 0 only for HTTP 200; any other status exits with 1.
sys.exit(0 if response.status_code == 200 else 1)

File diff suppressed because it is too large Load diff

3137
machine-learning/uv.lock generated Normal file

File diff suppressed because it is too large Load diff