inference (llama-3.2-latest)
Published 2026-02-04 09:45:51 +00:00 by dany.henriquez
Installation
docker pull gitea.basebox.health/basebox-distribution/inference:llama-3.2-latestsha256:4936dbc4b3d612ed7e8f7b1c3cf23d973b57cb402d353e5dbc64585b32ae880b
Image Layers
| ARG RELEASE |
| ARG LAUNCHPAD_BUILD_ARCH |
| LABEL org.opencontainers.image.ref.name=ubuntu |
| LABEL org.opencontainers.image.version=22.04 |
| ADD file:415bbc01dfb447d002e2d8173e113ef025d2bbfa20f1205823fa699dc87a2019 in / |
| CMD ["/bin/bash"] |
| ENV NVARCH=x86_64 |
| ENV NVIDIA_REQUIRE_CUDA=cuda>=12.9 brand=unknown,driver>=535,driver<536 brand=grid,driver>=535,driver<536 brand=tesla,driver>=535,driver<536 brand=nvidia,driver>=535,driver<536 brand=quadro,driver>=535,driver<536 brand=quadrortx,driver>=535,driver<536 brand=nvidiartx,driver>=535,driver<536 brand=vapps,driver>=535,driver<536 brand=vpc,driver>=535,driver<536 brand=vcs,driver>=535,driver<536 brand=vws,driver>=535,driver<536 brand=cloudgaming,driver>=535,driver<536 brand=unknown,driver>=550,driver<551 brand=grid,driver>=550,driver<551 brand=tesla,driver>=550,driver<551 brand=nvidia,driver>=550,driver<551 brand=quadro,driver>=550,driver<551 brand=quadrortx,driver>=550,driver<551 brand=nvidiartx,driver>=550,driver<551 brand=vapps,driver>=550,driver<551 brand=vpc,driver>=550,driver<551 brand=vcs,driver>=550,driver<551 brand=vws,driver>=550,driver<551 brand=cloudgaming,driver>=550,driver<551 brand=unknown,driver>=560,driver<561 brand=grid,driver>=560,driver<561 brand=tesla,driver>=560,driver<561 brand=nvidia,driver>=560,driver<561 brand=quadro,driver>=560,driver<561 brand=quadrortx,driver>=560,driver<561 brand=nvidiartx,driver>=560,driver<561 brand=vapps,driver>=560,driver<561 brand=vpc,driver>=560,driver<561 brand=vcs,driver>=560,driver<561 brand=vws,driver>=560,driver<561 brand=cloudgaming,driver>=560,driver<561 brand=unknown,driver>=565,driver<566 brand=grid,driver>=565,driver<566 brand=tesla,driver>=565,driver<566 brand=nvidia,driver>=565,driver<566 brand=quadro,driver>=565,driver<566 brand=quadrortx,driver>=565,driver<566 brand=nvidiartx,driver>=565,driver<566 brand=vapps,driver>=565,driver<566 brand=vpc,driver>=565,driver<566 brand=vcs,driver>=565,driver<566 brand=vws,driver>=565,driver<566 brand=cloudgaming,driver>=565,driver<566 brand=unknown,driver>=570,driver<571 brand=grid,driver>=570,driver<571 brand=tesla,driver>=570,driver<571 brand=nvidia,driver>=570,driver<571 brand=quadro,driver>=570,driver<571 brand=quadrortx,driver>=570,driver<571 brand=nvidiartx,driver>=570,driver<571 brand=vapps,driver>=570,driver<571 brand=vpc,driver>=570,driver<571 brand=vcs,driver>=570,driver<571 brand=vws,driver>=570,driver<571 brand=cloudgaming,driver>=570,driver<571 |
| ENV NV_CUDA_CUDART_VERSION=12.9.79-1 |
| ARG TARGETARCH |
| LABEL maintainer=NVIDIA CORPORATION <cudatools@nvidia.com> |
| RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${NVARCH}/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && apt-get purge --autoremove -y curl && rm -rf /var/lib/apt/lists/* # buildkit |
| ENV CUDA_VERSION=12.9.1 |
| RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends cuda-cudart-12-9=${NV_CUDA_CUDART_VERSION} cuda-compat-12-9 && rm -rf /var/lib/apt/lists/* # buildkit |
| RUN |1 TARGETARCH=amd64 /bin/sh -c echo "/usr/local/cuda/lib64" >> /etc/ld.so.conf.d/nvidia.conf # buildkit |
| ENV PATH=/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin |
| ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64 |
| COPY NGC-DL-CONTAINER-LICENSE / # buildkit |
| ENV NVIDIA_VISIBLE_DEVICES=all |
| ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility |
| ARG CUDA_VERSION |
| ARG PYTHON_VERSION |
| ARG DEADSNAKES_MIRROR_URL |
| ARG DEADSNAKES_GPGKEY_URL |
| ARG GET_PIP_URL |
| ENV DEBIAN_FRONTEND=noninteractive |
| WORKDIR /vllm-workspace |
| RUN |5 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment # buildkit |
| RUN |5 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c echo 'tzdata tzdata/Areas select America' | debconf-set-selections && echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections && apt-get update -y && apt-get install -y --no-install-recommends software-properties-common curl sudo python3-pip ffmpeg libsm6 libxext6 libgl1 && if [ ! -z ${DEADSNAKES_MIRROR_URL} ] ; then if [ ! -z "${DEADSNAKES_GPGKEY_URL}" ] ; then mkdir -p -m 0755 /etc/apt/keyrings ; curl -L ${DEADSNAKES_GPGKEY_URL} | gpg --dearmor > /etc/apt/keyrings/deadsnakes.gpg ; sudo chmod 644 /etc/apt/keyrings/deadsnakes.gpg ; echo "deb [signed-by=/etc/apt/keyrings/deadsnakes.gpg] ${DEADSNAKES_MIRROR_URL} $(lsb_release -cs) main" > /etc/apt/sources.list.d/deadsnakes.list ; fi ; else for i in 1 2 3; do add-apt-repository -y ppa:deadsnakes/ppa && break || { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; done ; fi && apt-get update -y && apt-get install -y --no-install-recommends python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv libibverbs-dev && rm -rf /var/lib/apt/lists/* && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config && curl -sS ${GET_PIP_URL} | python${PYTHON_VERSION} && python3 --version && python3 -m pip --version # buildkit |
| RUN |5 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-') && apt-get update -y && apt-get install -y --no-install-recommends cuda-nvcc-${CUDA_VERSION_DASH} cuda-cudart-${CUDA_VERSION_DASH} cuda-nvrtc-${CUDA_VERSION_DASH} cuda-cuobjdump-${CUDA_VERSION_DASH} libcurand-dev-${CUDA_VERSION_DASH} libcublas-${CUDA_VERSION_DASH} libnccl-dev && rm -rf /var/lib/apt/lists/* # buildkit |
| RUN |5 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c python3 -m pip install uv # buildkit |
| ENV UV_HTTP_TIMEOUT=500 |
| ENV UV_INDEX_STRATEGY=unsafe-best-match |
| ENV UV_LINK_MODE=copy |
| RUN |5 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py /bin/sh -c ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ # buildkit |
| ARG PYTORCH_CUDA_INDEX_BASE_URL |
| COPY requirements/common.txt /tmp/common.txt # buildkit |
| COPY requirements/cuda.txt /tmp/requirements-cuda.txt # buildkit |
| RUN |6 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl /bin/sh -c uv pip install --system -r /tmp/requirements-cuda.txt --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && rm /tmp/requirements-cuda.txt /tmp/common.txt # buildkit |
| ARG FLASHINFER_VERSION=0.5.3 |
| RUN |7 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.5.3 /bin/sh -c uv pip install --system flashinfer-cubin==${FLASHINFER_VERSION} && uv pip install --system flashinfer-jit-cache==${FLASHINFER_VERSION} --extra-index-url https://flashinfer.ai/whl/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') && flashinfer show-config # buildkit |
| ARG GDRCOPY_CUDA_VERSION=12.8 |
| ARG GDRCOPY_OS_VERSION=Ubuntu22_04 |
| ARG TARGETPLATFORM |
| COPY tools/install_gdrcopy.sh /tmp/install_gdrcopy.sh # buildkit |
| RUN |10 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.5.3 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 /bin/sh -c set -eux; case "${TARGETPLATFORM}" in linux/arm64) UUARCH="aarch64" ;; linux/amd64) UUARCH="x64" ;; *) echo "Unsupported TARGETPLATFORM: ${TARGETPLATFORM}" >&2; exit 1 ;; esac; /tmp/install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "${GDRCOPY_CUDA_VERSION}" "${UUARCH}" && rm /tmp/install_gdrcopy.sh # buildkit |
| RUN |10 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.5.3 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 /bin/sh -c if [ "$TARGETPLATFORM" = "linux/arm64" ]; then BITSANDBYTES_VERSION="0.42.0"; else BITSANDBYTES_VERSION="0.46.1"; fi; uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3,gcs]>=0.15.3' # buildkit |
| ARG PIP_INDEX_URL UV_INDEX_URL |
| ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL |
| ARG PYTORCH_CUDA_INDEX_BASE_URL |
| ARG PIP_KEYRING_PROVIDER UV_KEYRING_PROVIDER |
| RUN |17 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.5.3 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled /bin/sh -c uv pip install --system dist/*.whl --verbose --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # buildkit |
| RUN |17 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.5.3 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled /bin/sh -c . /etc/environment && uv pip list # buildkit |
| RUN |17 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.5.3 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled /bin/sh -c sh -c 'if ls /tmp/deepgemm/dist/*.whl >/dev/null 2>&1; then uv pip install --system /tmp/deepgemm/dist/*.whl; else echo "No DeepGEMM wheels to install; skipping."; fi' # buildkit |
| ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64 |
| RUN |17 CUDA_VERSION=12.9.1 PYTHON_VERSION=3.12 DEADSNAKES_MIRROR_URL= DEADSNAKES_GPGKEY_URL= GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl FLASHINFER_VERSION=0.5.3 GDRCOPY_CUDA_VERSION=12.8 GDRCOPY_OS_VERSION=Ubuntu22_04 TARGETPLATFORM=linux/amd64 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= PYTORCH_CUDA_INDEX_BASE_URL=https://download.pytorch.org/whl PIP_KEYRING_PROVIDER=disabled UV_KEYRING_PROVIDER=disabled /bin/sh -c uv pip install --system ep_kernels/dist/*.whl --verbose --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') # buildkit |
| ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64 |
| COPY examples examples # buildkit |
| COPY benchmarks benchmarks # buildkit |
| COPY ./vllm/collect_env.py . # buildkit |
| ARG TARGETPLATFORM |
| ARG INSTALL_KV_CONNECTORS=false |
| ARG CUDA_VERSION |
| ARG PIP_INDEX_URL UV_INDEX_URL |
| ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL |
| ENV UV_HTTP_TIMEOUT=500 |
| ARG torch_cuda_arch_list=7.0 7.5 8.0 8.9 9.0 10.0 12.0 |
| ENV TORCH_CUDA_ARCH_LIST=7.0 7.5 8.0 8.9 9.0 10.0 12.0 |
| RUN |8 TARGETPLATFORM=linux/amd64 INSTALL_KV_CONNECTORS=true CUDA_VERSION=12.9.1 PIP_INDEX_URL= UV_INDEX_URL= PIP_EXTRA_INDEX_URL= UV_EXTRA_INDEX_URL= torch_cuda_arch_list=7.0 7.5 8.0 8.9 9.0 10.0 12.0 /bin/sh -c CUDA_MAJOR="${CUDA_VERSION%%.*}"; CUDA_VERSION_DASH=$(echo $CUDA_VERSION | cut -d. -f1,2 | tr '.' '-'); CUDA_HOME=/usr/local/cuda; BUILD_PKGS="libcusparse-dev-${CUDA_VERSION_DASH} libcublas-dev-${CUDA_VERSION_DASH} libcusolver-dev-${CUDA_VERSION_DASH}"; if [ "$INSTALL_KV_CONNECTORS" = "true" ]; then if [ "$CUDA_MAJOR" -ge 13 ]; then uv pip install --system nixl-cu13; fi; uv pip install --system -r /tmp/kv_connectors.txt --no-build || ( apt-get update -y && apt-get install -y --no-install-recommends ${BUILD_PKGS} && uv pip install --system -r /tmp/kv_connectors.txt --no-build-isolation && apt-get purge -y ${BUILD_PKGS} && rm -rf /var/lib/apt/lists/* ); fi # buildkit |
| ENV VLLM_USAGE_SOURCE=production-docker-image |
| ENTRYPOINT ["vllm" "serve"] |
| RUN /bin/sh -c mkdir -p /root/.cache/huggingface/hub && cp -r /tmp/model /root/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B-Instruct && python3 -c "from huggingface_hub import snapshot_download; print('Model verified:', snapshot_download(repo_id='meta-llama/Llama-3.2-1B-Instruct', local_files_only=True))" # buildkit |
| ENV MODEL_NAME=meta-llama/Llama-3.2-1B-Instruct |
Labels
| Key | Value |
|---|---|
| maintainer | NVIDIA CORPORATION <cudatools@nvidia.com> |
| org.opencontainers.image.ref.name | ubuntu |
| org.opencontainers.image.version | 22.04 |
Details
2026-02-04 09:45:51 +00:00
Versions (31)
View all
Container
6
OCI / Docker
linux/amd64
12 GiB
dev-latest
2026-02-12
dev-e1656e2
2026-02-12
dev-cf575a6
2026-02-12
dev-89f178e
2026-02-11
dev-48a5bbb
2026-02-11