basebox Distribution

inference (latest)

Published 2025-11-17 18:29:08 +00:00 by dany.henriquez

Installation

docker pull gitea.basebox.health/basebox-distribution/inference:latest
sha256:64d9f056d4fc5ed4673acde8a2fe2d09338913f967fb12c72cad146f9a82f175

About this package

Large Language Model Text Generation Inference

Image Layers

ARG RELEASE
ARG LAUNCHPAD_BUILD_ARCH
LABEL org.opencontainers.image.ref.name=ubuntu
LABEL org.opencontainers.image.version=22.04
ADD file:21c2e8d95909bec6f4acdaf4aed55b44ee13603681f93b152e423e3e6a4a207b in /
CMD ["/bin/bash"]
ENV NVARCH=x86_64
ENV NVIDIA_REQUIRE_CUDA=cuda>=12.4 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526 brand=tesla,driver>=535,driver<536 brand=unknown,driver>=535,driver<536 brand=nvidia,driver>=535,driver<536 brand=nvidiartx,driver>=535,driver<536 brand=geforce,driver>=535,driver<536 brand=geforcertx,driver>=535,driver<536 brand=quadro,driver>=535,driver<536 brand=quadrortx,driver>=535,driver<536 brand=titan,driver>=535,driver<536 brand=titanrtx,driver>=535,driver<536
ENV NV_CUDA_CUDART_VERSION=12.4.99-1
ENV NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-4
ARG TARGETARCH
LABEL maintainer=NVIDIA CORPORATION <cudatools@nvidia.com>
RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends gnupg2 curl ca-certificates && curl -fsSLO https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/${NVARCH}/cuda-keyring_1.1-1_all.deb && dpkg -i cuda-keyring_1.1-1_all.deb && apt-get purge --autoremove -y curl && rm -rf /var/lib/apt/lists/* # buildkit
ENV CUDA_VERSION=12.4.0
RUN |1 TARGETARCH=amd64 /bin/sh -c apt-get update && apt-get install -y --no-install-recommends cuda-cudart-12-4=${NV_CUDA_CUDART_VERSION} ${NV_CUDA_COMPAT_PACKAGE} && rm -rf /var/lib/apt/lists/* # buildkit
RUN |1 TARGETARCH=amd64 /bin/sh -c echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf # buildkit
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
COPY NGC-DL-CONTAINER-LICENSE / # buildkit
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility
ENV HF_HOME=/data HF_HUB_ENABLE_HF_TRANSFER=1 PORT=80
WORKDIR /usr/src
RUN /bin/sh -c apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends libssl-dev ca-certificates make curl git && rm -rf /var/lib/apt/lists/* # buildkit
COPY /uv /uvx /bin/ # buildkit
COPY /usr/src/.venv /usr/src/.venv # buildkit
ENV PYTHON_VERSION=3.11
RUN /bin/sh -c uv python install ${PYTHON_VERSION} # buildkit
ENV VIRTUAL_ENV=/usr/src/.venv/
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/src/.venv/bin/
COPY proto proto # buildkit
COPY server server # buildkit
COPY server/Makefile server/Makefile # buildkit
ENV HF_KERNELS_CACHE=/kernels
RUN /bin/sh -c cd server && uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --extra torch --no-install-project --active && make gen-server-raw && kernels download . # buildkit
RUN /bin/sh -c cd server && uv sync --frozen --extra gen --extra bnb --extra accelerate --extra compressed-tensors --extra quantize --extra peft --extra outlines --extra torch --active --python=${PYTHON_VERSION} && uv pip install nvidia-nccl-cu12==2.25.1 && pwd && text-generation-server --help # buildkit
COPY /usr/src/flash-attention/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages # buildkit
COPY /usr/src/flash-attention/csrc/layer_norm/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages # buildkit
COPY /usr/src/flash-attention/csrc/rotary/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages # buildkit
COPY /usr/src/.venv/lib/python3.11/site-packages/flash_attn_2_cuda.cpython-311-x86_64-linux-gnu.so /usr/src/.venv/lib/python3.11/site-packages # buildkit
COPY /usr/src/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages # buildkit
COPY /usr/src/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages # buildkit
COPY /usr/src/exllamav2/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages # buildkit
COPY /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-311 /usr/src/.venv/lib/python3.11/site-packages # buildkit
COPY /usr/src/mamba/build/lib.linux-x86_64-cpython-311/ /usr/src/.venv/lib/python3.11/site-packages # buildkit
COPY /usr/src/causal-conv1d/build/lib.linux-x86_64-cpython-311/ /usr/src/.venv/lib/python3.11/site-packages # buildkit
COPY /usr/src/.venv/lib/python3.11/site-packages/flashinfer/ /usr/src/.venv/lib/python3.11/site-packages/flashinfer/ # buildkit
ENV EXLLAMA_NO_FLASH_ATTN=1
RUN /bin/sh -c apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends build-essential g++ && rm -rf /var/lib/apt/lists/* # buildkit
COPY /usr/src/target/release-opt/text-generation-benchmark /usr/local/bin/text-generation-benchmark # buildkit
COPY /usr/src/target/release-opt/text-generation-router /usr/local/bin/text-generation-router # buildkit
COPY /usr/src/target/release-opt/text-generation-launcher /usr/local/bin/text-generation-launcher # buildkit
COPY ./tgi-entrypoint.sh /tgi-entrypoint.sh # buildkit
RUN /bin/sh -c chmod +x /tgi-entrypoint.sh # buildkit
ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/root/.local/share/uv/python/cpython-3.11.11-linux-x86_64-gnu/lib/
ENTRYPOINT ["/tgi-entrypoint.sh"]
ENV MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct
ARG HF_TOKEN=<REDACTED — a live Hugging Face token was published here and is recoverable from the image's layer history; revoke/rotate it and rebuild using a BuildKit secret mount instead of a build ARG>
RUN |1 HF_TOKEN=<REDACTED> /bin/sh -c if [ -n "$HF_TOKEN" ]; then huggingface-cli login --token $HF_TOKEN && huggingface-cli download $MODEL_ID && huggingface-cli logout 2>/dev/null || true && rm -rf /root/.cache/huggingface/token /root/.cache/huggingface/stored_tokens /tmp/* /var/tmp/*; else echo "Warning: HF_TOKEN not provided, skipping model download"; fi # buildkit
COPY entrypoint.sh /entrypoint.sh # buildkit
RUN |1 HF_TOKEN=<REDACTED> /bin/sh -c chmod +x /entrypoint.sh # buildkit
EXPOSE [80/tcp]
ENTRYPOINT ["/entrypoint.sh"]

Labels

Key Value
maintainer NVIDIA CORPORATION <cudatools@nvidia.com>
org.opencontainers.image.created 2025-09-16T23:17:38.784Z
org.opencontainers.image.description Large Language Model Text Generation Inference
org.opencontainers.image.licenses Apache-2.0
org.opencontainers.image.ref.name ubuntu
org.opencontainers.image.revision efb94e0d3db6aba9d464bc9a2f83191146203152
org.opencontainers.image.source https://github.com/huggingface/text-generation-inference
org.opencontainers.image.title text-generation-inference
org.opencontainers.image.url https://github.com/huggingface/text-generation-inference
org.opencontainers.image.version latest
Details
Container
2025-11-17 18:29:08 +00:00
36
OCI / Docker
linux/amd64
Apache-2.0
39 GiB
Versions (4) View all
dev-latest 2026-01-23
dev-b1aa235 2026-01-23
dev-2edad07 2026-01-23
latest 2025-11-17