aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYuri Victorovich <yuri@FreeBSD.org>2026-04-07 19:18:53 +0000
committerYuri Victorovich <yuri@FreeBSD.org>2026-04-07 19:19:14 +0000
commit919c3600edaed3248916e0b75d4249fa9903b904 (patch)
tree2fa84f25cfccf37ebe42a5208d9e3b1a78151152
parent3a02618c2a32646ed996ad83c53464726dbea66d (diff)
misc/py-vllm: New port: High-throughput and memory-efficient LLM inference engine
-rw-r--r--misc/Makefile1
-rw-r--r--misc/py-vllm/Makefile109
-rw-r--r--misc/py-vllm/distinfo5
-rw-r--r--misc/py-vllm/files/patch-cmake_cpu__extension.cmake78
-rw-r--r--misc/py-vllm/files/patch-csrc_cpu_shm.cpp12
-rw-r--r--misc/py-vllm/files/patch-pyproject.toml26
-rw-r--r--misc/py-vllm/files/patch-setup.py15
-rw-r--r--misc/py-vllm/files/patch-vllm_distributed_parallel__state.py35
-rw-r--r--misc/py-vllm/files/patch-vllm_platforms_____init____.py29
-rw-r--r--misc/py-vllm/files/patch-vllm_platforms_cpu.py42
-rw-r--r--misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py12
-rw-r--r--misc/py-vllm/pkg-descr13
12 files changed, 377 insertions, 0 deletions
diff --git a/misc/Makefile b/misc/Makefile
index 2dadb25668f2..c09343f97fef 100644
--- a/misc/Makefile
+++ b/misc/Makefile
@@ -623,6 +623,7 @@
SUBDIR += py-uhi
SUBDIR += py-uuid-utils
SUBDIR += py-vaderSentiment
+ SUBDIR += py-vllm
SUBDIR += py-wandb
SUBDIR += py-wurlitzer
SUBDIR += py-xformers
diff --git a/misc/py-vllm/Makefile b/misc/py-vllm/Makefile
new file mode 100644
index 000000000000..1a56e18195a9
--- /dev/null
+++ b/misc/py-vllm/Makefile
@@ -0,0 +1,109 @@
+PORTNAME= vllm
+DISTVERSION= 0.19.0
+CATEGORIES= misc python # machine-learning
+MASTER_SITES= PYPI \
+ https://github.com/uxlfoundation/oneDNN/archive/refs/tags/:onednn_src
+PKGNAMEPREFIX= ${PYTHON_PKGNAMEPREFIX}
+DISTFILES= ${DISTNAME}${EXTRACT_SUFX} \
+ v3.10${EXTRACT_SUFX}:onednn_src
+
+MAINTAINER= yuri@FreeBSD.org
+COMMENT= High-throughput and memory-efficient LLM inference engine
+WWW= https://vllm.ai/ \
+ https://github.com/vllm-project/vllm
+
+LICENSE= APACHE20
+LICENSE_FILE= ${WRKSRC}/LICENSE
+
+BUILD_DEPENDS= ${LOCALBASE}/llvm19/bin/clang:devel/llvm19 \
+ ${PYTHON_PKGNAMEPREFIX}Jinja2>=3.0:devel/py-Jinja2@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}ninja>=1.13:devel/py-ninja@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}packaging>=24.2:devel/py-packaging@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}pytorch>=2.10.0:misc/py-pytorch@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}setuptools>=63.0:devel/py-setuptools@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}setuptools-scm>=8.0:devel/py-setuptools-scm@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}wheel>0:devel/py-wheel@${PY_FLAVOR}
+LIB_DEPENDS= libabsl_status.so:devel/abseil \
+ libprotobuf.so:devel/protobuf
+RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}aiohttp>=3.13.3:www/py-aiohttp@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}anthropic>0:misc/py-anthropic@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}blake3>0:security/py-blake3@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}cachetools>0:devel/py-cachetools@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}cbor2>0:devel/py-cbor2@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}cloudpickle>0:devel/py-cloudpickle@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}compressed-tensors>=0.14.0.1:misc/py-compressed-tensors@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}depyf>=0.20.0:devel/py-depyf@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}diskcache>=5.6.3:devel/py-diskcache@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}einops>0:misc/py-einops@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}fastapi>0:www/py-fastapi@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}filelock>=3.16.1:sysutils/py-filelock@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}gguf>=0.17.0:misc/py-gguf@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}ijson>0:devel/py-ijson@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}Jinja2>=3.0:devel/py-Jinja2@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}lark>=1.2.2:devel/py-lark@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}llguidance>=1.3.0:textproc/py-llguidance@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}lm-format-enforcer>=0.11.3:misc/py-lm-format-enforcer@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}mcp>0:misc/py-mcp@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}mistral-common>=1.10.0:misc/py-mistral-common@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}model-hosting-container-standards>=0.1.13:misc/py-model-hosting-container-standards@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}msgspec>0:devel/py-msgspec@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}ninja>=1.13:devel/py-ninja@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}numpy1>=1.25:math/py-numpy1@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}openai>=2.0.0:misc/py-openai@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}openai-harmony>=0.0.3:misc/py-openai-harmony@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}opentelemetry-api>=1.27.0:devel/py-opentelemetry-api@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}opentelemetry-exporter-otlp>=1.27.0:devel/py-opentelemetry-exporter-otlp@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}opentelemetry-sdk>=1.27.0:devel/py-opentelemetry-sdk@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}opentelemetry-semantic-conventions-ai>=0.4.1:devel/py-opentelemetry-semantic-conventions-ai@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}opencv-python-headless>=4.11.0:graphics/py-opencv-python-headless@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}outlines-core>=0.2.11:textproc/py-outlines-core@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}partial-json-parser>0:textproc/py-partial-json-parser@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}pillow>=10.0.0:graphics/py-pillow@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}prometheus-client>=0.18.0:net-mgmt/py-prometheus-client@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}prometheus-fastapi-instrumentator>=7.0.0:www/py-prometheus-fastapi-instrumentator@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}protobuf>=5.29.6:devel/py-protobuf@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}psutil>=5.9.0:sysutils/py-psutil@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}py-cpuinfo>0:sysutils/py-py-cpuinfo@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}pybase64>0:devel/py-pybase64@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}pydantic2>=2.12.0:devel/py-pydantic2@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}python-json-logger>0:devel/py-python-json-logger@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}pyyaml>0:devel/py-pyyaml@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}pyzmq>=25.0.0:net/py-pyzmq@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}regex>0:textproc/py-regex@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}requests>=2.26.0:www/py-requests@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}sentencepiece>0:textproc/py-sentencepiece@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}setproctitle>0:devel/py-setproctitle@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}setuptools>=63.0:devel/py-setuptools@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}tiktoken>=0.6.0:textproc/py-tiktoken@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}tokenizers>=0.21.1:textproc/py-tokenizers@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}tqdm>=4.0:misc/py-tqdm@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}uvloop>=0.20.0:devel/py-uvloop@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}transformers>=4.56.0:misc/py-transformers@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}pytorch>=2.10.0:misc/py-pytorch@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}typing-extensions>=4.10:devel/py-typing-extensions@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}watchfiles>0:devel/py-watchfiles@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}xgrammar>=0.1.32:misc/py-xgrammar@${PY_FLAVOR}
+TEST_DEPENDS= ${PYTHON_PKGNAMEPREFIX}datasets>=4.8.2:misc/py-datasets@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}evaluate>=0.4.6:misc/py-evaluate@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}multiprocess>=0.70.19:devel/py-multiprocess@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}pytest-asyncio>=1.3.0:devel/py-pytest-asyncio@${PY_FLAVOR} \
+ ${PYTHON_PKGNAMEPREFIX}tblib>=3.2.2:devel/py-tblib@${PY_FLAVOR}
+
+USES= cmake:indirect python
+USE_PYTHON= pep517 autoplist pytest
+
+# Build the CPU extension using clang (same ABI as PyTorch on FreeBSD).
+# VLLM_TARGET_DEVICE=cpu builds the vllm._C CPU extension.
+# oneDNN (fetched as a distfile) provides optimised GEMM kernels.
+MAKE_ENV+= VLLM_TARGET_DEVICE=cpu \
+ CMAKE_ARGS="-DCMAKE_C_COMPILER=${LOCALBASE}/llvm19/bin/clang -DCMAKE_CXX_COMPILER=${LOCALBASE}/llvm19/bin/clang++ -DCMAKE_INSTALL_RPATH_USE_LINK_PATH=TRUE" \
+ FETCHCONTENT_SOURCE_DIR_ONEDNN=${WRKDIR}/oneDNN-3.10
+
+TEST_ENV= ${MAKE_ENV} PYTHONPATH=${STAGEDIR}${PYTHONPREFIX_SITELIBDIR}:${WRKSRC}/tests/vllm_test_utils:${WRKSRC}/tests/plugins/vllm_add_dummy_stat_logger
+TEST_WRKDIR= ${WRKSRC}/tests
+
+# tests don't run because:
+# * they require imagehash, lm_eval, mteb, pqdm, ray, runai_model_streamer and schemathesis, which are not in FreeBSD ports yet
+# * vllm._C and vllm.v1.worker.gpu.mm.encoder_cudagraph require CUDA/GPU hardware.
+
+.include <bsd.port.mk>
diff --git a/misc/py-vllm/distinfo b/misc/py-vllm/distinfo
new file mode 100644
index 000000000000..579dac429ed1
--- /dev/null
+++ b/misc/py-vllm/distinfo
@@ -0,0 +1,5 @@
+TIMESTAMP = 1775582925
+SHA256 (vllm-0.19.0.tar.gz) = 81e59cf87175e7a62eb8d9acf5989484bbd17089d5eface353f89067bda282d9
+SIZE (vllm-0.19.0.tar.gz) = 31071745
+SHA256 (v3.10.tar.gz) = ba5834a1fdbb6d1c1b1c065dfd789438e7aa42c03fc52d92c02af85d78d1c75c
+SIZE (v3.10.tar.gz) = 13507701
diff --git a/misc/py-vllm/files/patch-cmake_cpu__extension.cmake b/misc/py-vllm/files/patch-cmake_cpu__extension.cmake
new file mode 100644
index 000000000000..9b7998f407f8
--- /dev/null
+++ b/misc/py-vllm/files/patch-cmake_cpu__extension.cmake
@@ -0,0 +1,78 @@
+--- cmake/cpu_extension.cmake.orig 2026-04-03 01:57:10 UTC
++++ cmake/cpu_extension.cmake
+@@ -20,6 +20,11 @@ set (ENABLE_NUMA TRUE)
+
+ set (ENABLE_NUMA TRUE)
+
++# FreeBSD does not have libnuma
++if (CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
++ set(ENABLE_NUMA OFF)
++endif()
++
+ #
+ # Check the compile flags
+ #
+@@ -33,12 +38,25 @@ if (NOT MACOSX_FOUND)
+ endif()
+
+ if (NOT MACOSX_FOUND)
+- execute_process(COMMAND cat /proc/cpuinfo
+- RESULT_VARIABLE CPUINFO_RET
+- OUTPUT_VARIABLE CPUINFO)
+- if (NOT CPUINFO_RET EQUAL 0)
+- message(FATAL_ERROR "Failed to check CPU features via /proc/cpuinfo")
++ # Try Linux /proc/cpuinfo first, then the FreeBSD linuxulator path
++ if (EXISTS "/proc/cpuinfo")
++ set(_cpuinfo_path "/proc/cpuinfo")
++ elseif (EXISTS "/compat/linux/proc/cpuinfo")
++ set(_cpuinfo_path "/compat/linux/proc/cpuinfo")
++ else()
++ set(_cpuinfo_path "")
+ endif()
++ if (_cpuinfo_path)
++ execute_process(COMMAND cat ${_cpuinfo_path}
++ RESULT_VARIABLE CPUINFO_RET
++ OUTPUT_VARIABLE CPUINFO)
++ if (NOT CPUINFO_RET EQUAL 0)
++ message(FATAL_ERROR "Failed to check CPU features via ${_cpuinfo_path}")
++ endif()
++ else()
++ message(STATUS "No cpuinfo available; relying on CMAKE_SYSTEM_PROCESSOR for ISA detection")
++ set(CPUINFO "")
++ endif()
+ endif()
+
+
+@@ -91,9 +109,10 @@ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR E
+
+ if (CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64" OR ENABLE_X86_ISA)
+ set(ENABLE_X86_ISA ON)
+- if (NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND
+- CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3))
+- message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3")
++ if (NOT (
++ (CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 12.3) OR
++ (CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 15.0)))
++ message(FATAL_ERROR "X86 backend requires gcc/g++ >= 12.3 or clang >= 15.0")
+ endif()
+ list(APPEND CXX_COMPILE_FLAGS "-mf16c")
+ list(APPEND CXX_COMPILE_FLAGS_AVX512 ${CXX_COMPILE_FLAGS})
+@@ -407,9 +426,15 @@ if (ENABLE_X86_ISA)
+ message(STATUS "CPU extension (AVX512F) source files: ${VLLM_EXT_SRC_AVX512}")
+ message(STATUS "CPU extension (AVX2) source files: ${VLLM_EXT_SRC_AVX2}")
+
+- set(_C_LIBS numa dnnl_ext)
+- set(_C_AVX512_LIBS numa dnnl_ext)
+- set(_C_AVX2_LIBS numa)
++ if(ENABLE_NUMA)
++ set(_C_LIBS numa dnnl_ext)
++ set(_C_AVX512_LIBS numa dnnl_ext)
++ set(_C_AVX2_LIBS numa)
++ else()
++ set(_C_LIBS dnnl_ext)
++ set(_C_AVX512_LIBS dnnl_ext)
++ set(_C_AVX2_LIBS "")
++ endif()
+
+ # AMX + AVX512F + AVX512BF16 + AVX512VNNI
+ define_extension_target(
diff --git a/misc/py-vllm/files/patch-csrc_cpu_shm.cpp b/misc/py-vllm/files/patch-csrc_cpu_shm.cpp
new file mode 100644
index 000000000000..521a3f335840
--- /dev/null
+++ b/misc/py-vllm/files/patch-csrc_cpu_shm.cpp
@@ -0,0 +1,12 @@
+--- csrc/cpu/shm.cpp.orig 2026-04-07 17:37:32 UTC
++++ csrc/cpu/shm.cpp
+@@ -2,6 +2,9 @@
+
+ #include <fcntl.h>
+ #include <sys/mman.h>
++#ifndef MAP_POPULATE
++# define MAP_POPULATE 0
++#endif
+ #include <sys/stat.h>
+ #include <unistd.h>
+
diff --git a/misc/py-vllm/files/patch-pyproject.toml b/misc/py-vllm/files/patch-pyproject.toml
new file mode 100644
index 000000000000..5a3bc19a43b2
--- /dev/null
+++ b/misc/py-vllm/files/patch-pyproject.toml
@@ -0,0 +1,26 @@
+--- pyproject.toml.orig 2026-04-06 20:40:36 UTC
++++ pyproject.toml
+@@ -1,12 +1,9 @@ requires = [
+ [build-system]
+ # Should be mirrored in requirements/build.txt
+ requires = [
+- "cmake>=3.26.1",
+- "ninja",
+ "packaging>=24.2",
+- "setuptools>=77.0.3,<81.0.0",
++ "setuptools>=63.0",
+ "setuptools-scm>=8.0",
+- "torch == 2.10.0",
+ "wheel",
+ "jinja2",
+ ]
+@@ -15,8 +12,7 @@ authors = [{name = "vLLM Team"}]
+ [project]
+ name = "vllm"
+ authors = [{name = "vLLM Team"}]
+-license = "Apache-2.0"
+-license-files = ["LICENSE"]
++license = {text = "Apache-2.0"}
+ readme = "README.md"
+ description = "A high-throughput and memory-efficient inference and serving engine for LLMs"
+ classifiers = [
diff --git a/misc/py-vllm/files/patch-setup.py b/misc/py-vllm/files/patch-setup.py
new file mode 100644
index 000000000000..f05813edcc49
--- /dev/null
+++ b/misc/py-vllm/files/patch-setup.py
@@ -0,0 +1,15 @@
+--- setup.py.orig 2026-04-07 17:26:12 UTC
++++ setup.py
+@@ -42,7 +42,11 @@ if sys.platform.startswith("darwin") and VLLM_TARGET_D
+ if sys.platform.startswith("darwin") and VLLM_TARGET_DEVICE != "cpu":
+ logger.warning("VLLM_TARGET_DEVICE automatically set to `cpu` due to macOS")
+ VLLM_TARGET_DEVICE = "cpu"
+-elif not (sys.platform.startswith("linux") or sys.platform.startswith("darwin")):
++elif not (
++ sys.platform.startswith("linux")
++ or sys.platform.startswith("darwin")
++ or sys.platform.startswith("freebsd")
++):
+ logger.warning(
+ "vLLM only supports Linux platform (including WSL) and MacOS."
+ "Building on %s, "
diff --git a/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py b/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py
new file mode 100644
index 000000000000..cebb4198f919
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_distributed_parallel__state.py
@@ -0,0 +1,35 @@
+--- vllm/distributed/parallel_state.py.orig 2026-04-07 18:19:15 UTC
++++ vllm/distributed/parallel_state.py
+@@ -24,6 +24,7 @@ import contextlib
+ """
+
+ import contextlib
++import sys
+ import gc
+ import pickle
+ import weakref
+@@ -341,8 +342,13 @@ class GroupCoordinator:
+ )
+ # a group with `gloo` backend, to allow direct coordination between
+ # processes through the CPU.
++ # On FreeBSD, gloo TCP transport is unavailable; use fake backend.
++ _cpu_backend = "gloo"
++ if sys.platform.startswith("freebsd"):
++ import importlib; importlib.import_module("torch.testing._internal.distributed.fake_pg")
++ _cpu_backend = "fake"
+ with suppress_stdout():
+- cpu_group = torch.distributed.new_group(ranks, backend="gloo")
++ cpu_group = torch.distributed.new_group(ranks, backend=_cpu_backend)
+ if self.rank in ranks:
+ self.ranks = ranks
+ self.world_size = len(ranks)
+@@ -1419,6 +1425,9 @@ def init_distributed_environment(
+ )
+ backend = "gloo"
+ # this backend is used for WORLD
++ # On FreeBSD, register the 'fake' backend before use.
++ if sys.platform.startswith("freebsd") and backend == "fake":
++ import importlib; importlib.import_module("torch.testing._internal.distributed.fake_pg")
+ torch.distributed.init_process_group(
+ backend=backend,
+ init_method=distributed_init_method,
diff --git a/misc/py-vllm/files/patch-vllm_platforms_____init____.py b/misc/py-vllm/files/patch-vllm_platforms_____init____.py
new file mode 100644
index 000000000000..ab6f660d7de3
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_platforms_____init____.py
@@ -0,0 +1,29 @@
+--- vllm/platforms/__init__.py.orig 2026-04-07 17:26:12 UTC
++++ vllm/platforms/__init__.py
+@@ -58,6 +58,11 @@ def cuda_platform_plugin() -> str | None:
+
+
+ def cuda_platform_plugin() -> str | None:
++ import sys
++ if sys.platform.startswith("freebsd"):
++ # CUDA extensions are not built on FreeBSD; use CPU platform instead.
++ logger.debug("CUDA platform disabled on FreeBSD.")
++ return None
+ is_cuda = False
+ logger.debug("Checking if CUDA platform is available.")
+ try:
+@@ -172,10 +177,12 @@ def cpu_platform_plugin() -> str | None:
+ if not is_cpu:
+ import sys
+
+- is_cpu = sys.platform.startswith("darwin")
++ is_cpu = sys.platform.startswith("darwin") or sys.platform.startswith(
++ "freebsd"
++ )
+ if is_cpu:
+ logger.debug(
+- "Confirmed CPU platform is available because the machine is MacOS."
++ "Confirmed CPU platform is available because the machine is MacOS or FreeBSD."
+ )
+
+ except Exception as e:
diff --git a/misc/py-vllm/files/patch-vllm_platforms_cpu.py b/misc/py-vllm/files/patch-vllm_platforms_cpu.py
new file mode 100644
index 000000000000..ae1db6392125
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_platforms_cpu.py
@@ -0,0 +1,42 @@
+--- vllm/platforms/cpu.py.orig 2026-04-03 01:57:10 UTC
++++ vllm/platforms/cpu.py
+@@ -74,7 +74,8 @@ class CpuPlatform(Platform):
+ device_name: str = "cpu"
+ device_type: str = "cpu"
+ dispatch_key: str = "CPU"
+- dist_backend: str = "gloo"
++ # FreeBSD lacks gloo TCP transport (epoll-based); use fake backend.
++ dist_backend: str = "fake" if sys.platform.startswith("freebsd") else "gloo"
+ device_control_env_var = "CPU_VISIBLE_MEMORY_NODES"
+
+ @property
+@@ -378,7 +379,28 @@ class CpuPlatform(Platform):
+
+ @classmethod
+ def get_allowed_cpu_core_node_list(cls) -> tuple[list[int], list[LogicalCPUInfo]]:
+- assert platform.system() == "Linux"
++ assert platform.system() in ("Linux", "FreeBSD")
++
++ if platform.system() == "FreeBSD":
++ # FreeBSD lacks lscpu -J; treat all CPUs as a single NUMA node.
++ allowed_cpu_id_set = (
++ os.sched_getaffinity(0)
++ if hasattr(os, "sched_getaffinity")
++ else set(range(os.cpu_count() or 1))
++ )
++ logical_cpu_list = [
++ LogicalCPUInfo(id=cpu_id, physical_core=cpu_id, numa_node=0)
++ for cpu_id in sorted(allowed_cpu_id_set)
++ ]
++ allowed_numa_nodes_list = [0]
++ env_key = CpuPlatform.device_control_env_var
++ if env_key in os.environ and os.environ[env_key] != "":
++ visible_nodes = [int(s) for s in os.environ[env_key].split(",")]
++ allowed_numa_nodes_list = [
++ x for x in sorted(list(set(visible_nodes)))
++ if x in allowed_numa_nodes_list
++ ]
++ return allowed_numa_nodes_list, logical_cpu_list
+
+ # Init LogicalCPUInfo from lscpu
+ lscpu_output = subprocess.check_output(
diff --git a/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py b/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py
new file mode 100644
index 000000000000..e762ac4eafb9
--- /dev/null
+++ b/misc/py-vllm/files/patch-vllm_v1_worker_cpu__worker.py
@@ -0,0 +1,12 @@
+--- vllm/v1/worker/cpu_worker.py.orig 2026-04-07 17:26:12 UTC
++++ vllm/v1/worker/cpu_worker.py
+@@ -91,6 +91,9 @@ class CPUWorker(Worker):
+ self.local_omp_cpuid = self._get_autobind_cpu_ids(lambda cpus: cpus)
+ else:
+ self.local_omp_cpuid = "nobind"
++ elif omp_cpuids == "auto":
++ # Non-Linux OS: NUMA-based auto-binding not supported, fall back to nobind
++ self.local_omp_cpuid = "nobind"
+ elif omp_cpuids == "nobind":
+ self.local_omp_cpuid = "nobind"
+ else:
diff --git a/misc/py-vllm/pkg-descr b/misc/py-vllm/pkg-descr
new file mode 100644
index 000000000000..1d4993624aea
--- /dev/null
+++ b/misc/py-vllm/pkg-descr
@@ -0,0 +1,13 @@
+vLLM is a fast and easy-to-use library for LLM inference and serving.
+It provides high-throughput and memory-efficient inference for large language
+models (LLMs) using state-of-the-art serving technologies including:
+
+- PagedAttention for efficient KV cache memory management
+- Continuous batching of incoming requests
+- Optimized CUDA kernels (on supported platforms)
+- Hugging Face model compatibility
+- Various decoding algorithms including parallel sampling and beam search
+- OpenAI-compatible API server
+
+On FreeBSD, vLLM runs in CPU mode (VLLM_TARGET_DEVICE=cpu), using the vllm._C
+CPU extension with oneDNN-optimised kernels, without GPU acceleration.