diff --git a/setup.py b/setup.py
index 66750dd8d..7a122d1b2 100644
--- a/setup.py
+++ b/setup.py
@@ -469,80 +469,33 @@ def _build_autoawq_requires() -> Optional[str]:
     os_type, _ = get_cpu_avx_support()
     if os_type == OSType.DARWIN:
         return None
-    auto_gptq_version = get_latest_version(
-        "auto-gptq", "https://huggingface.github.io/autogptq-index/whl/cu118/", "0.5.1"
-    )
-    # eg. 0.5.1+cu118
-    auto_gptq_version = auto_gptq_version.split("+")[0]
-
-    def pkg_file_func(pkg_name, pkg_version, cuda_version, py_version, os_type):
-        pkg_name = pkg_name.replace("-", "_")
-        if os_type == OSType.DARWIN:
-            return None
-        os_pkg_name = (
-            "manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
-            if os_type == OSType.LINUX
-            else "win_amd64.whl"
-        )
-        return f"{pkg_name}-{pkg_version}+{cuda_version}-{py_version}-{py_version}-{os_pkg_name}"
-
-    auto_gptq_url = _build_wheels(
-        "auto-gptq",
-        auto_gptq_version,
-        base_url_func=lambda v, x, y: f"https://huggingface.github.io/autogptq-index/whl/{x}/auto-gptq",
-        pkg_file_func=pkg_file_func,
-        supported_cuda_versions=["11.8"],
-    )
-    if auto_gptq_url:
-        print(f"Install auto-gptq from {auto_gptq_url}")
-        return f"auto-gptq @ {auto_gptq_url}"
-    else:
-        "auto-gptq"
+    return "auto-gptq"
 
 
 def quantization_requires():
-    pkgs = []
     os_type, _ = get_cpu_avx_support()
-    if os_type != OSType.WINDOWS:
-        pkgs = ["bitsandbytes"]
-    else:
+    quantization_pkgs = []
+    if os_type == OSType.WINDOWS:
+        # For Windows, fetch a specific bitsandbytes WHL package
         latest_version = get_latest_version(
             "bitsandbytes",
             "https://jllllll.github.io/bitsandbytes-windows-webui",
             "0.41.1",
         )
-        extra_index_url = f"https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-{latest_version}-py3-none-win_amd64.whl"
-        local_pkg = cache_package(
-            extra_index_url, "bitsandbytes", os_type == OSType.WINDOWS
-        )
-        pkgs = [f"bitsandbytes @ {local_pkg}"]
-        print(pkgs)
-    # For chatglm2-6b-int4
-    pkgs += ["cpm_kernels"]
+        whl_url = f"https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-{latest_version}-py3-none-win_amd64.whl"
+        local_pkg_path = cache_package(whl_url, "bitsandbytes", True)
+        setup_spec.extras["bitsandbytes"] = [f"bitsandbytes @ {local_pkg_path}"]
+    else:
+        setup_spec.extras["bitsandbytes"] = ["bitsandbytes"]
 
     if os_type != OSType.DARWIN:
         # Since transformers 4.35.0, the GPT-Q/AWQ model can be loaded using AutoModelForCausalLM.
         # autoawq requirements:
         # 1. Compute Capability 7.5 (sm75). Turing and later architectures are supported.
         # 2. CUDA Toolkit 11.8 and later.
-        autoawq_url = _build_wheels(
-            "autoawq",
-            "0.1.7",
-            base_url_func=lambda v, x, y: f"https://github.com/casper-hansen/AutoAWQ/releases/download/v{v}",
-            supported_cuda_versions=["11.8"],
-        )
-        if autoawq_url:
-            print(f"Install autoawq from {autoawq_url}")
-            pkgs.append(f"autoawq @ {autoawq_url}")
-        else:
-            pkgs.append("autoawq")
-
-        auto_gptq_pkg = _build_autoawq_requires()
-        if auto_gptq_pkg:
-            pkgs.append(auto_gptq_pkg)
-        pkgs.append("optimum")
+        quantization_pkgs.extend(["autoawq", _build_autoawq_requires(), "optimum"])
 
-    setup_spec.extras["quantization"] = pkgs
+    setup_spec.extras["quantization"] = ["cpm_kernels"] + quantization_pkgs
 
 
 def all_vector_store_requires():
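Note: below is a minimal, self-contained sketch (not part of the patch; mock names only) of what the refactored quantization_requires() would resolve to on a Linux/CUDA host, assuming get_cpu_avx_support() reports a non-Windows, non-macOS platform and _build_autoawq_requires() returns the plain "auto-gptq" requirement:

# Illustration only: the real function mutates setup_spec.extras inside setup.py;
# the plain dict "extras" below stands in for setup_spec.extras.
extras = {}

# Non-Windows branch: bitsandbytes comes straight from PyPI.
extras["bitsandbytes"] = ["bitsandbytes"]

# Non-macOS branch: GPT-Q/AWQ models loaded via transformers >= 4.35.0.
quantization_pkgs = ["autoawq", "auto-gptq", "optimum"]

# cpm_kernels is kept in the extra (needed for chatglm2-6b-int4).
extras["quantization"] = ["cpm_kernels"] + quantization_pkgs

print(extras["quantization"])
# ['cpm_kernels', 'autoawq', 'auto-gptq', 'optimum']

With the patch applied, these dependencies would be pulled in through pip's extras syntax, e.g. pip install -e ".[quantization]" (and ".[bitsandbytes]" for the newly split extra).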