diff --git a/requirements-cuda.txt b/requirements-cuda.txt
index c91c417b..a630141d 100644
--- a/requirements-cuda.txt
+++ b/requirements-cuda.txt
@@ -508,7 +508,7 @@ prompt-toolkit==3.0.39
     # via
     #   ipython
     #   jupyter-console
-protobuf==3.19.6
+protobuf==3.20.3
     # via
     #   googleapis-common-protos
     #   grpcio-reflection
diff --git a/requirements.txt b/requirements.txt
index d7c3ad36..d5ccefcc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -506,7 +506,7 @@ prompt-toolkit==3.0.39
     # via
     #   ipython
     #   jupyter-console
-protobuf==3.19.6
+protobuf==3.20.3
     # via
     #   googleapis-common-protos
     #   grpcio-reflection
diff --git a/saxml/server/pax/quantization.py b/saxml/server/pax/quantization.py
index 57806b34..5297f9b7 100644
--- a/saxml/server/pax/quantization.py
+++ b/saxml/server/pax/quantization.py
@@ -56,8 +56,6 @@ def for_transformer(
     quantize_ngrammer_embedding: bool = False,
     dtype: jnp.dtype = jnp.int8,
     block_size: int = 0,
-    use_int4_packed_weights: bool = True,
-    int4_packed_weights_container_dtype: jnp.dtype = jnp.int32,
 ):
   """Find and quantize transformer.

@@ -86,11 +84,6 @@ def for_transformer(
       Ngrammer/VQNgrammer layer.
     dtype: Dtype of the quantized variables.
     block_size: Block size for sub-channel quantization. Defaults to off.
-    use_int4_packed_weights: If True, pack/unpack int4 weights into int32 or
-      int8. It is for int4 weights only and has not effect on other type. If
-      False int4 weights will be kept in int8.
-    int4_packed_weights_container_dtype: Container type for int4 weights: int32
-      to pack 8 int4s, or int8 to pack 2 int4s.

   Returns:
     a modifier that quantizes transformers when applied to a config.
@@ -130,8 +123,6 @@ def task(self):
           quantize_ngrammer_embedding=quantize_ngrammer_embedding,
           dtype=dtype,
           block_size=block_size,
-          use_int4_packed_weights=use_int4_packed_weights,
-          int4_packed_weights_container_dtype=int4_packed_weights_container_dtype,
       )
       return task_p
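
With `use_int4_packed_weights` and `int4_packed_weights_container_dtype` removed, `for_transformer` takes only the remaining arguments shown in the hunk above. A minimal sketch of what a call site looks like after this change; the experiment class names here are hypothetical stand-ins, not from this repo:

```python
import jax.numpy as jnp

from saxml.server.pax import quantization


class MyBaseExperiment:  # hypothetical stand-in for a real Pax experiment
  def task(self):
    ...


# for_transformer returns a modifier that quantizes transformers when
# applied to a config; int4 packing is no longer configured here.
@quantization.for_transformer(
    quantize_ngrammer_embedding=False,  # leave Ngrammer/VQNgrammer embeddings unquantized
    dtype=jnp.int8,                     # dtype of the quantized variables
    block_size=0,                       # sub-channel quantization off (default)
)
class MyQuantizedExperiment(MyBaseExperiment):
  ...
```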