mlx_quantization (mlx v0.2.0)
Summary
Functions
autocast(Fun)
autocast(Fun, Options)
calibrate_int8(Model, CalibrationData)
calibrate_int8(Model, CalibrationData, Options)
collect_stats(Arrays, ExistingStats)
compress_model(Model, CompressionConfig)
compute_quantization_params(Stats, Percentile)
decompress_model(CompressedModel)
dequantize(QuantizedArray, Scale, ZeroPoint)
dequantize(QuantizedArray, Scale, ZeroPoint, TargetType)
disable_mixed_precision()
enable_mixed_precision()
estimate_compression_ratio(OriginalModel, CompressedModel)
fake_quantize(Array, Scale, ZeroPoint)
fake_quantize(Array, Scale, ZeroPoint, QuantType)
pack_bits(Array, NumBits)
pack_int4(Array)
quantize_activations(Activations, Method)
quantize_activations(Activations, Method, Options)
quantize_dynamic(Array)
quantize_dynamic(Array, TargetType)
quantize_int4(Array, Scale)
quantize_int4(Array, Scale, ZeroPoint)
quantize_int8(Array, Scale)
quantize_int8(Array, Scale, ZeroPoint)
quantize_outlier_aware(Array, OutlierThreshold, Scale)
quantize_qdq(Array, Scale, ZeroPoint)
quantize_qdq(Array, Scale, ZeroPoint, QuantType)
quantize_smooth(Array, Alpha, Scale)
quantize_smooth(Array, Alpha, Scale, QuantType)
quantize_weights(Weights, Method)
quantize_weights(Weights, Method, Options)
quantized_conv2d(Input, QuantWeight, Scale, ZeroPoint)
quantized_conv2d(Input, QuantWeight, Scale, ZeroPoint, Options)
quantized_linear(Input, QuantWeight, Scale, ZeroPoint)
quantized_linear(Input, QuantWeight, Scale, ZeroPoint, Bias)
quantized_matmul(A, QuantB, Scale, ZeroPoint)
quantized_matmul(A, QuantB, Scale, ZeroPoint, OutputType)
sparse_quantize(Array, Mask, Scale)
sparse_quantize(Array, Mask, Scale, QuantType)
straight_through_estimator(Array, Scale, ZeroPoint)
structured_prune_quantize(Array, PruningRatio, QuantMethod, Options)
unpack_bits(PackedArray, NumBits)
unpack_int4(PackedArray)
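The index above lists only names and arities. As a minimal usage sketch, the fragment below (a function inside some module of your own) quantizes a float array to int8 and maps it back, assuming Array is an mlx array handle obtained elsewhere and assuming quantize_int8/3 returns the quantized array and dequantize/3 returns a floating-point array; those return shapes are assumptions, not documented on this page.

    %% Assumed round trip: int8 quantization followed by dequantization.
    %% Only the function names and arities come from the index above.
    roundtrip(Array, Scale, ZeroPoint) ->
        Q = mlx_quantization:quantize_int8(Array, Scale, ZeroPoint),
        mlx_quantization:dequantize(Q, Scale, ZeroPoint).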
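A second hedged sketch shows dynamic quantization feeding quantized_matmul/4. It assumes quantize_dynamic/1 returns a {Quantized, Scale, ZeroPoint} triple, which would supply the scale and zero point that quantized_matmul/4 expects; that tuple shape is an assumption made for illustration only.

    %% Hypothetical workflow: dynamically quantize the right-hand operand,
    %% then run a quantized matrix multiplication with the resulting
    %% scale and zero point. The return shape of quantize_dynamic/1 is assumed.
    quantized_product(A, B) ->
        {QB, Scale, ZeroPoint} = mlx_quantization:quantize_dynamic(B),
        mlx_quantization:quantized_matmul(A, QB, Scale, ZeroPoint).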