mlx_quantization (mlx v0.1.0)

View Source

Summary

Functions

Functions

autocast(Fun)

autocast(Fun, Options)

calibrate_int8(Model, CalibrationData)

calibrate_int8(Model, CalibrationData, Options)

collect_stats(Arrays, ExistingStats)

compress_model(Model, CompressionConfig)

compute_quantization_params(Stats, Percentile)

decompress_model(CompressedModel)

dequantize(QuantizedArray, Scale, ZeroPoint)

dequantize(QuantizedArray, Scale, ZeroPoint, TargetType)

disable_mixed_precision()

enable_mixed_precision()

estimate_compression_ratio(OriginalModel, CompressedModel)

fake_quantize(Array, Scale, ZeroPoint)

fake_quantize(Array, Scale, ZeroPoint, QuantType)

pack_bits(Array, NumBits)

pack_int4(Array)

quantize_activations(Activations, Method)

quantize_activations(Activations, Method, Options)

quantize_dynamic(Array)

quantize_dynamic(Array, TargetType)

quantize_int4(Array, Scale)

quantize_int4(Array, Scale, ZeroPoint)

quantize_int8(Array, Scale)

quantize_int8(Array, Scale, ZeroPoint)

quantize_outlier_aware(Array, OutlierThreshold, Scale)

quantize_qdq(Array, Scale, ZeroPoint)

quantize_qdq(Array, Scale, ZeroPoint, QuantType)

quantize_smooth(Array, Alpha, Scale)

quantize_smooth(Array, Alpha, Scale, QuantType)

quantize_weights(Weights, Method)

quantize_weights(Weights, Method, Options)

quantized_conv2d(Input, QuantWeight, Scale, ZeroPoint)

quantized_conv2d(Input, QuantWeight, Scale, ZeroPoint, Options)

quantized_linear(Input, QuantWeight, Scale, ZeroPoint)

quantized_linear(Input, QuantWeight, Scale, ZeroPoint, Bias)

quantized_matmul(A, QuantB, Scale, ZeroPoint)

quantized_matmul(A, QuantB, Scale, ZeroPoint, OutputType)

sparse_quantize(Array, Mask, Scale)

sparse_quantize(Array, Mask, Scale, QuantType)

straight_through_estimator(Array, Scale, ZeroPoint)

structured_prune_quantize(Array, PruningRatio, QuantMethod, Options)

unpack_bits(PackedArray, NumBits)

unpack_int4(PackedArray)