diff --git a/backends/arm/test/models/test_llama.py b/backends/arm/test/models/test_llama.py index 5e06c0e6c05..1c6f8865d1e 100644 --- a/backends/arm/test/models/test_llama.py +++ b/backends/arm/test/models/test_llama.py @@ -209,6 +209,8 @@ def test_llama_tosa_INT_FP_partial_quant(): tosa_extensions=["FP"], # Due to a few outliers, atol must be set high atol=1.1, + # TODO(MLETORCH-1875): reduce tolerance + qtol=75, frobenius_threshold=None, cosine_threshold=None, ) @@ -232,6 +234,8 @@ def test_llama_vgf_quant_partial_quant(): quantize=True, # Due to a few outliers, atol must be set high atol=1.1, + # TODO(MLETORCH-1875): reduce tolerance + qtol=75, ) _use_partial_quantizer(pipeline) pipeline.run() diff --git a/backends/arm/test/tester/analyze_output_utils.py b/backends/arm/test/tester/analyze_output_utils.py index 6ba08fd4785..a94be891b6d 100644 --- a/backends/arm/test/tester/analyze_output_utils.py +++ b/backends/arm/test/tester/analyze_output_utils.py @@ -318,6 +318,8 @@ def dump_error_output( output_node = export_stage.artifact.graph_module.graph.output_node() qp_input = get_input_quantization_params(export_stage.artifact) qp_output = get_output_quantization_params(output_node) + scales = {k.name: v.scale for k, v in qp_output.items() if v is not None} + logger.error(f"Output Quant scales: {scales}") logger.error(f"Input QuantArgs: {qp_input}") logger.error(f"Output QuantArgs: {qp_output}") diff --git a/backends/arm/test/tester/arm_tester.py b/backends/arm/test/tester/arm_tester.py index 2336ccc6233..3ae108aa8e9 100644 --- a/backends/arm/test/tester/arm_tester.py +++ b/backends/arm/test/tester/arm_tester.py @@ -995,9 +995,9 @@ def _compare_outputs( stage_output, reference_output, quantization_scale=quantization_scale, - atol=1e-03, - rtol=1e-03, - qtol=0, + atol=atol, + rtol=rtol, + qtol=qtol, ) raise e