pytorch · mansnils · Mar 5, 2026 · Feb 26, 2026
@@ -209,6 +209,8 @@ def test_llama_tosa_INT_FP_partial_quant():
             tosa_extensions=["FP"],
             # Due to a few outliers, atol must be set high
             atol=1.1,
+            # TODO(MLETORCH-1875): reduce tolerance
+            qtol=75,
             frobenius_threshold=None,
             cosine_threshold=None,
         )
@@ -232,6 +234,8 @@ def test_llama_vgf_quant_partial_quant():
             quantize=True,
             # Due to a few outliers, atol must be set high
             atol=1.1,
+            # TODO(MLETORCH-1875): reduce tolerance
+            qtol=75,
         )
         _use_partial_quantizer(pipeline)
         pipeline.run()
@@ -318,6 +318,8 @@ def dump_error_output(
         output_node = export_stage.artifact.graph_module.graph.output_node()
         qp_input = get_input_quantization_params(export_stage.artifact)
         qp_output = get_output_quantization_params(output_node)
+        scales = {k.name: v.scale for k, v in qp_output.items() if v is not None}
+        logger.error(f"Output Quant scales: {scales}")
         logger.error(f"Input QuantArgs: {qp_input}")
         logger.error(f"Output QuantArgs: {qp_output}")
 

@@ -995,9 +995,9 @@ def _compare_outputs(
                     stage_output,
                     reference_output,
                     quantization_scale=quantization_scale,
-                    atol=1e-03,
-                    rtol=1e-03,
-                    qtol=0,
+                    atol=atol,
+                    rtol=rtol,
+                    qtol=qtol,
                 )
             raise e