From 8b742614d63527b181a4628fb2a7179b47ccbf02 Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Tue, 24 Feb 2026 09:58:05 +0100 Subject: [PATCH 1/6] NXP backend: Move tests to correct directories. --- .../converter/node_converter}/test_convert_upsample_bilinear2d.py | 0 .../converter/node_converter}/test_convert_upsample_nearest2d.py | 0 .../{ => ir/edge_passes}/test_convert_reshaping_nodes_to_view.py | 0 backends/nxp/tests/{ => ir/edge_passes}/test_edge_passes.py | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename backends/nxp/tests/{ => ir/converter/node_converter}/test_convert_upsample_bilinear2d.py (100%) rename backends/nxp/tests/{ => ir/converter/node_converter}/test_convert_upsample_nearest2d.py (100%) rename backends/nxp/tests/{ => ir/edge_passes}/test_convert_reshaping_nodes_to_view.py (100%) rename backends/nxp/tests/{ => ir/edge_passes}/test_edge_passes.py (100%) diff --git a/backends/nxp/tests/test_convert_upsample_bilinear2d.py b/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_bilinear2d.py similarity index 100% rename from backends/nxp/tests/test_convert_upsample_bilinear2d.py rename to backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_bilinear2d.py diff --git a/backends/nxp/tests/test_convert_upsample_nearest2d.py b/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_nearest2d.py similarity index 100% rename from backends/nxp/tests/test_convert_upsample_nearest2d.py rename to backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_nearest2d.py diff --git a/backends/nxp/tests/test_convert_reshaping_nodes_to_view.py b/backends/nxp/tests/ir/edge_passes/test_convert_reshaping_nodes_to_view.py similarity index 100% rename from backends/nxp/tests/test_convert_reshaping_nodes_to_view.py rename to backends/nxp/tests/ir/edge_passes/test_convert_reshaping_nodes_to_view.py diff --git a/backends/nxp/tests/test_edge_passes.py b/backends/nxp/tests/ir/edge_passes/test_edge_passes.py 
similarity index 100% rename from backends/nxp/tests/test_edge_passes.py rename to backends/nxp/tests/ir/edge_passes/test_edge_passes.py From 0f5361f3e539456498b0f94ff21b1a769f6386de Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Wed, 25 Feb 2026 15:12:53 +0100 Subject: [PATCH 2/6] NXP backend: Update `aten.max_pool2d.default` support to match Neutron requirements. --- .../backend/ir/converter/conversion/common.py | 6 +- .../ops_converters/max_pool_2d_converter.py | 151 ++++++++--- .../test_max_pool_2d_converter.py | 247 +++++++++++------- docs/source/backends/nxp/op-support.csv | 4 +- 4 files changed, 264 insertions(+), 144 deletions(-) diff --git a/backends/nxp/backend/ir/converter/conversion/common.py b/backends/nxp/backend/ir/converter/conversion/common.py index 318fe66dfbd..9186f5d0ac6 100755 --- a/backends/nxp/backend/ir/converter/conversion/common.py +++ b/backends/nxp/backend/ir/converter/conversion/common.py @@ -1,6 +1,6 @@ # # Copyright 2023 Martin Pavella -# Copyright 2023-2025 NXP +# Copyright 2023-2026 NXP # # License: MIT # See the LICENSE_MIT for more details. @@ -12,7 +12,7 @@ 'conversion/builtin/' directory. """ -from typing import List, MutableSequence, Optional +from typing import List, MutableSequence, Optional, Sequence import executorch.backends.nxp.backend.ir.logger as logger from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model @@ -71,7 +71,7 @@ def extend_1d_dilation_to_2d(tflite_1d_dilation: MutableSequence): ) -def assign_2d_strides(options: StridedOptions, strides: Optional[List[int]]): +def assign_2d_strides(options: StridedOptions, strides: Sequence[int] | None): """Assign to 'obj' the attributes 'stride_h' and 'stride_w' from 'strides'. If 'strides' is None, assign 1s. 
diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool_2d_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool_2d_converter.py index ce9a3697318..e5c1d1f4be2 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool_2d_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool_2d_converter.py @@ -1,9 +1,11 @@ -# Copyright 2024-2025 NXP +# Copyright 2024-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. + import numpy as np +from executorch.backends.nxp.backend.edge_helper import try_get_arg from executorch.backends.nxp.backend.ir.converter.conversion import ( aten_translator, common, @@ -14,13 +16,19 @@ NodeConverter, ) from executorch.backends.nxp.backend.ir.lib.tflite.TensorType import TensorType -from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model -from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import ( - max_pool_2d_options, +from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options.max_pool_2d_options import ( + MaxPool2D, ) +from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from torch.fx import Node from torch.nn import Parameter +KernelSize = tuple[int, int] +Stride = tuple[int, int] +Padding = tuple[int, int] +Dilation = tuple[int, int] +CeilMode = bool + class MaxPool2dConverter(NodeConverter): """Convert 'max_pool2d' operator to TFLite 'MaxPool2D'. 
@@ -33,12 +41,16 @@ def _is_supported_in_IR( parameters_mapping: dict[str, Parameter], custom_delegation_options: CustomDelegationOptions, ) -> bool: - n_args = len(node.args) + kernel_size, stride, padding, dilation, ceil_mode = ( + MaxPool2dConverter._get_node_args(node) + ) - dilation = node.args[4] if n_args >= 5 else [1, 1] - ceil_mode = node.args[5] if n_args == 6 else False + if dilation != (1, 1): + # The Neutron IR MaxPool2D does not support dilation. + return False - if any(dil != 1 for dil in dilation) or ceil_mode: + if ceil_mode: + # This argument affects how the output shape is computed. Neutron IR only supports the default `False`. return False if not NodeConverter._has_shared_q_params_if_quantized(node): @@ -46,9 +58,49 @@ def _is_supported_in_IR( return True - def _get_pad_constant_value(self, input_type: TensorType) -> np.ndarray: - """ - Get scalar NumPy array with constant value used as constant value for 'Pad' operator. + @staticmethod + def _is_supported_on_target( + node: Node, + neutron_target_spec: NeutronTargetSpec, + parameters_mapping: dict[str, Parameter], + custom_delegation_options: CustomDelegationOptions, + ) -> bool: + kernel_size, stride, padding, dilation, ceil_mode = ( + MaxPool2dConverter._get_node_args(node) + ) + + output_shape = node.meta["val"].shape + if output_shape[0] != 1: + # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106 + return False + + # Neutron only has a restriction on `stride_h`. `stride_w` is not restricted. 
+ stride_h = stride[0] + if stride_h not in (1, 2): + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923 + return False + + channels = output_shape[1] + if channels % neutron_target_spec.get_num_macs() != 0: + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925 + return False + + if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)): + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929 + + # Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the + # effective kernel size, which is an even stricter requirement than what Neutron imposes. + # https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489 + return False + + return True + + @staticmethod + def _get_pad_constant_value(input_type: TensorType) -> np.ndarray: + """Get scalar NumPy array with constant value used as constant value for 'Pad' operator. :param input_type: Input tensor type. :return: Scalar array with single minimum value of given type. @@ -62,43 +114,64 @@ def _get_pad_constant_value(self, input_type: TensorType) -> np.ndarray: case TensorType.FLOAT32: return np.asarray([np.finfo(np.float32).min], dtype=np.float32) case _: - raise RuntimeError("Unexpected input type for MaxPool operator.") + # Should never happen. + raise RuntimeError( + f"Unexpected input type '{input_type}' for MaxPool operator." 
+ ) - # noinspection PyMethodMayBeStatic - def _convert_2d_max_pool( - self, kernel_size, stride, padding, t_op: tflite_model.Operator - ) -> list[tflite_model.Operator]: - x = t_op.tmp_inputs[0] + @staticmethod + def _get_node_args( + node: Node, + ) -> tuple[KernelSize, Stride, Padding, Dilation, CeilMode]: + """Extract and return `aten.max_pool2d` arguments from the node. + :param node: The node representing the `aten.max_pool2d` operation. + :return: Tuple of (kernel_size, stride, padding, dilation, ceil_mode). + """ + kernel_size = node.args[1] + stride = node.args[ + 2 + ] # The default value is equal to the kernel_size, so it is never empty here. + padding = try_get_arg(node, 3) or (0, 0) + dilation = try_get_arg(node, 4) or (1, 1) + ceil_mode = try_get_arg(node, 5) or False + + return kernel_size, stride, padding, dilation, ceil_mode + + def convert(self, node: Node): + """Convert the `aten.max_pool2d.default` operator to Neutron IR `MaxPool2D`. + The schema is: + aten::max_pool2d( + Tensor self, + int[2] kernel_size, + int[2] stride=[], # The default value is equal to the kernel_size. + int[2] padding=0, + int[2] dilation=1, + bool ceil_mode=False + ) -> Tensor + """ + self.assert_convertible(node) + + kernel_size, stride, padding, dilation, ceil_mode = self._get_node_args(node) + + t_op = self._create_tflite_op_with_io_tensors(node) ops = OpsList(middle_op=t_op) - t_op.builtin_options = max_pool_2d_options.MaxPool2D() - t_op.builtin_options.filter_h = kernel_size[0] - t_op.builtin_options.filter_w = kernel_size[1] + + x = t_op.tmp_inputs[0] + + t_op.builtin_options = MaxPool2D() + t_op.builtin_options.filter_h, t_op.builtin_options.filter_w = kernel_size common.assign_2d_strides(t_op.builtin_options, stride) + t_op.builtin_options.padding, explicit_padding = ( - aten_translator.convert_padding(padding) + aten_translator.convert_padding(list(padding)) ) - if explicit_padding is not None: # Need to prepend a 'Pad' operator, which adds min values for type. 
constant_value = self._get_pad_constant_value(x.type) - pre_pad_op = self.builder.create_pad_operator_before( + pad_op = self.builder.create_pad_operator_before( t_op, 0, explicit_padding, constant_value=constant_value ) - ops.add_pre(pre_pad_op) - - return ops.flatten() - - # Maxpool2d Node format: (Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) - def convert(self, node: Node): - self.assert_convertible(node) - - n_args = len(node.args) + ops.add_pre(pad_op) - kernel_size = node.args[1] - stride = node.args[2] - padding = node.args[3] if n_args >= 4 else [0, 0] - - t_op = self._create_tflite_op_with_io_tensors(node) - ops_to_add = self._convert_2d_max_pool(kernel_size, stride, padding, t_op) - self.builder.append_operators(ops_to_add) + self.builder.append_operators(ops.flatten()) diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index 569ad571dbc..79786169510 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -1,4 +1,4 @@ -# Copyright 2024 NXP +# Copyright 2024,2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -6,27 +6,41 @@ import numpy as np import pytest import torch + from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, ) - -from executorch.backends.nxp.backend.ir.conversion_config import ConversionConfig -from executorch.backends.nxp.neutron_pass_manager import NeutronPassManager -from executorch.backends.nxp.tests.executorch_pipeline import ( - to_edge_program, - to_quantized_edge_program, -) +from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program from executorch.backends.nxp.tests.executors import ( convert_run_compare, - ToNCHWPreprocess, - ToNHWCPreprocess, + graph_contains_any_of_ops, + ToChannelFirstPreprocess, + ToChannelLastPreprocess, ) -from executorch.backends.nxp.tests.models import MaxPool2dConvModule, MaxPool2dModule -from executorch.backends.xnnpack._passes import RemoveGetItemPass -from executorch.exir.verification.verifier import EXIREdgeDialectVerifier -from torch.export import ExportedProgram from executorch.backends.nxp.tests.use_qat import * # noqa F403 +# noinspection PyProtectedMember +from executorch.exir.dialects._ops import ops as exir_ops + +ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate +MaxPool2D = exir_ops.edge.aten.max_pool2d.default + + +class MaxPool2dModule(torch.nn.Module): + def __init__(self, kernel_size=3, **kwargs): + super().__init__() + self.max_pool2d = torch.nn.MaxPool2d(kernel_size, **kwargs) + + def forward(self, x): + return self.max_pool2d(x) + + +def _generate_test_data(input_shape: tuple) -> np.ndarray: + """Generate random int8 test data for given shape.""" + return (np.random.random(input_shape).astype(np.float32) * 256.0 - 128.0).astype( + np.int8 + ) + @pytest.fixture(autouse=True) def reseed_model_per_test_run(): @@ -34,99 +48,132 @@ def reseed_model_per_test_run(): np.random.seed(23) -@pytest.mark.parametrize( - "input_shape, padding", - [ - pytest.param((1, 4, 8, 8), (0, 0), id="No padding."), - pytest.param( 
- (1, 4, 8, 8), - (1, 1), - id="Padding, keep the same output tensor size as input.", - ), - pytest.param( - (1, 4, 8, 8), (1, 0), id="Padding, change the output tensor size." - ), - pytest.param( - (1, 4, 9, 9), (1, 0), id="Padding, change the output tensor size." - ), - pytest.param( - (1, 4, 9, 9), (0, 1), id="Padding, change the output tensor size." - ), - ], -) -def test_max_pool_2d_conversion(input_shape, padding): - edge_program = to_edge_program( - MaxPool2dModule(padding=padding), input_shape - ).exported_program() - - # We need to create custom model verifier with max_pool2d added as exception. - # Otherwise, we get violation that this op is not part of ATen Core ops. - edge_program._verifiers = [ - EXIREdgeDialectVerifier( - class_only=True, - core_aten_ops_exception_list=[torch.ops.aten.max_pool2d.default], - ) - ] +class TestMaxPool2DSupported: + """Tests for supported MaxPool2D configurations.""" - # Remove MaxPool-related "getitem" nodes from graph - edge_program = NeutronPassManager(edge_program, [RemoveGetItemPass]).transform() + @staticmethod + def _verify_successful_delegation(module, converter_spy, input_shape): + edge_model = to_quantized_edge_program( + module, + input_shape, + use_neutron_for_format_conversion=False, + ).exported_program() - input_data = np.random.random(input_shape).astype(np.float32) + # Make sure the MaxPool was delegated. + assert not graph_contains_any_of_ops(edge_model.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_model.graph, [ExecutorchDelegateCall]) - convert_run_compare( - edge_program, - input_data, - tflite_input_preprocess=ToNHWCPreprocess(), - tflite_output_preprocess=ToNCHWPreprocess(), - conversion_config=ConversionConfig( - {"use_neutron_for_format_conversion": False} - ), - ) + # Verify correct behavior of the converted NeutronIR model. 
+ edge_partition = converter_spy.call_args.args[1] + neutron_ir_partition, _ = converter_spy.spy_return + input_data = _generate_test_data(input_shape) -@pytest.mark.parametrize( - "input_shape, padding", - [ - pytest.param((1, 4, 8, 8), (0, 0), id="No padding."), - pytest.param( - (1, 4, 8, 8), - (1, 1), - id="Padding, keep the same output tensor size as input.", - ), - pytest.param( - (1, 4, 8, 8), (1, 0), id="Padding, change the output tensor size." - ), - pytest.param( - (1, 4, 11, 11), (1, 0), id="Padding, change the output tensor size." - ), - pytest.param( - (1, 4, 11, 11), (0, 1), id="Padding, change the output tensor size." - ), - ], -) -def test_max_pool_2d_quant_conversion(mocker, input_shape, padding, use_qat): - converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - - # Run conversion - _ = to_quantized_edge_program( - MaxPool2dConvModule(padding=padding), - input_shape, - use_qat=use_qat, - use_neutron_for_format_conversion=False, + # Make sure the tested program contains the `MaxPool`. + assert graph_contains_any_of_ops(edge_partition.graph, [MaxPool2D]) + + convert_run_compare( + edge_partition, + tfl_model=neutron_ir_partition, + input_data=input_data, + tflite_input_preprocess=ToChannelLastPreprocess(), + tflite_output_preprocess=ToChannelFirstPreprocess(), + ) + + @pytest.mark.parametrize( + "padding", + [(0, 0), (1, 1), (0, 1), 0, 1], + ids=lambda padding: f"Padding = {'tuple' if isinstance(padding, tuple) else 'scalar'} `{padding}`", + ) + def test_padding(self, padding, mocker): + input_shape = (1, 8, 5, 6) + stride = 1 # Default value would be equal to kernel size (3), which is not supported by Neutron. 
+ module = MaxPool2dModule(kernel_size=3, stride=stride, padding=padding) + + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + self._verify_successful_delegation(module, converter_spy, input_shape) + + @pytest.mark.parametrize( + "stride", + [(1, 1), (2, 1), (2, 2), (2, 3), (2, 8), 1, 2], + ids=lambda stride: f"Stride = {'tuple' if isinstance(stride, tuple) else 'scalar'} `{stride}`", ) + def test_stride(self, stride, mocker): + input_shape = (1, 8, 7, 9) + module = MaxPool2dModule(kernel_size=3, stride=stride) - # Capture generated model - tflite_flatbuffers_model, io_formats = converter_spy.spy_return + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + self._verify_successful_delegation(module, converter_spy, input_shape) - # Capture converted program - exported_program: ExportedProgram = converter_spy.call_args.args[1] - input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8) +class TestMaxPool2DUnsupported: + """Tests for unsupported MaxPool2D configurations.""" - convert_run_compare( - exported_program, - tflite_input_preprocess=ToNHWCPreprocess(), - tfl_model=tflite_flatbuffers_model, - tflite_output_preprocess=ToNCHWPreprocess(), - input_data=input_data, - ) + @staticmethod + def _verify_no_delegation(module, input_shape): + edge_model = to_quantized_edge_program( + module, + input_shape, + use_neutron_for_format_conversion=False, + ).exported_program() + + assert graph_contains_any_of_ops(edge_model.graph, [MaxPool2D]) + assert not graph_contains_any_of_ops(edge_model.graph, [ExecutorchDelegateCall]) + + def test_unsupported_dilation(self): + dilation = 2 # Unsupported. + input_shape = (1, 8, 7, 9) + + module = MaxPool2dModule(kernel_size=3, dilation=dilation) + + # Make sure the MaxPool was NOT delegated. + self._verify_no_delegation(module, input_shape) + + def test_unsupported_stride(self): + stride = 3 # Unsupported. 
+ input_shape = (1, 8, 7, 9) + + module = MaxPool2dModule(kernel_size=3, stride=stride) + + # Make sure the MaxPool was NOT delegated. + self._verify_no_delegation(module, input_shape) + + def test_unsupported_padding(self): + padding = 4 # Unsupported. Bigger than kernel size. + input_shape = (1, 8, 7, 9) + + with pytest.raises( + RuntimeError, match=r"pad should be at most half of effective kernel size" + ): + to_quantized_edge_program( + MaxPool2dModule(kernel_size=3, padding=padding), + input_shape, + use_neutron_for_format_conversion=False, + ).exported_program() + + def test_unsupported_ceil_mode(self): + ceil_mode = True # Unsupported. + input_shape = (1, 8, 7, 9) + + module = MaxPool2dModule(kernel_size=3, ceil_mode=ceil_mode) + + # Make sure the MaxPool was NOT delegated. + self._verify_no_delegation(module, input_shape) + + def test_unsupported_batch_size(self): + batch_size = 2 # Unsupported. + input_shape = (batch_size, 8, 7, 9) + + module = MaxPool2dModule(kernel_size=3) + + # Make sure the MaxPool was NOT delegated. + self._verify_no_delegation(module, input_shape) + + def test_unsupported_channels(self): + channels = 3 # Unsupported. Must be a multiple of `num_macs` (`8`). + input_shape = (1, channels, 7, 9) + + module = MaxPool2dModule(kernel_size=3) + + # Make sure the MaxPool was NOT delegated. 
+ self._verify_no_delegation(module, input_shape) diff --git a/docs/source/backends/nxp/op-support.csv b/docs/source/backends/nxp/op-support.csv index de1f83783a3..2025f6572db 100644 --- a/docs/source/backends/nxp/op-support.csv +++ b/docs/source/backends/nxp/op-support.csv @@ -13,8 +13,8 @@ aten.convolution.default,int8,static int8,"1D or 2D convolution, constant weight aten.div.Tensor,int8,static int8,"divisor - static tensor or scalar value, one dimension must satisfy %8 = 0 or scalar division (all dims = 1)" aten.hardtanh.default,int8,static int8,"supported ranges: <0,6>, <-1, 1>, <0,1>, <0,inf>" aten.leaky_relu.default,int8,static int8, -aten.max_pool2d.default,int8,static int8,"dilation=1, ceil_mode=False" -aten.max_pool2d_with_indices.default,int8,static int8,"dilation=1, ceil_mode=False" +aten.max_pool2d.default,int8,static int8,"dilation=1, ceil_mode=False, channels%8=0, batch_size=1, stride_h=1 or 2" +aten.max_pool2d_with_indices.default,int8,static int8,"dilation=1, ceil_mode=False, channels%8=0, batch_size=1, stride_h=1 or 2" aten.mean.dim,int8,static int8,"4D tensor only, dims = [-1,-2] or [-2,-1]" aten.mul.Tensor, int8, static int8, "tensor-size % 8 = 0" aten.mm.default,int8,static int8,"2D tensor only" From 274eccaae880eb40f31c86de789d462e29269596 Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Wed, 25 Feb 2026 15:21:29 +0100 Subject: [PATCH 3/6] NXP backend: Update `RemoveGetItemPass` to an edge dialect pass. This change is in preparation for `aten.max_pool1d.default` support, which gets converted to unsqueeze -> max_pool2d_with_indices -> squeeze during lowering to edge. So the `max_pool2d_with_indices` must be converted to `max_pool2d` in edge dialect.
--- backends/nxp/_passes/remove_getitem_pass.py | 105 ------------------ .../edge_passes/neutron_edge_pass_manager.py | 5 + .../remove_max_pool_getitem_pass.py | 79 +++++++++++++ backends/nxp/neutron_partitioner.py | 1 - backends/nxp/neutron_pass_manager.py | 50 --------- backends/nxp/nxp_backend.py | 27 ++--- backends/nxp/tests/executorch_pipeline.py | 7 +- .../test_remove_max_pool_get_item.py | 56 ++++++++++ backends/nxp/tests/test_integration.py | 4 +- .../tests/test_neutron_backend_executor.py | 1 + .../nxp/tests/test_node_format_inference.py | 34 +++--- backends/nxp/tests_models/utils.py | 10 +- examples/nxp/aot_neutron_compile.py | 9 +- 13 files changed, 187 insertions(+), 201 deletions(-) delete mode 100644 backends/nxp/_passes/remove_getitem_pass.py create mode 100644 backends/nxp/edge_passes/remove_max_pool_getitem_pass.py delete mode 100644 backends/nxp/neutron_pass_manager.py create mode 100644 backends/nxp/tests/ir/edge_passes/test_remove_max_pool_get_item.py diff --git a/backends/nxp/_passes/remove_getitem_pass.py b/backends/nxp/_passes/remove_getitem_pass.py deleted file mode 100644 index 6e5f2535746..00000000000 --- a/backends/nxp/_passes/remove_getitem_pass.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2025-2026 NXP -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch - -from executorch.backends.nxp.backend.data_format import DataFormat, NXP_NODE_FORMAT -from executorch.exir.dialects._ops import ops as exir_ops -from executorch.exir.pass_base import ExportPass, PassResult - - -class RemoveGetItemPass(ExportPass): - """ - This remove item is used to remove getitem operator for max_pool2d_with_indices.default operator, and replace it with a single operator, - that extracts the first output. More specifically, we are only getting the first output from aten::maxpool2d operator. 
- Before Pass: - MaxPool2d ---> GetItem[max_values, max_indexes] - After Pass: - MaxPool2d -> max_values - """ - - def call(self, graph_module: torch.fx.GraphModule): - module = graph_module - for node in module.graph.nodes: - if node.op == "call_function": - if ( - node.target.__name__ == "aten.max_pool2d_with_indices.default" - or node.target.__name__ == "aten.max.dim" - ): - users = list(node.users.keys()) - - if len(users) != 1: - if len(users) == 2 and node.target.__name__ == "aten.max.dim": - # Two users is allowed for max.dim. For that case, - # rather than removing the getitem node in this - # pass, we handle the getitem nodes in the op's - # visitor when serializing - continue - else: - raise AssertionError( - f"Invalid number of users for {node.target.__name__}: {len(users)}" - ) - - getitem_node = list(node.users.keys())[0] - - if getitem_node.target.__name__ != "getitem": - raise AssertionError( - f"Expected max node's user to be getitem, got {getitem_node.target.__name__}" - ) - - getitem_index = getitem_node.args[1] - - with module.graph.inserting_before(node): - if ( - node.target.__name__ - == "aten.max_pool2d_with_indices.default" - ): - if getitem_index != 0: - raise AssertionError( - f"Expected second argument of getitem node for {node.target.__name__} to be 0, got " - f"{getitem_index}. XNNPACK delegate currently only supports getting just the max " - "values from the op but not getting the corresponding indices." - ) - new_max_wd = module.graph.create_node( - "call_function", - exir_ops.edge.aten.max_pool2d.default, - args=node.args, - kwargs=node.kwargs, - ) - - else: - if getitem_index != 0: - raise AssertionError( - f"Expected second argument of getitem node for {node.target.__name__} to be 0, got " - f"{getitem_index}. XNNPACK delegate currently only supports getting just the max " - "values or getting both the max values and their corresponding indices from the " - "op, but not getting the indices alone." 
- ) - new_max_wd = module.graph.create_node( - "call_function", - exir_ops.edge.aten.amax.default, - args=node.args, - kwargs=node.kwargs, - ) - - # MODIFIED PART START - # Make sure to preserve the inferred node format. - new_max_wd.meta[NXP_NODE_FORMAT] = node.meta.get( - NXP_NODE_FORMAT, DataFormat.NONE - ) - # MODIFIED PART END - - getitem_node.replace_all_uses_with(new_max_wd) - - module.graph.erase_node(getitem_node) - module.graph.erase_node(node) - - graph_module.recompile() - # Propagate metadata and retrace module - graph_module = super().call(graph_module).graph_module - - return PassResult(graph_module, True) diff --git a/backends/nxp/edge_passes/neutron_edge_pass_manager.py b/backends/nxp/edge_passes/neutron_edge_pass_manager.py index 2252ff05a21..383305a8573 100644 --- a/backends/nxp/edge_passes/neutron_edge_pass_manager.py +++ b/backends/nxp/edge_passes/neutron_edge_pass_manager.py @@ -14,6 +14,10 @@ from executorch.backends.nxp.edge_passes.remove_as_strided_copy_nodes import ( RemoveUselessAsStridedCopyNodes, ) + +from executorch.backends.nxp.edge_passes.remove_max_pool_getitem_pass import ( + RemoveMaxPoolGetItemPass, +) from torch.fx.passes.infra.pass_manager import PassManager @@ -25,6 +29,7 @@ def __init__(self, passes: list[NeutronEdgePass] = None): MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(), RemoveUselessAsStridedCopyNodes(), ConvertReshapingNodesToViewPass(), + RemoveMaxPoolGetItemPass(), ] super().__init__( diff --git a/backends/nxp/edge_passes/remove_max_pool_getitem_pass.py b/backends/nxp/edge_passes/remove_max_pool_getitem_pass.py new file mode 100644 index 00000000000..7c9642ac44c --- /dev/null +++ b/backends/nxp/edge_passes/remove_max_pool_getitem_pass.py @@ -0,0 +1,79 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# Copyright 2025-2026 NXP +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import operator + +import torch + +from executorch.backends.nxp.edge_passes.neutron_edge_pass import NeutronEdgePass + +# noinspection PyProtectedMember +from executorch.exir.dialects._ops import ops as exir_ops +from executorch.exir.pass_base import PassResult + + +class RemoveMaxPoolGetItemPass(NeutronEdgePass): + """Replace nodes in the following pattern: + + │ + ┌────────────────▼────────────────┐ + │ max_pool2d_with_indices.default │ + └────────────────┬────────────────┘ │ + │ replace with ┌──────────▼─────────┐ + │ ──────────────► │ max_pool2d.default │ + ┌──────▼─────┐ └──────────┬─────────┘ + │ getitem[0] │ (extract max values only) ▼ + └──────┬─────┘ + │ + ▼ + + This transformation is necessary because Neutron does not support returning the indices of the maximum values. + """ + + def run(self, graph_module: torch.fx.GraphModule) -> PassResult: + for node in graph_module.graph.nodes: + if not ( + node.op == "call_function" + and node.target == exir_ops.edge.aten.max_pool2d_with_indices.default + ): + continue + + if len(users := list(node.users)) != 1: + continue # Unexpected case. + + if (getitem_node := users[0]).target != operator.getitem: + continue # Unexpected case. + + if getitem_node.args[1] != 0: + # The index of the output tensor. Only `0` is supported as index `1` holds the indices from which the + # max values were selected, which cannot be done on Neutron. + continue + + with graph_module.graph.inserting_before(node): + new_max_pool_2d = graph_module.graph.create_node( + "call_function", + exir_ops.edge.aten.max_pool2d.default, + args=node.args, + kwargs=node.kwargs, + ) + + # Attach the rest of the model to the `aten.max_pool2d.default`. + getitem_node.replace_all_uses_with(new_max_pool_2d) + + # Remove the old nodes. + graph_module.graph.erase_node(getitem_node) + graph_module.graph.erase_node(node) + + # Recompile the graph. 
+ graph_module.graph.eliminate_dead_code() + graph_module.recompile() + + # Return now to avoid traversing a modified graph. The parent class will call this pass again if needed. + return PassResult(graph_module, True) + + # No changes were made. + return PassResult(graph_module, False) diff --git a/backends/nxp/neutron_partitioner.py b/backends/nxp/neutron_partitioner.py index 004f411dbbb..b4fe1e79472 100644 --- a/backends/nxp/neutron_partitioner.py +++ b/backends/nxp/neutron_partitioner.py @@ -214,7 +214,6 @@ def tag_qdq_clusters(self, nodes: list[torch.fx.Node]): exir_ops.edge.aten.hardtanh.default: HardTanhConverter, # noqa F405 exir_ops.edge.aten.leaky_relu.default: LeakyReluConverter, # noqa F405 exir_ops.edge.aten.max_pool2d.default: MaxPool2dConverter, # noqa F405 - exir_ops.edge.aten.max_pool2d_with_indices.default: MaxPool2dConverter, # noqa F405 exir_ops.edge.aten.mean.dim: MeanDimConverter, # noqa F405 exir_ops.edge.aten.mm.default: MMConverter, # noqa F405 exir_ops.edge.aten.mul.Tensor: MulTensorConverter, # noqa F405 diff --git a/backends/nxp/neutron_pass_manager.py b/backends/nxp/neutron_pass_manager.py deleted file mode 100644 index 02bcc0079f6..00000000000 --- a/backends/nxp/neutron_pass_manager.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2025 NXP -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -from typing import List, Optional, Type - -from executorch.exir.pass_base import ExportPass -from executorch.exir.program._program import _transform - -from torch._export.pass_base import PassType -from torch.export import ExportedProgram - - -class NeutronPassManager: - def __init__( - self, - exported_program: ExportedProgram, - passes: Optional[List[Type[PassType]]] = None, - ) -> None: - """ - A helper class to run multiple passes on a program - """ - self._exported_program = exported_program - - if not passes: - self.passes = [] - else: - self.passes = passes - - @property - def exported_program(self) -> ExportedProgram: - return self._exported_program - - def transform(self) -> ExportedProgram: - """ - Returns a transformed ExportedProgram - """ - ep = self.exported_program - for pass_ in self.passes: - if issubclass(pass_, ExportPass): - transform_pass = pass_() - else: - raise RuntimeError( - f"Expecting ExportPass or ExportPass(), but got pass: {pass_} with type: {type(pass_)}" - ) - ep = _transform(ep, transform_pass) - return ep diff --git a/backends/nxp/nxp_backend.py b/backends/nxp/nxp_backend.py index f72587d537a..d59d58e42f9 100644 --- a/backends/nxp/nxp_backend.py +++ b/backends/nxp/nxp_backend.py @@ -15,7 +15,6 @@ import numpy as np import torch -from executorch.backends.nxp._passes.remove_getitem_pass import RemoveGetItemPass from executorch.backends.nxp.backend.data_format import DataFormat from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, @@ -29,12 +28,17 @@ extract_artifacts_from_neutron_node, NeutronNodeArtifacts, ) -from executorch.backends.nxp.neutron_pass_manager import NeutronPassManager from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult from executorch.exir.backend.compile_spec_schema import CompileSpec -from executorch.exir.verification.verifier import EXIREdgeDialectVerifier from torch.export.exported_program import ExportedProgram +# Aten dialect 
operators that are allowed to be in the edge dialect model. These operators are usually created by a +# transform pass or by a prevented operator decomposition during lowering to edge. +core_aten_ops_exception_list = [ + torch.ops.aten.max_pool2d.default, + torch.ops.aten.prelu.default, +] + class NeutronCompileSpecBuilder: config: NeutronTargetSpec @@ -184,23 +188,6 @@ def preprocess( # noqa C901 # Serialize and return the program. if output_format == "tflite": - # We need to create custom model verifier with max_pool2d added as exception. - # Otherwise, we get violation that this op is not part of ATen Core ops. - edge_program._verifiers = [ - EXIREdgeDialectVerifier( - class_only=True, - core_aten_ops_exception_list=[ - torch.ops.aten.max_pool2d.default, - torch.ops.aten.prelu.default, - ], - ) - ] - - # Remove MaxPool-related "getitem" nodes from graph - edge_program = NeutronPassManager( - edge_program, [RemoveGetItemPass] - ).transform() - # Convert the edge program to TFLite. conversion_config = ConversionConfig( {"use_neutron_for_format_conversion": use_neutron_for_format_conversion} diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index cd216a2d307..9bf90bbd5ba 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -24,7 +24,11 @@ RemoveIOQuantOpsPass, ) from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner -from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec + +from executorch.backends.nxp.nxp_backend import ( + core_aten_ops_exception_list, + generate_neutron_compile_spec, +) from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.exir import ( @@ -145,6 +149,7 @@ def to_quantized_edge_program( partitioner=partitioners, compile_config=EdgeCompileConfig( _check_ir_validity=False, + 
_core_aten_ops_exception_list=core_aten_ops_exception_list, ), ) diff --git a/backends/nxp/tests/ir/edge_passes/test_remove_max_pool_get_item.py b/backends/nxp/tests/ir/edge_passes/test_remove_max_pool_get_item.py new file mode 100644 index 00000000000..e864f9eb2d7 --- /dev/null +++ b/backends/nxp/tests/ir/edge_passes/test_remove_max_pool_get_item.py @@ -0,0 +1,56 @@ +# Copyright 2026 NXP +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import operator + +import torch + +from executorch.backends.nxp.edge_passes.remove_max_pool_getitem_pass import ( + RemoveMaxPoolGetItemPass, +) +from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program +from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops + +# noinspection PyProtectedMember +from executorch.exir.dialects._ops import ops as exir_ops + + +class MaxPool2dModule(torch.nn.Module): + def __init__(self, kernel_size=3, **kwargs): + super().__init__() + self.max_pool2d = torch.nn.MaxPool2d(kernel_size, **kwargs) + + def forward(self, x): + return self.max_pool2d(x) + + +def test_remove_max_pool_get_item_pass(mocker): + model = MaxPool2dModule() + input_shape = (1, 3, 12, 12) + + # Spy on the pass. + spy = mocker.spy(RemoveMaxPoolGetItemPass, "run") + + edge_program = to_quantized_edge_program( + model, + input_shape, + ).exported_program() + + # We cannot extract the graph before the pass, because it is modified inplace. So accessing the 2nd argument of the + # first call of the pass (which is the graph) returns the graph which is already modified by the pass. + # But at least we can access the return value to determine if the pass made a modification. + assert spy.spy_return_list[0].modified, "The pass did not modify the graph." + + # Make sure the `aten.max_pool2d_with_indices.default` and `getitem` were replaced by `aten.max_pool2d.default`. 
+ assert not graph_contains_any_of_ops( + edge_program.graph, + [exir_ops.edge.aten.max_pool2d_with_indices.default, operator.getitem], + ) + assert graph_contains_any_of_ops( + edge_program.graph, + [ + exir_ops.edge.aten.max_pool2d.default, + ], + ) diff --git a/backends/nxp/tests/test_integration.py b/backends/nxp/tests/test_integration.py index fe157b44c48..5b64646004c 100644 --- a/backends/nxp/tests/test_integration.py +++ b/backends/nxp/tests/test_integration.py @@ -1,4 +1,4 @@ -# Copyright 2024-2025 NXP +# Copyright 2024-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. @@ -47,7 +47,7 @@ def test_cifarnet(use_qat): delegation_info = get_delegation_info(exec_prog.exported_program().graph_module) assert delegation_info.num_delegated_subgraphs == 1 assert delegation_info.num_non_delegated_nodes == 11 - assert delegation_info.num_delegated_nodes == 45 + assert delegation_info.num_delegated_nodes == 42 nodes = list(exec_prog.exported_program().graph.nodes) assert nodes[2].name == "quantized_decomposed_quantize_per_tensor_default" diff --git a/backends/nxp/tests/test_neutron_backend_executor.py b/backends/nxp/tests/test_neutron_backend_executor.py index d3a84860614..14bfeebd325 100644 --- a/backends/nxp/tests/test_neutron_backend_executor.py +++ b/backends/nxp/tests/test_neutron_backend_executor.py @@ -108,6 +108,7 @@ def test_conv_fc__lowered_program_and_tflite_output_match(mocker): ) convert_run_compare( exported_program, + tfl_model=tflite_flatbuffers_model, input_data=input_data, tflite_input_preprocess=ToNHWCPreprocess(), ) diff --git a/backends/nxp/tests/test_node_format_inference.py b/backends/nxp/tests/test_node_format_inference.py index 21ad95c6b64..9a6d3063a0b 100644 --- a/backends/nxp/tests/test_node_format_inference.py +++ b/backends/nxp/tests/test_node_format_inference.py @@ -11,14 +11,15 @@ NodeFormatInference, NXP_NODE_FORMAT, ) -from 
executorch.backends.nxp.neutron_pass_manager import NeutronPassManager +from executorch.backends.nxp.edge_passes.neutron_edge_pass_manager import ( + NeutronEdgePassManager, +) from executorch.backends.nxp.tests.models import ( Conv2dModule, MaxPool2dModule, SoftmaxModule, ) -from executorch.backends.xnnpack._passes import RemoveGetItemPass -from executorch.exir.verification.verifier import EXIREdgeDialectVerifier +from executorch.exir import EdgeCompileConfig def test_convolution(): @@ -61,25 +62,22 @@ def test_softmax(): assert expected_mapping[node.name] == node.meta[NXP_NODE_FORMAT] -def test_maxpool2d(): +def test_max_pool2d(): model = MaxPool2dModule() example_input = (torch.ones(1, 4, 32, 32),) exir_program = torch.export.export(model, example_input) - edge_program = exir.to_edge(exir_program).exported_program() - # We need to create custom model verifier with max_pool2d added as exception. - # Otherwise, we get violation that this op is not part of ATen Core ops. - edge_program._verifiers = [ - EXIREdgeDialectVerifier( - class_only=True, - core_aten_ops_exception_list=[torch.ops.aten.max_pool2d.default], - ) - ] - - # Remove MaxPool-related "getitem" nodes from graph - edge_program = NeutronPassManager(edge_program, [RemoveGetItemPass]).transform() - NodeFormatInference(edge_program).identify_node_formats() + # We need to add the `aten.max_pool2d.default` as an exception, otherwise we would get violation that this op is + # not part of ATen Core ops. 
+ exception_list = [torch.ops.aten.max_pool2d.default] + epm = exir.to_edge( + exir_program, + compile_config=EdgeCompileConfig(_core_aten_ops_exception_list=exception_list), + ) + + epm = epm.transform(NeutronEdgePassManager()) + NodeFormatInference(epm.exported_program()).identify_node_formats() expected_mapping = { "x": DataFormat.CHANNELS_FIRST, @@ -87,5 +85,5 @@ def test_maxpool2d(): "output": DataFormat.CHANNELS_FIRST, } - for node in edge_program.graph.nodes: + for node in epm.exported_program().graph.nodes: assert expected_mapping[node.name] == node.meta[NXP_NODE_FORMAT] diff --git a/backends/nxp/tests_models/utils.py b/backends/nxp/tests_models/utils.py index dcfc7d5b18b..5c179f5ab76 100644 --- a/backends/nxp/tests_models/utils.py +++ b/backends/nxp/tests_models/utils.py @@ -18,7 +18,11 @@ NeutronEdgePassManager, ) from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner -from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec + +from executorch.backends.nxp.nxp_backend import ( + core_aten_ops_exception_list, + generate_neutron_compile_spec, +) from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer from executorch.backends.nxp.tests_models.model_input_spec import ModelInputSpec from executorch.devtools.visualization.visualization_utils import ( @@ -146,7 +150,9 @@ def to_quantized_edge_program( core_aten_ep, transform_passes=NeutronEdgePassManager(), partitioner=partitioners, - compile_config=EdgeCompileConfig(), + compile_config=EdgeCompileConfig( + _core_aten_ops_exception_list=core_aten_ops_exception_list + ), ) return edge_program_manager diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py index c8d3e376c91..2439aeb3f82 100644 --- a/examples/nxp/aot_neutron_compile.py +++ b/examples/nxp/aot_neutron_compile.py @@ -25,7 +25,10 @@ RemoveIOQuantOpsPass, ) from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner -from 
executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec +from executorch.backends.nxp.nxp_backend import ( + core_aten_ops_exception_list, + generate_neutron_compile_spec, +) from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.devtools.visualization.visualization_utils import ( @@ -327,7 +330,9 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool): export(module, example_inputs, strict=True), transform_passes=NeutronEdgePassManager(), partitioner=partitioners, - compile_config=EdgeCompileConfig(), + compile_config=EdgeCompileConfig( + _core_aten_ops_exception_list=core_aten_ops_exception_list, + ), ) if args.remove_quant_io_ops: From 4f41cc01735055967d96902f04de2896fc4d8c99 Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Wed, 25 Feb 2026 15:24:52 +0100 Subject: [PATCH 4/6] NXP backend: Add support for `aten.max_pool1d.default`. 
--- ...operator_into_separate_qdq_cluster_pass.py | 15 +++++ .../remove_max_pool_getitem_pass.py | 4 +- backends/nxp/quantizer/neutron_quantizer.py | 6 +- backends/nxp/quantizer/patterns.py | 13 ++-- .../test_max_pool_2d_converter.py | 67 +++++++++++++++++++ docs/source/backends/nxp/op-support.csv | 1 + 6 files changed, 97 insertions(+), 9 deletions(-) diff --git a/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py b/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py index ab6e394c7ef..75ed6c34c0f 100644 --- a/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py +++ b/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py @@ -7,6 +7,8 @@ from executorch.backends.nxp.edge_passes.neutron_edge_pass import NeutronEdgePass from executorch.backends.nxp.neutron_partitioner import QDQClusterRecognizer + +# noinspection PyProtectedMember from executorch.exir.dialects._ops import ops as exir_ops from torch.fx import Node from torch.fx.passes.infra.pass_base import PassResult @@ -14,6 +16,7 @@ # Operator aliases for better readability. AddMM = exir_ops.edge.aten.addmm.default AvgPool2D = exir_ops.edge.aten.avg_pool2d.default +MaxPool2D = exir_ops.edge.aten.max_pool2d.default Conv = exir_ops.edge.aten.convolution.default Clone = exir_ops.edge.aten.clone.default CloneDimOrder = exir_ops.edge.dim_order_ops._clone_dim_order.default @@ -117,6 +120,12 @@ class MoveLeadingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass): ViewCopy, UnsqueezeCopy, ], + # MaxPool1D is represented in edge as Unsqueeze -> MaxPool2D -> Squeeze. The reshaping nodes must be moved out + # of the cluster. Instead of [Un]squeeze, ViewCopy can be used as well. 
+ MaxPool2D: [ + ViewCopy, + UnsqueezeCopy, + ], } def run(self, graph_module: torch.fx.GraphModule) -> PassResult: @@ -221,6 +230,12 @@ class MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass): ViewCopy, SqueezeCopy, ], + # MaxPool1D is represented in edge as Unsqueeze -> MaxPool2D -> Squeeze. The reshaping nodes must be moved out + # of the cluster. Instead of [Un]squeeze, ViewCopy can be used as well. + MaxPool2D: [ + ViewCopy, + SqueezeCopy, + ], } def run(self, graph_module: torch.fx.GraphModule) -> PassResult: diff --git a/backends/nxp/edge_passes/remove_max_pool_getitem_pass.py b/backends/nxp/edge_passes/remove_max_pool_getitem_pass.py index 7c9642ac44c..315d5248f5c 100644 --- a/backends/nxp/edge_passes/remove_max_pool_getitem_pass.py +++ b/backends/nxp/edge_passes/remove_max_pool_getitem_pass.py @@ -1,6 +1,4 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2025-2026 NXP -# All rights reserved. +# Copyright 2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py index 23c9bd782f2..485e96d53c6 100644 --- a/backends/nxp/quantizer/neutron_quantizer.py +++ b/backends/nxp/quantizer/neutron_quantizer.py @@ -32,7 +32,8 @@ LeakyReluInPlacePattern, LeakyReluPattern, LinearPattern, - MaxPoolPattern, + MaxPool1DPattern, + MaxPool2DPattern, MeanDimPattern, MmPattern, MulTensorPattern, @@ -273,7 +274,8 @@ def __init__(self, neutron_target_spec: NeutronTargetSpec, is_qat: bool = False) OpQuantizer(LeakyReluPattern(is_qat=is_qat), static_fc_qconfig), OpQuantizer(LeakyReluInPlacePattern(is_qat=is_qat), static_fc_qconfig), OpQuantizer(LinearPattern(self, is_qat=is_qat), static_fc_qconfig), - OpQuantizer(MaxPoolPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MaxPool1DPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MaxPool2DPattern(is_qat=is_qat), static_qconfig), OpQuantizer(MeanDimPattern(is_qat=is_qat), static_qconfig), OpQuantizer(MmPattern(self, is_qat=is_qat), static_qconfig), OpQuantizer(MulTensorPattern(is_qat=is_qat), static_qconfig), diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 2b949497272..ccbcb297bc6 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -738,10 +738,15 @@ def get_anchors( ) -class MaxPoolPattern(SharedSpecPattern): - """ - Quantizer for MaxPool2D operator. 
- """ +class MaxPool1DPattern(SharedSpecPattern): + """Quantizer for the MaxPool1D operator.""" + + def partition_types(self): + return [torch.ops.aten.max_pool1d.default] + + +class MaxPool2DPattern(SharedSpecPattern): + """Quantizer for the MaxPool2D operator.""" def partition_types(self): return [torch.ops.aten.max_pool2d.default] diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index 79786169510..5567d5567bb 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -24,6 +24,23 @@ ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate MaxPool2D = exir_ops.edge.aten.max_pool2d.default +Squeeze = exir_ops.edge.aten.squeeze.default +SqueezeDim = exir_ops.edge.aten.squeeze.dim +SqueezeDims = exir_ops.edge.aten.squeeze.dims +Unsqueeze = exir_ops.edge.aten.unsqueeze.default +ViewCopy = exir_ops.edge.aten.view_copy.default + + +class MaxPool1DModule(torch.nn.Module): + def __init__(self): + super().__init__() + + self.max_pool = torch.nn.MaxPool1d( + kernel_size=3, + ) + + def forward(self, x): + return self.max_pool(x) class MaxPool2dModule(torch.nn.Module): @@ -177,3 +194,53 @@ def test_unsupported_channels(self): # Make sure the MaxPool was NOT delegated. self._verify_no_delegation(module, input_shape) + + +class TestMaxPool1D: + """There is no `max_pool1d` in the edge dialect. During lowering to edge, ExecuTorch extends the shape to 4D (with + a `1`), then applies `max_pool2d`, and then removes the `1` from the shape to make it 3D again. So the aten + `max_pool1d` is handled by the `max_pool2d` support. This test verifies that the lowering process works correctly. 
+ """ + + def test_max_pool_2d__from_1d(self, mocker): + model = MaxPool1DModule() + input_shape = (1, 8, 12) + extended_shape = (1, 8, 1, 12) + + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + edge_model = to_quantized_edge_program( + model, input_shape, use_neutron_for_format_conversion=False + ).exported_program() + + # Make sure the `max_pool` was delegated. + assert graph_contains_any_of_ops(edge_model.graph, [ExecutorchDelegateCall]) + assert not graph_contains_any_of_ops(edge_model.graph, [MaxPool2D]) + # There is not `max_pool1d` in the edge dialect, so we cannot check for its absence by comparing with the target. + # In order to detect any potential future changes (like the addition of `max_pool1d` to edge dialect), we check + # the name of the target. + assert not any( + n for n in edge_model.graph.nodes if "1d" in str(n.target) + ) # Check for anything 1D. + + # Make sure both `view_copy` nodes were added, and there is no `squeeze` or `unsqueeze`. + assert len([n for n in edge_model.graph.nodes if n.target == ViewCopy]) == 2 + assert not graph_contains_any_of_ops( + edge_model.graph, [Unsqueeze, Squeeze, SqueezeDim, SqueezeDims] + ) + + # Verify correct behavior of the converted NeutronIR model. + edge_partition = converter_spy.call_args.args[1] + neutron_ir_partition, _ = converter_spy.spy_return + + input_data = _generate_test_data(extended_shape) + + # Make sure the tested program contains the `MaxPool`. 
+ assert graph_contains_any_of_ops(edge_partition.graph, [MaxPool2D]) + + convert_run_compare( + edge_partition, + tfl_model=neutron_ir_partition, + input_data=input_data, + tflite_input_preprocess=ToChannelLastPreprocess(), + tflite_output_preprocess=ToChannelFirstPreprocess(), + ) diff --git a/docs/source/backends/nxp/op-support.csv b/docs/source/backends/nxp/op-support.csv index 2025f6572db..fa9a8a43205 100644 --- a/docs/source/backends/nxp/op-support.csv +++ b/docs/source/backends/nxp/op-support.csv @@ -13,6 +13,7 @@ aten.convolution.default,int8,static int8,"1D or 2D convolution, constant weight aten.div.Tensor,int8,static int8,"divisor - static tensor or scalar value, one dimension must satisfy %8 = 0 or scalar division (all dims = 1)" aten.hardtanh.default,int8,static int8,"supported ranges: <0,6>, <-1, 1>, <0,1>, <0,inf>" aten.leaky_relu.default,int8,static int8, +aten.max_pool1d.default,int8,static int8,"dilation=1, ceil_mode=False, channels%8=0, batch_size=1, stride_h=1 or 2" aten.max_pool2d.default,int8,static int8,"dilation=1, ceil_mode=False, channels%8=0, batch_size=1, stride_h=1 or 2" aten.max_pool2d_with_indices.default,int8,static int8,"dilation=1, ceil_mode=False, channels%8=0, batch_size=1, stride_h=1 or 2" aten.mean.dim,int8,static int8,"4D tensor only, dims = [-1,-2] or [-2,-1]" From 596b4d173de7dae634446aa3e49f36c6706eb163 Mon Sep 17 00:00:00 2001 From: Martin Pavella Date: Tue, 3 Mar 2026 13:24:42 +0100 Subject: [PATCH 5/6] NXP backend: Add conversion support for `GetItem`, and for nodes with multiple outputs. 
--- .../nxp/backend/edge_program_converter.py | 26 +++++++- .../builder/aten_model_builder_director.py | 33 ++++++---- .../backend/ir/converter/node_converter.py | 63 +++++++++++++------ .../ops_converters/__init__.py | 4 ++ .../ops_converters/getitem_converter.py | 45 +++++++++++++ .../nxp/backend/ir/converter/tensor_utils.py | 7 ++- 6 files changed, 144 insertions(+), 34 deletions(-) create mode 100644 backends/nxp/backend/ir/converter/node_converters/ops_converters/getitem_converter.py diff --git a/backends/nxp/backend/edge_program_converter.py b/backends/nxp/backend/edge_program_converter.py index 114f55c64ee..2d58fa193d8 100644 --- a/backends/nxp/backend/edge_program_converter.py +++ b/backends/nxp/backend/edge_program_converter.py @@ -3,6 +3,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import operator + import executorch.backends.nxp.backend.ir.logger as logger import flatbuffers from executorch.backends.nxp.backend.ir.conversion_config import ConversionConfig @@ -57,6 +59,9 @@ } +NXP_PROCESSED_TAG = "NXP_PROCESSED_TAG" + + class EdgeProgramToIRConverter: """ Converter from convertion of ExportedProgram in Edge dialect to IR (TFLite Flatbuffers). @@ -159,6 +164,11 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex if node.target in qdq_related_functions and "cluster" in node.meta: # Skip (De)Quantize nodes that were already processed pass + elif node.target == operator.getitem and node.meta.get( + NXP_PROCESSED_TAG, False + ): + # The node was already processed alongside the Q/DQ ops. 
+ pass elif node.target in functions_converters: functions_converters[node.target](conversion_context).convert(node) else: @@ -264,9 +274,8 @@ def build_conversion_context( def _convert_qdq_cluster_q_dq_nodes( self, nodes: list[Node], conversion_context: ConversionContext ): - """ - Go through program and convert De(Quantize) nodes that are part of the QDQ cluster into - tensors. + """Go through the program and convert [De]Quantize nodes that are part of a QDQ cluster into tensors. + Also convert related `GetItem` nodes to NO-OPs, which just propagate the quantization. :param nodes: Program's nodes. :param conversion_context: ConversionContext instance. @@ -285,3 +294,14 @@ def _convert_qdq_cluster_q_dq_nodes( and part_of_qdq_cluster ): qdq_q_ops_converters[node.target](conversion_context).convert(node) + + # Usually, `getitem` nodes are a part of a "foreign" QDQ cluster. They consume the output of the main compute + # operator, and they are followed by a `Quantize` operator, which specifies the output quantization parameters + # of the cluster. So the input of the `GetItem` is float32, and the output is quantized. Due to how the Neutron + # IR represents quantization, the quantization parameters must be propagated from the output to the input. + for node in nodes: + if node.target == operator.getitem: + # Convert the builtin function into a "NO-OP" in the IR, and propagate the quantization parameters in + # reverse. 
+ GetItemConverter(conversion_context).convert(node) # noqa: F405 + node.meta[NXP_PROCESSED_TAG] = True diff --git a/backends/nxp/backend/ir/converter/builder/aten_model_builder_director.py b/backends/nxp/backend/ir/converter/builder/aten_model_builder_director.py index d4c4d96a5c6..4e7f706afbd 100644 --- a/backends/nxp/backend/ir/converter/builder/aten_model_builder_director.py +++ b/backends/nxp/backend/ir/converter/builder/aten_model_builder_director.py @@ -8,6 +8,10 @@ ModelBuilder, ) from executorch.backends.nxp.backend.ir.converter.conversion import translator + +from executorch.backends.nxp.backend.ir.converter.tensor_utils import ( + get_name_of_node_output, +) from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model from torch.fx import Node from torch.nn import Parameter @@ -30,19 +34,26 @@ def append_as_fake_tensor(self, node: Node, node_format: DataFormat): if self.tensor_exists(node.name): return - tensor = node.meta["val"] - if isinstance(tensor, tuple): - tensor = tensor[0] # Fake tensor - _type = translator.convert_data_type(tensor.dtype) - shape = list(tensor.shape) + def _append_tensor(tensor_, name=None): + type_ = translator.convert_data_type(tensor_.dtype) + shape = list(tensor_.shape) - if node_format.is_channels_first(): - shape = translator.dims_to_channels_last(shape) + if node_format.is_channels_first(): + shape = translator.dims_to_channels_last(shape) - tensor = self.create_empty_tensor(node.name, _type, shape) - tensor.tensor_format = DataFormat.convert_executorch_format_to_neutron( - node_format - ) + tensor = self.create_empty_tensor(name or node.name, type_, shape) + tensor.tensor_format = DataFormat.convert_executorch_format_to_neutron( + node_format + ) + + tensor_or_tuple = node.meta["val"] + if isinstance(tensor_or_tuple, tuple): + # The `node` can produce multiple output tensors, which are represented using this tuple. 
+ for i, t in enumerate(tensor_or_tuple): + _append_tensor(t, get_name_of_node_output(node, i)) + + else: + _append_tensor(tensor_or_tuple) def append_as_static_tensor( self, node: Node, node_format: DataFormat, tensor: Parameter diff --git a/backends/nxp/backend/ir/converter/node_converter.py b/backends/nxp/backend/ir/converter/node_converter.py index 623ba97ba73..aef7d8f2104 100755 --- a/backends/nxp/backend/ir/converter/node_converter.py +++ b/backends/nxp/backend/ir/converter/node_converter.py @@ -3,6 +3,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import operator from abc import ABC, abstractmethod import torch @@ -14,6 +15,10 @@ from executorch.backends.nxp.backend.ir.converter.builder.aten_model_builder_director import ( AtenModelBuilderDirector, ) + +from executorch.backends.nxp.backend.ir.converter.tensor_utils import ( + get_name_of_node_output, +) from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.exir.dialects._ops import ops as exir_ops @@ -231,25 +236,45 @@ def _create_tflite_op_with_io_tensors(self, node: Node) -> tflite_model.Operator # Initialize node's inputs t_operator.inputs = tflite_model.OperatorInputs() - input_nodes = [] - for arg in node.args: - match arg: - case Node(): - input_nodes.append(arg) - case list() if all(isinstance(node_, Node) for node_ in arg): - input_nodes.extend(arg) - - for ancestor_node in input_nodes: - assert self.context.tflite_builder.tensor_exists(ancestor_node.name) - t_operator.tmp_inputs.append( - self.context.tflite_builder.tensor_for_name(ancestor_node.name) - ) - - # Add node's output as a new tensor - assert self.context.tflite_builder.tensor_exists(node.name) - t_operator.outputs = tflite_model.OperatorOutputs() - t_operator.tmp_outputs.append( - 
self.context.tflite_builder.tensor_for_name(node.name) + if node.target == operator.getitem: + # Special case of a builtin function, which can extract a specific output tensor from the previous node. + previous_node = node.args[0] + output_index = node.args[1] + input_name = get_name_of_node_output(previous_node, output_index) + assert self.builder.tensor_exists(input_name) + t_operator.tmp_inputs.append(self.builder.tensor_for_name(input_name)) + + else: + # Regular operator. + input_nodes = [] + for arg in node.args: + match arg: + case Node(): + input_nodes.append(arg) + case list() if all(isinstance(node_, Node) for node_ in arg): + input_nodes.extend(arg) + + for ancestor_node in input_nodes: + assert self.context.tflite_builder.tensor_exists(ancestor_node.name) + t_operator.tmp_inputs.append( + self.context.tflite_builder.tensor_for_name(ancestor_node.name) + ) + + # Add node's outputs as a new tensors + num_outputs = ( + len(node.meta["val"]) if isinstance(node.meta["val"], tuple) else 1 ) + if num_outputs == 1: + # Single output node. + assert self.builder.tensor_exists(node.name) + t_operator.outputs = tflite_model.OperatorOutputs() + t_operator.tmp_outputs.append(self.builder.tensor_for_name(node.name)) + else: + # The node has multiple outputs. 
+ t_operator.outputs = tflite_model.OperatorOutputs() + for output_index in range(num_outputs): + tensor_name = get_name_of_node_output(node, output_index) + assert self.builder.tensor_exists(tensor_name) + t_operator.tmp_outputs.append(self.builder.tensor_for_name(tensor_name)) return t_operator diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py index 7463bef1bfa..ba4c9149702 100755 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py @@ -28,6 +28,9 @@ from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.convolution_converter import ( ConvolutionConverter, ) +from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.getitem_converter import ( + GetItemConverter, +) from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.hardtanh_converter import ( HardTanhConverter, ) @@ -101,6 +104,7 @@ "CloneConverter", "ConstantPadNDConverter", "ConvolutionConverter", + "GetItemConverter", "HardTanhConverter", "LeakyReluConverter", "MaxPool2dConverter", diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/getitem_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/getitem_converter.py new file mode 100644 index 00000000000..81e9b01b220 --- /dev/null +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/getitem_converter.py @@ -0,0 +1,45 @@ +# Copyright 2025-2026 NXP +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ + +from executorch.backends.nxp.backend.custom_delegation_options import ( + CustomDelegationOptions, +) +from executorch.backends.nxp.backend.ir.converter.node_converter import NodeConverter +from executorch.backends.nxp.backend.ir.converter.quantization_utils import ( + propagate_quantization, +) +from torch.fx import Node +from torch.nn import Parameter + + +class GetItemConverter(NodeConverter): + + @staticmethod + def _is_supported_in_IR( + node: Node, + parameters_mapping: dict[str, Parameter], + custom_delegation_options: CustomDelegationOptions, + ) -> bool: + return True + + def convert(self, node: Node): + """Skip the `GetItem` node, as it serves no purpose in NeutronIR.""" + self.assert_convertible(node) + + t_op = self._create_tflite_op_with_io_tensors(node) + + # Usually, `getitem` nodes are a part of a "foreign" QDQ cluster. They consume the output of the main compute + # operator, and they are followed by a `Quantize` operator, which specifies the output quantization parameters + # of the cluster. So the input of the `GetItem` is float32, and the output is quantized. Due to how the Neutron + # IR represents quantization, the quantization parameters must be propagated from the output to the input. 
+        input_ = t_op.tmp_inputs[0]
+        output = t_op.tmp_outputs[0]
+        if input_.quantization is None and output.quantization is not None:
+            input_.type = output.type
+            propagate_quantization(from_tensor=output, to_tensor=input_)
+
+        self.builder.turn_operator_to_identity(t_op)
+        self.builder.append_operators([t_op])
diff --git a/backends/nxp/backend/ir/converter/tensor_utils.py b/backends/nxp/backend/ir/converter/tensor_utils.py
index efa0bdc2a42..5b377a51fcf 100755
--- a/backends/nxp/backend/ir/converter/tensor_utils.py
+++ b/backends/nxp/backend/ir/converter/tensor_utils.py
@@ -1,4 +1,4 @@
-# Copyright 2024 NXP
+# Copyright 2024-2026 NXP
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -9,6 +9,7 @@
 from executorch.backends.nxp.backend.ir.tflite_generator import (
     tflite_model as tflite_model,
 )
+from torch.fx import Node
 
 
 def _buffer_has_data(t_buffer: tflite_model.Buffer) -> Optional[bool]:
@@ -48,3 +49,7 @@ def all_tensors_are_static(*list_of_tensors) -> bool:
     """
 
     return all(tensor_has_data(t) for t in list_of_tensors)
+
+
+def get_name_of_node_output(node: Node, output_index: int) -> str:
+    return node.name + f"_{output_index}"
From 7f031e21afa0353a64d62ead43d5aff6b38c4e8a Mon Sep 17 00:00:00 2001
From: Martin Pavella
Date: Tue, 3 Mar 2026 14:03:57 +0100
Subject: [PATCH 6/6] NXP backend: Remove dependency on the non-edge operator
 `aten.max_pool2d.default`.
--- .../nxp/backend/edge_program_converter.py | 2 +- .../builder/quantization_verification.py | 24 ++++-- .../ops_converters/__init__.py | 6 +- ...y => max_pool2d_with_indices_converter.py} | 41 ++++++---- ...operator_into_separate_qdq_cluster_pass.py | 20 +++-- .../edge_passes/neutron_edge_pass_manager.py | 4 - .../remove_max_pool_getitem_pass.py | 77 ------------------- backends/nxp/neutron_partitioner.py | 2 +- .../test_max_pool_2d_converter.py | 8 +- .../test_remove_max_pool_get_item.py | 56 -------------- 10 files changed, 72 insertions(+), 168 deletions(-) rename backends/nxp/backend/ir/converter/node_converters/ops_converters/{max_pool_2d_converter.py => max_pool2d_with_indices_converter.py} (80%) delete mode 100644 backends/nxp/edge_passes/remove_max_pool_getitem_pass.py delete mode 100644 backends/nxp/tests/ir/edge_passes/test_remove_max_pool_get_item.py diff --git a/backends/nxp/backend/edge_program_converter.py b/backends/nxp/backend/edge_program_converter.py index 2d58fa193d8..37edde42856 100644 --- a/backends/nxp/backend/edge_program_converter.py +++ b/backends/nxp/backend/edge_program_converter.py @@ -40,7 +40,7 @@ exir_ops.edge.aten.convolution.default: ConvolutionConverter, # noqa F405 exir_ops.edge.aten.hardtanh.default: HardTanhConverter, # noqa F405 exir_ops.edge.aten.leaky_relu.default: LeakyReluConverter, # noqa F405 - exir_ops.edge.aten.max_pool2d.default: MaxPool2dConverter, # noqa F405 + exir_ops.edge.aten.max_pool2d_with_indices.default: MaxPool2DWithIndicesConverter, # noqa F405 exir_ops.edge.aten.mean.dim: MeanDimConverter, # noqa F405 exir_ops.edge.aten.mm.default: MMConverter, # noqa F405 exir_ops.edge.aten.mul.Tensor: MulTensorConverter, # noqa F405 diff --git a/backends/nxp/backend/ir/converter/builder/quantization_verification.py b/backends/nxp/backend/ir/converter/builder/quantization_verification.py index 25989123385..648e595c2a6 100755 --- a/backends/nxp/backend/ir/converter/builder/quantization_verification.py +++ 
b/backends/nxp/backend/ir/converter/builder/quantization_verification.py @@ -86,13 +86,23 @@ def valid(self, op: tflite_model.Operator) -> bool: first_quantization = shared_tensors[0].quantization # Check quantization values (scales & zero-points) - scales_same = all( - first_quantization.scale == t.quantization.scale for t in shared_tensors[1:] - ) - zp_same = all( - first_quantization.zero_point == t.quantization.zero_point - for t in shared_tensors[1:] - ) + try: + scales_same = all( + first_quantization.scale == t.quantization.scale + for t in shared_tensors[1:] + ) + zp_same = all( + first_quantization.zero_point == t.quantization.zero_point + for t in shared_tensors[1:] + ) + except AttributeError: + # Common error when one of the tensors is not quantized. + logger.w( + f"NXP backend: The Neutron IR operator {op.builtin_options} is not quantized correctly. " + "Please report this." + ) + return False + return scales_same and zp_same def __str__(self): diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py index ba4c9149702..409f6e35973 100755 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py @@ -37,8 +37,8 @@ from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.leaky_relu_converter import ( LeakyReluConverter, ) -from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.max_pool_2d_converter import ( - MaxPool2dConverter, +from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.max_pool2d_with_indices_converter import ( + MaxPool2DWithIndicesConverter, ) from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.mean_dim_converter import ( MeanDimConverter, @@ -107,7 +107,7 @@ "GetItemConverter", "HardTanhConverter", 
"LeakyReluConverter", - "MaxPool2dConverter", + "MaxPool2DWithIndicesConverter", "MeanDimConverter", "MMConverter", "MulTensorConverter", diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool_2d_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py similarity index 80% rename from backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool_2d_converter.py rename to backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py index e5c1d1f4be2..d8b3cdb3707 100644 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool_2d_converter.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py @@ -3,6 +3,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import operator + import numpy as np from executorch.backends.nxp.backend.edge_helper import try_get_arg @@ -30,10 +32,7 @@ CeilMode = bool -class MaxPool2dConverter(NodeConverter): - """Convert 'max_pool2d' operator to TFLite 'MaxPool2D'. - NOTE: max_pool2d_with_indices is a different operator and is unsupported. - """ +class MaxPool2DWithIndicesConverter(NodeConverter): @staticmethod def _is_supported_in_IR( @@ -42,7 +41,7 @@ def _is_supported_in_IR( custom_delegation_options: CustomDelegationOptions, ) -> bool: kernel_size, stride, padding, dilation, ceil_mode = ( - MaxPool2dConverter._get_node_args(node) + MaxPool2DWithIndicesConverter._get_node_args(node) ) if dilation != (1, 1): @@ -56,6 +55,11 @@ def _is_supported_in_IR( if not NodeConverter._has_shared_q_params_if_quantized(node): return False + # The second output cannot be represented in Neutron IR. If it's used, do not delegate. 
+ getitem_nodes = list(node.users) + if any(n.args[1] == 1 for n in getitem_nodes if n.target == operator.getitem): + return False + return True @staticmethod @@ -66,10 +70,10 @@ def _is_supported_on_target( custom_delegation_options: CustomDelegationOptions, ) -> bool: kernel_size, stride, padding, dilation, ceil_mode = ( - MaxPool2dConverter._get_node_args(node) + MaxPool2DWithIndicesConverter._get_node_args(node) ) - output_shape = node.meta["val"].shape + output_shape = node.meta["val"][0].shape # Shape of the main output (index 0) if output_shape[0] != 1: # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106 return False @@ -123,9 +127,9 @@ def _get_pad_constant_value(input_type: TensorType) -> np.ndarray: def _get_node_args( node: Node, ) -> tuple[KernelSize, Stride, Padding, Dilation, CeilMode]: - """Extract and return `aten.max_pool2d` arguments from the node. + """Extract and return `aten.max_pool2d_with_indices` arguments from the node. - :param node: The node representing the `aten.max_pool2d` operation. + :param node: The node representing the `aten.max_pool2d_with_indices` operation. :return: Tuple of (kernel_size, stride, padding, dilation, ceil_mode). """ kernel_size = node.args[1] @@ -139,16 +143,23 @@ def _get_node_args( return kernel_size, stride, padding, dilation, ceil_mode def convert(self, node: Node): - """Convert the `aten.max_pool2d.default` operator to Neutron IR `MaxPool2D`. + """Convert the `aten.max_pool2d_with_indices.default` operator to Neutron IR `MaxPool2D`. The schema is: - aten::max_pool2d( + aten::max_pool2d_with_indices( Tensor self, int[2] kernel_size, - int[2] stride=[], # The default value is equal to the kernel_size. + int[2] stride=[], # The default value is equal to the kernel_size. int[2] padding=0, int[2] dilation=1, bool ceil_mode=False - ) -> Tensor + ) -> (Tensor, Tensor) + + It produces 2 output tensors: + 1. The first one contains the maximum values selected by the kernel. + 2. 
The second one contains the indices of the selected values. + + The second output tensor cannot be represented in Neutron IR. So the operator is only supported when the second + output is unused. """ self.assert_convertible(node) @@ -174,4 +185,8 @@ def convert(self, node: Node): ) ops.add_pre(pad_op) + # The second output of the operator cannot be represented in NeutronIR. The `_is_supported_in_IR()` method + # ensures the second output is never used in the model, so it can be safely removed here. + t_op.tmp_outputs[1:] = [] + self.builder.append_operators(ops.flatten()) diff --git a/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py b/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py index 75ed6c34c0f..0a0f6641f4b 100644 --- a/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py +++ b/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py @@ -3,6 +3,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import operator + import torch from executorch.backends.nxp.edge_passes.neutron_edge_pass import NeutronEdgePass @@ -16,10 +18,11 @@ # Operator aliases for better readability. 
AddMM = exir_ops.edge.aten.addmm.default AvgPool2D = exir_ops.edge.aten.avg_pool2d.default -MaxPool2D = exir_ops.edge.aten.max_pool2d.default +MaxPool2D = exir_ops.edge.aten.max_pool2d_with_indices.default Conv = exir_ops.edge.aten.convolution.default Clone = exir_ops.edge.aten.clone.default CloneDimOrder = exir_ops.edge.dim_order_ops._clone_dim_order.default +Getitem = operator.getitem HardTanh = exir_ops.edge.aten.hardtanh.default MM = exir_ops.edge.aten.mm.default Relu = exir_ops.edge.aten.relu.default @@ -120,7 +123,7 @@ class MoveLeadingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass): ViewCopy, UnsqueezeCopy, ], - # MaxPool1D is represented in edge as Unsqueeze -> MaxPool2D -> Squeeze. The reshaping nodes must be moved out + # MaxPool1D is represented in edge as Unsqueeze -> MaxPool2D -> Getitem -> Squeeze. The reshaping nodes must be moved out # of the cluster. Instead of [Un]squeeze, ViewCopy can be used as well. MaxPool2D: [ ViewCopy, @@ -230,9 +233,9 @@ class MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass): ViewCopy, SqueezeCopy, ], - # MaxPool1D is represented in edge as Unsqueeze -> MaxPool2D -> Squeeze. The reshaping nodes must be moved out + # MaxPool1D is represented in edge as Unsqueeze -> MaxPool2D -> Getitem -> Squeeze. The reshaping nodes must be moved out # of the cluster. Instead of [Un]squeeze, ViewCopy can be used as well. - MaxPool2D: [ + Getitem: [ ViewCopy, SqueezeCopy, ], @@ -268,7 +271,14 @@ def run(self, graph_module: torch.fx.GraphModule) -> PassResult: continue # Make sure the nodes are part of the same QDQ cluster. - cluster = QDQClusterRecognizer().get_qdq_cluster(main_cluster_node) + # In the use case where `main_cluster_node` is mapped to a `getitem`, its parent node must be used to + # satisfy the requirements of the `QDQClusterRecognizer`. 
+ actual_main_cluster_node = ( + main_cluster_node + if main_cluster_node.target != Getitem + else main_cluster_node.args[0] + ) + cluster = QDQClusterRecognizer().get_qdq_cluster(actual_main_cluster_node) if any( node_ not in cluster for node_ in [quantize_node, aux_node, main_cluster_node] diff --git a/backends/nxp/edge_passes/neutron_edge_pass_manager.py b/backends/nxp/edge_passes/neutron_edge_pass_manager.py index 383305a8573..563537c53af 100644 --- a/backends/nxp/edge_passes/neutron_edge_pass_manager.py +++ b/backends/nxp/edge_passes/neutron_edge_pass_manager.py @@ -15,9 +15,6 @@ RemoveUselessAsStridedCopyNodes, ) -from executorch.backends.nxp.edge_passes.remove_max_pool_getitem_pass import ( - RemoveMaxPoolGetItemPass, -) from torch.fx.passes.infra.pass_manager import PassManager @@ -29,7 +26,6 @@ def __init__(self, passes: list[NeutronEdgePass] = None): MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(), RemoveUselessAsStridedCopyNodes(), ConvertReshapingNodesToViewPass(), - RemoveMaxPoolGetItemPass(), ] super().__init__( diff --git a/backends/nxp/edge_passes/remove_max_pool_getitem_pass.py b/backends/nxp/edge_passes/remove_max_pool_getitem_pass.py deleted file mode 100644 index 315d5248f5c..00000000000 --- a/backends/nxp/edge_passes/remove_max_pool_getitem_pass.py +++ /dev/null @@ -1,77 +0,0 @@ -# Copyright 2026 NXP -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import operator - -import torch - -from executorch.backends.nxp.edge_passes.neutron_edge_pass import NeutronEdgePass - -# noinspection PyProtectedMember -from executorch.exir.dialects._ops import ops as exir_ops -from executorch.exir.pass_base import PassResult - - -class RemoveMaxPoolGetItemPass(NeutronEdgePass): - """Replace nodes in the following pattern: - - │ - ┌────────────────▼────────────────┐ - │ max_pool2d_with_indices.default │ - └────────────────┬────────────────┘ │ - │ replace with ┌──────────▼─────────┐ - │ ──────────────► │ max_pool2d.default │ - ┌──────▼─────┐ └──────────┬─────────┘ - │ getitem[0] │ (extract max values only) ▼ - └──────┬─────┘ - │ - ▼ - - This transformation is necessary because Neutron does not support returning the indices of the maximum values. - """ - - def run(self, graph_module: torch.fx.GraphModule) -> PassResult: - for node in graph_module.graph.nodes: - if not ( - node.op == "call_function" - and node.target == exir_ops.edge.aten.max_pool2d_with_indices.default - ): - continue - - if len(users := list(node.users)) != 1: - continue # Unexpected case. - - if (getitem_node := users[0]).target != operator.getitem: - continue # Unexpected case. - - if getitem_node.args[1] != 0: - # The index of the output tensor. Only `0` is supported as index `1` holds the indices from which the - # max values were selected, which cannot be done on Neutron. - continue - - with graph_module.graph.inserting_before(node): - new_max_pool_2d = graph_module.graph.create_node( - "call_function", - exir_ops.edge.aten.max_pool2d.default, - args=node.args, - kwargs=node.kwargs, - ) - - # Attach the rest of the model to the `aten.max_pool2d.default`. - getitem_node.replace_all_uses_with(new_max_pool_2d) - - # Remove the old nodes. - graph_module.graph.erase_node(getitem_node) - graph_module.graph.erase_node(node) - - # Recompile the graph. 
- graph_module.graph.eliminate_dead_code() - graph_module.recompile() - - # Return now to avoid traversing a modified graph. The parent class will call this pass again if needed. - return PassResult(graph_module, True) - - # No changes were made. - return PassResult(graph_module, False) diff --git a/backends/nxp/neutron_partitioner.py b/backends/nxp/neutron_partitioner.py index b4fe1e79472..eac03e408f0 100644 --- a/backends/nxp/neutron_partitioner.py +++ b/backends/nxp/neutron_partitioner.py @@ -213,7 +213,7 @@ def tag_qdq_clusters(self, nodes: list[torch.fx.Node]): exir_ops.edge.aten.convolution.default: ConvolutionConverter, # noqa F405 exir_ops.edge.aten.hardtanh.default: HardTanhConverter, # noqa F405 exir_ops.edge.aten.leaky_relu.default: LeakyReluConverter, # noqa F405 - exir_ops.edge.aten.max_pool2d.default: MaxPool2dConverter, # noqa F405 + exir_ops.edge.aten.max_pool2d_with_indices.default: MaxPool2DWithIndicesConverter, # noqa F405 exir_ops.edge.aten.mean.dim: MeanDimConverter, # noqa F405 exir_ops.edge.aten.mm.default: MMConverter, # noqa F405 exir_ops.edge.aten.mul.Tensor: MulTensorConverter, # noqa F405 diff --git a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index 5567d5567bb..6bb1000b38b 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -3,6 +3,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+import operator + import numpy as np import pytest import torch @@ -23,7 +25,8 @@ from executorch.exir.dialects._ops import ops as exir_ops ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate -MaxPool2D = exir_ops.edge.aten.max_pool2d.default +GetItem = operator.getitem +MaxPool2D = exir_ops.edge.aten.max_pool2d_with_indices.default Squeeze = exir_ops.edge.aten.squeeze.default SqueezeDim = exir_ops.edge.aten.squeeze.dim SqueezeDims = exir_ops.edge.aten.squeeze.dims @@ -88,6 +91,7 @@ def _verify_successful_delegation(module, converter_spy, input_shape): # Make sure the tested program contains the `MaxPool`. assert graph_contains_any_of_ops(edge_partition.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_partition.graph, [GetItem]) convert_run_compare( edge_partition, @@ -135,6 +139,7 @@ def _verify_no_delegation(module, input_shape): ).exported_program() assert graph_contains_any_of_ops(edge_model.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_model.graph, [GetItem]) assert not graph_contains_any_of_ops(edge_model.graph, [ExecutorchDelegateCall]) def test_unsupported_dilation(self): @@ -236,6 +241,7 @@ def test_max_pool_2d__from_1d(self, mocker): # Make sure the tested program contains the `MaxPool`. assert graph_contains_any_of_ops(edge_partition.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_partition.graph, [GetItem]) convert_run_compare( edge_partition, diff --git a/backends/nxp/tests/ir/edge_passes/test_remove_max_pool_get_item.py b/backends/nxp/tests/ir/edge_passes/test_remove_max_pool_get_item.py deleted file mode 100644 index e864f9eb2d7..00000000000 --- a/backends/nxp/tests/ir/edge_passes/test_remove_max_pool_get_item.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright 2026 NXP -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import operator - -import torch - -from executorch.backends.nxp.edge_passes.remove_max_pool_getitem_pass import ( - RemoveMaxPoolGetItemPass, -) -from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program -from executorch.backends.nxp.tests.executors import graph_contains_any_of_ops - -# noinspection PyProtectedMember -from executorch.exir.dialects._ops import ops as exir_ops - - -class MaxPool2dModule(torch.nn.Module): - def __init__(self, kernel_size=3, **kwargs): - super().__init__() - self.max_pool2d = torch.nn.MaxPool2d(kernel_size, **kwargs) - - def forward(self, x): - return self.max_pool2d(x) - - -def test_remove_max_pool_get_item_pass(mocker): - model = MaxPool2dModule() - input_shape = (1, 3, 12, 12) - - # Spy on the pass. - spy = mocker.spy(RemoveMaxPoolGetItemPass, "run") - - edge_program = to_quantized_edge_program( - model, - input_shape, - ).exported_program() - - # We cannot extract the graph before the pass, because it is modified inplace. So accessing the 2nd argument of the - # first call of the pass (which is the graph) returns the graph which is already modified by the pass. - # But at least we can access the return value to determine if the pass made a modification. - assert spy.spy_return_list[0].modified, "The pass did not modify the graph." - - # Make sure the `aten.max_pool2d_with_indices.default` and `getitem` were replaced by `aten.max_pool2d.default`. - assert not graph_contains_any_of_ops( - edge_program.graph, - [exir_ops.edge.aten.max_pool2d_with_indices.default, operator.getitem], - ) - assert graph_contains_any_of_ops( - edge_program.graph, - [ - exir_ops.edge.aten.max_pool2d.default, - ], - )