diff --git a/backends/nxp/_passes/remove_getitem_pass.py b/backends/nxp/_passes/remove_getitem_pass.py deleted file mode 100644 index 6e5f2535746..00000000000 --- a/backends/nxp/_passes/remove_getitem_pass.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. -# Copyright 2025-2026 NXP -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -import torch - -from executorch.backends.nxp.backend.data_format import DataFormat, NXP_NODE_FORMAT -from executorch.exir.dialects._ops import ops as exir_ops -from executorch.exir.pass_base import ExportPass, PassResult - - -class RemoveGetItemPass(ExportPass): - """ - This remove item is used to remove getitem operator for max_pool2d_with_indices.default operator, and replace it with a single operator, - that extracts the first output. More specifically, we are only getting the first output from aten::maxpool2d operator. - Before Pass: - MaxPool2d ---> GetItem[max_values, max_indexes] - After Pass: - MaxPool2d -> max_values - """ - - def call(self, graph_module: torch.fx.GraphModule): - module = graph_module - for node in module.graph.nodes: - if node.op == "call_function": - if ( - node.target.__name__ == "aten.max_pool2d_with_indices.default" - or node.target.__name__ == "aten.max.dim" - ): - users = list(node.users.keys()) - - if len(users) != 1: - if len(users) == 2 and node.target.__name__ == "aten.max.dim": - # Two users is allowed for max.dim. 
For that case, - # rather than removing the getitem node in this - # pass, we handle the getitem nodes in the op's - # visitor when serializing - continue - else: - raise AssertionError( - f"Invalid number of users for {node.target.__name__}: {len(users)}" - ) - - getitem_node = list(node.users.keys())[0] - - if getitem_node.target.__name__ != "getitem": - raise AssertionError( - f"Expected max node's user to be getitem, got {getitem_node.target.__name__}" - ) - - getitem_index = getitem_node.args[1] - - with module.graph.inserting_before(node): - if ( - node.target.__name__ - == "aten.max_pool2d_with_indices.default" - ): - if getitem_index != 0: - raise AssertionError( - f"Expected second argument of getitem node for {node.target.__name__} to be 0, got " - f"{getitem_index}. XNNPACK delegate currently only supports getting just the max " - "values from the op but not getting the corresponding indices." - ) - new_max_wd = module.graph.create_node( - "call_function", - exir_ops.edge.aten.max_pool2d.default, - args=node.args, - kwargs=node.kwargs, - ) - - else: - if getitem_index != 0: - raise AssertionError( - f"Expected second argument of getitem node for {node.target.__name__} to be 0, got " - f"{getitem_index}. XNNPACK delegate currently only supports getting just the max " - "values or getting both the max values and their corresponding indices from the " - "op, but not getting the indices alone." - ) - new_max_wd = module.graph.create_node( - "call_function", - exir_ops.edge.aten.amax.default, - args=node.args, - kwargs=node.kwargs, - ) - - # MODIFIED PART START - # Make sure to preserve the inferred node format. 
- new_max_wd.meta[NXP_NODE_FORMAT] = node.meta.get( - NXP_NODE_FORMAT, DataFormat.NONE - ) - # MODIFIED PART END - - getitem_node.replace_all_uses_with(new_max_wd) - - module.graph.erase_node(getitem_node) - module.graph.erase_node(node) - - graph_module.recompile() - # Propagate metadata and retrace module - graph_module = super().call(graph_module).graph_module - - return PassResult(graph_module, True) diff --git a/backends/nxp/backend/edge_program_converter.py b/backends/nxp/backend/edge_program_converter.py index 114f55c64ee..37edde42856 100644 --- a/backends/nxp/backend/edge_program_converter.py +++ b/backends/nxp/backend/edge_program_converter.py @@ -3,6 +3,8 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import operator + import executorch.backends.nxp.backend.ir.logger as logger import flatbuffers from executorch.backends.nxp.backend.ir.conversion_config import ConversionConfig @@ -38,7 +40,7 @@ exir_ops.edge.aten.convolution.default: ConvolutionConverter, # noqa F405 exir_ops.edge.aten.hardtanh.default: HardTanhConverter, # noqa F405 exir_ops.edge.aten.leaky_relu.default: LeakyReluConverter, # noqa F405 - exir_ops.edge.aten.max_pool2d.default: MaxPool2dConverter, # noqa F405 + exir_ops.edge.aten.max_pool2d_with_indices.default: MaxPool2DWithIndicesConverter, # noqa F405 exir_ops.edge.aten.mean.dim: MeanDimConverter, # noqa F405 exir_ops.edge.aten.mm.default: MMConverter, # noqa F405 exir_ops.edge.aten.mul.Tensor: MulTensorConverter, # noqa F405 @@ -57,6 +59,9 @@ } +NXP_PROCESSED_TAG = "NXP_PROCESSED_TAG" + + class EdgeProgramToIRConverter: """ Converter from convertion of ExportedProgram in Edge dialect to IR (TFLite Flatbuffers). 
@@ -159,6 +164,11 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex if node.target in qdq_related_functions and "cluster" in node.meta: # Skip (De)Quantize nodes that were already processed pass + elif node.target == operator.getitem and node.meta.get( + NXP_PROCESSED_TAG, False + ): + # The node was already processed alongside the Q/DQ ops. + pass elif node.target in functions_converters: functions_converters[node.target](conversion_context).convert(node) else: @@ -264,9 +274,8 @@ def build_conversion_context( def _convert_qdq_cluster_q_dq_nodes( self, nodes: list[Node], conversion_context: ConversionContext ): - """ - Go through program and convert De(Quantize) nodes that are part of the QDQ cluster into - tensors. + """Go through the program and convert [De]Quantize nodes that are part of a QDQ cluster into tensors. + Also convert related `GetItem` nodes to NO-OPs, which just propagate the quantization. :param nodes: Program's nodes. :param conversion_context: ConversionContext instance. @@ -285,3 +294,14 @@ def _convert_qdq_cluster_q_dq_nodes( and part_of_qdq_cluster ): qdq_q_ops_converters[node.target](conversion_context).convert(node) + + # Usually, `getitem` nodes are a part of a "foreign" QDQ cluster. They consume the output of the main compute + # operator, and they are followed by a `Quantize` operator, which specifies the output quantization parameters + # of the cluster. So the input of the `GetItem` is float32, and the output is quantized. Due to how the Neutron + # IR represents quantization, the quantization parameters must be propagated from the output to the input. + for node in nodes: + if node.target == operator.getitem: + # Convert the builtin function into a "NO-OP" in the IR, and propagate the quantization parameters in + # reverse. 
+ GetItemConverter(conversion_context).convert(node) # noqa: F405 + node.meta[NXP_PROCESSED_TAG] = True diff --git a/backends/nxp/backend/ir/converter/builder/aten_model_builder_director.py b/backends/nxp/backend/ir/converter/builder/aten_model_builder_director.py index d4c4d96a5c6..4e7f706afbd 100644 --- a/backends/nxp/backend/ir/converter/builder/aten_model_builder_director.py +++ b/backends/nxp/backend/ir/converter/builder/aten_model_builder_director.py @@ -8,6 +8,10 @@ ModelBuilder, ) from executorch.backends.nxp.backend.ir.converter.conversion import translator + +from executorch.backends.nxp.backend.ir.converter.tensor_utils import ( + get_name_of_node_output, +) from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model from torch.fx import Node from torch.nn import Parameter @@ -30,19 +34,26 @@ def append_as_fake_tensor(self, node: Node, node_format: DataFormat): if self.tensor_exists(node.name): return - tensor = node.meta["val"] - if isinstance(tensor, tuple): - tensor = tensor[0] # Fake tensor - _type = translator.convert_data_type(tensor.dtype) - shape = list(tensor.shape) + def _append_tensor(tensor_, name=None): + type_ = translator.convert_data_type(tensor_.dtype) + shape = list(tensor_.shape) - if node_format.is_channels_first(): - shape = translator.dims_to_channels_last(shape) + if node_format.is_channels_first(): + shape = translator.dims_to_channels_last(shape) - tensor = self.create_empty_tensor(node.name, _type, shape) - tensor.tensor_format = DataFormat.convert_executorch_format_to_neutron( - node_format - ) + tensor = self.create_empty_tensor(name or node.name, type_, shape) + tensor.tensor_format = DataFormat.convert_executorch_format_to_neutron( + node_format + ) + + tensor_or_tuple = node.meta["val"] + if isinstance(tensor_or_tuple, tuple): + # The `node` can produce multiple output tensors, which are represented using this tuple. 
+ for i, t in enumerate(tensor_or_tuple): + _append_tensor(t, get_name_of_node_output(node, i)) + + else: + _append_tensor(tensor_or_tuple) def append_as_static_tensor( self, node: Node, node_format: DataFormat, tensor: Parameter diff --git a/backends/nxp/backend/ir/converter/builder/quantization_verification.py b/backends/nxp/backend/ir/converter/builder/quantization_verification.py index 25989123385..648e595c2a6 100755 --- a/backends/nxp/backend/ir/converter/builder/quantization_verification.py +++ b/backends/nxp/backend/ir/converter/builder/quantization_verification.py @@ -86,13 +86,23 @@ def valid(self, op: tflite_model.Operator) -> bool: first_quantization = shared_tensors[0].quantization # Check quantization values (scales & zero-points) - scales_same = all( - first_quantization.scale == t.quantization.scale for t in shared_tensors[1:] - ) - zp_same = all( - first_quantization.zero_point == t.quantization.zero_point - for t in shared_tensors[1:] - ) + try: + scales_same = all( + first_quantization.scale == t.quantization.scale + for t in shared_tensors[1:] + ) + zp_same = all( + first_quantization.zero_point == t.quantization.zero_point + for t in shared_tensors[1:] + ) + except AttributeError: + # Common error when one of the tensors is not quantized. + logger.w( + f"NXP backend: The Neutron IR operator {op.builtin_options} is not quantized correctly. " + "Please report this." + ) + return False + return scales_same and zp_same def __str__(self): diff --git a/backends/nxp/backend/ir/converter/conversion/common.py b/backends/nxp/backend/ir/converter/conversion/common.py index 318fe66dfbd..9186f5d0ac6 100755 --- a/backends/nxp/backend/ir/converter/conversion/common.py +++ b/backends/nxp/backend/ir/converter/conversion/common.py @@ -1,6 +1,6 @@ # # Copyright 2023 Martin Pavella -# Copyright 2023-2025 NXP +# Copyright 2023-2026 NXP # # License: MIT # See the LICENSE_MIT for more details. @@ -12,7 +12,7 @@ 'conversion/builtin/' directory. 
""" -from typing import List, MutableSequence, Optional +from typing import List, MutableSequence, Optional, Sequence import executorch.backends.nxp.backend.ir.logger as logger from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model @@ -71,7 +71,7 @@ def extend_1d_dilation_to_2d(tflite_1d_dilation: MutableSequence): ) -def assign_2d_strides(options: StridedOptions, strides: Optional[List[int]]): +def assign_2d_strides(options: StridedOptions, strides: Sequence[int] | None): """Assign to 'obj' the attributes 'stride_h' and 'stride_w' from 'strides'. If 'strides' is None, assign 1s. diff --git a/backends/nxp/backend/ir/converter/node_converter.py b/backends/nxp/backend/ir/converter/node_converter.py index 623ba97ba73..aef7d8f2104 100755 --- a/backends/nxp/backend/ir/converter/node_converter.py +++ b/backends/nxp/backend/ir/converter/node_converter.py @@ -3,6 +3,7 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
+import operator from abc import ABC, abstractmethod import torch @@ -14,6 +15,10 @@ from executorch.backends.nxp.backend.ir.converter.builder.aten_model_builder_director import ( AtenModelBuilderDirector, ) + +from executorch.backends.nxp.backend.ir.converter.tensor_utils import ( + get_name_of_node_output, +) from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec from executorch.exir.dialects._ops import ops as exir_ops @@ -231,25 +236,45 @@ def _create_tflite_op_with_io_tensors(self, node: Node) -> tflite_model.Operator # Initialize node's inputs t_operator.inputs = tflite_model.OperatorInputs() - input_nodes = [] - for arg in node.args: - match arg: - case Node(): - input_nodes.append(arg) - case list() if all(isinstance(node_, Node) for node_ in arg): - input_nodes.extend(arg) - - for ancestor_node in input_nodes: - assert self.context.tflite_builder.tensor_exists(ancestor_node.name) - t_operator.tmp_inputs.append( - self.context.tflite_builder.tensor_for_name(ancestor_node.name) - ) - - # Add node's output as a new tensor - assert self.context.tflite_builder.tensor_exists(node.name) - t_operator.outputs = tflite_model.OperatorOutputs() - t_operator.tmp_outputs.append( - self.context.tflite_builder.tensor_for_name(node.name) + if node.target == operator.getitem: + # Special case of a builtin function, which can extract a specific output tensor from the previous node. + previous_node = node.args[0] + output_index = node.args[1] + input_name = get_name_of_node_output(previous_node, output_index) + assert self.builder.tensor_exists(input_name) + t_operator.tmp_inputs.append(self.builder.tensor_for_name(input_name)) + + else: + # Regular operator. 
+ input_nodes = [] + for arg in node.args: + match arg: + case Node(): + input_nodes.append(arg) + case list() if all(isinstance(node_, Node) for node_ in arg): + input_nodes.extend(arg) + + for ancestor_node in input_nodes: + assert self.context.tflite_builder.tensor_exists(ancestor_node.name) + t_operator.tmp_inputs.append( + self.context.tflite_builder.tensor_for_name(ancestor_node.name) + ) + + # Add node's outputs as a new tensors + num_outputs = ( + len(node.meta["val"]) if isinstance(node.meta["val"], tuple) else 1 ) + if num_outputs == 1: + # Single output node. + assert self.builder.tensor_exists(node.name) + t_operator.outputs = tflite_model.OperatorOutputs() + t_operator.tmp_outputs.append(self.builder.tensor_for_name(node.name)) + else: + # The node has multiple outputs. + t_operator.outputs = tflite_model.OperatorOutputs() + for output_index in range(num_outputs): + tensor_name = get_name_of_node_output(node, output_index) + assert self.builder.tensor_exists(tensor_name) + t_operator.tmp_outputs.append(self.builder.tensor_for_name(tensor_name)) return t_operator diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py index 7463bef1bfa..409f6e35973 100755 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/__init__.py @@ -28,14 +28,17 @@ from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.convolution_converter import ( ConvolutionConverter, ) +from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.getitem_converter import ( + GetItemConverter, +) from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.hardtanh_converter import ( HardTanhConverter, ) from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.leaky_relu_converter import ( 
LeakyReluConverter, ) -from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.max_pool_2d_converter import ( - MaxPool2dConverter, +from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.max_pool2d_with_indices_converter import ( + MaxPool2DWithIndicesConverter, ) from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.mean_dim_converter import ( MeanDimConverter, @@ -101,9 +104,10 @@ "CloneConverter", "ConstantPadNDConverter", "ConvolutionConverter", + "GetItemConverter", "HardTanhConverter", "LeakyReluConverter", - "MaxPool2dConverter", + "MaxPool2DWithIndicesConverter", "MeanDimConverter", "MMConverter", "MulTensorConverter", diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/getitem_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/getitem_converter.py new file mode 100644 index 00000000000..81e9b01b220 --- /dev/null +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/getitem_converter.py @@ -0,0 +1,45 @@ +# Copyright 2025-2026 NXP +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ + +from executorch.backends.nxp.backend.custom_delegation_options import ( + CustomDelegationOptions, +) +from executorch.backends.nxp.backend.ir.converter.node_converter import NodeConverter +from executorch.backends.nxp.backend.ir.converter.quantization_utils import ( + propagate_quantization, +) +from torch.fx import Node +from torch.nn import Parameter + + +class GetItemConverter(NodeConverter): + + @staticmethod + def _is_supported_in_IR( + node: Node, + parameters_mapping: dict[str, Parameter], + custom_delegation_options: CustomDelegationOptions, + ) -> bool: + return True + + def convert(self, node: Node): + """Skip the `GetItem` node, as it serves no purpose in NeutronIR.""" + self.assert_convertible(node) + + t_op = self._create_tflite_op_with_io_tensors(node) + + # Usually, `getitem` nodes are a part of a "foreign" QDQ cluster. They consume the output of the main compute + # operator, and they are followed by a `Quantize` operator, which specifies the output quantization parameters + # of the cluster. So the input of the `GetItem` is float32, and the output is quantized. Due to how the Neutron + # IR represents quantization, the quantization parameters must be propagated from the output to the input. 
+ input_ = t_op.tmp_inputs[0] + output = t_op.tmp_outputs[0] + if input_.quantization is None and output.quantization is not None: + input_.type = output.type + propagate_quantization(from_tensor=output, to_tensor=input_) + + self.builder.turn_operator_to_identity(t_op) + self.builder.append_operators([t_op]) diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py new file mode 100644 index 00000000000..d8b3cdb3707 --- /dev/null +++ b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool2d_with_indices_converter.py @@ -0,0 +1,192 @@ +# Copyright 2024-2026 NXP +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +import operator + +import numpy as np + +from executorch.backends.nxp.backend.edge_helper import try_get_arg +from executorch.backends.nxp.backend.ir.converter.conversion import ( + aten_translator, + common, +) +from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList +from executorch.backends.nxp.backend.ir.converter.node_converter import ( + CustomDelegationOptions, + NodeConverter, +) +from executorch.backends.nxp.backend.ir.lib.tflite.TensorType import TensorType +from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options.max_pool_2d_options import ( + MaxPool2D, +) +from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec +from torch.fx import Node +from torch.nn import Parameter + +KernelSize = tuple[int, int] +Stride = tuple[int, int] +Padding = tuple[int, int] +Dilation = tuple[int, int] +CeilMode = bool + + +class MaxPool2DWithIndicesConverter(NodeConverter): + + @staticmethod + def _is_supported_in_IR( + node: Node, + parameters_mapping: dict[str, Parameter], + custom_delegation_options: CustomDelegationOptions, + ) -> 
bool: + kernel_size, stride, padding, dilation, ceil_mode = ( + MaxPool2DWithIndicesConverter._get_node_args(node) + ) + + if dilation != (1, 1): + # The Neutron IR MaxPool2D does not support dilation. + return False + + if ceil_mode: + # This argument affects how the output shape is computed. Neutron IR only supports the default `False`. + return False + + if not NodeConverter._has_shared_q_params_if_quantized(node): + return False + + # The second output cannot be represented in Neutron IR. If it's used, do not delegate. + getitem_nodes = list(node.users) + if any(n.args[1] == 1 for n in getitem_nodes if n.target == operator.getitem): + return False + + return True + + @staticmethod + def _is_supported_on_target( + node: Node, + neutron_target_spec: NeutronTargetSpec, + parameters_mapping: dict[str, Parameter], + custom_delegation_options: CustomDelegationOptions, + ) -> bool: + kernel_size, stride, padding, dilation, ceil_mode = ( + MaxPool2DWithIndicesConverter._get_node_args(node) + ) + + output_shape = node.meta["val"][0].shape # Shape of the main output (index 0) + if output_shape[0] != 1: + # /neutron-converter/src/OperatorC/MaxPoolPlugin.cpp?at=NEUTRON_SOFTWARE_2.2.2#106 + return False + + # Neutron only has a restriction on `stride_h`. `stride_w` is not restricted. 
+ stride_h = stride[0] + if stride_h not in (1, 2): + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#901 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#923 + return False + + channels = output_shape[1] + if channels % neutron_target_spec.get_num_macs() != 0: + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#903 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#925 + return False + + if any(pad > kernel_dim for pad, kernel_dim in zip(padding, kernel_size)): + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#904-907 + # /neutron-library/src/utils/NeutronLibraryInterrogation.cpp?at=refs%2Ftags%2FNEUTRON_SOFTWARE_2.2.2#926-929 + + # Cannot be tested as PyTorch crashes in this case. It requires the padding to be at most half of the + # effective kernel size, which is an even stricter requirement than what Neutron imposes. + # https://github.com/pytorch/pytorch/blob/449b1768410104d3ed79d3bcfe4ba1d65c7f22c0/torch/_meta_registrations.py#L4483-L4489 + return False + + return True + + @staticmethod + def _get_pad_constant_value(input_type: TensorType) -> np.ndarray: + """Get scalar NumPy array with constant value used as constant value for 'Pad' operator. + + :param input_type: Input tensor type. + :return: Scalar array with single minimum value of given type. + """ + + match input_type: + case TensorType.INT8: + return np.asarray([np.iinfo(np.int8).min], dtype=np.int8) + case TensorType.UINT8: + return np.asarray([np.iinfo(np.uint8).min], dtype=np.uint8) + case TensorType.FLOAT32: + return np.asarray([np.finfo(np.float32).min], dtype=np.float32) + case _: + # Should never happen. + raise RuntimeError( + f"Unexpected input type '{input_type}' for MaxPool operator." 
+ ) + + @staticmethod + def _get_node_args( + node: Node, + ) -> tuple[KernelSize, Stride, Padding, Dilation, CeilMode]: + """Extract and return `aten.max_pool2d_with_indices` arguments from the node. + + :param node: The node representing the `aten.max_pool2d_with_indices` operation. + :return: Tuple of (kernel_size, stride, padding, dilation, ceil_mode). + """ + kernel_size = node.args[1] + stride = node.args[ + 2 + ] # The default value is equal to the kernel_size, so it is never empty here. + padding = try_get_arg(node, 3) or (0, 0) + dilation = try_get_arg(node, 4) or (1, 1) + ceil_mode = try_get_arg(node, 5) or False + + return kernel_size, stride, padding, dilation, ceil_mode + + def convert(self, node: Node): + """Convert the `aten.max_pool2d_with_indices.default` operator to Neutron IR `MaxPool2D`. + The schema is: + aten::max_pool2d_with_indices( + Tensor self, + int[2] kernel_size, + int[2] stride=[], # The default value is equal to the kernel_size. + int[2] padding=0, + int[2] dilation=1, + bool ceil_mode=False + ) -> (Tensor, Tensor) + + It produces 2 output tensors: + 1. The first one contains the maximum values selected by the kernel. + 2. The second one contains the indices of the selected values. + + The second output tensor cannot be represented in Neutron IR. So the operator is only supported when the second + output is unused. + """ + self.assert_convertible(node) + + kernel_size, stride, padding, dilation, ceil_mode = self._get_node_args(node) + + t_op = self._create_tflite_op_with_io_tensors(node) + ops = OpsList(middle_op=t_op) + + x = t_op.tmp_inputs[0] + + t_op.builtin_options = MaxPool2D() + t_op.builtin_options.filter_h, t_op.builtin_options.filter_w = kernel_size + common.assign_2d_strides(t_op.builtin_options, stride) + + t_op.builtin_options.padding, explicit_padding = ( + aten_translator.convert_padding(list(padding)) + ) + if explicit_padding is not None: + # Need to prepend a 'Pad' operator, which adds min values for type. 
+ constant_value = self._get_pad_constant_value(x.type) + pad_op = self.builder.create_pad_operator_before( + t_op, 0, explicit_padding, constant_value=constant_value + ) + ops.add_pre(pad_op) + + # The second output of the operator cannot be represented in NeutronIR. The `_is_supported_in_IR()` method + # ensures the second output is never used in the model, so it can be safely removed here. + t_op.tmp_outputs[1:] = [] + + self.builder.append_operators(ops.flatten()) diff --git a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool_2d_converter.py b/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool_2d_converter.py deleted file mode 100644 index ce9a3697318..00000000000 --- a/backends/nxp/backend/ir/converter/node_converters/ops_converters/max_pool_2d_converter.py +++ /dev/null @@ -1,104 +0,0 @@ -# Copyright 2024-2025 NXP -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. -import numpy as np - -from executorch.backends.nxp.backend.ir.converter.conversion import ( - aten_translator, - common, -) -from executorch.backends.nxp.backend.ir.converter.conversion.common import OpsList -from executorch.backends.nxp.backend.ir.converter.node_converter import ( - CustomDelegationOptions, - NodeConverter, -) -from executorch.backends.nxp.backend.ir.lib.tflite.TensorType import TensorType -from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model -from executorch.backends.nxp.backend.ir.tflite_generator.builtin_options import ( - max_pool_2d_options, -) -from torch.fx import Node -from torch.nn import Parameter - - -class MaxPool2dConverter(NodeConverter): - """Convert 'max_pool2d' operator to TFLite 'MaxPool2D'. - NOTE: max_pool2d_with_indices is a different operator and is unsupported. 
- """ - - @staticmethod - def _is_supported_in_IR( - node: Node, - parameters_mapping: dict[str, Parameter], - custom_delegation_options: CustomDelegationOptions, - ) -> bool: - n_args = len(node.args) - - dilation = node.args[4] if n_args >= 5 else [1, 1] - ceil_mode = node.args[5] if n_args == 6 else False - - if any(dil != 1 for dil in dilation) or ceil_mode: - return False - - if not NodeConverter._has_shared_q_params_if_quantized(node): - return False - - return True - - def _get_pad_constant_value(self, input_type: TensorType) -> np.ndarray: - """ - Get scalar NumPy array with constant value used as constant value for 'Pad' operator. - - :param input_type: Input tensor type. - :return: Scalar array with single minimum value of given type. - """ - - match input_type: - case TensorType.INT8: - return np.asarray([np.iinfo(np.int8).min], dtype=np.int8) - case TensorType.UINT8: - return np.asarray([np.iinfo(np.uint8).min], dtype=np.uint8) - case TensorType.FLOAT32: - return np.asarray([np.finfo(np.float32).min], dtype=np.float32) - case _: - raise RuntimeError("Unexpected input type for MaxPool operator.") - - # noinspection PyMethodMayBeStatic - def _convert_2d_max_pool( - self, kernel_size, stride, padding, t_op: tflite_model.Operator - ) -> list[tflite_model.Operator]: - x = t_op.tmp_inputs[0] - - ops = OpsList(middle_op=t_op) - t_op.builtin_options = max_pool_2d_options.MaxPool2D() - t_op.builtin_options.filter_h = kernel_size[0] - t_op.builtin_options.filter_w = kernel_size[1] - common.assign_2d_strides(t_op.builtin_options, stride) - t_op.builtin_options.padding, explicit_padding = ( - aten_translator.convert_padding(padding) - ) - - if explicit_padding is not None: - # Need to prepend a 'Pad' operator, which adds min values for type. 
- constant_value = self._get_pad_constant_value(x.type) - pre_pad_op = self.builder.create_pad_operator_before( - t_op, 0, explicit_padding, constant_value=constant_value - ) - ops.add_pre(pre_pad_op) - - return ops.flatten() - - # Maxpool2d Node format: (Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) - def convert(self, node: Node): - self.assert_convertible(node) - - n_args = len(node.args) - - kernel_size = node.args[1] - stride = node.args[2] - padding = node.args[3] if n_args >= 4 else [0, 0] - - t_op = self._create_tflite_op_with_io_tensors(node) - ops_to_add = self._convert_2d_max_pool(kernel_size, stride, padding, t_op) - self.builder.append_operators(ops_to_add) diff --git a/backends/nxp/backend/ir/converter/tensor_utils.py b/backends/nxp/backend/ir/converter/tensor_utils.py index efa0bdc2a42..5b377a51fcf 100755 --- a/backends/nxp/backend/ir/converter/tensor_utils.py +++ b/backends/nxp/backend/ir/converter/tensor_utils.py @@ -1,4 +1,4 @@ -# Copyright 2024 NXP +# Copyright 2024-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -9,6 +9,7 @@ from executorch.backends.nxp.backend.ir.tflite_generator import ( tflite_model as tflite_model, ) +from torch.fx import Node def _buffer_has_data(t_buffer: tflite_model.Buffer) -> Optional[bool]: @@ -48,3 +49,7 @@ def all_tensors_are_static(*list_of_tensors) -> bool: """ return all(tensor_has_data(t) for t in list_of_tensors) + + +def get_name_of_node_output(node: Node, output_index: int) -> str: + return node.name + f"_{output_index}" diff --git a/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py b/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py index ab6e394c7ef..0a0f6641f4b 100644 --- a/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py +++ b/backends/nxp/edge_passes/move_auxiliary_operator_into_separate_qdq_cluster_pass.py @@ -3,10 +3,14 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import operator + import torch from executorch.backends.nxp.edge_passes.neutron_edge_pass import NeutronEdgePass from executorch.backends.nxp.neutron_partitioner import QDQClusterRecognizer + +# noinspection PyProtectedMember from executorch.exir.dialects._ops import ops as exir_ops from torch.fx import Node from torch.fx.passes.infra.pass_base import PassResult @@ -14,9 +18,11 @@ # Operator aliases for better readability. 
AddMM = exir_ops.edge.aten.addmm.default AvgPool2D = exir_ops.edge.aten.avg_pool2d.default +MaxPool2D = exir_ops.edge.aten.max_pool2d_with_indices.default Conv = exir_ops.edge.aten.convolution.default Clone = exir_ops.edge.aten.clone.default CloneDimOrder = exir_ops.edge.dim_order_ops._clone_dim_order.default +Getitem = operator.getitem HardTanh = exir_ops.edge.aten.hardtanh.default MM = exir_ops.edge.aten.mm.default Relu = exir_ops.edge.aten.relu.default @@ -117,6 +123,12 @@ class MoveLeadingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass): ViewCopy, UnsqueezeCopy, ], + # MaxPool1D is represented in edge as Unsqueeze -> MaxPool2D -> Getitem -> Squeeze. The reshaping nodes must be moved out + # of the cluster. Instead of [Un]squeeze, ViewCopy can be used as well. + MaxPool2D: [ + ViewCopy, + UnsqueezeCopy, + ], } def run(self, graph_module: torch.fx.GraphModule) -> PassResult: @@ -221,6 +233,12 @@ class MoveTrailingAuxiliaryOperatorIntoSeparateQDQClusterPass(NeutronEdgePass): ViewCopy, SqueezeCopy, ], + # MaxPool1D is represented in edge as Unsqueeze -> MaxPool2D -> Getitem -> Squeeze. The reshaping nodes must be moved out + # of the cluster. Instead of [Un]squeeze, ViewCopy can be used as well. + Getitem: [ + ViewCopy, + SqueezeCopy, + ], } def run(self, graph_module: torch.fx.GraphModule) -> PassResult: @@ -253,7 +271,14 @@ def run(self, graph_module: torch.fx.GraphModule) -> PassResult: continue # Make sure the nodes are part of the same QDQ cluster. - cluster = QDQClusterRecognizer().get_qdq_cluster(main_cluster_node) + # In the use case where `main_cluster_node` is mapped to a `getitem`, its parent node must be used to + # satisfy the requirements of the `QDQClusterRecognizer`. 
+ actual_main_cluster_node = ( + main_cluster_node + if main_cluster_node.target != Getitem + else main_cluster_node.args[0] + ) + cluster = QDQClusterRecognizer().get_qdq_cluster(actual_main_cluster_node) if any( node_ not in cluster for node_ in [quantize_node, aux_node, main_cluster_node] diff --git a/backends/nxp/edge_passes/neutron_edge_pass_manager.py b/backends/nxp/edge_passes/neutron_edge_pass_manager.py index 2252ff05a21..563537c53af 100644 --- a/backends/nxp/edge_passes/neutron_edge_pass_manager.py +++ b/backends/nxp/edge_passes/neutron_edge_pass_manager.py @@ -14,6 +14,7 @@ from executorch.backends.nxp.edge_passes.remove_as_strided_copy_nodes import ( RemoveUselessAsStridedCopyNodes, ) + from torch.fx.passes.infra.pass_manager import PassManager diff --git a/backends/nxp/neutron_partitioner.py b/backends/nxp/neutron_partitioner.py index 004f411dbbb..eac03e408f0 100644 --- a/backends/nxp/neutron_partitioner.py +++ b/backends/nxp/neutron_partitioner.py @@ -213,8 +213,7 @@ def tag_qdq_clusters(self, nodes: list[torch.fx.Node]): exir_ops.edge.aten.convolution.default: ConvolutionConverter, # noqa F405 exir_ops.edge.aten.hardtanh.default: HardTanhConverter, # noqa F405 exir_ops.edge.aten.leaky_relu.default: LeakyReluConverter, # noqa F405 - exir_ops.edge.aten.max_pool2d.default: MaxPool2dConverter, # noqa F405 - exir_ops.edge.aten.max_pool2d_with_indices.default: MaxPool2dConverter, # noqa F405 + exir_ops.edge.aten.max_pool2d_with_indices.default: MaxPool2DWithIndicesConverter, # noqa F405 exir_ops.edge.aten.mean.dim: MeanDimConverter, # noqa F405 exir_ops.edge.aten.mm.default: MMConverter, # noqa F405 exir_ops.edge.aten.mul.Tensor: MulTensorConverter, # noqa F405 diff --git a/backends/nxp/neutron_pass_manager.py b/backends/nxp/neutron_pass_manager.py deleted file mode 100644 index 02bcc0079f6..00000000000 --- a/backends/nxp/neutron_pass_manager.py +++ /dev/null @@ -1,50 +0,0 @@ -# Copyright (c) Meta Platforms, Inc. and affiliates. 
-# Copyright 2025 NXP -# All rights reserved. -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -from typing import List, Optional, Type - -from executorch.exir.pass_base import ExportPass -from executorch.exir.program._program import _transform - -from torch._export.pass_base import PassType -from torch.export import ExportedProgram - - -class NeutronPassManager: - def __init__( - self, - exported_program: ExportedProgram, - passes: Optional[List[Type[PassType]]] = None, - ) -> None: - """ - A helper class to run multiple passes on a program - """ - self._exported_program = exported_program - - if not passes: - self.passes = [] - else: - self.passes = passes - - @property - def exported_program(self) -> ExportedProgram: - return self._exported_program - - def transform(self) -> ExportedProgram: - """ - Returns a transformed ExportedProgram - """ - ep = self.exported_program - for pass_ in self.passes: - if issubclass(pass_, ExportPass): - transform_pass = pass_() - else: - raise RuntimeError( - f"Expecting ExportPass or ExportPass(), but got pass: {pass_} with type: {type(pass_)}" - ) - ep = _transform(ep, transform_pass) - return ep diff --git a/backends/nxp/nxp_backend.py b/backends/nxp/nxp_backend.py index f72587d537a..d59d58e42f9 100644 --- a/backends/nxp/nxp_backend.py +++ b/backends/nxp/nxp_backend.py @@ -15,7 +15,6 @@ import numpy as np import torch -from executorch.backends.nxp._passes.remove_getitem_pass import RemoveGetItemPass from executorch.backends.nxp.backend.data_format import DataFormat from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, @@ -29,12 +28,17 @@ extract_artifacts_from_neutron_node, NeutronNodeArtifacts, ) -from executorch.backends.nxp.neutron_pass_manager import NeutronPassManager from executorch.exir.backend.backend_details import BackendDetails, PreprocessResult from 
executorch.exir.backend.compile_spec_schema import CompileSpec -from executorch.exir.verification.verifier import EXIREdgeDialectVerifier from torch.export.exported_program import ExportedProgram +# Aten dialect operators that are allowed to be in the edge dialect model. These operators are usually created by a +# transform pass or by a prevented operator decomposition during lowering to edge. +core_aten_ops_exception_list = [ + torch.ops.aten.max_pool2d.default, + torch.ops.aten.prelu.default, +] + class NeutronCompileSpecBuilder: config: NeutronTargetSpec @@ -184,23 +188,6 @@ def preprocess( # noqa C901 # Serialize and return the program. if output_format == "tflite": - # We need to create custom model verifier with max_pool2d added as exception. - # Otherwise, we get violation that this op is not part of ATen Core ops. - edge_program._verifiers = [ - EXIREdgeDialectVerifier( - class_only=True, - core_aten_ops_exception_list=[ - torch.ops.aten.max_pool2d.default, - torch.ops.aten.prelu.default, - ], - ) - ] - - # Remove MaxPool-related "getitem" nodes from graph - edge_program = NeutronPassManager( - edge_program, [RemoveGetItemPass] - ).transform() - # Convert the edge program to TFLite. 
conversion_config = ConversionConfig( {"use_neutron_for_format_conversion": use_neutron_for_format_conversion} diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py index 23c9bd782f2..485e96d53c6 100644 --- a/backends/nxp/quantizer/neutron_quantizer.py +++ b/backends/nxp/quantizer/neutron_quantizer.py @@ -32,7 +32,8 @@ LeakyReluInPlacePattern, LeakyReluPattern, LinearPattern, - MaxPoolPattern, + MaxPool1DPattern, + MaxPool2DPattern, MeanDimPattern, MmPattern, MulTensorPattern, @@ -273,7 +274,8 @@ def __init__(self, neutron_target_spec: NeutronTargetSpec, is_qat: bool = False) OpQuantizer(LeakyReluPattern(is_qat=is_qat), static_fc_qconfig), OpQuantizer(LeakyReluInPlacePattern(is_qat=is_qat), static_fc_qconfig), OpQuantizer(LinearPattern(self, is_qat=is_qat), static_fc_qconfig), - OpQuantizer(MaxPoolPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MaxPool1DPattern(is_qat=is_qat), static_qconfig), + OpQuantizer(MaxPool2DPattern(is_qat=is_qat), static_qconfig), OpQuantizer(MeanDimPattern(is_qat=is_qat), static_qconfig), OpQuantizer(MmPattern(self, is_qat=is_qat), static_qconfig), OpQuantizer(MulTensorPattern(is_qat=is_qat), static_qconfig), diff --git a/backends/nxp/quantizer/patterns.py b/backends/nxp/quantizer/patterns.py index 2b949497272..ccbcb297bc6 100644 --- a/backends/nxp/quantizer/patterns.py +++ b/backends/nxp/quantizer/patterns.py @@ -738,10 +738,15 @@ def get_anchors( ) -class MaxPoolPattern(SharedSpecPattern): - """ - Quantizer for MaxPool2D operator. 
- """ +class MaxPool1DPattern(SharedSpecPattern): + """Quantizer for the MaxPool1D operator.""" + + def partition_types(self): + return [torch.ops.aten.max_pool1d.default] + + +class MaxPool2DPattern(SharedSpecPattern): + """Quantizer for the MaxPool2D operator.""" def partition_types(self): return [torch.ops.aten.max_pool2d.default] diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index cd216a2d307..9bf90bbd5ba 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -24,7 +24,11 @@ RemoveIOQuantOpsPass, ) from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner -from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec + +from executorch.backends.nxp.nxp_backend import ( + core_aten_ops_exception_list, + generate_neutron_compile_spec, +) from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.exir import ( @@ -145,6 +149,7 @@ def to_quantized_edge_program( partitioner=partitioners, compile_config=EdgeCompileConfig( _check_ir_validity=False, + _core_aten_ops_exception_list=core_aten_ops_exception_list, ), ) diff --git a/backends/nxp/tests/test_convert_upsample_bilinear2d.py b/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_bilinear2d.py similarity index 100% rename from backends/nxp/tests/test_convert_upsample_bilinear2d.py rename to backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_bilinear2d.py diff --git a/backends/nxp/tests/test_convert_upsample_nearest2d.py b/backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_nearest2d.py similarity index 100% rename from backends/nxp/tests/test_convert_upsample_nearest2d.py rename to backends/nxp/tests/ir/converter/node_converter/test_convert_upsample_nearest2d.py diff --git 
a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py index 569ad571dbc..6bb1000b38b 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_max_pool_2d_converter.py @@ -1,32 +1,66 @@ -# Copyright 2024 NXP +# Copyright 2024,2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import operator + import numpy as np import pytest import torch + from executorch.backends.nxp.backend.edge_program_converter import ( EdgeProgramToIRConverter, ) - -from executorch.backends.nxp.backend.ir.conversion_config import ConversionConfig -from executorch.backends.nxp.neutron_pass_manager import NeutronPassManager -from executorch.backends.nxp.tests.executorch_pipeline import ( - to_edge_program, - to_quantized_edge_program, -) +from executorch.backends.nxp.tests.executorch_pipeline import to_quantized_edge_program from executorch.backends.nxp.tests.executors import ( convert_run_compare, - ToNCHWPreprocess, - ToNHWCPreprocess, + graph_contains_any_of_ops, + ToChannelFirstPreprocess, + ToChannelLastPreprocess, ) -from executorch.backends.nxp.tests.models import MaxPool2dConvModule, MaxPool2dModule -from executorch.backends.xnnpack._passes import RemoveGetItemPass -from executorch.exir.verification.verifier import EXIREdgeDialectVerifier -from torch.export import ExportedProgram from executorch.backends.nxp.tests.use_qat import * # noqa F403 +# noinspection PyProtectedMember +from executorch.exir.dialects._ops import ops as exir_ops + +ExecutorchDelegateCall = torch.ops.higher_order.executorch_call_delegate +GetItem = operator.getitem +MaxPool2D = exir_ops.edge.aten.max_pool2d_with_indices.default +Squeeze = exir_ops.edge.aten.squeeze.default +SqueezeDim = exir_ops.edge.aten.squeeze.dim +SqueezeDims = 
exir_ops.edge.aten.squeeze.dims +Unsqueeze = exir_ops.edge.aten.unsqueeze.default +ViewCopy = exir_ops.edge.aten.view_copy.default + + +class MaxPool1DModule(torch.nn.Module): + def __init__(self): + super().__init__() + + self.max_pool = torch.nn.MaxPool1d( + kernel_size=3, + ) + + def forward(self, x): + return self.max_pool(x) + + +class MaxPool2dModule(torch.nn.Module): + def __init__(self, kernel_size=3, **kwargs): + super().__init__() + self.max_pool2d = torch.nn.MaxPool2d(kernel_size, **kwargs) + + def forward(self, x): + return self.max_pool2d(x) + + +def _generate_test_data(input_shape: tuple) -> np.ndarray: + """Generate random int8 test data for given shape.""" + return (np.random.random(input_shape).astype(np.float32) * 256.0 - 128.0).astype( + np.int8 + ) + @pytest.fixture(autouse=True) def reseed_model_per_test_run(): @@ -34,99 +68,185 @@ def reseed_model_per_test_run(): np.random.seed(23) -@pytest.mark.parametrize( - "input_shape, padding", - [ - pytest.param((1, 4, 8, 8), (0, 0), id="No padding."), - pytest.param( - (1, 4, 8, 8), - (1, 1), - id="Padding, keep the same output tensor size as input.", - ), - pytest.param( - (1, 4, 8, 8), (1, 0), id="Padding, change the output tensor size." - ), - pytest.param( - (1, 4, 9, 9), (1, 0), id="Padding, change the output tensor size." - ), - pytest.param( - (1, 4, 9, 9), (0, 1), id="Padding, change the output tensor size." - ), - ], -) -def test_max_pool_2d_conversion(input_shape, padding): - edge_program = to_edge_program( - MaxPool2dModule(padding=padding), input_shape - ).exported_program() - - # We need to create custom model verifier with max_pool2d added as exception. - # Otherwise, we get violation that this op is not part of ATen Core ops. 
- edge_program._verifiers = [ - EXIREdgeDialectVerifier( - class_only=True, - core_aten_ops_exception_list=[torch.ops.aten.max_pool2d.default], - ) - ] +class TestMaxPool2DSupported: + """Tests for supported MaxPool2D configurations.""" - # Remove MaxPool-related "getitem" nodes from graph - edge_program = NeutronPassManager(edge_program, [RemoveGetItemPass]).transform() + @staticmethod + def _verify_successful_delegation(module, converter_spy, input_shape): + edge_model = to_quantized_edge_program( + module, + input_shape, + use_neutron_for_format_conversion=False, + ).exported_program() - input_data = np.random.random(input_shape).astype(np.float32) + # Make sure the MaxPool was delegated. + assert not graph_contains_any_of_ops(edge_model.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_model.graph, [ExecutorchDelegateCall]) - convert_run_compare( - edge_program, - input_data, - tflite_input_preprocess=ToNHWCPreprocess(), - tflite_output_preprocess=ToNCHWPreprocess(), - conversion_config=ConversionConfig( - {"use_neutron_for_format_conversion": False} - ), - ) + # Verify correct behavior of the converted NeutronIR model. + edge_partition = converter_spy.call_args.args[1] + neutron_ir_partition, _ = converter_spy.spy_return + input_data = _generate_test_data(input_shape) -@pytest.mark.parametrize( - "input_shape, padding", - [ - pytest.param((1, 4, 8, 8), (0, 0), id="No padding."), - pytest.param( - (1, 4, 8, 8), - (1, 1), - id="Padding, keep the same output tensor size as input.", - ), - pytest.param( - (1, 4, 8, 8), (1, 0), id="Padding, change the output tensor size." - ), - pytest.param( - (1, 4, 11, 11), (1, 0), id="Padding, change the output tensor size." - ), - pytest.param( - (1, 4, 11, 11), (0, 1), id="Padding, change the output tensor size." 
- ), - ], -) -def test_max_pool_2d_quant_conversion(mocker, input_shape, padding, use_qat): - converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") - - # Run conversion - _ = to_quantized_edge_program( - MaxPool2dConvModule(padding=padding), - input_shape, - use_qat=use_qat, - use_neutron_for_format_conversion=False, + # Make sure the tested program contains the `MaxPool`. + assert graph_contains_any_of_ops(edge_partition.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_partition.graph, [GetItem]) + + convert_run_compare( + edge_partition, + tfl_model=neutron_ir_partition, + input_data=input_data, + tflite_input_preprocess=ToChannelLastPreprocess(), + tflite_output_preprocess=ToChannelFirstPreprocess(), + ) + + @pytest.mark.parametrize( + "padding", + [(0, 0), (1, 1), (0, 1), 0, 1], + ids=lambda padding: f"Padding = {'tuple' if isinstance(padding, tuple) else 'scalar'} `{padding}`", + ) + def test_padding(self, padding, mocker): + input_shape = (1, 8, 5, 6) + stride = 1 # Default value would be equal to kernel size (3), which is not supported by Neutron. 
+ module = MaxPool2dModule(kernel_size=3, stride=stride, padding=padding) + + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + self._verify_successful_delegation(module, converter_spy, input_shape) + + @pytest.mark.parametrize( + "stride", + [(1, 1), (2, 1), (2, 2), (2, 3), (2, 8), 1, 2], + ids=lambda stride: f"Stride = {'tuple' if isinstance(stride, tuple) else 'scalar'} `{stride}`", ) + def test_stride(self, stride, mocker): + input_shape = (1, 8, 7, 9) + module = MaxPool2dModule(kernel_size=3, stride=stride) - # Capture generated model - tflite_flatbuffers_model, io_formats = converter_spy.spy_return + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + self._verify_successful_delegation(module, converter_spy, input_shape) - # Capture converted program - exported_program: ExportedProgram = converter_spy.call_args.args[1] - input_data = (np.random.random(input_shape).astype(np.float32) * 50).astype(np.int8) +class TestMaxPool2DUnsupported: + """Tests for unsupported MaxPool2D configurations.""" - convert_run_compare( - exported_program, - tflite_input_preprocess=ToNHWCPreprocess(), - tfl_model=tflite_flatbuffers_model, - tflite_output_preprocess=ToNCHWPreprocess(), - input_data=input_data, - ) + @staticmethod + def _verify_no_delegation(module, input_shape): + edge_model = to_quantized_edge_program( + module, + input_shape, + use_neutron_for_format_conversion=False, + ).exported_program() + + assert graph_contains_any_of_ops(edge_model.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_model.graph, [GetItem]) + assert not graph_contains_any_of_ops(edge_model.graph, [ExecutorchDelegateCall]) + + def test_unsupported_dilation(self): + dilation = 2 # Unsupported. + input_shape = (1, 8, 7, 9) + + module = MaxPool2dModule(kernel_size=3, dilation=dilation) + + # Make sure the MaxPool was NOT delegated. 
+ self._verify_no_delegation(module, input_shape) + + def test_unsupported_stride(self): + stride = 3 # Unsupported. + input_shape = (1, 8, 7, 9) + + module = MaxPool2dModule(kernel_size=3, stride=stride) + + # Make sure the MaxPool was NOT delegated. + self._verify_no_delegation(module, input_shape) + + def test_unsupported_padding(self): + padding = 4 # Unsupported. Bigger than kernel size. + input_shape = (1, 8, 7, 9) + + with pytest.raises( + RuntimeError, match=r"pad should be at most half of effective kernel size" + ): + to_quantized_edge_program( + MaxPool2dModule(kernel_size=3, padding=padding), + input_shape, + use_neutron_for_format_conversion=False, + ).exported_program() + + def test_unsupported_ceil_mode(self): + ceil_mode = True # Unsupported. + input_shape = (1, 8, 7, 9) + + module = MaxPool2dModule(kernel_size=3, ceil_mode=ceil_mode) + + # Make sure the MaxPool was NOT delegated. + self._verify_no_delegation(module, input_shape) + + def test_unsupported_batch_size(self): + batch_size = 2 # Unsupported. + input_shape = (batch_size, 8, 7, 9) + + module = MaxPool2dModule(kernel_size=3) + + # Make sure the MaxPool was NOT delegated. + self._verify_no_delegation(module, input_shape) + + def test_unsupported_channels(self): + channels = 3 # Unsupported. Must be a multiple of `num_macs` (`8`). + input_shape = (1, channels, 7, 9) + + module = MaxPool2dModule(kernel_size=3) + + # Make sure the MaxPool was NOT delegated. + self._verify_no_delegation(module, input_shape) + + +class TestMaxPool1D: + """There is no `max_pool1d` in the edge dialect. During lowering to edge, ExecuTorch extends the shape to 4D (with + a `1`), then applies `max_pool2d`, and then removes the `1` from the shape to make it 3D again. So the aten + `max_pool1d` is handled by the `max_pool2d` support. This test verifies that the lowering process works correctly. 
+ """ + + def test_max_pool_2d__from_1d(self, mocker): + model = MaxPool1DModule() + input_shape = (1, 8, 12) + extended_shape = (1, 8, 1, 12) + + converter_spy = mocker.spy(EdgeProgramToIRConverter, "convert_program") + edge_model = to_quantized_edge_program( + model, input_shape, use_neutron_for_format_conversion=False + ).exported_program() + + # Make sure the `max_pool` was delegated. + assert graph_contains_any_of_ops(edge_model.graph, [ExecutorchDelegateCall]) + assert not graph_contains_any_of_ops(edge_model.graph, [MaxPool2D]) + # There is no `max_pool1d` in the edge dialect, so we cannot check for its absence by comparing with the target. + # In order to detect any potential future changes (like the addition of `max_pool1d` to edge dialect), we check + # the name of the target. + assert not any( + n for n in edge_model.graph.nodes if "1d" in str(n.target) + ) # Check for anything 1D. + + # Make sure both `view_copy` nodes were added, and there is no `squeeze` or `unsqueeze`. + assert len([n for n in edge_model.graph.nodes if n.target == ViewCopy]) == 2 + assert not graph_contains_any_of_ops( + edge_model.graph, [Unsqueeze, Squeeze, SqueezeDim, SqueezeDims] + ) + + # Verify correct behavior of the converted NeutronIR model. + edge_partition = converter_spy.call_args.args[1] + neutron_ir_partition, _ = converter_spy.spy_return + + input_data = _generate_test_data(extended_shape) + + # Make sure the tested program contains the `MaxPool`. 
+ assert graph_contains_any_of_ops(edge_partition.graph, [MaxPool2D]) + assert graph_contains_any_of_ops(edge_partition.graph, [GetItem]) + + convert_run_compare( + edge_partition, + tfl_model=neutron_ir_partition, + input_data=input_data, + tflite_input_preprocess=ToChannelLastPreprocess(), + tflite_output_preprocess=ToChannelFirstPreprocess(), + ) diff --git a/backends/nxp/tests/test_convert_reshaping_nodes_to_view.py b/backends/nxp/tests/ir/edge_passes/test_convert_reshaping_nodes_to_view.py similarity index 100% rename from backends/nxp/tests/test_convert_reshaping_nodes_to_view.py rename to backends/nxp/tests/ir/edge_passes/test_convert_reshaping_nodes_to_view.py diff --git a/backends/nxp/tests/test_edge_passes.py b/backends/nxp/tests/ir/edge_passes/test_edge_passes.py similarity index 100% rename from backends/nxp/tests/test_edge_passes.py rename to backends/nxp/tests/ir/edge_passes/test_edge_passes.py diff --git a/backends/nxp/tests/test_integration.py b/backends/nxp/tests/test_integration.py index fe157b44c48..5b64646004c 100644 --- a/backends/nxp/tests/test_integration.py +++ b/backends/nxp/tests/test_integration.py @@ -1,4 +1,4 @@ -# Copyright 2024-2025 NXP +# Copyright 2024-2026 NXP # # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. 
@@ -47,7 +47,7 @@ def test_cifarnet(use_qat): delegation_info = get_delegation_info(exec_prog.exported_program().graph_module) assert delegation_info.num_delegated_subgraphs == 1 assert delegation_info.num_non_delegated_nodes == 11 - assert delegation_info.num_delegated_nodes == 45 + assert delegation_info.num_delegated_nodes == 42 nodes = list(exec_prog.exported_program().graph.nodes) assert nodes[2].name == "quantized_decomposed_quantize_per_tensor_default" diff --git a/backends/nxp/tests/test_neutron_backend_executor.py b/backends/nxp/tests/test_neutron_backend_executor.py index d3a84860614..14bfeebd325 100644 --- a/backends/nxp/tests/test_neutron_backend_executor.py +++ b/backends/nxp/tests/test_neutron_backend_executor.py @@ -108,6 +108,7 @@ def test_conv_fc__lowered_program_and_tflite_output_match(mocker): ) convert_run_compare( exported_program, + tfl_model=tflite_flatbuffers_model, input_data=input_data, tflite_input_preprocess=ToNHWCPreprocess(), ) diff --git a/backends/nxp/tests/test_node_format_inference.py b/backends/nxp/tests/test_node_format_inference.py index 21ad95c6b64..9a6d3063a0b 100644 --- a/backends/nxp/tests/test_node_format_inference.py +++ b/backends/nxp/tests/test_node_format_inference.py @@ -11,14 +11,15 @@ NodeFormatInference, NXP_NODE_FORMAT, ) -from executorch.backends.nxp.neutron_pass_manager import NeutronPassManager +from executorch.backends.nxp.edge_passes.neutron_edge_pass_manager import ( + NeutronEdgePassManager, +) from executorch.backends.nxp.tests.models import ( Conv2dModule, MaxPool2dModule, SoftmaxModule, ) -from executorch.backends.xnnpack._passes import RemoveGetItemPass -from executorch.exir.verification.verifier import EXIREdgeDialectVerifier +from executorch.exir import EdgeCompileConfig def test_convolution(): @@ -61,25 +62,22 @@ def test_softmax(): assert expected_mapping[node.name] == node.meta[NXP_NODE_FORMAT] -def test_maxpool2d(): +def test_max_pool2d(): model = MaxPool2dModule() example_input = (torch.ones(1, 4, 
32, 32),) exir_program = torch.export.export(model, example_input) - edge_program = exir.to_edge(exir_program).exported_program() - # We need to create custom model verifier with max_pool2d added as exception. - # Otherwise, we get violation that this op is not part of ATen Core ops. - edge_program._verifiers = [ - EXIREdgeDialectVerifier( - class_only=True, - core_aten_ops_exception_list=[torch.ops.aten.max_pool2d.default], - ) - ] - - # Remove MaxPool-related "getitem" nodes from graph - edge_program = NeutronPassManager(edge_program, [RemoveGetItemPass]).transform() - NodeFormatInference(edge_program).identify_node_formats() + # We need to add the `aten.max_pool2d.default` as an exception, otherwise we would get violation that this op is + # not part of ATen Core ops. + exception_list = [torch.ops.aten.max_pool2d.default] + epm = exir.to_edge( + exir_program, + compile_config=EdgeCompileConfig(_core_aten_ops_exception_list=exception_list), + ) + + epm = epm.transform(NeutronEdgePassManager()) + NodeFormatInference(epm.exported_program()).identify_node_formats() expected_mapping = { "x": DataFormat.CHANNELS_FIRST, @@ -87,5 +85,5 @@ def test_maxpool2d(): "output": DataFormat.CHANNELS_FIRST, } - for node in edge_program.graph.nodes: + for node in epm.exported_program().graph.nodes: assert expected_mapping[node.name] == node.meta[NXP_NODE_FORMAT] diff --git a/backends/nxp/tests_models/utils.py b/backends/nxp/tests_models/utils.py index dcfc7d5b18b..5c179f5ab76 100644 --- a/backends/nxp/tests_models/utils.py +++ b/backends/nxp/tests_models/utils.py @@ -18,7 +18,11 @@ NeutronEdgePassManager, ) from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner -from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec + +from executorch.backends.nxp.nxp_backend import ( + core_aten_ops_exception_list, + generate_neutron_compile_spec, +) from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer from 
executorch.backends.nxp.tests_models.model_input_spec import ModelInputSpec from executorch.devtools.visualization.visualization_utils import ( @@ -146,7 +150,9 @@ def to_quantized_edge_program( core_aten_ep, transform_passes=NeutronEdgePassManager(), partitioner=partitioners, - compile_config=EdgeCompileConfig(), + compile_config=EdgeCompileConfig( + _core_aten_ops_exception_list=core_aten_ops_exception_list + ), ) return edge_program_manager diff --git a/docs/source/backends/nxp/op-support.csv b/docs/source/backends/nxp/op-support.csv index de1f83783a3..fa9a8a43205 100644 --- a/docs/source/backends/nxp/op-support.csv +++ b/docs/source/backends/nxp/op-support.csv @@ -13,8 +13,9 @@ aten.convolution.default,int8,static int8,"1D or 2D convolution, constant weight aten.div.Tensor,int8,static int8,"divisor - static tensor or scalar value, one dimension must satisfy %8 = 0 or scalar division (all dims = 1)" aten.hardtanh.default,int8,static int8,"supported ranges: <0,6>, <-1, 1>, <0,1>, <0,inf>" aten.leaky_relu.default,int8,static int8, -aten.max_pool2d.default,int8,static int8,"dilation=1, ceil_mode=False" -aten.max_pool2d_with_indices.default,int8,static int8,"dilation=1, ceil_mode=False" +aten.max_pool1d.default,int8,static int8,"dilation=1, ceil_mode=False, channels%8=0, batch_size=1, stride_h=1 or 2" +aten.max_pool2d.default,int8,static int8,"dilation=1, ceil_mode=False, channels%8=0, batch_size=1, stride_h=1 or 2" +aten.max_pool2d_with_indices.default,int8,static int8,"dilation=1, ceil_mode=False, channels%8=0, batch_size=1, stride_h=1 or 2" aten.mean.dim,int8,static int8,"4D tensor only, dims = [-1,-2] or [-2,-1]" aten.mul.Tensor, int8, static int8, "tensor-size % 8 = 0" aten.mm.default,int8,static int8,"2D tensor only" diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py index c8d3e376c91..2439aeb3f82 100644 --- a/examples/nxp/aot_neutron_compile.py +++ b/examples/nxp/aot_neutron_compile.py @@ -25,7 +25,10 @@ 
RemoveIOQuantOpsPass, ) from executorch.backends.nxp.neutron_partitioner import NeutronPartitioner -from executorch.backends.nxp.nxp_backend import generate_neutron_compile_spec +from executorch.backends.nxp.nxp_backend import ( + core_aten_ops_exception_list, + generate_neutron_compile_spec, +) from executorch.backends.nxp.quantizer.neutron_quantizer import NeutronQuantizer from executorch.backends.nxp.quantizer.utils import calibrate_and_quantize from executorch.devtools.visualization.visualization_utils import ( @@ -327,7 +330,9 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool): export(module, example_inputs, strict=True), transform_passes=NeutronEdgePassManager(), partitioner=partitioners, - compile_config=EdgeCompileConfig(), + compile_config=EdgeCompileConfig( + _core_aten_ops_exception_list=core_aten_ops_exception_list, + ), ) if args.remove_quant_io_ops: