pytorch · MartinPavella · Feb 24, 2026 · Feb 25, 2026 · Feb 25, 2026 · Feb 25, 2026
@@ -3,6 +3,8 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import operator
+
 import executorch.backends.nxp.backend.ir.logger as logger
 import flatbuffers
 from executorch.backends.nxp.backend.ir.conversion_config import ConversionConfig
@@ -38,7 +40,7 @@
     exir_ops.edge.aten.convolution.default: ConvolutionConverter,  # noqa F405
     exir_ops.edge.aten.hardtanh.default: HardTanhConverter,  # noqa F405
     exir_ops.edge.aten.leaky_relu.default: LeakyReluConverter,  # noqa F405
-    exir_ops.edge.aten.max_pool2d.default: MaxPool2dConverter,  # noqa F405
+    exir_ops.edge.aten.max_pool2d_with_indices.default: MaxPool2DWithIndicesConverter,  # noqa F405
     exir_ops.edge.aten.mean.dim: MeanDimConverter,  # noqa F405
     exir_ops.edge.aten.mm.default: MMConverter,  # noqa F405
     exir_ops.edge.aten.mul.Tensor: MulTensorConverter,  # noqa F405
@@ -57,6 +59,9 @@
 }
 
 
+NXP_PROCESSED_TAG = "NXP_PROCESSED_TAG"
+
+
 class EdgeProgramToIRConverter:
     """
     Converter from convertion of ExportedProgram in Edge dialect to IR (TFLite Flatbuffers).
@@ -159,6 +164,11 @@ def _process_nodes(self, nodes: list[Node], conversion_context: ConversionContex
                 if node.target in qdq_related_functions and "cluster" in node.meta:
                     # Skip (De)Quantize nodes that were already processed
                     pass
+                elif node.target == operator.getitem and node.meta.get(
+                    NXP_PROCESSED_TAG, False
+                ):
+                    # The node was already processed alongside the Q/DQ ops.
+                    pass
                 elif node.target in functions_converters:
                     functions_converters[node.target](conversion_context).convert(node)
                 else:
@@ -264,9 +274,8 @@ def build_conversion_context(
     def _convert_qdq_cluster_q_dq_nodes(
         self, nodes: list[Node], conversion_context: ConversionContext
     ):
-        """
-        Go through program and convert De(Quantize) nodes that are part of the QDQ cluster into
-        tensors.
+        """Go through the program and convert [De]Quantize nodes that are part of a QDQ cluster into tensors.
+            Also convert the related `getitem` nodes into NO-OPs, which only propagate the quantization parameters.
 
         :param nodes: Program's nodes.
         :param conversion_context: ConversionContext instance.
@@ -285,3 +294,14 @@ def _convert_qdq_cluster_q_dq_nodes(
                 and part_of_qdq_cluster
             ):
                 qdq_q_ops_converters[node.target](conversion_context).convert(node)
+
+        # Usually, `getitem` nodes are a part of a "foreign" QDQ cluster. They consume the output of the main compute
+        #  operator, and they are followed by a `Quantize` operator, which specifies the output quantization parameters
+        #  of the cluster. So the input of the `getitem` is float32, and the output is quantized. Due to how the Neutron
+        #  IR represents quantization, the quantization parameters must be propagated from the output to the input.
+        for node in nodes:
+            if node.target == operator.getitem:
+                # Convert the builtin function into a "NO-OP" in the IR, and propagate the quantization parameters in
+                #  reverse.
+                GetItemConverter(conversion_context).convert(node)  # noqa: F405
+                node.meta[NXP_PROCESSED_TAG] = True
@@ -8,6 +8,10 @@
     ModelBuilder,
 )
 from executorch.backends.nxp.backend.ir.converter.conversion import translator
+
+from executorch.backends.nxp.backend.ir.converter.tensor_utils import (
+    get_name_of_node_output,
+)
 from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
 from torch.fx import Node
 from torch.nn import Parameter
@@ -30,19 +34,26 @@ def append_as_fake_tensor(self, node: Node, node_format: DataFormat):
         if self.tensor_exists(node.name):
             return
 
-        tensor = node.meta["val"]
-        if isinstance(tensor, tuple):
-            tensor = tensor[0]  # Fake tensor
-        _type = translator.convert_data_type(tensor.dtype)
-        shape = list(tensor.shape)
+        def _append_tensor(tensor_, name=None):
+            type_ = translator.convert_data_type(tensor_.dtype)
+            shape = list(tensor_.shape)
 
-        if node_format.is_channels_first():
-            shape = translator.dims_to_channels_last(shape)
+            if node_format.is_channels_first():
+                shape = translator.dims_to_channels_last(shape)
 
-        tensor = self.create_empty_tensor(node.name, _type, shape)
-        tensor.tensor_format = DataFormat.convert_executorch_format_to_neutron(
-            node_format
-        )
+            tensor = self.create_empty_tensor(name or node.name, type_, shape)
+            tensor.tensor_format = DataFormat.convert_executorch_format_to_neutron(
+                node_format
+            )
+
+        tensor_or_tuple = node.meta["val"]
+        if isinstance(tensor_or_tuple, tuple):
+            # The `node` can produce multiple output tensors, which are represented using this tuple.
+            for i, t in enumerate(tensor_or_tuple):
+                _append_tensor(t, get_name_of_node_output(node, i))
+
+        else:
+            _append_tensor(tensor_or_tuple)
 
     def append_as_static_tensor(
         self, node: Node, node_format: DataFormat, tensor: Parameter

@@ -86,13 +86,23 @@ def valid(self, op: tflite_model.Operator) -> bool:
         first_quantization = shared_tensors[0].quantization
 
         # Check quantization values (scales & zero-points)
-        scales_same = all(
-            first_quantization.scale == t.quantization.scale for t in shared_tensors[1:]
-        )
-        zp_same = all(
-            first_quantization.zero_point == t.quantization.zero_point
-            for t in shared_tensors[1:]
-        )
+        try:
+            scales_same = all(
+                first_quantization.scale == t.quantization.scale
+                for t in shared_tensors[1:]
+            )
+            zp_same = all(
+                first_quantization.zero_point == t.quantization.zero_point
+                for t in shared_tensors[1:]
+            )
+        except AttributeError:
+            # Common error when one of the tensors is not quantized.
+            logger.w(
+                f"NXP backend: The Neutron IR operator {op.builtin_options} is not quantized correctly. "
+                "Please report this."
+            )
+            return False
+
         return scales_same and zp_same
 
     def __str__(self):

@@ -1,6 +1,6 @@
 #
 # Copyright 2023 Martin Pavella
-# Copyright 2023-2025 NXP
+# Copyright 2023-2026 NXP
 #
 # License: MIT
 # See the LICENSE_MIT for more details.
@@ -12,7 +12,7 @@
 'conversion/builtin/' directory.
 """
 
-from typing import List, MutableSequence, Optional
+from typing import List, MutableSequence, Optional, Sequence
 
 import executorch.backends.nxp.backend.ir.logger as logger
 from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
@@ -71,7 +71,7 @@ def extend_1d_dilation_to_2d(tflite_1d_dilation: MutableSequence):
 )
 
 
-def assign_2d_strides(options: StridedOptions, strides: Optional[List[int]]):
+def assign_2d_strides(options: StridedOptions, strides: Sequence[int] | None):
     """Assign to 'obj' the attributes 'stride_h' and 'stride_w' from 'strides'.
          If 'strides' is None, assign 1s.
 

@@ -3,6 +3,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import operator
 from abc import ABC, abstractmethod
 
 import torch
@@ -14,6 +15,10 @@
 from executorch.backends.nxp.backend.ir.converter.builder.aten_model_builder_director import (
     AtenModelBuilderDirector,
 )
+
+from executorch.backends.nxp.backend.ir.converter.tensor_utils import (
+    get_name_of_node_output,
+)
 from executorch.backends.nxp.backend.ir.tflite_generator import tflite_model
 from executorch.backends.nxp.backend.neutron_target_spec import NeutronTargetSpec
 from executorch.exir.dialects._ops import ops as exir_ops
@@ -231,25 +236,45 @@ def _create_tflite_op_with_io_tensors(self, node: Node) -> tflite_model.Operator
         # Initialize node's inputs
         t_operator.inputs = tflite_model.OperatorInputs()
 
-        input_nodes = []
-        for arg in node.args:
-            match arg:
-                case Node():
-                    input_nodes.append(arg)
-                case list() if all(isinstance(node_, Node) for node_ in arg):
-                    input_nodes.extend(arg)
-
-        for ancestor_node in input_nodes:
-            assert self.context.tflite_builder.tensor_exists(ancestor_node.name)
-            t_operator.tmp_inputs.append(
-                self.context.tflite_builder.tensor_for_name(ancestor_node.name)
-            )
-
-        # Add node's output as a new tensor
-        assert self.context.tflite_builder.tensor_exists(node.name)
-        t_operator.outputs = tflite_model.OperatorOutputs()
-        t_operator.tmp_outputs.append(
-            self.context.tflite_builder.tensor_for_name(node.name)
+        if node.target == operator.getitem:
+            # Special case of a builtin function, which can extract a specific output tensor from the previous node.
+            previous_node = node.args[0]
+            output_index = node.args[1]
+            input_name = get_name_of_node_output(previous_node, output_index)
+            assert self.builder.tensor_exists(input_name)
+            t_operator.tmp_inputs.append(self.builder.tensor_for_name(input_name))
+
+        else:
+            # Regular operator.
+            input_nodes = []
+            for arg in node.args:
+                match arg:
+                    case Node():
+                        input_nodes.append(arg)
+                    case list() if all(isinstance(node_, Node) for node_ in arg):
+                        input_nodes.extend(arg)
+
+            for ancestor_node in input_nodes:
+                assert self.context.tflite_builder.tensor_exists(ancestor_node.name)
+                t_operator.tmp_inputs.append(
+                    self.context.tflite_builder.tensor_for_name(ancestor_node.name)
+                )
+
+        # Attach the node's output tensors, which must already exist in the builder.
+        num_outputs = (
+            len(node.meta["val"]) if isinstance(node.meta["val"], tuple) else 1
         )
+        if num_outputs == 1:
+            # Single output node.
+            assert self.builder.tensor_exists(node.name)
+            t_operator.outputs = tflite_model.OperatorOutputs()
+            t_operator.tmp_outputs.append(self.builder.tensor_for_name(node.name))
+        else:
+            # The node has multiple outputs.
+            t_operator.outputs = tflite_model.OperatorOutputs()
+            for output_index in range(num_outputs):
+                tensor_name = get_name_of_node_output(node, output_index)
+                assert self.builder.tensor_exists(tensor_name)
+                t_operator.tmp_outputs.append(self.builder.tensor_for_name(tensor_name))
 
         return t_operator
@@ -28,14 +28,17 @@
 from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.convolution_converter import (
     ConvolutionConverter,
 )
+from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.getitem_converter import (
+    GetItemConverter,
+)
 from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.hardtanh_converter import (
     HardTanhConverter,
 )
 from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.leaky_relu_converter import (
     LeakyReluConverter,
 )
-from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.max_pool_2d_converter import (
-    MaxPool2dConverter,
+from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.max_pool2d_with_indices_converter import (
+    MaxPool2DWithIndicesConverter,
 )
 from executorch.backends.nxp.backend.ir.converter.node_converters.ops_converters.mean_dim_converter import (
     MeanDimConverter,
@@ -101,9 +104,10 @@
     "CloneConverter",
     "ConstantPadNDConverter",
     "ConvolutionConverter",
+    "GetItemConverter",
     "HardTanhConverter",
     "LeakyReluConverter",
-    "MaxPool2dConverter",
+    "MaxPool2DWithIndicesConverter",
     "MeanDimConverter",
     "MMConverter",
     "MulTensorConverter",