Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ __Bug Fixes__:

__API Changes__:

#1498 Add support for torch.export ExportedProgram models (.pt2 files)<br/>
#1503 Add ReadOnlySpan overloads to many methods.<br/>
#1478 Fix `torch.jit.ScriptModule.zero_grad`.<br/>
#1495 Make `torchvision.io.read_image` and `torchvision.io.read_image_async` allow subsequent opening of the file for reading.<br/>
Expand Down
2 changes: 2 additions & 0 deletions src/Native/LibTorchSharp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(SOURCES
crc32c.h
THSAutograd.h
THSData.h
THSExport.h
THSJIT.h
THSNN.h
THSStorage.h
Expand All @@ -23,6 +24,7 @@ set(SOURCES
THSActivation.cpp
THSAutograd.cpp
THSData.cpp
THSExport.cpp
THSFFT.cpp
THSJIT.cpp
THSLinearAlgebra.cpp
Expand Down
63 changes: 63 additions & 0 deletions src/Native/LibTorchSharp/THSExport.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
#include "THSExport.h"

// torch.export support via AOTInductor
// This uses torch::inductor::AOTIModelPackageLoader which is INFERENCE-ONLY
// Models must be compiled with torch._inductor.aoti_compile_and_package() in Python

// Open an AOTInductor-compiled .pt2 package and hand ownership of the loader
// to the caller. Returns nullptr on failure; the error text is captured by the
// CATCH machinery for retrieval from the managed side.
ExportedProgramModule THSExport_load(const char* filename)
{
    CATCH(
        // The package must have been produced by
        // torch._inductor.aoti_compile_and_package() in Python; plain
        // torch.export.save() output is not loadable here.
        return new torch::inductor::AOTIModelPackageLoader(filename);
    );

    return nullptr;
}

// Release the native AOTIModelPackageLoader created by THSExport_load.
// Safe to call with nullptr (delete on a null pointer is a no-op).
void THSExport_Module_dispose(const ExportedProgramModule module)
{
delete module;
}

void THSExport_Module_run(
const ExportedProgramModule module,
const Tensor* input_tensors,
const int input_length,
Tensor** result_tensors,
int64_t* result_length)
{
*result_tensors = nullptr;
*result_length = 0;

CATCH(
// Convert input tensor pointers to std::vector<torch::Tensor>
std::vector<torch::Tensor> inputs;
inputs.reserve(input_length);
for (int i = 0; i < input_length; i++) {
inputs.push_back(*input_tensors[i]);
}

// Run inference
std::vector<torch::Tensor> outputs = module->run(inputs);

// Allocate output array and copy results
auto count = outputs.size();
auto* tensors = new Tensor[count];

Comment on lines +46 to +49
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The returned pointer array is allocated with new Tensor[outputs.size()] but there is no corresponding exported API to free it from managed code (and FreeHGlobal is not compatible with new[]). Add an exported free function that delete[]s this array (or switch to a caller-provided allocator callback), and consider using size_t/int64_t for result_length to avoid truncation from outputs.size().

Copilot uses AI. Check for mistakes.
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed both issues. Added THSExport_Module_run_free_results() for proper delete[] cleanup, and changed result_length from int to int64_t to avoid truncation.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — added THSExport_Module_run_free_results() with delete[], and changed result_length to int64_t.

for (size_t i = 0; i < count; i++) {
tensors[i] = new torch::Tensor(outputs[i]);
}

// Only expose to caller after full success
*result_tensors = tensors;
*result_length = static_cast<int64_t>(count);
);
}

// Free the result array allocated with new[] by THSExport_Module_run.
// This releases only the array of handles, not the tensors it pointed at —
// those are owned by the managed Tensor wrappers by the time this is called.
// Safe to call with nullptr.
void THSExport_Module_run_free_results(Tensor* result_tensors)
{
delete[] result_tensors;
}
39 changes: 39 additions & 0 deletions src/Native/LibTorchSharp/THSExport.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
#pragma once

#include "../Stdafx.h"

#include "torch/torch.h"
#include "torch/csrc/inductor/aoti_package/model_package_loader.h"

#include "Utils.h"

// torch.export ExportedProgram module via AOTInductor
// Note: Uses torch::inductor::AOTIModelPackageLoader for inference-only execution
// The handle is an owning raw pointer managed by THSExport_load / THSExport_Module_dispose.
typedef torch::inductor::AOTIModelPackageLoader* ExportedProgramModule;

// torch.export support via AOTInductor - Load and execute PyTorch ExportedProgram models (.pt2 files)
// ExportedProgram is PyTorch 2.x's recommended way to export models for production deployment
//
// IMPORTANT: This implementation uses torch::inductor::AOTIModelPackageLoader which is
// INFERENCE-ONLY. Training, parameter updates, and device movement are not supported.
// Models must be compiled with torch._inductor.aoti_compile_and_package() in Python.

// Load an AOTInductor-compiled model package from a .pt2 file.
// Returns nullptr on failure; the error message is captured for the managed side.
EXPORT_API(ExportedProgramModule) THSExport_load(const char* filename);

// Dispose of an ExportedProgram module previously returned by THSExport_load.
EXPORT_API(void) THSExport_Module_dispose(const ExportedProgramModule module);

// Execute the ExportedProgram's forward method (inference only)
// Input: Array of tensors
// Output: Array of result tensors (caller must free with THSExport_Module_run_free_results)
// On failure, *result_tensors is nullptr and *result_length is 0.
EXPORT_API(void) THSExport_Module_run(
const ExportedProgramModule module,
const Tensor* input_tensors,
const int input_length,
Tensor** result_tensors,
int64_t* result_length);

// Free the result tensor array allocated by THSExport_Module_run
// (array only — the tensors themselves are owned by the managed wrappers).
EXPORT_API(void) THSExport_Module_run_free_results(Tensor* result_tensors);
238 changes: 238 additions & 0 deletions src/TorchSharp/Export/ExportedProgram.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.

using System;
using System.Runtime.InteropServices;
using static TorchSharp.PInvoke.NativeMethods;

namespace TorchSharp
{
public static partial class torch
{
    public static partial class export
    {
        /// <summary>
        /// Load a PyTorch ExportedProgram from a .pt2 file compiled with AOTInductor.
        /// </summary>
        /// <param name="filename">Path to the .pt2 file</param>
        /// <returns>ExportedProgram model for inference</returns>
        /// <remarks>
        /// IMPORTANT: The .pt2 file must be compiled with torch._inductor.aoti_compile_and_package() in Python.
        /// Models saved with torch.export.save() alone will NOT work - they require AOTInductor compilation.
        ///
        /// This implementation is INFERENCE-ONLY. Training, parameter updates, and device movement
        /// are not supported. The model is compiled for a specific device (CPU/CUDA) at compile time.
        ///
        /// Example Python code to create compatible .pt2 files:
        /// <code>
        /// import torch
        /// import torch._inductor
        ///
        /// # Export the model
        /// exported = torch.export.export(model, example_inputs)
        ///
        /// # Compile with AOTInductor (required for C++ loading)
        /// torch._inductor.aoti_compile_and_package(
        ///     exported,
        ///     package_path="model.pt2"
        /// )
        /// </code>
        /// </remarks>
        public static ExportedProgram load(string filename) => new ExportedProgram(filename);

        /// <summary>
        /// Load a PyTorch ExportedProgram with typed output.
        /// See <see cref="load(string)"/> for the compilation requirements the .pt2 file must meet.
        /// </summary>
        /// <typeparam name="TResult">The typed result: Tensor, Tensor[], or a ValueTuple of Tensors.</typeparam>
        /// <param name="filename">Path to the .pt2 file</param>
        public static ExportedProgram<TResult> load<TResult>(string filename) => new ExportedProgram<TResult>(filename);
    }
}

/// <summary>
/// Represents a PyTorch ExportedProgram loaded from an AOTInductor-compiled .pt2 file.
/// This is an INFERENCE-ONLY implementation - training and parameter updates are not supported.
/// </summary>
/// <remarks>
/// Unlike TorchScript models, ExportedProgram models are ahead-of-time (AOT) compiled for
/// a specific device and are optimized for inference performance.
///
/// Key limitations:
/// - Inference only (no training, no gradients)
/// - No parameter access or updates
/// - No device movement (compiled for specific device)
/// - No dynamic model structure changes
///
/// Use torch.jit for models that require training or dynamic behavior.
/// </remarks>
public class ExportedProgram : IDisposable
{
    // Native AOTIModelPackageLoader handle; IntPtr.Zero once disposed.
    private IntPtr handle;
    private bool _disposed = false;

    internal ExportedProgram(string filename)
    {
        handle = THSExport_load(filename);
        // A zero handle means the native load threw; surface the stored error.
        if (handle == IntPtr.Zero)
            torch.CheckForErrors();
    }

    /// <summary>
    /// Run inference on the model with the given input tensors.
    /// </summary>
    /// <param name="inputs">Input tensors for the model</param>
    /// <returns>Array of output tensors</returns>
    /// <exception cref="ObjectDisposedException">The model has been disposed.</exception>
    /// <exception cref="ArgumentNullException">The inputs array is null.</exception>
    /// <remarks>
    /// The number and shapes of inputs must match what the model was exported with.
    /// All inputs must be on the same device that the model was compiled for.
    /// </remarks>
    public torch.Tensor[] run(params torch.Tensor[] inputs)
    {
        if (_disposed)
            throw new ObjectDisposedException(nameof(ExportedProgram));
        if (inputs is null)
            throw new ArgumentNullException(nameof(inputs));

        // Convert managed tensors to an array of native handles.
        IntPtr[] input_handles = new IntPtr[inputs.Length];
        for (int i = 0; i < inputs.Length; i++)
        {
            input_handles[i] = inputs[i].Handle;
        }

        // Call native run method; on native failure result_ptr is zero and
        // CheckForErrors throws.
        THSExport_Module_run(handle, input_handles, inputs.Length, out IntPtr result_ptr, out long result_length);
        torch.CheckForErrors();

        // Validate before the narrowing cast so a bogus native length cannot
        // cause an overflowed allocation.
        if (result_length < 0 || result_length > int.MaxValue)
            throw new InvalidOperationException(
                $"Native export run returned an out-of-range result length: {result_length}.");

        int count = (int)result_length;
        torch.Tensor[] results = new torch.Tensor[count];
        IntPtr[] result_handles = new IntPtr[count];

        try
        {
            // Marshal.Copy rejects zero-length copies from a degenerate
            // pointer, so only marshal when there is something to copy.
            if (count > 0)
            {
                Marshal.Copy(result_ptr, result_handles, 0, count);

                for (int i = 0; i < count; i++)
                {
                    results[i] = new torch.Tensor(result_handles[i]);
                }
            }
        }
        finally
        {
            // Free the native array (tensors are now owned by managed Tensor objects).
            THSExport_Module_run_free_results(result_ptr);
        }

        return results;
    }

    /// <summary>
    /// Synonym for run() - executes forward pass.
    /// </summary>
    public torch.Tensor[] forward(params torch.Tensor[] inputs) => run(inputs);

    /// <summary>
    /// Synonym for run() - executes the model.
    /// </summary>
    public torch.Tensor[] call(params torch.Tensor[] inputs) => run(inputs);

    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    protected virtual void Dispose(bool disposing)
    {
        if (!_disposed)
        {
            if (handle != IntPtr.Zero)
            {
                THSExport_Module_dispose(handle);
                handle = IntPtr.Zero;
            }
            _disposed = true;
        }
    }

    ~ExportedProgram()
    {
        Dispose(false);
    }
}

/// <summary>
/// Generic version of ExportedProgram with typed output.
/// </summary>
/// <typeparam name="TResult">The return type (Tensor, Tensor[], or tuple of Tensors)</typeparam>
public class ExportedProgram<TResult> : ExportedProgram
{
    internal ExportedProgram(string filename) : base(filename)
    {
    }

    /// <summary>
    /// Run inference with typed return value.
    /// Supported result types: Tensor (exactly one output), Tensor[] (any count),
    /// ValueTuple&lt;Tensor, Tensor&gt; and ValueTuple&lt;Tensor, Tensor, Tensor&gt;.
    /// </summary>
    public new TResult run(params torch.Tensor[] inputs)
    {
        var outputs = base.run(inputs);
        var wanted = typeof(TResult);

        // A single tensor: the model must have produced exactly one output.
        if (wanted == typeof(torch.Tensor))
        {
            if (outputs.Length != 1)
                throw new InvalidOperationException($"Expected 1 output tensor, got {outputs.Length}");
            return (TResult)(object)outputs[0];
        }

        // The raw array: hand back whatever the model produced.
        if (wanted == typeof(torch.Tensor[]))
            return (TResult)(object)outputs;

        // Tuple results: only 2- and 3-element ValueTuples of Tensors are supported.
        if (wanted.IsGenericType)
        {
            var definition = wanted.GetGenericTypeDefinition();

            if (definition == typeof(ValueTuple<,>))
            {
                var elementTypes = wanted.GetGenericArguments();
                if (elementTypes[0] != typeof(torch.Tensor) || elementTypes[1] != typeof(torch.Tensor))
                    throw new NotSupportedException(
                        $"Tuple return type {wanted} is not supported. Only ValueTuple<Tensor, Tensor> is supported.");

                if (outputs.Length != 2)
                    throw new InvalidOperationException($"Expected 2 output tensors, got {outputs.Length}");
                return (TResult)Activator.CreateInstance(wanted, outputs[0], outputs[1]);
            }

            if (definition == typeof(ValueTuple<,,>))
            {
                var elementTypes = wanted.GetGenericArguments();
                if (elementTypes[0] != typeof(torch.Tensor) || elementTypes[1] != typeof(torch.Tensor) || elementTypes[2] != typeof(torch.Tensor))
                    throw new NotSupportedException(
                        $"Tuple return type {wanted} is not supported. Only ValueTuple<Tensor, Tensor, Tensor> is supported.");

                if (outputs.Length != 3)
                    throw new InvalidOperationException($"Expected 3 output tensors, got {outputs.Length}");
                return (TResult)Activator.CreateInstance(wanted, outputs[0], outputs[1], outputs[2]);
            }
        }

        throw new NotSupportedException($"Return type {typeof(TResult)} is not supported");
    }

    /// <summary>Synonym for run() - executes forward pass with typed result.</summary>
    public new TResult forward(params torch.Tensor[] inputs) => run(inputs);

    /// <summary>Synonym for run() - executes the model with typed result.</summary>
    public new TResult call(params torch.Tensor[] inputs) => run(inputs);
}
}
Loading