Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ __Bug Fixes__:

__API Changes__:

#1498 Add support for torch.export ExportedProgram models (.pt2 files)<br/>
#1503 Add ReadOnlySpan overloads to many methods.<br/>
#1478 Fix `torch.jit.ScriptModule.zero_grad`.<br/>
#1495 Make `torchvision.io.read_image` and `torchvision.io.read_image_async` allow subsequent opening of the file for reading.<br/>
Expand Down
2 changes: 2 additions & 0 deletions src/Native/LibTorchSharp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ set(SOURCES
crc32c.h
THSAutograd.h
THSData.h
THSExport.h
THSJIT.h
THSNN.h
THSStorage.h
Expand All @@ -23,6 +24,7 @@ set(SOURCES
THSActivation.cpp
THSAutograd.cpp
THSData.cpp
THSExport.cpp
THSFFT.cpp
THSJIT.cpp
THSLinearAlgebra.cpp
Expand Down
63 changes: 63 additions & 0 deletions src/Native/LibTorchSharp/THSExport.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
#include "THSExport.h"

// torch.export support via AOTInductor
// This uses torch::inductor::AOTIModelPackageLoader which is INFERENCE-ONLY
// Models must be compiled with torch._inductor.aoti_compile_and_package() in Python

// Open an AOTInductor-compiled .pt2 package and hand ownership of the loader
// to the caller. Returns nullptr on failure; the error text is captured by the
// CATCH machinery for retrieval from the managed side.
ExportedProgramModule THSExport_load(const char* filename)
{
    CATCH(
        // The package must have been produced by
        // torch._inductor.aoti_compile_and_package() in Python; plain
        // torch.export.save() output is not loadable here.
        return new torch::inductor::AOTIModelPackageLoader(filename);
    );

    return nullptr;
}

// Release the native AOTIModelPackageLoader created by THSExport_load.
// Safe to call with nullptr (delete on a null pointer is a no-op).
void THSExport_Module_dispose(const ExportedProgramModule module)
{
delete module;
}

void THSExport_Module_run(
const ExportedProgramModule module,
const Tensor* input_tensors,
const int input_length,
Tensor** result_tensors,
int64_t* result_length)
{
*result_tensors = nullptr;
*result_length = 0;

CATCH(
// Convert input tensor pointers to std::vector<torch::Tensor>
std::vector<torch::Tensor> inputs;
inputs.reserve(input_length);
for (int i = 0; i < input_length; i++) {
inputs.push_back(*input_tensors[i]);
}

// Run inference
std::vector<torch::Tensor> outputs = module->run(inputs);

// Allocate output array and copy results
auto count = outputs.size();
auto* tensors = new Tensor[count];

Comment on lines +46 to +49
Copy link

Copilot AI Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The returned pointer array is allocated with new Tensor[outputs.size()] but there is no corresponding exported API to free it from managed code (and FreeHGlobal is not compatible with new[]). Add an exported free function that delete[]s this array (or switch to a caller-provided allocator callback), and consider using size_t/int64_t for result_length to avoid truncation from outputs.size().

Copilot uses AI. Check for mistakes.
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed both issues. Added THSExport_Module_run_free_results() for proper delete[] cleanup, and changed result_length from int to int64_t to avoid truncation.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed — added THSExport_Module_run_free_results() with delete[], and changed result_length to int64_t.

for (size_t i = 0; i < count; i++) {
tensors[i] = new torch::Tensor(outputs[i]);
}

// Only expose to caller after full success
*result_tensors = tensors;
*result_length = static_cast<int64_t>(count);
);
}

// Free the result array allocated with new[] by THSExport_Module_run.
// This releases only the array of handles, not the tensors it pointed at —
// those are owned by the managed Tensor wrappers by the time this is called.
// Safe to call with nullptr.
void THSExport_Module_run_free_results(Tensor* result_tensors)
{
delete[] result_tensors;
}
39 changes: 39 additions & 0 deletions src/Native/LibTorchSharp/THSExport.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.
#pragma once

#include "../Stdafx.h"

#include "torch/torch.h"
#include "torch/csrc/inductor/aoti_package/model_package_loader.h"

#include "Utils.h"

// torch.export ExportedProgram module via AOTInductor
// Note: Uses torch::inductor::AOTIModelPackageLoader for inference-only execution
// The handle is an owning raw pointer managed by THSExport_load / THSExport_Module_dispose.
typedef torch::inductor::AOTIModelPackageLoader* ExportedProgramModule;

// torch.export support via AOTInductor - Load and execute PyTorch ExportedProgram models (.pt2 files)
// ExportedProgram is PyTorch 2.x's recommended way to export models for production deployment
//
// IMPORTANT: This implementation uses torch::inductor::AOTIModelPackageLoader which is
// INFERENCE-ONLY. Training, parameter updates, and device movement are not supported.
// Models must be compiled with torch._inductor.aoti_compile_and_package() in Python.

// Load an AOTInductor-compiled model package from a .pt2 file.
// Returns nullptr on failure; the error message is captured for the managed side.
EXPORT_API(ExportedProgramModule) THSExport_load(const char* filename);

// Dispose of an ExportedProgram module previously returned by THSExport_load.
EXPORT_API(void) THSExport_Module_dispose(const ExportedProgramModule module);

// Execute the ExportedProgram's forward method (inference only)
// Input: Array of tensors
// Output: Array of result tensors (caller must free with THSExport_Module_run_free_results)
// On failure, *result_tensors is nullptr and *result_length is 0.
EXPORT_API(void) THSExport_Module_run(
const ExportedProgramModule module,
const Tensor* input_tensors,
const int input_length,
Tensor** result_tensors,
int64_t* result_length);

// Free the result tensor array allocated by THSExport_Module_run
// (array only — the tensors themselves are owned by the managed wrappers).
EXPORT_API(void) THSExport_Module_run_free_results(Tensor* result_tensors);
238 changes: 238 additions & 0 deletions src/TorchSharp/Export/ExportedProgram.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
// Copyright (c) .NET Foundation and Contributors. All Rights Reserved. See LICENSE in the project root for license information.

using System;
using System.Runtime.InteropServices;
using static TorchSharp.PInvoke.NativeMethods;

namespace TorchSharp
{
public static partial class torch
{
    public static partial class export
    {
        /// <summary>
        /// Load a PyTorch ExportedProgram from a .pt2 file compiled with AOTInductor.
        /// </summary>
        /// <param name="filename">Path to the .pt2 file</param>
        /// <returns>ExportedProgram model for inference</returns>
        /// <remarks>
        /// IMPORTANT: The .pt2 file must be compiled with torch._inductor.aoti_compile_and_package() in Python.
        /// Models saved with torch.export.save() alone will NOT work - they require AOTInductor compilation.
        ///
        /// This implementation is INFERENCE-ONLY. Training, parameter updates, and device movement
        /// are not supported. The model is compiled for a specific device (CPU/CUDA) at compile time.
        ///
        /// Example Python code to create compatible .pt2 files:
        /// <code>
        /// import torch
        /// import torch._inductor
        ///
        /// # Export the model
        /// exported = torch.export.export(model, example_inputs)
        ///
        /// # Compile with AOTInductor (required for C++ loading)
        /// torch._inductor.aoti_compile_and_package(
        ///     exported,
        ///     package_path="model.pt2"
        /// )
        /// </code>
        /// </remarks>
        public static ExportedProgram load(string filename) => new ExportedProgram(filename);

        /// <summary>
        /// Load a PyTorch ExportedProgram with typed output.
        /// See <see cref="load(string)"/> for the compilation requirements the .pt2 file must meet.
        /// </summary>
        /// <typeparam name="TResult">The typed result: Tensor, Tensor[], or a ValueTuple of Tensors.</typeparam>
        /// <param name="filename">Path to the .pt2 file</param>
        public static ExportedProgram<TResult> load<TResult>(string filename) => new ExportedProgram<TResult>(filename);
    }
}

/// <summary>
/// Represents a PyTorch ExportedProgram loaded from an AOTInductor-compiled .pt2 file.
/// This is an INFERENCE-ONLY implementation - training and parameter updates are not supported.
/// </summary>
/// <remarks>
/// Unlike TorchScript models, ExportedProgram models are ahead-of-time (AOT) compiled for
/// a specific device and are optimized for inference performance.
///
/// Key limitations:
/// - Inference only (no training, no gradients)
/// - No parameter access or updates
/// - No device movement (compiled for specific device)
/// - No dynamic model structure changes
///
/// Use torch.jit for models that require training or dynamic behavior.
/// </remarks>
public class ExportedProgram : IDisposable
{
    // Native AOTIModelPackageLoader handle; IntPtr.Zero once disposed.
    private IntPtr handle;
    private bool _disposed = false;

    internal ExportedProgram(string filename)
    {
        handle = THSExport_load(filename);
        // A zero handle means the native load threw; surface the stored error.
        if (handle == IntPtr.Zero)
            torch.CheckForErrors();
    }

    /// <summary>
    /// Run inference on the model with the given input tensors.
    /// </summary>
    /// <param name="inputs">Input tensors for the model</param>
    /// <returns>Array of output tensors</returns>
    /// <exception cref="ObjectDisposedException">The model has been disposed.</exception>
    /// <exception cref="ArgumentNullException">The inputs array is null.</exception>
    /// <remarks>
    /// The number and shapes of inputs must match what the model was exported with.
    /// All inputs must be on the same device that the model was compiled for.
    /// </remarks>
    public torch.Tensor[] run(params torch.Tensor[] inputs)
    {
        if (_disposed)
            throw new ObjectDisposedException(nameof(ExportedProgram));
        if (inputs is null)
            throw new ArgumentNullException(nameof(inputs));

        // Convert managed tensors to an array of native handles.
        IntPtr[] input_handles = new IntPtr[inputs.Length];
        for (int i = 0; i < inputs.Length; i++)
        {
            input_handles[i] = inputs[i].Handle;
        }

        // Call native run method; on native failure result_ptr is zero and
        // CheckForErrors throws.
        THSExport_Module_run(handle, input_handles, inputs.Length, out IntPtr result_ptr, out long result_length);
        torch.CheckForErrors();

        // Validate before the narrowing cast so a bogus native length cannot
        // cause an overflowed allocation.
        if (result_length < 0 || result_length > int.MaxValue)
            throw new InvalidOperationException(
                $"Native export run returned an out-of-range result length: {result_length}.");

        int count = (int)result_length;
        torch.Tensor[] results = new torch.Tensor[count];
        IntPtr[] result_handles = new IntPtr[count];

        try
        {
            // Marshal.Copy rejects zero-length copies from a degenerate
            // pointer, so only marshal when there is something to copy.
            if (count > 0)
            {
                Marshal.Copy(result_ptr, result_handles, 0, count);

                for (int i = 0; i < count; i++)
                {
                    results[i] = new torch.Tensor(result_handles[i]);
                }
            }
        }
        finally
        {
            // Free the native array (tensors are now owned by managed Tensor objects).
            THSExport_Module_run_free_results(result_ptr);
        }

        return results;
    }

    /// <summary>
    /// Synonym for run() - executes forward pass.
    /// </summary>
    public torch.Tensor[] forward(params torch.Tensor[] inputs) => run(inputs);

    /// <summary>
    /// Synonym for run() - executes the model.
    /// </summary>
    public torch.Tensor[] call(params torch.Tensor[] inputs) => run(inputs);

    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    protected virtual void Dispose(bool disposing)
    {
        if (!_disposed)
        {
            if (handle != IntPtr.Zero)
            {
                THSExport_Module_dispose(handle);
                handle = IntPtr.Zero;
            }
            _disposed = true;
        }
    }

    ~ExportedProgram()
    {
        Dispose(false);
    }
}

/// <summary>
/// Generic version of ExportedProgram with typed output.
/// </summary>
/// <typeparam name="TResult">The return type (Tensor, Tensor[], or tuple of Tensors)</typeparam>
public class ExportedProgram<TResult> : ExportedProgram
{
    internal ExportedProgram(string filename) : base(filename)
    {
    }

    /// <summary>
    /// Run inference with typed return value.
    /// Supported result types: Tensor (exactly one output), Tensor[] (any count),
    /// ValueTuple&lt;Tensor, Tensor&gt; and ValueTuple&lt;Tensor, Tensor, Tensor&gt;.
    /// </summary>
    public new TResult run(params torch.Tensor[] inputs)
    {
        var outputs = base.run(inputs);
        var wanted = typeof(TResult);

        // A single tensor: the model must have produced exactly one output.
        if (wanted == typeof(torch.Tensor))
        {
            if (outputs.Length != 1)
                throw new InvalidOperationException($"Expected 1 output tensor, got {outputs.Length}");
            return (TResult)(object)outputs[0];
        }

        // The raw array: hand back whatever the model produced.
        if (wanted == typeof(torch.Tensor[]))
            return (TResult)(object)outputs;

        // Tuple results: only 2- and 3-element ValueTuples of Tensors are supported.
        if (wanted.IsGenericType)
        {
            var definition = wanted.GetGenericTypeDefinition();

            if (definition == typeof(ValueTuple<,>))
            {
                var elementTypes = wanted.GetGenericArguments();
                if (elementTypes[0] != typeof(torch.Tensor) || elementTypes[1] != typeof(torch.Tensor))
                    throw new NotSupportedException(
                        $"Tuple return type {wanted} is not supported. Only ValueTuple<Tensor, Tensor> is supported.");

                if (outputs.Length != 2)
                    throw new InvalidOperationException($"Expected 2 output tensors, got {outputs.Length}");
                return (TResult)Activator.CreateInstance(wanted, outputs[0], outputs[1]);
            }

            if (definition == typeof(ValueTuple<,,>))
            {
                var elementTypes = wanted.GetGenericArguments();
                if (elementTypes[0] != typeof(torch.Tensor) || elementTypes[1] != typeof(torch.Tensor) || elementTypes[2] != typeof(torch.Tensor))
                    throw new NotSupportedException(
                        $"Tuple return type {wanted} is not supported. Only ValueTuple<Tensor, Tensor, Tensor> is supported.");

                if (outputs.Length != 3)
                    throw new InvalidOperationException($"Expected 3 output tensors, got {outputs.Length}");
                return (TResult)Activator.CreateInstance(wanted, outputs[0], outputs[1], outputs[2]);
            }
        }

        throw new NotSupportedException($"Return type {typeof(TResult)} is not supported");
    }

    /// <summary>Synonym for run() - executes forward pass with typed result.</summary>
    public new TResult forward(params torch.Tensor[] inputs) => run(inputs);

    /// <summary>Synonym for run() - executes the model with typed result.</summary>
    public new TResult call(params torch.Tensor[] inputs) => run(inputs);
}
}
Loading