diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9794447..ca85094 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -186,6 +186,82 @@ jobs: ' continue-on-error: true # Free-threading is experimental + # Sanitizer builds for detecting memory issues and race conditions + test-sanitizers: + name: ${{ matrix.sanitizer }} / Python ${{ matrix.python }} + runs-on: ubuntu-24.04 + + strategy: + fail-fast: false + matrix: + include: + # ASan + UBSan with Python 3.12 + - sanitizer: "ASan+UBSan" + python: "3.12" + cmake_flags: "-DENABLE_ASAN=ON -DENABLE_UBSAN=ON" + env_vars: "ASAN_OPTIONS=detect_leaks=1:abort_on_error=1" + # ASan + UBSan with Python 3.13 + - sanitizer: "ASan+UBSan" + python: "3.13" + cmake_flags: "-DENABLE_ASAN=ON -DENABLE_UBSAN=ON" + env_vars: "ASAN_OPTIONS=detect_leaks=1:abort_on_error=1" + # TSan with Python 3.12 (separate because incompatible with ASan) + - sanitizer: "TSan" + python: "3.12" + cmake_flags: "-DENABLE_TSAN=ON" + env_vars: "TSAN_OPTIONS=second_deadlock_stack=1" + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python }} + + - name: Set up Erlang + uses: erlef/setup-beam@v1 + with: + otp-version: "27.0" + rebar3-version: "3.24" + + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y cmake + + - name: Set Python library path + run: | + PYTHON_LIB=$(python3 -c "import sysconfig; print(sysconfig.get_config_var('LIBDIR'))") + echo "LD_LIBRARY_PATH=${PYTHON_LIB}:${LD_LIBRARY_PATH}" >> $GITHUB_ENV + + - name: Clean and compile with sanitizers + run: | + rm -rf _build/cmake + mkdir -p _build/cmake + cd _build/cmake + cmake ../../c_src ${{ matrix.cmake_flags }} + cmake --build . -- -j $(nproc) + cd ../.. 
+ rebar3 compile + + - name: Run tests with sanitizers + env: + ASAN_OPTIONS: ${{ contains(matrix.env_vars, 'ASAN_OPTIONS') && 'detect_leaks=1:abort_on_error=1' || '' }} + TSAN_OPTIONS: ${{ contains(matrix.env_vars, 'TSAN_OPTIONS') && 'second_deadlock_stack=1' || '' }} + run: | + rebar3 ct --readable=compact + + - name: Check debug counters + run: | + erl -pa _build/default/lib/erlang_python/ebin -noshell -eval ' + application:ensure_all_started(erlang_python), + Counters = py_nif:get_debug_counters(), + io:format("Debug counters: ~p~n", [Counters]), + halt(). + ' + lint: name: Lint runs-on: ubuntu-24.04 diff --git a/CHANGELOG.md b/CHANGELOG.md index 216f137..a666a9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,18 @@ ### Added +- **`erlang.reactor` module** - FD-based protocol handling for building custom servers + - `reactor.Protocol` - Base class for implementing protocols + - `reactor.serve(sock, protocol_factory)` - Serve connections using a protocol + - `reactor.run_fd(fd, protocol_factory)` - Handle a single FD with a protocol + - Integrates with Erlang's `enif_select` for efficient I/O multiplexing + - Zero-copy buffer management for high-throughput scenarios + +- **ETF encoding for PIDs and References** - Full Erlang term format support + - Erlang PIDs encode/decode properly in ETF binary format + - Erlang References encode/decode properly in ETF binary format + - Enables proper serialization for distributed Erlang communication + - **PID serialization** - Erlang PIDs now convert to `erlang.Pid` objects in Python and back to real PIDs when returned to Erlang. Previously, PIDs fell through to `None` (Erlang→Python) or string representation (Python→Erlang). @@ -16,12 +28,106 @@ Subclass of `Exception`, so it's catchable with `except Exception` or `except erlang.ProcessError`. 
+- **Audit hook sandbox** - Block dangerous operations when running inside Erlang VM + - Uses Python's `sys.addaudithook()` (PEP 578) for low-level blocking + - Blocks: `os.fork`, `os.system`, `os.popen`, `os.exec*`, `os.spawn*`, `subprocess.Popen` + - Raises `RuntimeError` with clear message about using Erlang ports instead + - Automatically installed when `py_event_loop` NIF is available + +- **Process-per-context architecture** - Each Python context runs in dedicated process + - `py_context_process` - Gen_server managing a single Python context + - `py_context_sup` - Supervisor for context processes + - `py_context_router` - Routes calls to appropriate context process + - Improved isolation between contexts + - Better crash recovery and resource management + +- **Worker thread pool** - High-throughput Python operations + - Configurable pool size for parallel execution + - Efficient work distribution across threads + +- **`py:contexts_started/0`** - Helper to check if contexts are ready + ### Changed +- **`py:call_async` renamed to `py:cast`** - Follows gen_server convention where + `call` is synchronous and `cast` is asynchronous. The semantics are identical, + only the name changed. 
+ +- **Unified `erlang` Python module** - Consolidated callback and event loop APIs + - `erlang.run(coro)` - Run coroutine with ErlangEventLoop (like uvloop.run) + - `erlang.new_event_loop()` - Create new ErlangEventLoop instance + - `erlang.install()` - Install ErlangEventLoopPolicy (deprecated in 3.12+) + - `erlang.EventLoopPolicy` - Alias for ErlangEventLoopPolicy + - Removed separate `erlang_asyncio` module - all functionality now in `erlang` + +- **Async worker backend replaced with event loop model** - The pthread+usleep + polling async workers have been replaced with an event-driven model using + `py_event_loop` and `enif_select`: + - Removed `py_async_worker.erl` and `py_async_worker_sup.erl` + - Removed `py_async_worker_t` and `async_pending_t` structs from C code + - Deprecated `async_worker_new`, `async_call`, `async_gather`, `async_stream` NIFs + - Added `py_event_loop_pool.erl` for managing event loop-based async execution + - Added `py_event_loop:run_async/2` for submitting coroutines to event loops + - Added `nif_event_loop_run_async` NIF for direct coroutine submission + - Added `_run_and_send` wrapper in Python for result delivery via `erlang.send()` + - **Internal change**: `py:async_call/3,4` and `py:await/1,2` API unchanged + - **`SuspensionRequired` base class** - Now inherits from `BaseException` instead of `Exception`. This prevents ASGI/WSGI middleware `except Exception` handlers from intercepting the suspension control flow used by `erlang.call()`. +- **Per-interpreter isolation in py_event_loop.c** - Removed global state for + proper subinterpreter support. Each interpreter now has isolated event loop state. + +- **ErlangEventLoopPolicy always returns ErlangEventLoop** - Previously only + returned ErlangEventLoop for main thread; now consistent across all threads. + +### Removed + +- **Context affinity functions** - Removed `py:bind`, `py:unbind`, `py:is_bound`, + `py:with_context`, and `py:ctx_*` functions. 
The new `py_context_router` provides + automatic scheduler-affinity routing. For explicit context control, use + `py_context_router:bind_context/1` and `py_context:call/5`. + +- **Signal handling support** - Removed `add_signal_handler`/`remove_signal_handler` + from ErlangEventLoop. Signal handling should be done at the Erlang VM level. + Methods now raise `NotImplementedError` with guidance. + +- **Subprocess support** - ErlangEventLoop raises `NotImplementedError` for + `subprocess_shell` and `subprocess_exec`. Use Erlang ports (`open_port/2`) + for subprocess management instead. + +### Fixed + +- **FD stealing and UDP connected socket issues** - Fixed file descriptor handling + for UDP sockets in connected mode + +- **Context test expectations** - Updated tests for Python contextvars behavior + +- **Unawaited coroutine warnings** - Fixed warnings in test suite + +- **Timer scheduling for standalone ErlangEventLoop** - Fixed timer callbacks not + firing for loops created outside the main event loop infrastructure + +- **Subinterpreter cleanup and thread worker re-registration** - Fixed cleanup + issues when subinterpreters are destroyed and recreated + +- **Thread worker handlers not re-registering after app restart** - Workers now + properly re-register when application restarts + +- **Timeout handling** - Improved timeout handling across the codebase + +- **Eval locals_term initialization** - Fixed uninitialized variable in eval + +- **Two race conditions in worker pool** - Fixed concurrent access issues + +### Performance + +- **Async coroutine latency reduced from ~10-20ms to <1ms** - The event loop model + eliminates pthread polling overhead +- **Zero CPU usage when idle** - Event-driven instead of usleep-based polling +- **No extra threads** - Coroutines run on the existing event loop infrastructure + ## 1.8.1 (2026-02-25) ### Fixed @@ -102,16 +208,15 @@ ### Added - **Shared Router Architecture for Event Loops** - - Single `py_event_router` process handles 
all event loops (both shared and isolated) + - Single `py_event_router` process handles all event loops - Timer and FD messages include loop identity for correct dispatch - Eliminates need for per-loop router processes - Handle-based Python C API using PyCapsule for loop references -- **Isolated Event Loops** - Create isolated event loops with `ErlangEventLoop(isolated=True)` - - Default (`isolated=False`): uses the shared global loop managed by Erlang - - Isolated (`isolated=True`): creates a dedicated loop with its own pending queue - - Full asyncio support (timers, FD operations) for both modes - - Useful for multi-threaded Python applications where each thread needs its own loop +- **Per-Loop Capsule Architecture** - Each `ErlangEventLoop` instance has its own isolated capsule + - Dedicated pending queue per loop for proper event routing + - Full asyncio support (timers, FD operations) with correct loop isolation + - Safe for multi-threaded Python applications where each thread needs its own loop - See `docs/asyncio.md` for usage and architecture details ## 1.6.1 (2026-02-22) diff --git a/README.md b/README.md index 37282d8..79f1b5c 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,7 @@ Key features: - **AI/ML ready** - Examples for embeddings, semantic search, RAG, and LLMs - **Logging integration** - Python logging forwarded to Erlang logger - **Distributed tracing** - Span-based tracing from Python code +- **Security sandbox** - Blocks fork/exec operations that would corrupt the VM ## Requirements @@ -66,7 +67,7 @@ application:ensure_all_started(erlang_python). {ok, 25} = py:eval(<<"x * y">>, #{x => 5, y => 5}). %% Async calls -Ref = py:call_async(math, factorial, [100]), +Ref = py:cast(math, factorial, [100]), {ok, Result} = py:await(Ref). %% Streaming from generators @@ -443,7 +444,7 @@ escript examples/logging_example.erl {ok, Result} = py:call(Module, Function, Args, KwArgs, Timeout). %% Async -Ref = py:call_async(Module, Function, Args). 
+Ref = py:cast(Module, Function, Args). {ok, Result} = py:await(Ref). {ok, Result} = py:await(Ref, Timeout). ``` @@ -573,6 +574,8 @@ py:execution_mode(). %% => free_threaded | subinterp | multi_executor - [Threading](docs/threading.md) - [Logging and Tracing](docs/logging.md) - [Asyncio Event Loop](docs/asyncio.md) - Erlang-native asyncio with TCP/UDP support +- [Reactor](docs/reactor.md) - FD-based protocol handling +- [Security](docs/security.md) - Sandbox and blocked operations - [Web Frameworks](docs/web-frameworks.md) - ASGI/WSGI integration - [Changelog](https://github.com/benoitc/erlang-python/releases) diff --git a/benchmark_results/baseline_20260224_133948.txt.log b/benchmark_results/baseline_20260224_133948.txt.log new file mode 100644 index 0000000..484ee73 --- /dev/null +++ b/benchmark_results/baseline_20260224_133948.txt.log @@ -0,0 +1,10 @@ +Error! Failed to eval: + application:ensure_all_started(erlang_python), + Results = py_scalable_io_bench:run_all(), + py_scalable_io_bench:save_results(Results, "/Users/benoitc/Projects/erlang-python/benchmark_results/baseline_20260224_133948.txt"), + init:stop() + + +Runtime terminating during boot ({undef,[{py_scalable_io_bench,run_all,[],[]},{erl_eval,do_apply,7,[{file,"erl_eval.erl"},{line,920}]},{erl_eval,expr,6,[{file,"erl_eval.erl"},{line,668}]},{erl_eval,exprs,6,[{file,"erl_eval.erl"},{line,276}]},{init,start_it,1,[]},{init,start_em,1,[]},{init,do_boot,3,[]}]}) + +Crash dump is being written to: erl_crash.dump...done diff --git a/benchmark_results/current_20260224_133950.txt.log b/benchmark_results/current_20260224_133950.txt.log new file mode 100644 index 0000000..75de770 --- /dev/null +++ b/benchmark_results/current_20260224_133950.txt.log @@ -0,0 +1,10 @@ +Error! 
Failed to eval: + application:ensure_all_started(erlang_python), + Results = py_scalable_io_bench:run_all(), + py_scalable_io_bench:save_results(Results, "/Users/benoitc/Projects/erlang-python/benchmark_results/current_20260224_133950.txt"), + init:stop() + + +Runtime terminating during boot ({undef,[{py_scalable_io_bench,run_all,[],[]},{erl_eval,do_apply,7,[{file,"erl_eval.erl"},{line,920}]},{erl_eval,expr,6,[{file,"erl_eval.erl"},{line,668}]},{erl_eval,exprs,6,[{file,"erl_eval.erl"},{line,276}]},{init,start_it,1,[]},{init,start_em,1,[]},{init,do_boot,3,[]}]}) + +Crash dump is being written to: erl_crash.dump...done diff --git a/c_src/CMakeLists.txt b/c_src/CMakeLists.txt index 9afdff0..c110207 100644 --- a/c_src/CMakeLists.txt +++ b/c_src/CMakeLists.txt @@ -50,6 +50,40 @@ endif() # Performance build option (for maximum optimization) option(PERF_BUILD "Enable aggressive performance optimizations (-O3, LTO, native arch)" OFF) +# ASGI profiling option (for internal timing analysis) +option(ASGI_PROFILING "Enable ASGI internal profiling" OFF) +if(ASGI_PROFILING) + message(STATUS "ASGI profiling enabled - timing instrumentation active") + add_definitions(-DASGI_PROFILING) +endif() + +# Sanitizer options for debugging race conditions and memory issues +option(ENABLE_ASAN "Enable AddressSanitizer" OFF) +option(ENABLE_TSAN "Enable ThreadSanitizer" OFF) +option(ENABLE_UBSAN "Enable UndefinedBehaviorSanitizer" OFF) + +if(ENABLE_ASAN) + message(STATUS "AddressSanitizer enabled") + add_compile_options(-fsanitize=address -fno-omit-frame-pointer -g -O1) + add_link_options(-fsanitize=address) + # ASan is incompatible with TSan + if(ENABLE_TSAN) + message(FATAL_ERROR "ASan and TSan cannot be used together") + endif() +endif() + +if(ENABLE_TSAN) + message(STATUS "ThreadSanitizer enabled") + add_compile_options(-fsanitize=thread -fno-omit-frame-pointer -g -O1) + add_link_options(-fsanitize=thread) +endif() + +if(ENABLE_UBSAN) + message(STATUS "UndefinedBehaviorSanitizer enabled") 
+ add_compile_options(-fsanitize=undefined -fno-omit-frame-pointer -g -O1) + add_link_options(-fsanitize=undefined) +endif() + if(PERF_BUILD) message(STATUS "Performance build enabled - using aggressive optimizations") # Override compiler flags for maximum performance @@ -63,7 +97,12 @@ include(FindErlang) include_directories(${ERLANG_ERTS_INCLUDE_PATH}) # Find Python using CMake's built-in FindPython3 -# Use PYTHON_CONFIG env variable to hint which Python to use +# +# To specify a particular Python installation, set PYTHON_CONFIG env variable: +# PYTHON_CONFIG=/opt/local/bin/python3.14-config cmake ... +# +# CMake will use its default search order otherwise. + if(DEFINED ENV{PYTHON_CONFIG}) # Extract prefix from python-config for hinting execute_process( @@ -82,6 +121,67 @@ message(STATUS "Python3 include dirs: ${Python3_INCLUDE_DIRS}") message(STATUS "Python3 libraries: ${Python3_LIBRARIES}") message(STATUS "Python3 library: ${Python3_LIBRARY}") + +# Detect Python features for worker pool optimization +# We use both version checks and compile tests to verify actual API availability + +include(CheckCSourceCompiles) + +# First check Python version - subinterpreters with OWN_GIL require Python 3.12+ +if(Python3_VERSION VERSION_GREATER_EQUAL "3.12") + message(STATUS "Python ${Python3_VERSION} >= 3.12, checking subinterpreter API...") + + # Save and set required variables for compile test + set(CMAKE_REQUIRED_INCLUDES ${Python3_INCLUDE_DIRS}) + set(CMAKE_REQUIRED_LIBRARIES Python3::Python) + + # Clear any cached result to ensure fresh detection + unset(HAVE_SUBINTERPRETERS CACHE) + + # Check for subinterpreter API with per-interpreter GIL (PEP 684, Python 3.12+) + # This verifies PyInterpreterConfig and PyInterpreterConfig_OWN_GIL are available + check_c_source_compiles(" +#define PY_SSIZE_T_CLEAN +#include <Python.h> +int main(void) { + PyInterpreterConfig config = { + .use_main_obmalloc = 0, + .allow_fork = 0, + .allow_exec = 0, + .allow_threads = 1, + .allow_daemon_threads = 0, 
+ .check_multi_interp_extensions = 1, + .gil = PyInterpreterConfig_OWN_GIL, + }; + (void)config; + return 0; +} +" HAVE_SUBINTERPRETERS) + + if(HAVE_SUBINTERPRETERS) + message(STATUS "Subinterpreter API detected (PyInterpreterConfig_OWN_GIL available)") + else() + message(STATUS "Subinterpreter API compile test failed, using shared GIL fallback") + endif() +else() + message(STATUS "Python ${Python3_VERSION} < 3.12, subinterpreter API not available") + set(HAVE_SUBINTERPRETERS FALSE) +endif() + +# Check for free-threaded Python (Python 3.13+ with --disable-gil / nogil build) +# Free-threaded builds have Py_GIL_DISABLED defined in sysconfig +execute_process( + COMMAND ${Python3_EXECUTABLE} -c "import sysconfig; print('yes' if sysconfig.get_config_var('Py_GIL_DISABLED') else 'no')" + OUTPUT_VARIABLE Python3_FREE_THREADED + OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET +) +if(Python3_FREE_THREADED STREQUAL "yes") + set(HAVE_FREE_THREADED TRUE) + message(STATUS "Free-threaded Python detected: GIL disabled at runtime") +else() + set(HAVE_FREE_THREADED FALSE) +endif() + # Create the NIF shared library add_library(py_nif MODULE py_nif.c) @@ -97,6 +197,14 @@ elseif(Python3_LIBRARIES) message(STATUS "Using Python library path for dlopen: ${Python3_FIRST_LIB}") endif() +# Add Python feature compile definitions for worker pool optimization +if(HAVE_SUBINTERPRETERS) + target_compile_definitions(py_nif PRIVATE HAVE_SUBINTERPRETERS=1) +endif() +if(HAVE_FREE_THREADED) + target_compile_definitions(py_nif PRIVATE HAVE_FREE_THREADED=1) +endif() + # Set output name set_target_properties(py_nif PROPERTIES PREFIX "" diff --git a/c_src/py_asgi.c b/c_src/py_asgi.c index d96a99a..72ec58e 100644 --- a/c_src/py_asgi.c +++ b/c_src/py_asgi.c @@ -50,6 +50,61 @@ static pthread_mutex_t g_interp_state_mutex = PTHREAD_MUTEX_INITIALIZER; /* Flag: ASGI subsystem is initialized (not per-interpreter) */ static bool g_asgi_initialized = false; +/* 
============================================================================ + * Internal Profiling Support + * ============================================================================ + * When ASGI_PROFILING is defined, detailed timing of each phase is collected. + * Enable with: -DASGI_PROFILING during compilation + */ +#ifdef ASGI_PROFILING +#include <sys/time.h> + +typedef struct { + uint64_t count; + uint64_t gil_acquire_us; + uint64_t string_conv_us; + uint64_t module_import_us; + uint64_t get_callable_us; + uint64_t scope_build_us; + uint64_t body_conv_us; + uint64_t runner_import_us; + uint64_t runner_call_us; + uint64_t response_extract_us; + uint64_t gil_release_us; + uint64_t total_us; +} asgi_profile_stats_t; + +static asgi_profile_stats_t g_asgi_profile = {0}; +static pthread_mutex_t g_asgi_profile_mutex = PTHREAD_MUTEX_INITIALIZER; + +static inline uint64_t get_time_us(void) { + struct timeval tv; + gettimeofday(&tv, NULL); + return (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec; +} + +#define PROF_START() uint64_t _prof_start = get_time_us(), _prof_prev = _prof_start, _prof_now +#define PROF_MARK(field) do { \ + _prof_now = get_time_us(); \ + pthread_mutex_lock(&g_asgi_profile_mutex); \ + g_asgi_profile.field += (_prof_now - _prof_prev); \ + pthread_mutex_unlock(&g_asgi_profile_mutex); \ + _prof_prev = _prof_now; \ +} while(0) +#define PROF_END() do { \ + _prof_now = get_time_us(); \ + pthread_mutex_lock(&g_asgi_profile_mutex); \ + g_asgi_profile.count++; \ + g_asgi_profile.total_us += (_prof_now - _prof_start); \ + pthread_mutex_unlock(&g_asgi_profile_mutex); \ +} while(0) + +#else +#define PROF_START() +#define PROF_MARK(field) +#define PROF_END() +#endif /* ASGI_PROFILING */ + /* ASGI-specific Erlang atoms for scope map keys */ ERL_NIF_TERM ATOM_ASGI_PATH; ERL_NIF_TERM ATOM_ASGI_HEADERS; @@ -2432,16 +2487,29 @@ static PyObject *get_cached_scope(ErlNifEnv *env, ERL_NIF_TERM scope_map) { * Output Erlang format: {Status, [{Header, Value}, ...], Body} */ static
ERL_NIF_TERM extract_asgi_response(ErlNifEnv *env, PyObject *result) { - /* Validate 3-element tuple, fallback to py_to_term if not */ - if (!PyTuple_Check(result) || PyTuple_Size(result) != 3) { + PyObject *py_status = NULL; + PyObject *py_headers = NULL; + PyObject *py_body = NULL; + + /* Handle both dict format (hornbeam) and tuple format */ + if (PyDict_Check(result)) { + /* Dict format: {'status': int, 'headers': list, 'body': bytes, ...} */ + py_status = PyDict_GetItemString(result, "status"); + py_headers = PyDict_GetItemString(result, "headers"); + py_body = PyDict_GetItemString(result, "body"); + + if (py_status == NULL || py_headers == NULL || py_body == NULL) { + return py_to_term(env, result); + } + } else if (PyTuple_Check(result) && PyTuple_Size(result) == 3) { + /* Tuple format: (status, headers, body) */ + py_status = PyTuple_GET_ITEM(result, 0); + py_headers = PyTuple_GET_ITEM(result, 1); + py_body = PyTuple_GET_ITEM(result, 2); + } else { return py_to_term(env, result); } - /* Get tuple elements (borrowed references) */ - PyObject *py_status = PyTuple_GET_ITEM(result, 0); - PyObject *py_headers = PyTuple_GET_ITEM(result, 1); - PyObject *py_body = PyTuple_GET_ITEM(result, 2); - /* Validate types */ if (!PyLong_Check(py_status) || !PyList_Check(py_headers) || !PyBytes_Check(py_body)) { return py_to_term(env, result); @@ -2555,6 +2623,8 @@ static ERL_NIF_TERM nif_asgi_build_scope(ErlNifEnv *env, int argc, const ERL_NIF } static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + PROF_START(); + if (argc < 5) { return make_error(env, "badarg"); } @@ -2580,6 +2650,7 @@ static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar } PyGILState_STATE gstate = PyGILState_Ensure(); + PROF_MARK(gil_acquire_us); ERL_NIF_TERM result; @@ -2594,6 +2665,7 @@ static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar PyGILState_Release(gstate); return make_error(env, "alloc_failed"); } + 
PROF_MARK(string_conv_us); /* Import module */ PyObject *module = PyImport_ImportModule(module_name); @@ -2601,6 +2673,7 @@ static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar result = make_py_error(env); goto cleanup; } + PROF_MARK(module_import_us); /* Get ASGI callable */ PyObject *asgi_app = PyObject_GetAttrString(module, callable_name); @@ -2609,6 +2682,7 @@ static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar result = make_py_error(env); goto cleanup; } + PROF_MARK(get_callable_us); /* Build optimized scope dict from Erlang map (with caching) */ PyObject *scope = get_cached_scope(env, argv[3]); @@ -2617,6 +2691,7 @@ static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar result = make_py_error(env); goto cleanup; } + PROF_MARK(scope_build_us); /* Convert body binary */ PyObject *body = asgi_binary_to_buffer(env, argv[4]); @@ -2626,6 +2701,7 @@ static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar result = make_py_error(env); goto cleanup; } + PROF_MARK(body_conv_us); /* Import the ASGI runner module */ PyObject *runner_module = PyImport_ImportModule(runner_name); @@ -2653,6 +2729,7 @@ static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar result = make_error(env, "runner_module_required"); goto cleanup; } + PROF_MARK(runner_import_us); /* Call _run_asgi_sync(module_name, callable_name, scope, body) */ PyObject *run_result = PyObject_CallMethod( @@ -2663,6 +2740,7 @@ static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar Py_DECREF(body); Py_DECREF(scope); Py_DECREF(asgi_app); + PROF_MARK(runner_call_us); if (run_result == NULL) { result = make_py_error(env); @@ -2672,6 +2750,7 @@ static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar /* Convert result to Erlang term using optimized extraction */ ERL_NIF_TERM term_result = extract_asgi_response(env, run_result); 
Py_DECREF(run_result); + PROF_MARK(response_extract_us); result = enif_make_tuple2(env, ATOM_OK, term_result); @@ -2680,6 +2759,86 @@ static ERL_NIF_TERM nif_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar enif_free(module_name); enif_free(callable_name); PyGILState_Release(gstate); + PROF_MARK(gil_release_us); + PROF_END(); return result; } + +#ifdef ASGI_PROFILING +/** + * @brief Get ASGI profiling statistics + * @return Map with timing breakdown + */ +static ERL_NIF_TERM nif_asgi_profile_stats(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + + pthread_mutex_lock(&g_asgi_profile_mutex); + asgi_profile_stats_t stats = g_asgi_profile; + pthread_mutex_unlock(&g_asgi_profile_mutex); + + if (stats.count == 0) { + return enif_make_tuple2(env, ATOM_OK, + enif_make_new_map(env)); + } + + ERL_NIF_TERM keys[12], values[12]; + int i = 0; + + keys[i] = enif_make_atom(env, "count"); + values[i++] = enif_make_uint64(env, stats.count); + + keys[i] = enif_make_atom(env, "gil_acquire_us"); + values[i++] = enif_make_uint64(env, stats.gil_acquire_us); + + keys[i] = enif_make_atom(env, "string_conv_us"); + values[i++] = enif_make_uint64(env, stats.string_conv_us); + + keys[i] = enif_make_atom(env, "module_import_us"); + values[i++] = enif_make_uint64(env, stats.module_import_us); + + keys[i] = enif_make_atom(env, "get_callable_us"); + values[i++] = enif_make_uint64(env, stats.get_callable_us); + + keys[i] = enif_make_atom(env, "scope_build_us"); + values[i++] = enif_make_uint64(env, stats.scope_build_us); + + keys[i] = enif_make_atom(env, "body_conv_us"); + values[i++] = enif_make_uint64(env, stats.body_conv_us); + + keys[i] = enif_make_atom(env, "runner_import_us"); + values[i++] = enif_make_uint64(env, stats.runner_import_us); + + keys[i] = enif_make_atom(env, "runner_call_us"); + values[i++] = enif_make_uint64(env, stats.runner_call_us); + + keys[i] = enif_make_atom(env, "response_extract_us"); + values[i++] = enif_make_uint64(env, 
stats.response_extract_us); + + keys[i] = enif_make_atom(env, "gil_release_us"); + values[i++] = enif_make_uint64(env, stats.gil_release_us); + + keys[i] = enif_make_atom(env, "total_us"); + values[i++] = enif_make_uint64(env, stats.total_us); + + ERL_NIF_TERM map; + enif_make_map_from_arrays(env, keys, values, i, &map); + + return enif_make_tuple2(env, ATOM_OK, map); +} + +/** + * @brief Reset ASGI profiling statistics + */ +static ERL_NIF_TERM nif_asgi_profile_reset(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + + pthread_mutex_lock(&g_asgi_profile_mutex); + memset(&g_asgi_profile, 0, sizeof(g_asgi_profile)); + pthread_mutex_unlock(&g_asgi_profile_mutex); + + return ATOM_OK; +} +#endif /* ASGI_PROFILING */ diff --git a/c_src/py_callback.c b/c_src/py_callback.c index b1179d5..3195647 100644 --- a/c_src/py_callback.c +++ b/c_src/py_callback.c @@ -592,6 +592,495 @@ static ERL_NIF_TERM build_suspended_result(ErlNifEnv *env, suspended_state_t *su enif_make_tuple2(env, func_name_term, args_term)); } +/* ============================================================================ + * Context suspension helpers (for process-per-context architecture) + * + * These functions handle suspension/resume for py_context_t-based execution. + * Unlike worker suspension, context suspension doesn't use mutex or condvar - + * the context process handles callbacks inline via recursive receive. + * ============================================================================ */ + +/** + * Create a suspended context state for a py:call. + * + * Called when Python code in a context calls erlang.call() and suspension + * is required. Captures all state needed to resume after callback completes. 
+ * + * @param env NIF environment + * @param ctx Context executing the Python code + * @param module_bin Original module binary + * @param func_bin Original function binary + * @param args_term Original args term + * @param kwargs_term Original kwargs term + * @return suspended_context_state_t* or NULL on error + */ +static suspended_context_state_t *create_suspended_context_state_for_call( + ErlNifEnv *env, + py_context_t *ctx, + ErlNifBinary *module_bin, + ErlNifBinary *func_bin, + ERL_NIF_TERM args_term, + ERL_NIF_TERM kwargs_term) { + + /* Allocate the suspended context state resource */ + suspended_context_state_t *state = enif_alloc_resource( + PY_CONTEXT_SUSPENDED_RESOURCE_TYPE, sizeof(suspended_context_state_t)); + if (state == NULL) { + return NULL; + } + + /* Initialize to zero */ + memset(state, 0, sizeof(suspended_context_state_t)); + + state->ctx = ctx; + enif_keep_resource(ctx); /* Keep ctx alive while suspended state exists */ + state->callback_id = tl_pending_callback_id; + state->request_type = PY_REQ_CALL; + + /* Copy callback function name */ + state->callback_func_name = enif_alloc(tl_pending_func_name_len + 1); + if (state->callback_func_name == NULL) { + enif_release_resource(state); + return NULL; + } + memcpy(state->callback_func_name, tl_pending_func_name, tl_pending_func_name_len); + state->callback_func_name[tl_pending_func_name_len] = '\0'; + state->callback_func_len = tl_pending_func_name_len; + + /* Store callback args reference */ + Py_INCREF(tl_pending_args); + state->callback_args = tl_pending_args; + + /* Create environment to hold copied terms */ + state->orig_env = enif_alloc_env(); + if (state->orig_env == NULL) { + Py_DECREF(state->callback_args); + enif_free(state->callback_func_name); + enif_release_resource(state); + return NULL; + } + + /* Copy module binary */ + if (!enif_alloc_binary(module_bin->size, &state->orig_module)) { + enif_free_env(state->orig_env); + Py_DECREF(state->callback_args); + 
enif_free(state->callback_func_name); + enif_release_resource(state); + return NULL; + } + memcpy(state->orig_module.data, module_bin->data, module_bin->size); + + /* Copy function binary */ + if (!enif_alloc_binary(func_bin->size, &state->orig_func)) { + enif_release_binary(&state->orig_module); + enif_free_env(state->orig_env); + Py_DECREF(state->callback_args); + enif_free(state->callback_func_name); + enif_release_resource(state); + return NULL; + } + memcpy(state->orig_func.data, func_bin->data, func_bin->size); + + /* Copy args and kwargs to our environment */ + state->orig_args = enif_make_copy(state->orig_env, args_term); + state->orig_kwargs = enif_make_copy(state->orig_env, kwargs_term); + + return state; +} + +/** + * Create a suspended context state for a py:eval. + * + * Called when Python code in a context calls erlang.call() during eval + * and suspension is required. + * + * @param env NIF environment + * @param ctx Context executing the Python code + * @param code_bin Original code binary + * @param locals_term Original locals term + * @return suspended_context_state_t* or NULL on error + */ +static suspended_context_state_t *create_suspended_context_state_for_eval( + ErlNifEnv *env, + py_context_t *ctx, + ErlNifBinary *code_bin, + ERL_NIF_TERM locals_term) { + + (void)env; + + /* Allocate the suspended context state resource */ + suspended_context_state_t *state = enif_alloc_resource( + PY_CONTEXT_SUSPENDED_RESOURCE_TYPE, sizeof(suspended_context_state_t)); + if (state == NULL) { + return NULL; + } + + /* Initialize to zero */ + memset(state, 0, sizeof(suspended_context_state_t)); + + state->ctx = ctx; + enif_keep_resource(ctx); /* Keep ctx alive while suspended state exists */ + state->callback_id = tl_pending_callback_id; + state->request_type = PY_REQ_EVAL; + + /* Copy callback function name */ + state->callback_func_name = enif_alloc(tl_pending_func_name_len + 1); + if (state->callback_func_name == NULL) { + enif_release_resource(state); + 
return NULL; + } + memcpy(state->callback_func_name, tl_pending_func_name, tl_pending_func_name_len); + state->callback_func_name[tl_pending_func_name_len] = '\0'; + state->callback_func_len = tl_pending_func_name_len; + + /* Store callback args reference */ + Py_INCREF(tl_pending_args); + state->callback_args = tl_pending_args; + + /* Create environment to hold copied terms */ + state->orig_env = enif_alloc_env(); + if (state->orig_env == NULL) { + Py_DECREF(state->callback_args); + enif_free(state->callback_func_name); + enif_release_resource(state); + return NULL; + } + + /* Copy code binary */ + if (!enif_alloc_binary(code_bin->size, &state->orig_code)) { + enif_free_env(state->orig_env); + Py_DECREF(state->callback_args); + enif_free(state->callback_func_name); + enif_release_resource(state); + return NULL; + } + memcpy(state->orig_code.data, code_bin->data, code_bin->size); + + /* Copy locals to our environment */ + state->orig_locals = enif_make_copy(state->orig_env, locals_term); + + return state; +} + +/** + * Build the {suspended, ...} result term from a suspended context state. 
+ * + * @param env NIF environment + * @param suspended Suspended context state (resource will be released) + * @return ERL_NIF_TERM {suspended, CallbackId, StateRef, {FuncName, Args}} + * @note Clears tl_pending_callback + */ +static ERL_NIF_TERM build_suspended_context_result(ErlNifEnv *env, suspended_context_state_t *suspended) { + ERL_NIF_TERM state_ref = enif_make_resource(env, suspended); + enif_release_resource(suspended); + + ERL_NIF_TERM callback_id_term = enif_make_uint64(env, tl_pending_callback_id); + + ERL_NIF_TERM func_name_term; + unsigned char *fn_buf = enif_make_new_binary(env, tl_pending_func_name_len, &func_name_term); + memcpy(fn_buf, tl_pending_func_name, tl_pending_func_name_len); + + ERL_NIF_TERM args_term = py_to_term(env, tl_pending_args); + + tl_pending_callback = false; + + return enif_make_tuple4(env, + ATOM_SUSPENDED, + callback_id_term, + state_ref, + enif_make_tuple2(env, func_name_term, args_term)); +} + +/** + * Copy accumulated callback results from parent state to nested state. + * + * When a sequential callback occurs during replay, the nested suspended state + * needs to include all callback results from the parent PLUS the current result. + * This function copies parent's callback_results array and adds the parent's + * current result (result_data) to the end. + * + * @param nested The nested suspended state being created + * @param parent The parent suspended state (current tl_current_context_suspended) + * @return 0 on success, -1 on memory allocation failure + */ +static int copy_callback_results_to_nested(suspended_context_state_t *nested, + suspended_context_state_t *parent) { + if (parent == NULL) { + /* No parent state - nothing to copy */ + return 0; + } + + /* + * Calculate total results needed: parent's array + parent's current result. 
+ * + * IMPORTANT: We check result_data != NULL instead of has_result because + * has_result may have been set to false when the result was consumed + * during replay, but the result data is still valid and needs to be + * copied to the nested state for subsequent replays. + */ + size_t total_results = parent->num_callback_results; + bool has_current_result = (parent->result_data != NULL && parent->result_len > 0); + if (has_current_result) { + total_results += 1; + } + + if (total_results == 0) { + /* No results to copy */ + return 0; + } + + /* Allocate results array */ + nested->callback_results = enif_alloc(total_results * sizeof(nested->callback_results[0])); + if (nested->callback_results == NULL) { + return -1; + } + nested->callback_results_capacity = total_results; + nested->num_callback_results = total_results; + nested->callback_result_index = 0; + + /* Copy parent's accumulated results */ + for (size_t i = 0; i < parent->num_callback_results; i++) { + size_t len = parent->callback_results[i].len; + nested->callback_results[i].data = enif_alloc(len); + if (nested->callback_results[i].data == NULL) { + /* Cleanup on failure */ + for (size_t j = 0; j < i; j++) { + enif_free(nested->callback_results[j].data); + } + enif_free(nested->callback_results); + nested->callback_results = NULL; + nested->num_callback_results = 0; + nested->callback_results_capacity = 0; + return -1; + } + memcpy(nested->callback_results[i].data, parent->callback_results[i].data, len); + nested->callback_results[i].len = len; + } + + /* Add parent's current result (result_data) as the last element */ + if (has_current_result) { + size_t idx = parent->num_callback_results; + nested->callback_results[idx].data = enif_alloc(parent->result_len); + if (nested->callback_results[idx].data == NULL) { + /* Cleanup on failure */ + for (size_t j = 0; j < idx; j++) { + enif_free(nested->callback_results[j].data); + } + enif_free(nested->callback_results); + nested->callback_results = NULL; + 
nested->num_callback_results = 0;
+            nested->callback_results_capacity = 0;
+            return -1;
+        }
+        memcpy(nested->callback_results[idx].data, parent->result_data, parent->result_len);
+        nested->callback_results[idx].len = parent->result_len;
+    }
+
+    return 0;
+}
+
+/**
+ * Helper to convert __etf__:base64 strings to Python objects.
+ * Used for decoding pids and references in callback responses.
+ * Returns a NEW reference on success, NULL with exception on error.
+ */
+static PyObject *decode_etf_string(const char *str, Py_ssize_t len) {
+    /* Check for __etf__: prefix (8 chars) */
+    const char *prefix = "__etf__:";
+    size_t prefix_len = 8;
+
+    if (len <= (Py_ssize_t)prefix_len || strncmp(str, prefix, prefix_len) != 0) {
+        return NULL; /* Not an ETF string */
+    }
+
+    /* Extract base64 portion */
+    const char *b64_data = str + prefix_len;
+    size_t b64_len = len - prefix_len;
+
+    /* Import base64 module and decode */
+    PyObject *base64_mod = PyImport_ImportModule("base64");
+    if (base64_mod == NULL) {
+        PyErr_Clear();
+        return NULL;
+    }
+
+    PyObject *b64decode = PyObject_GetAttrString(base64_mod, "b64decode");
+    Py_DECREF(base64_mod);
+    if (b64decode == NULL) {
+        PyErr_Clear();
+        return NULL;
+    }
+
+    PyObject *b64_str = PyUnicode_FromStringAndSize(b64_data, b64_len);
+    if (b64_str == NULL) {
+        Py_DECREF(b64decode);
+        PyErr_Clear();
+        return NULL;
+    }
+
+    PyObject *decoded = PyObject_CallFunctionObjArgs(b64decode, b64_str, NULL);
+    Py_DECREF(b64decode);
+    Py_DECREF(b64_str);
+
+    if (decoded == NULL) {
+        PyErr_Clear();
+        return NULL;
+    }
+
+    /* Get the binary data */
+    char *bin_data;
+    Py_ssize_t bin_len;
+    if (PyBytes_AsStringAndSize(decoded, &bin_data, &bin_len) < 0) {
+        Py_DECREF(decoded);
+        PyErr_Clear();
+        return NULL;
+    }
+
+    /* Create a temporary NIF environment to decode the term */
+    ErlNifEnv *tmp_env = enif_alloc_env();
+    if (tmp_env == NULL) {
+        Py_DECREF(decoded);
+        return NULL;
+    }
+
+    /* Decode the ETF binary to an Erlang term */
+    ERL_NIF_TERM term;
+    if 
(enif_binary_to_term(tmp_env, (unsigned char *)bin_data, bin_len, &term, 0) == 0) { + /* Decoding failed */ + enif_free_env(tmp_env); + Py_DECREF(decoded); + return NULL; + } + + Py_DECREF(decoded); + + /* Convert the term to a Python object */ + PyObject *result = term_to_py(tmp_env, term); + enif_free_env(tmp_env); + + return result; +} + +/** + * Recursively convert __etf__:base64 strings in a Python object. + * Handles nested tuples, lists, and dicts. + * Returns a NEW reference with ETF strings converted, or the original object + * with its refcount incremented if no conversion was needed. + */ +static PyObject *convert_etf_strings(PyObject *obj) { + if (obj == NULL) { + return NULL; + } + + /* Check if it's a string that might be an ETF encoding */ + if (PyUnicode_Check(obj)) { + Py_ssize_t len; + const char *str = PyUnicode_AsUTF8AndSize(obj, &len); + if (str != NULL && len > 8 && strncmp(str, "__etf__:", 8) == 0) { + PyObject *decoded = decode_etf_string(str, len); + if (decoded != NULL) { + return decoded; /* Return the decoded object */ + } + /* If decoding failed, fall through and return original */ + } + Py_INCREF(obj); + return obj; + } + + /* Handle tuples */ + if (PyTuple_Check(obj)) { + Py_ssize_t size = PyTuple_Size(obj); + int needs_conversion = 0; + + /* First pass: check if any element needs conversion */ + for (Py_ssize_t i = 0; i < size; i++) { + PyObject *item = PyTuple_GET_ITEM(obj, i); + if (PyUnicode_Check(item)) { + Py_ssize_t len; + const char *str = PyUnicode_AsUTF8AndSize(item, &len); + if (str != NULL && len > 8 && strncmp(str, "__etf__:", 8) == 0) { + needs_conversion = 1; + break; + } + } else if (PyTuple_Check(item) || PyList_Check(item) || PyDict_Check(item)) { + needs_conversion = 1; /* Might need recursive conversion */ + break; + } + } + + if (!needs_conversion) { + Py_INCREF(obj); + return obj; + } + + /* Create new tuple with converted elements */ + PyObject *new_tuple = PyTuple_New(size); + if (new_tuple == NULL) { + return 
NULL; + } + + for (Py_ssize_t i = 0; i < size; i++) { + PyObject *item = PyTuple_GET_ITEM(obj, i); + PyObject *converted = convert_etf_strings(item); + if (converted == NULL) { + Py_DECREF(new_tuple); + return NULL; + } + PyTuple_SET_ITEM(new_tuple, i, converted); /* Steals reference */ + } + return new_tuple; + } + + /* Handle lists */ + if (PyList_Check(obj)) { + Py_ssize_t size = PyList_Size(obj); + PyObject *new_list = PyList_New(size); + if (new_list == NULL) { + return NULL; + } + + for (Py_ssize_t i = 0; i < size; i++) { + PyObject *item = PyList_GET_ITEM(obj, i); + PyObject *converted = convert_etf_strings(item); + if (converted == NULL) { + Py_DECREF(new_list); + return NULL; + } + PyList_SET_ITEM(new_list, i, converted); /* Steals reference */ + } + return new_list; + } + + /* Handle dicts */ + if (PyDict_Check(obj)) { + PyObject *new_dict = PyDict_New(); + if (new_dict == NULL) { + return NULL; + } + + PyObject *key, *value; + Py_ssize_t pos = 0; + while (PyDict_Next(obj, &pos, &key, &value)) { + PyObject *conv_key = convert_etf_strings(key); + PyObject *conv_value = convert_etf_strings(value); + if (conv_key == NULL || conv_value == NULL) { + Py_XDECREF(conv_key); + Py_XDECREF(conv_value); + Py_DECREF(new_dict); + return NULL; + } + PyDict_SetItem(new_dict, conv_key, conv_value); + Py_DECREF(conv_key); + Py_DECREF(conv_value); + } + return new_dict; + } + + /* For all other types, just return with incremented refcount */ + Py_INCREF(obj); + return obj; +} + /** * Helper to parse callback response data into a Python object. 
* Response format: status_byte (0=ok, 1=error) + python_repr_string @@ -618,18 +1107,35 @@ static PyObject *parse_callback_response(unsigned char *response_data, size_t re PyObject *result = NULL; if (status == 0) { - /* Try to evaluate the result string as Python literal using cached function */ - if (g_ast_literal_eval != NULL) { - PyObject *arg = PyUnicode_FromStringAndSize(result_str, result_len); - if (arg != NULL) { - result = PyObject_CallFunctionObjArgs(g_ast_literal_eval, arg, NULL); - Py_DECREF(arg); - if (result == NULL) { - /* If literal_eval fails, return as string */ - PyErr_Clear(); - result = PyUnicode_FromStringAndSize(result_str, result_len); + /* Try to evaluate the result string as Python literal. + * Import ast.literal_eval fresh to support subinterpreters + * (the cached g_ast_literal_eval may be from a different interpreter). */ + PyObject *ast_mod = PyImport_ImportModule("ast"); + if (ast_mod != NULL) { + PyObject *literal_eval = PyObject_GetAttrString(ast_mod, "literal_eval"); + Py_DECREF(ast_mod); + if (literal_eval != NULL) { + PyObject *arg = PyUnicode_FromStringAndSize(result_str, result_len); + if (arg != NULL) { + result = PyObject_CallFunctionObjArgs(literal_eval, arg, NULL); + Py_DECREF(arg); + if (result == NULL) { + /* If literal_eval fails, return as string */ + PyErr_Clear(); + result = PyUnicode_FromStringAndSize(result_str, result_len); + } else { + /* Post-process result to convert __etf__: strings to Python objects. + * This handles pids, references, and other Erlang terms that can't + * be represented as Python literals. 
*/ + PyObject *converted = convert_etf_strings(result); + Py_DECREF(result); + result = converted; + } } + Py_DECREF(literal_eval); } + } else { + PyErr_Clear(); } if (result == NULL) { result = PyUnicode_FromStringAndSize(result_str, result_len); @@ -757,10 +1263,18 @@ static PyObject *erlang_call_impl(PyObject *self, PyObject *args) { (void)self; /* - * Check if this is a call from an executor thread (normal path) or - * from a spawned thread (thread worker path). + * Check if we have a callback handler available. + * Priority: + * 1. tl_current_context with suspension enabled (new process-per-context API) + * 2. tl_current_context with callback_handler (old blocking pipe mode) + * 3. tl_current_worker (legacy worker API) + * 4. thread_worker_call (spawned threads) */ - if (tl_current_worker == NULL || !tl_current_worker->has_callback_handler) { + bool has_context_suspension = (tl_current_context != NULL && tl_allow_suspension); + bool has_context_handler = (tl_current_context != NULL && tl_current_context->has_callback_handler); + bool has_worker_handler = (tl_current_worker != NULL && tl_current_worker->has_callback_handler); + + if (!has_context_suspension && !has_context_handler && !has_worker_handler) { /* * Not an executor thread - use thread worker path. * This enables any spawned Python thread to call erlang.call(): @@ -831,15 +1345,77 @@ static PyObject *erlang_call_impl(PyObject *self, PyObject *args) { } } + /* Check for context-based suspended state with cached results (context replay case) */ + if (tl_current_context_suspended != NULL) { + /* + * Sequential callback support: + * When replaying Python code with multiple sequential erlang.call()s, + * we need to return results in the same order they were executed. + * The callback_results array stores results from previous callbacks, + * indexed in call order. The has_result field holds the CURRENT callback's + * result (the one that triggered this resume). 
+ * + * Example: f(g(h(x))) + * - Replay 1: h(x) suspended, resumed with h_result + * callback_results = [], has_result = h_result + * h(x) returns h_result, g(...) suspends + * + * - Replay 2: nested state has callback_results = [h_result], has_result = g_result + * h(x) returns callback_results[0] = h_result + * g(...) returns has_result = g_result + * f(...) suspends + * + * - Replay 3: nested state has callback_results = [h_result, g_result], has_result = f_result + * h(x) returns callback_results[0] = h_result + * g(...) returns callback_results[1] = g_result + * f(...) returns has_result = f_result + * Done! + */ + + /* First, check if we have a cached result from a PREVIOUS callback */ + if (tl_current_context_suspended->callback_result_index < + tl_current_context_suspended->num_callback_results) { + /* Return cached result from previous callback, advance index */ + size_t idx = tl_current_context_suspended->callback_result_index++; + PyObject *result = parse_callback_response( + tl_current_context_suspended->callback_results[idx].data, + tl_current_context_suspended->callback_results[idx].len); + return result; + } + + /* Next, check if this is the CURRENT callback (the one that triggered resume) */ + if (tl_current_context_suspended->has_result) { + /* Verify this is the same callback */ + if (tl_current_context_suspended->callback_func_len == func_name_len && + memcmp(tl_current_context_suspended->callback_func_name, func_name, func_name_len) == 0) { + /* Return the current callback result */ + PyObject *result = parse_callback_response( + tl_current_context_suspended->result_data, + tl_current_context_suspended->result_len); + /* Mark result as consumed */ + tl_current_context_suspended->has_result = false; + return result; + } + } + /* If we get here, this is a NEW callback - will suspend below */ + } + /* * FIX for multiple sequential erlang.call(): - * If we're in replay context (tl_current_suspended != NULL) but didn't get + * If we're in WORKER 
replay context (tl_current_suspended != NULL) but didn't get * a cache hit above, this is a SUBSEQUENT call (e.g., second erlang.call() - * in the same Python function). We MUST NOT suspend again - that would - * cause an infinite loop where replay always hits this second call. - * Instead, fall through to blocking pipe behavior for subsequent calls. + * in the same Python function). For WORKER mode, the callback handler process + * is still running and will handle this via blocking pipe. + * + * For CONTEXT replay (tl_current_context_suspended != NULL), we CANNOT block + * because there's no callback handler process. Instead, we must suspend again + * and let the context process handle the subsequent callback. This works because + * the context process re-replays from the beginning, and each callback result + * is returned via the cached result mechanism on subsequent replays. */ bool force_blocking = (tl_current_suspended != NULL); + /* Note: tl_current_context_suspended is NOT included here - context mode + * always uses suspension for callbacks, allowing unlimited nesting via replay */ /* Build args list (remaining args) */ PyObject *call_args = PyTuple_GetSlice(args, 1, nargs); @@ -885,12 +1461,23 @@ static PyObject *erlang_call_impl(PyObject *self, PyObject *args) { uint32_t response_len = 0; int read_result; + /* Get callback handler and pipe from context or worker */ + ErlNifPid *handler_pid; + int read_fd; + if (has_context_handler) { + handler_pid = &tl_current_context->callback_handler; + read_fd = tl_current_context->callback_pipe[0]; + } else { + handler_pid = &tl_current_worker->callback_handler; + read_fd = tl_current_worker->callback_pipe[0]; + } + Py_BEGIN_ALLOW_THREADS - enif_send(NULL, &tl_current_worker->callback_handler, msg_env, msg); + enif_send(NULL, handler_pid, msg_env, msg); enif_free_env(msg_env); /* Use 30 second timeout to prevent indefinite blocking */ read_result = read_length_prefixed_data( - tl_current_worker->callback_pipe[0], + 
read_fd, &response_data, &response_len, 30000); Py_END_ALLOW_THREADS @@ -1722,217 +2309,102 @@ static int create_erlang_module(void) { Py_DECREF(log_globals); } - return 0; -} - -/* ============================================================================ - * Asyncio support - * ============================================================================ */ - -/** - * Callback function that gets invoked when a future completes. - * This is called from within the event loop thread. - */ -static void async_future_callback(py_async_worker_t *worker, async_pending_t *pending) { - ErlNifEnv *msg_env = enif_alloc_env(); - if (msg_env == NULL) { - /* Cannot send result - just log and return */ - return; - } - PyObject *py_result = PyObject_CallMethod(pending->future, "result", NULL); - - ERL_NIF_TERM result_term; - if (py_result == NULL) { - /* Exception occurred */ - PyObject *exc = PyObject_CallMethod(pending->future, "exception", NULL); - if (exc != NULL && exc != Py_None) { - PyObject *str = PyObject_Str(exc); - const char *err_msg = str ? PyUnicode_AsUTF8(str) : "unknown"; - result_term = enif_make_tuple2(msg_env, ATOM_ERROR, - enif_make_string(msg_env, err_msg, ERL_NIF_LATIN1)); - Py_XDECREF(str); - } else { - result_term = enif_make_tuple2(msg_env, ATOM_ERROR, - enif_make_atom(msg_env, "unknown")); - } - Py_XDECREF(exc); - PyErr_Clear(); - } else { - result_term = enif_make_tuple2(msg_env, ATOM_OK, - py_to_term(msg_env, py_result)); - Py_DECREF(py_result); - } - - /* Send message: {async_result, Id, Result} */ - ERL_NIF_TERM msg = enif_make_tuple3(msg_env, - ATOM_ASYNC_RESULT, - enif_make_uint64(msg_env, pending->id), - result_term); - enif_send(NULL, &pending->caller, msg_env, msg); - enif_free_env(msg_env); -} - -/** - * Background thread running the asyncio event loop. - * This thread owns the event loop and processes coroutines. 
- */ -static void *async_event_loop_thread(void *arg) { - py_async_worker_t *worker = (py_async_worker_t *)arg; - - /* Acquire GIL for this thread */ - PyGILState_STATE gstate = PyGILState_Ensure(); - - /* Import asyncio */ - PyObject *asyncio = PyImport_ImportModule("asyncio"); - if (asyncio == NULL) { - PyErr_Print(); - PyGILState_Release(gstate); - worker->loop_running = false; - return NULL; - } - - /* Create a default selector event loop directly, bypassing the policy. - * Worker threads should NOT use ErlangEventLoop since it requires the - * main thread's event router. Using SelectorEventLoop ensures these - * background threads have their own independent event loops. */ - PyObject *selector_loop_class = PyObject_GetAttrString(asyncio, "SelectorEventLoop"); - PyObject *loop = NULL; - if (selector_loop_class != NULL) { - loop = PyObject_CallObject(selector_loop_class, NULL); - Py_DECREF(selector_loop_class); - } - if (loop == NULL) { - /* Fallback to new_event_loop if SelectorEventLoop not available */ - PyErr_Clear(); - loop = PyObject_CallMethod(asyncio, "new_event_loop", NULL); - } - if (loop == NULL) { - PyErr_Print(); - Py_DECREF(asyncio); - PyGILState_Release(gstate); - worker->loop_running = false; - return NULL; - } - - /* Set as current loop for this thread */ - PyObject *set_result = PyObject_CallMethod(asyncio, "set_event_loop", "O", loop); - Py_XDECREF(set_result); - - worker->event_loop = loop; - Py_INCREF(loop); /* Keep extra ref for worker struct */ - - Py_DECREF(asyncio); - - worker->loop_running = true; - - /* Run the event loop with proper GIL management */ - while (!worker->shutdown) { - /* Release GIL while sleeping (allow other Python threads to run) */ - Py_BEGIN_ALLOW_THREADS - usleep(10000); /* 10ms sleep without holding GIL */ - Py_END_ALLOW_THREADS - - /* Run one iteration of the event loop with GIL held */ - PyObject *asyncio_mod = PyImport_ImportModule("asyncio"); - if (asyncio_mod != NULL) { - PyObject *sleep_coro = 
PyObject_CallMethod(asyncio_mod, "sleep", "d", 0.0); - if (sleep_coro != NULL) { - PyObject *task = PyObject_CallMethod(loop, "create_task", "O", sleep_coro); - Py_DECREF(sleep_coro); - if (task != NULL) { - PyObject *run_result = PyObject_CallMethod(loop, "run_until_complete", "O", task); - Py_DECREF(task); - Py_XDECREF(run_result); - } - } - Py_DECREF(asyncio_mod); - } - if (PyErr_Occurred()) { - PyErr_Clear(); - } + /* Add helper to extend erlang module with Python package exports. + * Called from Erlang after priv_dir is added to sys.path. + * Follows uvloop's minimal export pattern. + */ + const char *extend_code = + "def _extend_erlang_module(priv_dir):\n" + " '''\n" + " Extend the C erlang module with Python event loop exports.\n" + " \n" + " Called from Erlang after priv_dir is set up in sys.path.\n" + " This allows the C 'erlang' module to also provide:\n" + " - erlang.run()\n" + " - erlang.new_event_loop()\n" + " - erlang.get_event_loop_policy()\n" + " - erlang.install()\n" + " - erlang.EventLoopPolicy\n" + " - erlang.ErlangEventLoop\n" + " \n" + " Args:\n" + " priv_dir: Path to erlang_python priv directory (bytes or str)\n" + " \n" + " Returns:\n" + " True on success, False on failure\n" + " '''\n" + " import sys\n" + " # Handle bytes from Erlang\n" + " if isinstance(priv_dir, bytes):\n" + " priv_dir = priv_dir.decode('utf-8')\n" + " if priv_dir not in sys.path:\n" + " sys.path.insert(0, priv_dir)\n" + " try:\n" + " import _erlang_impl\n" + " import erlang\n" + " # Primary exports (uvloop-compatible)\n" + " erlang.run = _erlang_impl.run\n" + " erlang.new_event_loop = _erlang_impl.new_event_loop\n" + " erlang.ErlangEventLoop = _erlang_impl.ErlangEventLoop\n" + " # Deprecated (Python < 3.16)\n" + " erlang.install = _erlang_impl.install\n" + " erlang.EventLoopPolicy = _erlang_impl.EventLoopPolicy\n" + " erlang.ErlangEventLoopPolicy = _erlang_impl.ErlangEventLoopPolicy\n" + " # Additional exports for compatibility\n" + " erlang.get_event_loop_policy = 
_erlang_impl.get_event_loop_policy\n" + " erlang.detect_mode = _erlang_impl.detect_mode\n" + " erlang.ExecutionMode = _erlang_impl.ExecutionMode\n" + " # Reactor for fd-based protocol handling\n" + " erlang.reactor = _erlang_impl.reactor\n" + " # Make erlang behave as a package for 'import erlang.reactor' syntax\n" + " erlang.__path__ = [priv_dir]\n" + " sys.modules['erlang.reactor'] = erlang.reactor\n" + " return True\n" + " except ImportError as e:\n" + " import sys\n" + " sys.stderr.write(f'Failed to extend erlang module: {e}\\n')\n" + " return False\n" + "\n" + "import erlang\n" + "erlang._extend_erlang_module = _extend_erlang_module\n"; - /* - * Check for completed futures (GIL held). - * - * IMPORTANT: We must not hold the mutex while calling Python functions - * to avoid deadlocks. The pattern is: - * 1. Lock mutex, collect completed items, unlock - * 2. Process callbacks outside mutex (no contention) - * 3. Lock mutex, remove processed items, unlock - */ + PyObject *ext_globals = PyDict_New(); + if (ext_globals != NULL) { + PyObject *builtins = PyEval_GetBuiltins(); + PyDict_SetItemString(ext_globals, "__builtins__", builtins); - /* Phase 1: Collect completed futures under mutex */ - #define MAX_COMPLETED_BATCH 16 - async_pending_t *completed[MAX_COMPLETED_BATCH]; - int num_completed = 0; - - pthread_mutex_lock(&worker->queue_mutex); - async_pending_t *p = worker->pending_head; - while (p != NULL && num_completed < MAX_COMPLETED_BATCH) { - if (p->future != NULL) { - /* Quick check if future is done (still needs GIL, but mutex held briefly) */ - PyObject *done = PyObject_CallMethod(p->future, "done", NULL); - if (done != NULL && PyObject_IsTrue(done)) { - Py_DECREF(done); - completed[num_completed++] = p; - } else { - Py_XDECREF(done); - } + /* Import erlang module into globals so the code can reference it */ + PyObject *sys_modules = PySys_GetObject("modules"); + if (sys_modules != NULL) { + PyObject *erlang_mod = PyDict_GetItemString(sys_modules, 
"erlang"); + if (erlang_mod != NULL) { + PyDict_SetItemString(ext_globals, "erlang", erlang_mod); } - p = p->next; - } - pthread_mutex_unlock(&worker->queue_mutex); - - /* Phase 2: Process completed callbacks outside mutex (no deadlock risk) */ - for (int i = 0; i < num_completed; i++) { - async_future_callback(worker, completed[i]); } - /* Phase 3: Remove processed items under mutex */ - if (num_completed > 0) { - pthread_mutex_lock(&worker->queue_mutex); - for (int i = 0; i < num_completed; i++) { - async_pending_t *to_remove = completed[i]; - - /* Find and remove from list */ - async_pending_t *prev = NULL; - p = worker->pending_head; - while (p != NULL) { - if (p == to_remove) { - /* Remove from list */ - if (prev == NULL) { - worker->pending_head = p->next; - } else { - prev->next = p->next; - } - if (p == worker->pending_tail) { - worker->pending_tail = prev; - } - break; - } - prev = p; - p = p->next; - } - - /* Clean up */ - Py_DECREF(to_remove->future); - enif_free(to_remove); - } - pthread_mutex_unlock(&worker->queue_mutex); + PyObject *result = PyRun_String(extend_code, Py_file_input, ext_globals, ext_globals); + if (result == NULL) { + /* Non-fatal - extension will be called from Erlang */ + PyErr_Print(); + PyErr_Clear(); + } else { + Py_DECREF(result); } + Py_DECREF(ext_globals); } - /* Stop and close the event loop */ - PyObject_CallMethod(loop, "stop", NULL); - PyObject_CallMethod(loop, "close", NULL); - Py_DECREF(loop); - - worker->loop_running = false; - PyGILState_Release(gstate); - - return NULL; + return 0; } +/* ============================================================================ + * Asyncio support (DEPRECATED - replaced by event loop model) + * + * The async_future_callback and async_event_loop_thread functions have been + * removed. Async coroutine execution is now handled by py_event_loop and + * py_event_loop_pool using enif_select and erlang.send() for efficient + * event-driven operation without pthread polling. 
+ * ============================================================================ */ + /* ============================================================================ * Resume callback NIFs * ============================================================================ */ diff --git a/c_src/py_event_loop.c b/c_src/py_event_loop.c index 4d2991d..ded4ef6 100644 --- a/c_src/py_event_loop.c +++ b/c_src/py_event_loop.c @@ -86,15 +86,73 @@ static const char *EVENT_LOOP_CAPSULE_NAME = "erlang_python.event_loop"; /** @brief Module attribute name for storing the event loop */ static const char *EVENT_LOOP_ATTR_NAME = "_loop"; -/* Forward declaration for fallback in get_interpreter_event_loop */ -static erlang_event_loop_t *g_python_event_loop; +/* ============================================================================ + * Module State Structure + * ============================================================================ + * + * Instead of using global variables, we store state in the Python module. + * This enables proper per-interpreter/per-context isolation. + */ +typedef struct { + /** @brief Event loop for this interpreter */ + erlang_event_loop_t *event_loop; + + /** @brief Shared router PID for loops created via _loop_new() */ + ErlNifPid shared_router; + + /** @brief Whether shared_router has been set */ + bool shared_router_valid; + + /** @brief Isolation mode: 0=global, 1=per_loop */ + int isolation_mode; +} py_event_loop_module_state_t; + +/* ============================================================================ + * Global Shared Router + * ============================================================================ + * + * A global shared router that can be used by all interpreters (main and sub). + * This is separate from the per-module state to allow subinterpreters to + * access the router even when their module state doesn't have it set. 
+ */ +static ErlNifPid g_global_shared_router; +static bool g_global_shared_router_valid = false; +static pthread_mutex_t g_global_router_mutex = PTHREAD_MUTEX_INITIALIZER; + +/* Forward declaration for module state access */ +static py_event_loop_module_state_t *get_module_state(void); +static py_event_loop_module_state_t *get_module_state_from_module(PyObject *module); + +/** + * Try to acquire a router for the event loop. + * + * If the loop doesn't have a router/worker configured, check the global + * shared router and use it if available. This allows subinterpreters + * to use the main interpreter's router. + * + * @param loop Event loop to check/update + * @return true if a router/worker is available, false otherwise + */ +static bool event_loop_ensure_router(erlang_event_loop_t *loop) { + if (loop == NULL) { + return false; + } + + /* Already have a router or worker */ + if (loop->has_router || loop->has_worker) { + return true; + } -/* Global flag for isolation mode - set by Erlang via NIF */ -static volatile int g_isolation_mode = 0; /* 0 = global, 1 = per_loop */ + /* Try to get the global shared router */ + pthread_mutex_lock(&g_global_router_mutex); + if (g_global_shared_router_valid) { + loop->router_pid = g_global_shared_router; + loop->has_router = true; + } + pthread_mutex_unlock(&g_global_router_mutex); -/* Global shared router PID - set during init, used by all loops in per_loop mode */ -static ErlNifPid g_shared_router; -static volatile int g_shared_router_valid = 0; + return loop->has_router || loop->has_worker; +} /** * Get the py_event_loop module for the current interpreter. @@ -110,57 +168,61 @@ static PyObject *get_event_loop_module(void) { } /** - * Get the event loop for the current Python interpreter. + * Get module state from a module object. + * MUST be called with GIL held. 
+ * + * @param module The py_event_loop module object + * @return Module state or NULL if not available + */ +static py_event_loop_module_state_t *get_module_state_from_module(PyObject *module) { + if (module == NULL) { + return NULL; + } + void *state = PyModule_GetState(module); + return (py_event_loop_module_state_t *)state; +} + +/** + * Get module state for the current interpreter. * MUST be called with GIL held. * - * For now, we use the global g_python_event_loop directly. Per-interpreter - * storage via module attributes was causing issues on some Python versions. - * The global approach works correctly since all Python code in the main - * interpreter shares the same event loop. + * @return Module state or NULL if not available + */ +static py_event_loop_module_state_t *get_module_state(void) { + PyObject *module = get_event_loop_module(); + return get_module_state_from_module(module); +} + +/** + * Get the event loop for the current Python interpreter. + * MUST be called with GIL held. * - * TODO: Implement proper per-interpreter storage for sub-interpreter support. + * Uses module state for proper per-interpreter isolation. * * @return Event loop pointer or NULL if not set */ static erlang_event_loop_t *get_interpreter_event_loop(void) { - return g_python_event_loop; + py_event_loop_module_state_t *state = get_module_state(); + if (state == NULL) { + return NULL; + } + return state->event_loop; } /** * Set the event loop for the current interpreter. * MUST be called with GIL held. - * Stores as py_event_loop._loop module attribute. + * Stores in module state for proper per-interpreter isolation. 
* - * @param loop Event loop to set + * @param loop Event loop to set (NULL to clear) * @return 0 on success, -1 on error */ static int set_interpreter_event_loop(erlang_event_loop_t *loop) { - PyObject *module = get_event_loop_module(); - if (module == NULL) { - return -1; - } - - if (loop == NULL) { - /* Clear the event loop attribute */ - if (PyObject_SetAttrString(module, EVENT_LOOP_ATTR_NAME, Py_None) < 0) { - PyErr_Clear(); - } - return 0; - } - - PyObject *capsule = PyCapsule_New(loop, EVENT_LOOP_CAPSULE_NAME, NULL); - if (capsule == NULL) { + py_event_loop_module_state_t *state = get_module_state(); + if (state == NULL) { return -1; } - - int result = PyObject_SetAttrString(module, EVENT_LOOP_ATTR_NAME, capsule); - Py_DECREF(capsule); - - if (result < 0) { - PyErr_Clear(); - return -1; - } - + state->event_loop = loop; return 0; } @@ -177,18 +239,14 @@ int create_default_event_loop(ErlNifEnv *env); void event_loop_destructor(ErlNifEnv *env, void *obj) { erlang_event_loop_t *loop = (erlang_event_loop_t *)obj; - /* If this is the active Python event loop, clear references */ - if (g_python_event_loop == loop) { - g_python_event_loop = NULL; - /* Clear per-interpreter storage if we can acquire GIL. - * Don't create new loop in destructor - let next Python call handle it. */ - PyGILState_STATE gstate = PyGILState_Ensure(); - erlang_event_loop_t *interp_loop = get_interpreter_event_loop(); - if (interp_loop == loop) { - set_interpreter_event_loop(NULL); - } - PyGILState_Release(gstate); + /* If this is the active Python event loop, clear references. + * Acquire GIL to safely access module state. 
*/ + PyGILState_STATE gstate = PyGILState_Ensure(); + erlang_event_loop_t *interp_loop = get_interpreter_event_loop(); + if (interp_loop == loop) { + set_interpreter_event_loop(NULL); } + PyGILState_Release(gstate); /* Signal shutdown */ loop->shutdown = true; @@ -611,7 +669,7 @@ ERL_NIF_TERM nif_add_reader(ErlNifEnv *env, int argc, } /* Scalable I/O: prefer worker, fall back to router */ - if (!loop->has_worker && !loop->has_router) { + if (!event_loop_ensure_router(loop)) { return make_error(env, "no_router"); } ErlNifPid *target_pid = loop->has_worker ? &loop->worker_pid : &loop->router_pid; @@ -723,7 +781,7 @@ ERL_NIF_TERM nif_add_writer(ErlNifEnv *env, int argc, } /* Scalable I/O: prefer worker, fall back to router */ - if (!loop->has_worker && !loop->has_router) { + if (!event_loop_ensure_router(loop)) { return make_error(env, "no_router"); } ErlNifPid *target_pid = loop->has_worker ? &loop->worker_pid : &loop->router_pid; @@ -837,7 +895,7 @@ ERL_NIF_TERM nif_call_later(ErlNifEnv *env, int argc, } /* Scalable I/O: prefer worker, fall back to router */ - if (!loop->has_worker && !loop->has_router) { + if (!event_loop_ensure_router(loop)) { return make_error(env, "no_router"); } ErlNifPid *target_pid = loop->has_worker ? &loop->worker_pid : &loop->router_pid; @@ -877,7 +935,7 @@ ERL_NIF_TERM nif_cancel_timer(ErlNifEnv *env, int argc, ERL_NIF_TERM timer_ref = argv[1]; /* Scalable I/O: prefer worker, fall back to router */ - if (!loop->has_worker && !loop->has_router) { + if (!event_loop_ensure_router(loop)) { return make_error(env, "no_router"); } ErlNifPid *target_pid = loop->has_worker ? &loop->worker_pid : &loop->router_pid; @@ -1287,6 +1345,281 @@ ERL_NIF_TERM nif_event_loop_wakeup(ErlNifEnv *env, int argc, return ATOM_OK; } +/** + * event_loop_run_async(LoopRef, CallerPid, Ref, Module, Func, Args, Kwargs) -> ok | {error, Reason} + * + * Submit an async coroutine to run on the event loop. 
When the coroutine + * completes, the result is sent to CallerPid via erlang.send(). + * + * This replaces the pthread+usleep polling model with direct message passing. + */ +ERL_NIF_TERM nif_event_loop_run_async(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + erlang_event_loop_t *loop; + ErlNifPid caller_pid; + ErlNifBinary module_bin, func_bin; + + /* Get loop reference */ + if (!enif_get_resource(env, argv[0], EVENT_LOOP_RESOURCE_TYPE, + (void **)&loop)) { + return make_error(env, "invalid_loop"); + } + + /* Get caller PID */ + if (!enif_get_local_pid(env, argv[1], &caller_pid)) { + return make_error(env, "invalid_caller_pid"); + } + + /* argv[2] is the reference - we'll pass it to Python */ + ERL_NIF_TERM ref_term = argv[2]; + + /* Get module and function names */ + if (!enif_inspect_binary(env, argv[3], &module_bin)) { + return make_error(env, "invalid_module"); + } + if (!enif_inspect_binary(env, argv[4], &func_bin)) { + return make_error(env, "invalid_func"); + } + + /* Convert args list - argv[5] */ + /* Convert kwargs map - argv[6] */ + + PyGILState_STATE gstate = PyGILState_Ensure(); + + /* Convert module/func names to C strings */ + char *module_name = enif_alloc(module_bin.size + 1); + char *func_name = enif_alloc(func_bin.size + 1); + if (module_name == NULL || func_name == NULL) { + enif_free(module_name); + enif_free(func_name); + PyGILState_Release(gstate); + return make_error(env, "alloc_failed"); + } + memcpy(module_name, module_bin.data, module_bin.size); + module_name[module_bin.size] = '\0'; + memcpy(func_name, func_bin.data, func_bin.size); + func_name[func_bin.size] = '\0'; + + ERL_NIF_TERM result; + + /* Import module and get function */ + PyObject *module = PyImport_ImportModule(module_name); + if (module == NULL) { + result = make_py_error(env); + goto cleanup; + } + + PyObject *func = PyObject_GetAttrString(module, func_name); + Py_DECREF(module); + if (func == NULL) { + result = make_py_error(env); + goto 
cleanup; + } + + /* Convert args list to Python tuple */ + unsigned int args_len; + if (!enif_get_list_length(env, argv[5], &args_len)) { + Py_DECREF(func); + result = make_error(env, "invalid_args"); + goto cleanup; + } + + PyObject *args = PyTuple_New(args_len); + ERL_NIF_TERM head, tail = argv[5]; + for (unsigned int i = 0; i < args_len; i++) { + enif_get_list_cell(env, tail, &head, &tail); + PyObject *arg = term_to_py(env, head); + if (arg == NULL) { + Py_DECREF(args); + Py_DECREF(func); + result = make_error(env, "arg_conversion_failed"); + goto cleanup; + } + PyTuple_SET_ITEM(args, i, arg); + } + + /* Convert kwargs */ + PyObject *kwargs = NULL; + if (argc > 6 && enif_is_map(env, argv[6])) { + kwargs = term_to_py(env, argv[6]); + } + + /* Call the function to get coroutine */ + PyObject *coro = PyObject_Call(func, args, kwargs); + Py_DECREF(func); + Py_DECREF(args); + Py_XDECREF(kwargs); + + if (coro == NULL) { + result = make_py_error(env); + goto cleanup; + } + + /* Check if result is a coroutine */ + PyObject *asyncio = PyImport_ImportModule("asyncio"); + if (asyncio == NULL) { + Py_DECREF(coro); + result = make_error(env, "asyncio_import_failed"); + goto cleanup; + } + + PyObject *iscoroutine = PyObject_CallMethod(asyncio, "iscoroutine", "O", coro); + bool is_coro = iscoroutine != NULL && PyObject_IsTrue(iscoroutine); + Py_XDECREF(iscoroutine); + + if (!is_coro) { + Py_DECREF(asyncio); + /* Not a coroutine - convert result and send immediately */ + PyObject *erlang_mod = PyImport_ImportModule("erlang"); + if (erlang_mod == NULL) { + Py_DECREF(coro); + result = make_py_error(env); + goto cleanup; + } + + /* Create the caller PID object */ + extern PyTypeObject ErlangPidType; + ErlangPidObject *pid_obj = PyObject_New(ErlangPidObject, &ErlangPidType); + if (pid_obj == NULL) { + Py_DECREF(erlang_mod); + Py_DECREF(coro); + result = make_error(env, "pid_alloc_failed"); + goto cleanup; + } + pid_obj->pid = caller_pid; + + /* Convert ref and result to Python */ + 
PyObject *py_ref = term_to_py(env, ref_term); + if (py_ref == NULL) { + Py_DECREF((PyObject *)pid_obj); + Py_DECREF(erlang_mod); + Py_DECREF(coro); + result = make_error(env, "ref_conversion_failed"); + goto cleanup; + } + + /* Build result tuple: ('async_result', ref, ('ok', result)) */ + PyObject *ok_tuple = PyTuple_Pack(2, PyUnicode_FromString("ok"), coro); + PyObject *msg = PyTuple_Pack(3, + PyUnicode_FromString("async_result"), + py_ref, + ok_tuple); + + /* Send via erlang.send() */ + PyObject *send_result = PyObject_CallMethod(erlang_mod, "send", "OO", + (PyObject *)pid_obj, msg); + Py_XDECREF(send_result); + Py_DECREF(msg); + Py_DECREF(ok_tuple); + Py_DECREF(py_ref); + Py_DECREF((PyObject *)pid_obj); + Py_DECREF(erlang_mod); + Py_DECREF(coro); + + result = ATOM_OK; + goto cleanup; + } + + /* Import erlang_loop to get _run_and_send */ + PyObject *erlang_loop = PyImport_ImportModule("erlang_loop"); + if (erlang_loop == NULL) { + /* Try _erlang_impl._loop as fallback */ + PyErr_Clear(); + erlang_loop = PyImport_ImportModule("_erlang_impl._loop"); + } + if (erlang_loop == NULL) { + Py_DECREF(asyncio); + Py_DECREF(coro); + result = make_error(env, "erlang_loop_import_failed"); + goto cleanup; + } + + PyObject *run_and_send = PyObject_GetAttrString(erlang_loop, "_run_and_send"); + Py_DECREF(erlang_loop); + if (run_and_send == NULL) { + Py_DECREF(asyncio); + Py_DECREF(coro); + result = make_error(env, "run_and_send_not_found"); + goto cleanup; + } + + /* Create the caller PID object */ + extern PyTypeObject ErlangPidType; + ErlangPidObject *pid_obj = PyObject_New(ErlangPidObject, &ErlangPidType); + if (pid_obj == NULL) { + Py_DECREF(run_and_send); + Py_DECREF(asyncio); + Py_DECREF(coro); + result = make_error(env, "pid_alloc_failed"); + goto cleanup; + } + pid_obj->pid = caller_pid; + + /* Convert ref to Python */ + PyObject *py_ref = term_to_py(env, ref_term); + if (py_ref == NULL) { + Py_DECREF((PyObject *)pid_obj); + Py_DECREF(run_and_send); + 
Py_DECREF(asyncio); + Py_DECREF(coro); + result = make_error(env, "ref_conversion_failed"); + goto cleanup; + } + + /* Create wrapped coroutine: _run_and_send(coro, caller_pid, ref) */ + PyObject *wrapped_coro = PyObject_CallFunction(run_and_send, "OOO", + coro, (PyObject *)pid_obj, py_ref); + Py_DECREF(run_and_send); + Py_DECREF(coro); + Py_DECREF((PyObject *)pid_obj); + Py_DECREF(py_ref); + + if (wrapped_coro == NULL) { + Py_DECREF(asyncio); + result = make_py_error(env); + goto cleanup; + } + + /* Get the running event loop and create a task */ + PyObject *get_loop = PyObject_CallMethod(asyncio, "get_event_loop", NULL); + if (get_loop == NULL) { + PyErr_Clear(); + /* Try to use the event loop policy instead */ + get_loop = PyObject_CallMethod(asyncio, "get_running_loop", NULL); + } + + if (get_loop == NULL) { + PyErr_Clear(); + Py_DECREF(wrapped_coro); + Py_DECREF(asyncio); + result = make_error(env, "no_running_loop"); + goto cleanup; + } + + /* Schedule the task on the loop */ + PyObject *task = PyObject_CallMethod(get_loop, "create_task", "O", wrapped_coro); + Py_DECREF(wrapped_coro); + Py_DECREF(get_loop); + Py_DECREF(asyncio); + + if (task == NULL) { + result = make_py_error(env); + goto cleanup; + } + + Py_DECREF(task); + result = ATOM_OK; + +cleanup: + enif_free(module_name); + enif_free(func_name); + PyGILState_Release(gstate); + + return result; +} + /* ============================================================================ * Helper Functions * ============================================================================ */ @@ -1636,7 +1969,7 @@ ERL_NIF_TERM nif_reselect_reader_fd(ErlNifEnv *env, int argc, /* Use the loop stored in the fd resource */ erlang_event_loop_t *loop = fd_res->loop; - if (loop == NULL || (!loop->has_router && !loop->has_worker)) { + if (loop == NULL || !event_loop_ensure_router(loop)) { return make_error(env, "no_loop"); } @@ -1679,7 +2012,7 @@ ERL_NIF_TERM nif_reselect_writer_fd(ErlNifEnv *env, int argc, /* Use the loop 
stored in the fd resource */ erlang_event_loop_t *loop = fd_res->loop; - if (loop == NULL || (!loop->has_router && !loop->has_worker)) { + if (loop == NULL || !event_loop_ensure_router(loop)) { return make_error(env, "no_loop"); } @@ -1752,7 +2085,7 @@ ERL_NIF_TERM nif_start_reader(ErlNifEnv *env, int argc, } erlang_event_loop_t *loop = fd_res->loop; - if (loop == NULL || (!loop->has_router && !loop->has_worker)) { + if (loop == NULL || !event_loop_ensure_router(loop)) { return make_error(env, "no_loop"); } @@ -1827,7 +2160,7 @@ ERL_NIF_TERM nif_start_writer(ErlNifEnv *env, int argc, } erlang_event_loop_t *loop = fd_res->loop; - if (loop == NULL || (!loop->has_router && !loop->has_worker)) { + if (loop == NULL || !event_loop_ensure_router(loop)) { return make_error(env, "no_loop"); } @@ -2457,137 +2790,642 @@ ERL_NIF_TERM nif_set_udp_broadcast(ErlNifEnv *env, int argc, } /* ============================================================================ - * Python Module: py_event_loop + * Context Event Loop Access * - * This provides Python-callable functions for the event loop, allowing - * Python's asyncio to use the Erlang-native event loop. + * These NIFs allow Erlang to access the event loop for a subinterpreter context * ============================================================================ */ /** - * Initialize the global Python event loop. - * Note: This function is currently unused (dead code). - */ -int py_event_loop_init_python(ErlNifEnv *env, erlang_event_loop_t *loop) { - (void)env; - g_python_event_loop = loop; - return 0; -} - -/** - * NIF to set the global Python event loop. - * Called from Erlang: py_nif:set_python_event_loop(LoopRef) + * context_get_event_loop(ContextRef) -> {ok, LoopRef} | {error, Reason} * - * Updates both the global C variable (for NIF calls) and the per-interpreter - * storage (for Python code). Acquires GIL to set per-interpreter storage. + * Get the event loop for a subinterpreter context. 
+ * This allows Erlang to create a dedicated event worker for the context. */ -ERL_NIF_TERM nif_set_python_event_loop(ErlNifEnv *env, int argc, - const ERL_NIF_TERM argv[]) { +ERL_NIF_TERM nif_context_get_event_loop(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { (void)argc; - erlang_event_loop_t *loop; - if (!enif_get_resource(env, argv[0], EVENT_LOOP_RESOURCE_TYPE, (void **)&loop)) { - return make_error(env, "invalid_event_loop"); + py_context_t *ctx; + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); } - /* Set global C variable for fast access from C code */ - g_python_event_loop = loop; +#ifdef HAVE_SUBINTERPRETERS + if (ctx->is_subinterp && ctx->tstate != NULL) { + /* Enter the subinterpreter - same pattern as context_call/context_eval */ + PyThreadState *saved_tstate = PyThreadState_Swap(NULL); + PyThreadState_Swap(ctx->tstate); - /* Also set per-interpreter storage so Python code uses the correct loop */ - PyGILState_STATE gstate = PyGILState_Ensure(); - set_interpreter_event_loop(loop); - PyGILState_Release(gstate); - - return ATOM_OK; -} + erlang_event_loop_t *loop = get_interpreter_event_loop(); -/** - * set_isolation_mode(Mode) -> ok - * - * Set the event loop isolation mode. 
- * Called from Erlang: py_nif:set_isolation_mode(global | per_loop) - */ -ERL_NIF_TERM nif_set_isolation_mode(ErlNifEnv *env, int argc, - const ERL_NIF_TERM argv[]) { - (void)argc; + /* Restore previous thread state */ + PyThreadState_Swap(saved_tstate); - if (enif_is_atom(env, argv[0])) { - char atom_buf[32]; - if (enif_get_atom(env, argv[0], atom_buf, sizeof(atom_buf), ERL_NIF_LATIN1)) { - if (strcmp(atom_buf, "per_loop") == 0) { - g_isolation_mode = 1; - } else { - g_isolation_mode = 0; /* global or any other value */ - } - return ATOM_OK; + if (loop == NULL) { + return make_error(env, "no_event_loop"); } + + /* Return reference to the event loop */ + ERL_NIF_TERM loop_term = enif_make_resource(env, loop); + return enif_make_tuple2(env, ATOM_OK, loop_term); } - return make_error(env, "invalid_mode"); +#endif + + /* Worker mode contexts don't have their own event loop */ + return make_error(env, "not_subinterp"); } +/* ============================================================================ + * Reactor NIFs - Erlang-as-Reactor Architecture + * + * These NIFs support the Erlang-as-Reactor pattern where: + * - Erlang manages TCP accept and routing via gen_tcp + * - FDs are passed to py_reactor_context processes + * - Python handles HTTP parsing and ASGI/WSGI execution + * - Erlang handles I/O readiness via enif_select + * ============================================================================ */ + /** - * Set the shared router PID for per-loop created loops. - * This router will be used by all loops created via _loop_new(). + * reactor_register_fd(ContextRef, Fd, OwnerPid) -> {ok, FdRef} | {error, Reason} + * + * Register an FD for reactor monitoring. The FD is owned by the context + * and receives {select, FdRes, Ref, ready_input/ready_output} messages. + * Initial registration is for read events. 
*/ -ERL_NIF_TERM nif_set_shared_router(ErlNifEnv *env, int argc, - const ERL_NIF_TERM argv[]) { +ERL_NIF_TERM nif_reactor_register_fd(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { (void)argc; - if (!enif_get_local_pid(env, argv[0], &g_shared_router)) { - return make_error(env, "invalid_pid"); + /* Get context reference - we need PY_CONTEXT_RESOURCE_TYPE from py_nif.h */ + py_context_t *ctx; + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); } - g_shared_router_valid = 1; - return ATOM_OK; -} - -/* Python function: _poll_events(timeout_ms) -> num_events */ -static PyObject *py_poll_events(PyObject *self, PyObject *args) { - (void)self; - int timeout_ms; - if (!PyArg_ParseTuple(args, "i", &timeout_ms)) { - return NULL; + int fd; + if (!enif_get_int(env, argv[1], &fd)) { + return make_error(env, "invalid_fd"); } - /* Use per-interpreter event loop lookup */ - erlang_event_loop_t *loop = get_interpreter_event_loop(); - if (loop == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Event loop not initialized"); - return NULL; + ErlNifPid owner_pid; + if (!enif_get_local_pid(env, argv[2], &owner_pid)) { + return make_error(env, "invalid_pid"); } - if (loop->shutdown) { - return PyLong_FromLong(0); + /* Allocate fd resource */ + fd_resource_t *fd_res = enif_alloc_resource(FD_RESOURCE_TYPE, + sizeof(fd_resource_t)); + if (fd_res == NULL) { + return make_error(env, "alloc_failed"); } - int num_events; + fd_res->fd = fd; + fd_res->read_callback_id = 0; /* Not used for reactor mode */ + fd_res->write_callback_id = 0; + fd_res->owner_pid = owner_pid; + fd_res->reader_active = true; + fd_res->writer_active = false; + fd_res->loop = NULL; /* No event loop needed for reactor mode */ - /* Release GIL while waiting */ - Py_BEGIN_ALLOW_THREADS - num_events = poll_events_wait(loop, timeout_ms); - Py_END_ALLOW_THREADS + /* Initialize lifecycle management */ + atomic_store(&fd_res->closing_state, 
FD_STATE_OPEN); + fd_res->monitor_active = false; + fd_res->owns_fd = false; /* Erlang owns the socket via gen_tcp */ - return PyLong_FromLong(num_events); -} + /* Monitor owner process for cleanup on death */ + if (enif_monitor_process(env, fd_res, &owner_pid, + &fd_res->owner_monitor) == 0) { + fd_res->monitor_active = true; + } -/* Python function: _get_pending() -> [(callback_id, type_str), ...] */ -static PyObject *py_get_pending(PyObject *self, PyObject *args) { - (void)self; - (void)args; + /* Register with Erlang scheduler for read monitoring */ + int ret = enif_select(env, (ErlNifEvent)fd, ERL_NIF_SELECT_READ, + fd_res, &owner_pid, enif_make_ref(env)); - /* Use per-interpreter event loop lookup */ - erlang_event_loop_t *loop = get_interpreter_event_loop(); - if (loop == NULL) { - return PyList_New(0); + if (ret < 0) { + if (fd_res->monitor_active) { + enif_demonitor_process(env, fd_res, &fd_res->owner_monitor); + } + enif_release_resource(fd_res); + return make_error(env, "select_failed"); } - pthread_mutex_lock(&loop->mutex); + ERL_NIF_TERM fd_term = enif_make_resource(env, fd_res); + /* Don't release - keep reference while registered */ - /* Count pending events */ - int count = 0; - pending_event_t *current = loop->pending_head; - while (current != NULL) { - count++; - current = current->next; + return enif_make_tuple2(env, ATOM_OK, fd_term); +} + +/** + * reactor_reselect_read(FdRef) -> ok | {error, Reason} + * + * Re-register for read events after a one-shot event was delivered. 
+ */ +ERL_NIF_TERM nif_reactor_reselect_read(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + fd_resource_t *fd_res; + if (!enif_get_resource(env, argv[0], FD_RESOURCE_TYPE, (void **)&fd_res)) { + return make_error(env, "invalid_fd_ref"); + } + + if (atomic_load(&fd_res->closing_state) != FD_STATE_OPEN) { + return make_error(env, "fd_closing"); + } + + if (fd_res->fd < 0) { + return make_error(env, "fd_closed"); + } + + /* Re-register for read events */ + int ret = enif_select(env, (ErlNifEvent)fd_res->fd, ERL_NIF_SELECT_READ, + fd_res, &fd_res->owner_pid, enif_make_ref(env)); + + if (ret < 0) { + return make_error(env, "select_failed"); + } + + fd_res->reader_active = true; + + return ATOM_OK; +} + +/** + * reactor_select_write(FdRef) -> ok | {error, Reason} + * + * Switch to write monitoring for response sending. + */ +ERL_NIF_TERM nif_reactor_select_write(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + fd_resource_t *fd_res; + if (!enif_get_resource(env, argv[0], FD_RESOURCE_TYPE, (void **)&fd_res)) { + return make_error(env, "invalid_fd_ref"); + } + + if (atomic_load(&fd_res->closing_state) != FD_STATE_OPEN) { + return make_error(env, "fd_closing"); + } + + if (fd_res->fd < 0) { + return make_error(env, "fd_closed"); + } + + /* Register for write events */ + int ret = enif_select(env, (ErlNifEvent)fd_res->fd, ERL_NIF_SELECT_WRITE, + fd_res, &fd_res->owner_pid, enif_make_ref(env)); + + if (ret < 0) { + return make_error(env, "select_failed"); + } + + fd_res->writer_active = true; + fd_res->reader_active = false; /* Typically stop reading when writing */ + + return ATOM_OK; +} + +/** + * get_fd_from_resource(FdRef) -> Fd | {error, Reason} + * + * Extract the file descriptor integer from an FD resource. 
+ */ +ERL_NIF_TERM nif_get_fd_from_resource(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + fd_resource_t *fd_res; + if (!enif_get_resource(env, argv[0], FD_RESOURCE_TYPE, (void **)&fd_res)) { + return make_error(env, "invalid_fd_ref"); + } + + if (fd_res->fd < 0) { + return make_error(env, "fd_closed"); + } + + return enif_make_int(env, fd_res->fd); +} + +/** + * reactor_on_read_ready(ContextRef, Fd) -> {ok, Action} | {error, Reason} + * + * Call Python's erlang_reactor.on_read_ready(fd) and return the action. + * Action is one of: <<"continue">>, <<"write_pending">>, <<"close">> + * + * This is a dirty NIF since it acquires the GIL and calls Python. + */ +ERL_NIF_TERM nif_reactor_on_read_ready(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + py_context_t *ctx; + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + + int fd; + if (!enif_get_int(env, argv[1], &fd)) { + return make_error(env, "invalid_fd"); + } + + /* Acquire GIL and call Python */ + gil_guard_t guard = gil_acquire(); + + /* Import erlang_reactor module */ + PyObject *reactor_module = PyImport_ImportModule("erlang_reactor"); + if (reactor_module == NULL) { + PyErr_Clear(); + gil_release(guard); + return make_error(env, "import_erlang_reactor_failed"); + } + + /* Call on_read_ready(fd) */ + PyObject *result = PyObject_CallMethod(reactor_module, "on_read_ready", + "i", fd); + Py_DECREF(reactor_module); + + if (result == NULL) { + PyErr_Clear(); + gil_release(guard); + return make_error(env, "on_read_ready_failed"); + } + + /* Convert result to Erlang term */ + ERL_NIF_TERM action; + if (PyUnicode_Check(result)) { + const char *str = PyUnicode_AsUTF8(result); + if (str != NULL) { + size_t len = strlen(str); + unsigned char *buf = enif_make_new_binary(env, len, &action); + memcpy(buf, str, len); + } else { + action = enif_make_atom(env, "unknown"); + } + } else { + action = 
enif_make_atom(env, "unknown"); + } + + Py_DECREF(result); + gil_release(guard); + + return enif_make_tuple2(env, ATOM_OK, action); +} + +/** + * reactor_on_write_ready(ContextRef, Fd) -> {ok, Action} | {error, Reason} + * + * Call Python's erlang_reactor.on_write_ready(fd) and return the action. + * Action is one of: <<"continue">>, <<"read_pending">>, <<"close">> + * + * This is a dirty NIF since it acquires the GIL and calls Python. + */ +ERL_NIF_TERM nif_reactor_on_write_ready(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + py_context_t *ctx; + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + + int fd; + if (!enif_get_int(env, argv[1], &fd)) { + return make_error(env, "invalid_fd"); + } + + /* Acquire GIL and call Python */ + gil_guard_t guard = gil_acquire(); + + /* Import erlang_reactor module */ + PyObject *reactor_module = PyImport_ImportModule("erlang_reactor"); + if (reactor_module == NULL) { + PyErr_Clear(); + gil_release(guard); + return make_error(env, "import_erlang_reactor_failed"); + } + + /* Call on_write_ready(fd) */ + PyObject *result = PyObject_CallMethod(reactor_module, "on_write_ready", + "i", fd); + Py_DECREF(reactor_module); + + if (result == NULL) { + PyErr_Clear(); + gil_release(guard); + return make_error(env, "on_write_ready_failed"); + } + + /* Convert result to Erlang term */ + ERL_NIF_TERM action; + if (PyUnicode_Check(result)) { + const char *str = PyUnicode_AsUTF8(result); + if (str != NULL) { + size_t len = strlen(str); + unsigned char *buf = enif_make_new_binary(env, len, &action); + memcpy(buf, str, len); + } else { + action = enif_make_atom(env, "unknown"); + } + } else { + action = enif_make_atom(env, "unknown"); + } + + Py_DECREF(result); + gil_release(guard); + + return enif_make_tuple2(env, ATOM_OK, action); +} + +/** + * reactor_init_connection(ContextRef, Fd, ClientInfo) -> ok | {error, Reason} + * + * Initialize a 
Python protocol handler for a new connection. + * ClientInfo is a map with keys: addr, port + * + * This is a dirty NIF since it acquires the GIL and calls Python. + */ +ERL_NIF_TERM nif_reactor_init_connection(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + py_context_t *ctx; + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + + int fd; + if (!enif_get_int(env, argv[1], &fd)) { + return make_error(env, "invalid_fd"); + } + + /* Convert client_info map to Python dict */ + if (!enif_is_map(env, argv[2])) { + return make_error(env, "invalid_client_info"); + } + + /* Acquire GIL and call Python */ + gil_guard_t guard = gil_acquire(); + + /* Convert Erlang map to Python dict */ + PyObject *client_info = term_to_py(env, argv[2]); + if (client_info == NULL) { + PyErr_Clear(); + gil_release(guard); + return make_error(env, "client_info_conversion_failed"); + } + + /* Import erlang_reactor module */ + PyObject *reactor_module = PyImport_ImportModule("erlang_reactor"); + if (reactor_module == NULL) { + Py_DECREF(client_info); + PyErr_Clear(); + gil_release(guard); + return make_error(env, "import_erlang_reactor_failed"); + } + + /* Call init_connection(fd, client_info) */ + PyObject *result = PyObject_CallMethod(reactor_module, "init_connection", + "iO", fd, client_info); + Py_DECREF(reactor_module); + Py_DECREF(client_info); + + if (result == NULL) { + PyErr_Clear(); + gil_release(guard); + return make_error(env, "init_connection_failed"); + } + + Py_DECREF(result); + gil_release(guard); + + return ATOM_OK; +} + +/** + * reactor_close_fd(FdRef) -> ok | {error, Reason} + * + * Close an FD and clean up the protocol handler. + * Calls Python's erlang_reactor.close_connection(fd) if registered. 
+ */ +ERL_NIF_TERM nif_reactor_close_fd(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + fd_resource_t *fd_res; + if (!enif_get_resource(env, argv[0], FD_RESOURCE_TYPE, (void **)&fd_res)) { + return make_error(env, "invalid_fd_ref"); + } + + int fd = fd_res->fd; + + /* Atomically transition to CLOSING state */ + int expected = FD_STATE_OPEN; + if (!atomic_compare_exchange_strong(&fd_res->closing_state, + &expected, FD_STATE_CLOSING)) { + /* Already closing or closed */ + return ATOM_OK; + } + + /* Call Python to clean up protocol handler */ + if (fd >= 0) { + gil_guard_t guard = gil_acquire(); + + PyObject *reactor_module = PyImport_ImportModule("erlang_reactor"); + if (reactor_module != NULL) { + PyObject *result = PyObject_CallMethod(reactor_module, + "close_connection", "i", fd); + Py_XDECREF(result); + Py_DECREF(reactor_module); + PyErr_Clear(); /* Ignore errors during cleanup */ + } else { + PyErr_Clear(); + } + + gil_release(guard); + } + + /* Take ownership for cleanup */ + fd_res->owns_fd = true; + + /* Stop select and close */ + if (fd_res->reader_active || fd_res->writer_active) { + enif_select(env, (ErlNifEvent)fd_res->fd, ERL_NIF_SELECT_STOP, + fd_res, NULL, enif_make_atom(env, "reactor_close")); + } else { + atomic_store(&fd_res->closing_state, FD_STATE_CLOSED); + if (fd_res->fd >= 0) { + close(fd_res->fd); + fd_res->fd = -1; + } + if (fd_res->monitor_active) { + enif_demonitor_process(env, fd_res, &fd_res->owner_monitor); + fd_res->monitor_active = false; + } + } + + return ATOM_OK; +} + +/* ============================================================================ + * Python Module: py_event_loop + * + * This provides Python-callable functions for the event loop, allowing + * Python's asyncio to use the Erlang-native event loop. + * ============================================================================ */ + +/** + * Initialize the Python event loop. 
+ * Note: This function is deprecated - use nif_set_python_event_loop instead. + */ +int py_event_loop_init_python(ErlNifEnv *env, erlang_event_loop_t *loop) { + (void)env; + /* This is called from C code which should have GIL */ + return set_interpreter_event_loop(loop); +} + +/** + * NIF to set the Python event loop. + * Called from Erlang: py_nif:set_python_event_loop(LoopRef) + * + * Stores the event loop in module state for per-interpreter isolation. + */ +ERL_NIF_TERM nif_set_python_event_loop(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + erlang_event_loop_t *loop; + if (!enif_get_resource(env, argv[0], EVENT_LOOP_RESOURCE_TYPE, (void **)&loop)) { + return make_error(env, "invalid_event_loop"); + } + + /* Store in module state with GIL held */ + PyGILState_STATE gstate = PyGILState_Ensure(); + int result = set_interpreter_event_loop(loop); + PyGILState_Release(gstate); + + if (result < 0) { + return make_error(env, "failed_to_set_event_loop"); + } + + return ATOM_OK; +} + +/** + * set_isolation_mode(Mode) -> ok + * + * Set the event loop isolation mode. + * Called from Erlang: py_nif:set_isolation_mode(global | per_loop) + */ +ERL_NIF_TERM nif_set_isolation_mode(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + if (enif_is_atom(env, argv[0])) { + char atom_buf[32]; + if (enif_get_atom(env, argv[0], atom_buf, sizeof(atom_buf), ERL_NIF_LATIN1)) { + int mode = 0; + if (strcmp(atom_buf, "per_loop") == 0) { + mode = 1; + } + + /* Store in module state */ + PyGILState_STATE gstate = PyGILState_Ensure(); + py_event_loop_module_state_t *state = get_module_state(); + if (state != NULL) { + state->isolation_mode = mode; + } + PyGILState_Release(gstate); + + return ATOM_OK; + } + } + return make_error(env, "invalid_mode"); +} + +/** + * Set the shared router PID for per-loop created loops. + * This router will be used by all loops created via _loop_new(). 
+ * Stores in both module state (for the current interpreter) and + * global variable (for subinterpreters). + */ +ERL_NIF_TERM nif_set_shared_router(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + + ErlNifPid router_pid; + if (!enif_get_local_pid(env, argv[0], &router_pid)) { + return make_error(env, "invalid_pid"); + } + + /* Store in global variable (accessible from all interpreters) */ + pthread_mutex_lock(&g_global_router_mutex); + g_global_shared_router = router_pid; + g_global_shared_router_valid = true; + pthread_mutex_unlock(&g_global_router_mutex); + + /* Also store in module state for backward compatibility */ + PyGILState_STATE gstate = PyGILState_Ensure(); + py_event_loop_module_state_t *state = get_module_state(); + if (state != NULL) { + state->shared_router = router_pid; + state->shared_router_valid = true; + } + PyGILState_Release(gstate); + + return ATOM_OK; +} + +/* Python function: _poll_events(timeout_ms) -> num_events */ +static PyObject *py_poll_events(PyObject *self, PyObject *args) { + (void)self; + int timeout_ms; + + if (!PyArg_ParseTuple(args, "i", &timeout_ms)) { + return NULL; + } + + /* Use per-interpreter event loop lookup */ + erlang_event_loop_t *loop = get_interpreter_event_loop(); + if (loop == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Event loop not initialized"); + return NULL; + } + + if (loop->shutdown) { + return PyLong_FromLong(0); + } + + int num_events; + + /* Release GIL while waiting */ + Py_BEGIN_ALLOW_THREADS + num_events = poll_events_wait(loop, timeout_ms); + Py_END_ALLOW_THREADS + + return PyLong_FromLong(num_events); +} + +/* Python function: _get_pending() -> [(callback_id, type_str), ...] 
*/ +static PyObject *py_get_pending(PyObject *self, PyObject *args) { + (void)self; + (void)args; + + /* Use per-interpreter event loop lookup */ + erlang_event_loop_t *loop = get_interpreter_event_loop(); + if (loop == NULL) { + return PyList_New(0); + } + + pthread_mutex_lock(&loop->mutex); + + /* Count pending events */ + int count = 0; + pending_event_t *current = loop->pending_head; + while (current != NULL) { + count++; + current = current->next; } PyObject *list = PyList_New(count); @@ -2700,7 +3538,8 @@ static PyObject *py_get_isolation_mode(PyObject *self, PyObject *args) { (void)self; (void)args; - if (g_isolation_mode == 1) { + py_event_loop_module_state_t *state = get_module_state(); + if (state != NULL && state->isolation_mode == 1) { return PyUnicode_FromString("per_loop"); } return PyUnicode_FromString("global"); @@ -2870,7 +3709,7 @@ static PyObject *py_schedule_timer(PyObject *self, PyObject *args) { /* Use per-interpreter event loop lookup */ erlang_event_loop_t *loop = get_interpreter_event_loop(); - if (loop == NULL || (!loop->has_router && !loop->has_worker)) { + if (loop == NULL || !event_loop_ensure_router(loop)) { PyErr_SetString(PyExc_RuntimeError, "Event loop not initialized"); return NULL; } @@ -2917,7 +3756,7 @@ static PyObject *py_cancel_timer(PyObject *self, PyObject *args) { /* Use per-interpreter event loop lookup */ erlang_event_loop_t *loop = get_interpreter_event_loop(); - if (loop == NULL || (!loop->has_router && !loop->has_worker)) { + if (loop == NULL || !event_loop_ensure_router(loop)) { Py_RETURN_NONE; } @@ -3172,9 +4011,10 @@ static PyObject *py_loop_new(PyObject *self, PyObject *args) { loop->event_freelist = NULL; loop->freelist_count = 0; - /* Use shared router if available (for per-loop mode) */ - if (g_shared_router_valid) { - loop->router_pid = g_shared_router; + /* Use shared router if available from module state (for per-loop mode) */ + py_event_loop_module_state_t *state = get_module_state(); + if (state != NULL && 
state->shared_router_valid) { + loop->router_pid = state->shared_router; loop->has_router = true; } @@ -3315,7 +4155,7 @@ static PyObject *py_add_reader_for(PyObject *self, PyObject *args) { return NULL; } - if (!loop->has_router && !loop->has_worker) { + if (!event_loop_ensure_router(loop)) { PyErr_SetString(PyExc_RuntimeError, "Event loop has no router or worker"); return NULL; } @@ -3396,7 +4236,7 @@ static PyObject *py_add_writer_for(PyObject *self, PyObject *args) { return NULL; } - if (!loop->has_router && !loop->has_worker) { + if (!event_loop_ensure_router(loop)) { PyErr_SetString(PyExc_RuntimeError, "Event loop has no router or worker"); return NULL; } @@ -3461,6 +4301,146 @@ static PyObject *py_remove_writer_for(PyObject *self, PyObject *args) { Py_RETURN_NONE; } +/** + * Update read callback on existing fd_resource and re-register with enif_select. + * Python function: _update_fd_read(fd_key, callback_id) -> None + */ +static PyObject *py_update_fd_read(PyObject *self, PyObject *args) { + (void)self; + unsigned long long fd_key; + unsigned long long callback_id; + + if (!PyArg_ParseTuple(args, "KK", &fd_key, &callback_id)) { + return NULL; + } + + fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key; + if (fd_res == NULL || fd_res->loop == NULL) { + PyErr_SetString(PyExc_ValueError, "Invalid fd resource"); + return NULL; + } + + fd_res->read_callback_id = callback_id; + fd_res->reader_active = true; + + /* Re-register for read events (may already be registered, that's OK) */ + ErlNifPid *target_pid = fd_res->loop->has_worker ? + &fd_res->loop->worker_pid : &fd_res->loop->router_pid; + enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd, + ERL_NIF_SELECT_READ, fd_res, target_pid, ATOM_UNDEFINED); + + Py_RETURN_NONE; +} + +/** + * Update write callback on existing fd_resource and re-register with enif_select. 
+ * Python function: _update_fd_write(fd_key, callback_id) -> None + */ +static PyObject *py_update_fd_write(PyObject *self, PyObject *args) { + (void)self; + unsigned long long fd_key; + unsigned long long callback_id; + + if (!PyArg_ParseTuple(args, "KK", &fd_key, &callback_id)) { + return NULL; + } + + fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key; + if (fd_res == NULL || fd_res->loop == NULL) { + PyErr_SetString(PyExc_ValueError, "Invalid fd resource"); + return NULL; + } + + fd_res->write_callback_id = callback_id; + fd_res->writer_active = true; + + /* Re-register for write events */ + ErlNifPid *target_pid = fd_res->loop->has_worker ? + &fd_res->loop->worker_pid : &fd_res->loop->router_pid; + enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd, + ERL_NIF_SELECT_WRITE, fd_res, target_pid, ATOM_UNDEFINED); + + Py_RETURN_NONE; +} + +/** + * Clear read monitoring on fd_resource (cancel READ select). + * Python function: _clear_fd_read(fd_key) -> None + */ +static PyObject *py_clear_fd_read(PyObject *self, PyObject *args) { + (void)self; + unsigned long long fd_key; + + if (!PyArg_ParseTuple(args, "K", &fd_key)) { + return NULL; + } + + fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key; + if (fd_res == NULL || fd_res->loop == NULL) { + Py_RETURN_NONE; /* Already cleaned up */ + } + + if (fd_res->reader_active) { + enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd, + ERL_NIF_SELECT_CANCEL | ERL_NIF_SELECT_READ, + fd_res, NULL, ATOM_UNDEFINED); + fd_res->reader_active = false; + fd_res->read_callback_id = 0; + } + + Py_RETURN_NONE; +} + +/** + * Clear write monitoring on fd_resource (cancel WRITE select). 
+ * Python function: _clear_fd_write(fd_key) -> None + */ +static PyObject *py_clear_fd_write(PyObject *self, PyObject *args) { + (void)self; + unsigned long long fd_key; + + if (!PyArg_ParseTuple(args, "K", &fd_key)) { + return NULL; + } + + fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key; + if (fd_res == NULL || fd_res->loop == NULL) { + Py_RETURN_NONE; /* Already cleaned up */ + } + + if (fd_res->writer_active) { + enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd, + ERL_NIF_SELECT_CANCEL | ERL_NIF_SELECT_WRITE, + fd_res, NULL, ATOM_UNDEFINED); + fd_res->writer_active = false; + fd_res->write_callback_id = 0; + } + + Py_RETURN_NONE; +} + +/** + * Release fd_resource (stop all monitoring and release). + * Python function: _release_fd_resource(fd_key) -> None + */ +static PyObject *py_release_fd_resource(PyObject *self, PyObject *args) { + (void)self; + unsigned long long fd_key; + + if (!PyArg_ParseTuple(args, "K", &fd_key)) { + return NULL; + } + + fd_resource_t *fd_res = (fd_resource_t *)(uintptr_t)fd_key; + if (fd_res != NULL && fd_res->loop != NULL) { + enif_select(fd_res->loop->msg_env, (ErlNifEvent)fd_res->fd, + ERL_NIF_SELECT_STOP, fd_res, NULL, ATOM_UNDEFINED); + enif_release_resource(fd_res); + } + + Py_RETURN_NONE; +} + /* Python function: _schedule_timer_for(capsule, delay_ms, callback_id) -> timer_ref */ static PyObject *py_schedule_timer_for(PyObject *self, PyObject *args) { (void)self; @@ -3477,7 +4457,7 @@ static PyObject *py_schedule_timer_for(PyObject *self, PyObject *args) { return NULL; } - if (!loop->has_router && !loop->has_worker) { + if (!event_loop_ensure_router(loop)) { PyErr_SetString(PyExc_RuntimeError, "Event loop has no router or worker"); return NULL; } @@ -3533,7 +4513,7 @@ static PyObject *py_cancel_timer_for(PyObject *self, PyObject *args) { Py_RETURN_NONE; } - if (!loop->has_router && !loop->has_worker) { + if (!event_loop_ensure_router(loop)) { Py_RETURN_NONE; } @@ -3570,6 +4550,13 @@ static PyObject 
*py_wakeup_for(PyObject *self, PyObject *args) { Py_RETURN_NONE; } + /* Check shutdown flag before accessing mutex - loop may be in teardown. + * This is a safety net for any stray executor callbacks that might + * arrive after loop destruction has begun. */ + if (loop->shutdown) { + Py_RETURN_NONE; + } + pthread_mutex_lock(&loop->mutex); pthread_cond_broadcast(&loop->event_cond); pthread_mutex_unlock(&loop->mutex); @@ -3690,7 +4677,7 @@ static PyObject *py_erlang_sleep(PyObject *self, PyObject *args) { } /* Check if we have a worker to send to */ - if (!loop->has_worker && !loop->has_router) { + if (!event_loop_ensure_router(loop)) { PyErr_SetString(PyExc_RuntimeError, "No worker or router configured"); return NULL; } @@ -3698,9 +4685,20 @@ static PyObject *py_erlang_sleep(PyObject *self, PyObject *args) { /* Generate a unique sleep ID */ uint64_t sleep_id = atomic_fetch_add(&loop->next_callback_id, 1); + /* FIX: Store sleep_id BEFORE sending to prevent race condition. + * If completion arrives before storage, it would be dropped and waiter deadlocks. */ + pthread_mutex_lock(&loop->mutex); + atomic_store(&loop->sync_sleep_id, sleep_id); + atomic_store(&loop->sync_sleep_complete, false); + pthread_mutex_unlock(&loop->mutex); + /* Send {sleep_wait, DelayMs, SleepId} to worker */ ErlNifEnv *msg_env = enif_alloc_env(); if (msg_env == NULL) { + /* On failure, reset sleep_id */ + pthread_mutex_lock(&loop->mutex); + atomic_store(&loop->sync_sleep_id, 0); + pthread_mutex_unlock(&loop->mutex); PyErr_SetString(PyExc_MemoryError, "Failed to allocate message environment"); return NULL; } @@ -3715,16 +4713,18 @@ static PyObject *py_erlang_sleep(PyObject *self, PyObject *args) { /* Use worker_pid when available, otherwise fall back to router_pid */ ErlNifPid *target_pid = loop->has_worker ? 
&loop->worker_pid : &loop->router_pid; if (!enif_send(NULL, target_pid, msg_env, msg)) { + /* On failure, reset sleep_id */ + pthread_mutex_lock(&loop->mutex); + atomic_store(&loop->sync_sleep_id, 0); + pthread_mutex_unlock(&loop->mutex); enif_free_env(msg_env); PyErr_SetString(PyExc_RuntimeError, "Failed to send sleep message"); return NULL; } enif_free_env(msg_env); - /* Set up for waiting on this sleep */ + /* Wait for completion - sleep_id already set above */ pthread_mutex_lock(&loop->mutex); - atomic_store(&loop->sync_sleep_id, sleep_id); - atomic_store(&loop->sync_sleep_complete, false); /* Release GIL and wait for completion */ Py_BEGIN_ALLOW_THREADS @@ -3770,6 +4770,12 @@ static PyMethodDef PyEventLoopMethods[] = { {"_remove_reader_for", py_remove_reader_for, METH_VARARGS, "Stop monitoring fd for reads on specific loop"}, {"_add_writer_for", py_add_writer_for, METH_VARARGS, "Register fd for write monitoring on specific loop"}, {"_remove_writer_for", py_remove_writer_for, METH_VARARGS, "Stop monitoring fd for writes on specific loop"}, + /* Shared fd resource management (for read+write on same fd) */ + {"_update_fd_read", py_update_fd_read, METH_VARARGS, "Update read callback on fd resource"}, + {"_update_fd_write", py_update_fd_write, METH_VARARGS, "Update write callback on fd resource"}, + {"_clear_fd_read", py_clear_fd_read, METH_VARARGS, "Clear read monitoring on fd resource"}, + {"_clear_fd_write", py_clear_fd_write, METH_VARARGS, "Clear write monitoring on fd resource"}, + {"_release_fd_resource", py_release_fd_resource, METH_VARARGS, "Release fd resource"}, {"_schedule_timer_for", py_schedule_timer_for, METH_VARARGS, "Schedule timer on specific loop"}, {"_cancel_timer_for", py_cancel_timer_for, METH_VARARGS, "Cancel timer on specific loop"}, /* Synchronous sleep (for ASGI fast path) */ @@ -3777,28 +4783,42 @@ static PyMethodDef PyEventLoopMethods[] = { {NULL, NULL, 0, NULL} }; -/* Module definition */ +/* Module definition with module state for 
per-interpreter isolation */ static struct PyModuleDef PyEventLoopModuleDef = { PyModuleDef_HEAD_INIT, - "py_event_loop", - "Erlang-native asyncio event loop", - -1, - PyEventLoopMethods + .m_name = "py_event_loop", + .m_doc = "Erlang-native asyncio event loop", + .m_size = sizeof(py_event_loop_module_state_t), + .m_methods = PyEventLoopMethods, }; /** * Create and register the py_event_loop module in Python. - * Also creates a default event loop so g_python_event_loop is always available. + * Initializes module state for per-interpreter isolation. * Called during Python initialization. */ int create_py_event_loop_module(void) { + /* Check if already registered in this interpreter (idempotent) */ + PyObject *sys_modules = PyImport_GetModuleDict(); + PyObject *existing = PyDict_GetItemString(sys_modules, "py_event_loop"); + if (existing != NULL) { + return 0; /* Already registered */ + } + PyObject *module = PyModule_Create(&PyEventLoopModuleDef); if (module == NULL) { return -1; } - /* Add module to sys.modules */ - PyObject *sys_modules = PyImport_GetModuleDict(); + /* Initialize module state */ + py_event_loop_module_state_t *state = PyModule_GetState(module); + if (state != NULL) { + state->event_loop = NULL; + state->shared_router_valid = false; + state->isolation_mode = 0; /* global mode by default */ + } + + /* Add module to sys.modules (reuse sys_modules from idempotency check) */ if (PyDict_SetItemString(sys_modules, "py_event_loop", module) < 0) { Py_DECREF(module); return -1; @@ -3808,24 +4828,17 @@ int create_py_event_loop_module(void) { } /** - * Create a default event loop and set it as g_python_event_loop. + * Create a default event loop and store in module state. * This ensures the event loop is always available for Python asyncio. * Called after NIF is fully loaded (with GIL held). 
*/ int create_default_event_loop(ErlNifEnv *env) { - /* Check per-interpreter storage first for sub-interpreter support */ + /* Check module state first */ erlang_event_loop_t *existing = get_interpreter_event_loop(); if (existing != NULL) { return 0; /* Already have an event loop for this interpreter */ } - /* Also check global for backward compatibility */ - if (g_python_event_loop != NULL) { - /* Global exists but not set for this interpreter - set it now */ - set_interpreter_event_loop(g_python_event_loop); - return 0; - } - /* Allocate event loop resource */ erlang_event_loop_t *loop = enif_alloc_resource( EVENT_LOOP_RESOURCE_TYPE, sizeof(erlang_event_loop_t)); @@ -3875,10 +4888,15 @@ int create_default_event_loop(ErlNifEnv *env) { loop->has_router = false; loop->has_self = false; - /* Set as global Python event loop (backward compatibility for NIF calls) */ - g_python_event_loop = loop; + /* Try to use the global shared router if available (for subinterpreters) */ + pthread_mutex_lock(&g_global_router_mutex); + if (g_global_shared_router_valid) { + loop->router_pid = g_global_shared_router; + loop->has_router = true; + } + pthread_mutex_unlock(&g_global_router_mutex); - /* Store in per-interpreter storage for Python code to access */ + /* Store in module state for Python code to access */ set_interpreter_event_loop(loop); /* Keep a reference to prevent garbage collection */ @@ -3886,3 +4904,23 @@ int create_default_event_loop(ErlNifEnv *env) { return 0; } + +/** + * Initialize event loop for a subinterpreter. + * + * Creates the py_event_loop module and a default event loop for the + * current subinterpreter. This must be called after creating a new + * subinterpreter to enable asyncio.sleep() and timer functionality. 
+ * + * @param env NIF environment (can be NULL for worker pool threads) + * @return 0 on success, -1 on failure + */ +int init_subinterpreter_event_loop(ErlNifEnv *env) { + if (create_py_event_loop_module() < 0) { + return -1; + } + if (create_default_event_loop(env) < 0) { + return -1; + } + return 0; +} diff --git a/c_src/py_event_loop.h b/c_src/py_event_loop.h index 2b231e2..2d4a078 100644 --- a/c_src/py_event_loop.h +++ b/c_src/py_event_loop.h @@ -455,6 +455,17 @@ ERL_NIF_TERM nif_dispatch_timer(ErlNifEnv *env, int argc, ERL_NIF_TERM nif_event_loop_wakeup(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]); +/** + * @brief Submit an async coroutine to run on the event loop + * + * The coroutine result is sent to CallerPid via erlang.send(). + * This replaces the pthread+usleep polling model with direct message passing. + * + * NIF: event_loop_run_async(LoopRef, CallerPid, Ref, Module, Func, Args, Kwargs) -> ok | {error, Reason} + */ +ERL_NIF_TERM nif_event_loop_run_async(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]); + /** * @brief Signal that a synchronous sleep has completed * @@ -765,4 +776,16 @@ int create_py_event_loop_module(void); */ int create_default_event_loop(ErlNifEnv *env); +/** + * @brief Initialize event loop for a subinterpreter + * + * Creates the py_event_loop module and a default event loop for the + * current subinterpreter. This must be called after creating a new + * subinterpreter to enable asyncio.sleep() and timer functionality. 
+ * + * @param env NIF environment (can be NULL for worker pool threads) + * @return 0 on success, -1 on failure + */ +int init_subinterpreter_event_loop(ErlNifEnv *env); + #endif /* PY_EVENT_LOOP_H */ diff --git a/c_src/py_exec.c b/c_src/py_exec.c index 6c45d0a..1dd11f3 100644 --- a/c_src/py_exec.c +++ b/c_src/py_exec.c @@ -655,7 +655,7 @@ static void *executor_thread_main(void *arg) { /* Acquire GIL for this thread */ PyGILState_STATE gstate = PyGILState_Ensure(); - g_executor_running = true; + atomic_store(&g_executor_running, true); /* * Main processing loop. @@ -670,7 +670,7 @@ static void *executor_thread_main(void *arg) { Py_BEGIN_ALLOW_THREADS pthread_mutex_lock(&g_executor_mutex); - while (g_executor_queue_head == NULL && !g_executor_shutdown) { + while (g_executor_queue_head == NULL && !atomic_load(&g_executor_shutdown)) { pthread_cond_wait(&g_executor_cond, &g_executor_mutex); } @@ -682,7 +682,7 @@ static void *executor_thread_main(void *arg) { g_executor_queue_tail = NULL; } req->next = NULL; - } else if (g_executor_shutdown) { + } else if (atomic_load(&g_executor_shutdown)) { /* Queue is empty and shutdown requested - exit */ should_exit = true; } @@ -702,6 +702,9 @@ static void *executor_thread_main(void *arg) { /* Process the request with GIL held */ process_request(req); + /* Track completed requests */ + atomic_fetch_add(&g_counters.complete_count, 1); + /* Signal completion */ pthread_mutex_lock(&req->mutex); req->completed = true; @@ -711,7 +714,7 @@ static void *executor_thread_main(void *arg) { } } - g_executor_running = false; + atomic_store(&g_executor_running, false); PyGILState_Release(gstate); return NULL; @@ -720,8 +723,19 @@ static void *executor_thread_main(void *arg) { /** * Enqueue a request to the appropriate executor based on execution mode. * Routes to multi-executor pool, single executor, or executes directly. 
+ * + * @return 0 on success, -1 if shutting down (request rejected) */ -static void executor_enqueue(py_request_t *req) { +static int executor_enqueue(py_request_t *req) { + /* Reject work if runtime is shutting down (except shutdown requests) */ + if (runtime_is_shutting_down() && req->type != PY_REQ_SHUTDOWN) { + atomic_fetch_add(&g_counters.rejected_count, 1); + return -1; + } + + /* Track enqueued requests */ + atomic_fetch_add(&g_counters.enqueue_count, 1); + switch (g_execution_mode) { #ifdef HAVE_FREE_THREADED case PY_MODE_FREE_THREADED: @@ -736,15 +750,15 @@ static void executor_enqueue(py_request_t *req) { pthread_cond_signal(&req->cond); pthread_mutex_unlock(&req->mutex); } - return; + return 0; #endif case PY_MODE_MULTI_EXECUTOR: - if (g_multi_executor_initialized) { + if (atomic_load(&g_multi_executor_initialized)) { /* Route to multi-executor pool */ int exec_id = select_executor(); multi_executor_enqueue(exec_id, req); - return; + return 0; } /* Fall through to single executor if multi not initialized */ break; @@ -767,6 +781,7 @@ static void executor_enqueue(py_request_t *req) { } pthread_cond_signal(&g_executor_cond); pthread_mutex_unlock(&g_executor_mutex); + return 0; } /** @@ -785,7 +800,7 @@ static void executor_wait(py_request_t *req) { * Called during Python initialization. */ static int executor_start(void) { - g_executor_shutdown = false; + atomic_store(&g_executor_shutdown, false); g_executor_queue_head = NULL; g_executor_queue_tail = NULL; @@ -795,11 +810,11 @@ static int executor_start(void) { /* Wait for executor to be ready */ int max_wait = 100; /* 1 second max */ - while (!g_executor_running && max_wait-- > 0) { + while (!atomic_load(&g_executor_running) && max_wait-- > 0) { usleep(10000); /* 10ms */ } - return g_executor_running ? 0 : -1; + return atomic_load(&g_executor_running) ? 0 : -1; } /** @@ -807,7 +822,7 @@ static int executor_start(void) { * Called during Python finalization. 
*/ static void executor_stop(void) { - if (!g_executor_running) { + if (!atomic_load(&g_executor_running)) { return; } @@ -816,7 +831,7 @@ static void executor_stop(void) { request_init(&shutdown_req); shutdown_req.type = PY_REQ_SHUTDOWN; - g_executor_shutdown = true; + atomic_store(&g_executor_shutdown, true); executor_enqueue(&shutdown_req); executor_wait(&shutdown_req); request_cleanup(&shutdown_req); @@ -926,7 +941,7 @@ static void multi_executor_enqueue(int exec_id, py_request_t *req) { * Start the multi-executor pool. */ static int multi_executor_start(int num_executors) { - if (g_multi_executor_initialized) { + if (atomic_load(&g_multi_executor_initialized)) { return 0; } @@ -978,7 +993,7 @@ static int multi_executor_start(int num_executors) { } } - g_multi_executor_initialized = all_ready; + atomic_store(&g_multi_executor_initialized, all_ready); return all_ready ? 0 : -1; } @@ -986,7 +1001,7 @@ static int multi_executor_start(int num_executors) { * Stop the multi-executor pool. 
*/ static void multi_executor_stop(void) { - if (!g_multi_executor_initialized) { + if (!atomic_load(&g_multi_executor_initialized)) { return; } @@ -1023,7 +1038,7 @@ static void multi_executor_stop(void) { } } - g_multi_executor_initialized = false; + atomic_store(&g_multi_executor_initialized, false); } /* diff --git a/c_src/py_nif.c b/c_src/py_nif.c index 5ec2502..04fd809 100644 --- a/c_src/py_nif.c +++ b/c_src/py_nif.c @@ -46,13 +46,28 @@ ErlNifResourceType *WORKER_RESOURCE_TYPE = NULL; ErlNifResourceType *PYOBJ_RESOURCE_TYPE = NULL; -ErlNifResourceType *ASYNC_WORKER_RESOURCE_TYPE = NULL; +/* ASYNC_WORKER_RESOURCE_TYPE removed - async workers replaced by event loop model */ ErlNifResourceType *SUSPENDED_STATE_RESOURCE_TYPE = NULL; #ifdef HAVE_SUBINTERPRETERS ErlNifResourceType *SUBINTERP_WORKER_RESOURCE_TYPE = NULL; #endif -bool g_python_initialized = false; +/* Process-per-context resource type (no mutex) */ +ErlNifResourceType *PY_CONTEXT_RESOURCE_TYPE = NULL; + +/* py_ref resource type (Python object with interp_id for auto-routing) */ +ErlNifResourceType *PY_REF_RESOURCE_TYPE = NULL; + +/* suspended_context_state_t resource type (context suspension for callbacks) */ +ErlNifResourceType *PY_CONTEXT_SUSPENDED_RESOURCE_TYPE = NULL; + +_Atomic uint32_t g_context_id_counter = 1; + +/* Invariant counters for debugging and leak detection */ +py_invariant_counters_t g_counters = {0}; + +bool g_python_initialized = false; /* DEPRECATED: use g_runtime_state */ +_Atomic py_runtime_state_t g_runtime_state = PY_STATE_UNINIT; PyThreadState *g_main_thread_state = NULL; /* Execution mode */ @@ -62,7 +77,7 @@ int g_num_executors = 4; /* Multi-executor pool */ executor_t g_executors[MAX_EXECUTORS]; _Atomic int g_next_executor = 0; -bool g_multi_executor_initialized = false; +_Atomic bool g_multi_executor_initialized = false; /* Single executor state */ pthread_t g_executor_thread; @@ -70,8 +85,8 @@ pthread_mutex_t g_executor_mutex = PTHREAD_MUTEX_INITIALIZER; pthread_cond_t 
g_executor_cond = PTHREAD_COND_INITIALIZER; py_request_t *g_executor_queue_head = NULL; py_request_t *g_executor_queue_tail = NULL; -volatile bool g_executor_running = false; -volatile bool g_executor_shutdown = false; +_Atomic bool g_executor_running = false; +_Atomic bool g_executor_shutdown = false; /* Global counter for callback IDs */ _Atomic uint64_t g_callback_id_counter = 1; @@ -87,8 +102,10 @@ PyObject *g_numpy_ndarray_type = NULL; /* Thread-local callback context */ __thread py_worker_t *tl_current_worker = NULL; +__thread py_context_t *tl_current_context = NULL; __thread ErlNifEnv *tl_callback_env = NULL; __thread suspended_state_t *tl_current_suspended = NULL; +__thread suspended_context_state_t *tl_current_context_suspended = NULL; __thread bool tl_allow_suspension = false; /* Thread-local pending callback state (flag-based detection, not exception-based) */ @@ -148,6 +165,8 @@ static ERL_NIF_TERM build_suspended_result(ErlNifEnv *env, suspended_state_t *su #include "py_event_loop.c" #include "py_asgi.c" #include "py_wsgi.c" +#include "py_worker_pool.h" +#include "py_worker_pool.c" /* ============================================================================ * Resource callbacks @@ -186,77 +205,202 @@ static void pyobj_destructor(ErlNifEnv *env, void *obj) { } } -static void async_worker_destructor(ErlNifEnv *env, void *obj) { +/* async_worker_destructor removed - async workers replaced by event loop model */ + +#ifdef HAVE_SUBINTERPRETERS +static void subinterp_worker_destructor(ErlNifEnv *env, void *obj) { (void)env; - py_async_worker_t *worker = (py_async_worker_t *)obj; + py_subinterp_worker_t *worker = (py_subinterp_worker_t *)obj; + + /* For OWN_GIL subinterpreters, we cannot safely acquire the GIL from the + * GC thread (destructor may run on any thread). PyGILState_Ensure only + * works for the main interpreter, and PyThreadState_Swap doesn't actually + * acquire the GIL. 
+ * + * If the user didn't call the explicit destroy function, the subinterpreter + * leaks. This is a known limitation - users must call destroy explicitly. */ + if (worker->tstate != NULL && g_python_initialized) { +#ifdef DEBUG + fprintf(stderr, "Warning: subinterp_worker leaked - not destroyed " + "via explicit destroy. Use subinterp_worker_destroy/1.\n"); +#endif + /* Skip Python cleanup - we can't safely acquire the subinterpreter's GIL */ + worker->tstate = NULL; + worker->globals = NULL; + worker->locals = NULL; + } + + /* Destroy the mutex */ + pthread_mutex_destroy(&worker->mutex); +} +#endif - /* Signal shutdown */ - worker->shutdown = true; +/** + * @brief Destructor for py_context_t (process-per-context) + * + * Note: NO MUTEX to destroy - the process ownership model eliminates + * the need for mutex locking. + */ +static void context_destructor(ErlNifEnv *env, void *obj) { + (void)env; + py_context_t *ctx = (py_context_t *)obj; - /* Write to pipe to wake up event loop */ - if (worker->notify_pipe[1] >= 0) { - char c = 'q'; - (void)write(worker->notify_pipe[1], &c, 1); + /* Close callback pipes if open */ + if (ctx->callback_pipe[0] >= 0) { + close(ctx->callback_pipe[0]); + ctx->callback_pipe[0] = -1; + } + if (ctx->callback_pipe[1] >= 0) { + close(ctx->callback_pipe[1]); + ctx->callback_pipe[1] = -1; } - /* Wait for thread to finish */ - if (worker->loop_running) { - pthread_join(worker->loop_thread, NULL); + /* Skip if already destroyed by nif_context_destroy */ + if (ctx->destroyed) { + return; } - /* Clean up pending requests */ - pthread_mutex_lock(&worker->queue_mutex); - async_pending_t *p = worker->pending_head; - while (p != NULL) { - async_pending_t *next = p->next; - if (g_python_initialized && p->future != NULL) { - PyGILState_STATE gstate = PyGILState_Ensure(); - Py_DECREF(p->future); - PyGILState_Release(gstate); - } - enif_free(p); - p = next; + if (!g_python_initialized) { + return; } - pthread_mutex_unlock(&worker->queue_mutex); - 
pthread_mutex_destroy(&worker->queue_mutex); + /* If we reach here, the context wasn't properly destroyed via + * nif_context_destroy. This can happen if: + * - The py_context process crashed + * - The resource was leaked + * + * For subinterpreters with OWN_GIL, cleanup from a different thread + * is problematic. We skip cleanup and let Python clean up on exit. + * For worker mode, we can safely clean up with PyGILState_Ensure. + */ - /* Close pipes */ - if (worker->notify_pipe[0] >= 0) close(worker->notify_pipe[0]); - if (worker->notify_pipe[1] >= 0) close(worker->notify_pipe[1]); +#ifdef HAVE_SUBINTERPRETERS + if (ctx->is_subinterp) { + /* Can't safely destroy OWN_GIL subinterpreter from arbitrary thread. + * The interpreter and its objects will be cleaned up when Python + * finalizes. Log a warning if debugging is enabled. */ + #ifdef DEBUG + fprintf(stderr, "Warning: py_context subinterpreter %u leaked - " + "not destroyed via py_context:stop/1\n", ctx->interp_id); + #endif + return; + } +#endif - if (worker->msg_env != NULL) { - enif_free_env(worker->msg_env); + /* Worker mode - clean up with main GIL */ + PyGILState_STATE gstate = PyGILState_Ensure(); + Py_XDECREF(ctx->module_cache); + Py_XDECREF(ctx->globals); + Py_XDECREF(ctx->locals); +#ifndef HAVE_SUBINTERPRETERS + if (ctx->thread_state != NULL) { + PyThreadState_Clear(ctx->thread_state); + PyThreadState_Delete(ctx->thread_state); } +#endif + PyGILState_Release(gstate); +} + +/** + * @brief Destructor for py_ref_t (Python object with interp_id) + * + * This destructor properly cleans up the Python object reference. + * The interp_id is used for routing but doesn't need cleanup. 
+ */ +static void py_ref_destructor(ErlNifEnv *env, void *obj) { + (void)env; + py_ref_t *ref = (py_ref_t *)obj; - /* Clean up event loop */ - if (g_python_initialized && worker->event_loop != NULL) { + if (g_python_initialized && ref->obj != NULL) { +#ifdef HAVE_SUBINTERPRETERS + /* For subinterpreter objects (interp_id > 0), skip cleanup. + * We can't safely acquire a subinterpreter's GIL from the GC thread + * because PyGILState_Ensure only works for the main interpreter. + * The objects will be cleaned up when the subinterpreter is destroyed + * via Py_EndInterpreter. */ + if (ref->interp_id > 0) { + return; + } +#endif + /* Main interpreter: use PyGILState_Ensure */ PyGILState_STATE gstate = PyGILState_Ensure(); - Py_DECREF(worker->event_loop); + Py_XDECREF(ref->obj); + ref->obj = NULL; /* Null after DECREF */ PyGILState_Release(gstate); } } -#ifdef HAVE_SUBINTERPRETERS -static void subinterp_worker_destructor(ErlNifEnv *env, void *obj) { +/** + * @brief Destructor for suspended_context_state_t + * + * Cleans up all resources associated with a suspended context state. + */ +static void suspended_context_state_destructor(ErlNifEnv *env, void *obj) { (void)env; - py_subinterp_worker_t *worker = (py_subinterp_worker_t *)obj; + suspended_context_state_t *state = (suspended_context_state_t *)obj; - if (worker->tstate != NULL && g_python_initialized) { - /* Switch to this interpreter's thread state */ - PyThreadState *old_tstate = PyThreadState_Swap(worker->tstate); + /* Clean up Python objects if Python is still initialized */ + if (g_python_initialized && state->callback_args != NULL) { +#ifdef HAVE_SUBINTERPRETERS + /* For OWN_GIL subinterpreters, skip Python object cleanup. + * We can't safely acquire a subinterpreter's GIL from the GC thread + * because PyGILState_Ensure only works for the main interpreter, and + * PyThreadState_Swap doesn't acquire the GIL (it just swaps thread state). 
+ * The objects will be cleaned up when the subinterpreter is destroyed + * via Py_EndInterpreter. */ + if (state->ctx != NULL && state->ctx->is_subinterp) { + state->callback_args = NULL; /* Just null the pointer, don't DECREF */ + } else +#endif + { + /* Main interpreter: use standard GIL */ + PyGILState_STATE gstate = PyGILState_Ensure(); + Py_XDECREF(state->callback_args); + state->callback_args = NULL; /* Null after DECREF */ + PyGILState_Release(gstate); + } + } - Py_XDECREF(worker->globals); - Py_XDECREF(worker->locals); + /* Free allocated memory */ + if (state->callback_func_name != NULL) { + enif_free(state->callback_func_name); + } + if (state->result_data != NULL) { + enif_free(state->result_data); + } - /* End the interpreter */ - Py_EndInterpreter(worker->tstate); + /* Free sequential callback results array */ + if (state->callback_results != NULL) { + for (size_t i = 0; i < state->num_callback_results; i++) { + if (state->callback_results[i].data != NULL) { + enif_free(state->callback_results[i].data); + } + } + enif_free(state->callback_results); + } + + /* Free original context environment */ + if (state->orig_env != NULL) { + enif_free_env(state->orig_env); + } + + /* Release binaries */ + if (state->orig_module.data != NULL) { + enif_release_binary(&state->orig_module); + } + if (state->orig_func.data != NULL) { + enif_release_binary(&state->orig_func); + } + if (state->orig_code.data != NULL) { + enif_release_binary(&state->orig_code); + } - /* Restore previous thread state */ - PyThreadState_Swap(old_tstate); + /* Release the context resource (was kept in create_suspended_context_state_*) */ + if (state->ctx != NULL) { + enif_release_resource(state->ctx); + state->ctx = NULL; } } -#endif static void suspended_state_destructor(ErlNifEnv *env, void *obj) { (void)env; @@ -292,8 +436,17 @@ static void suspended_state_destructor(ErlNifEnv *env, void *obj) { * ============================================================================ */ static 
ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - if (g_python_initialized) { - return ATOM_OK; + /* Try to transition UNINIT -> INITING (only one thread wins) */ + if (!runtime_transition(PY_STATE_UNINIT, PY_STATE_INITING)) { + /* Check if already running (idempotent success) */ + if (runtime_is_running()) { + return ATOM_OK; + } + /* Also allow reinit from STOPPED state */ + if (!runtime_transition(PY_STATE_STOPPED, PY_STATE_INITING)) { + /* Another thread is initializing or shutting down */ + return make_error(env, "init_in_progress"); + } } #ifdef NEED_DLOPEN_GLOBAL @@ -363,15 +516,18 @@ static ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg PyConfig_Clear(&config); if (PyStatus_Exception(status)) { + atomic_store(&g_runtime_state, PY_STATE_STOPPED); return make_error(env, "python_init_failed"); } + /* Keep g_python_initialized for backward compatibility during transition */ g_python_initialized = true; /* Create the 'erlang' module for callbacks */ if (create_erlang_module() < 0) { Py_Finalize(); g_python_initialized = false; + atomic_store(&g_runtime_state, PY_STATE_STOPPED); return make_error(env, "erlang_module_creation_failed"); } @@ -379,6 +535,7 @@ static ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg if (create_py_event_loop_module() < 0) { Py_Finalize(); g_python_initialized = false; + atomic_store(&g_runtime_state, PY_STATE_STOPPED); return make_error(env, "event_loop_module_creation_failed"); } @@ -386,6 +543,7 @@ static ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg if (asgi_scope_init() < 0) { Py_Finalize(); g_python_initialized = false; + atomic_store(&g_runtime_state, PY_STATE_STOPPED); return make_error(env, "asgi_scope_init_failed"); } @@ -393,6 +551,7 @@ static ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg if (wsgi_scope_init() < 0) { Py_Finalize(); g_python_initialized = false; + 
atomic_store(&g_runtime_state, PY_STATE_STOPPED); return make_error(env, "wsgi_scope_init_failed"); } @@ -400,6 +559,7 @@ static ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg if (create_default_event_loop(env) < 0) { Py_Finalize(); g_python_initialized = false; + atomic_store(&g_runtime_state, PY_STATE_STOPPED); return make_error(env, "default_event_loop_creation_failed"); } @@ -467,6 +627,7 @@ static ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg g_main_thread_state = NULL; Py_Finalize(); g_python_initialized = false; + atomic_store(&g_runtime_state, PY_STATE_STOPPED); return make_error(env, "executor_start_failed"); } @@ -475,6 +636,9 @@ static ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg /* Non-fatal - thread worker support just won't be available */ } + /* Transition to RUNNING - initialization complete */ + atomic_store(&g_runtime_state, PY_STATE_RUNNING); + return ATOM_OK; } @@ -482,25 +646,29 @@ static ERL_NIF_TERM nif_finalize(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar (void)argc; (void)argv; - if (!g_python_initialized) { - return ATOM_OK; + /* Try to transition RUNNING -> SHUTTING_DOWN (only one thread wins) */ + if (!runtime_transition(PY_STATE_RUNNING, PY_STATE_SHUTTING_DOWN)) { + /* Check current state - if already shutdown, return success */ + py_runtime_state_t state = runtime_state(); + if (state == PY_STATE_STOPPED || state == PY_STATE_UNINIT) { + return ATOM_OK; + } + /* Another thread is shutting down - let it finish */ + if (state == PY_STATE_SHUTTING_DOWN) { + return ATOM_OK; + } + /* If still initializing, can't finalize yet */ + return make_error(env, "python_not_running"); } - /* Clean up thread worker system */ - thread_worker_cleanup(); - - /* Clean up ASGI and WSGI scope key caches */ - PyGILState_STATE gstate = PyGILState_Ensure(); - asgi_scope_cleanup(); - wsgi_scope_cleanup(); - - /* Clean up numpy type cache */ - Py_XDECREF(g_numpy_ndarray_type); 
- g_numpy_ndarray_type = NULL; - - PyGILState_Release(gstate); + /* + * SHUTDOWN SEQUENCE - ORDER MATTERS: + * 1. Stop executors first (they finish in-flight work, join threads) + * 2. Clean up thread worker system + * 3. Then clean up caches with GIL (no active work at this point) + */ - /* Stop executors based on mode */ + /* Step 1: Stop executors - they will finish in-flight requests and exit */ switch (g_execution_mode) { case PY_MODE_FREE_THREADED: /* No executor to stop */ @@ -512,7 +680,7 @@ static ERL_NIF_TERM nif_finalize(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar case PY_MODE_MULTI_EXECUTOR: default: - if (g_multi_executor_initialized) { + if (atomic_load(&g_multi_executor_initialized)) { multi_executor_stop(); } else { executor_stop(); @@ -520,6 +688,20 @@ static ERL_NIF_TERM nif_finalize(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar break; } + /* Step 2: Clean up thread worker system */ + thread_worker_cleanup(); + + /* Step 3: Clean up caches with GIL - no executor threads are running now */ + PyGILState_STATE gstate = PyGILState_Ensure(); + asgi_scope_cleanup(); + wsgi_scope_cleanup(); + + /* Clean up numpy type cache */ + Py_XDECREF(g_numpy_ndarray_type); + g_numpy_ndarray_type = NULL; + + PyGILState_Release(gstate); + /* Restore main thread state before finalizing */ if (g_main_thread_state != NULL) { PyEval_RestoreThread(g_main_thread_state); @@ -537,6 +719,9 @@ static ERL_NIF_TERM nif_finalize(ErlNifEnv *env, int argc, const ERL_NIF_TERM ar #endif g_python_initialized = false; + /* Transition to STOPPED - shutdown complete */ + atomic_store(&g_runtime_state, PY_STATE_STOPPED); + return ATOM_OK; } @@ -548,8 +733,8 @@ static ERL_NIF_TERM nif_worker_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM (void)argc; (void)argv; - if (!g_python_initialized) { - return make_error(env, "python_not_initialized"); + if (!runtime_is_running()) { + return make_error(env, "python_not_running"); } py_worker_t *worker = 
enif_alloc_resource(WORKER_RESOURCE_TYPE, sizeof(py_worker_t)); @@ -817,8 +1002,8 @@ static ERL_NIF_TERM nif_memory_stats(ErlNifEnv *env, int argc, const ERL_NIF_TER (void)argc; (void)argv; - if (!g_python_initialized) { - return make_error(env, "python_not_initialized"); + if (!runtime_is_running()) { + return make_error(env, "python_not_running"); } py_request_t req; @@ -834,9 +1019,64 @@ static ERL_NIF_TERM nif_memory_stats(ErlNifEnv *env, int argc, const ERL_NIF_TER return result; } +/** + * Get invariant counters for debugging and leak detection. + * Returns a map with counter names as keys and values as integers. + */ +static ERL_NIF_TERM nif_get_debug_counters(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + + ERL_NIF_TERM keys[14]; + ERL_NIF_TERM vals[14]; + int i = 0; + + /* GIL operations */ + keys[i] = enif_make_atom(env, "gil_ensure"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.gil_ensure_count)); + keys[i] = enif_make_atom(env, "gil_release"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.gil_release_count)); + + /* Python objects */ + keys[i] = enif_make_atom(env, "pyobj_created"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.pyobj_created)); + keys[i] = enif_make_atom(env, "pyobj_destroyed"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.pyobj_destroyed)); + + /* py_ref_t */ + keys[i] = enif_make_atom(env, "pyref_created"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.pyref_created)); + keys[i] = enif_make_atom(env, "pyref_destroyed"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.pyref_destroyed)); + + /* Contexts */ + keys[i] = enif_make_atom(env, "ctx_created"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.ctx_created)); + keys[i] = enif_make_atom(env, "ctx_destroyed"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.ctx_destroyed)); + + /* Suspended states */ + keys[i] = enif_make_atom(env, 
"suspended_created"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.suspended_created)); + keys[i] = enif_make_atom(env, "suspended_destroyed"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.suspended_destroyed)); + + /* Executor operations */ + keys[i] = enif_make_atom(env, "enqueue_count"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.enqueue_count)); + keys[i] = enif_make_atom(env, "complete_count"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.complete_count)); + keys[i] = enif_make_atom(env, "rejected_count"); + vals[i++] = enif_make_uint64(env, atomic_load(&g_counters.rejected_count)); + + ERL_NIF_TERM result; + enif_make_map_from_arrays(env, keys, vals, i, &result); + return result; +} + static ERL_NIF_TERM nif_gc(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - if (!g_python_initialized) { - return make_error(env, "python_not_initialized"); + if (!runtime_is_running()) { + return make_error(env, "python_not_running"); } py_request_t req; @@ -857,8 +1097,8 @@ static ERL_NIF_TERM nif_gc(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) } static ERL_NIF_TERM nif_tracemalloc_start(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - if (!g_python_initialized) { - return make_error(env, "python_not_initialized"); + if (!runtime_is_running()) { + return make_error(env, "python_not_running"); } PyGILState_STATE gstate = PyGILState_Ensure(); @@ -893,8 +1133,8 @@ static ERL_NIF_TERM nif_tracemalloc_stop(ErlNifEnv *env, int argc, const ERL_NIF (void)argc; (void)argv; - if (!g_python_initialized) { - return make_error(env, "python_not_initialized"); + if (!runtime_is_running()) { + return make_error(env, "python_not_running"); } PyGILState_STATE gstate = PyGILState_Ensure(); @@ -1004,129 +1244,215 @@ static ERL_NIF_TERM nif_send_callback_response(ErlNifEnv *env, int argc, const E } /* ============================================================================ - * Async worker NIFs + * Async 
worker NIFs (deprecated - replaced by event loop model) + * + * These NIFs are deprecated and return errors. Use py_event_loop_pool and + * py_event_loop:run_async/2 instead. * ============================================================================ */ static ERL_NIF_TERM nif_async_worker_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { (void)argc; (void)argv; + return make_error(env, "async_workers_deprecated_use_event_loop"); +} - if (!g_python_initialized) { - return make_error(env, "python_not_initialized"); +static ERL_NIF_TERM nif_async_worker_destroy(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + return ATOM_OK; +} + +static ERL_NIF_TERM nif_async_call(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + return make_error(env, "async_workers_deprecated_use_event_loop"); +} + +static ERL_NIF_TERM nif_async_gather(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + return make_error(env, "async_workers_deprecated_use_event_loop"); +} + +static ERL_NIF_TERM nif_async_stream(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + return make_error(env, "async_workers_deprecated_use_event_loop"); +} + +/* ============================================================================ + * Sub-interpreter support (Python 3.12+) + * ============================================================================ */ + +static ERL_NIF_TERM nif_subinterp_supported(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + +#ifdef HAVE_SUBINTERPRETERS + return ATOM_TRUE; +#else + return ATOM_FALSE; +#endif +} + +#ifdef HAVE_SUBINTERPRETERS + +static ERL_NIF_TERM nif_subinterp_worker_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + + if (!runtime_is_running()) { + return make_error(env, "python_not_running"); } - py_async_worker_t *worker = 
enif_alloc_resource(ASYNC_WORKER_RESOURCE_TYPE, sizeof(py_async_worker_t)); + py_subinterp_worker_t *worker = enif_alloc_resource(SUBINTERP_WORKER_RESOURCE_TYPE, + sizeof(py_subinterp_worker_t)); if (worker == NULL) { return make_error(env, "alloc_failed"); } - /* Initialize fields */ - worker->event_loop = NULL; - worker->loop_running = false; - worker->shutdown = false; - worker->pending_head = NULL; - worker->pending_tail = NULL; - worker->msg_env = enif_alloc_env(); - - /* Create notification pipe */ - if (pipe(worker->notify_pipe) < 0) { - enif_free_env(worker->msg_env); + /* Initialize mutex for thread-safe access */ + if (pthread_mutex_init(&worker->mutex, NULL) != 0) { enif_release_resource(worker); - return make_error(env, "pipe_failed"); + return make_error(env, "mutex_init_failed"); } - /* Initialize mutex */ - pthread_mutex_init(&worker->queue_mutex, NULL); + /* Need the main GIL to create sub-interpreter */ + PyGILState_STATE gstate = PyGILState_Ensure(); + + /* Save current thread state so we can restore it after creating sub-interp */ + PyThreadState *main_tstate = PyThreadState_Get(); + + /* Configure sub-interpreter with its own GIL */ + PyInterpreterConfig config = { + .use_main_obmalloc = 0, + .allow_fork = 0, + .allow_exec = 0, + .allow_threads = 1, + .allow_daemon_threads = 0, + .check_multi_interp_extensions = 1, + .gil = PyInterpreterConfig_OWN_GIL, /* This is the key - own GIL! 
*/ + }; + + PyThreadState *tstate = NULL; + PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config); - /* Start the event loop thread */ - if (pthread_create(&worker->loop_thread, NULL, async_event_loop_thread, worker) != 0) { - close(worker->notify_pipe[0]); - close(worker->notify_pipe[1]); - pthread_mutex_destroy(&worker->queue_mutex); - enif_free_env(worker->msg_env); + if (PyStatus_Exception(status) || tstate == NULL) { + /* We're still in main interpreter on error */ + PyGILState_Release(gstate); enif_release_resource(worker); - return make_error(env, "thread_create_failed"); + return make_error(env, "subinterp_create_failed"); } - /* Wait for event loop to be ready */ - int max_wait = 100; /* 1 second max */ - while (!worker->loop_running && max_wait-- > 0) { - usleep(10000); /* 10ms */ - } + worker->interp = PyThreadState_GetInterpreter(tstate); + worker->tstate = tstate; + + /* Create global/local namespaces in the new interpreter */ + worker->globals = PyDict_New(); + worker->locals = PyDict_New(); + + /* Import __builtins__ */ + PyObject *builtins = PyEval_GetBuiltins(); + PyDict_SetItemString(worker->globals, "__builtins__", builtins); - if (!worker->loop_running) { - worker->shutdown = true; - pthread_join(worker->loop_thread, NULL); - close(worker->notify_pipe[0]); - close(worker->notify_pipe[1]); - pthread_mutex_destroy(&worker->queue_mutex); + /* Initialize event loop for this subinterpreter */ + if (init_subinterpreter_event_loop(env) < 0) { + Py_EndInterpreter(tstate); + PyThreadState_Swap(main_tstate); + PyGILState_Release(gstate); enif_release_resource(worker); - return make_error(env, "event_loop_start_failed"); + return make_error(env, "event_loop_init_failed"); } + /* Switch back to main interpreter */ + PyThreadState_Swap(NULL); + PyThreadState_Swap(main_tstate); + + PyGILState_Release(gstate); + ERL_NIF_TERM result = enif_make_resource(env, worker); enif_release_resource(worker); return enif_make_tuple2(env, ATOM_OK, result); } 
-static ERL_NIF_TERM nif_async_worker_destroy(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { +static ERL_NIF_TERM nif_subinterp_worker_destroy(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { (void)argc; - py_async_worker_t *worker; + py_subinterp_worker_t *worker; - if (!enif_get_resource(env, argv[0], ASYNC_WORKER_RESOURCE_TYPE, (void **)&worker)) { + if (!enif_get_resource(env, argv[0], SUBINTERP_WORKER_RESOURCE_TYPE, (void **)&worker)) { return make_error(env, "invalid_worker"); } - /* Resource destructor will handle cleanup */ + if (!runtime_is_running()) { + return make_error(env, "python_not_running"); + } + + /* Lock mutex for thread-safe access */ + pthread_mutex_lock(&worker->mutex); + + if (worker->tstate != NULL) { + /* Use PyEval_RestoreThread to properly acquire the subinterpreter's GIL */ + PyEval_RestoreThread(worker->tstate); + + /* Clean up Python objects while holding the GIL */ + Py_XDECREF(worker->globals); + worker->globals = NULL; + Py_XDECREF(worker->locals); + worker->locals = NULL; + + /* End the interpreter - this releases its GIL */ + Py_EndInterpreter(worker->tstate); + worker->tstate = NULL; + } + + pthread_mutex_unlock(&worker->mutex); + return ATOM_OK; } -/* Counter for unique async call IDs */ -static uint64_t g_async_id_counter = 0; - -static ERL_NIF_TERM nif_async_call(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - py_async_worker_t *worker; +static ERL_NIF_TERM nif_subinterp_call(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + py_subinterp_worker_t *worker; ErlNifBinary module_bin, func_bin; - ErlNifPid caller; - if (!enif_get_resource(env, argv[0], ASYNC_WORKER_RESOURCE_TYPE, (void **)&worker)) { + if (!enif_get_resource(env, argv[0], SUBINTERP_WORKER_RESOURCE_TYPE, (void **)&worker)) { return make_error(env, "invalid_worker"); } - if (!worker->loop_running) { - return make_error(env, "event_loop_not_running"); - } if (!enif_inspect_binary(env, argv[1], &module_bin)) { return make_error(env, 
"invalid_module"); } if (!enif_inspect_binary(env, argv[2], &func_bin)) { return make_error(env, "invalid_func"); } - if (!enif_get_local_pid(env, argv[5], &caller)) { - return make_error(env, "invalid_caller"); - } - PyGILState_STATE gstate = PyGILState_Ensure(); + /* Lock mutex for thread-safe access */ + pthread_mutex_lock(&worker->mutex); + + /* Enter the sub-interpreter */ + PyThreadState *saved_tstate = PyThreadState_Swap(NULL); + PyThreadState_Swap(worker->tstate); - /* Convert module/func names */ char *module_name = binary_to_string(&module_bin); char *func_name = binary_to_string(&func_bin); if (module_name == NULL || func_name == NULL) { enif_free(module_name); enif_free(func_name); - PyGILState_Release(gstate); + PyThreadState_Swap(NULL); + PyThreadState_Swap(saved_tstate); + pthread_mutex_unlock(&worker->mutex); return make_error(env, "alloc_failed"); } ERL_NIF_TERM result; - /* Import module and get function */ + /* Import module */ PyObject *module = PyImport_ImportModule(module_name); if (module == NULL) { result = make_py_error(env); goto cleanup; } + /* Get function */ PyObject *func = PyObject_GetAttrString(module, func_name); Py_DECREF(module); if (func == NULL) { @@ -1134,7 +1460,7 @@ static ERL_NIF_TERM nif_async_call(ErlNifEnv *env, int argc, const ERL_NIF_TERM goto cleanup; } - /* Convert args list to Python tuple */ + /* Convert args */ unsigned int args_len; if (!enif_get_list_length(env, argv[3], &args_len)) { Py_DECREF(func); @@ -1162,457 +1488,1553 @@ static ERL_NIF_TERM nif_async_call(ErlNifEnv *env, int argc, const ERL_NIF_TERM kwargs = term_to_py(env, argv[4]); } - /* Call the function to get coroutine */ - PyObject *coro = PyObject_Call(func, args, kwargs); + /* Call the function */ + PyObject *py_result = PyObject_Call(func, args, kwargs); Py_DECREF(func); Py_DECREF(args); Py_XDECREF(kwargs); - if (coro == NULL) { + if (py_result == NULL) { + result = make_py_error(env); + } else { + ERL_NIF_TERM term_result = py_to_term(env, 
py_result); + Py_DECREF(py_result); + result = enif_make_tuple2(env, ATOM_OK, term_result); + } + +cleanup: + enif_free(module_name); + enif_free(func_name); + + /* Exit the sub-interpreter */ + PyThreadState_Swap(NULL); + if (saved_tstate != NULL) { + PyThreadState_Swap(saved_tstate); + } + + /* Unlock mutex */ + pthread_mutex_unlock(&worker->mutex); + + return result; +} + +static ERL_NIF_TERM nif_parallel_execute(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + unsigned int workers_len, calls_len; + + if (!enif_get_list_length(env, argv[0], &workers_len)) { + return make_error(env, "invalid_workers_list"); + } + if (!enif_get_list_length(env, argv[1], &calls_len)) { + return make_error(env, "invalid_calls_list"); + } + if (workers_len == 0 || calls_len == 0) { + return enif_make_tuple2(env, ATOM_OK, enif_make_list(env, 0)); + } + if (workers_len < calls_len) { + return make_error(env, "not_enough_workers"); + } + + ERL_NIF_TERM *results = enif_alloc(sizeof(ERL_NIF_TERM) * calls_len); + if (results == NULL) { + return make_error(env, "alloc_failed"); + } + ERL_NIF_TERM worker_head, worker_tail = argv[0]; + ERL_NIF_TERM call_head, call_tail = argv[1]; + + for (unsigned int i = 0; i < calls_len; i++) { + enif_get_list_cell(env, worker_tail, &worker_head, &worker_tail); + enif_get_list_cell(env, call_tail, &call_head, &call_tail); + + int arity; + const ERL_NIF_TERM *tuple; + if (!enif_get_tuple(env, call_head, &arity, &tuple) || arity < 3) { + enif_free(results); + return make_error(env, "invalid_call_tuple"); + } + + /* Build args array for subinterp_call */ + ERL_NIF_TERM call_args[5] = {worker_head, tuple[0], tuple[1], tuple[2], + (arity > 3) ? 
tuple[3] : enif_make_new_map(env)}; + + results[i] = nif_subinterp_call(env, 5, call_args); + } + + ERL_NIF_TERM result_list = enif_make_list_from_array(env, results, calls_len); + enif_free(results); + + return enif_make_tuple2(env, ATOM_OK, result_list); +} + +/** + * @brief Run an ASGI application in a subinterpreter + * + * Args: WorkerRef, Runner, Module, Callable, ScopeMap, Body + * + * This runs ASGI in a subinterpreter with its own GIL for true parallelism. + */ +static ERL_NIF_TERM nif_subinterp_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + if (argc < 6) { + return make_error(env, "badarg"); + } + + py_subinterp_worker_t *worker; + if (!enif_get_resource(env, argv[0], SUBINTERP_WORKER_RESOURCE_TYPE, (void **)&worker)) { + return make_error(env, "invalid_worker"); + } + + ErlNifBinary runner_bin, module_bin, callable_bin, body_bin; + if (!enif_inspect_binary(env, argv[1], &runner_bin)) { + return make_error(env, "invalid_runner"); + } + if (!enif_inspect_binary(env, argv[2], &module_bin)) { + return make_error(env, "invalid_module"); + } + if (!enif_inspect_binary(env, argv[3], &callable_bin)) { + return make_error(env, "invalid_callable"); + } + if (!enif_inspect_binary(env, argv[5], &body_bin)) { + return make_error(env, "invalid_body"); + } + + /* Lock mutex for thread-safe access */ + pthread_mutex_lock(&worker->mutex); + + /* Enter the sub-interpreter */ + PyThreadState *saved_tstate = PyThreadState_Swap(NULL); + PyThreadState_Swap(worker->tstate); + + char *runner_name = binary_to_string(&runner_bin); + char *module_name = binary_to_string(&module_bin); + char *callable_name = binary_to_string(&callable_bin); + if (runner_name == NULL || module_name == NULL || callable_name == NULL) { + enif_free(runner_name); + enif_free(module_name); + enif_free(callable_name); + PyThreadState_Swap(NULL); + if (saved_tstate != NULL) { + PyThreadState_Swap(saved_tstate); + } + pthread_mutex_unlock(&worker->mutex); + return make_error(env, 
"alloc_failed"); + } + + ERL_NIF_TERM result; + + /* Build scope dict from Erlang map */ + PyObject *scope = asgi_scope_from_map(env, argv[4]); + if (scope == NULL) { + result = make_py_error(env); + goto cleanup; + } + + /* Convert body binary to Python bytes */ + PyObject *body = PyBytes_FromStringAndSize((const char *)body_bin.data, body_bin.size); + if (body == NULL) { + Py_DECREF(scope); + result = make_py_error(env); + goto cleanup; + } + + /* Import the ASGI runner module */ + PyObject *runner_module = PyImport_ImportModule(runner_name); + if (runner_module == NULL) { + Py_DECREF(body); + Py_DECREF(scope); result = make_py_error(env); goto cleanup; } - /* Check if result is a coroutine */ - PyObject *asyncio = PyImport_ImportModule("asyncio"); - if (asyncio == NULL) { - Py_DECREF(coro); - result = make_error(env, "asyncio_import_failed"); - goto cleanup; - } - - PyObject *iscoroutine = PyObject_CallMethod(asyncio, "iscoroutine", "O", coro); - bool is_coro = iscoroutine != NULL && PyObject_IsTrue(iscoroutine); - Py_XDECREF(iscoroutine); + /* Call _run_asgi_sync(module_name, callable_name, scope, body) + * or run_asgi(module_name, callable_name, scope, body) depending on runner */ + PyObject *run_result = PyObject_CallMethod( + runner_module, "run_asgi", "ssOO", + module_name, callable_name, scope, body); + + /* Fallback to _run_asgi_sync if run_asgi doesn't exist */ + if (run_result == NULL && PyErr_ExceptionMatches(PyExc_AttributeError)) { + PyErr_Clear(); + run_result = PyObject_CallMethod( + runner_module, "_run_asgi_sync", "ssOO", + module_name, callable_name, scope, body); + } + + Py_DECREF(runner_module); + Py_DECREF(body); + Py_DECREF(scope); + + if (run_result == NULL) { + result = make_py_error(env); + goto cleanup; + } + + /* Convert result to Erlang term using optimized extraction */ + ERL_NIF_TERM term_result = extract_asgi_response(env, run_result); + Py_DECREF(run_result); + + result = enif_make_tuple2(env, ATOM_OK, term_result); + +cleanup: + 
enif_free(runner_name); + enif_free(module_name); + enif_free(callable_name); + + /* Exit the sub-interpreter */ + PyThreadState_Swap(NULL); + if (saved_tstate != NULL) { + PyThreadState_Swap(saved_tstate); + } + + /* Unlock mutex */ + pthread_mutex_unlock(&worker->mutex); + + return result; +} + +#else /* !HAVE_SUBINTERPRETERS */ + +/* Stub implementations for older Python versions */ +static ERL_NIF_TERM nif_subinterp_worker_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + return make_error(env, "subinterpreters_not_supported"); +} + +static ERL_NIF_TERM nif_subinterp_worker_destroy(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + return make_error(env, "subinterpreters_not_supported"); +} + +static ERL_NIF_TERM nif_subinterp_call(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + return make_error(env, "subinterpreters_not_supported"); +} + +static ERL_NIF_TERM nif_parallel_execute(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + return make_error(env, "subinterpreters_not_supported"); +} + +static ERL_NIF_TERM nif_subinterp_asgi_run(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + return make_error(env, "subinterpreters_not_supported"); +} + +#endif /* HAVE_SUBINTERPRETERS */ + +/* ============================================================================ + * Process-per-context NIFs (NO MUTEX) + * + * These NIFs are designed for the process-per-context architecture. + * Each Erlang process owns one context and serializes access through + * message passing, eliminating the need for mutex locking. 
+ * ============================================================================ */ + +/** + * @brief Create a new Python context + * + * nif_context_create(Mode) -> {ok, ContextRef, InterpId} | {error, Reason} + * Mode: subinterp | worker + */ +static ERL_NIF_TERM nif_context_create(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + + if (!runtime_is_running()) { + return make_error(env, "python_not_running"); + } + + /* Parse mode atom */ + char mode_str[32]; + if (!enif_get_atom(env, argv[0], mode_str, sizeof(mode_str), ERL_NIF_LATIN1)) { + return make_error(env, "invalid_mode"); + } + + bool use_subinterp = (strcmp(mode_str, "subinterp") == 0); + + /* Allocate context resource */ + py_context_t *ctx = enif_alloc_resource(PY_CONTEXT_RESOURCE_TYPE, sizeof(py_context_t)); + if (ctx == NULL) { + return make_error(env, "alloc_failed"); + } + + /* Initialize fields */ + ctx->interp_id = atomic_fetch_add(&g_context_id_counter, 1); + ctx->is_subinterp = use_subinterp; + ctx->destroyed = false; + ctx->has_callback_handler = false; + ctx->callback_pipe[0] = -1; + ctx->callback_pipe[1] = -1; + ctx->globals = NULL; + ctx->locals = NULL; + ctx->module_cache = NULL; + + /* Create callback pipe for blocking callback responses */ + if (pipe(ctx->callback_pipe) < 0) { + enif_release_resource(ctx); + return make_error(env, "pipe_create_failed"); + } + +#ifdef HAVE_SUBINTERPRETERS + ctx->interp = NULL; + ctx->tstate = NULL; + + if (use_subinterp) { + /* Need the main GIL to create sub-interpreter */ + PyGILState_STATE gstate = PyGILState_Ensure(); + + /* Save current thread state */ + PyThreadState *main_tstate = PyThreadState_Get(); + + /* Configure sub-interpreter with its own GIL */ + PyInterpreterConfig config = { + .use_main_obmalloc = 0, + .allow_fork = 0, + .allow_exec = 0, + .allow_threads = 1, + .allow_daemon_threads = 0, + .check_multi_interp_extensions = 1, + .gil = PyInterpreterConfig_OWN_GIL, /* This is the key - own GIL! 
*/ + }; + + PyThreadState *tstate = NULL; + PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config); + + if (PyStatus_Exception(status) || tstate == NULL) { + PyGILState_Release(gstate); + enif_release_resource(ctx); + return make_error(env, "subinterp_create_failed"); + } + + ctx->interp = PyThreadState_GetInterpreter(tstate); + ctx->tstate = tstate; + + /* Create global/local namespaces in the new interpreter */ + ctx->globals = PyDict_New(); + ctx->locals = PyDict_New(); + ctx->module_cache = PyDict_New(); + + /* Import __builtins__ */ + PyObject *builtins = PyEval_GetBuiltins(); + PyDict_SetItemString(ctx->globals, "__builtins__", builtins); + + /* Create erlang module in this subinterpreter */ + if (create_erlang_module() >= 0) { + /* Import erlang module into globals */ + PyObject *erlang_module = PyImport_ImportModule("erlang"); + if (erlang_module != NULL) { + PyDict_SetItemString(ctx->globals, "erlang", erlang_module); + Py_DECREF(erlang_module); + } + } + + /* Initialize event loop for this subinterpreter */ + if (init_subinterpreter_event_loop(env) < 0) { + Py_EndInterpreter(tstate); + PyThreadState_Swap(main_tstate); + PyGILState_Release(gstate); + enif_release_resource(ctx); + return make_error(env, "event_loop_init_failed"); + } + + /* Switch back to main interpreter */ + PyThreadState_Swap(NULL); + PyThreadState_Swap(main_tstate); + + PyGILState_Release(gstate); + } else +#else + /* Pre-3.12 Python - ignore subinterp mode request */ + (void)use_subinterp; +#endif + { + /* Worker mode - create a thread state in main interpreter */ + PyGILState_STATE gstate = PyGILState_Ensure(); + +#ifndef HAVE_SUBINTERPRETERS + PyInterpreterState *interp = PyInterpreterState_Get(); + ctx->thread_state = PyThreadState_New(interp); +#endif + + ctx->globals = PyDict_New(); + ctx->locals = PyDict_New(); + ctx->module_cache = PyDict_New(); + + /* Import __builtins__ into globals */ + PyObject *builtins = PyEval_GetBuiltins(); + PyDict_SetItemString(ctx->globals, 
"__builtins__", builtins); + + /* Import erlang module into globals for worker mode */ + PyObject *erlang_module = PyImport_ImportModule("erlang"); + if (erlang_module != NULL) { + PyDict_SetItemString(ctx->globals, "erlang", erlang_module); + Py_DECREF(erlang_module); + } + + PyGILState_Release(gstate); + } + + ERL_NIF_TERM ref = enif_make_resource(env, ctx); + enif_release_resource(ctx); + + return enif_make_tuple3(env, ATOM_OK, ref, enif_make_uint(env, ctx->interp_id)); +} + +/** + * @brief Destroy a Python context + * + * nif_context_destroy(ContextRef) -> ok + * + * This function does the actual cleanup because it's called from the + * owning process's thread, which is the same thread that created the + * context. This is important for OWN_GIL subinterpreters where the + * thread state is tied to the creating thread. + */ +static ERL_NIF_TERM nif_context_destroy(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + + /* Skip if already destroyed */ + if (ctx->destroyed) { + return ATOM_OK; + } + + if (!g_python_initialized) { + ctx->destroyed = true; + return ATOM_OK; + } + +#ifdef HAVE_SUBINTERPRETERS + if (ctx->is_subinterp && ctx->tstate != NULL) { + /* For subinterpreters with OWN_GIL, we're on the same thread + * that created the context, so we can use the original tstate. + * + * 1. Switch to the subinterpreter's thread state + * 2. Clean up objects + * 3. End the interpreter + * 4. 
Restore thread state (NULL is fine) + */ + PyThreadState *old_tstate = PyThreadState_Swap(ctx->tstate); + + /* Clean up Python objects while holding the subinterpreter's GIL */ + Py_XDECREF(ctx->module_cache); + ctx->module_cache = NULL; + Py_XDECREF(ctx->globals); + ctx->globals = NULL; + Py_XDECREF(ctx->locals); + ctx->locals = NULL; + + /* End the interpreter - this releases its GIL */ + Py_EndInterpreter(ctx->tstate); + ctx->tstate = NULL; + ctx->interp = NULL; + + /* Restore previous thread state if any */ + if (old_tstate != NULL && old_tstate != ctx->tstate) { + PyThreadState_Swap(old_tstate); + } + } else +#endif + { + /* Worker mode - clean up with main GIL */ + PyGILState_STATE gstate = PyGILState_Ensure(); + Py_XDECREF(ctx->module_cache); + ctx->module_cache = NULL; + Py_XDECREF(ctx->globals); + ctx->globals = NULL; + Py_XDECREF(ctx->locals); + ctx->locals = NULL; +#ifndef HAVE_SUBINTERPRETERS + if (ctx->thread_state != NULL) { + PyThreadState_Clear(ctx->thread_state); + PyThreadState_Delete(ctx->thread_state); + ctx->thread_state = NULL; + } +#endif + PyGILState_Release(gstate); + } + + ctx->destroyed = true; + return ATOM_OK; +} + +/** + * @brief Get module from cache or import it + * + * Helper function - no mutex needed since context is process-owned. 
+ */ +static PyObject *context_get_module(py_context_t *ctx, const char *module_name) { + /* Check cache first */ + if (ctx->module_cache != NULL) { + PyObject *cached = PyDict_GetItemString(ctx->module_cache, module_name); + if (cached != NULL) { + return cached; /* Borrowed reference */ + } + } + + /* Import module */ + PyObject *module = PyImport_ImportModule(module_name); + if (module == NULL) { + return NULL; + } + + /* Cache it */ + if (ctx->module_cache != NULL) { + PyDict_SetItemString(ctx->module_cache, module_name, module); + Py_DECREF(module); /* Dict now owns the reference */ + return PyDict_GetItemString(ctx->module_cache, module_name); + } + + return module; /* Caller must DECREF if not cached */ +} + +/** + * @brief Call a Python function in a context + * + * nif_context_call(ContextRef, Module, Func, Args, Kwargs) -> {ok, Result} | {error, Reason} | {suspended, ...} + * + * NO MUTEX - caller must ensure exclusive access (process ownership) + * + * When Python code calls erlang.call(), this NIF may return {suspended, CallbackId, StateRef, {FuncName, Args}} + * indicating that the context process should handle the callback and then call context_resume to continue. 
+ */ +static ERL_NIF_TERM nif_context_call(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + py_context_t *ctx; + ErlNifBinary module_bin, func_bin; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + if (!enif_inspect_binary(env, argv[1], &module_bin)) { + return make_error(env, "invalid_module"); + } + if (!enif_inspect_binary(env, argv[2], &func_bin)) { + return make_error(env, "invalid_func"); + } + + char *module_name = binary_to_string(&module_bin); + char *func_name = binary_to_string(&func_bin); + if (module_name == NULL || func_name == NULL) { + enif_free(module_name); + enif_free(func_name); + return make_error(env, "alloc_failed"); + } + + ERL_NIF_TERM result; + +#ifdef HAVE_SUBINTERPRETERS + PyThreadState *saved_tstate = NULL; + if (ctx->is_subinterp) { + /* Enter the sub-interpreter - NO MUTEX LOCK */ + saved_tstate = PyThreadState_Swap(NULL); + PyThreadState_Swap(ctx->tstate); + } else { + PyGILState_Ensure(); + } +#else + PyGILState_STATE gstate = PyGILState_Ensure(); +#endif + + /* Set thread-local context for callback support */ + py_context_t *prev_context = tl_current_context; + tl_current_context = ctx; + + /* Enable suspension for callback support */ + bool prev_allow_suspension = tl_allow_suspension; + tl_allow_suspension = true; + + /* Get or import module */ + PyObject *module = context_get_module(ctx, module_name); + if (module == NULL) { + result = make_py_error(env); + goto cleanup; + } + + /* Get function */ + PyObject *func = PyObject_GetAttrString(module, func_name); + if (func == NULL) { + result = make_py_error(env); + goto cleanup; + } + + /* Convert args */ + unsigned int args_len; + if (!enif_get_list_length(env, argv[3], &args_len)) { + Py_DECREF(func); + result = make_error(env, "invalid_args"); + goto cleanup; + } + + PyObject *args = PyTuple_New(args_len); + ERL_NIF_TERM head, tail = argv[3]; + for (unsigned int i = 0; i < args_len; i++) 
{ + enif_get_list_cell(env, tail, &head, &tail); + PyObject *arg = term_to_py(env, head); + if (arg == NULL) { + Py_DECREF(args); + Py_DECREF(func); + result = make_error(env, "arg_conversion_failed"); + goto cleanup; + } + PyTuple_SET_ITEM(args, i, arg); + } + + /* Convert kwargs */ + PyObject *kwargs = NULL; + if (argc > 4 && enif_is_map(env, argv[4])) { + kwargs = term_to_py(env, argv[4]); + } + + /* Call the function */ + PyObject *py_result = PyObject_Call(func, args, kwargs); + Py_DECREF(func); + Py_DECREF(args); + Py_XDECREF(kwargs); + + if (py_result == NULL) { + /* Check for pending callback (flag-based detection) */ + if (tl_pending_callback) { + PyErr_Clear(); /* Clear whatever exception is set */ + + /* Create suspended context state */ + suspended_context_state_t *suspended = create_suspended_context_state_for_call( + env, ctx, &module_bin, &func_bin, argv[3], + argc > 4 ? argv[4] : enif_make_new_map(env)); + + if (suspended == NULL) { + tl_pending_callback = false; + result = make_error(env, "create_suspended_state_failed"); + } else { + result = build_suspended_context_result(env, suspended); + } + } else { + result = make_py_error(env); + } + } else { + ERL_NIF_TERM term_result = py_to_term(env, py_result); + Py_DECREF(py_result); + result = enif_make_tuple2(env, ATOM_OK, term_result); + } + +cleanup: + /* Restore thread-local state */ + tl_allow_suspension = prev_allow_suspension; + tl_current_context = prev_context; + + enif_free(module_name); + enif_free(func_name); + +#ifdef HAVE_SUBINTERPRETERS + if (ctx->is_subinterp) { + /* Exit the sub-interpreter - NO MUTEX UNLOCK */ + PyThreadState_Swap(NULL); + if (saved_tstate != NULL) { + PyThreadState_Swap(saved_tstate); + } + } else { + PyGILState_Release(PyGILState_UNLOCKED); + } +#else + PyGILState_Release(gstate); +#endif + + return result; +} + +/** + * @brief Evaluate a Python expression in a context + * + * nif_context_eval(ContextRef, Code, Locals) -> {ok, Result} | {error, Reason} | 
{suspended, ...} + * + * NO MUTEX - caller must ensure exclusive access (process ownership) + * + * When Python code calls erlang.call(), this NIF may return {suspended, CallbackId, StateRef, {FuncName, Args}} + * indicating that the context process should handle the callback and then call context_resume to continue. + */ +static ERL_NIF_TERM nif_context_eval(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + py_context_t *ctx; + ErlNifBinary code_bin; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + if (!enif_inspect_binary(env, argv[1], &code_bin)) { + return make_error(env, "invalid_code"); + } + + char *code = binary_to_string(&code_bin); + if (code == NULL) { + return make_error(env, "alloc_failed"); + } + + ERL_NIF_TERM result; + +#ifdef HAVE_SUBINTERPRETERS + PyThreadState *saved_tstate = NULL; + if (ctx->is_subinterp) { + saved_tstate = PyThreadState_Swap(NULL); + PyThreadState_Swap(ctx->tstate); + } else { + PyGILState_Ensure(); + } +#else + PyGILState_STATE gstate = PyGILState_Ensure(); +#endif + + /* Set thread-local context for callback support */ + py_context_t *prev_context = tl_current_context; + tl_current_context = ctx; + + /* Enable suspension for callback support */ + bool prev_allow_suspension = tl_allow_suspension; + tl_allow_suspension = true; + + /* Update locals if provided */ + ERL_NIF_TERM locals_term = argc > 2 ? 
argv[2] : enif_make_new_map(env); + if (argc > 2 && enif_is_map(env, argv[2])) { + PyObject *new_locals = term_to_py(env, argv[2]); + if (new_locals != NULL && PyDict_Check(new_locals)) { + PyDict_Update(ctx->locals, new_locals); + Py_DECREF(new_locals); + } + } + + /* Compile and evaluate */ + PyObject *py_result = PyRun_String(code, Py_eval_input, ctx->globals, ctx->locals); + + if (py_result == NULL) { + /* Check for pending callback (flag-based detection) */ + if (tl_pending_callback) { + PyErr_Clear(); /* Clear whatever exception is set */ + + /* Create suspended context state */ + suspended_context_state_t *suspended = create_suspended_context_state_for_eval( + env, ctx, &code_bin, locals_term); + + if (suspended == NULL) { + tl_pending_callback = false; + result = make_error(env, "create_suspended_state_failed"); + } else { + result = build_suspended_context_result(env, suspended); + } + } else { + result = make_py_error(env); + } + } else { + ERL_NIF_TERM term_result = py_to_term(env, py_result); + Py_DECREF(py_result); + result = enif_make_tuple2(env, ATOM_OK, term_result); + } + + /* Restore thread-local state */ + tl_allow_suspension = prev_allow_suspension; + tl_current_context = prev_context; + + enif_free(code); + +#ifdef HAVE_SUBINTERPRETERS + if (ctx->is_subinterp) { + PyThreadState_Swap(NULL); + if (saved_tstate != NULL) { + PyThreadState_Swap(saved_tstate); + } + } else { + PyGILState_Release(PyGILState_UNLOCKED); + } +#else + PyGILState_Release(gstate); +#endif + + return result; +} + +/** + * @brief Execute Python statements in a context + * + * nif_context_exec(ContextRef, Code) -> ok | {error, Reason} + * + * NO MUTEX - caller must ensure exclusive access (process ownership) + */ +static ERL_NIF_TERM nif_context_exec(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + ErlNifBinary code_bin; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, 
"invalid_context"); + } + if (!enif_inspect_binary(env, argv[1], &code_bin)) { + return make_error(env, "invalid_code"); + } + + char *code = binary_to_string(&code_bin); + if (code == NULL) { + return make_error(env, "alloc_failed"); + } + + ERL_NIF_TERM result; - if (!is_coro) { - Py_DECREF(asyncio); - /* Not a coroutine - return result directly */ - ERL_NIF_TERM term_result = py_to_term(env, coro); - Py_DECREF(coro); - result = enif_make_tuple2(env, ATOM_OK, - enif_make_tuple2(env, enif_make_atom(env, "immediate"), term_result)); - goto cleanup; +#ifdef HAVE_SUBINTERPRETERS + PyThreadState *saved_tstate = NULL; + if (ctx->is_subinterp) { + saved_tstate = PyThreadState_Swap(NULL); + PyThreadState_Swap(ctx->tstate); + } else { + PyGILState_Ensure(); } +#else + PyGILState_STATE gstate = PyGILState_Ensure(); +#endif - /* Submit coroutine to event loop using run_coroutine_threadsafe */ - PyObject *future = PyObject_CallMethod(asyncio, "run_coroutine_threadsafe", - "OO", coro, worker->event_loop); - Py_DECREF(coro); - Py_DECREF(asyncio); + /* Set thread-local context for callback support */ + py_context_t *prev_context = tl_current_context; + tl_current_context = ctx; - if (future == NULL) { + /* Execute statements. + * Use globals for both globals and locals to simulate module-level execution. + * This ensures imports are accessible from function definitions. 
*/ + PyObject *py_result = PyRun_String(code, Py_file_input, ctx->globals, ctx->globals); + + if (py_result == NULL) { result = make_py_error(env); - goto cleanup; + } else { + Py_DECREF(py_result); + result = ATOM_OK; } - /* Create pending entry */ - uint64_t async_id = __sync_fetch_and_add(&g_async_id_counter, 1); + /* Restore previous context */ + tl_current_context = prev_context; - async_pending_t *pending = enif_alloc(sizeof(async_pending_t)); - if (pending == NULL) { - Py_DECREF(future); - result = make_error(env, "alloc_failed"); - goto cleanup; + enif_free(code); + +#ifdef HAVE_SUBINTERPRETERS + if (ctx->is_subinterp) { + PyThreadState_Swap(NULL); + if (saved_tstate != NULL) { + PyThreadState_Swap(saved_tstate); + } + } else { + PyGILState_Release(PyGILState_UNLOCKED); + } +#else + PyGILState_Release(gstate); +#endif + + return result; +} + +/** + * @brief Call a method on a Python object in a context + * + * nif_context_call_method(ContextRef, ObjRef, Method, Args) -> {ok, Result} | {error, Reason} + * + * NO MUTEX - caller must ensure exclusive access (process ownership) + */ +static ERL_NIF_TERM nif_context_call_method(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + py_object_t *obj_wrapper; + ErlNifBinary method_bin; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + if (!enif_get_resource(env, argv[1], PYOBJ_RESOURCE_TYPE, (void **)&obj_wrapper)) { + return make_error(env, "invalid_object"); + } + if (!enif_inspect_binary(env, argv[2], &method_bin)) { + return make_error(env, "invalid_method"); + } + + char *method_name = binary_to_string(&method_bin); + if (method_name == NULL) { + return make_error(env, "alloc_failed"); } - pending->id = async_id; - pending->future = future; - pending->caller = caller; - pending->next = NULL; - /* Add to pending list */ - pthread_mutex_lock(&worker->queue_mutex); - if (worker->pending_tail 
== NULL) { - worker->pending_head = pending; - worker->pending_tail = pending; + ERL_NIF_TERM result; + +#ifdef HAVE_SUBINTERPRETERS + PyThreadState *saved_tstate = NULL; + if (ctx->is_subinterp) { + saved_tstate = PyThreadState_Swap(NULL); + PyThreadState_Swap(ctx->tstate); } else { - worker->pending_tail->next = pending; - worker->pending_tail = pending; + PyGILState_Ensure(); + } +#else + PyGILState_STATE gstate = PyGILState_Ensure(); +#endif + + /* Get method */ + PyObject *method = PyObject_GetAttrString(obj_wrapper->obj, method_name); + if (method == NULL) { + result = make_py_error(env); + goto cleanup; + } + + /* Convert args */ + unsigned int args_len; + if (!enif_get_list_length(env, argv[3], &args_len)) { + Py_DECREF(method); + result = make_error(env, "invalid_args"); + goto cleanup; + } + + PyObject *args = PyTuple_New(args_len); + ERL_NIF_TERM head, tail = argv[3]; + for (unsigned int i = 0; i < args_len; i++) { + enif_get_list_cell(env, tail, &head, &tail); + PyObject *arg = term_to_py(env, head); + if (arg == NULL) { + Py_DECREF(args); + Py_DECREF(method); + result = make_error(env, "arg_conversion_failed"); + goto cleanup; + } + PyTuple_SET_ITEM(args, i, arg); } - pthread_mutex_unlock(&worker->queue_mutex); - result = enif_make_tuple2(env, ATOM_OK, enif_make_uint64(env, async_id)); + /* Call method */ + PyObject *py_result = PyObject_Call(method, args, NULL); + Py_DECREF(method); + Py_DECREF(args); + + if (py_result == NULL) { + result = make_py_error(env); + } else { + ERL_NIF_TERM term_result = py_to_term(env, py_result); + Py_DECREF(py_result); + result = enif_make_tuple2(env, ATOM_OK, term_result); + } cleanup: - enif_free(module_name); - enif_free(func_name); + enif_free(method_name); + +#ifdef HAVE_SUBINTERPRETERS + if (ctx->is_subinterp) { + PyThreadState_Swap(NULL); + if (saved_tstate != NULL) { + PyThreadState_Swap(saved_tstate); + } + } else { + PyGILState_Release(PyGILState_UNLOCKED); + } +#else PyGILState_Release(gstate); +#endif return 
result; } -static ERL_NIF_TERM nif_async_gather(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { +/** + * @brief Convert a Python object reference to an Erlang term + * + * nif_context_to_term(ObjRef) -> {ok, Term} | {error, Reason} + */ +static ERL_NIF_TERM nif_context_to_term(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { (void)argc; - py_async_worker_t *worker; - ErlNifPid caller; + py_object_t *obj_wrapper; - if (!enif_get_resource(env, argv[0], ASYNC_WORKER_RESOURCE_TYPE, (void **)&worker)) { - return make_error(env, "invalid_worker"); + if (!enif_get_resource(env, argv[0], PYOBJ_RESOURCE_TYPE, (void **)&obj_wrapper)) { + return make_error(env, "invalid_object"); + } + + PyGILState_STATE gstate = PyGILState_Ensure(); + ERL_NIF_TERM term_result = py_to_term(env, obj_wrapper->obj); + PyGILState_Release(gstate); + + return enif_make_tuple2(env, ATOM_OK, term_result); +} + +/** + * @brief Get the interpreter ID from a context reference + * + * nif_context_interp_id(ContextRef) -> InterpId + */ +static ERL_NIF_TERM nif_context_interp_id(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); } - if (!worker->loop_running) { - return make_error(env, "event_loop_not_running"); + + return enif_make_uint(env, ctx->interp_id); +} + +/** + * @brief Set the callback handler for a context + * + * nif_context_set_callback_handler(ContextRef, Pid) -> ok | {error, Reason} + * + * This must be called before the context can handle erlang.call() callbacks. 
+ */ +static ERL_NIF_TERM nif_context_set_callback_handler(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + ErlNifPid pid; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); } - if (!enif_get_local_pid(env, argv[2], &caller)) { - return make_error(env, "invalid_caller"); + if (!enif_get_local_pid(env, argv[1], &pid)) { + return make_error(env, "invalid_pid"); } - unsigned int calls_len; - if (!enif_get_list_length(env, argv[1], &calls_len)) { - return make_error(env, "invalid_calls_list"); + ctx->callback_handler = pid; + ctx->has_callback_handler = true; + + return ATOM_OK; +} + +/** + * @brief Get the callback pipe write FD for a context + * + * nif_context_get_callback_pipe(ContextRef) -> {ok, WriteFd} | {error, Reason} + * + * Returns the write end of the callback pipe for sending responses. + */ +static ERL_NIF_TERM nif_context_get_callback_pipe(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); } - if (calls_len == 0) { - return enif_make_tuple2(env, ATOM_OK, - enif_make_tuple2(env, enif_make_atom(env, "immediate"), enif_make_list(env, 0))); + if (ctx->callback_pipe[1] < 0) { + return make_error(env, "pipe_not_initialized"); } - PyGILState_STATE gstate = PyGILState_Ensure(); + return enif_make_tuple2(env, ATOM_OK, enif_make_int(env, ctx->callback_pipe[1])); +} - /* Import asyncio */ - PyObject *asyncio = PyImport_ImportModule("asyncio"); - if (asyncio == NULL) { - PyGILState_Release(gstate); - return make_error(env, "asyncio_import_failed"); +/** + * @brief Write a callback response to the context's pipe + * + * nif_context_write_callback_response(ContextRef, Data) -> ok | {error, Reason} + * + * Writes a length-prefixed binary response to the callback pipe. 
+ */ +static ERL_NIF_TERM nif_context_write_callback_response(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + ErlNifBinary data; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + if (!enif_inspect_binary(env, argv[1], &data)) { + return make_error(env, "invalid_data"); } - /* Build list of coroutines */ - PyObject *coros = PyList_New(calls_len); - ERL_NIF_TERM head, tail = argv[1]; + if (ctx->callback_pipe[1] < 0) { + return make_error(env, "pipe_not_initialized"); + } - for (unsigned int i = 0; i < calls_len; i++) { - enif_get_list_cell(env, tail, &head, &tail); + /* Write length prefix (4 bytes, native endianness - must match read_length_prefixed_data) */ + uint32_t len = (uint32_t)data.size; + ssize_t written = write(ctx->callback_pipe[1], &len, sizeof(len)); + if (written != sizeof(len)) { + return make_error(env, "write_failed"); + } - int arity; - const ERL_NIF_TERM *tuple; - if (!enif_get_tuple(env, head, &arity, &tuple) || arity < 3) { - Py_DECREF(coros); - Py_DECREF(asyncio); - PyGILState_Release(gstate); - return make_error(env, "invalid_call_tuple"); - } + written = write(ctx->callback_pipe[1], data.data, data.size); + if (written != (ssize_t)data.size) { + return make_error(env, "write_failed"); + } - ErlNifBinary module_bin, func_bin; - if (!enif_inspect_binary(env, tuple[0], &module_bin) || - !enif_inspect_binary(env, tuple[1], &func_bin)) { - Py_DECREF(coros); - Py_DECREF(asyncio); - PyGILState_Release(gstate); - return make_error(env, "invalid_module_or_func"); - } + return ATOM_OK; +} - char module_name[256], func_name[256]; - if (module_bin.size >= 256 || func_bin.size >= 256) { - Py_DECREF(coros); - Py_DECREF(asyncio); - PyGILState_Release(gstate); - return make_error(env, "name_too_long"); +/** + * @brief Resume a suspended context with callback result + * + * nif_context_resume(ContextRef, StateRef, ResultBinary) -> 
{ok, Result} | {error, Reason} | {suspended, ...} + * + * This NIF resumes Python execution after a callback has been handled. + * The ResultBinary contains the callback result that will be returned to Python. + * + * If Python code makes another erlang.call() during resume, this NIF may + * return {suspended, ...} again for nested callback handling. + */ +static ERL_NIF_TERM nif_context_resume(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + suspended_context_state_t *state; + ErlNifBinary result_bin; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + if (!enif_get_resource(env, argv[1], PY_CONTEXT_SUSPENDED_RESOURCE_TYPE, (void **)&state)) { + return make_error(env, "invalid_state_ref"); + } + if (!enif_inspect_binary(env, argv[2], &result_bin)) { + return make_error(env, "invalid_result"); + } + + /* Verify state belongs to this context */ + if (state->ctx != ctx) { + return make_error(env, "context_mismatch"); + } + + /* Store the callback result */ + state->result_data = enif_alloc(result_bin.size); + if (state->result_data == NULL) { + return make_error(env, "alloc_failed"); + } + memcpy(state->result_data, result_bin.data, result_bin.size); + state->result_len = result_bin.size; + state->has_result = true; + + ERL_NIF_TERM result; + +#ifdef HAVE_SUBINTERPRETERS + PyThreadState *saved_tstate = NULL; + if (ctx->is_subinterp) { + saved_tstate = PyThreadState_Swap(NULL); + PyThreadState_Swap(ctx->tstate); + } else { + PyGILState_Ensure(); + } +#else + PyGILState_STATE gstate = PyGILState_Ensure(); +#endif + + /* Set thread-local state for replay */ + py_context_t *prev_context = tl_current_context; + tl_current_context = ctx; + + bool prev_allow_suspension = tl_allow_suspension; + tl_allow_suspension = true; + + suspended_context_state_t *prev_suspended = tl_current_context_suspended; + tl_current_context_suspended = state; + + /* Reset 
callback result index for this replay */ + state->callback_result_index = 0; + + if (state->request_type == PY_REQ_CALL) { + /* Replay a py:call */ + char *module_name = enif_alloc(state->orig_module.size + 1); + char *func_name = enif_alloc(state->orig_func.size + 1); + + if (module_name == NULL || func_name == NULL) { + enif_free(module_name); + enif_free(func_name); + result = make_error(env, "alloc_failed"); + goto cleanup; } - memcpy(module_name, module_bin.data, module_bin.size); - module_name[module_bin.size] = '\0'; - memcpy(func_name, func_bin.data, func_bin.size); - func_name[func_bin.size] = '\0'; - /* Import module and get function */ - PyObject *module = PyImport_ImportModule(module_name); + memcpy(module_name, state->orig_module.data, state->orig_module.size); + module_name[state->orig_module.size] = '\0'; + memcpy(func_name, state->orig_func.data, state->orig_func.size); + func_name[state->orig_func.size] = '\0'; + + /* Get the function */ + PyObject *func = NULL; + PyObject *module = context_get_module(ctx, module_name); if (module == NULL) { - Py_DECREF(coros); - Py_DECREF(asyncio); - ERL_NIF_TERM err = make_py_error(env); - PyGILState_Release(gstate); - return err; + enif_free(module_name); + enif_free(func_name); + result = make_py_error(env); + goto cleanup; } - PyObject *func = PyObject_GetAttrString(module, func_name); - Py_DECREF(module); + func = PyObject_GetAttrString(module, func_name); if (func == NULL) { - Py_DECREF(coros); - Py_DECREF(asyncio); - ERL_NIF_TERM err = make_py_error(env); - PyGILState_Release(gstate); - return err; + enif_free(module_name); + enif_free(func_name); + result = make_py_error(env); + goto cleanup; } /* Convert args */ unsigned int args_len; - if (!enif_get_list_length(env, tuple[2], &args_len)) { + if (!enif_get_list_length(state->orig_env, state->orig_args, &args_len)) { Py_DECREF(func); - Py_DECREF(coros); - Py_DECREF(asyncio); - PyGILState_Release(gstate); - return make_error(env, "invalid_args"); + 
enif_free(module_name); + enif_free(func_name); + result = make_error(env, "invalid_args"); + goto cleanup; } PyObject *args = PyTuple_New(args_len); - ERL_NIF_TERM arg_head, arg_tail = tuple[2]; - for (unsigned int j = 0; j < args_len; j++) { - enif_get_list_cell(env, arg_tail, &arg_head, &arg_tail); - PyObject *arg = term_to_py(env, arg_head); + ERL_NIF_TERM head, tail = state->orig_args; + for (unsigned int i = 0; i < args_len; i++) { + enif_get_list_cell(state->orig_env, tail, &head, &tail); + PyObject *arg = term_to_py(state->orig_env, head); if (arg == NULL) { Py_DECREF(args); Py_DECREF(func); - Py_DECREF(coros); - Py_DECREF(asyncio); - PyGILState_Release(gstate); - return make_error(env, "arg_conversion_failed"); + enif_free(module_name); + enif_free(func_name); + result = make_error(env, "arg_conversion_failed"); + goto cleanup; } - PyTuple_SET_ITEM(args, j, arg); + PyTuple_SET_ITEM(args, i, arg); } - /* Call function to get coroutine */ - PyObject *coro = PyObject_Call(func, args, NULL); + /* Convert kwargs */ + PyObject *kwargs = NULL; + if (enif_is_map(state->orig_env, state->orig_kwargs)) { + kwargs = term_to_py(state->orig_env, state->orig_kwargs); + } + + /* Call the function (replay with cached result) */ + PyObject *py_result = PyObject_Call(func, args, kwargs); Py_DECREF(func); Py_DECREF(args); + Py_XDECREF(kwargs); + enif_free(module_name); + enif_free(func_name); - if (coro == NULL) { - Py_DECREF(coros); - Py_DECREF(asyncio); - ERL_NIF_TERM err = make_py_error(env); - PyGILState_Release(gstate); - return err; + if (py_result == NULL) { + /* Check for pending callback (nested callback during replay) */ + if (tl_pending_callback) { + PyErr_Clear(); + + /* Create new suspended context state for nested callback */ + suspended_context_state_t *nested = create_suspended_context_state_for_call( + env, ctx, &state->orig_module, &state->orig_func, + state->orig_args, state->orig_kwargs); + + if (nested == NULL) { + tl_pending_callback = false; + result = 
make_error(env, "create_nested_suspended_state_failed"); + } else { + /* Copy accumulated callback results from parent to nested state */ + if (copy_callback_results_to_nested(nested, state) != 0) { + enif_release_resource(nested); + tl_pending_callback = false; + result = make_error(env, "copy_callback_results_failed"); + } else { + result = build_suspended_context_result(env, nested); + } + } + } else { + result = make_py_error(env); + } + } else { + ERL_NIF_TERM term_result = py_to_term(env, py_result); + Py_DECREF(py_result); + result = enif_make_tuple2(env, ATOM_OK, term_result); } - PyList_SET_ITEM(coros, i, coro); - } + } else if (state->request_type == PY_REQ_EVAL) { + /* Replay a py:eval */ + char *code = enif_alloc(state->orig_code.size + 1); + if (code == NULL) { + result = make_error(env, "alloc_failed"); + goto cleanup; + } + memcpy(code, state->orig_code.data, state->orig_code.size); + code[state->orig_code.size] = '\0'; + + /* Update locals if provided */ + if (enif_is_map(state->orig_env, state->orig_locals)) { + PyObject *new_locals = term_to_py(state->orig_env, state->orig_locals); + if (new_locals != NULL && PyDict_Check(new_locals)) { + PyDict_Update(ctx->locals, new_locals); + Py_DECREF(new_locals); + } + } - /* Create asyncio.gather(*coros) */ - PyObject *gather_args = PyTuple_New(calls_len); - for (unsigned int i = 0; i < calls_len; i++) { - PyObject *coro = PyList_GetItem(coros, i); - Py_INCREF(coro); - PyTuple_SET_ITEM(gather_args, i, coro); + /* Compile and evaluate (replay with cached result) */ + PyObject *py_result = PyRun_String(code, Py_eval_input, ctx->globals, ctx->locals); + enif_free(code); + + if (py_result == NULL) { + /* Check for pending callback (nested callback during replay) */ + if (tl_pending_callback) { + PyErr_Clear(); + + /* Create new suspended context state for nested callback */ + suspended_context_state_t *nested = create_suspended_context_state_for_eval( + env, ctx, &state->orig_code, state->orig_locals); + + if 
(nested == NULL) { + tl_pending_callback = false; + result = make_error(env, "create_nested_suspended_state_failed"); + } else { + /* Copy accumulated callback results from parent to nested state */ + if (copy_callback_results_to_nested(nested, state) != 0) { + enif_release_resource(nested); + tl_pending_callback = false; + result = make_error(env, "copy_callback_results_failed"); + } else { + result = build_suspended_context_result(env, nested); + } + } + } else { + result = make_py_error(env); + } + } else { + ERL_NIF_TERM term_result = py_to_term(env, py_result); + Py_DECREF(py_result); + result = enif_make_tuple2(env, ATOM_OK, term_result); + } + + } else { + result = make_error(env, "unsupported_request_type"); } - PyObject *gather_func = PyObject_GetAttrString(asyncio, "gather"); - PyObject *gather_coro = PyObject_Call(gather_func, gather_args, NULL); - Py_DECREF(gather_func); - Py_DECREF(gather_args); - Py_DECREF(coros); +cleanup: + /* Restore thread-local state */ + tl_current_context_suspended = prev_suspended; + tl_allow_suspension = prev_allow_suspension; + tl_current_context = prev_context; - if (gather_coro == NULL) { - Py_DECREF(asyncio); - ERL_NIF_TERM err = make_py_error(env); - PyGILState_Release(gstate); - return err; +#ifdef HAVE_SUBINTERPRETERS + if (ctx->is_subinterp) { + PyThreadState_Swap(NULL); + if (saved_tstate != NULL) { + PyThreadState_Swap(saved_tstate); + } + } else { + PyGILState_Release(PyGILState_UNLOCKED); } +#else + PyGILState_Release(gstate); +#endif - /* Submit to event loop */ - PyObject *future = PyObject_CallMethod(asyncio, "run_coroutine_threadsafe", - "OO", gather_coro, worker->event_loop); - Py_DECREF(gather_coro); - Py_DECREF(asyncio); + return result; +} - if (future == NULL) { - ERL_NIF_TERM err = make_py_error(env); - PyGILState_Release(gstate); - return err; +/** + * @brief Cancel a suspended context resume (cleanup on error) + * + * nif_context_cancel_resume(ContextRef, StateRef) -> ok + * + * Called when callback 
execution fails and resume won't be called. + * Allows proper cleanup of the suspended state. + */ +static ERL_NIF_TERM nif_context_cancel_resume(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + suspended_context_state_t *state; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + if (!enif_get_resource(env, argv[1], PY_CONTEXT_SUSPENDED_RESOURCE_TYPE, (void **)&state)) { + return make_error(env, "invalid_state_ref"); + } + + /* Verify state belongs to this context */ + if (state->ctx != ctx) { + return make_error(env, "context_mismatch"); } - /* Create pending entry */ - uint64_t async_id = __sync_fetch_and_add(&g_async_id_counter, 1); + /* Mark as error so destructor knows to clean up properly */ + state->is_error = true; + + /* The resource destructor will clean up when the resource is GC'd */ + return ATOM_OK; +} + +/* ============================================================================ + * py_ref NIFs - Python object references with interp_id for auto-routing + * ============================================================================ */ + +/** + * @brief Wrap a Python result as a py_ref with interp_id + * + * This is called internally when return => ref is specified. 
+ * nif_ref_wrap(ContextRef, PyObjTerm) -> RefTerm + */ +static ERL_NIF_TERM nif_ref_wrap(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_context_t *ctx; + py_object_t *py_obj; + + if (!enif_get_resource(env, argv[0], PY_CONTEXT_RESOURCE_TYPE, (void **)&ctx)) { + return make_error(env, "invalid_context"); + } + if (!enif_get_resource(env, argv[1], PYOBJ_RESOURCE_TYPE, (void **)&py_obj)) { + return make_error(env, "invalid_pyobj"); + } - async_pending_t *pending = enif_alloc(sizeof(async_pending_t)); - if (pending == NULL) { - Py_DECREF(future); - PyGILState_Release(gstate); + /* Allocate py_ref resource */ + py_ref_t *ref = enif_alloc_resource(PY_REF_RESOURCE_TYPE, sizeof(py_ref_t)); + if (ref == NULL) { return make_error(env, "alloc_failed"); } - pending->id = async_id; - pending->future = future; - pending->caller = caller; - pending->next = NULL; - /* Add to pending list */ - pthread_mutex_lock(&worker->queue_mutex); - if (worker->pending_tail == NULL) { - worker->pending_head = pending; - worker->pending_tail = pending; - } else { - worker->pending_tail->next = pending; - worker->pending_tail = pending; - } - pthread_mutex_unlock(&worker->queue_mutex); + /* Copy the PyObject reference and interp_id */ + ref->obj = py_obj->obj; + ref->interp_id = ctx->interp_id; + /* Increment reference count since we're taking ownership */ + PyGILState_STATE gstate = PyGILState_Ensure(); + Py_INCREF(ref->obj); PyGILState_Release(gstate); - return enif_make_tuple2(env, ATOM_OK, enif_make_uint64(env, async_id)); -} + ERL_NIF_TERM ref_term = enif_make_resource(env, ref); + enif_release_resource(ref); -static ERL_NIF_TERM nif_async_stream(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - /* For now, delegate to async_call - async generators will be handled - * in the Erlang layer by collecting results */ - return nif_async_call(env, argc, argv); + return enif_make_tuple2(env, ATOM_OK, ref_term); } -/* 
============================================================================ - * Sub-interpreter support (Python 3.12+) - * ============================================================================ */ - -static ERL_NIF_TERM nif_subinterp_supported(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { +/** + * @brief Check if a term is a py_ref + * + * nif_is_ref(Term) -> true | false + */ +static ERL_NIF_TERM nif_is_ref(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { (void)argc; - (void)argv; + py_ref_t *ref; -#ifdef HAVE_SUBINTERPRETERS - return ATOM_TRUE; -#else + if (enif_get_resource(env, argv[0], PY_REF_RESOURCE_TYPE, (void **)&ref)) { + return ATOM_TRUE; + } return ATOM_FALSE; -#endif } -#ifdef HAVE_SUBINTERPRETERS - -static ERL_NIF_TERM nif_subinterp_worker_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { +/** + * @brief Get the interpreter ID from a py_ref + * + * nif_ref_interp_id(Ref) -> InterpId + * + * This is fast - no GIL needed, just reads the stored interp_id. 
+ */ +static ERL_NIF_TERM nif_ref_interp_id(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { (void)argc; - (void)argv; + py_ref_t *ref; - if (!g_python_initialized) { - return make_error(env, "python_not_initialized"); + if (!enif_get_resource(env, argv[0], PY_REF_RESOURCE_TYPE, (void **)&ref)) { + return make_error(env, "invalid_ref"); } - py_subinterp_worker_t *worker = enif_alloc_resource(SUBINTERP_WORKER_RESOURCE_TYPE, - sizeof(py_subinterp_worker_t)); - if (worker == NULL) { - return make_error(env, "alloc_failed"); + return enif_make_uint(env, ref->interp_id); +} + +/** + * @brief Convert a py_ref to an Erlang term + * + * nif_ref_to_term(Ref) -> {ok, Term} | {error, Reason} + */ +static ERL_NIF_TERM nif_ref_to_term(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_ref_t *ref; + + if (!enif_get_resource(env, argv[0], PY_REF_RESOURCE_TYPE, (void **)&ref)) { + return make_error(env, "invalid_ref"); } - /* Need the main GIL to create sub-interpreter */ PyGILState_STATE gstate = PyGILState_Ensure(); + ERL_NIF_TERM result = py_to_term(env, ref->obj); + PyGILState_Release(gstate); - /* Save current thread state so we can restore it after creating sub-interp */ - PyThreadState *main_tstate = PyThreadState_Get(); - - /* Configure sub-interpreter with its own GIL */ - PyInterpreterConfig config = { - .use_main_obmalloc = 0, - .allow_fork = 0, - .allow_exec = 0, - .allow_threads = 1, - .allow_daemon_threads = 0, - .check_multi_interp_extensions = 1, - .gil = PyInterpreterConfig_OWN_GIL, /* This is the key - own GIL! 
*/ - }; + return enif_make_tuple2(env, ATOM_OK, result); +} - PyThreadState *tstate = NULL; - PyStatus status = Py_NewInterpreterFromConfig(&tstate, &config); +/** + * @brief Get an attribute from a py_ref object + * + * nif_ref_getattr(Ref, AttrName) -> {ok, Value} | {error, Reason} + */ +static ERL_NIF_TERM nif_ref_getattr(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { + (void)argc; + py_ref_t *ref; + ErlNifBinary attr_bin; - if (PyStatus_Exception(status) || tstate == NULL) { - /* We're still in main interpreter on error */ - PyGILState_Release(gstate); - enif_release_resource(worker); - return make_error(env, "subinterp_create_failed"); + if (!enif_get_resource(env, argv[0], PY_REF_RESOURCE_TYPE, (void **)&ref)) { + return make_error(env, "invalid_ref"); + } + if (!enif_inspect_binary(env, argv[1], &attr_bin)) { + return make_error(env, "invalid_attr"); } - worker->interp = PyThreadState_GetInterpreter(tstate); - worker->tstate = tstate; - - /* Create global/local namespaces in the new interpreter */ - worker->globals = PyDict_New(); - worker->locals = PyDict_New(); + char *attr_name = binary_to_string(&attr_bin); + if (attr_name == NULL) { + return make_error(env, "alloc_failed"); + } - /* Import __builtins__ */ - PyObject *builtins = PyEval_GetBuiltins(); - PyDict_SetItemString(worker->globals, "__builtins__", builtins); + ERL_NIF_TERM result; + PyGILState_STATE gstate = PyGILState_Ensure(); - /* Switch back to main interpreter */ - PyThreadState_Swap(NULL); - PyThreadState_Swap(main_tstate); + PyObject *attr = PyObject_GetAttrString(ref->obj, attr_name); + if (attr == NULL) { + result = make_py_error(env); + } else { + ERL_NIF_TERM term_result = py_to_term(env, attr); + Py_DECREF(attr); + result = enif_make_tuple2(env, ATOM_OK, term_result); + } PyGILState_Release(gstate); + enif_free(attr_name); - ERL_NIF_TERM result = enif_make_resource(env, worker); - enif_release_resource(worker); - - return enif_make_tuple2(env, ATOM_OK, result); + return 
result; } -static ERL_NIF_TERM nif_subinterp_worker_destroy(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { +/** + * @brief Call a method on a py_ref object + * + * nif_ref_call_method(Ref, Method, Args) -> {ok, Result} | {error, Reason} + */ +static ERL_NIF_TERM nif_ref_call_method(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { (void)argc; - py_subinterp_worker_t *worker; - - if (!enif_get_resource(env, argv[0], SUBINTERP_WORKER_RESOURCE_TYPE, (void **)&worker)) { - return make_error(env, "invalid_worker"); - } - - /* Resource destructor will handle cleanup */ - return ATOM_OK; -} - -static ERL_NIF_TERM nif_subinterp_call(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - py_subinterp_worker_t *worker; - ErlNifBinary module_bin, func_bin; + py_ref_t *ref; + ErlNifBinary method_bin; - if (!enif_get_resource(env, argv[0], SUBINTERP_WORKER_RESOURCE_TYPE, (void **)&worker)) { - return make_error(env, "invalid_worker"); + if (!enif_get_resource(env, argv[0], PY_REF_RESOURCE_TYPE, (void **)&ref)) { + return make_error(env, "invalid_ref"); } - if (!enif_inspect_binary(env, argv[1], &module_bin)) { - return make_error(env, "invalid_module"); - } - if (!enif_inspect_binary(env, argv[2], &func_bin)) { - return make_error(env, "invalid_func"); + if (!enif_inspect_binary(env, argv[1], &method_bin)) { + return make_error(env, "invalid_method"); } - /* Enter the sub-interpreter */ - PyThreadState *saved_tstate = PyThreadState_Swap(NULL); - PyThreadState_Swap(worker->tstate); - - char *module_name = binary_to_string(&module_bin); - char *func_name = binary_to_string(&func_bin); - if (module_name == NULL || func_name == NULL) { - enif_free(module_name); - enif_free(func_name); - PyThreadState_Swap(saved_tstate); + char *method_name = binary_to_string(&method_bin); + if (method_name == NULL) { return make_error(env, "alloc_failed"); } ERL_NIF_TERM result; + PyGILState_STATE gstate = PyGILState_Ensure(); - /* Import module */ - PyObject *module = 
PyImport_ImportModule(module_name); - if (module == NULL) { - result = make_py_error(env); - goto cleanup; - } - - /* Get function */ - PyObject *func = PyObject_GetAttrString(module, func_name); - Py_DECREF(module); - if (func == NULL) { + /* Get method */ + PyObject *method = PyObject_GetAttrString(ref->obj, method_name); + if (method == NULL) { result = make_py_error(env); goto cleanup; } /* Convert args */ unsigned int args_len; - if (!enif_get_list_length(env, argv[3], &args_len)) { - Py_DECREF(func); + if (!enif_get_list_length(env, argv[2], &args_len)) { + Py_DECREF(method); result = make_error(env, "invalid_args"); goto cleanup; } PyObject *args = PyTuple_New(args_len); - ERL_NIF_TERM head, tail = argv[3]; + ERL_NIF_TERM head, tail = argv[2]; for (unsigned int i = 0; i < args_len; i++) { enif_get_list_cell(env, tail, &head, &tail); PyObject *arg = term_to_py(env, head); if (arg == NULL) { Py_DECREF(args); - Py_DECREF(func); + Py_DECREF(method); result = make_error(env, "arg_conversion_failed"); goto cleanup; } PyTuple_SET_ITEM(args, i, arg); } - /* Convert kwargs */ - PyObject *kwargs = NULL; - if (argc > 4 && enif_is_map(env, argv[4])) { - kwargs = term_to_py(env, argv[4]); - } - - /* Call the function */ - PyObject *py_result = PyObject_Call(func, args, kwargs); - Py_DECREF(func); + /* Call method */ + PyObject *py_result = PyObject_Call(method, args, NULL); + Py_DECREF(method); Py_DECREF(args); - Py_XDECREF(kwargs); if (py_result == NULL) { result = make_py_error(env); @@ -1623,95 +3045,12 @@ static ERL_NIF_TERM nif_subinterp_call(ErlNifEnv *env, int argc, const ERL_NIF_T } cleanup: - enif_free(module_name); - enif_free(func_name); - - /* Exit the sub-interpreter */ - PyThreadState_Swap(NULL); - if (saved_tstate != NULL) { - PyThreadState_Swap(saved_tstate); - } + PyGILState_Release(gstate); + enif_free(method_name); return result; } -static ERL_NIF_TERM nif_parallel_execute(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - (void)argc; - unsigned 
int workers_len, calls_len; - - if (!enif_get_list_length(env, argv[0], &workers_len)) { - return make_error(env, "invalid_workers_list"); - } - if (!enif_get_list_length(env, argv[1], &calls_len)) { - return make_error(env, "invalid_calls_list"); - } - if (workers_len == 0 || calls_len == 0) { - return enif_make_tuple2(env, ATOM_OK, enif_make_list(env, 0)); - } - if (workers_len < calls_len) { - return make_error(env, "not_enough_workers"); - } - - ERL_NIF_TERM *results = enif_alloc(sizeof(ERL_NIF_TERM) * calls_len); - if (results == NULL) { - return make_error(env, "alloc_failed"); - } - ERL_NIF_TERM worker_head, worker_tail = argv[0]; - ERL_NIF_TERM call_head, call_tail = argv[1]; - - for (unsigned int i = 0; i < calls_len; i++) { - enif_get_list_cell(env, worker_tail, &worker_head, &worker_tail); - enif_get_list_cell(env, call_tail, &call_head, &call_tail); - - int arity; - const ERL_NIF_TERM *tuple; - if (!enif_get_tuple(env, call_head, &arity, &tuple) || arity < 3) { - enif_free(results); - return make_error(env, "invalid_call_tuple"); - } - - /* Build args array for subinterp_call */ - ERL_NIF_TERM call_args[5] = {worker_head, tuple[0], tuple[1], tuple[2], - (arity > 3) ? 
tuple[3] : enif_make_new_map(env)}; - - results[i] = nif_subinterp_call(env, 5, call_args); - } - - ERL_NIF_TERM result_list = enif_make_list_from_array(env, results, calls_len); - enif_free(results); - - return enif_make_tuple2(env, ATOM_OK, result_list); -} - -#else /* !HAVE_SUBINTERPRETERS */ - -/* Stub implementations for older Python versions */ -static ERL_NIF_TERM nif_subinterp_worker_new(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - (void)argc; - (void)argv; - return make_error(env, "subinterpreters_not_supported"); -} - -static ERL_NIF_TERM nif_subinterp_worker_destroy(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - (void)argc; - (void)argv; - return make_error(env, "subinterpreters_not_supported"); -} - -static ERL_NIF_TERM nif_subinterp_call(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - (void)argc; - (void)argv; - return make_error(env, "subinterpreters_not_supported"); -} - -static ERL_NIF_TERM nif_parallel_execute(ErlNifEnv *env, int argc, const ERL_NIF_TERM argv[]) { - (void)argc; - (void)argv; - return make_error(env, "subinterpreters_not_supported"); -} - -#endif /* HAVE_SUBINTERPRETERS */ - /* ============================================================================ * NIF setup * ============================================================================ */ @@ -1729,9 +3068,7 @@ static int load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) { env, NULL, "py_object", pyobj_destructor, ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, NULL); - ASYNC_WORKER_RESOURCE_TYPE = enif_open_resource_type( - env, NULL, "py_async_worker", async_worker_destructor, - ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, NULL); + /* ASYNC_WORKER_RESOURCE_TYPE removed - replaced by event loop model */ SUSPENDED_STATE_RESOURCE_TYPE = enif_open_resource_type( env, NULL, "py_suspended_state", suspended_state_destructor, @@ -1741,15 +3078,31 @@ static int load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) { 
SUBINTERP_WORKER_RESOURCE_TYPE = enif_open_resource_type( env, NULL, "py_subinterp_worker", subinterp_worker_destructor, ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, NULL); +#endif + + /* Process-per-context resource type (no mutex) */ + PY_CONTEXT_RESOURCE_TYPE = enif_open_resource_type( + env, NULL, "py_context", context_destructor, + ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, NULL); + + /* py_ref resource type (Python object with interp_id for auto-routing) */ + PY_REF_RESOURCE_TYPE = enif_open_resource_type( + env, NULL, "py_ref", py_ref_destructor, + ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, NULL); + + /* suspended_context_state_t resource type (context suspension for callbacks) */ + PY_CONTEXT_SUSPENDED_RESOURCE_TYPE = enif_open_resource_type( + env, NULL, "py_context_suspended", suspended_context_state_destructor, + ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, NULL); if (WORKER_RESOURCE_TYPE == NULL || PYOBJ_RESOURCE_TYPE == NULL || - ASYNC_WORKER_RESOURCE_TYPE == NULL || SUSPENDED_STATE_RESOURCE_TYPE == NULL || - SUBINTERP_WORKER_RESOURCE_TYPE == NULL) { + SUSPENDED_STATE_RESOURCE_TYPE == NULL || + PY_CONTEXT_RESOURCE_TYPE == NULL || PY_REF_RESOURCE_TYPE == NULL || + PY_CONTEXT_SUSPENDED_RESOURCE_TYPE == NULL) { return -1; } -#else - if (WORKER_RESOURCE_TYPE == NULL || PYOBJ_RESOURCE_TYPE == NULL || - ASYNC_WORKER_RESOURCE_TYPE == NULL || SUSPENDED_STATE_RESOURCE_TYPE == NULL) { +#ifdef HAVE_SUBINTERPRETERS + if (SUBINTERP_WORKER_RESOURCE_TYPE == NULL) { return -1; } #endif @@ -1787,6 +3140,9 @@ static int load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) { ATOM_ASGI_QUERY_STRING = enif_make_atom(env, "query_string"); ATOM_ASGI_METHOD = enif_make_atom(env, "method"); + /* Worker pool atoms */ + pool_atoms_init(env); + /* ASGI buffer resource type for zero-copy body handling */ ASGI_BUFFER_RESOURCE_TYPE = enif_open_resource_type( env, NULL, "asgi_buffer", @@ -1851,6 +3207,7 @@ static ErlNifFunc nif_funcs[] = { /* Memory and GC */ {"memory_stats", 0, 
nif_memory_stats, 0}, + {"get_debug_counters", 0, nif_get_debug_counters, 0}, {"gc", 0, nif_gc, 0}, {"gc", 1, nif_gc, 0}, {"tracemalloc_start", 0, nif_tracemalloc_start, 0}, @@ -1877,6 +3234,7 @@ static ErlNifFunc nif_funcs[] = { {"subinterp_worker_destroy", 1, nif_subinterp_worker_destroy, 0}, {"subinterp_call", 5, nif_subinterp_call, ERL_NIF_DIRTY_JOB_CPU_BOUND}, {"parallel_execute", 2, nif_parallel_execute, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"subinterp_asgi_run", 6, nif_subinterp_asgi_run, ERL_NIF_DIRTY_JOB_CPU_BOUND}, /* Execution mode info */ {"execution_mode", 0, nif_execution_mode, 0}, @@ -1907,6 +3265,7 @@ static ErlNifFunc nif_funcs[] = { {"event_loop_set_worker", 2, nif_event_loop_set_worker, 0}, {"event_loop_set_id", 2, nif_event_loop_set_id, 0}, {"event_loop_wakeup", 1, nif_event_loop_wakeup, 0}, + {"event_loop_run_async", 7, nif_event_loop_run_async, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"add_reader", 3, nif_add_reader, 0}, {"remove_reader", 2, nif_remove_reader, 0}, {"add_writer", 3, nif_add_writer, 0}, @@ -1955,9 +3314,43 @@ static ErlNifFunc nif_funcs[] = { /* ASGI optimizations */ {"asgi_build_scope", 1, nif_asgi_build_scope, ERL_NIF_DIRTY_JOB_IO_BOUND}, {"asgi_run", 5, nif_asgi_run, ERL_NIF_DIRTY_JOB_IO_BOUND}, +#ifdef ASGI_PROFILING + {"asgi_profile_stats", 0, nif_asgi_profile_stats, 0}, + {"asgi_profile_reset", 0, nif_asgi_profile_reset, 0}, +#endif /* WSGI optimizations */ - {"wsgi_run", 4, nif_wsgi_run, ERL_NIF_DIRTY_JOB_IO_BOUND} + {"wsgi_run", 4, nif_wsgi_run, ERL_NIF_DIRTY_JOB_IO_BOUND}, + + /* Worker pool */ + {"pool_start", 1, nif_pool_start, 0}, + {"pool_stop", 0, nif_pool_stop, 0}, + {"pool_submit", 5, nif_pool_submit, 0}, + {"pool_stats", 0, nif_pool_stats, 0}, + + /* Process-per-context API (no mutex) */ + {"context_create", 1, nif_context_create, 0}, + {"context_destroy", 1, nif_context_destroy, 0}, + {"context_call", 5, nif_context_call, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"context_eval", 3, nif_context_eval, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + 
{"context_exec", 2, nif_context_exec, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"context_call_method", 4, nif_context_call_method, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"context_to_term", 1, nif_context_to_term, 0}, + {"context_interp_id", 1, nif_context_interp_id, 0}, + {"context_set_callback_handler", 2, nif_context_set_callback_handler, 0}, + {"context_get_callback_pipe", 1, nif_context_get_callback_pipe, 0}, + {"context_write_callback_response", 2, nif_context_write_callback_response, 0}, + {"context_resume", 3, nif_context_resume, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"context_cancel_resume", 2, nif_context_cancel_resume, 0}, + {"context_get_event_loop", 1, nif_context_get_event_loop, 0}, + + /* py_ref API (Python object references with interp_id) */ + {"ref_wrap", 2, nif_ref_wrap, 0}, + {"is_ref", 1, nif_is_ref, 0}, + {"ref_interp_id", 1, nif_ref_interp_id, 0}, + {"ref_to_term", 1, nif_ref_to_term, 0}, + {"ref_getattr", 2, nif_ref_getattr, ERL_NIF_DIRTY_JOB_CPU_BOUND}, + {"ref_call_method", 3, nif_ref_call_method, ERL_NIF_DIRTY_JOB_CPU_BOUND} }; ERL_NIF_INIT(py_nif, nif_funcs, load, NULL, upgrade, unload) diff --git a/c_src/py_nif.h b/c_src/py_nif.h index cf77f55..3127e2c 100644 --- a/c_src/py_nif.h +++ b/c_src/py_nif.h @@ -184,6 +184,141 @@ typedef enum { /** @} */ +/* ============================================================================ + * Runtime State Machine + * ============================================================================ */ + +/** + * @defgroup runtime_state Runtime State Machine + * @brief Atomic state machine for Python runtime lifecycle + * @{ + */ + +/** + * @enum py_runtime_state_t + * @brief Runtime state for Python interpreter lifecycle + * + * State transitions are performed atomically using CAS operations to ensure + * thread-safe initialization and shutdown. The state machine prevents race + * conditions during concurrent init/finalize calls. 
+ * + * Valid transitions: + * UNINIT -> INITING (only one thread wins) + * INITING -> RUNNING (on success) + * INITING -> STOPPED (on failure) + * RUNNING -> SHUTTING_DOWN (only one thread wins) + * SHUTTING_DOWN -> STOPPED (after cleanup) + */ +typedef enum { + /** @brief Initial state, Python not initialized */ + PY_STATE_UNINIT = 0, + + /** @brief Initialization in progress (transitional) */ + PY_STATE_INITING = 1, + + /** @brief Python running and ready for work */ + PY_STATE_RUNNING = 2, + + /** @brief Shutdown in progress, rejecting new work */ + PY_STATE_SHUTTING_DOWN = 3, + + /** @brief Fully stopped, safe to reinitialize */ + PY_STATE_STOPPED = 4 +} py_runtime_state_t; + +/** + * @brief Atomically transition runtime state using CAS + * @param from Expected current state + * @param to Desired new state + * @return true if transition succeeded, false if current state != from + */ +static inline bool runtime_transition(py_runtime_state_t from, py_runtime_state_t to) { + extern _Atomic py_runtime_state_t g_runtime_state; + py_runtime_state_t expected = from; + return atomic_compare_exchange_strong(&g_runtime_state, &expected, to); +} + +/** + * @brief Get current runtime state + * @return Current py_runtime_state_t value + */ +static inline py_runtime_state_t runtime_state(void) { + extern _Atomic py_runtime_state_t g_runtime_state; + return atomic_load(&g_runtime_state); +} + +/** + * @brief Check if runtime is in RUNNING state + * @return true if Python is running and accepting work + */ +static inline bool runtime_is_running(void) { + return runtime_state() == PY_STATE_RUNNING; +} + +/** + * @brief Check if runtime is shutting down or stopped + * @return true if runtime is in SHUTTING_DOWN or STOPPED state + */ +static inline bool runtime_is_shutting_down(void) { + py_runtime_state_t state = runtime_state(); + return state >= PY_STATE_SHUTTING_DOWN; +} + +/** @} */ + +/* ============================================================================ + * 
Invariant Counters (Debugging/Diagnostics) + * ============================================================================ */ + +/** + * @defgroup invariants Invariant Counters + * @brief Atomic counters for tracking resource lifecycle and detecting leaks + * @{ + */ + +/** + * @struct py_invariant_counters_t + * @brief Atomic counters for debugging and leak detection + * + * These counters track paired operations (acquire/release, create/destroy) + * to help detect resource leaks and imbalanced operations. At shutdown, + * paired counters should be equal. + */ +typedef struct { + /* GIL operations */ + _Atomic uint64_t gil_ensure_count; /**< PyGILState_Ensure calls */ + _Atomic uint64_t gil_release_count; /**< PyGILState_Release calls */ + + /* Python object references */ + _Atomic uint64_t pyobj_created; /**< py_object_t created */ + _Atomic uint64_t pyobj_destroyed; /**< py_object_t destroyed */ + + /* py_ref_t resources */ + _Atomic uint64_t pyref_created; /**< py_ref_t created */ + _Atomic uint64_t pyref_destroyed; /**< py_ref_t destroyed */ + + /* Context resources */ + _Atomic uint64_t ctx_created; /**< py_context_t created */ + _Atomic uint64_t ctx_destroyed; /**< py_context_t destroyed */ + _Atomic uint64_t ctx_keep_count; /**< enif_keep_resource(ctx) calls */ + _Atomic uint64_t ctx_release_count; /**< enif_release_resource(ctx) calls */ + + /* Suspended states */ + _Atomic uint64_t suspended_created; /**< Suspended states created */ + _Atomic uint64_t suspended_resumed; /**< Suspended states resumed */ + _Atomic uint64_t suspended_destroyed; /**< Suspended states destroyed */ + + /* Executor queue operations */ + _Atomic uint64_t enqueue_count; /**< Requests enqueued */ + _Atomic uint64_t complete_count; /**< Requests completed */ + _Atomic uint64_t rejected_count; /**< Requests rejected (shutdown) */ +} py_invariant_counters_t; + +/** @brief Global invariant counters for debugging */ +extern py_invariant_counters_t g_counters; + +/** @} */ + /* 
============================================================================ * Core Type Definitions * ============================================================================ */ @@ -241,70 +376,7 @@ typedef struct { ErlNifEnv *callback_env; } py_worker_t; -/** - * @struct async_pending_t - * @brief Represents a pending asynchronous Python operation - * - * Used to track asyncio coroutines submitted to the event loop. - * Forms a linked list for efficient queue management. - */ -typedef struct async_pending { - /** @brief Unique identifier for this async operation */ - uint64_t id; - - /** @brief Python Future object from `asyncio.run_coroutine_threadsafe` */ - PyObject *future; - - /** @brief PID of the Erlang process awaiting the result */ - ErlNifPid caller; - - /** @brief Next pending operation in the queue */ - struct async_pending *next; -} async_pending_t; - -/** - * @struct py_async_worker_t - * @brief Async worker managing an asyncio event loop - * - * Provides support for Python async/await operations by running - * an asyncio event loop in a dedicated background thread. 
- * - * @see nif_async_worker_new - * @see nif_async_call - */ -typedef struct { - /** @brief Background thread running the event loop */ - pthread_t loop_thread; - - /** @brief Python asyncio event loop object */ - PyObject *event_loop; - - /** - * @brief Notification pipe for waking the event loop - * - * - `notify_pipe[0]` - Read end (event loop monitors) - * - `notify_pipe[1]` - Write end (main thread signals) - */ - int notify_pipe[2]; - - /** @brief Flag indicating the event loop is running */ - volatile bool loop_running; - - /** @brief Flag to signal shutdown */ - volatile bool shutdown; - - /** @brief Mutex protecting the pending queue */ - pthread_mutex_t queue_mutex; - - /** @brief Head of pending operations queue */ - async_pending_t *pending_head; - - /** @brief Tail of pending operations queue */ - async_pending_t *pending_tail; - - /** @brief Environment for sending async result messages */ - ErlNifEnv *msg_env; -} py_async_worker_t; +/* async_pending_t and py_async_worker_t removed - async workers replaced by event loop model */ /** * @struct py_object_t @@ -321,6 +393,25 @@ typedef struct { PyObject *obj; } py_object_t; +/** + * @struct py_ref_t + * @brief Python object reference with interpreter ID for auto-routing + * + * This extends py_object_t by adding the interpreter ID that created + * the object. This allows automatic routing of method calls and + * attribute access to the correct context. 
+ * + * @note The interp_id is used by py_context_router to find the owning context + * @warning Operations on this ref must be performed in the correct interpreter + */ +typedef struct { + /** @brief The wrapped Python object (owned reference) */ + PyObject *obj; + + /** @brief Interpreter ID that owns this object (for routing) */ + uint32_t interp_id; +} py_ref_t; + /** @} */ /* ============================================================================ @@ -557,12 +648,18 @@ typedef struct { * Sub-interpreters provide true isolation with their own GIL, * enabling parallel Python execution on Python 3.12+. * + * The mutex ensures thread-safe access when multiple dirty scheduler + * threads attempt to use the same worker concurrently. + * * @note Only available when compiled with Python 3.12+ * * @see nif_subinterp_worker_new * @see nif_subinterp_call */ typedef struct { + /** @brief Mutex for thread-safe access from multiple dirty schedulers */ + pthread_mutex_t mutex; + /** @brief Python interpreter state */ PyInterpreterState *interp; @@ -577,6 +674,155 @@ typedef struct { } py_subinterp_worker_t; #endif +/** + * @struct py_context_t + * @brief Process-owned Python context (NO MUTEX) + * + * A py_context_t is owned by a single Erlang process, which serializes + * all access to it. This eliminates mutex contention and enables true + * N-way parallelism when combined with subinterpreters. + * + * Unlike py_subinterp_worker_t, this structure has NO mutex - the owning + * Erlang process guarantees exclusive access through message passing. 
+ * + * @note For Python 3.12+, each context has its own GIL (OWN_GIL) + * @note For older Python, contexts share the GIL but still avoid mutex overhead + * + * @see nif_context_create + * @see nif_context_call + */ +typedef struct { + /** @brief Unique interpreter ID for routing (0 = main, >0 = subinterp) */ + uint32_t interp_id; + + /** @brief Context mode: true=subinterpreter, false=worker */ + bool is_subinterp; + + /** @brief Flag indicating context has been destroyed */ + bool destroyed; + + /** @brief Flag: callback handler is configured */ + bool has_callback_handler; + + /** @brief PID of Erlang process handling callbacks */ + ErlNifPid callback_handler; + + /** @brief Pipe for callback responses [read, write] */ + int callback_pipe[2]; + +#ifdef HAVE_SUBINTERPRETERS + /** @brief Python interpreter state (only for subinterp mode) */ + PyInterpreterState *interp; + + /** @brief Thread state for this interpreter */ + PyThreadState *tstate; +#else + /** @brief Worker thread state (non-subinterp mode) */ + PyThreadState *thread_state; +#endif + + /** @brief Global namespace dictionary */ + PyObject *globals; + + /** @brief Local namespace dictionary */ + PyObject *locals; + + /** @brief Module cache (Dict: module_name -> PyModule) */ + PyObject *module_cache; +} py_context_t; + +/** + * @struct suspended_context_state_t + * @brief State for a suspended Python context execution awaiting callback result + * + * Similar to suspended_state_t but for the process-per-context architecture. + * When Python code in a context calls `erlang.call()`, execution is suspended + * and this structure captures all state needed to resume after the context + * process handles the callback inline. + * + * @par Key Difference from suspended_state_t: + * This uses py_context_t (no mutex) instead of py_worker_t, and is designed + * for the recursive receive pattern where the context process handles + * callbacks inline without blocking. 
+ * + * @see nif_context_resume + */ +typedef struct { + /** @brief Context for replay */ + py_context_t *ctx; + + /** @brief Unique identifier for this callback */ + uint64_t callback_id; + + /* Callback invocation info */ + + /** @brief Name of Erlang function being called */ + char *callback_func_name; + + /** @brief Length of callback_func_name */ + size_t callback_func_len; + + /** @brief Arguments passed to the callback */ + PyObject *callback_args; + + /* Original request context for replay */ + + /** @brief Original request type (PY_REQ_CALL or PY_REQ_EVAL) */ + int request_type; + + /** @brief Original module name binary (for PY_REQ_CALL) */ + ErlNifBinary orig_module; + + /** @brief Original function name binary (for PY_REQ_CALL) */ + ErlNifBinary orig_func; + + /** @brief Original arguments (copied to orig_env) */ + ERL_NIF_TERM orig_args; + + /** @brief Original keyword arguments */ + ERL_NIF_TERM orig_kwargs; + + /** @brief Original code for eval replay (for PY_REQ_EVAL) */ + ErlNifBinary orig_code; + + /** @brief Original locals map for eval replay */ + ERL_NIF_TERM orig_locals; + + /** @brief Environment owning copied terms */ + ErlNifEnv *orig_env; + + /* Callback result (set before resume) */ + + /** @brief Raw result data from Erlang callback (current callback) */ + unsigned char *result_data; + + /** @brief Length of result_data */ + size_t result_len; + + /** @brief Flag: result is available for replay */ + volatile bool has_result; + + /** @brief Flag: result represents an error */ + volatile bool is_error; + + /* Sequential callback support - stores all accumulated callback results */ + + /** @brief Current callback result index for replay */ + size_t callback_result_index; + + /** @brief Number of cached callback results (from previous callbacks) */ + size_t num_callback_results; + + /** @brief Capacity of callback_results array */ + size_t callback_results_capacity; + + /** @brief Cached callback results array (grows with sequential 
callbacks) */ + struct { + unsigned char *data; + size_t len; + } *callback_results; +} suspended_context_state_t; + /** @} */ /* ============================================================================ @@ -646,8 +892,7 @@ extern ErlNifResourceType *WORKER_RESOURCE_TYPE; /** @brief Resource type for py_object_t */ extern ErlNifResourceType *PYOBJ_RESOURCE_TYPE; -/** @brief Resource type for py_async_worker_t */ -extern ErlNifResourceType *ASYNC_WORKER_RESOURCE_TYPE; +/* ASYNC_WORKER_RESOURCE_TYPE removed - async workers replaced by event loop model */ /** @brief Resource type for suspended_state_t */ extern ErlNifResourceType *SUSPENDED_STATE_RESOURCE_TYPE; @@ -657,9 +902,24 @@ extern ErlNifResourceType *SUSPENDED_STATE_RESOURCE_TYPE; extern ErlNifResourceType *SUBINTERP_WORKER_RESOURCE_TYPE; #endif -/** @brief Flag: Python interpreter is initialized */ +/** @brief Resource type for py_context_t (process-per-context) */ +extern ErlNifResourceType *PY_CONTEXT_RESOURCE_TYPE; + +/** @brief Resource type for py_ref_t (Python object with interp_id) */ +extern ErlNifResourceType *PY_REF_RESOURCE_TYPE; + +/** @brief Resource type for suspended_context_state_t (context suspension) */ +extern ErlNifResourceType *PY_CONTEXT_SUSPENDED_RESOURCE_TYPE; + +/** @brief Atomic counter for unique interpreter IDs */ +extern _Atomic uint32_t g_context_id_counter; + +/** @brief Flag: Python interpreter is initialized (DEPRECATED: use g_runtime_state) */ extern bool g_python_initialized; +/** @brief Atomic runtime state for thread-safe lifecycle management */ +extern _Atomic py_runtime_state_t g_runtime_state; + /** @brief Main Python thread state (saved on init) */ extern PyThreadState *g_main_thread_state; @@ -675,8 +935,8 @@ extern executor_t g_executors[MAX_EXECUTORS]; /** @brief Round-robin counter for executor selection */ extern _Atomic int g_next_executor; -/** @brief Flag: multi-executor pool is initialized */ -extern bool g_multi_executor_initialized; +/** @brief Flag: 
multi-executor pool is initialized (atomic for thread-safe access) */ +extern _Atomic bool g_multi_executor_initialized; /* Single executor state */ @@ -695,11 +955,11 @@ extern py_request_t *g_executor_queue_head; /** @brief Single executor queue tail */ extern py_request_t *g_executor_queue_tail; -/** @brief Single executor running flag */ -extern volatile bool g_executor_running; +/** @brief Single executor running flag (atomic for thread-safe access) */ +extern _Atomic bool g_executor_running; -/** @brief Single executor shutdown flag */ -extern volatile bool g_executor_shutdown; +/** @brief Single executor shutdown flag (atomic for thread-safe access) */ +extern _Atomic bool g_executor_shutdown; /** @brief Global counter for unique callback IDs */ extern _Atomic uint64_t g_callback_id_counter; @@ -719,9 +979,12 @@ extern PyObject *g_numpy_ndarray_type; /* Thread-local state */ -/** @brief Current worker for callback context */ +/** @brief Current worker for callback context (legacy) */ extern __thread py_worker_t *tl_current_worker; +/** @brief Current context for callback context (new process-per-context API) */ +extern __thread py_context_t *tl_current_context; + /** @brief Current NIF environment for callbacks */ extern __thread ErlNifEnv *tl_callback_env; @@ -1076,7 +1339,7 @@ static void process_request(py_request_t *req); * * @param req Request to submit */ -static void executor_enqueue(py_request_t *req); +static int executor_enqueue(py_request_t *req); /** * @brief Wait for a request to complete @@ -1215,15 +1478,7 @@ static PyObject *erlang_call_impl(PyObject *self, PyObject *args); */ static PyObject *erlang_module_getattr(PyObject *module, PyObject *name); -/** - * @brief Background thread running asyncio event loop - * - * Manages async Python operations submitted via async_call. 
- * - * @param arg Pointer to py_async_worker_t - * @return NULL - */ -static void *async_event_loop_thread(void *arg); +/* async_event_loop_thread removed - replaced by event loop model */ /** * @brief Create suspended state for callback handling diff --git a/c_src/py_thread_worker.c b/c_src/py_thread_worker.c index cf3ee8f..0bc8b90 100644 --- a/c_src/py_thread_worker.c +++ b/c_src/py_thread_worker.c @@ -206,11 +206,26 @@ static void thread_worker_cleanup(void) { * The coordinator is the Erlang process that spawns handler processes * for new thread workers. * + * When a new coordinator is registered (e.g., after app restart), we must + * reset all existing workers' has_handler flag since the old handler + * processes are dead. + * * @param pid PID of the coordinator process */ static void thread_worker_set_coordinator(ErlNifPid pid) { g_thread_coordinator_pid = pid; g_has_thread_coordinator = true; + + /* Reset has_handler on all existing workers since old handlers are dead */ + pthread_mutex_lock(&g_thread_pool_mutex); + thread_worker_t *tw = g_thread_pool_head; + while (tw != NULL) { + pthread_mutex_lock(&tw->mutex); + tw->has_handler = false; + pthread_mutex_unlock(&tw->mutex); + tw = tw->next; + } + pthread_mutex_unlock(&g_thread_pool_mutex); } /** diff --git a/c_src/py_worker_pool.c b/c_src/py_worker_pool.c new file mode 100644 index 0000000..27eb5c9 --- /dev/null +++ b/c_src/py_worker_pool.c @@ -0,0 +1,1231 @@ +/* + * Copyright 2026 Benoit Chesneau + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file py_worker_pool.c + * @brief Worker thread pool implementation + * + * Implements a pool of worker threads for processing Python operations. + * Each worker can have its own subinterpreter (Python 3.12+) for true + * parallelism, or share the GIL with batching for older Python versions. + */ + +/* ============================================================================ + * Global Pool Instance + * ============================================================================ */ + +py_worker_pool_t g_pool = { + .num_workers = 0, + .initialized = false, + .shutting_down = false, + .request_id_counter = 1, + .use_subinterpreters = false, + .free_threaded = false +}; + +/* Atom for response message */ +static ERL_NIF_TERM ATOM_PY_RESPONSE; + +/* ============================================================================ + * Queue Operations + * ============================================================================ */ + +static void queue_init(py_pool_queue_t *queue) { + queue->head = NULL; + queue->tail = NULL; + atomic_store(&queue->pending_count, 0); + atomic_store(&queue->total_enqueued, 0); + pthread_mutex_init(&queue->mutex, NULL); + pthread_cond_init(&queue->cond, NULL); +} + +static void queue_destroy(py_pool_queue_t *queue) { + pthread_mutex_destroy(&queue->mutex); + pthread_cond_destroy(&queue->cond); +} + +static void queue_enqueue(py_pool_queue_t *queue, py_pool_request_t *req) { + pthread_mutex_lock(&queue->mutex); + + req->next = NULL; + if (queue->tail == NULL) { + queue->head = req; + queue->tail = req; + } else { + queue->tail->next = req; + queue->tail = req; + } + + atomic_fetch_add(&queue->pending_count, 1); + atomic_fetch_add(&queue->total_enqueued, 1); + + pthread_cond_signal(&queue->cond); + pthread_mutex_unlock(&queue->mutex); +} + +static py_pool_request_t *queue_dequeue(py_pool_queue_t *queue, bool wait) { + 
pthread_mutex_lock(&queue->mutex); + + while (queue->head == NULL) { + if (!wait) { + pthread_mutex_unlock(&queue->mutex); + return NULL; + } + pthread_cond_wait(&queue->cond, &queue->mutex); + + /* Check for shutdown after wakeup */ + if (g_pool.shutting_down) { + pthread_mutex_unlock(&queue->mutex); + return NULL; + } + } + + py_pool_request_t *req = queue->head; + queue->head = req->next; + if (queue->head == NULL) { + queue->tail = NULL; + } + req->next = NULL; + + atomic_fetch_sub(&queue->pending_count, 1); + pthread_mutex_unlock(&queue->mutex); + + return req; +} + +/* Wake up all workers waiting on the queue */ +static void queue_broadcast(py_pool_queue_t *queue) { + pthread_mutex_lock(&queue->mutex); + pthread_cond_broadcast(&queue->cond); + pthread_mutex_unlock(&queue->mutex); +} + +/* ============================================================================ + * Request Management + * ============================================================================ */ + +static py_pool_request_t *py_pool_request_new(py_pool_request_type_t type, + ErlNifPid caller_pid) { + py_pool_request_t *req = enif_alloc(sizeof(py_pool_request_t)); + if (req == NULL) { + return NULL; + } + + memset(req, 0, sizeof(py_pool_request_t)); + req->type = type; + req->caller_pid = caller_pid; + req->request_id = atomic_fetch_add(&g_pool.request_id_counter, 1); + req->msg_env = enif_alloc_env(); + + if (req->msg_env == NULL) { + enif_free(req); + return NULL; + } + + return req; +} + +static void py_pool_request_free(py_pool_request_t *req) { + if (req == NULL) { + return; + } + + if (req->module_name) { + enif_free(req->module_name); + } + if (req->func_name) { + enif_free(req->func_name); + } + if (req->code) { + enif_free(req->code); + } + if (req->runner_name) { + enif_free(req->runner_name); + } + if (req->callable_name) { + enif_free(req->callable_name); + } + if (req->body_data) { + enif_free(req->body_data); + } + if (req->msg_env) { + enif_free_env(req->msg_env); + } + + 
enif_free(req); +} + +/* ============================================================================ + * Module Caching + * ============================================================================ */ + +static PyObject *py_pool_get_module(py_pool_worker_t *worker, + const char *module_name) { + /* Check cache first */ + if (worker->module_cache != NULL) { + PyObject *key = PyUnicode_FromString(module_name); + if (key != NULL) { + PyObject *module = PyDict_GetItem(worker->module_cache, key); + Py_DECREF(key); + if (module != NULL) { + return module; /* Borrowed reference */ + } + } + } + + /* Import module */ + PyObject *module = PyImport_ImportModule(module_name); + if (module == NULL) { + return NULL; + } + + /* Cache it */ + if (worker->module_cache != NULL) { + PyObject *key = PyUnicode_FromString(module_name); + if (key != NULL) { + PyDict_SetItem(worker->module_cache, key, module); + Py_DECREF(key); + } + } + + Py_DECREF(module); /* Dict now owns it, return borrowed ref */ + return PyDict_GetItemString(worker->module_cache, module_name); +} + +static void py_pool_clear_module_cache(py_pool_worker_t *worker) { + if (worker->module_cache != NULL) { + PyDict_Clear(worker->module_cache); + } +} + +/* ============================================================================ + * Response Sending + * ============================================================================ */ + +/* Debug: track sent responses */ +static _Atomic uint64_t g_responses_sent = 0; +static _Atomic uint64_t g_responses_failed = 0; + +static void py_pool_send_response(py_pool_request_t *req, ERL_NIF_TERM result) { + /* Build message: {py_response, RequestId, Result} */ + ERL_NIF_TERM request_id_term = enif_make_uint64(req->msg_env, req->request_id); + ERL_NIF_TERM msg = enif_make_tuple3(req->msg_env, + ATOM_PY_RESPONSE, + request_id_term, + result); + + int send_result = enif_send(NULL, &req->caller_pid, req->msg_env, msg); + if (send_result) { + atomic_fetch_add(&g_responses_sent, 
1); + /* IMPORTANT: enif_send consumes/invalidates the msg_env on success. + * Set to NULL to prevent double-free in py_pool_request_free. */ + req->msg_env = NULL; + } else { + atomic_fetch_add(&g_responses_failed, 1); + fprintf(stderr, "[DEBUG] enif_send FAILED for req_id=%llu\n", + (unsigned long long)req->request_id); + /* On failure, msg_env is still valid and will be freed in request_free */ + } +} + +/* ============================================================================ + * Request Processing - CALL/APPLY + * ============================================================================ */ + +static ERL_NIF_TERM py_pool_process_call(py_pool_worker_t *worker, + py_pool_request_t *req) { + ErlNifEnv *env = req->msg_env; + + /* Get module */ + PyObject *module = py_pool_get_module(worker, req->module_name); + if (module == NULL) { + ERL_NIF_TERM err = make_py_error(env); + return err; + } + + /* Get function */ + PyObject *func = PyObject_GetAttrString(module, req->func_name); + if (func == NULL) { + ERL_NIF_TERM err = make_py_error(env); + return err; + } + + /* Convert args to Python */ + PyObject *args = term_to_py(env, req->args_term); + if (args == NULL) { + Py_DECREF(func); + return make_error(env, "args_conversion_failed"); + } + + /* Ensure args is a tuple */ + if (!PyTuple_Check(args)) { + if (PyList_Check(args)) { + PyObject *tuple = PyList_AsTuple(args); + Py_DECREF(args); + args = tuple; + } else { + PyObject *tuple = PyTuple_Pack(1, args); + Py_DECREF(args); + args = tuple; + } + } + + /* Call function */ + PyObject *result = PyObject_Call(func, args, NULL); + Py_DECREF(func); + Py_DECREF(args); + + if (result == NULL) { + return make_py_error(env); + } + + /* Convert result to Erlang */ + ERL_NIF_TERM result_term = py_to_term(env, result); + Py_DECREF(result); + + return enif_make_tuple2(env, ATOM_OK, result_term); +} + +static ERL_NIF_TERM py_pool_process_apply(py_pool_worker_t *worker, + py_pool_request_t *req) { + ErlNifEnv *env = 
req->msg_env; + + /* Get module */ + PyObject *module = py_pool_get_module(worker, req->module_name); + if (module == NULL) { + return make_py_error(env); + } + + /* Get function */ + PyObject *func = PyObject_GetAttrString(module, req->func_name); + if (func == NULL) { + return make_py_error(env); + } + + /* Convert args to Python */ + PyObject *args = term_to_py(env, req->args_term); + if (args == NULL) { + Py_DECREF(func); + return make_error(env, "args_conversion_failed"); + } + + /* Ensure args is a tuple */ + if (!PyTuple_Check(args)) { + if (PyList_Check(args)) { + PyObject *tuple = PyList_AsTuple(args); + Py_DECREF(args); + args = tuple; + } else { + PyObject *tuple = PyTuple_Pack(1, args); + Py_DECREF(args); + args = tuple; + } + } + + /* Convert kwargs to Python dict */ + PyObject *kwargs = NULL; + if (enif_is_map(env, req->kwargs_term)) { + kwargs = term_to_py(env, req->kwargs_term); + if (kwargs != NULL && !PyDict_Check(kwargs)) { + Py_DECREF(kwargs); + kwargs = NULL; + } + } + + /* Call function with kwargs */ + PyObject *result = PyObject_Call(func, args, kwargs); + Py_DECREF(func); + Py_DECREF(args); + Py_XDECREF(kwargs); + + if (result == NULL) { + return make_py_error(env); + } + + /* Convert result to Erlang */ + ERL_NIF_TERM result_term = py_to_term(env, result); + Py_DECREF(result); + + return enif_make_tuple2(env, ATOM_OK, result_term); +} + +/* ============================================================================ + * Request Processing - EVAL/EXEC + * ============================================================================ */ + +static ERL_NIF_TERM py_pool_process_eval(py_pool_worker_t *worker, + py_pool_request_t *req) { + ErlNifEnv *env = req->msg_env; + + /* Compile code as expression */ + PyObject *code = Py_CompileString(req->code, "", Py_eval_input); + if (code == NULL) { + return make_py_error(env); + } + + /* Prepare locals if provided */ + PyObject *locals = worker->locals; + /* Check if locals_term was set (non-zero) 
before checking if it's a map */ + if (req->locals_term != 0 && enif_is_map(env, req->locals_term)) { + PyObject *new_locals = term_to_py(env, req->locals_term); + if (new_locals != NULL && PyDict_Check(new_locals)) { + /* Merge with existing locals */ + PyDict_Update(locals, new_locals); + Py_DECREF(new_locals); + } + } + + /* Evaluate */ + PyObject *result = PyEval_EvalCode(code, worker->globals, locals); + Py_DECREF(code); + + if (result == NULL) { + return make_py_error(env); + } + + /* Convert result to Erlang */ + ERL_NIF_TERM result_term = py_to_term(env, result); + Py_DECREF(result); + + return enif_make_tuple2(env, ATOM_OK, result_term); +} + +static ERL_NIF_TERM py_pool_process_exec(py_pool_worker_t *worker, + py_pool_request_t *req) { + ErlNifEnv *env = req->msg_env; + + /* Compile code as statements */ + PyObject *code = Py_CompileString(req->code, "", Py_file_input); + if (code == NULL) { + return make_py_error(env); + } + + /* Execute */ + PyObject *result = PyEval_EvalCode(code, worker->globals, worker->locals); + Py_DECREF(code); + + if (result == NULL) { + return make_py_error(env); + } + + Py_DECREF(result); + return enif_make_tuple2(env, ATOM_OK, ATOM_NONE); +} + +/* ============================================================================ + * Request Processing - ASGI + * ============================================================================ */ + +static ERL_NIF_TERM py_pool_process_asgi(py_pool_worker_t *worker, + py_pool_request_t *req) { + ErlNifEnv *env = req->msg_env; + + /* Get runner module */ + PyObject *runner_module = py_pool_get_module(worker, req->runner_name); + if (runner_module == NULL) { + return make_py_error(env); + } + + /* Get 'run_asgi' function from runner (hornbeam interface) */ + PyObject *run_func = PyObject_GetAttrString(runner_module, "run_asgi"); + if (run_func == NULL) { + return make_py_error(env); + } + + /* Create module_name Python string */ + PyObject *py_module_name = 
PyUnicode_FromString(req->module_name); + if (py_module_name == NULL) { + Py_DECREF(run_func); + return make_py_error(env); + } + + /* Create callable_name Python string */ + PyObject *py_callable_name = PyUnicode_FromString(req->callable_name); + if (py_callable_name == NULL) { + Py_DECREF(run_func); + Py_DECREF(py_module_name); + return make_py_error(env); + } + + /* Build scope dict from Erlang term using optimized ASGI conversion */ + PyObject *scope = asgi_scope_from_map(env, req->scope_term); + if (scope == NULL) { + Py_DECREF(run_func); + Py_DECREF(py_module_name); + Py_DECREF(py_callable_name); + return make_py_error(env); + } + + /* Create body bytes from binary */ + PyObject *body = PyBytes_FromStringAndSize((const char *)req->body_data, + req->body_len); + if (body == NULL) { + Py_DECREF(run_func); + Py_DECREF(py_module_name); + Py_DECREF(py_callable_name); + Py_DECREF(scope); + return make_py_error(env); + } + + /* Call runner.run_asgi(module_name, callable_name, scope, body) */ + PyObject *args = PyTuple_Pack(4, py_module_name, py_callable_name, scope, body); + Py_DECREF(py_module_name); + Py_DECREF(py_callable_name); + Py_DECREF(scope); + Py_DECREF(body); + + if (args == NULL) { + Py_DECREF(run_func); + return make_py_error(env); + } + + PyObject *result = PyObject_Call(run_func, args, NULL); + Py_DECREF(run_func); + Py_DECREF(args); + + if (result == NULL) { + return make_py_error(env); + } + + /* Extract ASGI response using optimized extraction (handles dict or tuple) */ + ERL_NIF_TERM response = extract_asgi_response(env, result); + Py_DECREF(result); + + return enif_make_tuple2(env, ATOM_OK, response); +} + +/* ============================================================================ + * Request Processing - WSGI + * ============================================================================ */ + +static ERL_NIF_TERM py_pool_process_wsgi(py_pool_worker_t *worker, + py_pool_request_t *req) { + ErlNifEnv *env = req->msg_env; + + /* Get app 
module */ + PyObject *app_module = py_pool_get_module(worker, req->module_name); + if (app_module == NULL) { + return make_py_error(env); + } + + /* Get WSGI callable */ + PyObject *app_callable = PyObject_GetAttrString(app_module, req->callable_name); + if (app_callable == NULL) { + return make_py_error(env); + } + + /* Build environ dict */ + PyObject *environ = term_to_py(env, req->environ_term); + if (environ == NULL || !PyDict_Check(environ)) { + Py_DECREF(app_callable); + Py_XDECREF(environ); + return make_error(env, "invalid_environ"); + } + + /* Create start_response callable */ + /* For simplicity, use a list to collect status/headers */ + PyObject *response_started = PyList_New(0); + if (response_started == NULL) { + Py_DECREF(app_callable); + Py_DECREF(environ); + return make_py_error(env); + } + + /* For now, return error asking to use ASGI instead */ + /* Full WSGI implementation would need a proper start_response callable */ + Py_DECREF(app_callable); + Py_DECREF(environ); + Py_DECREF(response_started); + + return make_error(env, "wsgi_not_fully_implemented_use_asgi"); +} + +/* ============================================================================ + * Request Processing Dispatcher + * ============================================================================ */ + +static void py_pool_process_request(py_pool_worker_t *worker, + py_pool_request_t *req) { + uint64_t start_ns = get_monotonic_ns(); + ERL_NIF_TERM result; + + switch (req->type) { + case PY_POOL_REQ_CALL: + result = py_pool_process_call(worker, req); + break; + case PY_POOL_REQ_APPLY: + result = py_pool_process_apply(worker, req); + break; + case PY_POOL_REQ_EVAL: + result = py_pool_process_eval(worker, req); + break; + case PY_POOL_REQ_EXEC: + result = py_pool_process_exec(worker, req); + break; + case PY_POOL_REQ_ASGI: + result = py_pool_process_asgi(worker, req); + break; + case PY_POOL_REQ_WSGI: + result = py_pool_process_wsgi(worker, req); + break; + case PY_POOL_REQ_SHUTDOWN: + 
/* Shutdown handled by worker thread */ + return; + default: + result = make_error(req->msg_env, "unknown_request_type"); + break; + } + + /* Send response */ + py_pool_send_response(req, result); + + /* Update stats */ + uint64_t elapsed_ns = get_monotonic_ns() - start_ns; + atomic_fetch_add(&worker->requests_processed, 1); + atomic_fetch_add(&worker->total_processing_ns, elapsed_ns); +} + +/* ============================================================================ + * Worker Thread + * ============================================================================ */ + +static int worker_init_python_state(py_pool_worker_t *worker) { +#ifdef HAVE_SUBINTERPRETERS + if (g_pool.use_subinterpreters) { + /* Create sub-interpreter with its own GIL */ + PyInterpreterConfig config = { + .use_main_obmalloc = 0, + .allow_fork = 0, + .allow_exec = 0, + .allow_threads = 1, + .allow_daemon_threads = 0, + .check_multi_interp_extensions = 1, + .gil = PyInterpreterConfig_OWN_GIL, + }; + + PyStatus status = Py_NewInterpreterFromConfig(&worker->tstate, &config); + if (PyStatus_Exception(status)) { + return -1; + } + + worker->interp = PyThreadState_GetInterpreter(worker->tstate); + + /* Initialize event loop for this subinterpreter */ + if (init_subinterpreter_event_loop(NULL) < 0) { + return -1; + } + } else +#endif + { + /* Non-subinterpreter mode: acquire GIL */ + gil_guard_t guard = gil_acquire(); + (void)guard; /* Will be released when worker exits */ + } + + /* Create per-worker state */ + worker->module_cache = PyDict_New(); + worker->globals = PyDict_New(); + worker->locals = PyDict_New(); + + if (worker->module_cache == NULL || + worker->globals == NULL || + worker->locals == NULL) { + Py_XDECREF(worker->module_cache); + Py_XDECREF(worker->globals); + Py_XDECREF(worker->locals); + return -1; + } + + /* Add builtins to globals */ + PyObject *builtins = PyEval_GetBuiltins(); + if (builtins != NULL) { + PyDict_SetItemString(worker->globals, "__builtins__", builtins); + } + + 
/* Initialize ASGI state for this interpreter */ + worker->asgi_state = get_asgi_interp_state(); + + return 0; +} + +static void worker_cleanup_python_state(py_pool_worker_t *worker) { + Py_XDECREF(worker->module_cache); + Py_XDECREF(worker->globals); + Py_XDECREF(worker->locals); + worker->module_cache = NULL; + worker->globals = NULL; + worker->locals = NULL; + +#ifdef HAVE_SUBINTERPRETERS + if (g_pool.use_subinterpreters && worker->tstate != NULL) { + Py_EndInterpreter(worker->tstate); + worker->tstate = NULL; + worker->interp = NULL; + } +#endif +} + +static void *py_pool_worker_thread(void *arg) { + py_pool_worker_t *worker = (py_pool_worker_t *)arg; + + /* Initialize Python state */ + gil_guard_t guard = {0}; + +#ifdef HAVE_SUBINTERPRETERS + if (g_pool.use_subinterpreters) { + /* Acquire GIL in main interpreter first */ + guard = gil_acquire(); + + /* Create sub-interpreter */ + PyInterpreterConfig config = { + .use_main_obmalloc = 0, + .allow_fork = 0, + .allow_exec = 0, + .allow_threads = 1, + .allow_daemon_threads = 0, + .check_multi_interp_extensions = 1, + .gil = PyInterpreterConfig_OWN_GIL, + }; + + PyStatus status = Py_NewInterpreterFromConfig(&worker->tstate, &config); + if (PyStatus_Exception(status)) { + gil_release(guard); + worker->running = false; + return NULL; + } + + worker->interp = PyThreadState_GetInterpreter(worker->tstate); + + /* Initialize event loop for this subinterpreter */ + if (init_subinterpreter_event_loop(NULL) < 0) { + gil_release(guard); + worker->running = false; + return NULL; + } + + /* Release main GIL - we now have our own */ + gil_release(guard); + + /* We're now attached to our sub-interpreter */ + } else +#endif + { + /* Non-subinterpreter mode: acquire the shared GIL */ + guard = gil_acquire(); + } + + /* Create per-worker state */ + worker->module_cache = PyDict_New(); + worker->globals = PyDict_New(); + worker->locals = PyDict_New(); + + if (worker->module_cache == NULL || + worker->globals == NULL || + 
worker->locals == NULL) { + goto cleanup; + } + + /* Add builtins to globals */ + PyObject *builtins = PyEval_GetBuiltins(); + if (builtins != NULL) { + PyDict_SetItemString(worker->globals, "__builtins__", builtins); + } + + /* Initialize ASGI state for this interpreter */ + worker->asgi_state = get_asgi_interp_state(); + + worker->running = true; + + /* Main processing loop */ + while (!worker->shutdown) { + py_pool_request_t *req = NULL; + +#ifdef HAVE_SUBINTERPRETERS + if (g_pool.use_subinterpreters) { + /* Subinterpreter mode: we own our GIL, just dequeue and process */ + req = queue_dequeue(&g_pool.queue, true); + } else +#endif + { + /* Release GIL while waiting for work */ + Py_BEGIN_ALLOW_THREADS + req = queue_dequeue(&g_pool.queue, true); + Py_END_ALLOW_THREADS + } + + if (req == NULL || req->type == PY_POOL_REQ_SHUTDOWN) { + if (req != NULL) { + py_pool_request_free(req); + } + break; + } + + /* Process with GIL held (or in subinterpreter with own GIL) */ + py_pool_process_request(worker, req); + py_pool_request_free(req); + } + +cleanup: + /* Clean up Python state */ + Py_XDECREF(worker->module_cache); + Py_XDECREF(worker->globals); + Py_XDECREF(worker->locals); + worker->module_cache = NULL; + worker->globals = NULL; + worker->locals = NULL; + +#ifdef HAVE_SUBINTERPRETERS + if (g_pool.use_subinterpreters && worker->tstate != NULL) { + Py_EndInterpreter(worker->tstate); + worker->tstate = NULL; + worker->interp = NULL; + } else +#endif + { + gil_release(guard); + } + + worker->running = false; + return NULL; +} + +/* ============================================================================ + * Pool Lifecycle + * ============================================================================ */ + +static int py_pool_init(int num_workers) { + if (g_pool.initialized) { + return 0; /* Already initialized */ + } + + /* Determine number of workers */ + if (num_workers <= 0) { + /* Auto-detect: use number of CPUs */ + long ncpus = 
sysconf(_SC_NPROCESSORS_ONLN); + num_workers = (ncpus > 0) ? (int)ncpus : 4; + } + if (num_workers > POOL_MAX_WORKERS) { + num_workers = POOL_MAX_WORKERS; + } + + /* Detect execution mode */ +#ifdef HAVE_FREE_THREADED + g_pool.free_threaded = true; + g_pool.use_subinterpreters = false; +#elif defined(HAVE_SUBINTERPRETERS) + g_pool.free_threaded = false; + g_pool.use_subinterpreters = true; +#else + g_pool.free_threaded = false; + g_pool.use_subinterpreters = false; +#endif + + /* Initialize queue */ + queue_init(&g_pool.queue); + + /* Initialize workers */ + g_pool.num_workers = num_workers; + for (int i = 0; i < num_workers; i++) { + py_pool_worker_t *worker = &g_pool.workers[i]; + memset(worker, 0, sizeof(py_pool_worker_t)); + worker->worker_id = i; + worker->shutdown = false; + atomic_store(&worker->requests_processed, 0); + atomic_store(&worker->total_processing_ns, 0); + } + + /* Start worker threads */ + for (int i = 0; i < num_workers; i++) { + py_pool_worker_t *worker = &g_pool.workers[i]; + int rc = pthread_create(&worker->thread, NULL, + py_pool_worker_thread, worker); + if (rc != 0) { + /* Failed to create thread - shut down already created ones */ + g_pool.shutting_down = true; + queue_broadcast(&g_pool.queue); + for (int j = 0; j < i; j++) { + pthread_join(g_pool.workers[j].thread, NULL); + } + queue_destroy(&g_pool.queue); + return -1; + } + } + + /* Wait for workers to start */ + for (int i = 0; i < num_workers; i++) { + while (!g_pool.workers[i].running && !g_pool.workers[i].shutdown) { + usleep(1000); /* 1ms */ + } + } + + g_pool.initialized = true; + return 0; +} + +static void py_pool_shutdown(void) { + if (!g_pool.initialized) { + return; + } + + g_pool.shutting_down = true; + + /* Send shutdown requests to all workers */ + for (int i = 0; i < g_pool.num_workers; i++) { + g_pool.workers[i].shutdown = true; + + /* Enqueue shutdown request to wake up workers */ + py_pool_request_t *shutdown_req = py_pool_request_new( + PY_POOL_REQ_SHUTDOWN, 
(ErlNifPid){0}); + if (shutdown_req != NULL) { + queue_enqueue(&g_pool.queue, shutdown_req); + } + } + + /* Wake up all waiting workers */ + queue_broadcast(&g_pool.queue); + + /* Wait for workers to terminate */ + for (int i = 0; i < g_pool.num_workers; i++) { + pthread_join(g_pool.workers[i].thread, NULL); + } + + /* Drain and free remaining requests */ + py_pool_request_t *req; + while ((req = queue_dequeue(&g_pool.queue, false)) != NULL) { + /* Send error response for abandoned requests */ + if (req->type != PY_POOL_REQ_SHUTDOWN && req->msg_env != NULL) { + ERL_NIF_TERM error = make_error(req->msg_env, "pool_shutdown"); + py_pool_send_response(req, error); + } + py_pool_request_free(req); + } + + queue_destroy(&g_pool.queue); + g_pool.initialized = false; + g_pool.shutting_down = false; +} + +static bool py_pool_is_initialized(void) { + return g_pool.initialized; +} + +static int py_pool_enqueue(py_pool_request_t *req) { + if (!g_pool.initialized || g_pool.shutting_down) { + return -1; + } + + queue_enqueue(&g_pool.queue, req); + return 0; +} + +/* ============================================================================ + * Statistics + * ============================================================================ */ + +static void py_pool_get_stats(py_pool_stats_t *stats) { + memset(stats, 0, sizeof(py_pool_stats_t)); + + stats->num_workers = g_pool.num_workers; + stats->initialized = g_pool.initialized; + stats->use_subinterpreters = g_pool.use_subinterpreters; + stats->free_threaded = g_pool.free_threaded; + stats->pending_count = atomic_load(&g_pool.queue.pending_count); + stats->total_enqueued = atomic_load(&g_pool.queue.total_enqueued); + + for (int i = 0; i < g_pool.num_workers && i < POOL_MAX_WORKERS; i++) { + stats->worker_stats[i].requests_processed = + atomic_load(&g_pool.workers[i].requests_processed); + stats->worker_stats[i].total_processing_ns = + atomic_load(&g_pool.workers[i].total_processing_ns); + } +} + +/* 
============================================================================ + * NIF Functions + * ============================================================================ */ + +static ERL_NIF_TERM nif_pool_start(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + if (argc != 1) { + return enif_make_badarg(env); + } + + int num_workers; + if (!enif_get_int(env, argv[0], &num_workers)) { + return enif_make_badarg(env); + } + + if (py_pool_init(num_workers) != 0) { + return make_error(env, "failed_to_start_pool"); + } + + return ATOM_OK; +} + +static ERL_NIF_TERM nif_pool_stop(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + + py_pool_shutdown(); + return ATOM_OK; +} + +static ERL_NIF_TERM nif_pool_submit(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + if (argc != 5) { + return enif_make_badarg(env); + } + + if (!g_pool.initialized) { + return make_error(env, "pool_not_started"); + } + + /* Get request type atom */ + char type_buf[32]; + if (!enif_get_atom(env, argv[0], type_buf, sizeof(type_buf), ERL_NIF_LATIN1)) { + return enif_make_badarg(env); + } + + py_pool_request_type_t type; + if (strcmp(type_buf, "call") == 0) { + type = PY_POOL_REQ_CALL; + } else if (strcmp(type_buf, "apply") == 0) { + type = PY_POOL_REQ_APPLY; + } else if (strcmp(type_buf, "eval") == 0) { + type = PY_POOL_REQ_EVAL; + } else if (strcmp(type_buf, "exec") == 0) { + type = PY_POOL_REQ_EXEC; + } else if (strcmp(type_buf, "asgi") == 0) { + type = PY_POOL_REQ_ASGI; + } else if (strcmp(type_buf, "wsgi") == 0) { + type = PY_POOL_REQ_WSGI; + } else { + return make_error(env, "unknown_request_type"); + } + + /* Get caller PID */ + ErlNifPid caller_pid; + if (!enif_self(env, &caller_pid)) { + return make_error(env, "cannot_get_self_pid"); + } + + /* Create request */ + py_pool_request_t *req = py_pool_request_new(type, caller_pid); + if (req == NULL) { + return make_error(env, "request_allocation_failed"); + } + + /* Parse arguments based on 
type */ + switch (type) { + case PY_POOL_REQ_CALL: + case PY_POOL_REQ_APPLY: { + /* argv[1] = Module, argv[2] = Func, argv[3] = Args, argv[4] = Kwargs/undefined */ + ErlNifBinary module_bin, func_bin; + if (!enif_inspect_binary(env, argv[1], &module_bin) || + !enif_inspect_binary(env, argv[2], &func_bin)) { + py_pool_request_free(req); + return enif_make_badarg(env); + } + + req->module_name = enif_alloc(module_bin.size + 1); + req->func_name = enif_alloc(func_bin.size + 1); + if (req->module_name == NULL || req->func_name == NULL) { + py_pool_request_free(req); + return make_error(env, "allocation_failed"); + } + + memcpy(req->module_name, module_bin.data, module_bin.size); + req->module_name[module_bin.size] = '\0'; + memcpy(req->func_name, func_bin.data, func_bin.size); + req->func_name[func_bin.size] = '\0'; + + req->args_term = enif_make_copy(req->msg_env, argv[3]); + + if (type == PY_POOL_REQ_APPLY && !enif_is_atom(env, argv[4])) { + req->kwargs_term = enif_make_copy(req->msg_env, argv[4]); + } + break; + } + + case PY_POOL_REQ_EVAL: + case PY_POOL_REQ_EXEC: { + /* argv[1] = Code, argv[2-4] = unused */ + ErlNifBinary code_bin; + if (!enif_inspect_binary(env, argv[1], &code_bin)) { + py_pool_request_free(req); + return enif_make_badarg(env); + } + + req->code = enif_alloc(code_bin.size + 1); + if (req->code == NULL) { + py_pool_request_free(req); + return make_error(env, "allocation_failed"); + } + + memcpy(req->code, code_bin.data, code_bin.size); + req->code[code_bin.size] = '\0'; + + if (!enif_is_atom(env, argv[2])) { + req->locals_term = enif_make_copy(req->msg_env, argv[2]); + } + break; + } + + case PY_POOL_REQ_ASGI: { + /* argv[1] = Runner, argv[2] = Module, argv[3] = Callable, argv[4] = {Scope, Body} */ + ErlNifBinary runner_bin, module_bin, callable_bin; + if (!enif_inspect_binary(env, argv[1], &runner_bin) || + !enif_inspect_binary(env, argv[2], &module_bin) || + !enif_inspect_binary(env, argv[3], &callable_bin)) { + py_pool_request_free(req); + 
return enif_make_badarg(env); + } + + req->runner_name = enif_alloc(runner_bin.size + 1); + req->module_name = enif_alloc(module_bin.size + 1); + req->callable_name = enif_alloc(callable_bin.size + 1); + if (req->runner_name == NULL || req->module_name == NULL || + req->callable_name == NULL) { + py_pool_request_free(req); + return make_error(env, "allocation_failed"); + } + + memcpy(req->runner_name, runner_bin.data, runner_bin.size); + req->runner_name[runner_bin.size] = '\0'; + memcpy(req->module_name, module_bin.data, module_bin.size); + req->module_name[module_bin.size] = '\0'; + memcpy(req->callable_name, callable_bin.data, callable_bin.size); + req->callable_name[callable_bin.size] = '\0'; + + /* Parse {Scope, Body} tuple */ + int arity; + const ERL_NIF_TERM *tuple; + if (!enif_get_tuple(env, argv[4], &arity, &tuple) || arity != 2) { + py_pool_request_free(req); + return enif_make_badarg(env); + } + + req->scope_term = enif_make_copy(req->msg_env, tuple[0]); + + ErlNifBinary body_bin; + if (enif_inspect_binary(env, tuple[1], &body_bin)) { + req->body_data = enif_alloc(body_bin.size); + if (req->body_data == NULL) { + py_pool_request_free(req); + return make_error(env, "allocation_failed"); + } + memcpy(req->body_data, body_bin.data, body_bin.size); + req->body_len = body_bin.size; + } else { + req->body_data = NULL; + req->body_len = 0; + } + break; + } + + case PY_POOL_REQ_WSGI: { + /* argv[1] = Module, argv[2] = Callable, argv[3] = Environ, argv[4] = unused */ + ErlNifBinary module_bin, callable_bin; + if (!enif_inspect_binary(env, argv[1], &module_bin) || + !enif_inspect_binary(env, argv[2], &callable_bin)) { + py_pool_request_free(req); + return enif_make_badarg(env); + } + + req->module_name = enif_alloc(module_bin.size + 1); + req->callable_name = enif_alloc(callable_bin.size + 1); + if (req->module_name == NULL || req->callable_name == NULL) { + py_pool_request_free(req); + return make_error(env, "allocation_failed"); + } + + memcpy(req->module_name, 
module_bin.data, module_bin.size); + req->module_name[module_bin.size] = '\0'; + memcpy(req->callable_name, callable_bin.data, callable_bin.size); + req->callable_name[callable_bin.size] = '\0'; + + req->environ_term = enif_make_copy(req->msg_env, argv[3]); + break; + } + + default: + py_pool_request_free(req); + return make_error(env, "unknown_request_type"); + } + + /* IMPORTANT: Save request_id BEFORE enqueueing. + * Once enqueued, a worker can process and free the request at any time. + * Accessing req->request_id after enqueue is use-after-free. */ + uint64_t request_id = req->request_id; + + /* Enqueue request */ + if (py_pool_enqueue(req) != 0) { + py_pool_request_free(req); + return make_error(env, "enqueue_failed"); + } + + /* Return {ok, RequestId} - using saved ID to avoid use-after-free */ + return enif_make_tuple2(env, ATOM_OK, + enif_make_uint64(env, request_id)); +} + +static ERL_NIF_TERM nif_pool_stats(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]) { + (void)argc; + (void)argv; + + py_pool_stats_t stats; + py_pool_get_stats(&stats); + + /* Build result map */ + ERL_NIF_TERM keys[8], values[8]; + + keys[0] = enif_make_atom(env, "num_workers"); + values[0] = enif_make_int(env, stats.num_workers); + + keys[1] = enif_make_atom(env, "initialized"); + values[1] = stats.initialized ? ATOM_TRUE : ATOM_FALSE; + + keys[2] = enif_make_atom(env, "use_subinterpreters"); + values[2] = stats.use_subinterpreters ? ATOM_TRUE : ATOM_FALSE; + + keys[3] = enif_make_atom(env, "free_threaded"); + values[3] = stats.free_threaded ? 
ATOM_TRUE : ATOM_FALSE; + + keys[4] = enif_make_atom(env, "pending_count"); + values[4] = enif_make_uint64(env, stats.pending_count); + + keys[5] = enif_make_atom(env, "total_enqueued"); + values[5] = enif_make_uint64(env, stats.total_enqueued); + + keys[6] = enif_make_atom(env, "responses_sent"); + values[6] = enif_make_uint64(env, atomic_load(&g_responses_sent)); + + keys[7] = enif_make_atom(env, "responses_failed"); + values[7] = enif_make_uint64(env, atomic_load(&g_responses_failed)); + + ERL_NIF_TERM result; + enif_make_map_from_arrays(env, keys, values, 8, &result); + + return result; +} + +/* Initialize pool-specific atoms */ +static int pool_atoms_init(ErlNifEnv *env) { + ATOM_PY_RESPONSE = enif_make_atom(env, "py_response"); + return 0; +} diff --git a/c_src/py_worker_pool.h b/c_src/py_worker_pool.h new file mode 100644 index 0000000..09e5a71 --- /dev/null +++ b/c_src/py_worker_pool.h @@ -0,0 +1,561 @@ +/* + * Copyright 2026 Benoit Chesneau + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @file py_worker_pool.h + * @brief Worker thread pool for Python operations + * @author Benoit Chesneau + * + * @section overview Overview + * + * This module implements a general-purpose worker thread pool for ALL Python + * calls (ASGI, WSGI, py:call, py:eval). Each worker has its own subinterpreter + * (Python 3.12+) or dedicated GIL-holding thread, processing requests from a + * shared queue. 
+ * + * @section architecture Architecture + * + * ``` + * Erlang Processes Lock-free Queue Python Workers + * [P1]--enqueue--+ +---------+ +-------------------------+ + * [P2]--enqueue--+---->| Request |<--poll---| Worker 0 (Subinterp+GIL)| + * [P3]--enqueue--+ | Queue | | - Holds GIL | + * ... | | (MPSC) |<--poll---| Worker 1 (Subinterp+GIL)| + * [PN]--enqueue--+ +---------+ +-------------------------+ + * ``` + * + * @section benefits Key Benefits + * + * - No GIL acquire/release per request (workers hold GIL) + * - Module/callable cached per worker (no reimport) + * - True parallelism with subinterpreters (each has OWN_GIL) + * + * @section modes Python Mode Support + * + * | Mode | Python Version | Strategy | + * |------|----------------|----------| + * | FREE_THREADED | 3.13+ (no-GIL) | N workers, no GIL needed | + * | SUBINTERP | 3.12+ | N subinterpreters, each OWN_GIL | + * | FALLBACK | <3.12 | N workers share GIL, batching reduces overhead | + */ + +#ifndef PY_WORKER_POOL_H +#define PY_WORKER_POOL_H + +#include "py_nif.h" +#include "py_asgi.h" +#include "py_wsgi.h" + +/* ============================================================================ + * Configuration + * ============================================================================ */ + +/** + * @def POOL_MAX_WORKERS + * @brief Maximum number of workers in the pool + */ +#define POOL_MAX_WORKERS 32 + +/** + * @def POOL_QUEUE_SIZE + * @brief Size of the request queue (power of 2 for efficient modulo) + */ +#define POOL_QUEUE_SIZE 4096 + +/** + * @def POOL_DEFAULT_WORKERS + * @brief Default number of workers (0 = use CPU count) + */ +#define POOL_DEFAULT_WORKERS 0 + +/* ============================================================================ + * Request Types + * ============================================================================ */ + +/** + * @enum py_pool_request_type_t + * @brief Types of requests that can be submitted to the worker pool + */ +typedef enum { + PY_POOL_REQ_CALL, 
/**< py:call(Module, Func, Args) */ + PY_POOL_REQ_APPLY, /**< py:apply(Module, Func, Args, Kwargs) */ + PY_POOL_REQ_EVAL, /**< py:eval(Code) */ + PY_POOL_REQ_EXEC, /**< py:exec(Code) */ + PY_POOL_REQ_ASGI, /**< ASGI request */ + PY_POOL_REQ_WSGI, /**< WSGI request */ + PY_POOL_REQ_SHUTDOWN /**< Shutdown signal */ +} py_pool_request_type_t; + +/* ============================================================================ + * Request Structure + * ============================================================================ */ + +/** + * @struct py_pool_request_t + * @brief Request submitted to the worker pool + * + * Contains all information needed to process a Python operation. + * The result is sent back to the caller via enif_send(). + */ +typedef struct py_pool_request { + /** @brief Unique request ID for correlation */ + uint64_t request_id; + + /** @brief Type of operation to perform */ + py_pool_request_type_t type; + + /** @brief PID of the calling Erlang process */ + ErlNifPid caller_pid; + + /** @brief Environment for building result terms (thread-safe copy) */ + ErlNifEnv *msg_env; + + /* ========== CALL/APPLY parameters ========== */ + + /** @brief Module name (heap-allocated, NULL-terminated) */ + char *module_name; + + /** @brief Function name (heap-allocated, NULL-terminated) */ + char *func_name; + + /** @brief Arguments list term (copied to msg_env) */ + ERL_NIF_TERM args_term; + + /** @brief Keyword arguments map term (copied to msg_env, optional) */ + ERL_NIF_TERM kwargs_term; + + /* ========== EVAL/EXEC parameters ========== */ + + /** @brief Python code to evaluate/execute (heap-allocated, NULL-terminated) */ + char *code; + + /** @brief Local variables for eval (copied to msg_env) */ + ERL_NIF_TERM locals_term; + + /* ========== ASGI/WSGI parameters ========== */ + + /** @brief Runner module name for ASGI (heap-allocated) */ + char *runner_name; + + /** @brief ASGI callable name (heap-allocated) */ + char *callable_name; + + /** @brief ASGI 
scope term (copied to msg_env) */ + ERL_NIF_TERM scope_term; + + /** @brief Request body binary data */ + unsigned char *body_data; + + /** @brief Body data length */ + size_t body_len; + + /* ========== WSGI-specific parameters ========== */ + + /** @brief WSGI environ term (copied to msg_env) */ + ERL_NIF_TERM environ_term; + + /* ========== Timeout ========== */ + + /** @brief Timeout in milliseconds (0 = no timeout) */ + unsigned long timeout_ms; + + /* ========== Queue linkage ========== */ + + /** @brief Next request in queue (for linked list) */ + struct py_pool_request *next; +} py_pool_request_t; + +/* ============================================================================ + * Worker Structure + * ============================================================================ */ + +/** + * @struct py_pool_worker_t + * @brief Single worker thread in the pool + * + * Each worker runs in its own thread and optionally has its own + * subinterpreter (Python 3.12+) for true parallelism. + */ +typedef struct { + /** @brief Worker thread handle */ + pthread_t thread; + + /** @brief Worker ID (0 to num_workers-1) */ + int worker_id; + + /** @brief Flag: worker is running */ + volatile bool running; + + /** @brief Flag: worker should shut down */ + volatile bool shutdown; + +#ifdef HAVE_SUBINTERPRETERS + /** @brief Python interpreter for this worker */ + PyInterpreterState *interp; + + /** @brief Thread state in this interpreter */ + PyThreadState *tstate; +#endif + + /* ========== Cached state per worker ========== */ + + /** @brief Module cache (Dict: module_name -> PyModule) */ + PyObject *module_cache; + + /** @brief Global namespace for eval/exec */ + PyObject *globals; + + /** @brief Local namespace for eval/exec */ + PyObject *locals; + + /* ========== ASGI/WSGI state ========== */ + + /** @brief Per-worker ASGI state (interned keys, etc.) 
*/ + asgi_interp_state_t *asgi_state; + + /* ========== Statistics ========== */ + + /** @brief Total requests processed by this worker */ + _Atomic uint64_t requests_processed; + + /** @brief Total processing time in nanoseconds */ + _Atomic uint64_t total_processing_ns; +} py_pool_worker_t; + +/* ============================================================================ + * Request Queue Structure + * ============================================================================ */ + +/** + * @struct py_pool_queue_t + * @brief MPSC (Multi-Producer Single-Consumer) queue for requests + * + * Uses a simple linked list with mutex protection. Workers dequeue + * using condition variable waits. + */ +typedef struct { + /** @brief Queue head (oldest request) */ + py_pool_request_t *head; + + /** @brief Queue tail (newest request) */ + py_pool_request_t *tail; + + /** @brief Number of pending requests */ + _Atomic uint64_t pending_count; + + /** @brief Total requests enqueued */ + _Atomic uint64_t total_enqueued; + + /** @brief Mutex protecting the queue */ + pthread_mutex_t mutex; + + /** @brief Condition variable for worker notification */ + pthread_cond_t cond; +} py_pool_queue_t; + +/* ============================================================================ + * Worker Pool Structure + * ============================================================================ */ + +/** + * @struct py_worker_pool_t + * @brief The main worker pool structure + */ +typedef struct { + /** @brief Array of workers */ + py_pool_worker_t workers[POOL_MAX_WORKERS]; + + /** @brief Number of active workers */ + int num_workers; + + /** @brief Request queue */ + py_pool_queue_t queue; + + /** @brief Flag: pool is initialized */ + volatile bool initialized; + + /** @brief Flag: pool is shutting down */ + volatile bool shutting_down; + + /** @brief Request ID counter */ + _Atomic uint64_t request_id_counter; + + /** @brief Mode: use subinterpreters */ + bool use_subinterpreters; + + /** 
@brief Mode: free-threaded Python */ + bool free_threaded; +} py_worker_pool_t; + +/* ============================================================================ + * Global Pool Instance + * ============================================================================ */ + +/** @brief Global worker pool instance */ +extern py_worker_pool_t g_pool; + +/* ============================================================================ + * Pool Lifecycle Functions + * ============================================================================ */ + +/** + * @brief Initialize the worker pool + * + * Creates and starts num_workers worker threads. If num_workers is 0, + * uses the number of CPU cores. + * + * @param num_workers Number of workers (0 = auto-detect CPU count) + * @return 0 on success, -1 on failure + */ +static int py_pool_init(int num_workers); + +/** + * @brief Shut down the worker pool + * + * Signals all workers to stop and waits for them to terminate. + * Processes any remaining requests with error responses. + */ +static void py_pool_shutdown(void); + +/** + * @brief Check if pool is initialized + * + * @return true if pool is ready to accept requests + */ +static bool py_pool_is_initialized(void); + +/* ============================================================================ + * Request Submission Functions + * ============================================================================ */ + +/** + * @brief Submit a request to the pool + * + * Thread-safe enqueue operation. The request is processed by an + * available worker and the result is sent to caller_pid. + * + * @param req Request to submit (ownership transferred to pool) + * @return 0 on success, -1 if pool not initialized + */ +static int py_pool_enqueue(py_pool_request_t *req); + +/** + * @brief Create a new pool request + * + * Allocates and initializes a request structure. 
+ * + * @param type Request type + * @param caller_pid Calling process PID + * @return New request, or NULL on allocation failure + */ +static py_pool_request_t *py_pool_request_new(py_pool_request_type_t type, + ErlNifPid caller_pid); + +/** + * @brief Free a pool request + * + * Releases all resources associated with the request. + * + * @param req Request to free + */ +static void py_pool_request_free(py_pool_request_t *req); + +/* ============================================================================ + * Worker Functions + * ============================================================================ */ + +/** + * @brief Worker thread main function + * + * Entry point for worker threads. Processes requests until shutdown. + * + * @param arg Pointer to py_pool_worker_t + * @return NULL + */ +static void *py_pool_worker_thread(void *arg); + +/** + * @brief Process a single request + * + * Dispatches based on request type and sends result to caller. + * + * @param worker Worker processing the request + * @param req Request to process + */ +static void py_pool_process_request(py_pool_worker_t *worker, + py_pool_request_t *req); + +/** + * @brief Send response to caller + * + * Uses enif_send() to send result back to calling process. 
+ * + * @param req Request with caller info + * @param result Result term to send + */ +static void py_pool_send_response(py_pool_request_t *req, ERL_NIF_TERM result); + +/* ============================================================================ + * Request Processing Functions + * ============================================================================ */ + +/** + * @brief Process CALL request + * + * @param worker Worker processing request + * @param req Request with module, func, args + * @return Result term + */ +static ERL_NIF_TERM py_pool_process_call(py_pool_worker_t *worker, + py_pool_request_t *req); + +/** + * @brief Process APPLY request + * + * @param worker Worker processing request + * @param req Request with module, func, args, kwargs + * @return Result term + */ +static ERL_NIF_TERM py_pool_process_apply(py_pool_worker_t *worker, + py_pool_request_t *req); + +/** + * @brief Process EVAL request + * + * @param worker Worker processing request + * @param req Request with code + * @return Result term + */ +static ERL_NIF_TERM py_pool_process_eval(py_pool_worker_t *worker, + py_pool_request_t *req); + +/** + * @brief Process EXEC request + * + * @param worker Worker processing request + * @param req Request with code + * @return Result term + */ +static ERL_NIF_TERM py_pool_process_exec(py_pool_worker_t *worker, + py_pool_request_t *req); + +/** + * @brief Process ASGI request + * + * @param worker Worker processing request + * @param req Request with runner, callable, scope, body + * @return Result term + */ +static ERL_NIF_TERM py_pool_process_asgi(py_pool_worker_t *worker, + py_pool_request_t *req); + +/** + * @brief Process WSGI request + * + * @param worker Worker processing request + * @param req Request with module, callable, environ + * @return Result term + */ +static ERL_NIF_TERM py_pool_process_wsgi(py_pool_worker_t *worker, + py_pool_request_t *req); + +/* ============================================================================ 
+ * Module Caching + * ============================================================================ */ + +/** + * @brief Get or import a Python module + * + * Checks the worker's module cache first, imports if not cached. + * + * @param worker Worker with module cache + * @param module_name Module name to get + * @return Borrowed reference to module, or NULL on error + */ +static PyObject *py_pool_get_module(py_pool_worker_t *worker, + const char *module_name); + +/** + * @brief Clear module cache for a worker + * + * @param worker Worker to clear cache for + */ +static void py_pool_clear_module_cache(py_pool_worker_t *worker); + +/* ============================================================================ + * Statistics + * ============================================================================ */ + +/** + * @brief Pool statistics structure + */ +typedef struct { + int num_workers; + bool initialized; + bool use_subinterpreters; + bool free_threaded; + uint64_t pending_count; + uint64_t total_enqueued; + struct { + uint64_t requests_processed; + uint64_t total_processing_ns; + } worker_stats[POOL_MAX_WORKERS]; +} py_pool_stats_t; + +/** + * @brief Get pool statistics + * + * @param stats Output structure for statistics + */ +static void py_pool_get_stats(py_pool_stats_t *stats); + +/* ============================================================================ + * NIF Functions + * ============================================================================ */ + +/** + * @brief NIF: Start the worker pool + * + * py_nif:pool_start(NumWorkers) -> ok | {error, Reason} + */ +static ERL_NIF_TERM nif_pool_start(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]); + +/** + * @brief NIF: Stop the worker pool + * + * py_nif:pool_stop() -> ok + */ +static ERL_NIF_TERM nif_pool_stop(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]); + +/** + * @brief NIF: Submit a request to the pool + * + * py_nif:pool_submit(Type, Arg1, Arg2, Arg3, Arg4) -> {ok, RequestId} 
| {error, Reason} + */ +static ERL_NIF_TERM nif_pool_submit(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]); + +/** + * @brief NIF: Get pool statistics + * + * py_nif:pool_stats() -> StatsMap + */ +static ERL_NIF_TERM nif_pool_stats(ErlNifEnv *env, int argc, + const ERL_NIF_TERM argv[]); + +#endif /* PY_WORKER_POOL_H */ diff --git a/docs/ai-integration.md b/docs/ai-integration.md index 9c7158a..71af921 100644 --- a/docs/ai-integration.md +++ b/docs/ai-integration.md @@ -505,7 +505,7 @@ For non-blocking LLM calls: ```erlang %% Start async LLM call ask_async(Question) -> - py:call_async('__main__', generate, [Question, <<"">>]). + py:cast('__main__', generate, [Question, <<"">>]). %% Gather multiple responses ask_many(Questions) -> diff --git a/docs/asyncio.md b/docs/asyncio.md index 59d2647..cf21ff9 100644 --- a/docs/asyncio.md +++ b/docs/asyncio.md @@ -6,6 +6,15 @@ This guide covers the Erlang-native asyncio event loop implementation that provi The `ErlangEventLoop` is a custom asyncio event loop backed by Erlang's scheduler using `enif_select` for I/O multiplexing. This replaces Python's polling-based event loop with true event-driven callbacks integrated into the BEAM VM. 
+All asyncio functionality is available through the unified `erlang` module: + +```python +import erlang + +# Preferred way to run async code +erlang.run(main()) +``` + ### Key Benefits - **Sub-millisecond latency** - Events are delivered immediately via Erlang messages instead of polling every 10ms @@ -63,38 +72,82 @@ The `ErlangEventLoop` is a custom asyncio event loop backed by Erlang's schedule | `py_event_router` | Routes timer/FD events to the correct event loop instance | | `erlang_asyncio` | High-level asyncio-compatible API with direct Erlang integration | -## Usage +## Usage Patterns -```python -from erlang_loop import ErlangEventLoop -import asyncio +### Pattern 1: `erlang.run()` (Recommended) -# Create and set the event loop -loop = ErlangEventLoop() -asyncio.set_event_loop(loop) +The preferred way to run async code, matching uvloop's API: + +```python +import erlang async def main(): await asyncio.sleep(1.0) # Uses erlang:send_after internally print("Done!") -asyncio.run(main()) +# Simple and clean +erlang.run(main()) ``` -Or use the provided event loop policy: +### Pattern 2: With `asyncio.Runner` (Python 3.11+) ```python -from erlang_loop import get_event_loop_policy import asyncio +import erlang + +with asyncio.Runner(loop_factory=erlang.new_event_loop) as runner: + runner.run(main()) +``` -asyncio.set_event_loop_policy(get_event_loop_policy()) +### Pattern 3: `erlang.install()` (Deprecated in Python 3.12+) -async def main(): - # Uses ErlangEventLoop automatically - await asyncio.sleep(0.5) +This pattern installs the ErlangEventLoopPolicy globally. 
It's deprecated in Python 3.12+ because `asyncio.run()` no longer respects global policies: + +```python +import asyncio +import erlang +erlang.install() # Deprecated in 3.12+, use erlang.run() instead asyncio.run(main()) ``` +### Pattern 4: Manual Loop Management + +For cases where you need direct control: + +```python +import asyncio +import erlang + +loop = erlang.new_event_loop() +asyncio.set_event_loop(loop) +try: + loop.run_until_complete(main()) +finally: + loop.close() +``` + +## Execution Mode Detection + +The `erlang` module can detect the Python execution mode: + +```python +from erlang import detect_mode, ExecutionMode + +mode = detect_mode() +if mode == ExecutionMode.FREE_THREADED: + print("Running in free-threaded mode (no GIL)") +elif mode == ExecutionMode.SUBINTERP: + print("Running in subinterpreter with per-interpreter GIL") +else: + print("Running with shared GIL") +``` + +**ExecutionMode values:** +- `FREE_THREADED` - Python 3.13+ with `Py_GIL_DISABLED` (no GIL) +- `SUBINTERP` - Python 3.12+ running in a subinterpreter +- `SHARED_GIL` - Traditional Python with shared GIL + ## TCP Support ### Client Connections @@ -306,7 +359,7 @@ transport.sendto(b'Hello') # Goes to connected address ```python import asyncio -from erlang_loop import ErlangEventLoop +from erlang import ErlangEventLoop class EchoServerProtocol(asyncio.DatagramProtocol): def connection_made(self, transport): @@ -499,44 +552,19 @@ ok = py_nif:event_loop_set_router(LoopRef, RouterPid). Events are delivered as Erlang messages, enabling the event loop to participate in BEAM's supervision trees and distributed computing capabilities. -## Isolated Event Loops - -By default, all `ErlangEventLoop` instances share a single underlying native event loop managed by Erlang. For multi-threaded applications where each thread needs its own event loop, you can create isolated loops. 
- -### Creating an Isolated Loop - -Use the `isolated=True` parameter to create a loop with its own pending queue: +## Event Loop Architecture -```python -from erlang_loop import ErlangEventLoop - -# Default: uses shared global loop -shared_loop = ErlangEventLoop() - -# Isolated: creates its own native loop -isolated_loop = ErlangEventLoop(isolated=True) -``` - -### When to Use Isolated Loops - -| Use Case | Loop Type | -|----------|-----------| -| Single-threaded asyncio applications | Default (shared) | -| Web frameworks (ASGI/WSGI) | Default (shared) | -| Multi-threaded Python with separate event loops | `isolated=True` | -| Sub-interpreters | `isolated=True` | -| Free-threaded Python (3.13+) | `isolated=True` | -| Testing loop isolation | `isolated=True` | +Each `ErlangEventLoop` instance has its own isolated capsule with a dedicated pending queue. This ensures that timers and FD events are properly routed to the correct loop instance. ### Multi-threaded Example ```python -from erlang_loop import ErlangEventLoop +from erlang import ErlangEventLoop import threading -def run_isolated_tasks(loop_id): - """Each thread gets its own isolated event loop.""" - loop = ErlangEventLoop(isolated=True) +def run_tasks(loop_id): + """Each thread gets its own event loop.""" + loop = ErlangEventLoop() results = [] @@ -558,8 +586,8 @@ def run_isolated_tasks(loop_id): return results # Run in separate threads -t1 = threading.Thread(target=run_isolated_tasks, args=('loop_a',)) -t2 = threading.Thread(target=run_isolated_tasks, args=('loop_b',)) +t1 = threading.Thread(target=run_tasks, args=('loop_a',)) +t2 = threading.Thread(target=run_tasks, args=('loop_b',)) t1.start() t2.start() @@ -568,7 +596,7 @@ t2.join() # Each thread only sees its own callbacks ``` -### Architecture +### Internal Architecture A shared router process handles timer and FD events for all loops: @@ -590,15 +618,17 @@ A shared router process handles timer and FD events for all loops: └─────────┘ └─────────┘ 
└─────────┘ ``` -Each isolated loop has its own pending queue, ensuring callbacks are processed only by the loop that scheduled them. The shared router dispatches timer and FD events to the correct loop based on the resource backref. +Each loop has its own pending queue, ensuring callbacks are processed only by the loop that scheduled them. The shared router dispatches timer and FD events to the correct loop based on the capsule backref. + +## Erlang Asyncio Primitives -## erlang_asyncio Module +> **Note:** The `erlang_asyncio` module has been unified into the main `erlang` module. Use `import erlang` and `erlang.run()` instead. -The `erlang_asyncio` module provides asyncio-compatible primitives that use Erlang's native scheduler for maximum performance. This is the recommended way to use async/await patterns when you need explicit Erlang timer integration. +The `erlang` module provides asyncio-compatible primitives that use Erlang's native scheduler for maximum performance. This is the recommended way to use async/await patterns when you need explicit Erlang timer integration. ### Overview -Unlike the standard `asyncio` module which uses Python's polling-based event loop, `erlang_asyncio` uses Erlang's `erlang:send_after/3` for timers and integrates directly with the BEAM scheduler. This eliminates Python event loop overhead (~0.5-1ms per operation) and provides more precise timing. +Unlike the standard `asyncio` module which uses Python's polling-based event loop, the `erlang` module uses Erlang's `erlang:send_after/3` for timers and integrates directly with the BEAM scheduler. This eliminates Python event loop overhead (~0.5-1ms per operation) and provides more precise timing. 
### Architecture @@ -644,231 +674,308 @@ Unlike the standard `asyncio` module which uses Python's polling-based event loo ### Basic Usage ```python -import erlang_asyncio +import erlang +import asyncio async def my_handler(): # Sleep using Erlang's timer system - await erlang_asyncio.sleep(0.1) # 100ms + await asyncio.sleep(0.1) # 100ms - uses erlang:send_after internally return "done" -# Run a coroutine -result = erlang_asyncio.run(my_handler()) +# Run a coroutine with Erlang event loop +result = erlang.run(my_handler()) ``` ### API Reference -#### sleep(delay, result=None) +When using `erlang.run()` or the Erlang event loop, all standard asyncio functions work seamlessly with Erlang's backend. -Sleep for the specified delay using Erlang's native timer system. +#### asyncio.sleep(delay) + +Sleep for the specified delay. Uses Erlang's `erlang:send_after/3` internally. ```python -import erlang_asyncio +import erlang +import asyncio async def example(): - # Simple sleep - await erlang_asyncio.sleep(0.05) # 50ms + # Simple sleep - uses Erlang timer system + await asyncio.sleep(0.05) # 50ms - # Sleep and return a value - value = await erlang_asyncio.sleep(0.01, result='ready') - assert value == 'ready' +erlang.run(example()) ``` -**Parameters:** -- `delay` (float): Time to sleep in seconds -- `result` (optional): Value to return after sleeping (default: None) - -**Returns:** The `result` argument - -#### run(coro) +#### erlang.run(coro) Run a coroutine to completion using an ErlangEventLoop. ```python -import erlang_asyncio +import erlang +import asyncio async def main(): - await erlang_asyncio.sleep(0.01) + await asyncio.sleep(0.01) return 42 -result = erlang_asyncio.run(main()) +result = erlang.run(main()) assert result == 42 ``` -#### gather(*coros, return_exceptions=False) +#### asyncio.gather(*coros, return_exceptions=False) Run coroutines concurrently and gather results. 
```python -import erlang_asyncio +import erlang +import asyncio async def task(n): - await erlang_asyncio.sleep(0.01) + await asyncio.sleep(0.01) return n * 2 async def main(): - results = await erlang_asyncio.gather(task(1), task(2), task(3)) + results = await asyncio.gather(task(1), task(2), task(3)) assert results == [2, 4, 6] -erlang_asyncio.run(main()) +erlang.run(main()) ``` -#### wait_for(coro, timeout) +#### asyncio.wait_for(coro, timeout) Wait for a coroutine with a timeout. ```python -import erlang_asyncio +import erlang +import asyncio async def fast_task(): - await erlang_asyncio.sleep(0.01) + await asyncio.sleep(0.01) return 'done' async def main(): try: - result = await erlang_asyncio.wait_for(fast_task(), timeout=1.0) - except erlang_asyncio.TimeoutError: + result = await asyncio.wait_for(fast_task(), timeout=1.0) + except asyncio.TimeoutError: print("Task timed out") -erlang_asyncio.run(main()) +erlang.run(main()) ``` -#### create_task(coro, *, name=None) +#### asyncio.create_task(coro, *, name=None) Create a task to run a coroutine in the background. ```python -import erlang_asyncio +import erlang +import asyncio async def background_work(): - await erlang_asyncio.sleep(0.1) + await asyncio.sleep(0.1) return 'background_done' async def main(): - task = erlang_asyncio.create_task(background_work()) + task = asyncio.create_task(background_work()) # Do other work while task runs - await erlang_asyncio.sleep(0.05) + await asyncio.sleep(0.05) # Wait for task to complete result = await task assert result == 'background_done' -erlang_asyncio.run(main()) +erlang.run(main()) ``` -#### wait(fs, *, timeout=None, return_when=ALL_COMPLETED) +#### asyncio.wait(fs, *, timeout=None, return_when=ALL_COMPLETED) Wait for multiple futures/tasks. 
```python -import erlang_asyncio +import erlang +import asyncio async def main(): tasks = [ - erlang_asyncio.create_task(erlang_asyncio.sleep(0.01, result=i)) + asyncio.create_task(asyncio.sleep(0.01)) for i in range(3) ] - done, pending = await erlang_asyncio.wait( + done, pending = await asyncio.wait( tasks, - return_when=erlang_asyncio.ALL_COMPLETED + return_when=asyncio.ALL_COMPLETED ) assert len(done) == 3 assert len(pending) == 0 -erlang_asyncio.run(main()) +erlang.run(main()) ``` #### Event Loop Functions ```python -import erlang_asyncio - -# Get the current event loop (creates ErlangEventLoop if needed) -loop = erlang_asyncio.get_event_loop() +import erlang +import asyncio -# Create a new event loop -loop = erlang_asyncio.new_event_loop() +# Create a new Erlang-backed event loop +loop = erlang.new_event_loop() # Set the current event loop -erlang_asyncio.set_event_loop(loop) +asyncio.set_event_loop(loop) # Get the running loop (raises RuntimeError if none) -loop = erlang_asyncio.get_running_loop() +loop = asyncio.get_running_loop() ``` -#### Additional Functions - -- `ensure_future(coro_or_future, *, loop=None)` - Wrap a coroutine in a Future -- `shield(arg)` - Protect a coroutine from cancellation - -#### Context Manager +#### Context Manager for Timeouts ```python -import erlang_asyncio +import erlang +import asyncio async def main(): - async with erlang_asyncio.timeout(1.0): + async with asyncio.timeout(1.0): await slow_operation() # Raises TimeoutError if > 1s -``` - -#### Exceptions and Constants - -```python -import erlang_asyncio - -# Exceptions -erlang_asyncio.TimeoutError -erlang_asyncio.CancelledError -# Constants for wait() -erlang_asyncio.ALL_COMPLETED -erlang_asyncio.FIRST_COMPLETED -erlang_asyncio.FIRST_EXCEPTION +erlang.run(main()) ``` ### Performance Comparison -| Operation | asyncio | erlang_asyncio | Improvement | -|-----------|---------|----------------|-------------| +| Operation | Standard asyncio | Erlang Event Loop | Improvement | 
+|-----------|------------------|-------------------|-------------| | sleep(1ms) | ~1.5ms | ~1.1ms | ~27% faster | -| Event loop overhead | ~0.5-1ms | ~0 | No Python loop | +| Event loop overhead | ~0.5-1ms | ~0 | Erlang scheduler | | Timer precision | 10ms polling | Sub-ms | BEAM scheduler | | Idle CPU | Polling | Zero | Event-driven | -### When to Use erlang_asyncio +### When to Use Erlang Event Loop -**Use `erlang_asyncio` when:** +**Use `erlang.run()` when:** - You need precise sub-millisecond timing - Your app makes many small sleep calls - You want to eliminate Python event loop overhead - Building ASGI handlers that need efficient sleep +- Your app is running inside erlang_python -**Use standard `asyncio` when:** -- You need full asyncio compatibility (aiohttp, asyncpg, etc.) -- You're using third-party async libraries -- You need complex I/O multiplexing +**Use standard `asyncio.run()` when:** +- You're running outside the Erlang VM +- Testing Python code in isolation ### Integration with ASGI Frameworks -For ASGI applications (FastAPI, Starlette, etc.), you can use `erlang_asyncio.sleep` as a drop-in replacement: +For ASGI applications (FastAPI, Starlette, etc.), you can use the Erlang event loop for better performance: ```python from fastapi import FastAPI -import erlang_asyncio +import asyncio app = FastAPI() @app.get("/delay") async def delay_endpoint(ms: int = 100): - # Uses Erlang timer instead of asyncio event loop - await erlang_asyncio.sleep(ms / 1000.0) + # When running via py_asgi, uses Erlang timer + await asyncio.sleep(ms / 1000.0) return {"slept_ms": ms} ``` +## Async Worker Backend (Internal) + +The `py:async_call/3,4` and `py:await/1,2` APIs use an event-driven backend based on `py_event_loop`. 
+ +### Architecture + +``` +┌─────────────┐ ┌─────────────────┐ ┌──────────────────────┐ +│ Erlang │ │ C NIF │ │ py_event_loop │ +│ py:async_ │ │ (no thread) │ │ (Erlang process) │ +│ call() │ │ │ │ │ +└──────┬──────┘ └────────┬────────┘ └──────────┬───────────┘ + │ │ │ + │ 1. Message to │ │ + │ event_loop │ │ + │─────────────────────┼────────────────────────>│ + │ │ │ + │ 2. Return Ref │ │ + │<────────────────────┼─────────────────────────│ + │ │ │ + │ │ enif_select (wait) │ + │ │ ┌───────────────────┐ │ + │ │ │ Run Python │ │ + │ │ │ erlang.send(pid, │ │ + │ │ │ result) │ │ + │ │ └───────────────────┘ │ + │ │ │ + │ 3. {async_result} │ │ + │<──────────────────────────────────────────────│ + │ (direct erlang.send from Python) │ + │ │ │ +``` + +### Key Components + +| Component | Role | +|-----------|------| +| `py_event_loop_pool` | Pool manager for event loop-based async execution | +| `py_event_loop:run_async/2` | Submit coroutine to event loop | +| `_run_and_send` | Python wrapper that sends result via `erlang.send()` | +| `nif_event_loop_run_async` | NIF for direct coroutine submission | + +### Performance Benefits + +| Aspect | Previous (pthread) | Current (event_loop) | +|--------|-------------------|---------------------| +| Latency | ~10-20ms polling | <1ms (enif_select) | +| CPU idle | 100 wakeups/sec | Zero | +| Threads | N pthreads | 0 extra threads | +| GIL | Acquire/release in thread | Already held in callback | +| Shutdown | pthread_join (blocking) | Clean Erlang messages | + +The event-driven model eliminates the polling overhead of the previous pthread+usleep +implementation, resulting in significantly lower latency for async operations. + +## Limitations + +### Subprocess Operations Not Supported + +The `ErlangEventLoop` does not support subprocess operations: + +```python +# These will raise NotImplementedError: +loop.subprocess_shell(...) +loop.subprocess_exec(...) 
+ +# asyncio.create_subprocess_* will also fail +await asyncio.create_subprocess_shell(...) +await asyncio.create_subprocess_exec(...) +``` + +**Why?** Subprocess operations use `fork()` which would corrupt the Erlang VM. See [Security](security.md) for details. + +**Alternative:** Use Erlang ports (`open_port/2`) for subprocess management. You can register an Erlang function that runs shell commands and call it from Python via `erlang.call()`. + +### Signal Handling Not Supported + +The `ErlangEventLoop` does not support signal handlers: + +```python +# These will raise NotImplementedError: +loop.add_signal_handler(signal.SIGTERM, handler) +loop.remove_signal_handler(signal.SIGTERM) +``` + +**Why?** Signal handling should be done at the Erlang VM level. The BEAM has its own signal handling infrastructure that's integrated with supervisors and the OTP design patterns. + +**Alternative:** Handle signals in Erlang using the `kernel` application's signal handling or write a port program that forwards signals to Erlang processes. + +## Protocol-Based I/O + +For building custom servers with low-level protocol handling, see the [Reactor](reactor.md) module. The reactor provides FD-based protocol handling where Erlang manages I/O scheduling via `enif_select` and Python implements protocol logic. + ## See Also +- [Reactor](reactor.md) - Low-level FD-based protocol handling +- [Security](security.md) - Sandbox and blocked operations - [Threading](threading.md) - For `erlang.async_call()` in asyncio contexts - [Streaming](streaming.md) - For working with Python generators - [Getting Started](getting-started.md) - Basic usage guide diff --git a/docs/getting-started.md b/docs/getting-started.md index 88b7ddd..be9334e 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -95,7 +95,7 @@ For non-blocking operations: ```erlang %% Start async call -Ref = py:call_async(math, factorial, [1000]). +Ref = py:cast(math, factorial, [1000]). %% Do other work... 
@@ -350,28 +350,51 @@ elixir --erl "-pa _build/default/lib/erlang_python/ebin" examples/elixir_example This demonstrates basic calls, data conversion, callbacks, parallel processing (10x speedup), and AI integration. -## Using erlang_asyncio +## Using the Erlang Event Loop -For async Python code that uses `await asyncio.sleep()`, you can use `erlang_asyncio` for better performance. This module uses Erlang's native timer system instead of Python's event loop: +For async Python code, use the `erlang` module which provides an Erlang-backed asyncio event loop for better performance: ```python -import erlang_asyncio +import erlang +import asyncio async def my_handler(): # Uses Erlang's erlang:send_after/3 - no Python event loop overhead - await erlang_asyncio.sleep(0.1) # 100ms + await asyncio.sleep(0.1) # 100ms return "done" -# Run a coroutine -result = erlang_asyncio.run(my_handler()) +# Run a coroutine with the Erlang event loop +result = erlang.run(my_handler()) -# Also supports gather, wait_for, create_task, etc. +# Standard asyncio functions work seamlessly async def main(): - results = await erlang_asyncio.gather(task1(), task2(), task3()) + results = await asyncio.gather(task1(), task2(), task3()) + +erlang.run(main()) ``` This is especially useful in ASGI handlers where sleep operations are common. See [Asyncio](asyncio.md) for the full API reference. +## Security Considerations + +When Python runs inside the Erlang VM, certain operations are blocked for safety: + +- **Subprocess operations blocked** - `subprocess.Popen`, `os.fork()`, `os.system()`, etc. would corrupt the Erlang VM +- **Signal handling not supported** - Signal handling should be done at the Erlang level + +If you need to run external commands, use Erlang ports (`open_port/2`) instead: + +```erlang +%% From Erlang - run a shell command +Port = open_port({spawn, "ls -la"}, [exit_status, binary]), +receive + {Port, {data, Data}} -> Data; + {Port, {exit_status, 0}} -> ok +end. 
+``` + +See [Security](security.md) for details on blocked operations and recommended alternatives. + ## Next Steps - See [Type Conversion](type-conversion.md) for detailed type mapping @@ -382,4 +405,6 @@ This is especially useful in ASGI handlers where sleep operations are common. Se - See [Logging and Tracing](logging.md) for Python logging and distributed tracing - See [AI Integration](ai-integration.md) for ML/AI examples - See [Asyncio Event Loop](asyncio.md) for the Erlang-native asyncio implementation with TCP and UDP support +- See [Reactor](reactor.md) for FD-based protocol handling +- See [Security](security.md) for sandbox and blocked operations - See [Web Frameworks](web-frameworks.md) for ASGI/WSGI integration diff --git a/docs/migration.md b/docs/migration.md new file mode 100644 index 0000000..14d6298 --- /dev/null +++ b/docs/migration.md @@ -0,0 +1,262 @@ +# Migration Guide: v1.8.x to v2.0 + +This guide covers breaking changes and migration steps when upgrading from erlang_python v1.8.x to v2.0. + +## Quick Checklist + +- [ ] Rename `py:call_async` → `py:cast` +- [ ] Replace `py:bind`/`py:unbind` with `py_context_router` +- [ ] Replace `py:ctx_*` functions with `py_context:*` +- [ ] Replace `erlang_asyncio` imports with `erlang` +- [ ] Replace `erlang_asyncio.run()` with `erlang.run()` +- [ ] Replace subprocess calls with Erlang ports +- [ ] Move signal handlers to Erlang level +- [ ] Review any `os.fork`/`os.exec` usage + +## API Changes + +### `py:call_async` renamed to `py:cast` + +The function for non-blocking Python calls has been renamed to follow gen_server conventions: + +**Before (v1.8.x):** +```erlang +Ref = py:call_async(math, factorial, [100]), +{ok, Result} = py:await(Ref). +``` + +**After (v2.0):** +```erlang +Ref = py:cast(math, factorial, [100]), +{ok, Result} = py:await(Ref). +``` + +The semantics are identical - only the name changed. 
+ +### `erlang_asyncio` module removed + +The separate `erlang_asyncio` Python module has been consolidated into the main `erlang` module. + +**Before (v1.8.x):** +```python +import erlang_asyncio + +async def handler(): + await erlang_asyncio.sleep(0.1) + return "done" + +result = erlang_asyncio.run(handler()) +``` + +**After (v2.0):** +```python +import erlang + +async def handler(): + await erlang.sleep(0.1) + return "done" + +result = erlang.run(handler()) +``` + +**Function mapping:** + +| v1.8.x | v2.0 | +|--------|------| +| `erlang_asyncio.run(coro)` | `erlang.run(coro)` | +| `erlang_asyncio.sleep(delay)` | `erlang.sleep(delay)` | +| `erlang_asyncio.gather(*coros)` | `erlang.gather(*coros)` | +| `erlang_asyncio.wait_for(coro, timeout)` | `erlang.wait_for(coro, timeout)` | +| `erlang_asyncio.create_task(coro)` | `erlang.create_task(coro)` | + +## Removed Features + +### Context Affinity Functions (`bind`/`unbind`) + +The process-binding functions have been removed. The new architecture uses `py_context_router` for automatic scheduler-affinity routing. + +**Before (v1.8.x):** +```erlang +ok = py:bind(), +ok = py:exec(<<"x = 42">>), +{ok, 42} = py:eval(<<"x">>), +ok = py:unbind(). + +%% Or with explicit contexts +{ok, Ctx} = py:bind(new), +ok = py:ctx_exec(Ctx, <<"y = 100">>), +{ok, 100} = py:ctx_eval(Ctx, <<"y">>), +ok = py:unbind(Ctx). +``` + +**After (v2.0) - Use context router:** +```erlang +%% Automatic scheduler-affinity routing (recommended) +{ok, _} = py:call(math, sqrt, [16]). + +%% Or explicit context binding via router +Ctx = py_context_router:get_context(), +py_context_router:bind_context(Ctx), +{ok, _} = py:call(math, sqrt, [16]), %% Uses bound context +py_context_router:unbind_context(). + +%% For isolated state, use py_context directly +{ok, Contexts} = py_context_router:start(), +Ctx = py_context_router:get_context(1), +ok = py_context:exec(Ctx, <<"x = 42">>), +{ok, 42} = py_context:eval(Ctx, <<"x">>, #{}). 
+``` + +**Removed functions:** +- `py:bind/0`, `py:bind/1` +- `py:unbind/0`, `py:unbind/1` +- `py:is_bound/0` +- `py:with_context/1` +- `py:ctx_call/4,5,6` +- `py:ctx_eval/2,3,4` +- `py:ctx_exec/2` + +### Subprocess Support + +Python subprocess operations (`subprocess.Popen`, `asyncio.create_subprocess_*`, etc.) are no longer available. They are blocked by the audit hook sandbox because `fork()` would corrupt the Erlang VM. + +**Before (v1.8.x):** +```python +import subprocess +result = subprocess.run(["ls", "-la"], capture_output=True) +``` + +**After (v2.0) - Use Erlang ports:** +```erlang +%% Register a shell command helper +py:register_function(run_command, fun([Cmd, Args]) -> + Port = open_port({spawn_executable, Cmd}, + [{args, Args}, binary, exit_status, stderr_to_stdout]), + collect_output(Port, []) +end). + +collect_output(Port, Acc) -> + receive + {Port, {data, Data}} -> collect_output(Port, [Data | Acc]); + {Port, {exit_status, Status}} -> + {Status, iolist_to_binary(lists:reverse(Acc))} + end. +``` + +```python +from erlang import run_command +status, output = run_command("/bin/ls", ["-la"]) +``` + +See [Security](security.md) for details on blocked operations. + +### Signal Handling + +Signal handlers can no longer be registered from Python. The ErlangEventLoop raises `NotImplementedError` for `add_signal_handler` and `remove_signal_handler`. + +**Before (v1.8.x):** +```python +import signal +import asyncio + +loop = asyncio.get_event_loop() +loop.add_signal_handler(signal.SIGTERM, shutdown_handler) +``` + +**After (v2.0) - Handle at Erlang level:** +```erlang +%% In your application supervisor or main module +os:set_signal(sigterm, handle), + +%% Then in a process that handles system messages +receive + {signal, sigterm} -> + %% Graceful shutdown + application:stop(my_app) +end. 
+``` + +## New Features to Consider + +### Scheduler-Affinity Context Router + +The new `py_context_router` automatically routes Python calls based on scheduler ID, providing better cache locality: + +```erlang +%% Automatically uses scheduler-based routing +{ok, Result} = py:call(math, sqrt, [16]). + +%% Or explicitly bind a context to a process +Ctx = py_context_router:get_context(), +py_context_router:bind_context(Ctx), +%% All calls from this process now go to Ctx +``` + +### `erlang.reactor` for Protocol Handling + +For building custom servers, the new reactor module provides FD-based protocol handling: + +```python +from erlang.reactor import Protocol, serve + +class EchoProtocol(Protocol): + def data_received(self, data): + self.write(data) + return "continue" + +serve(sock, EchoProtocol) +``` + +See [Reactor](reactor.md) for full documentation. + +### `erlang.send()` for Fire-and-Forget Messages + +Send messages directly to Erlang processes without waiting: + +```python +import erlang + +# Send to a registered process +erlang.send(("my_server", "node@host"), {"event": "user_login", "user": 123}) + +# Send to a PID +erlang.send(pid, "hello") +``` + +## Performance Improvements + +The v2.0 release includes significant performance improvements: + +| Operation | v1.8.1 | v2.0 | Improvement | +|-----------|--------|------|-------------| +| Sync py:call | 0.011 ms | 0.004 ms | 2.9x faster | +| Sync py:eval | 0.014 ms | 0.007 ms | 2.0x faster | +| Cast (async) | 0.011 ms | 0.004 ms | 2.8x faster | +| Throughput | ~90K/s | ~250K/s | 2.8x higher | + +These improvements come from: +- Event-driven async model (no pthread polling) +- Scheduler-affinity routing +- Per-interpreter isolation +- Optimized NIF paths + +## Troubleshooting + +### "RuntimeError: fork() blocked by sandbox" + +You're trying to use subprocess or os.fork(). Use Erlang ports instead. See [Security](security.md). 
+ +### "NotImplementedError: Signal handlers not supported" + +Signal handling must be done at the Erlang level. See the Signal Handling section above. + +### "AttributeError: module 'erlang_asyncio' has no attribute..." + +The `erlang_asyncio` module has been removed. Update imports to use `erlang` directly. + +### Module not found: `_erlang_impl._loop` + +If you see this error with `py:async_call`, ensure the application is fully started: +```erlang +{ok, _} = application:ensure_all_started(erlang_python). +``` diff --git a/docs/reactor.md b/docs/reactor.md new file mode 100644 index 0000000..562c8d3 --- /dev/null +++ b/docs/reactor.md @@ -0,0 +1,379 @@ +# Reactor Module + +The `erlang.reactor` module provides low-level FD-based protocol handling for building custom servers. It enables Python to implement protocol logic while Erlang handles I/O scheduling via `enif_select`. + +## Overview + +The reactor pattern separates I/O multiplexing (handled by Erlang) from protocol logic (handled by Python). This provides: + +- **Efficient I/O** - Erlang's `enif_select` for event notification +- **Protocol flexibility** - Python implements the protocol state machine +- **Zero-copy potential** - Direct fd access for high-throughput scenarios +- **Works with any fd** - TCP, UDP, Unix sockets, pipes, etc. 
+ +### Architecture + +``` +┌──────────────────────────────────────────────────────────────────────┐ +│ Reactor Architecture │ +├──────────────────────────────────────────────────────────────────────┤ +│ │ +│ Erlang (BEAM) Python │ +│ ───────────── ────── │ +│ │ +│ ┌─────────────────────┐ ┌─────────────────────────────┐ │ +│ │ py_reactor_context │ │ erlang.reactor │ │ +│ │ │ │ │ │ +│ │ accept() ──────────┼──fd_handoff─▶│ init_connection(fd, info) │ │ +│ │ │ │ │ │ │ +│ │ enif_select(READ) │ │ ▼ │ │ +│ │ │ │ │ Protocol.connection_made() │ │ +│ │ ▼ │ │ │ │ +│ │ {select, fd, READ} │ │ │ │ +│ │ │ │ │ │ │ +│ │ └─────────────┼─on_read_ready│ Protocol.data_received() │ │ +│ │ │ │ │ │ │ +│ │ action = "write_ │◀─────────────┼───────┘ │ │ +│ │ pending" │ │ │ │ +│ │ │ │ │ │ │ +│ │ enif_select(WRITE) │ │ │ │ +│ │ │ │ │ │ │ +│ │ ▼ │ │ │ │ +│ │ {select, fd, WRITE}│ │ │ │ +│ │ │ │ │ │ │ +│ │ └─────────────┼on_write_ready│ Protocol.write_ready() │ │ +│ │ │ │ │ │ +│ └─────────────────────┘ └─────────────────────────────┘ │ +│ │ +└──────────────────────────────────────────────────────────────────────┘ +``` + +## Protocol Base Class + +The `Protocol` class is the base for implementing custom protocols: + +```python +import erlang.reactor as reactor + +class Protocol(reactor.Protocol): + """Base protocol attributes and methods.""" + + # Set by reactor on connection + fd: int # File descriptor + client_info: dict # Connection metadata from Erlang + write_buffer: bytearray # Buffer for outgoing data + closed: bool # Whether connection is closed +``` + +### Lifecycle Methods + +#### `connection_made(fd, client_info)` + +Called when a file descriptor is handed off from Erlang. + +```python +def connection_made(self, fd: int, client_info: dict): + """Called when fd is handed off from Erlang. + + Args: + fd: File descriptor for the connection + client_info: Dict with connection metadata + - 'addr': Client IP address + - 'port': Client port + - 'type': Connection type (tcp, udp, unix, etc.) 
+ """ + # Initialize per-connection state + self.request_buffer = bytearray() +``` + +#### `data_received(data) -> action` + +Called when data has been read from the fd. + +```python +def data_received(self, data: bytes) -> str: + """Handle received data. + + Args: + data: The bytes that were read + + Returns: + Action string indicating what to do next + """ + self.request_buffer.extend(data) + + if self.request_complete(): + self.prepare_response() + return "write_pending" + + return "continue" # Need more data +``` + +#### `write_ready() -> action` + +Called when the fd is ready for writing. + +```python +def write_ready(self) -> str: + """Handle write readiness. + + Returns: + Action string indicating what to do next + """ + if not self.write_buffer: + return "read_pending" + + written = self.write(bytes(self.write_buffer)) + del self.write_buffer[:written] + + if self.write_buffer: + return "continue" # More to write + return "read_pending" # Done writing +``` + +#### `connection_lost()` + +Called when the connection is closed. + +```python +def connection_lost(self): + """Called when connection closes. + + Override to perform cleanup. 
+ """ + # Clean up resources + self.cleanup() +``` + +### I/O Helper Methods + +#### `read(size) -> bytes` + +Read from the file descriptor: + +```python +data = self.read(65536) # Read up to 64KB +if not data: + return "close" # EOF or error +``` + +#### `write(data) -> int` + +Write to the file descriptor: + +```python +written = self.write(response_bytes) +del self.write_buffer[:written] # Remove written bytes +``` + +## Action Return Values + +Protocol methods return action strings that tell Erlang what to do next: + +| Action | Description | Erlang Behavior | +|--------|-------------|-----------------| +| `"continue"` | Keep current mode | Re-register same event | +| `"write_pending"` | Ready to write | Switch to write mode (`enif_select` WRITE) | +| `"read_pending"` | Ready to read | Switch to read mode (`enif_select` READ) | +| `"close"` | Close connection | Close fd and call `connection_lost()` | + +## Factory Pattern + +Register a protocol factory to create protocol instances for each connection: + +```python +import erlang.reactor as reactor + +class MyProtocol(reactor.Protocol): + # ... 
implementation + +# Set the factory - called for each new connection +reactor.set_protocol_factory(MyProtocol) + +# Get the protocol instance for an fd +proto = reactor.get_protocol(fd) +``` + +## Complete Example: Echo Protocol + +Here's a complete echo server protocol: + +```python +import erlang.reactor as reactor + +class EchoProtocol(reactor.Protocol): + """Simple echo protocol - sends back whatever it receives.""" + + def connection_made(self, fd, client_info): + super().connection_made(fd, client_info) + print(f"Connection from {client_info.get('addr')}:{client_info.get('port')}") + + def data_received(self, data): + """Echo received data back to client.""" + if not data: + return "close" + + # Buffer the data for writing + self.write_buffer.extend(data) + return "write_pending" + + def write_ready(self): + """Write buffered data.""" + if not self.write_buffer: + return "read_pending" + + written = self.write(bytes(self.write_buffer)) + del self.write_buffer[:written] + + if self.write_buffer: + return "continue" # More data to write + return "read_pending" # Done, wait for more input + + def connection_lost(self): + print(f"Connection closed: fd={self.fd}") + +# Register the factory +reactor.set_protocol_factory(EchoProtocol) +``` + +## Example: HTTP Protocol (Simplified) + +```python +import erlang.reactor as reactor + +class SimpleHTTPProtocol(reactor.Protocol): + """Minimal HTTP/1.0 protocol.""" + + def __init__(self): + super().__init__() + self.request_buffer = bytearray() + + def data_received(self, data): + self.request_buffer.extend(data) + + # Check for end of headers + if b'\r\n\r\n' in self.request_buffer: + self.handle_request() + return "write_pending" + + return "continue" + + def handle_request(self): + """Parse request and prepare response.""" + request = self.request_buffer.decode('utf-8', errors='replace') + first_line = request.split('\r\n')[0] + method, path, _ = first_line.split(' ', 2) + + # Simple response + body = f"Hello! 
You requested {path}" + response = ( + f"HTTP/1.0 200 OK\r\n" + f"Content-Length: {len(body)}\r\n" + f"Content-Type: text/plain\r\n" + f"\r\n" + f"{body}" + ) + self.write_buffer.extend(response.encode()) + + def write_ready(self): + if not self.write_buffer: + return "close" # HTTP/1.0: close after response + + written = self.write(bytes(self.write_buffer)) + del self.write_buffer[:written] + + if self.write_buffer: + return "continue" + return "close" + +reactor.set_protocol_factory(SimpleHTTPProtocol) +``` + +## Integration with Erlang + +### From Erlang: Starting a Reactor Server + +```erlang +%% In your Erlang code +-module(my_server). +-export([start/1]). + +start(Port) -> + %% Set up the Python protocol factory first + ok = py:exec(<<" +import erlang.reactor as reactor +from my_protocols import MyProtocol +reactor.set_protocol_factory(MyProtocol) +">>), + + %% Start accepting connections + {ok, ListenSock} = gen_tcp:listen(Port, [binary, {active, false}, {reuseaddr, true}]), + accept_loop(ListenSock). + +accept_loop(ListenSock) -> + {ok, ClientSock} = gen_tcp:accept(ListenSock), + {ok, {Addr, Port}} = inet:peername(ClientSock), + + %% Hand off to Python reactor + {ok, Fd} = inet:getfd(ClientSock), + py_reactor_context:handoff(Fd, #{ + addr => inet:ntoa(Addr), + port => Port, + type => tcp + }), + + accept_loop(ListenSock). +``` + +### How FDs Are Passed from Erlang to Python + +1. Erlang accepts a connection and gets the socket fd +2. Erlang calls `py_reactor_context:handoff(Fd, ClientInfo)` +3. The NIF calls Python's `reactor.init_connection(fd, client_info)` +4. Protocol factory creates a new Protocol instance +5. `enif_select` is registered for read events on the fd +6. When events occur, Python callbacks handle the protocol logic + +## Module API Reference + +### `set_protocol_factory(factory)` + +Set the factory function for creating protocols. 
+ +```python +reactor.set_protocol_factory(MyProtocol) +# or with a custom factory +reactor.set_protocol_factory(lambda: MyProtocol(custom_arg)) +``` + +### `get_protocol(fd)` + +Get the protocol instance for a file descriptor. + +```python +proto = reactor.get_protocol(fd) +if proto: + print(f"Protocol state: {proto.client_info}") +``` + +### `init_connection(fd, client_info)` + +Internal - called by NIF on fd handoff. + +### `on_read_ready(fd)` + +Internal - called by NIF when fd is readable. + +### `on_write_ready(fd)` + +Internal - called by NIF when fd is writable. + +### `close_connection(fd)` + +Internal - called by NIF to close connection. + +## See Also + +- [Asyncio](asyncio.md) - Higher-level asyncio event loop for Python +- [Security](security.md) - Security sandbox documentation +- [Getting Started](getting-started.md) - Basic usage guide diff --git a/docs/security.md b/docs/security.md new file mode 100644 index 0000000..f9e5247 --- /dev/null +++ b/docs/security.md @@ -0,0 +1,159 @@ +# Security + +This guide covers the security sandbox that protects the Erlang VM when running embedded Python code. + +## Overview + +When Python runs embedded inside the Erlang VM (BEAM), certain operations must be blocked because they would corrupt or destabilize the runtime. The `erlang_python` library automatically installs a sandbox that blocks these dangerous operations. + +### Why Fork/Exec Are Blocked + +The Erlang VM is a sophisticated runtime with: +- Multiple scheduler threads managing lightweight processes +- Complex memory management and garbage collection +- Intricate internal state for message passing and I/O + +When `fork()` is called, the child process gets a copy of the parent's memory but only the calling thread. This leaves the child with corrupted state - scheduler threads are missing, locks are in inconsistent states, and internal data structures are broken. The child process will crash or behave unpredictably. 
+ +Similarly, `exec()` replaces the current process image entirely, terminating the Erlang VM. + +## Audit Hook Mechanism + +The sandbox uses Python's audit hook system (PEP 578) to intercept dangerous operations at a low level, before they can execute: + +```python +# Automatically installed when running inside Erlang +import sys +sys.addaudithook(sandbox_hook) # Cannot be removed once installed +``` + +This provides defense-in-depth - even if Python code tries to import `os` or `subprocess` directly, the operations are blocked. + +## Blocked Operations + +| Operation | Module | Reason | +|-----------|--------|--------| +| `fork()` | `os` | Corrupts Erlang VM state | +| `forkpty()` | `os` | Uses fork internally | +| `system()` | `os` | Executes via shell (uses fork) | +| `popen()` | `os` | Opens pipe to subprocess (uses fork) | +| `exec*()` | `os` | Replaces process image | +| `spawn*()` | `os` | Creates subprocess (uses fork) | +| `posix_spawn*()` | `os` | POSIX subprocess creation | +| `Popen` | `subprocess` | Creates subprocess (uses fork) | +| `run()` | `subprocess` | Wrapper around Popen | +| `call()` | `subprocess` | Wrapper around Popen | + +## Error Messages + +When blocked operations are attempted, you'll see: + +```python +>>> import subprocess +>>> subprocess.run(['ls']) +RuntimeError: subprocess.Popen is blocked in Erlang VM context. +fork()/exec() would corrupt the Erlang runtime. +Use Erlang ports (open_port/2) for subprocess management. +``` + +```python +>>> import os +>>> os.fork() +RuntimeError: os.fork is blocked in Erlang VM context. +fork()/exec() would corrupt the Erlang runtime. +Use Erlang ports (open_port/2) for subprocess management. +``` + +## Recommended Alternatives + +Instead of using Python's subprocess facilities, use Erlang's port mechanism which properly manages external processes. 
+ +### From Erlang: Running Shell Commands + +```erlang +%% Run a command and capture output +run_command(Cmd) -> + Port = open_port({spawn, Cmd}, [exit_status, binary, stderr_to_stdout]), + collect_output(Port, []). + +collect_output(Port, Acc) -> + receive + {Port, {data, Data}} -> + collect_output(Port, [Data | Acc]); + {Port, {exit_status, Status}} -> + {Status, iolist_to_binary(lists:reverse(Acc))} + after 30000 -> + port_close(Port), + {error, timeout} + end. + +%% Usage +{0, Output} = run_command("ls -la"). +``` + +### From Python: Calling Erlang to Run Commands + +Register an Erlang function that runs commands: + +```erlang +%% In Erlang +py:register_function(run_shell, fun([Cmd]) -> + Port = open_port({spawn, binary_to_list(Cmd)}, + [exit_status, binary, stderr_to_stdout]), + collect_output(Port, []) +end). +``` + +```python +# In Python +from erlang import run_shell + +# This calls through Erlang, which properly manages the subprocess +result = run_shell("ls -la") +``` + +### Using Erlang Ports for Long-Running Processes + +```erlang +%% Start a long-running process +{ok, Port} = py:call('__main__', start_worker_via_erlang, []), + +%% The Python code registers a function: +py:register_function(start_worker_via_erlang, fun([]) -> + Port = open_port({spawn, "python3 worker.py"}, + [binary, {line, 1024}, use_stdio]), + Port % Return port reference to Python +end). 
+``` + +### Alternative: Use `erlang.send()` for Communication + +For Python code that needs to trigger external processes, use message passing to coordinate with Erlang supervisors: + +```python +import erlang + +# Send a request to an Erlang process that manages subprocesses +erlang.send(supervisor_pid, ('spawn_worker', worker_args)) +``` + +## Checking Sandbox Status + +From Python, you can check if the sandbox is active: + +```python +from erlang._sandbox import is_sandboxed + +if is_sandboxed(): + print("Running inside Erlang VM - subprocess operations blocked") +``` + +## Signal Handling Note + +Signal handling is also not supported in the Erlang event loop. The `ErlangEventLoop` raises `NotImplementedError` for `add_signal_handler()` and `remove_signal_handler()`. Signal handling should be done at the Erlang VM level using Erlang's signal handling facilities. + +## See Also + +- [Getting Started](getting-started.md) - Basic usage guide +- [Asyncio](asyncio.md) - Erlang-native asyncio event loop +- [Threading](threading.md) - Python threading support diff --git a/examples/basic_example.erl b/examples/basic_example.erl index 687f0d0..b3a95da 100644 --- a/examples/basic_example.erl +++ b/examples/basic_example.erl @@ -53,8 +53,8 @@ main(_) -> io:format("~n=== Async Calls ===~n~n"), %% Async call - Ref1 = py:call_async(math, factorial, [10]), - Ref2 = py:call_async(math, factorial, [20]), + Ref1 = py:cast(math, factorial, [10]), + Ref2 = py:cast(math, factorial, [20]), {ok, Fact10} = py:await(Ref1), {ok, Fact20} = py:await(Ref2), diff --git a/examples/bench_resource_pool.erl b/examples/bench_resource_pool.erl new file mode 100644 index 0000000..356e94e --- /dev/null +++ b/examples/bench_resource_pool.erl @@ -0,0 +1,138 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -pa _build/default/lib/erlang_python/ebin + +%%% @doc Benchmark script for py_resource_pool performance testing. +%%% +%%% Compares the new resource pool with the existing worker pool. 
+%%% +%%% Run with: +%%% rebar3 compile && escript examples/bench_resource_pool.erl + +-mode(compile). + +main(_Args) -> + io:format("~n=== py_resource_pool Benchmark ===~n~n"), + + %% Start the application + {ok, _} = application:ensure_all_started(erlang_python), + + %% Print system info + print_system_info(), + + %% Benchmark the resource pool + io:format("~n--- Resource Pool Benchmarks ---~n~n"), + ok = py_resource_pool:start(), + Stats = py_resource_pool:stats(), + io:format("Pool stats: ~p~n~n", [Stats]), + + %% Sequential calls + bench_resource_pool_sequential(1000), + + %% Concurrent calls + bench_resource_pool_concurrent(10, 100), + bench_resource_pool_concurrent(50, 100), + bench_resource_pool_concurrent(100, 100), + + %% Stop resource pool + ok = py_resource_pool:stop(), + + %% Now benchmark the old worker pool for comparison + io:format("~n--- Old Worker Pool (py:call) Benchmarks ---~n~n"), + + %% Sequential calls + bench_old_pool_sequential(1000), + + %% Concurrent calls + bench_old_pool_concurrent(10, 100), + bench_old_pool_concurrent(50, 100), + bench_old_pool_concurrent(100, 100), + + io:format("~n=== Benchmark Complete ===~n"), + halt(0). + +print_system_info() -> + io:format("System Information:~n"), + io:format(" Erlang/OTP: ~s~n", [erlang:system_info(otp_release)]), + io:format(" Schedulers: ~p~n", [erlang:system_info(schedulers)]), + io:format(" Python: "), + {ok, PyVer} = py:version(), + io:format("~s~n", [PyVer]), + io:format(" Execution Mode: ~p~n", [py:execution_mode()]), + io:format("~n"). + +%% Resource pool benchmarks +bench_resource_pool_sequential(N) -> + io:format("Resource Pool: Sequential calls (math.sqrt)~n"), + io:format(" Iterations: ~p~n", [N]), + + {Time, _} = timer:tc(fun() -> + lists:foreach(fun(I) -> + {ok, _} = py_resource_pool:call(math, sqrt, [I]) + end, lists:seq(1, N)) + end), + + print_results(Time, N). 
+ +bench_resource_pool_concurrent(NumProcs, CallsPerProc) -> + TotalCalls = NumProcs * CallsPerProc, + io:format("Resource Pool: Concurrent calls~n"), + io:format(" Processes: ~p, Calls/process: ~p, Total: ~p~n", + [NumProcs, CallsPerProc, TotalCalls]), + + Parent = self(), + + {Time, _} = timer:tc(fun() -> + Pids = [spawn_link(fun() -> + lists:foreach(fun(I) -> + {ok, _} = py_resource_pool:call(math, sqrt, [I]) + end, lists:seq(1, CallsPerProc)), + Parent ! {done, self()} + end) || _ <- lists:seq(1, NumProcs)], + + [receive {done, Pid} -> ok end || Pid <- Pids] + end), + + print_results(Time, TotalCalls). + +%% Old pool benchmarks (py:call) +bench_old_pool_sequential(N) -> + io:format("Old Pool (py:call): Sequential calls (math.sqrt)~n"), + io:format(" Iterations: ~p~n", [N]), + + {Time, _} = timer:tc(fun() -> + lists:foreach(fun(I) -> + {ok, _} = py:call(math, sqrt, [I]) + end, lists:seq(1, N)) + end), + + print_results(Time, N). + +bench_old_pool_concurrent(NumProcs, CallsPerProc) -> + TotalCalls = NumProcs * CallsPerProc, + io:format("Old Pool (py:call): Concurrent calls~n"), + io:format(" Processes: ~p, Calls/process: ~p, Total: ~p~n", + [NumProcs, CallsPerProc, TotalCalls]), + + Parent = self(), + + {Time, _} = timer:tc(fun() -> + Pids = [spawn_link(fun() -> + lists:foreach(fun(I) -> + {ok, _} = py:call(math, sqrt, [I]) + end, lists:seq(1, CallsPerProc)), + Parent ! {done, self()} + end) || _ <- lists:seq(1, NumProcs)], + + [receive {done, Pid} -> ok end || Pid <- Pids] + end), + + print_results(Time, TotalCalls). + +print_results(TimeUs, N) -> + TimeMs = TimeUs / 1000, + CallsPerSec = N / (TimeMs / 1000), + PerCall = TimeMs / N, + io:format(" Total time: ~.2f ms~n", [TimeMs]), + io:format(" Per call: ~.3f ms~n", [PerCall]), + io:format(" Throughput: ~p calls/sec~n~n", [round(CallsPerSec)]). 
diff --git a/examples/benchmark_compare.erl b/examples/benchmark_compare.erl new file mode 100644 index 0000000..3065454 --- /dev/null +++ b/examples/benchmark_compare.erl @@ -0,0 +1,282 @@ +#!/usr/bin/env escript +%% -*- erlang -*- +%%! -pa _build/default/lib/erlang_python/ebin + +%%% @doc Benchmark for comparing performance between erlang_python versions. + +-mode(compile). + +main(_Args) -> + io:format("~n"), + io:format("╔══════════════════════════════════════════════════════════════╗~n"), + io:format("║ erlang_python Version Comparison Benchmark ║~n"), + io:format("╚══════════════════════════════════════════════════════════════╝~n~n"), + + {ok, _} = application:ensure_all_started(erlang_python), + + print_system_info(), + + io:format("Running benchmarks...~n"), + io:format("════════════════════════════════════════════════════════════════~n~n"), + + %% Run all benchmarks and collect results + Results = [ + bench_sync_call(), + bench_sync_eval(), + bench_cast_single(), + bench_cast_multiple(), + bench_cast_parallel(), + bench_concurrent_sync(), + bench_concurrent_cast() + ], + + %% Print summary + print_summary(Results), + + halt(0). + +print_system_info() -> + io:format("System Information~n"), + io:format("──────────────────~n"), + io:format(" Erlang/OTP: ~s~n", [erlang:system_info(otp_release)]), + io:format(" Schedulers: ~p~n", [erlang:system_info(schedulers)]), + {ok, PyVer} = py:version(), + io:format(" Python: ~s~n", [PyVer]), + io:format(" Execution Mode: ~p~n", [py:execution_mode()]), + io:format(" Max Concurrent: ~p~n", [py_semaphore:max_concurrent()]), + io:format("~n"). 
+ +%% ============================================================================ +%% Benchmark: Synchronous Calls +%% ============================================================================ + +bench_sync_call() -> + Name = "Sync py:call (math.sqrt)", + N = 1000, + + io:format("▶ ~s~n", [Name]), + io:format(" Iterations: ~p~n", [N]), + + {Time, _} = timer:tc(fun() -> + lists:foreach(fun(I) -> + {ok, _} = py:call(math, sqrt, [I]) + end, lists:seq(1, N)) + end), + + TimeMs = Time / 1000, + PerCall = TimeMs / N, + Throughput = round(N / (TimeMs / 1000)), + + io:format(" Total: ~.2f ms~n", [TimeMs]), + io:format(" Per call: ~.3f ms~n", [PerCall]), + io:format(" Throughput: ~p calls/sec~n~n", [Throughput]), + + {Name, PerCall, Throughput}. + +bench_sync_eval() -> + Name = "Sync py:eval (arithmetic)", + N = 1000, + + io:format("▶ ~s~n", [Name]), + io:format(" Iterations: ~p~n", [N]), + + {Time, _} = timer:tc(fun() -> + lists:foreach(fun(I) -> + {ok, _} = py:eval(<<"x * x + y">>, #{x => I, y => I}) + end, lists:seq(1, N)) + end), + + TimeMs = Time / 1000, + PerCall = TimeMs / N, + Throughput = round(N / (TimeMs / 1000)), + + io:format(" Total: ~.2f ms~n", [TimeMs]), + io:format(" Per eval: ~.3f ms~n", [PerCall]), + io:format(" Throughput: ~p evals/sec~n~n", [Throughput]), + + {Name, PerCall, Throughput}. 
+ +%% ============================================================================ +%% Benchmark: Cast (non-blocking) Calls +%% ============================================================================ + +bench_cast_single() -> + Name = "Cast py:cast single", + N = 1000, + + io:format("▶ ~s~n", [Name]), + io:format(" Iterations: ~p~n", [N]), + + {Time, _} = timer:tc(fun() -> + lists:foreach(fun(I) -> + Ref = py:cast(math, sqrt, [I]), + {ok, _} = py:await(Ref, 5000) + end, lists:seq(1, N)) + end), + + TimeMs = Time / 1000, + PerCall = TimeMs / N, + Throughput = round(N / (TimeMs / 1000)), + + io:format(" Total: ~.2f ms~n", [TimeMs]), + io:format(" Per call: ~.3f ms~n", [PerCall]), + io:format(" Throughput: ~p calls/sec~n~n", [Throughput]), + + {Name, PerCall, Throughput}. + +bench_cast_multiple() -> + Name = "Cast py:cast batch (10 calls)", + N = 100, + + io:format("▶ ~s~n", [Name]), + io:format(" Batches: ~p (10 cast calls each)~n", [N]), + + {Time, _} = timer:tc(fun() -> + lists:foreach(fun(Batch) -> + %% Start 10 cast calls + Refs = [py:cast(math, sqrt, [Batch * 10 + I]) + || I <- lists:seq(1, 10)], + %% Await all + [{ok, _} = py:await(Ref, 5000) || Ref <- Refs] + end, lists:seq(1, N)) + end), + + TotalCalls = N * 10, + TimeMs = Time / 1000, + PerBatch = TimeMs / N, + Throughput = round(TotalCalls / (TimeMs / 1000)), + + io:format(" Total: ~.2f ms~n", [TimeMs]), + io:format(" Per batch: ~.3f ms~n", [PerBatch]), + io:format(" Throughput: ~p calls/sec~n~n", [Throughput]), + + {Name, PerBatch, Throughput}. 
+ +bench_cast_parallel() -> + Name = "Cast py:cast parallel (10 concurrent)", + N = 100, + + io:format("▶ ~s~n", [Name]), + io:format(" Batches: ~p (10 concurrent calls each)~n", [N]), + + {Time, _} = timer:tc(fun() -> + lists:foreach(fun(Batch) -> + %% Start 10 cast calls in parallel + Refs = [py:cast(math, factorial, [20 + (Batch rem 10)]) + || _ <- lists:seq(1, 10)], + %% Await all results + [py:await(Ref, 5000) || Ref <- Refs] + end, lists:seq(1, N)) + end), + + TotalCalls = N * 10, + TimeMs = Time / 1000, + PerBatch = TimeMs / N, + Throughput = round(TotalCalls / (TimeMs / 1000)), + + io:format(" Total: ~.2f ms~n", [TimeMs]), + io:format(" Per batch: ~.3f ms~n", [PerBatch]), + io:format(" Throughput: ~p calls/sec~n~n", [Throughput]), + + {Name, PerBatch, Throughput}. + +%% ============================================================================ +%% Benchmark: Concurrent Operations +%% ============================================================================ + +bench_concurrent_sync() -> + Name = "Concurrent sync (50 procs x 20 calls)", + NumProcs = 50, + CallsPerProc = 20, + TotalCalls = NumProcs * CallsPerProc, + + io:format("▶ ~s~n", [Name]), + io:format(" Processes: ~p, Calls/proc: ~p, Total: ~p~n", + [NumProcs, CallsPerProc, TotalCalls]), + + Parent = self(), + + {Time, _} = timer:tc(fun() -> + Pids = [spawn_link(fun() -> + lists:foreach(fun(I) -> + {ok, _} = py:call(math, sqrt, [I]) + end, lists:seq(1, CallsPerProc)), + Parent ! {done, self()} + end) || _ <- lists:seq(1, NumProcs)], + + [receive {done, Pid} -> ok end || Pid <- Pids] + end), + + TimeMs = Time / 1000, + Throughput = round(TotalCalls / (TimeMs / 1000)), + + io:format(" Total: ~.2f ms~n", [TimeMs]), + io:format(" Throughput: ~p calls/sec~n~n", [Throughput]), + + {Name, TimeMs, Throughput}. 
+ +bench_concurrent_cast() -> + Name = "Concurrent cast (50 procs x 5 casts)", + NumProcs = 50, + CallsPerProc = 5, + TotalCalls = NumProcs * CallsPerProc, + + io:format("▶ ~s~n", [Name]), + io:format(" Processes: ~p, Casts/proc: ~p, Total: ~p~n", + [NumProcs, CallsPerProc, TotalCalls]), + + Parent = self(), + + {Time, _} = timer:tc(fun() -> + Pids = [spawn_link(fun() -> + lists:foreach(fun(I) -> + Ref = py:cast(math, factorial, [20 + I]), + {ok, _} = py:await(Ref, 5000) + end, lists:seq(1, CallsPerProc)), + Parent ! {done, self()} + end) || _ <- lists:seq(1, NumProcs)], + + [receive {done, Pid} -> ok end || Pid <- Pids] + end), + + TimeMs = Time / 1000, + Throughput = round(TotalCalls / (TimeMs / 1000)), + + io:format(" Total: ~.2f ms~n", [TimeMs]), + io:format(" Throughput: ~p calls/sec~n~n", [Throughput]), + + {Name, TimeMs, Throughput}. + +%% ============================================================================ +%% Summary +%% ============================================================================ + +print_summary(Results) -> + io:format("════════════════════════════════════════════════════════════════~n"), + io:format("SUMMARY~n"), + io:format("════════════════════════════════════════════════════════════════~n~n"), + + io:format("┌────────────────────────────────────────┬───────────┬───────────┐~n"), + io:format("│ Benchmark │ Latency │ Thru/sec │~n"), + io:format("├────────────────────────────────────────┼───────────┼───────────┤~n"), + + lists:foreach(fun({Name, Latency, Throughput}) -> + LatStr = if + Latency < 0 -> "N/A"; + Latency < 1000 -> io_lib:format("~.3f ms", [Latency]); + true -> io_lib:format("~.1f ms", [Latency]) + end, + ThrStr = if + Throughput =:= 0 -> "N/A"; + true -> integer_to_list(Throughput) + end, + io:format("│ ~-38s │ ~-9s │ ~-9s │~n", + [string:slice(Name, 0, 38), LatStr, ThrStr]) + end, Results), + + io:format("└────────────────────────────────────────┴───────────┴───────────┘~n~n"), + + io:format("Key metrics to compare 
between versions:~n"), + io:format(" * Sync call performance should be similar~n"), + io:format(" * Cast (non-blocking) call overhead~n"), + io:format(" * Concurrent throughput with multiple processes~n~n"). diff --git a/examples/benchmark_event_loop.py b/examples/benchmark_event_loop.py index 81262d0..03179a4 100644 --- a/examples/benchmark_event_loop.py +++ b/examples/benchmark_event_loop.py @@ -459,16 +459,19 @@ def main(): # Test with Erlang event loop try: - # Try to import the Erlang event loop + # Try to import from erlang module (primary API) try: - from erlang_loop import ErlangEventLoop + from erlang import ErlangEventLoop except ImportError: - # Try loading from priv directory - import os - priv_path = os.path.join(os.path.dirname(__file__), '..', 'priv') - if priv_path not in sys.path: - sys.path.insert(0, priv_path) - from erlang_loop import ErlangEventLoop + # Fallback to _erlang_impl if erlang module not extended + try: + from _erlang_impl import ErlangEventLoop + except ImportError: + import os + priv_path = os.path.join(os.path.dirname(__file__), '..', 'priv') + if priv_path not in sys.path: + sys.path.insert(0, priv_path) + from _erlang_impl import ErlangEventLoop erlang_loop = ErlangEventLoop() erlang_results = run_benchmark_suite("Erlang Event Loop", erlang_loop) diff --git a/examples/reactor_echo.erl b/examples/reactor_echo.erl new file mode 100644 index 0000000..bd45d25 --- /dev/null +++ b/examples/reactor_echo.erl @@ -0,0 +1,111 @@ +#!/usr/bin/env escript +%%% @doc Simple TCP echo server using erlang.reactor. +%%% +%%% This example demonstrates the Erlang-as-Reactor architecture where: +%%% - Erlang handles TCP accept and I/O scheduling via enif_select +%%% - Python handles protocol logic (echo in this case) +%%% +%%% Prerequisites: rebar3 compile +%%% Run from project root: escript examples/reactor_echo.erl +%%% +%%% Test with: echo "hello" | nc localhost 9999 + +-mode(compile). 
+ +main(_) -> + %% Add the compiled beam files to the code path + ScriptDir = filename:dirname(escript:script_name()), + ProjectRoot = filename:dirname(ScriptDir), + EbinDir = filename:join([ProjectRoot, "_build", "default", "lib", "erlang_python", "ebin"]), + true = code:add_pathz(EbinDir), + + %% Start the application + {ok, _} = application:ensure_all_started(erlang_python), + + io:format("~n=== Erlang Reactor Echo Server ===~n~n"), + + %% Start a reactor context + {ok, Ctx} = py_reactor_context:start_link(1, auto), + + %% Set up Python echo protocol + ok = py:exec(Ctx, <<" +import erlang.reactor as reactor + +class EchoProtocol(reactor.Protocol): + '''Simple echo protocol - sends back whatever it receives.''' + + def data_received(self, data): + # Echo data back + self.write_buffer.extend(data) + return 'write_pending' + + def write_ready(self): + if not self.write_buffer: + return 'read_pending' + written = self.write(bytes(self.write_buffer)) + del self.write_buffer[:written] + if self.write_buffer: + return 'continue' + return 'read_pending' + + def connection_lost(self): + print(f'Connection closed: fd={self.fd}') + +reactor.set_protocol_factory(EchoProtocol) +print('Echo protocol registered') +">>), + + %% Listen on port 9999 + Port = 9999, + {ok, LSock} = gen_tcp:listen(Port, [ + binary, + {active, false}, + {reuseaddr, true}, + {nodelay, true} + ]), + + io:format("Listening on port ~p~n", [Port]), + io:format("Test with: echo 'hello' | nc localhost ~p~n~n", [Port]), + + %% Accept loop (in main process for simplicity) + accept_loop(LSock, Ctx). 
+ +accept_loop(LSock, Ctx) -> + case gen_tcp:accept(LSock, 5000) of + {ok, Sock} -> + %% Get the fd + {ok, Fd} = prim_inet:getfd(Sock), + + %% Get client info + ClientInfo = case inet:peername(Sock) of + {ok, {Addr, Port}} -> + #{addr => format_addr(Addr), port => Port}; + _ -> + #{addr => <<"unknown">>, port => 0} + end, + + io:format("Accepted connection from ~s:~p (fd=~p)~n", + [maps:get(addr, ClientInfo), maps:get(port, ClientInfo), Fd]), + + %% Hand off to reactor context + Ctx ! {fd_handoff, Fd, ClientInfo}, + + accept_loop(LSock, Ctx); + + {error, timeout} -> + %% No connection, keep waiting + accept_loop(LSock, Ctx); + + {error, closed} -> + io:format("Listen socket closed~n"), + ok; + + {error, Reason} -> + io:format("Accept error: ~p~n", [Reason]), + accept_loop(LSock, Ctx) + end. + +format_addr({A, B, C, D}) -> + iolist_to_binary(io_lib:format("~B.~B.~B.~B", [A, B, C, D])); +format_addr(_) -> + <<"unknown">>. diff --git a/priv/_erlang_impl/__init__.py b/priv/_erlang_impl/__init__.py new file mode 100644 index 0000000..77a2c80 --- /dev/null +++ b/priv/_erlang_impl/__init__.py @@ -0,0 +1,202 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Erlang-backed asyncio event loop - uvloop-compatible API. + +This module provides a drop-in replacement for uvloop, using Erlang's +BEAM VM scheduler for I/O multiplexing via enif_select. 
+ +Usage patterns (matching uvloop exactly): + + # Pattern 1: Recommended (Python 3.11+) + import erlang + erlang.run(main()) + + # Pattern 2: With asyncio.Runner (Python 3.11+) + import asyncio + import erlang + with asyncio.Runner(loop_factory=erlang.new_event_loop) as runner: + runner.run(main()) + + # Pattern 3: Legacy (deprecated in 3.12+) + import asyncio + import erlang + erlang.install() + asyncio.run(main()) + + # Pattern 4: Manual + import asyncio + import erlang + loop = erlang.new_event_loop() + asyncio.set_event_loop(loop) + loop.run_until_complete(main()) +""" + +import sys +import asyncio +import warnings + +# Install sandbox when running inside Erlang VM +# This must happen before any other imports to block subprocess/fork +try: + import py_event_loop # Only available when running in Erlang NIF + from ._sandbox import install_sandbox + install_sandbox() +except ImportError: + pass # Not running inside Erlang VM + +from ._loop import ErlangEventLoop +from ._policy import ErlangEventLoopPolicy +from ._mode import detect_mode, ExecutionMode +from . import _reactor as reactor + +__all__ = [ + 'run', + 'new_event_loop', + 'get_event_loop_policy', + 'install', + 'EventLoopPolicy', + 'ErlangEventLoopPolicy', + 'ErlangEventLoop', + 'detect_mode', + 'ExecutionMode', + 'reactor', +] + +# Re-export for uvloop API compatibility +EventLoopPolicy = ErlangEventLoopPolicy + + +def get_event_loop_policy() -> ErlangEventLoopPolicy: + """Get an Erlang event loop policy instance. + + Returns a policy that uses ErlangEventLoop for event loops. + This is used by Erlang code to set the default asyncio policy. + + Returns: + ErlangEventLoopPolicy: A new policy instance. + """ + return ErlangEventLoopPolicy() + + +def new_event_loop() -> ErlangEventLoop: + """Create a new Erlang-backed event loop. + + Returns: + ErlangEventLoop: A new event loop instance backed by Erlang's + scheduler via enif_select. 
Each loop has its own isolated + capsule for proper timer and FD event routing. + """ + return ErlangEventLoop() + + +def run(main, *, debug=None, **run_kwargs): + """Run a coroutine using Erlang event loop. + + The preferred way to run async code with Erlang backend. + Equivalent to uvloop.run(). + + Args: + main: The coroutine to run. + debug: Enable debug mode if True. + **run_kwargs: Additional arguments passed to asyncio.run() or Runner. + + Returns: + The return value of the coroutine. + + Example: + import erlang + + async def main(): + await asyncio.sleep(1) + return "done" + + result = erlang.run(main()) + """ + if sys.version_info >= (3, 12): + # Python 3.12+ supports loop_factory in asyncio.run() + return asyncio.run( + main, + loop_factory=new_event_loop, + debug=debug, + **run_kwargs + ) + elif sys.version_info >= (3, 11): + # Python 3.11 has asyncio.Runner with loop_factory + with asyncio.Runner(loop_factory=new_event_loop, debug=debug) as runner: + return runner.run(main) + else: + # Python 3.10 and earlier: manual loop management + loop = new_event_loop() + if debug is not None: + loop.set_debug(debug) + try: + asyncio.set_event_loop(loop) + return loop.run_until_complete(main) + finally: + try: + _cancel_all_tasks(loop) + loop.run_until_complete(loop.shutdown_asyncgens()) + if hasattr(loop, 'shutdown_default_executor'): + loop.run_until_complete(loop.shutdown_default_executor()) + finally: + asyncio.set_event_loop(None) + loop.close() + + +def install(): + """Install ErlangEventLoopPolicy as the default event loop policy. + + This function is deprecated in Python 3.12+. Use run() instead. + + Example (legacy pattern): + import asyncio + import erlang + + erlang.install() + asyncio.run(main()) # Uses Erlang event loop + """ + if sys.version_info >= (3, 12): + warnings.warn( + "erlang.install() is deprecated in Python 3.12+. 
" + "Use erlang.run(main()) instead.", + DeprecationWarning, + stacklevel=2 + ) + asyncio.set_event_loop_policy(ErlangEventLoopPolicy()) + + +def _cancel_all_tasks(loop): + """Cancel all tasks in the loop (helper for run()).""" + to_cancel = asyncio.all_tasks(loop) + if not to_cancel: + return + + for task in to_cancel: + task.cancel() + + loop.run_until_complete( + asyncio.gather(*to_cancel, return_exceptions=True) + ) + + for task in to_cancel: + if task.cancelled(): + continue + if task.exception() is not None: + loop.call_exception_handler({ + 'message': 'unhandled exception during erlang.run() shutdown', + 'exception': task.exception(), + 'task': task, + }) diff --git a/priv/erlang_loop.py b/priv/_erlang_impl/_loop.py similarity index 52% rename from priv/erlang_loop.py rename to priv/_erlang_impl/_loop.py index 205cced..dcc72d2 100644 --- a/priv/erlang_loop.py +++ b/priv/_erlang_impl/_loop.py @@ -15,49 +15,39 @@ """ Erlang-native asyncio event loop implementation. -This module provides an asyncio event loop backed by Erlang's scheduler -using enif_select for I/O multiplexing. This replaces Python's polling-based -event loop with true event-driven callbacks integrated into the BEAM VM. - -Usage: - from erlang_loop import ErlangEventLoop - import asyncio - - loop = ErlangEventLoop(nif_module) - asyncio.set_event_loop(loop) - - async def main(): - await asyncio.sleep(1.0) # Uses erlang:send_after - - asyncio.run(main()) +This module provides the core ErlangEventLoop class that implements +asyncio.AbstractEventLoop using Erlang's scheduler via enif_select +for I/O multiplexing. 
+ +Architecture: +- Single event loop per interpreter (no multi-loop complexity) +- Uses enif_select for fd monitoring +- Uses erlang:send_after for timers +- Full GIL release during waits """ import asyncio -import time -import threading -import sys +import errno +import heapq +import os import socket import ssl -import weakref -import heapq -from asyncio import events, futures, tasks, protocols, transports -from asyncio import constants, coroutines, base_events +import sys +import threading +import time +from asyncio import events, futures, tasks, transports from collections import deque +from typing import Any, Callable, Optional, Tuple + +from ._mode import detect_mode, ExecutionMode + +__all__ = ['ErlangEventLoop', '_run_and_send'] # Event type constants (match C enum values for fast integer comparison) EVENT_TYPE_READ = 1 EVENT_TYPE_WRITE = 2 EVENT_TYPE_TIMER = 3 -# Try to import selector_events for transport classes -try: - from asyncio import selector_events - _SelectorSocketTransport = selector_events._SelectorSocketTransport - _SelectorDatagramTransport = selector_events._SelectorDatagramTransport -except ImportError: - _SelectorSocketTransport = None - _SelectorDatagramTransport = None - class ErlangEventLoop(asyncio.AbstractEventLoop): """asyncio event loop backed by Erlang's scheduler. @@ -69,6 +59,7 @@ class ErlangEventLoop(asyncio.AbstractEventLoop): - Zero CPU usage when idle - Full GIL release during waits - Native Erlang scheduler integration + - Subinterpreter and free-threaded Python support The loop works by: 1. 
add_reader/add_writer register fds with enif_select @@ -79,68 +70,67 @@ class ErlangEventLoop(asyncio.AbstractEventLoop): # Use __slots__ for faster attribute access and reduced memory __slots__ = ( - '_pel', '_loop_handle', # Native loop handle (capsule) for per-loop isolation + '_pel', '_loop_capsule', '_readers', '_writers', '_readers_by_cid', '_writers_by_cid', + '_fd_resources', # fd -> fd_key (shared fd_resource_t per fd) '_timers', '_timer_refs', '_timer_heap', '_handle_to_callback_id', - '_ready', '_callback_id', + '_ready', '_handle_pool', '_handle_pool_max', '_running', '_stopping', '_closed', '_thread_id', '_clock_resolution', '_exception_handler', '_current_handle', '_debug', '_task_factory', '_default_executor', - # Cached method references for hot paths '_ready_append', '_ready_popleft', + '_signal_handlers', + '_execution_mode', + '_callback_id', ) - def __init__(self, isolated=False): + def __init__(self): """Initialize the Erlang event loop. The event loop is backed by Erlang's scheduler via the py_event_loop - C module. This module provides direct access to the event loop - without going through Erlang callbacks. - - Args: - isolated: If True, create an isolated event loop with its own - pending queue. Useful for multi-threaded applications where - each thread needs its own event loop. If False (default), - use the shared global loop managed by Erlang. + C module. This provides direct access to the event loop without + going through Erlang callbacks. + + Each loop instance has its own isolated capsule for proper timer + and FD event routing. """ + # Detect execution mode for proper behavior + self._execution_mode = detect_mode() + try: import py_event_loop as pel self._pel = pel - if isolated: - # Create a new isolated loop handle - self._loop_handle = pel._loop_new() - else: - # Use shared global loop - check it's initialized - if not pel._is_initialized(): - raise RuntimeError("Erlang event loop not initialized. 
" - "Make sure erlang_python application is started.") - self._loop_handle = None + # Check if initialized + if not pel._is_initialized(): + raise RuntimeError( + "Erlang event loop not initialized. " + "Make sure erlang_python application is started." + ) except ImportError: # Fallback for testing without actual NIF self._pel = _MockNifModule() - self._loop_handle = None + + # Create isolated loop capsule + self._loop_capsule = self._pel._loop_new() # Callback management - self._readers = {} # fd -> (callback, args, callback_id, fd_key) - self._writers = {} # fd -> (callback, args, callback_id, fd_key) + self._readers = {} # fd -> (callback, args, callback_id) + self._writers = {} # fd -> (callback, args, callback_id) self._readers_by_cid = {} # callback_id -> fd (reverse map for O(1) lookup) self._writers_by_cid = {} # callback_id -> fd (reverse map for O(1) lookup) + self._fd_resources = {} # fd -> fd_key (shared fd_resource_t per fd) self._timers = {} # callback_id -> handle self._timer_refs = {} # callback_id -> timer_ref (for cancellation) - self._timer_heap = [] # min-heap of (when, callback_id) for O(1) minimum lookup - self._handle_to_callback_id = {} # handle -> callback_id (reverse map for O(1) cancellation) + self._timer_heap = [] # min-heap of (when, callback_id) + self._handle_to_callback_id = {} # handle -> callback_id self._ready = deque() # Callbacks ready to run - self._callback_id = 0 - # Cache deque methods for hot path (avoids attribute lookup) + # Cache deque methods for hot path self._ready_append = self._ready.append self._ready_popleft = self._ready.popleft # Handle object pool for reduced allocations - # Trade-off: smaller pool = less GC (better high_concurrency) - # larger pool = more reuse (better large_response) - # 150 balances both workloads self._handle_pool = [] self._handle_pool_max = 150 @@ -161,11 +151,17 @@ def __init__(self, isolated=False): # Task factory self._task_factory = None - # SSL context + # Executor 
self._default_executor = None + # Signal handlers + self._signal_handlers = {} + + # Callback ID counter + self._callback_id = 0 + def _next_id(self): - """Generate a unique callback ID.""" + """Generate a unique callback ID for this loop.""" self._callback_id += 1 return self._callback_id @@ -181,9 +177,9 @@ def run_forever(self): self._thread_id = threading.get_ident() self._running = True - self._stopping = False + # Don't reset _stopping here - honor stop() called before run_forever() - # Register as the running loop so asyncio.get_running_loop() works + # Register as the running loop old_running_loop = events._get_running_loop() events._set_running_loop(self) try: @@ -207,8 +203,6 @@ def run_until_complete(self, future): if new_task: future._log_destroy_pending = False - # Use a single callback reference to ensure proper removal - # (two different lambdas would be different objects) def _done_callback(f): self.stop() @@ -231,9 +225,8 @@ def _done_callback(f): def stop(self): """Stop the event loop.""" self._stopping = True - # Wake up the event loop if it's waiting try: - self._pel._wakeup() + self._pel._wakeup_for(self._loop_capsule) except Exception: pass @@ -260,10 +253,7 @@ def close(self): timer_ref = self._timer_refs.get(callback_id) if timer_ref is not None: try: - if self._loop_handle is not None: - self._pel._cancel_timer_for(self._loop_handle, timer_ref) - else: - self._pel._cancel_timer(timer_ref) + self._pel._cancel_timer_for(self._loop_capsule, timer_ref) except (AttributeError, RuntimeError): pass self._timers.clear() @@ -277,22 +267,30 @@ def close(self): for fd in list(self._writers.keys()): self.remove_writer(fd) - # Destroy the native loop handle if we have one - if self._loop_handle is not None: - try: - self._pel._loop_destroy(self._loop_handle) - except (AttributeError, RuntimeError): - pass - self._loop_handle = None + # Clear signal handlers + self._signal_handlers.clear() - # Shutdown default executor + # Shutdown default executor - 
wait=True ensures all executor callbacks + # complete before loop destruction to prevent use-after-free if self._default_executor is not None: - self._default_executor.shutdown(wait=False) + self._default_executor.shutdown(wait=True) self._default_executor = None + # Destroy loop capsule + try: + self._pel._loop_destroy(self._loop_capsule) + except Exception: + pass + self._loop_capsule = None + async def shutdown_asyncgens(self): - """Shutdown all active asynchronous generators.""" - # Not implemented - would need tracking of async generators + """Shutdown all active asynchronous generators. + + Note: This is a no-op in ErlangEventLoop. Async generators are + managed by Python's garbage collector. For proper cleanup, ensure + async generators are explicitly closed or exhausted before loop shutdown. + """ + # No-op: we don't track async generators to avoid global hook issues pass async def shutdown_default_executor(self, timeout=None): @@ -309,18 +307,14 @@ def call_soon(self, callback, *args, context=None): """Schedule a callback to be called soon.""" self._check_closed() handle = events.Handle(callback, args, self, context) - self._ready_append(handle) # Use cached method + self._ready_append(handle) return handle def call_soon_threadsafe(self, callback, *args, context=None): """Thread-safe version of call_soon.""" handle = self.call_soon(callback, *args, context=context) - # Wake up the event loop try: - if self._loop_handle is not None: - self._pel._wakeup_for(self._loop_handle) - else: - self._pel._wakeup() + self._pel._wakeup_for(self._loop_capsule) except Exception: pass return handle @@ -328,33 +322,33 @@ def call_soon_threadsafe(self, callback, *args, context=None): def call_later(self, delay, callback, *args, context=None): """Schedule a callback to be called after delay seconds.""" self._check_closed() - timer = self.call_at(self.time() + delay, callback, *args, context=context) - return timer + return self.call_at(self.time() + delay, callback, *args, 
context=context) def call_at(self, when, callback, *args, context=None): """Schedule a callback to be called at a specific time.""" self._check_closed() + + # For zero or past times, schedule immediately via call_soon + delay_ms = int((when - self.time()) * 1000) + if delay_ms <= 0: + return self.call_soon(callback, *args, context=context) + callback_id = self._next_id() handle = events.TimerHandle(when, callback, args, self, context) self._timers[callback_id] = handle - self._handle_to_callback_id[id(handle)] = callback_id # Reverse map for O(1) cancellation + self._handle_to_callback_id[id(handle)] = callback_id - # Push to timer heap for O(1) minimum lookup + # Push to timer heap heapq.heappush(self._timer_heap, (when, callback_id)) # Schedule with Erlang's native timer system - delay_ms = max(0, int((when - self.time()) * 1000)) try: - if self._loop_handle is not None: - timer_ref = self._pel._schedule_timer_for(self._loop_handle, delay_ms, callback_id) - else: - timer_ref = self._pel._schedule_timer(delay_ms, callback_id) + timer_ref = self._pel._schedule_timer_for(self._loop_capsule, delay_ms, callback_id) self._timer_refs[callback_id] = timer_ref except AttributeError: - pass # Fallback: mock module doesn't have _schedule_timer + pass except RuntimeError as e: - # Fail fast on initialization errors - don't silently hang raise RuntimeError(f"Timer scheduling failed: {e}") from e return handle @@ -375,7 +369,6 @@ def create_task(self, coro, *, name=None, context=None): """Schedule a coroutine to be executed.""" self._check_closed() if self._task_factory is None: - # Python 3.9 doesn't support context parameter if sys.version_info >= (3, 11): task = tasks.Task(coro, loop=self, name=name, context=context) elif sys.version_info >= (3, 8): @@ -408,74 +401,108 @@ def get_task_factory(self): def add_reader(self, fd, callback, *args): """Register a reader callback for a file descriptor.""" self._check_closed() - self.remove_reader(fd) + + # Remove old callback (but 
not the fd_resource) + if fd in self._readers: + old_entry = self._readers[fd] + self._readers_by_cid.pop(old_entry[2], None) callback_id = self._next_id() try: - if self._loop_handle is not None: - fd_key = self._pel._add_reader_for(self._loop_handle, fd, callback_id) + if fd in self._fd_resources: + # Reuse existing fd_resource, just update read callback + fd_key = self._fd_resources[fd] + self._pel._update_fd_read(fd_key, callback_id) else: - fd_key = self._pel._add_reader(fd, callback_id) - self._readers[fd] = (callback, args, callback_id, fd_key) - self._readers_by_cid[callback_id] = fd # Reverse map for O(1) dispatch + # Create new fd_resource + fd_key = self._pel._add_reader_for(self._loop_capsule, fd, callback_id) + self._fd_resources[fd] = fd_key + + self._readers[fd] = (callback, args, callback_id) + self._readers_by_cid[callback_id] = fd except Exception as e: raise RuntimeError(f"Failed to add reader: {e}") def remove_reader(self, fd): """Unregister a reader callback for a file descriptor.""" - if fd in self._readers: - entry = self._readers[fd] - callback_id = entry[2] - fd_key = entry[3] if len(entry) > 3 else None - del self._readers[fd] - self._readers_by_cid.pop(callback_id, None) # Clean up reverse map + if fd not in self._readers: + return False + + entry = self._readers.pop(fd) + callback_id = entry[2] + self._readers_by_cid.pop(callback_id, None) + + if fd in self._fd_resources: + fd_key = self._fd_resources[fd] + # Clear read monitoring but keep resource if writer active try: - if fd_key is not None: - if self._loop_handle is not None: - self._pel._remove_reader_for(self._loop_handle, fd_key) - else: - self._pel._remove_reader(fd_key) + self._pel._clear_fd_read(fd_key) except Exception: pass - return True - return False + + # Only release resource if no writer either + if fd not in self._writers: + try: + self._pel._release_fd_resource(fd_key) + except Exception: + pass + del self._fd_resources[fd] + + return True def add_writer(self, fd, 
callback, *args): """Register a writer callback for a file descriptor.""" self._check_closed() - self.remove_writer(fd) + + # Remove old callback (but not the fd_resource) + if fd in self._writers: + old_entry = self._writers[fd] + self._writers_by_cid.pop(old_entry[2], None) callback_id = self._next_id() try: - if self._loop_handle is not None: - fd_key = self._pel._add_writer_for(self._loop_handle, fd, callback_id) + if fd in self._fd_resources: + # Reuse existing fd_resource, just update write callback + fd_key = self._fd_resources[fd] + self._pel._update_fd_write(fd_key, callback_id) else: - fd_key = self._pel._add_writer(fd, callback_id) - self._writers[fd] = (callback, args, callback_id, fd_key) - self._writers_by_cid[callback_id] = fd # Reverse map for O(1) dispatch + # Create new fd_resource + fd_key = self._pel._add_writer_for(self._loop_capsule, fd, callback_id) + self._fd_resources[fd] = fd_key + + self._writers[fd] = (callback, args, callback_id) + self._writers_by_cid[callback_id] = fd except Exception as e: raise RuntimeError(f"Failed to add writer: {e}") def remove_writer(self, fd): """Unregister a writer callback for a file descriptor.""" - if fd in self._writers: - entry = self._writers[fd] - callback_id = entry[2] - fd_key = entry[3] if len(entry) > 3 else None - del self._writers[fd] - self._writers_by_cid.pop(callback_id, None) # Clean up reverse map + if fd not in self._writers: + return False + + entry = self._writers.pop(fd) + callback_id = entry[2] + self._writers_by_cid.pop(callback_id, None) + + if fd in self._fd_resources: + fd_key = self._fd_resources[fd] + # Clear write monitoring but keep resource if reader active try: - if fd_key is not None: - if self._loop_handle is not None: - self._pel._remove_writer_for(self._loop_handle, fd_key) - else: - self._pel._remove_writer(fd_key) + self._pel._clear_fd_write(fd_key) except Exception: pass - return True - return False + + # Only release resource if no reader either + if fd not in 
self._readers: + try: + self._pel._release_fd_resource(fd_key) + except Exception: + pass + del self._fd_resources[fd] + + return True # ======================================================================== # Socket operations @@ -490,7 +517,7 @@ def _recv(): data = sock.recv(nbytes) self.call_soon(fut.set_result, data) except (BlockingIOError, InterruptedError): - return # Not ready, keep waiting + return except Exception as e: self.call_soon(fut.set_exception, e) self.remove_reader(sock.fileno()) @@ -583,6 +610,57 @@ async def sock_sendfile(self, sock, file, offset=0, count=None, *, fallback=True """Send a file through a socket.""" raise NotImplementedError("sock_sendfile not implemented") + # ======================================================================== + # Unix socket operations + # ======================================================================== + + async def create_unix_connection( + self, protocol_factory, path=None, *, + ssl=None, sock=None, server_hostname=None, + ssl_handshake_timeout=None, ssl_shutdown_timeout=None): + """Create a Unix socket connection.""" + from ._transport import ErlangSocketTransport + + if sock is None: + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.setblocking(False) + await self.sock_connect(sock, path) + else: + sock.setblocking(False) + + protocol = protocol_factory() + transport = ErlangSocketTransport(self, sock, protocol) + await transport._start() + + return transport, protocol + + async def create_unix_server( + self, protocol_factory, path=None, *, + sock=None, backlog=100, ssl=None, + ssl_handshake_timeout=None, ssl_shutdown_timeout=None, + start_serving=True): + """Create a Unix socket server.""" + from ._transport import ErlangServer + + if sock is None: + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.setblocking(False) + try: + os.unlink(path) + except OSError: + if os.path.exists(path): + raise + sock.bind(path) + sock.listen(backlog) + else: + 
sock.setblocking(False) + + server = ErlangServer(self, [sock], protocol_factory, ssl, backlog) + if start_serving: + server._start_serving() + + return server + # ======================================================================== # High-level connection methods # ======================================================================== @@ -591,15 +669,14 @@ async def create_connection( self, protocol_factory, host=None, port=None, *, ssl=None, family=0, proto=0, flags=0, sock=None, local_addr=None, server_hostname=None, - ssl_handshake_timeout=None, - ssl_shutdown_timeout=None, + ssl_handshake_timeout=None, ssl_shutdown_timeout=None, happy_eyeballs_delay=None, interleave=None): """Create a streaming transport connection.""" + from ._transport import ErlangSocketTransport + if sock is not None: - # Use provided socket sock.setblocking(False) else: - # Resolve address and connect infos = await self.getaddrinfo( host, port, family=family, type=socket.SOCK_STREAM, proto=proto, flags=flags) @@ -623,9 +700,8 @@ async def create_connection( raise exceptions[0] raise OSError(f'Multiple exceptions: {exceptions}') - # Create transport and protocol protocol = protocol_factory() - transport = _ErlangSocketTransport(self, sock, protocol) + transport = ErlangSocketTransport(self, sock, protocol) try: await transport._start() @@ -640,10 +716,11 @@ async def create_server( *, family=socket.AF_UNSPEC, flags=socket.AI_PASSIVE, sock=None, backlog=100, ssl=None, reuse_address=None, reuse_port=None, - ssl_handshake_timeout=None, - ssl_shutdown_timeout=None, + ssl_handshake_timeout=None, ssl_shutdown_timeout=None, start_serving=True): """Create a TCP server.""" + from ._transport import ErlangServer + if sock is not None: sockets = [sock] else: @@ -684,52 +761,30 @@ async def create_server( sock.listen(backlog) sockets.append(sock) - server = _ErlangServer(self, sockets, protocol_factory, ssl, backlog) + server = ErlangServer(self, sockets, protocol_factory, ssl, backlog) if 
start_serving: server._start_serving() return server - async def create_datagram_endpoint(self, protocol_factory, - local_addr=None, remote_addr=None, *, - family=0, proto=0, flags=0, - reuse_address=None, reuse_port=None, - allow_broadcast=None, sock=None): - """Create datagram (UDP) connection. - - Args: - protocol_factory: Factory function returning a DatagramProtocol - local_addr: Local (host, port) tuple to bind to - remote_addr: Remote (host, port) tuple to connect to (optional) - family: Socket family (AF_INET or AF_INET6) - proto: Socket protocol number - flags: getaddrinfo flags - reuse_address: Allow reuse of local address (SO_REUSEADDR) - reuse_port: Allow reuse of local port (SO_REUSEPORT) - allow_broadcast: Allow sending to broadcast addresses (SO_BROADCAST) - sock: Pre-existing socket to use (overrides other options) - - Returns: - (transport, protocol) tuple - """ + async def create_datagram_endpoint( + self, protocol_factory, + local_addr=None, remote_addr=None, *, + family=0, proto=0, flags=0, + reuse_address=None, reuse_port=None, + allow_broadcast=None, sock=None): + """Create datagram (UDP) connection.""" + from ._transport import ErlangDatagramTransport + if sock is not None: - # Use provided socket sock.setblocking(False) else: - # Determine address family if family == 0: - if local_addr: - family = socket.AF_INET - elif remote_addr: - family = socket.AF_INET - else: - family = socket.AF_INET + family = socket.AF_INET - # Create UDP socket sock = socket.socket(family, socket.SOCK_DGRAM, proto) sock.setblocking(False) - # Apply socket options if reuse_address: sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) @@ -737,28 +792,20 @@ async def create_datagram_endpoint(self, protocol_factory, try: sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1) except (AttributeError, OSError): - # SO_REUSEPORT not available on all platforms pass if allow_broadcast: sock.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1) - # Bind to local 
address if local_addr: sock.bind(local_addr) - # Connect to remote address (makes it a connected UDP socket) if remote_addr: sock.connect(remote_addr) - # Create transport and protocol protocol = protocol_factory() - transport = _ErlangDatagramTransport( - self, sock, protocol, address=remote_addr - ) - - # Start the transport - transport._start() + transport = ErlangDatagramTransport(self, sock, protocol, address=remote_addr) + await transport._start() return transport, protocol @@ -767,12 +814,66 @@ async def create_datagram_endpoint(self, protocol_factory, # ======================================================================== def add_signal_handler(self, sig, callback, *args): - """Add a signal handler.""" - raise NotImplementedError("Signal handlers not supported in ErlangEventLoop") + """Add a signal handler. + + Note: Signal handling in Erlang integration is different from + traditional Python. Signals are trapped by Erlang and dispatched + to Python callbacks. + """ + self._check_closed() + + # Import signal here to avoid issues on Windows + import signal as signal_mod + + if sig not in (signal_mod.SIGINT, signal_mod.SIGTERM, signal_mod.SIGHUP): + raise ValueError(f"Signal {sig} not supported") + + self._signal_handlers[sig] = (callback, args) + + # Register with Erlang's signal system + try: + self._pel._signal_add_handler(sig, self._next_id()) + except AttributeError: + # Fallback: use Python's signal module + signal_mod.signal(sig, lambda s, f: self.call_soon_threadsafe(callback, *args)) def remove_signal_handler(self, sig): """Remove a signal handler.""" - raise NotImplementedError("Signal handlers not supported in ErlangEventLoop") + if sig in self._signal_handlers: + del self._signal_handlers[sig] + + try: + self._pel._signal_remove_handler(sig) + except AttributeError: + import signal as signal_mod + signal_mod.signal(sig, signal_mod.SIG_DFL) + + return True + return False + + # 
======================================================================== + # Subprocess (via Erlang ports) + # ======================================================================== + + async def subprocess_shell( + self, protocol_factory, cmd, *, + stdin=None, stdout=None, stderr=None, **kwargs): + """Run a shell command in a subprocess.""" + from ._subprocess import create_subprocess_shell + return await create_subprocess_shell( + self, protocol_factory, cmd, + stdin=stdin, stdout=stdout, stderr=stderr, **kwargs + ) + + async def subprocess_exec( + self, protocol_factory, program, *args, + stdin=None, stdout=None, stderr=None, **kwargs): + """Execute a program in a subprocess.""" + from ._subprocess import create_subprocess_exec + return await create_subprocess_exec( + self, protocol_factory, program, *args, + stdin=stdin, stdout=stdout, stderr=stderr, **kwargs + ) # ======================================================================== # Error handling @@ -827,12 +928,11 @@ def set_debug(self, enabled): def _run_once(self): """Run one iteration of the event loop.""" - # Local aliases for hot-path attributes (avoids repeated lookups) ready = self._ready popleft = self._ready_popleft return_handle = self._return_handle - # Run all ready callbacks (timer cleanup happens lazily during dispatch) + # Run all ready callbacks ntodo = len(ready) for _ in range(ntodo): if not ready: @@ -854,11 +954,11 @@ def _run_once(self): self._current_handle = None return_handle(handle) - # Calculate timeout based on next timer using heap with lazy deletion + # Calculate timeout based on next timer if ready or self._stopping: timeout = 0 elif self._timer_heap: - # Lazy cleanup - pop stale/cancelled entries from heap + # Lazy cleanup - pop stale/cancelled entries timer_heap = self._timer_heap timers = self._timers while timer_heap: @@ -867,70 +967,43 @@ def _run_once(self): if handle is None or handle._cancelled: heapq.heappop(timer_heap) continue - break # Found valid minimum + 
break if timer_heap: when, _ = timer_heap[0] timeout = max(0, int((when - self.time()) * 1000)) - timeout = max(1, min(timeout, 1000)) # Cap at 1s, min 1ms + timeout = max(1, min(timeout, 1000)) else: - # All timers cancelled - bulk cleanup on rare path timers.clear() self._timer_refs.clear() timeout = 1000 else: - timeout = 1000 # 1s max wait + timeout = 1000 - # Use combined poll + get_pending (single NIF call, integer event types) + # Poll for events try: - # Use handle-based API when loop_handle is set, legacy otherwise - if self._loop_handle is not None: - pending = self._pel._run_once_native_for(self._loop_handle, timeout) - else: - pending = self._pel._run_once_native(timeout) + pending = self._pel._run_once_native_for(self._loop_capsule, timeout) dispatch = self._dispatch for callback_id, event_type in pending: dispatch(callback_id, event_type) - except AttributeError: - # Fallback for old NIF without _run_once_native - try: - num_events = self._pel._poll_events(timeout) - if num_events > 0: - pending = self._pel._get_pending() - dispatch = self._dispatch - for callback_id, event_type in pending: - dispatch(callback_id, event_type) - except AttributeError: - pass # Mock module without these methods - except RuntimeError as e: - # Fail fast on initialization errors - don't silently hang - raise RuntimeError(f"Event loop poll failed: {e}") from e except RuntimeError as e: - # Fail fast on initialization errors - don't silently hang raise RuntimeError(f"Event loop poll failed: {e}") from e def _dispatch(self, callback_id, event_type): - """Dispatch a callback based on event type. - - Uses O(1) reverse map lookup for fd events instead of O(n) iteration. - Event types are integers for fast comparison (no string allocation). - Inlined lookup: dict.get(None) returns None, so single expression is safe. 
- """ - # Integer comparison is faster than string - NIF returns integers - if event_type == 1: # EVENT_TYPE_READ - # Inlined lookup: _readers.get(None) returns None (safe) + """Dispatch a callback based on event type.""" + if event_type == EVENT_TYPE_READ: entry = self._readers.get(self._readers_by_cid.get(callback_id)) if entry is not None: self._ready_append(self._get_handle(entry[0], entry[1])) - elif event_type == 2: # EVENT_TYPE_WRITE + elif event_type == EVENT_TYPE_WRITE: entry = self._writers.get(self._writers_by_cid.get(callback_id)) if entry is not None: self._ready_append(self._get_handle(entry[0], entry[1])) - elif event_type == 3: # EVENT_TYPE_TIMER + elif event_type == EVENT_TYPE_TIMER: handle = self._timers.pop(callback_id, None) if handle is not None: self._timer_refs.pop(callback_id, None) - self._handle_to_callback_id.pop(id(handle), None) # Clean up reverse map + self._handle_to_callback_id.pop(id(handle), None) if not handle._cancelled: self._ready_append(handle) @@ -945,18 +1018,14 @@ def _check_running(self): raise RuntimeError('This event loop is already running') def _timer_handle_cancelled(self, handle): - """Called when a TimerHandle is cancelled. - - Uses O(1) reverse map lookup instead of O(n) iteration. 
- """ - # O(1) lookup via reverse map + """Called when a TimerHandle is cancelled.""" callback_id = self._handle_to_callback_id.pop(id(handle), None) if callback_id is not None: self._timers.pop(callback_id, None) timer_ref = self._timer_refs.pop(callback_id, None) if timer_ref is not None: try: - self._pel._cancel_timer(timer_ref) + self._pel._cancel_timer_for(self._loop_capsule, timer_ref) except (AttributeError, RuntimeError): pass @@ -984,7 +1053,6 @@ def _get_handle(self, callback, args): def _return_handle(self, handle): """Return a Handle to the pool for reuse.""" if len(self._handle_pool) < self._handle_pool_max: - # Clear references to avoid keeping objects alive handle._callback = None handle._args = None self._handle_pool.append(handle) @@ -1029,441 +1097,33 @@ async def getnameinfo(self, sockaddr, flags=0): return await self.run_in_executor(None, socket.getnameinfo, sockaddr, flags) -class _ErlangSocketTransport(transports.Transport): - """Socket transport for ErlangEventLoop.""" - - __slots__ = ( - '_loop', '_sock', '_protocol', '_buffer', '_closing', '_conn_lost', - '_write_ready', '_paused', '_extra', '_fileno', - ) - - _buffer_factory = bytearray - max_size = 256 * 1024 # 256 KB - - def __init__(self, loop, sock, protocol, extra=None): - super().__init__(extra) - self._loop = loop - self._sock = sock - self._protocol = protocol - self._buffer = self._buffer_factory() - self._closing = False - self._conn_lost = 0 - self._write_ready = True - self._paused = False - self._fileno = sock.fileno() # Cache fileno to avoid repeated calls - self._extra = extra or {} - self._extra['socket'] = sock - try: - self._extra['sockname'] = sock.getsockname() - except OSError: - pass - try: - self._extra['peername'] = sock.getpeername() - except OSError: - pass - - async def _start(self): - """Start the transport.""" - self._loop.call_soon(self._protocol.connection_made, self) - self._loop.add_reader(self._fileno, self._read_ready) - - def _read_ready(self): - 
"""Called when data is available to read.""" - if self._conn_lost: - return - try: - data = self._sock.recv(self.max_size) - except (BlockingIOError, InterruptedError): - return - except Exception as exc: - self._fatal_error(exc, 'Fatal read error') - return - - if data: - self._protocol.data_received(data) - else: - # Connection closed - self._loop.remove_reader(self._fileno) - self._protocol.eof_received() - - def write(self, data): - """Write data to the transport.""" - if self._conn_lost or self._closing: - return - if not data: - return - - if not self._buffer: - try: - n = self._sock.send(data) - except (BlockingIOError, InterruptedError): - n = 0 - except Exception as exc: - self._fatal_error(exc, 'Fatal write error') - return - - if n == len(data): - return - elif n > 0: - data = data[n:] - self._loop.add_writer(self._fileno, self._write_ready_cb) - - self._buffer.extend(data) - - def _write_ready_cb(self): - """Called when socket is ready for writing.""" - if not self._buffer: - self._loop.remove_writer(self._fileno) - if self._closing: - self._call_connection_lost(None) - return - - try: - n = self._sock.send(self._buffer) - except (BlockingIOError, InterruptedError): - return - except Exception as exc: - self._loop.remove_writer(self._fileno) - self._fatal_error(exc, 'Fatal write error') - return - - if n: - del self._buffer[:n] - - if not self._buffer: - self._loop.remove_writer(self._fileno) - if self._closing: - self._call_connection_lost(None) - - def write_eof(self): - """Close the write end.""" - if self._closing: - return - self._closing = True - if not self._buffer: - self._loop.remove_reader(self._fileno) - self._call_connection_lost(None) - - def can_write_eof(self): - return True - - def close(self): - """Close the transport.""" - if self._closing: - return - self._closing = True - self._loop.remove_reader(self._fileno) - if not self._buffer: - self._conn_lost += 1 - self._call_connection_lost(None) - - def _call_connection_lost(self, exc): - 
"""Call protocol.connection_lost().""" - try: - self._protocol.connection_lost(exc) - finally: - self._sock.close() - - def _fatal_error(self, exc, message='Fatal error'): - """Handle fatal errors.""" - self._loop.call_exception_handler({ - 'message': message, - 'exception': exc, - 'transport': self, - 'protocol': self._protocol, - }) - self.close() - - def get_extra_info(self, name, default=None): - return self._extra.get(name, default) - - def is_closing(self): - return self._closing - - def get_write_buffer_size(self): - return len(self._buffer) - - def abort(self): - """Close immediately.""" - self._closing = True - self._conn_lost += 1 - self._loop.remove_reader(self._fileno) - self._loop.remove_writer(self._fileno) - self._call_connection_lost(None) - - -class _ErlangDatagramTransport(transports.DatagramTransport): - """Datagram (UDP) transport for ErlangEventLoop.""" - - __slots__ = ( - '_loop', '_sock', '_protocol', '_address', '_buffer', - '_closing', '_conn_lost', '_extra', '_fileno', - ) - - max_size = 256 * 1024 # 256 KB - - def __init__(self, loop, sock, protocol, address=None, extra=None): - super().__init__(extra) - self._loop = loop - self._sock = sock - self._protocol = protocol - self._address = address # Default remote address (for connected UDP) - self._buffer = deque() # Deque of (data, addr) tuples for O(1) popleft - self._closing = False - self._conn_lost = 0 - self._fileno = sock.fileno() # Cache fileno to avoid repeated calls - self._extra = extra or {} - self._extra['socket'] = sock - try: - self._extra['sockname'] = sock.getsockname() - except OSError: - pass - if address: - self._extra['peername'] = address - - def _start(self): - """Start the transport.""" - self._loop.call_soon(self._protocol.connection_made, self) - self._loop.add_reader(self._fileno, self._read_ready) - - def _read_ready(self): - """Called when data is available to read.""" - if self._conn_lost: - return - try: - data, addr = self._sock.recvfrom(self.max_size) - 
except (BlockingIOError, InterruptedError): - return - except OSError as exc: - self._protocol.error_received(exc) - return - except Exception as exc: - self._fatal_error(exc, 'Fatal read error on datagram transport') - return - - self._protocol.datagram_received(data, addr) - - def sendto(self, data, addr=None): - """Send data to the transport.""" - if self._conn_lost or self._closing: - return - if not data: - return - - if addr is None: - addr = self._address - - if not self._buffer: - # Try to send immediately - try: - if addr: - self._sock.sendto(data, addr) - else: - self._sock.send(data) - return - except (BlockingIOError, InterruptedError): - # Buffer and wait for write ready - self._loop.add_writer(self._fileno, self._write_ready) - except OSError as exc: - self._protocol.error_received(exc) - return - except Exception as exc: - self._fatal_error(exc, 'Fatal write error on datagram transport') - return - - # Buffer the data - self._buffer.append((data, addr)) - - def _write_ready(self): - """Called when socket is ready for writing.""" - while self._buffer: - data, addr = self._buffer[0] - try: - if addr: - self._sock.sendto(data, addr) - else: - self._sock.send(data) - except (BlockingIOError, InterruptedError): - return - except OSError as exc: - self._buffer.popleft() - self._protocol.error_received(exc) - return - except Exception as exc: - self._fatal_error(exc, 'Fatal write error on datagram transport') - return - - self._buffer.popleft() - - self._loop.remove_writer(self._fileno) - if self._closing: - self._call_connection_lost(None) - - def close(self): - """Close the transport.""" - if self._closing: - return - self._closing = True - self._loop.remove_reader(self._fileno) - if not self._buffer: - self._conn_lost += 1 - self._call_connection_lost(None) - - def _call_connection_lost(self, exc): - """Call protocol.connection_lost().""" - try: - self._protocol.connection_lost(exc) - finally: - self._sock.close() - - def _fatal_error(self, exc, 
message='Fatal error on datagram transport'): - """Handle fatal errors.""" - self._loop.call_exception_handler({ - 'message': message, - 'exception': exc, - 'transport': self, - 'protocol': self._protocol, - }) - self.close() - - def get_extra_info(self, name, default=None): - return self._extra.get(name, default) - - def is_closing(self): - return self._closing - - def abort(self): - """Close immediately.""" - self._closing = True - self._conn_lost += 1 - self._loop.remove_reader(self._fileno) - self._loop.remove_writer(self._fileno) - self._buffer.clear() - self._call_connection_lost(None) - - def get_write_buffer_size(self): - """Return the current size of the write buffer.""" - return sum(len(data) for data, _ in self._buffer) - - -class _ErlangServer: - """TCP server for ErlangEventLoop.""" - - def __init__(self, loop, sockets, protocol_factory, ssl_context, backlog): - self._loop = loop - self._sockets = sockets - self._protocol_factory = protocol_factory - self._ssl_context = ssl_context - self._backlog = backlog - self._serving = False - self._waiters = [] - - def _start_serving(self): - """Start accepting connections.""" - if self._serving: - return - self._serving = True - for sock in self._sockets: - self._loop.add_reader(sock.fileno(), self._accept_connection, sock) - - def _accept_connection(self, server_sock): - """Accept a new connection.""" - try: - conn, addr = server_sock.accept() - conn.setblocking(False) - except (BlockingIOError, InterruptedError): - return - except OSError as exc: - if exc.errno not in (errno.EMFILE, errno.ENFILE, - errno.ENOBUFS, errno.ENOMEM): - raise - return - - protocol = self._protocol_factory() - transport = _ErlangSocketTransport(self._loop, conn, protocol) - self._loop.create_task(transport._start()) - - def close(self): - """Stop the server.""" - if not self._serving: - return - self._serving = False - for sock in self._sockets: - self._loop.remove_reader(sock.fileno()) - sock.close() - self._sockets.clear() - - 
async def start_serving(self): - """Start serving.""" - self._start_serving() - - async def serve_forever(self): - """Serve forever.""" - if not self._serving: - self._start_serving() - waiter = self._loop.create_future() - self._waiters.append(waiter) - try: - await waiter - finally: - self._waiters.remove(waiter) - - def is_serving(self): - return self._serving - - def get_loop(self): - return self._loop - - @property - def sockets(self): - return tuple(self._sockets) - - async def __aenter__(self): - return self - - async def __aexit__(self, *exc): - self.close() - await self.wait_closed() - - async def wait_closed(self): - """Wait until server is closed.""" - if self._sockets: - await asyncio.sleep(0) - - -# Import errno for _ErlangServer -import errno - - -class _MockNifModule: - """Mock NIF module for testing without actual Erlang integration.""" +class _MockLoopCapsule: + """Mock loop capsule for testing.""" def __init__(self): self.readers = {} self.writers = {} self.pending = [] self._counter = 0 + self._fd_resources = {} # fd_key -> {fd, read_active, write_active, read_cid, write_cid} + + +class _MockNifModule: + """Mock NIF module for testing without actual Erlang integration.""" def _is_initialized(self): return True - def _poll_events(self, timeout_ms): - import time - time.sleep(min(timeout_ms, 10) / 1000.0) - return len(self.pending) + def _loop_new(self): + return _MockLoopCapsule() - def _get_pending(self): - result = list(self.pending) - self.pending.clear() - return result + def _loop_destroy(self, capsule): + pass - def _run_once_native(self, timeout_ms): - """Combined poll + get_pending returning integer event types.""" - import time + def _run_once_native_for(self, capsule, timeout_ms): time.sleep(min(timeout_ms, 10) / 1000.0) - # Convert string event types to integers result = [] - for callback_id, event_type in self.pending: + for callback_id, event_type in capsule.pending: if isinstance(event_type, str): if event_type == 'read': event_type 
= EVENT_TYPE_READ @@ -1472,74 +1132,88 @@ def _run_once_native(self, timeout_ms): else: event_type = EVENT_TYPE_TIMER result.append((callback_id, event_type)) - self.pending.clear() + capsule.pending.clear() return result - def _wakeup(self): + def _wakeup_for(self, capsule): pass - def _add_pending(self, callback_id, type_str): - self.pending.append((callback_id, type_str)) + def _add_reader_for(self, capsule, fd, callback_id): + capsule._counter += 1 + capsule.readers[fd] = (callback_id, capsule._counter) + return capsule._counter - def _add_reader(self, fd, callback_id): - self._counter += 1 - self.readers[fd] = (callback_id, self._counter) - return self._counter - - def _remove_reader(self, fd_key): - for fd, (cid, key) in list(self.readers.items()): + def _remove_reader_for(self, capsule, fd_key): + for fd, (cid, key) in list(capsule.readers.items()): if key == fd_key: - del self.readers[fd] + del capsule.readers[fd] break - def _add_writer(self, fd, callback_id): - self._counter += 1 - self.writers[fd] = (callback_id, self._counter) - return self._counter + def _add_writer_for(self, capsule, fd, callback_id): + capsule._counter += 1 + capsule.writers[fd] = (callback_id, capsule._counter) + return capsule._counter - def _remove_writer(self, fd_key): - for fd, (cid, key) in list(self.writers.items()): + def _remove_writer_for(self, capsule, fd_key): + for fd, (cid, key) in list(capsule.writers.items()): if key == fd_key: - del self.writers[fd] + del capsule.writers[fd] break - def _schedule_timer(self, delay_ms, callback_id): - """Mock timer scheduling.""" - return callback_id # Return callback_id as timer_ref + def _update_fd_read(self, fd_key, callback_id): + """Update read callback on existing fd_resource.""" + pass + + def _update_fd_write(self, fd_key, callback_id): + """Update write callback on existing fd_resource.""" + pass - def _cancel_timer(self, timer_ref): - """Mock timer cancellation.""" + def _clear_fd_read(self, fd_key): + """Clear read 
monitoring on fd_resource.""" pass + def _clear_fd_write(self, fd_key): + """Clear write monitoring on fd_resource.""" + pass -def get_event_loop_policy(): - """Get an event loop policy that uses ErlangEventLoop for the main thread. + def _release_fd_resource(self, fd_key): + """Release fd_resource.""" + pass - Non-main threads get the default SelectorEventLoop to avoid conflicts - with the Erlang-native event loop which is designed for the main thread. - """ - # Capture main thread ID at policy creation time - main_thread_id = threading.main_thread().ident - - class ErlangEventLoopPolicy(asyncio.AbstractEventLoopPolicy): - def __init__(self): - self._local = threading.local() - - def get_event_loop(self): - if not hasattr(self._local, 'loop') or self._local.loop is None: - self._local.loop = self.new_event_loop() - return self._local.loop - - def set_event_loop(self, loop): - self._local.loop = loop - - def new_event_loop(self): - # Only use ErlangEventLoop for the main thread - # Worker threads should use the default selector-based loop - if threading.current_thread().ident == main_thread_id: - return ErlangEventLoop() - else: - # Return default selector event loop for non-main threads - return asyncio.SelectorEventLoop() + def _schedule_timer_for(self, capsule, delay_ms, callback_id): + return callback_id - return ErlangEventLoopPolicy() + def _cancel_timer_for(self, capsule, timer_ref): + pass + + +# ============================================================================= +# Async coroutine wrapper for result delivery +# ============================================================================= + +async def _run_and_send(coro, caller_pid, ref): + """Run a coroutine and send the result to an Erlang caller via erlang.send(). + + This function wraps a coroutine and sends its result (or error) to the + specified Erlang process using erlang.send(). Used by the async worker + backend to deliver results without pthread polling. 
+ + Args: + coro: The coroutine to run + caller_pid: An erlang.Pid object for the caller process + ref: A reference to include in the result message + + The result message format is: + ('async_result', ref, ('ok', result)) - on success + ('async_result', ref, ('error', error_str)) - on failure + """ + import erlang + try: + result = await coro + erlang.send(caller_pid, ('async_result', ref, ('ok', result))) + except asyncio.CancelledError: + erlang.send(caller_pid, ('async_result', ref, ('error', 'cancelled'))) + except Exception as e: + import traceback + tb = traceback.format_exc() + erlang.send(caller_pid, ('async_result', ref, ('error', f'{type(e).__name__}: {e}\n{tb}'))) diff --git a/priv/_erlang_impl/_mode.py b/priv/_erlang_impl/_mode.py new file mode 100644 index 0000000..25efd32 --- /dev/null +++ b/priv/_erlang_impl/_mode.py @@ -0,0 +1,158 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Python execution mode detection. 
+ +This module detects the Python execution mode to enable proper +event loop behavior for different Python configurations: + +- free_threaded: Python 3.13+ with Py_GIL_DISABLED (no GIL) +- subinterp: Python 3.12+ with per-interpreter GIL +- shared_gil: Traditional Python with shared GIL +""" + +import sys +import sysconfig +from enum import Enum +from typing import Optional + +__all__ = ['ExecutionMode', 'detect_mode', 'is_free_threaded', 'is_subinterpreter'] + + +class ExecutionMode(Enum): + """Python execution mode.""" + FREE_THREADED = 'free_threaded' + SUBINTERP = 'subinterp' + SHARED_GIL = 'shared_gil' + + +# Cache the detected mode +_cached_mode: Optional[ExecutionMode] = None + + +def detect_mode() -> ExecutionMode: + """Detect the current Python execution mode. + + Returns: + ExecutionMode: One of FREE_THREADED, SUBINTERP, or SHARED_GIL. + + The detection logic: + 1. Check for free-threaded Python (3.13+ with Py_GIL_DISABLED) + 2. Check for subinterpreter mode (3.12+ per-interpreter GIL) + 3. 
Default to shared GIL mode + """ + global _cached_mode + if _cached_mode is not None: + return _cached_mode + + mode = _detect_mode_impl() + _cached_mode = mode + return mode + + +def _detect_mode_impl() -> ExecutionMode: + """Implementation of mode detection.""" + # Check for free-threaded Python (3.13+ with Py_GIL_DISABLED) + if sys.version_info >= (3, 13): + # Python 3.13+ has sys._is_gil_enabled() + if hasattr(sys, '_is_gil_enabled'): + if not sys._is_gil_enabled(): + return ExecutionMode.FREE_THREADED + + # Check sysconfig for Py_GIL_DISABLED build flag + gil_disabled = sysconfig.get_config_var("Py_GIL_DISABLED") + if gil_disabled == 1: + return ExecutionMode.FREE_THREADED + + # Check for per-interpreter GIL (Python 3.12+) + if sys.version_info >= (3, 12): + # In Python 3.12+, subinterpreters can have their own GIL + # We check if we're in a subinterpreter by comparing interpreter IDs + if _is_in_subinterpreter(): + return ExecutionMode.SUBINTERP + + # Default to shared GIL mode + return ExecutionMode.SHARED_GIL + + +def _is_in_subinterpreter() -> bool: + """Check if we're running in a subinterpreter. + + Returns: + bool: True if running in a subinterpreter, False if in main interpreter. + """ + if sys.version_info < (3, 12): + return False + + try: + # Python 3.12+ has interpreter ID support + import _interpreters + current_id = _interpreters.get_current() + main_id = _interpreters.get_main() + return current_id != main_id + except (ImportError, AttributeError): + pass + + try: + # Alternative: check via sys + if hasattr(sys, 'get_interpreter_id'): + # Main interpreter typically has ID 0 + return sys.get_interpreter_id() != 0 + except (AttributeError, TypeError): + pass + + return False + + +def is_free_threaded() -> bool: + """Check if Python is running in free-threaded mode (no GIL). + + Returns: + bool: True if running without GIL, False otherwise. 
+ """ + return detect_mode() == ExecutionMode.FREE_THREADED + + +def is_subinterpreter() -> bool: + """Check if we're running in a subinterpreter. + + Returns: + bool: True if in a subinterpreter, False if in main interpreter. + """ + return detect_mode() == ExecutionMode.SUBINTERP + + +def get_interpreter_id() -> int: + """Get the current interpreter ID. + + Returns: + int: The interpreter ID (0 for main interpreter). + """ + if sys.version_info < (3, 12): + return 0 + + try: + import _interpreters + return _interpreters.get_current() + except (ImportError, AttributeError): + pass + + try: + if hasattr(sys, 'get_interpreter_id'): + return sys.get_interpreter_id() + except (AttributeError, TypeError): + pass + + return 0 diff --git a/priv/_erlang_impl/_policy.py b/priv/_erlang_impl/_policy.py new file mode 100644 index 0000000..37b18af --- /dev/null +++ b/priv/_erlang_impl/_policy.py @@ -0,0 +1,196 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Event loop policy for Erlang-backed asyncio integration. + +This module provides an asyncio event loop policy that creates +Erlang-backed event loops, enabling transparent integration with +asyncio.run() and other asyncio APIs. +""" + +import asyncio +import threading +from typing import Optional + +__all__ = ['ErlangEventLoopPolicy'] + + +class ErlangEventLoopPolicy(asyncio.AbstractEventLoopPolicy): + """Event loop policy that uses Erlang-backed event loops. 
+ + This policy creates ErlangEventLoop instances for the main thread + and optionally for child threads depending on configuration. + + Usage: + import asyncio + import erlang + + # Install the policy + asyncio.set_event_loop_policy(erlang.EventLoopPolicy()) + + # Now asyncio.run() uses Erlang event loop + asyncio.run(main()) + + Note: + This approach is deprecated in Python 3.12+. + Use erlang.run() instead. + """ + + def __init__(self): + """Initialize the policy with thread-local storage.""" + self._local = threading.local() + self._main_thread_id = threading.main_thread().ident + self._watcher = None + + def get_event_loop(self) -> asyncio.AbstractEventLoop: + """Get the event loop for the current context. + + Creates a new event loop if one doesn't exist for the current thread. + + Returns: + asyncio.AbstractEventLoop: The event loop for this thread. + + Raises: + RuntimeError: If there is no current event loop and the current + thread is not the main thread (and no loop was set explicitly). + """ + loop = getattr(self._local, 'loop', None) + if loop is not None and not loop.is_closed(): + return loop + + # Check if we're in the main thread + if threading.current_thread().ident == self._main_thread_id: + loop = self.new_event_loop() + self.set_event_loop(loop) + return loop + + # For non-main threads, raise error (matches asyncio behavior) + raise RuntimeError( + "There is no current event loop in thread %r. " + "Use asyncio.set_event_loop() or set an explicit loop." + % threading.current_thread().name + ) + + def set_event_loop(self, loop: Optional[asyncio.AbstractEventLoop]) -> None: + """Set the event loop for the current context. + + Args: + loop: The event loop to set, or None to clear. + """ + self._local.loop = loop + + def new_event_loop(self) -> asyncio.AbstractEventLoop: + """Create a new Erlang-backed event loop. + + Returns: + ErlangEventLoop: A new event loop instance. + + Note: + Always returns ErlangEventLoop when using this policy. 
+ The Erlang event loop handles thread safety internally. + """ + # Import here to avoid circular imports + from ._loop import ErlangEventLoop + return ErlangEventLoop() + + # Child watcher methods (for subprocess support) + + def get_child_watcher(self): + """Get the child watcher. + + Deprecated in Python 3.12. + """ + if self._watcher is None: + self._init_watcher() + return self._watcher + + def set_child_watcher(self, watcher): + """Set the child watcher. + + Deprecated in Python 3.12. + """ + self._watcher = watcher + + def _init_watcher(self): + """Initialize the child watcher. + + Uses ThreadedChildWatcher which works well with Erlang integration. + """ + import sys + if sys.version_info >= (3, 12): + # Child watchers are deprecated in 3.12 + return + + if hasattr(asyncio, 'ThreadedChildWatcher'): + self._watcher = asyncio.ThreadedChildWatcher() + elif hasattr(asyncio, 'SafeChildWatcher'): + self._watcher = asyncio.SafeChildWatcher() + + +class _ErlangChildWatcher: + """Child watcher that delegates to Erlang for process monitoring. + + This watcher uses Erlang ports and monitors instead of SIGCHLD, + making it compatible with subinterpreters and free-threaded Python. + """ + + def __init__(self): + self._callbacks = {} + self._loop = None + + def attach_loop(self, loop): + """Attach to an event loop.""" + self._loop = loop + + def close(self): + """Close the watcher.""" + self._callbacks.clear() + self._loop = None + + def is_active(self): + """Return True if the watcher is active.""" + return self._loop is not None and not self._loop.is_closed() + + def add_child_handler(self, pid, callback, *args): + """Register a callback for when a child process exits. + + Args: + pid: Process ID to watch. + callback: Callback function(pid, returncode, *args). + *args: Additional arguments for the callback. 
+ """ + self._callbacks[pid] = (callback, args) + # TODO: Use Erlang port monitoring + + def remove_child_handler(self, pid): + """Remove the handler for a child process. + + Returns: + bool: True if handler was removed, False if not found. + """ + return self._callbacks.pop(pid, None) is not None + + def _do_waitpid(self, pid, returncode): + """Called when a child process exits. + + Args: + pid: Process ID that exited. + returncode: Exit code of the process. + """ + entry = self._callbacks.pop(pid, None) + if entry is not None: + callback, args = entry + if self._loop is not None and not self._loop.is_closed(): + self._loop.call_soon_threadsafe(callback, pid, returncode, *args) diff --git a/priv/_erlang_impl/_reactor.py b/priv/_erlang_impl/_reactor.py new file mode 100644 index 0000000..2144df9 --- /dev/null +++ b/priv/_erlang_impl/_reactor.py @@ -0,0 +1,251 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Erlang Reactor - fd-based protocol layer. + +This module provides a low-level fd-based API where Erlang handles I/O +scheduling via enif_select and Python handles protocol logic. + +Works with any fd - TCP, UDP, Unix sockets, pipes, etc. 
+ +Example usage: + + import erlang.reactor as reactor + + class EchoProtocol(reactor.Protocol): + def data_received(self, data): + self.write_buffer.extend(data) + return "write_pending" + + def write_ready(self): + written = self.write(bytes(self.write_buffer)) + del self.write_buffer[:written] + return "continue" if self.write_buffer else "read_pending" + + reactor.set_protocol_factory(EchoProtocol) +""" + +import os +from typing import Dict, Optional, Callable + +__all__ = [ + 'Protocol', + 'set_protocol_factory', + 'get_protocol', + 'init_connection', + 'on_read_ready', + 'on_write_ready', + 'close_connection', +] + + +class Protocol: + """Base protocol for Erlang reactor. + + Subclasses implement data_received() and write_ready() to handle + I/O events. The base class provides buffer management and fd I/O. + + Attributes: + fd: The file descriptor for this connection + client_info: Dict with connection metadata (addr, port, type, etc.) + write_buffer: Bytearray for buffering writes + closed: Whether the connection is closed + """ + + def __init__(self): + """Initialize protocol with empty state. + + Note: fd and client_info are set later via connection_made(). + """ + self.fd = -1 + self.client_info = {} + self.write_buffer = bytearray() + self.closed = False + + def connection_made(self, fd: int, client_info: dict): + """Called when fd is handed off from Erlang. + + Args: + fd: File descriptor for the connection + client_info: Dict with connection metadata (e.g., addr, port, type) + """ + self.fd = fd + self.client_info = client_info + + def data_received(self, data: bytes) -> str: + """Handle received data. + + Called when data has been read from the fd. 
+ + Args: + data: The bytes that were read + + Returns: + Action string: + - "continue": More data expected, re-register for read + - "write_pending": Response ready, switch to write mode + - "close": Close the connection + """ + raise NotImplementedError + + def write_ready(self) -> str: + """Handle write readiness. + + Called when the fd is ready for writing. + + Returns: + Action string: + - "continue": More data to write, stay in write mode + - "read_pending": Done writing, switch back to read mode + - "close": Close the connection + """ + raise NotImplementedError + + def connection_lost(self): + """Called when connection closes. + + Override to perform cleanup when the connection ends. + """ + pass + + def read(self, size: int = 65536) -> bytes: + """Read from fd. + + Args: + size: Maximum bytes to read + + Returns: + Bytes read, or empty bytes on EOF/error + """ + try: + return os.read(self.fd, size) + except (BlockingIOError, OSError): + return b'' + + def write(self, data: bytes) -> int: + """Write to fd. + + Args: + data: Bytes to write + + Returns: + Number of bytes written, or 0 on error + """ + try: + return os.write(self.fd, data) + except (BlockingIOError, OSError): + return 0 + + +# ============================================================================= +# Registry +# ============================================================================= + +_protocols: Dict[int, Protocol] = {} +_protocol_factory: Optional[Callable[[], Protocol]] = None + + +def set_protocol_factory(factory: Callable[[], Protocol]): + """Set factory for creating protocols. + + The factory is called for each new connection to create a Protocol instance. + + Args: + factory: Callable that returns a Protocol instance + """ + global _protocol_factory + _protocol_factory = factory + + +def get_protocol(fd: int) -> Optional[Protocol]: + """Get the protocol instance for an fd. 
+ + Args: + fd: File descriptor + + Returns: + Protocol instance or None if not found + """ + return _protocols.get(fd) + + +# ============================================================================= +# NIF callbacks (called by py_reactor_context) +# ============================================================================= + +def init_connection(fd: int, client_info: dict): + """Called by NIF on fd_handoff. + + Creates a protocol instance using the factory and registers it. + + Args: + fd: File descriptor + client_info: Connection metadata from Erlang + """ + global _protocols, _protocol_factory + if _protocol_factory is not None: + proto = _protocol_factory() + proto.connection_made(fd, client_info) + _protocols[fd] = proto + + +def on_read_ready(fd: int) -> str: + """Called by NIF when fd readable. + + Reads data from the fd and passes it to the protocol. + + Args: + fd: File descriptor + + Returns: + Action string from protocol.data_received() + """ + proto = _protocols.get(fd) + if proto is None: + return "close" + data = proto.read() + if not data: + return "close" + return proto.data_received(data) + + +def on_write_ready(fd: int) -> str: + """Called by NIF when fd writable. + + Calls the protocol's write_ready method. + + Args: + fd: File descriptor + + Returns: + Action string from protocol.write_ready() + """ + proto = _protocols.get(fd) + if proto is None: + return "close" + return proto.write_ready() + + +def close_connection(fd: int): + """Called by NIF on close. + + Removes the protocol from the registry and calls connection_lost. 
+ + Args: + fd: File descriptor + """ + proto = _protocols.pop(fd, None) + if proto is not None: + proto.closed = True + proto.connection_lost() diff --git a/priv/_erlang_impl/_sandbox.py b/priv/_erlang_impl/_sandbox.py new file mode 100644 index 0000000..c034378 --- /dev/null +++ b/priv/_erlang_impl/_sandbox.py @@ -0,0 +1,86 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Sandbox module using Python's audit hook mechanism (PEP 578). + +When Python runs embedded in Erlang, certain operations are unsafe and must +be blocked at a low level. This module uses audit hooks to intercept these +operations before they execute. + +Blocked operations: +- subprocess.Popen - would use fork() which corrupts Erlang VM +- os.system, os.popen - shell execution +- os.fork, os.forkpty - process forking +- os.exec*, os.spawn* - process execution +- os.posix_spawn* - POSIX process spawning +""" + +import sys + +__all__ = ['install_sandbox', 'is_sandboxed'] + +_sandboxed = False + +# Subprocess/process audit events to block +_SUBPROCESS_EVENTS = frozenset({ + 'subprocess.Popen', + 'os.system', + 'os.popen', + 'os.fork', + 'os.forkpty', + 'os.posix_spawn', + 'os.posix_spawnp', +}) + +# os.exec* and os.spawn* prefixes +_EXEC_PREFIXES = ('os.exec', 'os.spawn') + +_ERROR_MSG = ( + "blocked in Erlang VM context. " + "fork()/exec() would corrupt the Erlang runtime. " + "Use Erlang ports (open_port/2) for subprocess management." 
+) + + +def _sandbox_hook(event, args): + """Audit hook that blocks dangerous subprocess operations.""" + # Fast path: check direct matches + if event in _SUBPROCESS_EVENTS: + raise RuntimeError(f"{event} is {_ERROR_MSG}") + + # Check exec/spawn prefixes + for prefix in _EXEC_PREFIXES: + if event.startswith(prefix): + raise RuntimeError(f"{event} is {_ERROR_MSG}") + + +def install_sandbox(): + """Install the sandbox audit hook. + + Once installed, audit hooks cannot be removed. This blocks all + subprocess/fork/exec operations for the lifetime of the Python + interpreter. + """ + global _sandboxed + if _sandboxed: + return + + sys.addaudithook(_sandbox_hook) + _sandboxed = True + + +def is_sandboxed(): + """Check if sandbox is active.""" + return _sandboxed diff --git a/priv/_erlang_impl/_ssl.py b/priv/_erlang_impl/_ssl.py new file mode 100644 index 0000000..133428f --- /dev/null +++ b/priv/_erlang_impl/_ssl.py @@ -0,0 +1,329 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +SSL/TLS transport using ssl.MemoryBIO. + +This module provides SSL/TLS support that works with the Erlang event +loop by using Python's ssl.MemoryBIO for encryption while letting +Erlang handle the socket I/O. 
+ +Architecture: +- Raw socket data flows through Erlang (enif_select) +- Encryption/decryption happens in Python via MemoryBIO +- Application sees decrypted data +""" + +import ssl +from asyncio import transports +from typing import Any, Optional, Callable + +__all__ = ['SSLTransport', 'create_ssl_transport'] + + +class SSLTransport(transports.Transport): + """SSL transport using ssl.MemoryBIO for encryption. + + This transport wraps a raw transport and provides transparent + SSL/TLS encryption using Python's ssl module with MemoryBIO. + + The key insight is that MemoryBIO allows us to do SSL encryption + without requiring a real socket file descriptor, which works + perfectly with Erlang's enif_select model. + """ + + max_size = 256 * 1024 # 256 KB + + def __init__(self, loop, raw_transport, protocol, ssl_context, + server_hostname=None, server_side=False, + ssl_handshake_timeout=None, call_connection_made=True): + """Initialize the SSL transport. + + Args: + loop: The event loop. + raw_transport: The underlying raw transport. + protocol: The application protocol. + ssl_context: SSL context for encryption. + server_hostname: Hostname for SNI (client side). + server_side: True if this is a server connection. + ssl_handshake_timeout: Timeout for the SSL handshake. + call_connection_made: Whether to call connection_made. 
+ """ + self._loop = loop + self._raw_transport = raw_transport + self._protocol = protocol + self._ssl_context = ssl_context + self._server_hostname = server_hostname + self._server_side = server_side + self._handshake_timeout = ssl_handshake_timeout + self._call_connection_made = call_connection_made + + # SSL state + self._incoming = ssl.MemoryBIO() + self._outgoing = ssl.MemoryBIO() + self._ssl_object = ssl_context.wrap_bio( + self._incoming, self._outgoing, + server_side=server_side, + server_hostname=server_hostname + ) + + # State flags + self._handshake_complete = False + self._closing = False + self._closed = False + self._write_buffer = [] + + # Extra info + self._extra = { + 'ssl_context': ssl_context, + } + + # Create a protocol that receives raw data + self._raw_protocol = _SSLRawProtocol(self) + + async def _start(self): + """Start the SSL transport and perform handshake.""" + # Replace the raw transport's protocol with ours + self._raw_transport._protocol = self._raw_protocol + + # Perform SSL handshake + await self._do_handshake() + + # Update extra info + self._extra['peercert'] = self._ssl_object.getpeercert() + self._extra['cipher'] = self._ssl_object.cipher() + self._extra['compression'] = self._ssl_object.compression() + self._extra['ssl_object'] = self._ssl_object + + # Notify application protocol + if self._call_connection_made: + self._loop.call_soon(self._protocol.connection_made, self) + + async def _do_handshake(self): + """Perform SSL handshake.""" + while not self._handshake_complete: + try: + self._ssl_object.do_handshake() + self._handshake_complete = True + except ssl.SSLWantReadError: + # Need to send data and receive more + self._flush_outgoing() + await self._wait_for_data() + except ssl.SSLWantWriteError: + # Need to send buffered data + self._flush_outgoing() + + def _flush_outgoing(self): + """Flush outgoing encrypted data to raw transport.""" + data = self._outgoing.read() + if data: + self._raw_transport.write(data) + + async 
def _wait_for_data(self): + """Wait for data from the raw transport.""" + fut = self._loop.create_future() + + def on_data(): + if not fut.done(): + fut.set_result(None) + + self._raw_protocol._read_waiter = on_data + try: + await fut + finally: + self._raw_protocol._read_waiter = None + + def _on_raw_data(self, data: bytes): + """Called when raw encrypted data is received. + + Args: + data: Encrypted data from the network. + """ + self._incoming.write(data) + + if not self._handshake_complete: + # Still handshaking, notify waiter + return + + # Decrypt and deliver to application + try: + while True: + chunk = self._ssl_object.read(self.max_size) + if chunk: + self._protocol.data_received(chunk) + else: + break + except ssl.SSLWantReadError: + pass + except ssl.SSLError as e: + self._fatal_error(e, 'SSL read error') + + def _on_raw_eof(self): + """Called when the raw transport receives EOF.""" + try: + self._ssl_object.unwrap() + except ssl.SSLError: + pass + + if hasattr(self._protocol, 'eof_received'): + self._protocol.eof_received() + + def write(self, data: bytes): + """Write data to the transport. + + Args: + data: Plaintext data to send. 
+ """ + if self._closing or self._closed: + return + if not data: + return + + if not self._handshake_complete: + self._write_buffer.append(data) + return + + try: + self._ssl_object.write(data) + self._flush_outgoing() + except ssl.SSLError as e: + self._fatal_error(e, 'SSL write error') + + def writelines(self, list_of_data): + """Write a list of data items.""" + for data in list_of_data: + self.write(data) + + def write_eof(self): + """Close the write end of the transport.""" + if self._closing: + return + self._closing = True + + try: + self._ssl_object.unwrap() + self._flush_outgoing() + except ssl.SSLError: + pass + + self._raw_transport.write_eof() + + def can_write_eof(self): + return False + + def close(self): + """Close the transport.""" + if self._closed: + return + self._closing = True + + try: + self._ssl_object.unwrap() + self._flush_outgoing() + except ssl.SSLError: + pass + + self._raw_transport.close() + self._closed = True + + def is_closing(self): + return self._closing + + def abort(self): + """Close immediately without flushing.""" + self._closed = True + self._raw_transport.abort() + + def get_extra_info(self, name, default=None): + if name in self._extra: + return self._extra[name] + return self._raw_transport.get_extra_info(name, default) + + def get_write_buffer_size(self): + return self._raw_transport.get_write_buffer_size() + + def get_write_buffer_limits(self): + return self._raw_transport.get_write_buffer_limits() + + def set_write_buffer_limits(self, high=None, low=None): + self._raw_transport.set_write_buffer_limits(high, low) + + def pause_reading(self): + self._raw_transport.pause_reading() + + def resume_reading(self): + self._raw_transport.resume_reading() + + def is_reading(self): + return self._raw_transport.is_reading() + + def _fatal_error(self, exc, message='Fatal SSL error'): + """Handle fatal SSL errors.""" + self._loop.call_exception_handler({ + 'message': message, + 'exception': exc, + 'transport': self, + 'protocol': 
self._protocol, + }) + self.abort() + + +class _SSLRawProtocol: + """Protocol that receives raw encrypted data for SSLTransport.""" + + def __init__(self, ssl_transport): + self._ssl_transport = ssl_transport + self._read_waiter = None + + def connection_made(self, transport): + pass + + def data_received(self, data): + self._ssl_transport._on_raw_data(data) + if self._read_waiter is not None: + self._read_waiter() + + def eof_received(self): + self._ssl_transport._on_raw_eof() + + def connection_lost(self, exc): + self._ssl_transport._protocol.connection_lost(exc) + + +async def create_ssl_transport( + loop, raw_transport, protocol, ssl_context, + server_hostname=None, server_side=False, + ssl_handshake_timeout=None): + """Create an SSL transport wrapping a raw transport. + + Args: + loop: The event loop. + raw_transport: The underlying raw transport. + protocol: The application protocol. + ssl_context: SSL context for encryption. + server_hostname: Hostname for SNI (client side). + server_side: True if this is a server connection. + ssl_handshake_timeout: Timeout for the SSL handshake. + + Returns: + The SSL transport. + """ + transport = SSLTransport( + loop, raw_transport, protocol, ssl_context, + server_hostname=server_hostname, + server_side=server_side, + ssl_handshake_timeout=ssl_handshake_timeout + ) + await transport._start() + return transport diff --git a/priv/_erlang_impl/_subprocess.py b/priv/_erlang_impl/_subprocess.py new file mode 100644 index 0000000..b8697af --- /dev/null +++ b/priv/_erlang_impl/_subprocess.py @@ -0,0 +1,77 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Subprocess support is not available in ErlangEventLoop. + +Rationale: + Erlang's port system provides superior subprocess management with + built-in supervision, monitoring, and fault tolerance. Additionally, + Python's subprocess module uses fork() which corrupts the Erlang VM + when called from within the NIF. + +Alternative: + Use Erlang ports directly via erlang.call() for subprocess needs. + +Example: + # In Python: + result = erlang.call('my_module', 'run_shell', [b'echo hello']) + + # In Erlang (my_module.erl): + run_shell(Cmd) -> + Port = open_port({spawn, binary_to_list(Cmd)}, + [binary, exit_status, stderr_to_stdout]), + collect_output(Port, []). + + collect_output(Port, Acc) -> + receive + {Port, {data, Data}} -> + collect_output(Port, [Data | Acc]); + {Port, {exit_status, 0}} -> + {ok, iolist_to_binary(lists:reverse(Acc))}; + {Port, {exit_status, N}} -> + {error, {exit_status, N, iolist_to_binary(lists:reverse(Acc))}} + after 30000 -> + port_close(Port), + {error, timeout} + end. +""" + +__all__ = [ + 'create_subprocess_shell', + 'create_subprocess_exec', +] + + +_NOT_SUPPORTED_MSG = """\ +Subprocess is not supported in ErlangEventLoop. + +Python's subprocess module uses fork() which corrupts the Erlang VM. +Use Erlang ports directly via erlang.call() instead. + +Example: + result = erlang.call('my_module', 'run_shell', [b'echo hello']) + +See the module docstring for a complete Erlang implementation example. 
+""" + + +async def create_subprocess_shell(loop, protocol_factory, cmd, **kwargs): + """Not supported - raises NotImplementedError.""" + raise NotImplementedError(_NOT_SUPPORTED_MSG) + + +async def create_subprocess_exec(loop, protocol_factory, program, *args, **kwargs): + """Not supported - raises NotImplementedError.""" + raise NotImplementedError(_NOT_SUPPORTED_MSG) diff --git a/priv/_erlang_impl/_transport.py b/priv/_erlang_impl/_transport.py new file mode 100644 index 0000000..e3b0b12 --- /dev/null +++ b/priv/_erlang_impl/_transport.py @@ -0,0 +1,480 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Transport classes for the Erlang event loop. + +This module provides asyncio-compatible transport implementations +for TCP, UDP, and Unix sockets backed by the Erlang event loop. +""" + +import asyncio +import errno +import socket +from asyncio import transports +from collections import deque +from typing import Any, Optional, Tuple + +__all__ = [ + 'ErlangSocketTransport', + 'ErlangDatagramTransport', + 'ErlangServer', +] + + +class ErlangSocketTransport(transports.Transport): + """Socket transport for ErlangEventLoop. + + Implements asyncio.Transport for TCP and Unix stream sockets. 
+ """ + + __slots__ = ( + '_loop', '_sock', '_protocol', '_buffer', '_closing', '_conn_lost', + '_write_ready', '_paused', '_extra', '_fileno', + ) + + _buffer_factory = bytearray + max_size = 256 * 1024 # 256 KB + + def __init__(self, loop, sock, protocol, extra=None): + super().__init__(extra) + self._loop = loop + self._sock = sock + self._protocol = protocol + self._buffer = self._buffer_factory() + self._closing = False + self._conn_lost = 0 + self._write_ready = True + self._paused = False + self._fileno = sock.fileno() + self._extra = extra or {} + self._extra['socket'] = sock + try: + self._extra['sockname'] = sock.getsockname() + except OSError: + pass + try: + self._extra['peername'] = sock.getpeername() + except OSError: + pass + + async def _start(self): + """Start the transport.""" + self._loop.call_soon(self._protocol.connection_made, self) + self._loop.add_reader(self._fileno, self._read_ready) + + def _read_ready(self): + """Called when data is available to read.""" + if self._conn_lost: + return + try: + data = self._sock.recv(self.max_size) + except (BlockingIOError, InterruptedError): + return + except Exception as exc: + self._fatal_error(exc, 'Fatal read error') + return + + if data: + self._protocol.data_received(data) + else: + # Connection closed (EOF received) + self._loop.remove_reader(self._fileno) + keep_open = self._protocol.eof_received() + # If eof_received returns False/None, close the transport + if not keep_open: + self._closing = True + self._conn_lost += 1 + self._call_connection_lost(None) + + def write(self, data): + """Write data to the transport.""" + if self._conn_lost or self._closing: + return + if not data: + return + + if not self._buffer: + try: + n = self._sock.send(data) + except (BlockingIOError, InterruptedError): + n = 0 + except Exception as exc: + self._fatal_error(exc, 'Fatal write error') + return + + if n == len(data): + return + elif n > 0: + data = data[n:] + self._loop.add_writer(self._fileno, 
self._write_ready_cb) + + self._buffer.extend(data) + + def _write_ready_cb(self): + """Called when socket is ready for writing.""" + if not self._buffer: + self._loop.remove_writer(self._fileno) + if self._closing: + self._call_connection_lost(None) + return + + try: + n = self._sock.send(self._buffer) + except (BlockingIOError, InterruptedError): + return + except Exception as exc: + self._loop.remove_writer(self._fileno) + self._fatal_error(exc, 'Fatal write error') + return + + if n: + del self._buffer[:n] + + if not self._buffer: + self._loop.remove_writer(self._fileno) + if self._closing: + self._call_connection_lost(None) + + def write_eof(self): + """Close the write end.""" + if self._closing: + return + self._closing = True + if not self._buffer: + self._loop.remove_reader(self._fileno) + self._call_connection_lost(None) + + def can_write_eof(self): + return True + + def close(self): + """Close the transport.""" + if self._closing: + return + self._closing = True + self._loop.remove_reader(self._fileno) + if not self._buffer: + self._conn_lost += 1 + self._call_connection_lost(None) + + def _call_connection_lost(self, exc): + """Call protocol.connection_lost().""" + try: + self._protocol.connection_lost(exc) + finally: + self._sock.close() + + def _fatal_error(self, exc, message='Fatal error'): + """Handle fatal errors.""" + self._loop.call_exception_handler({ + 'message': message, + 'exception': exc, + 'transport': self, + 'protocol': self._protocol, + }) + self.close() + + def get_extra_info(self, name, default=None): + return self._extra.get(name, default) + + def is_closing(self): + return self._closing + + def get_write_buffer_size(self): + return len(self._buffer) + + def get_write_buffer_limits(self): + return (0, 0) + + def set_write_buffer_limits(self, high=None, low=None): + pass + + def abort(self): + """Close immediately.""" + self._closing = True + self._conn_lost += 1 + self._loop.remove_reader(self._fileno) + 
self._loop.remove_writer(self._fileno) + self._call_connection_lost(None) + + def pause_reading(self): + """Pause reading from the transport.""" + if self._closing or self._paused: + return + self._paused = True + self._loop.remove_reader(self._fileno) + + def resume_reading(self): + """Resume reading from the transport.""" + if self._closing or not self._paused: + return + self._paused = False + self._loop.add_reader(self._fileno, self._read_ready) + + def is_reading(self): + """Return True if the transport is receiving.""" + return not self._paused and not self._closing + + +class ErlangDatagramTransport(transports.DatagramTransport): + """Datagram (UDP) transport for ErlangEventLoop.""" + + __slots__ = ( + '_loop', '_sock', '_protocol', '_address', '_buffer', + '_closing', '_conn_lost', '_extra', '_fileno', + ) + + max_size = 256 * 1024 # 256 KB + + def __init__(self, loop, sock, protocol, address=None, extra=None): + super().__init__(extra) + self._loop = loop + self._sock = sock + self._protocol = protocol + self._address = address + self._buffer = deque() + self._closing = False + self._conn_lost = 0 + self._fileno = sock.fileno() + self._extra = extra or {} + self._extra['socket'] = sock + try: + self._extra['sockname'] = sock.getsockname() + except OSError: + pass + if address: + self._extra['peername'] = address + + async def _start(self): + """Start the transport.""" + # Call connection_made directly to ensure it runs before returning + self._protocol.connection_made(self) + self._loop.add_reader(self._fileno, self._read_ready) + + def _read_ready(self): + """Called when data is available to read.""" + if self._conn_lost: + return + try: + data, addr = self._sock.recvfrom(self.max_size) + except (BlockingIOError, InterruptedError): + return + except OSError as exc: + self._protocol.error_received(exc) + return + except Exception as exc: + self._fatal_error(exc, 'Fatal read error on datagram transport') + return + + self._protocol.datagram_received(data, 
addr) + + def sendto(self, data, addr=None): + """Send data to the transport.""" + if self._conn_lost or self._closing: + return + if not data: + return + + if addr is None: + addr = self._address + + if not self._buffer: + try: + # For connected sockets (self._address is set), use send() not sendto() + # because sendto() with an address fails with "Socket is already connected" + if self._address is not None: + # Connected socket - use send() + self._sock.send(data) + elif addr: + # Not connected, addr provided - use sendto() + self._sock.sendto(data, addr) + else: + # Not connected, no addr - use send() (will fail if not connected) + self._sock.send(data) + return + except (BlockingIOError, InterruptedError): + self._loop.add_writer(self._fileno, self._write_ready) + except OSError as exc: + self._protocol.error_received(exc) + return + except Exception as exc: + self._fatal_error(exc, 'Fatal write error on datagram transport') + return + + self._buffer.append((data, addr)) + + def _write_ready(self): + """Called when socket is ready for writing.""" + while self._buffer: + data, addr = self._buffer[0] + try: + # For connected sockets (self._address is set), use send() not sendto() + if self._address is not None: + self._sock.send(data) + elif addr: + self._sock.sendto(data, addr) + else: + self._sock.send(data) + except (BlockingIOError, InterruptedError): + return + except OSError as exc: + self._buffer.popleft() + self._protocol.error_received(exc) + return + except Exception as exc: + self._fatal_error(exc, 'Fatal write error on datagram transport') + return + + self._buffer.popleft() + + self._loop.remove_writer(self._fileno) + if self._closing: + self._call_connection_lost(None) + + def close(self): + """Close the transport.""" + if self._closing: + return + self._closing = True + self._loop.remove_reader(self._fileno) + if not self._buffer: + self._conn_lost += 1 + self._call_connection_lost(None) + + def _call_connection_lost(self, exc): + """Call 
protocol.connection_lost().""" + try: + self._protocol.connection_lost(exc) + finally: + self._sock.close() + + def _fatal_error(self, exc, message='Fatal error on datagram transport'): + """Handle fatal errors.""" + self._loop.call_exception_handler({ + 'message': message, + 'exception': exc, + 'transport': self, + 'protocol': self._protocol, + }) + self.close() + + def get_extra_info(self, name, default=None): + return self._extra.get(name, default) + + def is_closing(self): + return self._closing + + def abort(self): + """Close immediately.""" + self._closing = True + self._conn_lost += 1 + self._loop.remove_reader(self._fileno) + self._loop.remove_writer(self._fileno) + self._buffer.clear() + self._call_connection_lost(None) + + def get_write_buffer_size(self): + """Return the current size of the write buffer.""" + return sum(len(data) for data, _ in self._buffer) + + +class ErlangServer: + """TCP/Unix server for ErlangEventLoop.""" + + def __init__(self, loop, sockets, protocol_factory, ssl_context, backlog): + self._loop = loop + self._sockets = sockets + self._protocol_factory = protocol_factory + self._ssl_context = ssl_context + self._backlog = backlog + self._serving = False + self._waiters = [] + + def _start_serving(self): + """Start accepting connections.""" + if self._serving: + return + self._serving = True + for sock in self._sockets: + self._loop.add_reader(sock.fileno(), self._accept_connection, sock) + + def _accept_connection(self, server_sock): + """Accept a new connection.""" + try: + conn, addr = server_sock.accept() + conn.setblocking(False) + except (BlockingIOError, InterruptedError): + return + except OSError as exc: + if exc.errno not in (errno.EMFILE, errno.ENFILE, + errno.ENOBUFS, errno.ENOMEM): + raise + return + + protocol = self._protocol_factory() + transport = ErlangSocketTransport(self._loop, conn, protocol) + self._loop.create_task(transport._start()) + + def close(self): + """Stop the server.""" + if not self._serving: + return 
+ self._serving = False + for sock in self._sockets: + self._loop.remove_reader(sock.fileno()) + sock.close() + self._sockets.clear() + + # Wake up waiters + for waiter in self._waiters: + if not waiter.done(): + waiter.set_result(None) + + async def start_serving(self): + """Start serving.""" + self._start_serving() + + async def serve_forever(self): + """Serve forever.""" + if not self._serving: + self._start_serving() + waiter = self._loop.create_future() + self._waiters.append(waiter) + try: + await waiter + finally: + self._waiters.remove(waiter) + + def is_serving(self): + return self._serving + + def get_loop(self): + return self._loop + + @property + def sockets(self): + return tuple(self._sockets) + + async def __aenter__(self): + return self + + async def __aexit__(self, *exc): + self.close() + await self.wait_closed() + + async def wait_closed(self): + """Wait until server is closed.""" + if self._sockets: + await asyncio.sleep(0) diff --git a/priv/erlang_asyncio.py b/priv/erlang_asyncio.py deleted file mode 100644 index 9b3ba82..0000000 --- a/priv/erlang_asyncio.py +++ /dev/null @@ -1,348 +0,0 @@ -# Copyright 2026 Benoit Chesneau -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Erlang-native asyncio primitives. - -This module provides async primitives that use Erlang's native scheduler -instead of Python's asyncio event loop, for maximum performance. 
- -Usage: - import erlang_asyncio - - # Get the event loop - loop = erlang_asyncio.get_event_loop() - - # Use sleep - async def handler(): - await erlang_asyncio.sleep(0.001) # 1ms sleep using Erlang timer -""" - -import asyncio -import py_event_loop as _pel - -# Import ErlangEventLoop -try: - from erlang_loop import ErlangEventLoop, get_event_loop_policy as _get_policy - _has_erlang_loop = True -except ImportError: - ErlangEventLoop = None - _get_policy = None - _has_erlang_loop = False - - -def get_event_loop(): - """Get the current Erlang event loop. - - Returns an ErlangEventLoop instance that uses Erlang's scheduler - for I/O multiplexing and timers. - - Returns: - ErlangEventLoop instance - - Example: - import erlang_asyncio - - loop = erlang_asyncio.get_event_loop() - loop.run_until_complete(my_coro()) - """ - if _has_erlang_loop: - # Set policy if not already set - policy = asyncio.get_event_loop_policy() - if not isinstance(policy, type(_get_policy())): - asyncio.set_event_loop_policy(_get_policy()) - return asyncio.get_event_loop() - else: - return asyncio.get_event_loop() - - -def new_event_loop(): - """Create a new Erlang event loop. - - Returns: - New ErlangEventLoop instance - """ - if _has_erlang_loop: - return ErlangEventLoop() - else: - return asyncio.new_event_loop() - - -def set_event_loop(loop): - """Set the current event loop.""" - asyncio.set_event_loop(loop) - - -def get_running_loop(): - """Get the running event loop. - - Raises RuntimeError if no loop is running. - """ - return asyncio.get_running_loop() - - -async def sleep(delay: float, result=None): - """Sleep for the specified delay using Erlang's timer system. - - This is a drop-in replacement for asyncio.sleep() that uses - Erlang's native timer system instead of the asyncio event loop. 
- - Args: - delay: Time to sleep in seconds (float) - result: Optional value to return after sleeping (default None) - - Returns: - The result argument - - Example: - import erlang_asyncio - - async def my_handler(): - await erlang_asyncio.sleep(0.1) # Sleep 100ms - value = await erlang_asyncio.sleep(0.05, result='done') - """ - if delay <= 0: - return result - - # Convert seconds to milliseconds - delay_ms = int(delay * 1000) - if delay_ms < 1: - delay_ms = 1 # Minimum 1ms - - # Use the synchronous Erlang sleep - _pel._erlang_sleep(delay_ms) - - return result - - -def run(coro): - """Run a coroutine using the Erlang event loop. - - Similar to asyncio.run() but uses ErlangEventLoop. - - Args: - coro: Coroutine to run - - Returns: - The coroutine's return value - """ - loop = new_event_loop() - try: - set_event_loop(loop) - return loop.run_until_complete(coro) - finally: - try: - loop.close() - except Exception: - pass - - -async def gather(*coros_or_futures, return_exceptions=False): - """Run coroutines concurrently and gather results. - - Similar to asyncio.gather() - runs all coroutines concurrently - using the Erlang event loop. - - Args: - *coros_or_futures: Coroutines or futures to run - return_exceptions: If True, exceptions are returned as results - instead of being raised - - Returns: - List of results in the same order as inputs - - Example: - import erlang_asyncio - - async def task(n): - await erlang_asyncio.sleep(0.01) - return n * 2 - - results = await erlang_asyncio.gather(task(1), task(2), task(3)) - # results = [2, 4, 6] - """ - return await asyncio.gather(*coros_or_futures, return_exceptions=return_exceptions) - - -async def wait_for(coro, timeout): - """Wait for a coroutine with a timeout. - - Similar to asyncio.wait_for() - runs the coroutine with a timeout - using the Erlang event loop. 
- - Args: - coro: Coroutine to run - timeout: Timeout in seconds (float) - - Returns: - The coroutine's return value - - Raises: - asyncio.TimeoutError: If the timeout expires - - Example: - import erlang_asyncio - - try: - result = await erlang_asyncio.wait_for(slow_task(), timeout=1.0) - except asyncio.TimeoutError: - print("Task timed out") - """ - return await asyncio.wait_for(coro, timeout) - - -async def wait(fs, *, timeout=None, return_when=asyncio.ALL_COMPLETED): - """Wait for multiple futures/tasks. - - Similar to asyncio.wait() - waits for futures to complete. - - Args: - fs: Iterable of futures/tasks - timeout: Optional timeout in seconds - return_when: When to return (ALL_COMPLETED, FIRST_COMPLETED, FIRST_EXCEPTION) - - Returns: - Tuple of (done, pending) sets - - Example: - import erlang_asyncio - - tasks = [erlang_asyncio.create_task(coro()) for coro in coros] - done, pending = await erlang_asyncio.wait(tasks, timeout=5.0) - """ - return await asyncio.wait(fs, timeout=timeout, return_when=return_when) - - -def create_task(coro, *, name=None): - """Create a task to run the coroutine. - - Similar to asyncio.create_task() - schedules the coroutine - to run on the event loop. - - Args: - coro: Coroutine to run - name: Optional name for the task - - Returns: - asyncio.Task instance - - Example: - import erlang_asyncio - - async def background_work(): - await erlang_asyncio.sleep(1.0) - return "done" - - task = erlang_asyncio.create_task(background_work()) - # ... do other work ... - result = await task - """ - loop = asyncio.get_event_loop() - if name is not None: - return loop.create_task(coro, name=name) - return loop.create_task(coro) - - -def ensure_future(coro_or_future, *, loop=None): - """Wrap a coroutine in a Future. - - Similar to asyncio.ensure_future(). 
- - Args: - coro_or_future: Coroutine or Future - loop: Optional event loop - - Returns: - asyncio.Future or asyncio.Task - """ - return asyncio.ensure_future(coro_or_future, loop=loop) - - -async def shield(arg): - """Protect a coroutine from cancellation. - - Similar to asyncio.shield() - the inner coroutine continues - even if the outer task is cancelled. - - Args: - arg: Coroutine or future to shield - - Returns: - The result of the shielded coroutine - """ - return await asyncio.shield(arg) - - -class timeout: - """Context manager for timeout. - - Similar to asyncio.timeout() (Python 3.11+). - - Example: - import erlang_asyncio - - async with erlang_asyncio.timeout(1.0): - await slow_operation() - """ - - def __init__(self, delay): - self.delay = delay - self._task = None - self._cancelled = False - - async def __aenter__(self): - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - return False - - def reschedule(self, delay): - """Reschedule the timeout.""" - self.delay = delay - - -# Re-export common exceptions -TimeoutError = asyncio.TimeoutError -CancelledError = asyncio.CancelledError - -# Constants for wait() -ALL_COMPLETED = asyncio.ALL_COMPLETED -FIRST_COMPLETED = asyncio.FIRST_COMPLETED -FIRST_EXCEPTION = asyncio.FIRST_EXCEPTION - - -__all__ = [ - # Core functions - 'sleep', - 'run', - 'gather', - 'wait', - 'wait_for', - 'create_task', - 'ensure_future', - 'shield', - 'timeout', - # Event loop - 'get_event_loop', - 'new_event_loop', - 'set_event_loop', - 'get_running_loop', - 'ErlangEventLoop', - # Exceptions - 'TimeoutError', - 'CancelledError', - # Constants - 'ALL_COMPLETED', - 'FIRST_COMPLETED', - 'FIRST_EXCEPTION', -] diff --git a/priv/test_erlang_loop.py b/priv/test_erlang_loop.py new file mode 100644 index 0000000..06a4f28 --- /dev/null +++ b/priv/test_erlang_loop.py @@ -0,0 +1,823 @@ +#!/usr/bin/env python3 +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not 
use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test suite for ErlangEventLoop asyncio compatibility. + +This test suite verifies that ErlangEventLoop can fully replace asyncio's +default event loop, similar to uvloop. Tests are adapted from uvloop's +test suite. + +Run with: + python test_erlang_loop.py + +Or via Erlang: + py:exec(<<"exec(open('priv/test_erlang_loop.py').read())">>). +""" + +import asyncio +import gc +import os +import signal +import socket +import sys +import tempfile +import threading +import time +import unittest +import weakref + +# Import the event loop +try: + from _erlang_impl import ErlangEventLoop, get_event_loop_policy +except ImportError: + # Try the erlang package + from erlang import ErlangEventLoop + from erlang._policy import ErlangEventLoopPolicy + def get_event_loop_policy(): + return ErlangEventLoopPolicy() + + +def find_free_port(): + """Find a free port on localhost.""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('127.0.0.1', 0)) + return s.getsockname()[1] + + +class ErlangLoopTestCase(unittest.TestCase): + """Base test case for ErlangEventLoop tests.""" + + def setUp(self): + self.loop = ErlangEventLoop() + self.exceptions = [] + + def tearDown(self): + if not self.loop.is_closed(): + self.loop.close() + # Force garbage collection + gc.collect() + + def loop_exception_handler(self, loop, context): + """Custom exception handler that records exceptions.""" + self.exceptions.append(context) + + def run_briefly(self): + """Run the loop briefly to process pending callbacks.""" + 
class TestCallSoon(ErlangLoopTestCase):
    """Exercise the loop's call_soon scheduling primitives."""

    def test_call_soon_basic(self):
        """Callbacks queued with call_soon run in FIFO order."""
        seen = []
        for value in (1, 2, 3):
            self.loop.call_soon(seen.append, value)
        self.run_briefly()
        self.assertEqual(seen, [1, 2, 3])

    def test_call_soon_order(self):
        """A longer sequence of call_soon callbacks keeps its order."""
        seen = []
        for idx in range(10):
            self.loop.call_soon(seen.append, idx)
        self.run_briefly()
        self.assertEqual(seen, list(range(10)))

    def test_call_soon_cancel(self):
        """A cancelled handle's callback is skipped; the others still run."""
        seen = []
        self.loop.call_soon(seen.append, 1)
        cancelled = self.loop.call_soon(seen.append, 2)
        self.loop.call_soon(seen.append, 3)
        cancelled.cancel()
        self.run_briefly()
        self.assertEqual(seen, [1, 3])

    def test_call_soon_threadsafe(self):
        """call_soon_threadsafe delivers callbacks posted from another thread."""
        seen = []
        started = threading.Event()

        def record(value):
            seen.append(value)
            # Stop the loop once the last expected value arrives.
            if value == 3:
                self.loop.stop()

        def poster():
            # Wait until the loop is running before posting, so ordering
            # relative to the in-loop call_soon(record, 1) is deterministic.
            started.wait()
            self.loop.call_soon_threadsafe(record, 2)
            self.loop.call_soon_threadsafe(record, 3)

        self.loop.call_soon(record, 1)
        worker = threading.Thread(target=poster)
        worker.start()

        self.loop.call_soon(started.set)
        self.loop.run_forever()

        worker.join()
        self.assertEqual(seen, [1, 2, 3])
self.assertGreaterEqual(results[0], 0.04) + + def test_call_later_ordering(self): + """Test that call_later respects timing order.""" + results = [] + + def callback(x): + results.append(x) + if x == 3: + self.loop.stop() + + # Schedule out of order + self.loop.call_later(0.03, callback, 3) + self.loop.call_later(0.01, callback, 1) + self.loop.call_later(0.02, callback, 2) + + self.loop.run_forever() + + self.assertEqual(results, [1, 2, 3]) + + def test_call_later_cancel(self): + """Test cancelling a call_later handle.""" + results = [] + + def callback(x): + results.append(x) + if len(results) == 2: + self.loop.stop() + + self.loop.call_later(0.01, callback, 1) + handle = self.loop.call_later(0.02, callback, 2) + self.loop.call_later(0.03, callback, 3) + + handle.cancel() + self.loop.run_forever() + + self.assertEqual(results, [1, 3]) + + def test_call_later_zero_delay(self): + """Test call_later with zero delay.""" + results = [] + + def callback(x): + results.append(x) + + self.loop.call_later(0, callback, 1) + self.loop.call_later(0, callback, 2) + + self.run_briefly() + + self.assertEqual(results, [1, 2]) + + def test_call_at(self): + """Test call_at scheduling.""" + results = [] + now = self.loop.time() + + def callback(): + results.append(True) + self.loop.stop() + + self.loop.call_at(now + 0.05, callback) + self.loop.run_forever() + + self.assertEqual(results, [True]) + + +class TestRunMethods(ErlangLoopTestCase): + """Test run_forever, run_until_complete, stop, close.""" + + def test_run_until_complete_basic(self): + """Test run_until_complete with a coroutine.""" + async def coro(): + return 42 + + result = self.loop.run_until_complete(coro()) + self.assertEqual(result, 42) + + def test_run_until_complete_future(self): + """Test run_until_complete with a future.""" + future = self.loop.create_future() + self.loop.call_soon(future.set_result, 'hello') + result = self.loop.run_until_complete(future) + self.assertEqual(result, 'hello') + + def 
class TestTasks(ErlangLoopTestCase):
    """Exercise task and future creation on the loop."""

    def test_create_task(self):
        """create_task wraps a coroutine whose result is awaitable."""
        async def produce():
            await asyncio.sleep(0.01)
            return 42

        async def driver():
            task = self.loop.create_task(produce())
            return await task

        self.assertEqual(self.loop.run_until_complete(driver()), 42)

    def test_create_future(self):
        """create_future returns a pending asyncio.Future."""
        fut = self.loop.create_future()
        self.assertIsInstance(fut, asyncio.Future)
        self.assertFalse(fut.done())

        self.loop.call_soon(fut.set_result, 123)
        self.assertEqual(self.loop.run_until_complete(fut), 123)

    def test_task_cancel(self):
        """Cancelling a task makes awaiting it raise CancelledError."""
        async def never_finishes():
            await asyncio.sleep(10)

        async def driver():
            task = self.loop.create_task(never_finishes())
            await asyncio.sleep(0.01)
            task.cancel()
            with self.assertRaises(asyncio.CancelledError):
                await task

        self.loop.run_until_complete(driver())

    def test_gather(self):
        """asyncio.gather collects results in argument order on our loop."""
        async def doubled(n):
            await asyncio.sleep(0.01)
            return n * 2

        async def driver():
            return await asyncio.gather(
                doubled(1), doubled(2), doubled(3)
            )

        self.assertEqual(self.loop.run_until_complete(driver()), [2, 4, 6])
class TestCreateConnection(ErlangLoopTestCase):
    """Test create_connection."""

    def test_create_connection_basic(self):
        """Test basic TCP connection.

        Starts an echoing TCP server on a free port, connects a client
        via loop.create_connection(), and verifies the payloads seen on
        both sides of the connection.
        """
        port = find_free_port()
        received = []  # payloads as observed by the server side

        class ServerProtocol(asyncio.Protocol):
            # Echoes each payload back with an 'echo:' prefix, then closes
            # the connection, which drives the client's connection_lost.
            def connection_made(self, transport):
                self.transport = transport

            def data_received(self, data):
                received.append(data)
                self.transport.write(b'echo:' + data)
                self.transport.close()

        async def main():
            server = await self.loop.create_server(
                ServerProtocol, '127.0.0.1', port
            )

            class ClientProtocol(asyncio.Protocol):
                # Sends one greeting on connect and collects replies;
                # `done` resolves with everything received once the
                # server closes the connection.
                def __init__(self):
                    self.received = []
                    self.done = asyncio.Future()

                def connection_made(self, transport):
                    self.transport = transport
                    transport.write(b'hello')

                def data_received(self, data):
                    self.received.append(data)

                def connection_lost(self, exc):
                    self.done.set_result(self.received)

            transport, protocol = await self.loop.create_connection(
                ClientProtocol, '127.0.0.1', port
            )

            result = await protocol.done
            server.close()
            return result

        result = self.loop.run_until_complete(main())
        self.assertEqual(result, [b'echo:hello'])
        self.assertEqual(received, [b'hello'])
class TestDatagramEndpoint(ErlangLoopTestCase):
    """Test create_datagram_endpoint."""

    def test_udp_echo(self):
        """Test a UDP echo round-trip between two datagram endpoints."""
        received = []  # payloads as observed by the echo server

        class EchoServerProtocol(asyncio.DatagramProtocol):
            def connection_made(self, transport):
                # Must store the transport ourselves: the base
                # DatagramProtocol.connection_made() does nothing, so
                # without this, datagram_received's self.transport
                # access raises AttributeError and no echo is sent.
                self.transport = transport

            def datagram_received(self, data, addr):
                received.append(data)
                self.transport.sendto(b'echo:' + data, addr)

        class ClientProtocol(asyncio.DatagramProtocol):
            # Resolves `done` with the first datagram received.
            def __init__(self):
                self.received = []
                self.done = asyncio.Future()

            def datagram_received(self, data, addr):
                self.received.append(data)
                self.done.set_result(data)

        async def main():
            # Bind the echo server on an ephemeral port.
            transport, _ = await self.loop.create_datagram_endpoint(
                EchoServerProtocol,
                local_addr=('127.0.0.1', 0)
            )
            server_addr = transport.get_extra_info('sockname')

            # Point a client endpoint at the server's address.
            client_transport, client_protocol = await self.loop.create_datagram_endpoint(
                ClientProtocol,
                remote_addr=server_addr
            )

            client_transport.sendto(b'hello')
            # Bounded wait: a dropped datagram fails the test instead of hanging.
            result = await asyncio.wait_for(client_protocol.done, timeout=5.0)

            client_transport.close()
            transport.close()

            return result

        result = self.loop.run_until_complete(main())
        self.assertEqual(result, b'echo:hello')
        self.assertEqual(received, [b'hello'])
class TestDNS(ErlangLoopTestCase):
    """Exercise the loop's DNS resolution API."""

    def test_getaddrinfo(self):
        """getaddrinfo resolves localhost into well-formed 5-tuples."""
        async def resolve():
            return await self.loop.getaddrinfo(
                'localhost', 80,
                family=socket.AF_INET,
                type=socket.SOCK_STREAM
            )

        entries = self.loop.run_until_complete(resolve())

        self.assertIsInstance(entries, list)
        self.assertGreater(len(entries), 0)

        # Each entry is (family, type, proto, canonname, sockaddr).
        family, socktype, _proto, _canonname, _sockaddr = entries[0]
        self.assertEqual(family, socket.AF_INET)
        self.assertEqual(socktype, socket.SOCK_STREAM)
class TestUnixSockets(ErlangLoopTestCase):
    """Test Unix socket operations."""

    def test_unix_server_client(self):
        """Test Unix socket server and client.

        Creates a Unix-domain stream server in a temporary directory,
        connects a raw client socket through the loop's sock_* API, and
        verifies the request/acknowledgement round-trip.
        """
        if sys.platform == 'win32':
            self.skipTest("Unix sockets not available on Windows")

        # TemporaryDirectory guarantees the socket path is removed even
        # if the test fails midway.
        with tempfile.TemporaryDirectory() as tmpdir:
            path = os.path.join(tmpdir, 'test.sock')
            received = []  # payloads as observed by the server side

            class ServerProtocol(asyncio.Protocol):
                # Acknowledges each payload, then closes the connection.
                def connection_made(self, transport):
                    self.transport = transport

                def data_received(self, data):
                    received.append(data)
                    self.transport.write(b'ack')
                    self.transport.close()

            async def main():
                # Create Unix server
                server = await self.loop.create_unix_server(
                    ServerProtocol, path
                )

                # Connect client
                sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
                sock.setblocking(False)
                await self.loop.sock_connect(sock, path)
                await self.loop.sock_sendall(sock, b'unix test')
                data = await self.loop.sock_recv(sock, 1024)
                sock.close()

                server.close()
                await server.wait_closed()

                return data

            result = self.loop.run_until_complete(main())
            self.assertEqual(result, b'ack')
            self.assertEqual(received, [b'unix test'])
def run_tests():
    """Run the full test suite with a verbose text runner.

    Test classes are discovered from this module automatically instead
    of being enumerated by hand; the previous hard-coded list of 15
    classes had to be updated manually and would silently skip any
    newly added TestCase subclass.  (The ErlangLoopTestCase base class
    defines no test_* methods, so discovery contributes nothing for it.)

    Returns:
        True if every test passed, False otherwise.
    """
    loader = unittest.TestLoader()
    suite = loader.loadTestsFromModule(sys.modules[__name__])

    runner = unittest.TextTestRunner(verbosity=2)
    result = runner.run(suite)

    return result.wasSuccessful()
- With proper isolation, each loop has its own: - - pending queue - - callback ID space (though we use Python-side IDs) - - condition variable for wakeup - - Returns: - List of ErlangEventLoop instances - """ - if not HAS_EVENT_LOOP: - raise RuntimeError("Event loop module not available") - - loops = [] - for _ in range(count): - loop = ErlangEventLoop() - loops.append(loop) - return loops - - -def run_concurrent_timers_on_loop(loop, timer_count: int, base_id: int) -> List[int]: - """ - Schedule timers on a loop and collect the callback IDs that fire. - - Args: - loop: ErlangEventLoop instance - timer_count: Number of timers to schedule - base_id: Starting callback ID (for identification) - - Returns: - List of callback IDs that were dispatched - """ - received_ids = [] - - def make_callback(cid): - def callback(): - received_ids.append(cid) - return callback - - # Schedule all timers with small delays - handles = [] - for i in range(timer_count): - callback_id = base_id + i - handle = loop.call_later(0.001, make_callback(callback_id)) # 1ms delay - handles.append(handle) - - # Run the loop until all timers fire or timeout - start = time.monotonic() - timeout = 1.0 # 1 second timeout - - while len(received_ids) < timer_count: - if time.monotonic() - start > timeout: - break - loop._run_once() - - return received_ids - - -def test_two_loops_concurrent_timers(timer_count: int = 100) -> Dict: - """ - Test that two loops can schedule concurrent timers without cross-talk. 
- - Returns: - Dict with test results: - - loop_a_count: Number of events received by loop A - - loop_b_count: Number of events received by loop B - - loop_a_ids: Set of callback IDs received by loop A - - loop_b_ids: Set of callback IDs received by loop B - - overlap: Any IDs that appear in both (should be empty) - - passed: True if test passed - """ - if not HAS_EVENT_LOOP: - return {"error": "Event loop not available", "passed": False} - - loop_a = ErlangEventLoop() - loop_b = ErlangEventLoop() - - loop_a_base = 1 # IDs 1-100 - loop_b_base = 1001 # IDs 1001-1100 - - # Run timers on both loops - # Note: In real implementation with _for methods, these would be isolated - ids_a = run_concurrent_timers_on_loop(loop_a, timer_count, loop_a_base) - ids_b = run_concurrent_timers_on_loop(loop_b, timer_count, loop_b_base) - - # Check for overlap - set_a = set(ids_a) - set_b = set(ids_b) - overlap = set_a & set_b - - # Expected IDs - expected_a = set(range(loop_a_base, loop_a_base + timer_count)) - expected_b = set(range(loop_b_base, loop_b_base + timer_count)) - - passed = ( - len(ids_a) == timer_count and - len(ids_b) == timer_count and - set_a == expected_a and - set_b == expected_b and - len(overlap) == 0 - ) - - loop_a.close() - loop_b.close() - - return { - "loop_a_count": len(ids_a), - "loop_b_count": len(ids_b), - "loop_a_ids": sorted(ids_a), - "loop_b_ids": sorted(ids_b), - "overlap": sorted(overlap), - "passed": passed - } - - -def test_cross_isolation() -> Dict: - """ - Test that events on loop A are not visible to loop B. 
- - Returns: - Dict with test results - """ - if not HAS_EVENT_LOOP: - return {"error": "Event loop not available", "passed": False} - - loop_a = ErlangEventLoop() - loop_b = ErlangEventLoop() - - a_received = [] - b_received = [] - - def callback_a(): - a_received.append("event_a") - - def callback_b(): - b_received.append("event_b") - - # Schedule only on loop A - loop_a.call_soon(callback_a) - - # Run loop A - should receive the event - loop_a._run_once() - - # Run loop B - should NOT receive loop A's event - loop_b._run_once() - - passed = ( - len(a_received) == 1 and - len(b_received) == 0 - ) - - loop_a.close() - loop_b.close() - - return { - "loop_a_events": len(a_received), - "loop_b_events": len(b_received), - "passed": passed - } - - -def test_cleanup_no_leak() -> Dict: - """ - Test that destroying one loop doesn't affect another. - - Returns: - Dict with test results - """ - if not HAS_EVENT_LOOP: - return {"error": "Event loop not available", "passed": False} - - loop_a = ErlangEventLoop() - loop_b = ErlangEventLoop() - - b_received = [] - - def callback_b(): - b_received.append("event_b") - - # Schedule timer on loop B for after loop A is destroyed - loop_b.call_later(0.05, callback_b) # 50ms delay - - # Close loop A - loop_a.close() - - # Wait and run loop B - should still work - time.sleep(0.1) # 100ms - loop_b._run_once() - - passed = len(b_received) == 1 - - loop_b.close() - - return { - "loop_b_events": len(b_received), - "passed": passed - } diff --git a/priv/tests/__init__.py b/priv/tests/__init__.py new file mode 100644 index 0000000..1cdf8cd --- /dev/null +++ b/priv/tests/__init__.py @@ -0,0 +1,55 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test suite for ErlangEventLoop asyncio compatibility. + +This test suite is adapted from uvloop's test suite to verify that +ErlangEventLoop is a full drop-in replacement for asyncio's default +event loop. + +Test Architecture: +- Uses a mixin pattern for test reuse (like uvloop) +- Tests run against both ErlangEventLoop and asyncio for comparison +- Supports pytest for test discovery and execution + +Run tests: + cd priv && python -m pytest tests/ -v + +Run against ErlangEventLoop only: + cd priv && python -m pytest tests/ -v -k "Erlang" + +Run comparison tests: + cd priv && python -m pytest tests/ -v +""" + +from ._testbase import ( + BaseTestCase, + ErlangTestCase, + AIOTestCase, + find_free_port, + HAVE_SSL, + ONLYUV, + ONLYERL, +) + +__all__ = [ + 'BaseTestCase', + 'ErlangTestCase', + 'AIOTestCase', + 'find_free_port', + 'HAVE_SSL', + 'ONLYUV', + 'ONLYERL', +] diff --git a/priv/tests/_testbase.py b/priv/tests/_testbase.py new file mode 100644 index 0000000..7efab1a --- /dev/null +++ b/priv/tests/_testbase.py @@ -0,0 +1,569 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test infrastructure for ErlangEventLoop tests. + +This module provides the base test classes and utilities for testing +asyncio compatibility. The design follows uvloop's mixin pattern for +maximum test reuse across different event loop implementations. + +Usage pattern (uvloop-style mixin): + + class _TestSockets: + # All test methods - generic to any event loop + def test_socket_accept_recv_send(self): + ... + + class TestErlangSockets(_TestSockets, tb.ErlangTestCase): + pass # Runs against ErlangEventLoop + + class TestAIOSockets(_TestSockets, tb.AIOTestCase): + pass # Runs against asyncio +""" + +import asyncio +import gc +import os +import socket +import ssl +import sys +import tempfile +import threading +import time +import unittest +from typing import Optional, Callable, Any + +# Check for SSL support +try: + import ssl as ssl_module + HAVE_SSL = True +except ImportError: + HAVE_SSL = False + + +def _is_inside_erlang_nif(): + """Check if we're running inside the Erlang NIF environment. + + Returns True if py_event_loop module is available, which indicates + Python is embedded inside the Erlang NIF. In this environment, + fork() operations will corrupt the Erlang VM. + """ + try: + import py_event_loop + return True + except ImportError: + return False + + +INSIDE_ERLANG_NIF = _is_inside_erlang_nif() + + +# Subprocess is not supported in ErlangEventLoop. +# Python subprocess uses fork() which corrupts the Erlang VM. +# Use Erlang ports directly via erlang.call() instead. +HAS_SUBPROCESS_SUPPORT = False + +# Markers for test filtering +ONLYUV = unittest.skipUnless(False, "uvloop-only test") +ONLYERL = object() # Marker for Erlang-only tests + + +def find_free_port(host: str = '127.0.0.1') -> int: + """Find a free TCP port on the given host. + + Args: + host: Host address to bind to. + + Returns: + An available port number. 
+ """ + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind((host, 0)) + return s.getsockname()[1] + + +def find_free_udp_port(host: str = '127.0.0.1') -> int: + """Find a free UDP port on the given host. + + Args: + host: Host address to bind to. + + Returns: + An available port number. + """ + with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s: + s.bind((host, 0)) + return s.getsockname()[1] + + +def make_unix_socket_path() -> str: + """Create a temporary path for Unix socket. + + Returns: + A path string suitable for Unix socket binding. + """ + fd, path = tempfile.mkstemp(prefix='erlang_test_sock_') + os.close(fd) + os.unlink(path) + return path + + +class BaseTestCase(unittest.TestCase): + """Base test case for event loop tests. + + Subclasses must implement new_loop() to provide the event loop + implementation to test. + """ + + def new_loop(self) -> asyncio.AbstractEventLoop: + """Create a new event loop instance. + + Must be implemented by subclasses. + + Returns: + A new event loop instance. + """ + raise NotImplementedError + + def setUp(self): + """Set up the test case.""" + self.loop = self.new_loop() + self.exceptions = [] + + def tearDown(self): + """Tear down the test case.""" + if self.loop is not None and not self.loop.is_closed(): + # Cancel all pending tasks before closing + try: + pending = asyncio.all_tasks(self.loop) + for task in pending: + task.cancel() + if pending: + self.loop.run_until_complete( + asyncio.gather(*pending, return_exceptions=True) + ) + except Exception: + pass + + # Shutdown async generators + try: + self.loop.run_until_complete(self.loop.shutdown_asyncgens()) + except Exception: + pass + + self.loop.close() + + # Force garbage collection to catch resource leaks + gc.collect() + gc.collect() + gc.collect() + + def loop_exception_handler( + self, loop: asyncio.AbstractEventLoop, context: dict + ): + """Custom exception handler that records exceptions. + + Args: + loop: The event loop. 
+ context: Exception context dictionary. + """ + self.exceptions.append(context) + + def run_briefly(self, *, timeout: float = 1.0): + """Run the loop briefly to process pending callbacks. + + Args: + timeout: Maximum time to run in seconds. + """ + async def noop(): + pass + self.loop.run_until_complete( + asyncio.wait_for(noop(), timeout=timeout) + ) + + def run_until( + self, + predicate: Callable[[], bool], + timeout: float = 5.0 + ): + """Run the loop until predicate returns True or timeout. + + Args: + predicate: Function that returns True when done. + timeout: Maximum time to wait in seconds. + + Raises: + TimeoutError: If predicate doesn't become True within timeout. + """ + async def wait_for_predicate(): + deadline = time.monotonic() + timeout + while not predicate(): + if time.monotonic() > deadline: + raise TimeoutError( + f"Condition not met within {timeout}s" + ) + await asyncio.sleep(0.01) + + self.loop.run_until_complete(wait_for_predicate()) + + def suppress_log_errors(self): + """Context manager to suppress error logging during test.""" + return _SuppressLogErrors(self.loop) + + # Assertion helpers + + def assertIsSubclass(self, cls, parent_cls, msg=None): + """Assert that cls is a subclass of parent_cls.""" + if not issubclass(cls, parent_cls): + self.fail(msg or f"{cls} is not a subclass of {parent_cls}") + + def assertRunsWithin( + self, coro, timeout: float, msg: Optional[str] = None + ): + """Assert that a coroutine completes within timeout. + + Args: + coro: Coroutine to run. + timeout: Maximum time in seconds. + msg: Optional failure message. 
+ """ + try: + return self.loop.run_until_complete( + asyncio.wait_for(coro, timeout=timeout) + ) + except asyncio.TimeoutError: + self.fail(msg or f"Coroutine did not complete within {timeout}s") + + +class _SuppressLogErrors: + """Context manager that suppresses error logging.""" + + def __init__(self, loop): + self._loop = loop + self._handler = None + + def __enter__(self): + self._handler = self._loop.get_exception_handler() + self._loop.set_exception_handler(lambda loop, ctx: None) + return self + + def __exit__(self, *args): + self._loop.set_exception_handler(self._handler) + + +class ErlangTestCase(BaseTestCase): + """Test case for ErlangEventLoop. + + This class creates ErlangEventLoop instances for testing. + """ + + def new_loop(self) -> asyncio.AbstractEventLoop: + """Create a new ErlangEventLoop instance. + + Returns: + A new ErlangEventLoop instance. + """ + # Try to use the unified erlang module (C module with Python extensions) + try: + import erlang + if hasattr(erlang, 'new_event_loop'): + return erlang.new_event_loop() + except ImportError: + pass + + # Try to import from _erlang_impl package + try: + from _erlang_impl import ErlangEventLoop + return ErlangEventLoop() + except ImportError: + pass + + # Add parent directory to path and try again + import sys + import os + parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + + from _erlang_impl import ErlangEventLoop + return ErlangEventLoop() + + +class AIOTestCase(BaseTestCase): + """Test case for asyncio's default event loop. + + This class creates asyncio's SelectorEventLoop for comparison testing. + """ + + def new_loop(self) -> asyncio.AbstractEventLoop: + """Create a new asyncio event loop instance. + + Returns: + A new asyncio event loop instance. 
+ """ + return asyncio.new_event_loop() + + +# Convenience mixins for common test patterns + +class ServerMixin: + """Mixin providing server testing utilities.""" + + def start_server( + self, + protocol_factory, + host: str = '127.0.0.1', + port: int = 0, + **kwargs + ): + """Start a TCP server for testing. + + Args: + protocol_factory: Factory for creating protocols. + host: Host to bind to. + port: Port to bind to (0 for auto). + **kwargs: Additional server arguments. + + Returns: + A tuple of (server, address) where address is (host, port). + """ + async def create(): + server = await self.loop.create_server( + protocol_factory, host, port, **kwargs + ) + addr = server.sockets[0].getsockname() + return server, addr + + return self.loop.run_until_complete(create()) + + def connect_to_server(self, protocol_factory, host: str, port: int): + """Connect to a server. + + Args: + protocol_factory: Factory for creating protocols. + host: Host to connect to. + port: Port to connect to. + + Returns: + A tuple of (transport, protocol). + """ + async def connect(): + return await self.loop.create_connection( + protocol_factory, host, port + ) + + return self.loop.run_until_complete(connect()) + + +class SocketMixin: + """Mixin providing socket testing utilities.""" + + def create_tcp_socket_pair(self): + """Create a connected pair of TCP sockets. + + Returns: + A tuple of (server_sock, client_sock) both in non-blocking mode. 
+ """ + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('127.0.0.1', 0)) + server.listen(1) + server.setblocking(False) + + client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + client.setblocking(False) + + try: + client.connect(server.getsockname()) + except BlockingIOError: + pass + + conn, _ = server.accept() + conn.setblocking(False) + + server.close() + return conn, client + + def create_unix_socket_pair(self): + """Create a connected pair of Unix sockets. + + Returns: + A tuple of (server_sock, client_sock) both in non-blocking mode. + """ + if sys.platform == 'win32': + self.skipTest("Unix sockets not available on Windows") + + path = make_unix_socket_path() + try: + server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + server.bind(path) + server.listen(1) + server.setblocking(False) + + client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + client.setblocking(False) + + try: + client.connect(path) + except BlockingIOError: + pass + + conn, _ = server.accept() + conn.setblocking(False) + + server.close() + return conn, client + finally: + try: + os.unlink(path) + except OSError: + pass + + +class SSLMixin: + """Mixin providing SSL testing utilities.""" + + @staticmethod + def create_ssl_context(*, server: bool = False) -> ssl.SSLContext: + """Create an SSL context for testing. + + Args: + server: If True, create a server context. + + Returns: + An SSL context configured for testing. 
+ """ + if not HAVE_SSL: + raise unittest.SkipTest("SSL not available") + + # Use a self-signed certificate for testing + # In a real implementation, you would load actual certificates + if server: + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) + else: + ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + + return ctx + + +# Test protocol classes for common patterns + +class EchoProtocol(asyncio.Protocol): + """Protocol that echoes received data back to the sender.""" + + def connection_made(self, transport): + self.transport = transport + + def data_received(self, data): + self.transport.write(data) + + def connection_lost(self, exc): + pass + + +class AccumulatingProtocol(asyncio.Protocol): + """Protocol that accumulates received data.""" + + def __init__(self): + self.data = bytearray() + self.done = None + self.transport = None + + def connection_made(self, transport): + self.transport = transport + self.done = asyncio.get_event_loop().create_future() + + def data_received(self, data): + self.data.extend(data) + + def connection_lost(self, exc): + if self.done and not self.done.done(): + if exc: + self.done.set_exception(exc) + else: + self.done.set_result(bytes(self.data)) + + def eof_received(self): + pass + + +class EchoDatagramProtocol(asyncio.DatagramProtocol): + """Datagram protocol that echoes received data back.""" + + def __init__(self): + self.transport = None + + def connection_made(self, transport): + self.transport = transport + + def datagram_received(self, data, addr): + self.transport.sendto(data, addr) + + def error_received(self, exc): + pass + + +class AccumulatingDatagramProtocol(asyncio.DatagramProtocol): + """Datagram protocol that accumulates received data.""" + + def __init__(self): + self.data = [] + self.done = None + self.transport = None + + def connection_made(self, transport): + self.transport = transport + self.done = asyncio.get_event_loop().create_future() + + def 
datagram_received(self, data, addr): + self.data.append((data, addr)) + + def error_received(self, exc): + if self.done and not self.done.done(): + self.done.set_exception(exc) + + +# Utility functions + +def run_test_server( + host: str, + port: int, + handler: Callable[[socket.socket, tuple], None], + ready_event: Optional[threading.Event] = None +): + """Run a simple test server in a thread. + + Args: + host: Host to bind to. + port: Port to bind to. + handler: Function to handle connections. + ready_event: Event to set when server is ready. + """ + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind((host, port)) + server.listen(1) + + if ready_event: + ready_event.set() + + conn, addr = server.accept() + try: + handler(conn, addr) + finally: + conn.close() + server.close() diff --git a/priv/tests/async_test_runner.py b/priv/tests/async_test_runner.py new file mode 100644 index 0000000..2ce34b8 --- /dev/null +++ b/priv/tests/async_test_runner.py @@ -0,0 +1,295 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test runner for ErlangEventLoop tests. + +Tests create their own isolated ErlangEventLoop via erlang.new_event_loop() +and manage it directly. Each test's loop has its own capsule for proper +timer and FD event routing. 
+ +Usage from Erlang: + {ok, Results} = py:call(Ctx, 'tests.async_test_runner', run_tests, + [<<"tests.test_base">>, <<"TestErlang*">>]). + +Test Flow: + run_tests() runs synchronously + │ + └─→ For each test: + test.setUp() creates self.loop = erlang.new_event_loop() + │ + └─→ Isolated ErlangEventLoop with own capsule + │ + test runs using self.loop.run_until_complete() + │ + └─→ Timers route to this loop's capsule + │ + test.tearDown() closes self.loop +""" + +import fnmatch +import io +import sys +import traceback +import unittest +from typing import Dict, Any, List + + +def run_test_method(test_case, method_name: str, timeout: float = 30.0) -> Dict[str, Any]: + """Run a single test method. + + Args: + test_case: The test case class + method_name: Name of the test method to run + timeout: Per-test timeout (relies on CT for enforcement) + + Returns: + Dict with test result including name, status, and error if any + """ + test = test_case(method_name) + result = { + 'name': f"{test_case.__name__}.{method_name}", + 'status': 'ok', + 'error': None + } + + try: + test.setUp() + getattr(test, method_name)() + except unittest.SkipTest as e: + result['status'] = 'skipped' + result['error'] = str(e) + except AssertionError: + result['status'] = 'failure' + result['error'] = traceback.format_exc() + except Exception: + result['status'] = 'error' + result['error'] = traceback.format_exc() + finally: + try: + test.tearDown() + except Exception: + pass + + return result + + +def run_test_class(test_class, timeout: float = 30.0) -> List[Dict[str, Any]]: + """Run all test methods in a test class. 
+ + Args: + test_class: The test case class to run + timeout: Per-test timeout in seconds + + Returns: + List of test result dicts + """ + results = [] + loader = unittest.TestLoader() + + for method_name in loader.getTestCaseNames(test_class): + result = run_test_method(test_class, method_name, timeout) + results.append(result) + + return results + + +def run_tests(module_name: str, pattern: str, timeout: float = 30.0) -> Dict[str, Any]: + """ + Run tests matching pattern synchronously. + + Each test creates its own isolated ErlangEventLoop via erlang.new_event_loop() + and manages it directly. Tests use self.loop.run_until_complete() which + works correctly because each loop has its own capsule for timer routing. + + Args: + module_name: Fully qualified module name (e.g., 'tests.test_base') + pattern: fnmatch pattern for test class names (e.g., 'TestErlang*') + timeout: Timeout in seconds for each individual test (default 30s) + + Returns: + Dictionary with keys: + - tests_run: Number of tests executed + - failures: Number of test failures + - errors: Number of test errors + - skipped: Number of skipped tests + - success: Boolean indicating all tests passed + - output: Formatted test output (string) + - failure_details: List of failure/error details + """ + # Handle binary strings from Erlang + if isinstance(module_name, bytes): + module_name = module_name.decode('utf-8') + if isinstance(pattern, bytes): + pattern = pattern.decode('utf-8') + + try: + module = __import__(module_name, fromlist=['']) + all_results = [] + + # Find all test classes matching pattern + for name in dir(module): + if fnmatch.fnmatch(name, pattern): + obj = getattr(module, name) + if isinstance(obj, type) and issubclass(obj, unittest.TestCase): + if obj is not unittest.TestCase: + results = run_test_class(obj, timeout) + all_results.extend(results) + + # Aggregate results + tests_run = len(all_results) + failures = sum(1 for r in all_results if r['status'] == 'failure') + errors = sum(1 
for r in all_results if r['status'] in ('error', 'timeout')) + skipped = sum(1 for r in all_results if r['status'] == 'skipped') + + # Build failure details for CT reporting + failure_details = [] + for r in all_results: + if r['status'] in ('failure', 'error', 'timeout'): + failure_details.append({ + 'test': r['name'], + 'traceback': r['error'] or '' + }) + + return { + 'tests_run': tests_run, + 'failures': failures, + 'errors': errors, + 'skipped': skipped, + 'success': failures == 0 and errors == 0, + 'results': all_results, + 'output': _format_results(all_results), + 'failure_details': failure_details + } + except Exception as e: + return { + 'tests_run': 0, + 'failures': 0, + 'errors': 1, + 'skipped': 0, + 'success': False, + 'results': [], + 'output': traceback.format_exc(), + 'failure_details': [{'test': 'import', 'traceback': str(e)}] + } + + +def _format_results(results: List[Dict]) -> str: + """Format results as text output for CT logs. + + Args: + results: List of test result dicts + + Returns: + Formatted string output + """ + lines = [] + status_map = { + 'ok': 'ok', + 'failure': 'FAIL', + 'error': 'ERROR', + 'timeout': 'TIMEOUT', + 'skipped': 'skipped' + } + + for r in results: + status = status_map.get(r['status'], r['status']) + lines.append(f"{r['name']} ... {status}") + if r['error'] and r['status'] != 'ok': + # Indent error output + error_lines = r['error'].split('\n') + for line in error_lines: + lines.append(f" {line}") + + # Summary line + lines.append("") + lines.append("-" * 70) + total = len(results) + lines.append(f"Ran {total} test{'s' if total != 1 else ''}") + + return '\n'.join(lines) + + +def run_erlang_tests(module_name: str, timeout: float = 30.0) -> Dict[str, Any]: + """Run only Erlang event loop tests from a module. + + Convenience function that runs only TestErlang* classes. 
+ + Args: + module_name: Fully qualified module name (e.g., 'tests.test_base') + timeout: Per-test timeout in seconds + + Returns: + Same as run_tests() + """ + return run_tests(module_name, 'TestErlang*', timeout) + + +def run_asyncio_tests(module_name: str, timeout: float = 30.0) -> Dict[str, Any]: + """Run only asyncio comparison tests from a module. + + Convenience function that runs only TestAIO* classes. + + Args: + module_name: Fully qualified module name (e.g., 'tests.test_base') + timeout: Per-test timeout in seconds + + Returns: + Same as run_tests() + """ + return run_tests(module_name, 'TestAIO*', timeout) + + +def list_test_classes(module_name: str) -> List[str]: + """List all test classes in a module. + + Args: + module_name: Fully qualified module name + + Returns: + List of test class names + """ + if isinstance(module_name, bytes): + module_name = module_name.decode('utf-8') + + try: + module = __import__(module_name, fromlist=['']) + classes = [] + for name in dir(module): + obj = getattr(module, name) + if isinstance(obj, type) and issubclass(obj, unittest.TestCase): + if obj is not unittest.TestCase: + classes.append(name) + return classes + except Exception: + return [] + + +if __name__ == '__main__': + # Allow running from command line for debugging + if len(sys.argv) >= 2: + module = sys.argv[1] + pattern = sys.argv[2] if len(sys.argv) >= 3 else '*' + timeout = float(sys.argv[3]) if len(sys.argv) >= 4 else 30.0 + result = run_tests(module, pattern, timeout) + print(result['output']) + print(f"\nTests run: {result['tests_run']}") + print(f"Failures: {result['failures']}") + print(f"Errors: {result['errors']}") + print(f"Skipped: {result['skipped']}") + print(f"Success: {result['success']}") + sys.exit(0 if result['success'] else 1) + else: + print("Usage: python async_test_runner.py [pattern] [timeout]") + print("Example: python async_test_runner.py tests.test_base TestErlang* 30") diff --git a/priv/tests/conftest.py b/priv/tests/conftest.py 
new file mode 100644 index 0000000..1e98e31 --- /dev/null +++ b/priv/tests/conftest.py @@ -0,0 +1,50 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Pytest configuration for ErlangEventLoop tests. + +This file configures pytest for running the asyncio compatibility tests. +""" + +import os +import sys + +# Add priv directory to path for imports +priv_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +if priv_dir not in sys.path: + sys.path.insert(0, priv_dir) + + +def pytest_configure(config): + """Configure pytest markers.""" + config.addinivalue_line( + "markers", "erlang: marks tests for ErlangEventLoop only" + ) + config.addinivalue_line( + "markers", "asyncio: marks tests for asyncio event loop only" + ) + config.addinivalue_line( + "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')" + ) + + +def pytest_collection_modifyitems(config, items): + """Modify test collection based on markers and keywords.""" + # Add skip markers based on test class names + for item in items: + if 'Erlang' in item.nodeid: + item.add_marker('erlang') + elif 'AIO' in item.nodeid: + item.add_marker('asyncio') diff --git a/priv/tests/ct_runner.py b/priv/tests/ct_runner.py new file mode 100644 index 0000000..dd2843b --- /dev/null +++ b/priv/tests/ct_runner.py @@ -0,0 +1,303 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file 
except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Test runner for CT (Common Test) integration. + +This module provides functions to run Python unittest tests from Erlang +and return results in a format suitable for CT reporting. + +Usage from Erlang: + {ok, Results} = py:call(Ctx, 'tests.ct_runner', run_tests, + [<<"tests.test_base">>, <<"TestErlang*">>]). +""" + +import fnmatch +import io +import sys +import traceback +import unittest +from typing import Any, Dict, List + + +def run_tests(module_name: str, pattern: str, timeout: float = 30.0) -> Dict[str, Any]: + """Run unittest tests matching a pattern and return results. + + This function is designed to be called from Erlang via py:call(). + It discovers and runs test classes matching the given pattern, + then returns a dictionary with test results. 
+ + Args: + module_name: Fully qualified module name (e.g., 'tests.test_base') + pattern: fnmatch pattern for test class names (e.g., 'TestErlang*') + timeout: Timeout in seconds for each individual test (default 30s) + + Returns: + Dictionary with keys: + - tests_run: Number of tests executed + - failures: Number of test failures + - errors: Number of test errors + - skipped: Number of skipped tests + - success: Boolean indicating all tests passed + - output: Test runner output (string) + - failure_details: List of failure/error details + """ + import signal + import threading + from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError + + # Handle binary strings from Erlang + if isinstance(module_name, bytes): + module_name = module_name.decode('utf-8') + if isinstance(pattern, bytes): + pattern = pattern.decode('utf-8') + + output = io.StringIO() + loader = unittest.TestLoader() + + try: + # Import the module + module = __import__(module_name, fromlist=['']) + + # Find test classes matching pattern + suite = unittest.TestSuite() + for name in dir(module): + if fnmatch.fnmatch(name, pattern): + obj = getattr(module, name) + if isinstance(obj, type) and issubclass(obj, unittest.TestCase): + tests = loader.loadTestsFromTestCase(obj) + suite.addTests(tests) + + # Create a custom test result with timeout support + class TimeoutTestResult(unittest.TestResult): + """Test result that wraps tests with timeout.""" + + def __init__(self, stream, descriptions, verbosity, test_timeout): + super().__init__(stream, descriptions, verbosity) + self.stream = stream + self.descriptions = descriptions + self.verbosity = verbosity + self.test_timeout = test_timeout + self._test_executor = ThreadPoolExecutor(max_workers=1) + + def startTest(self, test): + super().startTest(test) + if self.verbosity > 1: + self.stream.write(str(test)) + self.stream.write(" ... 
") + self.stream.flush() + + def addSuccess(self, test): + super().addSuccess(test) + if self.verbosity > 1: + self.stream.write("ok\n") + + def addError(self, test, err): + super().addError(test, err) + if self.verbosity > 1: + self.stream.write("ERROR\n") + + def addFailure(self, test, err): + super().addFailure(test, err) + if self.verbosity > 1: + self.stream.write("FAIL\n") + + def addSkip(self, test, reason): + super().addSkip(test, reason) + if self.verbosity > 1: + self.stream.write(f"skipped {reason!r}\n") + + class TimeoutTestRunner: + """Test runner that applies timeout to each test.""" + + def __init__(self, stream, verbosity, test_timeout): + self.stream = stream + self.verbosity = verbosity + self.test_timeout = test_timeout + + def run(self, test_suite): + result = TimeoutTestResult( + self.stream, + descriptions=True, + verbosity=self.verbosity, + test_timeout=self.test_timeout + ) + + # Run each test with timeout + for test in _iter_tests(test_suite): + if result.shouldStop: + break + + def run_test(): + test(result) + + with ThreadPoolExecutor(max_workers=1) as executor: + future = executor.submit(run_test) + try: + future.result(timeout=self.test_timeout) + except FuturesTimeoutError: + # Test timed out + result.addError( + test, + (TimeoutError, + TimeoutError(f"Test timed out after {self.test_timeout}s"), + None) + ) + except Exception as e: + # Unexpected error running test + result.addError(test, sys.exc_info()) + + # Print summary + self.stream.write("\n") + self.stream.write("-" * 70) + self.stream.write("\n") + run = result.testsRun + self.stream.write(f"Ran {run} test{'s' if run != 1 else ''}\n") + + return result + + def _iter_tests(suite): + """Iterate over all tests in a suite.""" + for test in suite: + if isinstance(test, unittest.TestSuite): + yield from _iter_tests(test) + else: + yield test + + # Run tests with timeout support + runner = TimeoutTestRunner(stream=output, verbosity=2, test_timeout=timeout) + result = 
runner.run(suite) + + # Get output for logging + test_output = output.getvalue() + + # Build failure details + failure_details = [] + for test, trace in result.failures: + failure_details.append({ + 'test': str(test), + 'traceback': trace + }) + for test, trace in result.errors: + if isinstance(trace, tuple): + # Format exception info + import traceback as tb + trace = ''.join(tb.format_exception(*trace)) if trace[2] else str(trace[1]) + failure_details.append({ + 'test': str(test), + 'traceback': trace + }) + + return { + 'tests_run': result.testsRun, + 'failures': len(result.failures), + 'errors': len(result.errors), + 'skipped': len(result.skipped), + 'success': result.wasSuccessful(), + 'output': test_output, + 'failure_details': failure_details + } + except Exception as e: + return { + 'tests_run': 0, + 'failures': 0, + 'errors': 1, + 'skipped': 0, + 'success': False, + 'output': traceback.format_exc(), + 'failure_details': [{'test': 'import', 'traceback': str(e)}] + } + + +def run_module_tests(module_name: str) -> Dict[str, Any]: + """Run all tests in a module. + + Convenience function that runs all TestCase classes in a module. + + Args: + module_name: Fully qualified module name (e.g., 'tests.test_base') + + Returns: + Same as run_tests() + """ + return run_tests(module_name, '*') + + +def run_erlang_tests(module_name: str) -> Dict[str, Any]: + """Run only Erlang event loop tests from a module. + + Convenience function that runs only TestErlang* classes. + + Args: + module_name: Fully qualified module name (e.g., 'tests.test_base') + + Returns: + Same as run_tests() + """ + return run_tests(module_name, 'TestErlang*') + + +def run_asyncio_tests(module_name: str) -> Dict[str, Any]: + """Run only asyncio comparison tests from a module. + + Convenience function that runs only TestAIO* classes. 
+ + Args: + module_name: Fully qualified module name (e.g., 'tests.test_base') + + Returns: + Same as run_tests() + """ + return run_tests(module_name, 'TestAIO*') + + +def list_test_classes(module_name: str) -> List[str]: + """List all test classes in a module. + + Args: + module_name: Fully qualified module name + + Returns: + List of test class names + """ + if isinstance(module_name, bytes): + module_name = module_name.decode('utf-8') + + try: + module = __import__(module_name, fromlist=['']) + classes = [] + for name in dir(module): + obj = getattr(module, name) + if isinstance(obj, type) and issubclass(obj, unittest.TestCase): + if obj is not unittest.TestCase: + classes.append(name) + return classes + except Exception: + return [] + + +if __name__ == '__main__': + # Allow running from command line for debugging + import sys + if len(sys.argv) >= 2: + module = sys.argv[1] + pattern = sys.argv[2] if len(sys.argv) >= 3 else '*' + result = run_tests(module, pattern) + print(f"\nTests run: {result['tests_run']}") + print(f"Failures: {result['failures']}") + print(f"Errors: {result['errors']}") + print(f"Skipped: {result['skipped']}") + print(f"Success: {result['success']}") + else: + print("Usage: python ct_runner.py [pattern]") diff --git a/priv/tests/test_base.py b/priv/tests/test_base.py new file mode 100644 index 0000000..724b86d --- /dev/null +++ b/priv/tests/test_base.py @@ -0,0 +1,803 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Core event loop tests adapted from uvloop's test_base.py. + +These tests verify fundamental event loop operations: +- call_soon, call_later, call_at scheduling +- run_forever, run_until_complete, stop, close +- Future and Task creation +- Exception handling +- Debug mode +""" + +import asyncio +import contextvars +import gc +import socket +import threading +import time +import unittest +import weakref + +from . import _testbase as tb + + +class _TestCallSoon: + """Tests for call_soon functionality.""" + + def test_call_soon_basic(self): + """Test basic call_soon scheduling.""" + results = [] + + def callback(x): + results.append(x) + + self.loop.call_soon(callback, 1) + self.loop.call_soon(callback, 2) + self.loop.call_soon(callback, 3) + + self.run_briefly() + + self.assertEqual(results, [1, 2, 3]) + + def test_call_soon_order(self): + """Test that call_soon preserves FIFO order.""" + results = [] + + for i in range(10): + self.loop.call_soon(results.append, i) + + self.run_briefly() + + self.assertEqual(results, list(range(10))) + + def test_call_soon_cancel(self): + """Test cancelling a call_soon handle.""" + results = [] + + def callback(x): + results.append(x) + + self.loop.call_soon(callback, 1) + handle = self.loop.call_soon(callback, 2) + self.loop.call_soon(callback, 3) + + handle.cancel() + self.run_briefly() + + self.assertEqual(results, [1, 3]) + + def test_call_soon_double_cancel(self): + """Test that cancelling twice is safe.""" + results = [] + handle = self.loop.call_soon(results.append, 1) + + handle.cancel() + handle.cancel() # Should not raise + + self.run_briefly() + self.assertEqual(results, []) + + def test_call_soon_threadsafe(self): + """Test call_soon_threadsafe from another thread.""" + results = [] + event = threading.Event() + + def callback(x): + results.append(x) + if x == 3: + self.loop.stop() + + def thread_func(): + 
event.wait() + self.loop.call_soon_threadsafe(callback, 2) + self.loop.call_soon_threadsafe(callback, 3) + + self.loop.call_soon(callback, 1) + thread = threading.Thread(target=thread_func) + thread.start() + + self.loop.call_soon(event.set) + self.loop.run_forever() + + thread.join(timeout=5) + self.assertEqual(results, [1, 2, 3]) + + def test_call_soon_exception(self): + """Test that exceptions in callbacks are handled.""" + self.loop.set_exception_handler(self.loop_exception_handler) + results = [] + + def bad_callback(): + raise ValueError("test error") + + def good_callback(): + results.append(1) + + self.loop.call_soon(bad_callback) + self.loop.call_soon(good_callback) + + self.run_briefly() + + # Good callback should still run + self.assertEqual(results, [1]) + # Exception should be recorded + self.assertEqual(len(self.exceptions), 1) + self.assertIsInstance(self.exceptions[0]['exception'], ValueError) + + def test_call_soon_with_context(self): + """Test call_soon with context argument.""" + var = contextvars.ContextVar('var') + results = [] + + def callback(): + results.append(var.get()) + + # Set value first, then copy context + var.set('test_value') + ctx = contextvars.copy_context() + + # Change value after copying - callback should see old value + var.set('different_value') + + # Schedule with the copied context + self.loop.call_soon(callback, context=ctx) + + self.run_briefly() + + # Should use the context's value (test_value) + self.assertEqual(results, ['test_value']) + + +class _TestCallLater: + """Tests for call_later and call_at functionality.""" + + def test_call_later_basic(self): + """Test basic call_later scheduling.""" + results = [] + start = time.monotonic() + + def callback(): + results.append(time.monotonic() - start) + self.loop.stop() + + self.loop.call_later(0.05, callback) + self.loop.run_forever() + + self.assertEqual(len(results), 1) + self.assertGreaterEqual(results[0], 0.04) + + def test_call_later_ordering(self): + """Test that 
call_later respects timing order.""" + results = [] + + def callback(x): + results.append(x) + if x == 3: + self.loop.stop() + + # Schedule out of order + self.loop.call_later(0.03, callback, 3) + self.loop.call_later(0.01, callback, 1) + self.loop.call_later(0.02, callback, 2) + + self.loop.run_forever() + + self.assertEqual(results, [1, 2, 3]) + + def test_call_later_cancel(self): + """Test cancelling a call_later handle.""" + results = [] + + def callback(x): + results.append(x) + if len(results) == 2: + self.loop.stop() + + self.loop.call_later(0.01, callback, 1) + handle = self.loop.call_later(0.02, callback, 2) + self.loop.call_later(0.03, callback, 3) + + handle.cancel() + self.loop.run_forever() + + self.assertEqual(results, [1, 3]) + + def test_call_later_zero_delay(self): + """Test call_later with zero delay.""" + results = [] + + def callback(x): + results.append(x) + + self.loop.call_later(0, callback, 1) + self.loop.call_later(0, callback, 2) + + self.run_briefly() + + self.assertEqual(results, [1, 2]) + + def test_call_later_negative_delay(self): + """Test call_later with negative delay (treated as 0).""" + results = [] + + def callback(x): + results.append(x) + + self.loop.call_later(-1, callback, 1) + + self.run_briefly() + + self.assertEqual(results, [1]) + + def test_call_at(self): + """Test call_at scheduling.""" + results = [] + now = self.loop.time() + + def callback(): + results.append(True) + self.loop.stop() + + self.loop.call_at(now + 0.05, callback) + self.loop.run_forever() + + self.assertEqual(results, [True]) + + def test_call_at_past_time(self): + """Test call_at with time in the past (should run immediately).""" + results = [] + now = self.loop.time() + + self.loop.call_at(now - 1, lambda: results.append(1)) + + self.run_briefly() + + self.assertEqual(results, [1]) + + def test_timer_handle_cancelled_property(self): + """Test TimerHandle.cancelled() method.""" + handle = self.loop.call_later(100, lambda: None) + + 
self.assertFalse(handle.cancelled()) + handle.cancel() + self.assertTrue(handle.cancelled()) + + +class _TestRunMethods: + """Tests for run_forever, run_until_complete, stop, close.""" + + def test_run_until_complete_coroutine(self): + """Test run_until_complete with a coroutine.""" + async def coro(): + return 42 + + result = self.loop.run_until_complete(coro()) + self.assertEqual(result, 42) + + def test_run_until_complete_future(self): + """Test run_until_complete with a future.""" + future = self.loop.create_future() + self.loop.call_soon(future.set_result, 'hello') + result = self.loop.run_until_complete(future) + self.assertEqual(result, 'hello') + + def test_run_until_complete_task(self): + """Test run_until_complete with a task.""" + async def coro(): + await asyncio.sleep(0.01) + return 'task_result' + + task = self.loop.create_task(coro()) + result = self.loop.run_until_complete(task) + self.assertEqual(result, 'task_result') + + def test_run_forever_stop(self): + """Test run_forever and stop.""" + results = [] + + def callback(): + results.append(1) + self.loop.stop() + + self.loop.call_soon(callback) + self.loop.run_forever() + + self.assertEqual(results, [1]) + self.assertFalse(self.loop.is_running()) + + def test_stop_before_run(self): + """Test calling stop before run_forever.""" + self.loop.stop() + self.loop.run_forever() # Should return immediately + + def test_close(self): + """Test closing the loop.""" + self.assertFalse(self.loop.is_closed()) + self.loop.close() + self.assertTrue(self.loop.is_closed()) + + # Should be idempotent + self.loop.close() + self.assertTrue(self.loop.is_closed()) + + def test_close_running_raises(self): + """Test that closing a running loop raises.""" + async def try_close(): + with self.assertRaises(RuntimeError): + self.loop.close() + + self.loop.run_until_complete(try_close()) + + def test_run_until_complete_nested_raises(self): + """Test that nested run_until_complete raises.""" + async def outer(): + # Use 0.1 to 
ensure it goes through timer path, not fast path + sleep_coro = asyncio.sleep(0.1) + try: + with self.assertRaises(RuntimeError): + self.loop.run_until_complete(sleep_coro) + finally: + sleep_coro.close() + + self.loop.run_until_complete(outer()) + + def test_run_until_complete_on_closed_raises(self): + """Test that run_until_complete on closed loop raises.""" + self.loop.close() + + async def coro(): + pass + + c = coro() + try: + with self.assertRaises(RuntimeError): + self.loop.run_until_complete(c) + finally: + c.close() + + def test_is_running(self): + """Test is_running() method.""" + self.assertFalse(self.loop.is_running()) + + async def check(): + self.assertTrue(self.loop.is_running()) + + self.loop.run_until_complete(check()) + self.assertFalse(self.loop.is_running()) + + def test_time(self): + """Test time() method returns monotonic time.""" + t1 = self.loop.time() + time.sleep(0.01) + t2 = self.loop.time() + + self.assertGreater(t2, t1) + self.assertAlmostEqual(t2 - t1, 0.01, places=2) + + +class _TestFuturesAndTasks: + """Tests for Future and Task creation.""" + + def test_create_future(self): + """Test create_future.""" + future = self.loop.create_future() + self.assertIsInstance(future, asyncio.Future) + self.assertFalse(future.done()) + + self.loop.call_soon(future.set_result, 123) + result = self.loop.run_until_complete(future) + self.assertEqual(result, 123) + + def test_create_future_exception(self): + """Test create_future with exception.""" + future = self.loop.create_future() + + self.loop.call_soon( + future.set_exception, + ValueError("test error") + ) + + with self.assertRaises(ValueError): + self.loop.run_until_complete(future) + + def test_create_task(self): + """Test create_task.""" + async def coro(): + await asyncio.sleep(0.01) + return 42 + + async def main(): + task = self.loop.create_task(coro()) + result = await task + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, 42) + + def 
test_create_task_with_name(self): + """Test create_task with name argument.""" + async def coro(): + return 1 + + async def main(): + task = self.loop.create_task(coro(), name='test_task') + self.assertEqual(task.get_name(), 'test_task') + await task + + self.loop.run_until_complete(main()) + + def test_task_cancel(self): + """Test task cancellation.""" + async def long_running(): + await asyncio.sleep(10) + + async def main(): + task = self.loop.create_task(long_running()) + await asyncio.sleep(0.01) + task.cancel() + with self.assertRaises(asyncio.CancelledError): + await task + + self.loop.run_until_complete(main()) + + def test_gather(self): + """Test asyncio.gather with our loop.""" + async def task(n): + await asyncio.sleep(0.01) + return n * 2 + + async def main(): + results = await asyncio.gather( + task(1), task(2), task(3) + ) + return results + + results = self.loop.run_until_complete(main()) + self.assertEqual(results, [2, 4, 6]) + + def test_task_factory(self): + """Test custom task factory.""" + factory_calls = [] + + def task_factory(loop, coro): + factory_calls.append(coro) + return asyncio.Task(coro, loop=loop) + + self.loop.set_task_factory(task_factory) + self.assertEqual(self.loop.get_task_factory(), task_factory) + + async def coro(): + return 1 + + self.loop.run_until_complete(coro()) + + self.assertEqual(len(factory_calls), 1) + + # Reset + self.loop.set_task_factory(None) + self.assertIsNone(self.loop.get_task_factory()) + + +class _TestExceptionHandling: + """Tests for exception handling.""" + + def test_default_exception_handler(self): + """Test default exception handler.""" + self.loop.set_exception_handler(self.loop_exception_handler) + + def callback(): + raise ValueError("test error") + + self.loop.call_soon(callback) + self.run_briefly() + + self.assertEqual(len(self.exceptions), 1) + self.assertIn('exception', self.exceptions[0]) + self.assertIsInstance(self.exceptions[0]['exception'], ValueError) + + def 
test_custom_exception_handler(self): + """Test custom exception handler.""" + errors = [] + + def handler(loop, context): + errors.append(context) + + self.loop.set_exception_handler(handler) + self.assertEqual(self.loop.get_exception_handler(), handler) + + def callback(): + raise RuntimeError("custom test") + + self.loop.call_soon(callback) + self.run_briefly() + + self.assertEqual(len(errors), 1) + self.assertIsInstance(errors[0]['exception'], RuntimeError) + + def test_call_exception_handler(self): + """Test call_exception_handler method.""" + errors = [] + + def handler(loop, context): + errors.append(context) + + self.loop.set_exception_handler(handler) + self.loop.call_exception_handler({ + 'message': 'test message', + 'exception': ValueError('test'), + }) + + self.assertEqual(len(errors), 1) + self.assertEqual(errors[0]['message'], 'test message') + + +class _TestDebugMode: + """Tests for debug mode.""" + + def test_debug_mode_toggle(self): + """Test debug mode toggle.""" + self.assertFalse(self.loop.get_debug()) + self.loop.set_debug(True) + self.assertTrue(self.loop.get_debug()) + self.loop.set_debug(False) + self.assertFalse(self.loop.get_debug()) + + +class _TestReaderWriter: + """Tests for add_reader/add_writer/remove_reader/remove_writer.""" + + def test_add_remove_reader(self): + """Test add_reader and remove_reader.""" + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + + try: + results = [] + + def reader_callback(): + results.append('read') + self.loop.remove_reader(sock.fileno()) + self.loop.stop() + + self.loop.add_reader(sock.fileno(), reader_callback) + + # Trigger readability by connecting to a server + # For this test, we'll use a timeout approach + self.loop.call_later(0.01, self.loop.stop) + self.loop.run_forever() + + # Remove reader should return True + removed = self.loop.remove_reader(sock.fileno()) + # May be False if already removed + self.assertIn(removed, [True, False]) + + # Remove again should 
return False + removed = self.loop.remove_reader(sock.fileno()) + self.assertFalse(removed) + + finally: + sock.close() + + def test_add_remove_writer(self): + """Test add_writer and remove_writer.""" + # Use a socket pair for reliable write readiness + rsock, wsock = socket.socketpair() + rsock.setblocking(False) + wsock.setblocking(False) + + try: + results = [] + + def writer_callback(): + results.append('write') + self.loop.remove_writer(wsock.fileno()) + self.loop.stop() + + self.loop.add_writer(wsock.fileno(), writer_callback) + + # Add timeout fallback in case writer doesn't fire immediately + self.loop.call_later(0.1, self.loop.stop) + self.loop.run_forever() + + # Remove writer if still registered + self.loop.remove_writer(wsock.fileno()) + + # Socket should be writable immediately (or within timeout) + self.assertIn('write', results) + + finally: + rsock.close() + wsock.close() + + +class _TestAsyncioIntegration: + """Tests for integration with asyncio APIs.""" + + def test_asyncio_sleep(self): + """Test asyncio.sleep works correctly.""" + async def main(): + start = time.monotonic() + await asyncio.sleep(0.05) + elapsed = time.monotonic() - start + return elapsed + + elapsed = self.loop.run_until_complete(main()) + self.assertGreaterEqual(elapsed, 0.04) + + def test_asyncio_sleep_zero_fast_path(self): + """Test asyncio.sleep(0) fast path returns immediately.""" + async def main(): + start = time.monotonic() + # sleep(0) should use fast path and return immediately + await asyncio.sleep(0) + elapsed = time.monotonic() - start + return elapsed + + elapsed = self.loop.run_until_complete(main()) + # Should complete very quickly (fast path) + self.assertLess(elapsed, 0.01) + + def test_asyncio_wait_for(self): + """Test asyncio.wait_for.""" + async def slow(): + await asyncio.sleep(10) + + async def main(): + with self.assertRaises(asyncio.TimeoutError): + await asyncio.wait_for(slow(), timeout=0.05) + + self.loop.run_until_complete(main()) + + def 
test_asyncio_wait_for_completed(self): + """Test asyncio.wait_for with already completed future.""" + async def fast(): + return 'done' + + async def main(): + result = await asyncio.wait_for(fast(), timeout=10) + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, 'done') + + def test_asyncio_shield(self): + """Test asyncio.shield.""" + async def important(): + await asyncio.sleep(0.01) + return "done" + + async def main(): + task = asyncio.shield(important()) + result = await task + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, "done") + + def test_asyncio_all_tasks(self): + """Test asyncio.all_tasks.""" + async def bg_task(): + await asyncio.sleep(1) + + async def main(): + task = self.loop.create_task(bg_task()) + await asyncio.sleep(0) + all_tasks = asyncio.all_tasks(self.loop) + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + return len(all_tasks) + + count = self.loop.run_until_complete(main()) + self.assertGreaterEqual(count, 1) + + def test_asyncio_current_task(self): + """Test asyncio.current_task.""" + async def main(): + current = asyncio.current_task(self.loop) + self.assertIsNotNone(current) + return current + + task = self.loop.run_until_complete(main()) + self.assertIsInstance(task, asyncio.Task) + + def test_asyncio_ensure_future(self): + """Test asyncio.ensure_future.""" + async def coro(): + return 42 + + async def main(): + future = asyncio.ensure_future(coro(), loop=self.loop) + result = await future + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, 42) + + +# ============================================================================= +# Test classes that combine mixins with test cases +# ============================================================================= + +class TestErlangCallSoon(_TestCallSoon, tb.ErlangTestCase): + pass + + +class TestAIOCallSoon(_TestCallSoon, 
tb.AIOTestCase): + pass + + +class TestErlangCallLater(_TestCallLater, tb.ErlangTestCase): + pass + + +class TestAIOCallLater(_TestCallLater, tb.AIOTestCase): + pass + + +class TestErlangRunMethods(_TestRunMethods, tb.ErlangTestCase): + pass + + +class TestAIORunMethods(_TestRunMethods, tb.AIOTestCase): + pass + + +class TestErlangFuturesAndTasks(_TestFuturesAndTasks, tb.ErlangTestCase): + pass + + +class TestAIOFuturesAndTasks(_TestFuturesAndTasks, tb.AIOTestCase): + pass + + +class TestErlangExceptionHandling(_TestExceptionHandling, tb.ErlangTestCase): + pass + + +class TestAIOExceptionHandling(_TestExceptionHandling, tb.AIOTestCase): + pass + + +class TestErlangDebugMode(_TestDebugMode, tb.ErlangTestCase): + pass + + +class TestAIODebugMode(_TestDebugMode, tb.AIOTestCase): + pass + + +class TestErlangReaderWriter(_TestReaderWriter, tb.ErlangTestCase): + pass + + +class TestAIOReaderWriter(_TestReaderWriter, tb.AIOTestCase): + pass + + +class TestErlangAsyncioIntegration(_TestAsyncioIntegration, tb.ErlangTestCase): + pass + + +class TestAIOAsyncioIntegration(_TestAsyncioIntegration, tb.AIOTestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/priv/tests/test_context.py b/priv/tests/test_context.py new file mode 100644 index 0000000..67554f8 --- /dev/null +++ b/priv/tests/test_context.py @@ -0,0 +1,348 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +Context variable tests adapted from uvloop's test_context.py. + +These tests verify context variable functionality: +- Context propagation in tasks +- Context isolation between tasks +- Context in callbacks +""" + +import asyncio +import contextvars +import unittest + +from . import _testbase as tb + + +# Context variables for testing +request_id = contextvars.ContextVar('request_id', default=None) +user_name = contextvars.ContextVar('user_name', default='anonymous') + + +class _TestContextBasic: + """Tests for basic context variable functionality.""" + + def test_context_in_task(self): + """Test context variable in a task.""" + results = [] + + async def task_func(): + results.append(request_id.get()) + request_id.set('task_request') + results.append(request_id.get()) + + async def main(): + request_id.set('main_request') + results.append(request_id.get()) + await task_func() + results.append(request_id.get()) + + self.loop.run_until_complete(main()) + + self.assertEqual(results, [ + 'main_request', # Before task + 'main_request', # In task, inherited + 'task_request', # In task, after set + 'task_request', # Back in main, context shared with await + ]) + + def test_context_in_create_task(self): + """Test context variable with create_task.""" + results = [] + + async def task_func(): + results.append(('task', request_id.get())) + + async def main(): + request_id.set('main') + task = self.loop.create_task(task_func()) + await task + results.append(('main', request_id.get())) + + self.loop.run_until_complete(main()) + + self.assertEqual(results, [ + ('task', 'main'), # Task inherits context + ('main', 'main'), + ]) + + def test_context_isolation_between_tasks(self): + """Test context isolation between concurrent tasks.""" + results = [] + + async def task_func(name, value): + request_id.set(value) + await asyncio.sleep(0.01) # Yield to other tasks + results.append((name, request_id.get())) + + async def main(): + await asyncio.gather( + task_func('task1', 
'value1'), + task_func('task2', 'value2'), + task_func('task3', 'value3'), + ) + + self.loop.run_until_complete(main()) + + # Each task should see its own value + task1_results = [r for r in results if r[0] == 'task1'] + task2_results = [r for r in results if r[0] == 'task2'] + task3_results = [r for r in results if r[0] == 'task3'] + + self.assertEqual(task1_results, [('task1', 'value1')]) + self.assertEqual(task2_results, [('task2', 'value2')]) + self.assertEqual(task3_results, [('task3', 'value3')]) + + +class _TestContextCallbacks: + """Tests for context in callbacks.""" + + def test_context_in_call_soon(self): + """Test context variable in call_soon callback.""" + results = [] + + def callback(): + results.append(request_id.get()) + self.loop.stop() + + request_id.set('callback_context') + ctx = contextvars.copy_context() + + # Clear context + request_id.set(None) + + # Schedule with context + self.loop.call_soon(callback, context=ctx) + self.loop.run_forever() + + self.assertEqual(results, ['callback_context']) + + def test_context_in_call_later(self): + """Test context variable in call_later callback.""" + results = [] + + def callback(): + results.append(request_id.get()) + self.loop.stop() + + request_id.set('later_context') + ctx = contextvars.copy_context() + + request_id.set('different') + + self.loop.call_later(0.01, callback, context=ctx) + self.loop.run_forever() + + self.assertEqual(results, ['later_context']) + + def test_context_default_without_context_arg(self): + """Test that callbacks use current context when no context arg.""" + results = [] + + def callback(): + results.append(request_id.get()) + self.loop.stop() + + request_id.set('current_context') + + self.loop.call_soon(callback) + self.loop.run_forever() + + # Should use the current context + self.assertIn(results[0], ['current_context', None]) + + +class _TestContextCopy: + """Tests for context copying behavior.""" + + def test_context_copy(self): + """Test 
contextvars.copy_context().""" + request_id.set('original') + + ctx = contextvars.copy_context() + + request_id.set('modified') + + # Context copy should have original value + self.assertEqual(ctx.get(request_id), 'original') + self.assertEqual(request_id.get(), 'modified') + + def test_context_run(self): + """Test context.run() method.""" + results = [] + + def func(): + results.append(request_id.get()) + request_id.set('inside_run') + results.append(request_id.get()) + + request_id.set('before_run') + ctx = contextvars.copy_context() + + ctx.run(func) + + results.append(request_id.get()) # Should still be 'before_run' + + self.assertEqual(results, [ + 'before_run', # Inside run, inherited + 'inside_run', # Inside run, after set + 'before_run', # Outside run, unchanged + ]) + + +class _TestContextTaskSpecific: + """Tests for task-specific context behavior.""" + + def test_task_context_argument(self): + """Test create_task with context argument (Python 3.11+).""" + import sys + if sys.version_info < (3, 11): + self.skipTest("context argument requires Python 3.11+") + + results = [] + + async def task_func(): + results.append(request_id.get()) + + async def main(): + request_id.set('main') + ctx = contextvars.copy_context() + + request_id.set('different') + + task = self.loop.create_task(task_func(), context=ctx) + await task + + self.loop.run_until_complete(main()) + + self.assertEqual(results, ['main']) + + def test_current_task_context(self): + """Test that current_task sees correct context.""" + results = [] + + async def check_context(): + current = asyncio.current_task() + self.assertIsNotNone(current) + results.append(request_id.get()) + + async def main(): + request_id.set('task_context') + await check_context() + + self.loop.run_until_complete(main()) + + self.assertEqual(results, ['task_context']) + + +class _TestContextMultipleVars: + """Tests for multiple context variables.""" + + def test_multiple_context_vars(self): + """Test multiple context variables 
in same task.""" + results = [] + + async def task_func(): + results.append((request_id.get(), user_name.get())) + request_id.set('new_request') + user_name.set('new_user') + results.append((request_id.get(), user_name.get())) + + async def main(): + request_id.set('req1') + user_name.set('user1') + await task_func() + results.append((request_id.get(), user_name.get())) + + self.loop.run_until_complete(main()) + + self.assertEqual(results, [ + ('req1', 'user1'), # Inherited + ('new_request', 'new_user'), # After modification + ('new_request', 'new_user'), # Back in main, context shared + ]) + + def test_context_vars_parallel_tasks(self): + """Test multiple context vars in parallel tasks.""" + results = {} + + async def task_func(task_id, req_val, user_val): + request_id.set(req_val) + user_name.set(user_val) + await asyncio.sleep(0.01) + results[task_id] = (request_id.get(), user_name.get()) + + async def main(): + await asyncio.gather( + task_func('t1', 'req1', 'user1'), + task_func('t2', 'req2', 'user2'), + task_func('t3', 'req3', 'user3'), + ) + + self.loop.run_until_complete(main()) + + self.assertEqual(results['t1'], ('req1', 'user1')) + self.assertEqual(results['t2'], ('req2', 'user2')) + self.assertEqual(results['t3'], ('req3', 'user3')) + + +# ============================================================================= +# Test classes that combine mixins with test cases +# ============================================================================= + +class TestErlangContextBasic(_TestContextBasic, tb.ErlangTestCase): + pass + + +class TestAIOContextBasic(_TestContextBasic, tb.AIOTestCase): + pass + + +class TestErlangContextCallbacks(_TestContextCallbacks, tb.ErlangTestCase): + pass + + +class TestAIOContextCallbacks(_TestContextCallbacks, tb.AIOTestCase): + pass + + +class TestErlangContextCopy(_TestContextCopy, tb.ErlangTestCase): + pass + + +class TestAIOContextCopy(_TestContextCopy, tb.AIOTestCase): + pass + + +class 
TestErlangContextTaskSpecific(_TestContextTaskSpecific, tb.ErlangTestCase): + pass + + +class TestAIOContextTaskSpecific(_TestContextTaskSpecific, tb.AIOTestCase): + pass + + +class TestErlangContextMultipleVars(_TestContextMultipleVars, tb.ErlangTestCase): + pass + + +class TestAIOContextMultipleVars(_TestContextMultipleVars, tb.AIOTestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/priv/tests/test_dns.py b/priv/tests/test_dns.py new file mode 100644 index 0000000..8db3e44 --- /dev/null +++ b/priv/tests/test_dns.py @@ -0,0 +1,289 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +DNS resolution tests adapted from uvloop's test_dns.py. + +These tests verify DNS operations: +- getaddrinfo +- getnameinfo +""" + +import asyncio +import socket +import unittest + +from . 
import _testbase as tb + + +class _TestGetaddrinfo: + """Tests for getaddrinfo functionality.""" + + def test_getaddrinfo_localhost(self): + """Test getaddrinfo for localhost.""" + async def main(): + result = await self.loop.getaddrinfo( + 'localhost', 80, + family=socket.AF_INET, + type=socket.SOCK_STREAM + ) + return result + + result = self.loop.run_until_complete(main()) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + + # Check structure: (family, type, proto, canonname, sockaddr) + family, type_, proto, canonname, sockaddr = result[0] + self.assertEqual(family, socket.AF_INET) + self.assertEqual(type_, socket.SOCK_STREAM) + self.assertIsInstance(sockaddr, tuple) + self.assertEqual(len(sockaddr), 2) # (host, port) + self.assertEqual(sockaddr[1], 80) + + def test_getaddrinfo_127_0_0_1(self): + """Test getaddrinfo for IP address.""" + async def main(): + result = await self.loop.getaddrinfo( + '127.0.0.1', 8080, + family=socket.AF_INET, + type=socket.SOCK_STREAM + ) + return result + + result = self.loop.run_until_complete(main()) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + + family, type_, proto, canonname, sockaddr = result[0] + self.assertEqual(family, socket.AF_INET) + self.assertEqual(sockaddr[0], '127.0.0.1') + self.assertEqual(sockaddr[1], 8080) + + def test_getaddrinfo_no_port(self): + """Test getaddrinfo without port.""" + async def main(): + result = await self.loop.getaddrinfo( + 'localhost', None, + family=socket.AF_INET, + type=socket.SOCK_STREAM + ) + return result + + result = self.loop.run_until_complete(main()) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + + def test_getaddrinfo_service_name(self): + """Test getaddrinfo with service name.""" + async def main(): + result = await self.loop.getaddrinfo( + 'localhost', 'http', + family=socket.AF_INET, + type=socket.SOCK_STREAM + ) + return result + + result = self.loop.run_until_complete(main()) + + 
self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + + family, type_, proto, canonname, sockaddr = result[0] + self.assertEqual(sockaddr[1], 80) # HTTP port + + def test_getaddrinfo_udp(self): + """Test getaddrinfo for UDP.""" + async def main(): + result = await self.loop.getaddrinfo( + 'localhost', 53, + family=socket.AF_INET, + type=socket.SOCK_DGRAM + ) + return result + + result = self.loop.run_until_complete(main()) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + + family, type_, proto, canonname, sockaddr = result[0] + self.assertEqual(type_, socket.SOCK_DGRAM) + + def test_getaddrinfo_any_family(self): + """Test getaddrinfo with any address family.""" + async def main(): + result = await self.loop.getaddrinfo( + 'localhost', 80, + family=socket.AF_UNSPEC, + type=socket.SOCK_STREAM + ) + return result + + result = self.loop.run_until_complete(main()) + + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + + def test_getaddrinfo_flags(self): + """Test getaddrinfo with flags.""" + async def main(): + result = await self.loop.getaddrinfo( + 'localhost', 80, + family=socket.AF_INET, + type=socket.SOCK_STREAM, + flags=socket.AI_PASSIVE + ) + return result + + result = self.loop.run_until_complete(main()) + + self.assertIsInstance(result, list) + + def test_getaddrinfo_parallel(self): + """Test multiple parallel getaddrinfo calls.""" + async def main(): + results = await asyncio.gather( + self.loop.getaddrinfo('localhost', 80), + self.loop.getaddrinfo('localhost', 443), + self.loop.getaddrinfo('127.0.0.1', 8080), + ) + return results + + results = self.loop.run_until_complete(main()) + + self.assertEqual(len(results), 3) + for result in results: + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + + def test_getaddrinfo_bad_host(self): + """Test getaddrinfo with non-existent host.""" + async def main(): + with self.assertRaises(socket.gaierror): + await 
self.loop.getaddrinfo( + 'invalid.host.that.does.not.exist.example', + 80 + ) + + self.loop.run_until_complete(main()) + + +class _TestGetnameinfo: + """Tests for getnameinfo functionality.""" + + def test_getnameinfo_basic(self): + """Test getnameinfo for localhost address.""" + async def main(): + result = await self.loop.getnameinfo( + ('127.0.0.1', 80) + ) + return result + + result = self.loop.run_until_complete(main()) + + self.assertIsInstance(result, tuple) + self.assertEqual(len(result), 2) + host, port = result + self.assertIsInstance(host, str) + self.assertIsInstance(port, str) + + def test_getnameinfo_numeric(self): + """Test getnameinfo with numeric flags.""" + async def main(): + result = await self.loop.getnameinfo( + ('127.0.0.1', 80), + socket.NI_NUMERICHOST | socket.NI_NUMERICSERV + ) + return result + + result = self.loop.run_until_complete(main()) + + host, port = result + self.assertEqual(host, '127.0.0.1') + self.assertEqual(port, '80') + + def test_getnameinfo_ipv6(self): + """Test getnameinfo with IPv6 address.""" + async def main(): + result = await self.loop.getnameinfo( + ('::1', 80), + socket.NI_NUMERICHOST | socket.NI_NUMERICSERV + ) + return result + + try: + result = self.loop.run_until_complete(main()) + host, port = result + self.assertIn(':', host) # IPv6 contains colons + self.assertEqual(port, '80') + except socket.gaierror: + # IPv6 may not be available + pass + + +class _TestDNSConcurrent: + """Tests for concurrent DNS operations.""" + + def test_concurrent_getaddrinfo(self): + """Test many concurrent getaddrinfo operations.""" + async def main(): + tasks = [ + self.loop.getaddrinfo('localhost', port) + for port in range(8000, 8010) + ] + results = await asyncio.gather(*tasks) + return results + + results = self.loop.run_until_complete(main()) + + self.assertEqual(len(results), 10) + for result in results: + self.assertIsInstance(result, list) + self.assertGreater(len(result), 0) + + +# 
============================================================================= +# Test classes that combine mixins with test cases +# ============================================================================= + +class TestErlangGetaddrinfo(_TestGetaddrinfo, tb.ErlangTestCase): + pass + + +class TestAIOGetaddrinfo(_TestGetaddrinfo, tb.AIOTestCase): + pass + + +class TestErlangGetnameinfo(_TestGetnameinfo, tb.ErlangTestCase): + pass + + +class TestAIOGetnameinfo(_TestGetnameinfo, tb.AIOTestCase): + pass + + +class TestErlangDNSConcurrent(_TestDNSConcurrent, tb.ErlangTestCase): + pass + + +class TestAIODNSConcurrent(_TestDNSConcurrent, tb.AIOTestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/priv/tests/test_erlang_api.py b/priv/tests/test_erlang_api.py new file mode 100644 index 0000000..7c5f440 --- /dev/null +++ b/priv/tests/test_erlang_api.py @@ -0,0 +1,577 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Erlang-specific API tests. + +These tests verify the Erlang-specific extensions and compatibility APIs: +- ErlangEventLoop +- ErlangEventLoopPolicy +- erlang.run() +- erlang.install() +- erlang.new_event_loop() + +The unified 'erlang' module provides both: +- Callback support: erlang.call(), erlang.async_call(), dynamic function access +- Event loop API: erlang.run(), erlang.new_event_loop(), erlang.ErlangEventLoop +""" + +import asyncio +import sys +import unittest +import warnings + +from . 
import _testbase as tb + + +def _get_erlang_event_loop(): + """Get ErlangEventLoop class from available sources.""" + # Try unified erlang module first + try: + import erlang + if hasattr(erlang, 'ErlangEventLoop'): + return erlang.ErlangEventLoop + except ImportError: + pass + + # Try _erlang_impl package + try: + from _erlang_impl import ErlangEventLoop + return ErlangEventLoop + except ImportError: + pass + + # Add parent directory to path and try again + import os + parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + + from _erlang_impl import ErlangEventLoop + return ErlangEventLoop + + +def _get_erlang_module(): + """Get the erlang module with event loop API.""" + import erlang + if hasattr(erlang, 'run'): + return erlang + + # Extension not loaded - try to load _erlang_impl manually + import os + parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + + import _erlang_impl + # Manually extend + erlang.run = _erlang_impl.run + erlang.new_event_loop = _erlang_impl.new_event_loop + erlang.install = _erlang_impl.install + erlang.EventLoopPolicy = _erlang_impl.EventLoopPolicy + erlang.ErlangEventLoop = _erlang_impl.ErlangEventLoop + return erlang + + +class TestErlangEventLoopCreation(unittest.TestCase): + """Tests for ErlangEventLoop creation and basic properties.""" + + def test_create_event_loop(self): + """Test creating an ErlangEventLoop.""" + ErlangEventLoop = _get_erlang_event_loop() + + loop = ErlangEventLoop() + self.assertIsInstance(loop, asyncio.AbstractEventLoop) + self.assertFalse(loop.is_running()) + self.assertFalse(loop.is_closed()) + loop.close() + self.assertTrue(loop.is_closed()) + + def test_event_loop_implements_interface(self): + """Test that ErlangEventLoop implements AbstractEventLoop interface.""" + ErlangEventLoop = _get_erlang_event_loop() + + loop = 
ErlangEventLoop() + try: + # Check required methods exist + methods = [ + 'run_forever', + 'run_until_complete', + 'stop', + 'close', + 'is_running', + 'is_closed', + 'call_soon', + 'call_later', + 'call_at', + 'time', + 'create_future', + 'create_task', + 'add_reader', + 'remove_reader', + 'add_writer', + 'remove_writer', + 'sock_recv', + 'sock_sendall', + 'sock_connect', + 'sock_accept', + 'create_server', + 'create_connection', + 'create_datagram_endpoint', + 'getaddrinfo', + 'getnameinfo', + 'run_in_executor', + 'set_exception_handler', + 'get_exception_handler', + 'get_debug', + 'set_debug', + ] + + for method in methods: + self.assertTrue( + hasattr(loop, method), + f"ErlangEventLoop missing method: {method}" + ) + self.assertTrue( + callable(getattr(loop, method)), + f"ErlangEventLoop.{method} is not callable" + ) + finally: + loop.close() + + +def _get_event_loop_policy(): + """Get ErlangEventLoopPolicy class from available sources.""" + # Try unified erlang module first + try: + import erlang + if hasattr(erlang, 'EventLoopPolicy'): + return erlang.EventLoopPolicy + except ImportError: + pass + + # Try _erlang_impl package + try: + from _erlang_impl._policy import ErlangEventLoopPolicy + return ErlangEventLoopPolicy + except ImportError: + pass + + # Add parent directory to path and try again + import os + parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + + from _erlang_impl._policy import ErlangEventLoopPolicy + return ErlangEventLoopPolicy + + +class TestErlangEventLoopPolicy(unittest.TestCase): + """Tests for ErlangEventLoopPolicy.""" + + def test_policy_creates_erlang_loop(self): + """Test that policy creates ErlangEventLoop instances.""" + ErlangEventLoop = _get_erlang_event_loop() + ErlangEventLoopPolicy = _get_event_loop_policy() + + policy = ErlangEventLoopPolicy() + loop = policy.new_event_loop() + + try: + self.assertIsInstance(loop, ErlangEventLoop) + 
finally: + loop.close() + + def test_policy_get_event_loop(self): + """Test policy.get_event_loop() method.""" + ErlangEventLoopPolicy = _get_event_loop_policy() + + policy = ErlangEventLoopPolicy() + old_policy = asyncio.get_event_loop_policy() + + try: + asyncio.set_event_loop_policy(policy) + loop = policy.new_event_loop() + policy.set_event_loop(loop) + + # get_event_loop should return the set loop + retrieved = policy.get_event_loop() + self.assertIs(retrieved, loop) + + loop.close() + finally: + asyncio.set_event_loop_policy(old_policy) + + +class TestErlangModuleFunctions(unittest.TestCase): + """Tests for erlang module functions.""" + + def test_new_event_loop(self): + """Test erlang.new_event_loop() function.""" + erlang = _get_erlang_module() + ErlangEventLoop = _get_erlang_event_loop() + + loop = erlang.new_event_loop() + try: + self.assertIsInstance(loop, ErlangEventLoop) + finally: + loop.close() + + def test_run_function(self): + """Test erlang.run() function.""" + erlang = _get_erlang_module() + + async def main(): + return 42 + + result = erlang.run(main()) + self.assertEqual(result, 42) + + def test_run_with_debug(self): + """Test erlang.run() with debug flag.""" + erlang = _get_erlang_module() + + async def main(): + return 'debug_test' + + result = erlang.run(main(), debug=True) + self.assertEqual(result, 'debug_test') + + def test_install_function(self): + """Test erlang.install() function.""" + erlang = _get_erlang_module() + + old_policy = asyncio.get_event_loop_policy() + + try: + if sys.version_info >= (3, 12): + # Should emit deprecation warning + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + erlang.install() + self.assertTrue(len(w) >= 1) + self.assertTrue( + any(issubclass(warning.category, DeprecationWarning) + for warning in w) + ) + else: + erlang.install() + + # Policy should be ErlangEventLoopPolicy + policy = asyncio.get_event_loop_policy() + self.assertIsInstance(policy, erlang.EventLoopPolicy) 
+
+        finally:
+            asyncio.set_event_loop_policy(old_policy)
+
+
+class TestErlangLoopSpecificFeatures(tb.ErlangTestCase):
+    """Tests for Erlang-specific event loop features."""
+
+    def test_time_resolution(self):
+        """Test that loop.time() is monotonically non-decreasing."""
+        t1 = self.loop.time()
+        t2 = self.loop.time()
+
+        # Times should be monotonic
+        self.assertGreaterEqual(t2, t1)
+
+    def test_shutdown_asyncgens(self):
+        """Test shutdown_asyncgens method."""
+        async def main():
+            await self.loop.shutdown_asyncgens()
+
+        self.loop.run_until_complete(main())
+
+    def test_shutdown_default_executor(self):
+        """Test shutdown_default_executor method."""
+        # First use the executor
+        def blocking():
+            return 42
+
+        async def main():
+            result = await self.loop.run_in_executor(None, blocking)
+            await self.loop.shutdown_default_executor()
+            return result
+
+        result = self.loop.run_until_complete(main())
+        self.assertEqual(result, 42)
+
+
+class TestUVLoopCompatibility(tb.ErlangTestCase):
+    """Tests for uvloop API compatibility."""
+
+    def test_uvloop_like_api(self):
+        """Test that erlang module provides uvloop-like API."""
+        erlang = _get_erlang_module()
+
+        # uvloop provides these
+        self.assertTrue(hasattr(erlang, 'run'))
+        self.assertTrue(hasattr(erlang, 'new_event_loop'))
+        self.assertTrue(hasattr(erlang, 'install'))
+        self.assertTrue(hasattr(erlang, 'EventLoopPolicy'))
+
+    def test_event_loop_is_asyncio_compatible(self):
+        """Test that ErlangEventLoop works with asyncio functions."""
+        async def main():
+            # Test asyncio.sleep
+            await asyncio.sleep(0.01)
+
+            # Test asyncio.gather
+            results = await asyncio.gather(
+                asyncio.sleep(0.01, result=1),
+                asyncio.sleep(0.01, result=2),
+            )
+            return results
+
+        results = self.loop.run_until_complete(main())
+        self.assertEqual(results, [1, 2])
+
+    def test_drop_in_replacement(self):
+        """Test that ErlangEventLoop can be used as drop-in replacement."""
+        # Create a function that uses asyncio APIs
+        async def typical_async_code():
+            # Task creation
+            task = 
asyncio.create_task(asyncio.sleep(0.01, result='task')) + + # Future + future = self.loop.create_future() + self.loop.call_soon(future.set_result, 'future') + + # Gather results + task_result = await task + future_result = await future + + return task_result, future_result + + results = self.loop.run_until_complete(typical_async_code()) + self.assertEqual(results, ('task', 'future')) + + +def _get_mode_module(): + """Get the mode module for execution mode detection.""" + # Try unified erlang module first + try: + import erlang + if hasattr(erlang, 'detect_mode') and hasattr(erlang, 'ExecutionMode'): + return erlang + except ImportError: + pass + + # Try _erlang_impl package + try: + from _erlang_impl._mode import detect_mode, ExecutionMode + class _ModeModule: + detect_mode = detect_mode + ExecutionMode = ExecutionMode + return _ModeModule() + except ImportError: + pass + + # Add parent directory to path and try again + import os + parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + if parent_dir not in sys.path: + sys.path.insert(0, parent_dir) + + from _erlang_impl._mode import detect_mode, ExecutionMode + class _ModeModule: + detect_mode = detect_mode + ExecutionMode = ExecutionMode + return _ModeModule() + + +class TestExecutionMode(unittest.TestCase): + """Tests for execution mode detection.""" + + def test_detect_mode(self): + """Test execution mode detection.""" + mode_module = _get_mode_module() + mode = mode_module.detect_mode() + self.assertIsInstance(mode, mode_module.ExecutionMode) + + def test_execution_modes_defined(self): + """Test that execution modes are defined.""" + mode_module = _get_mode_module() + ExecutionMode = mode_module.ExecutionMode + + # Check that expected modes exist + self.assertTrue(hasattr(ExecutionMode, 'SHARED_GIL')) + self.assertTrue(hasattr(ExecutionMode, 'SUBINTERP')) + self.assertTrue(hasattr(ExecutionMode, 'FREE_THREADED')) + + +class TestEventLoopPolicy(unittest.TestCase): + """Test event loop policy 
installation.""" + + def setUp(self): + # Save original policy + self._original_policy = asyncio.get_event_loop_policy() + + def tearDown(self): + # Restore original policy + asyncio.set_event_loop_policy(self._original_policy) + + def test_set_event_loop_policy(self): + """Test: asyncio.set_event_loop_policy(erlang.EventLoopPolicy())""" + erlang = _get_erlang_module() + + # Install erlang event loop policy + asyncio.set_event_loop_policy(erlang.EventLoopPolicy()) + + # Verify policy is installed + policy = asyncio.get_event_loop_policy() + self.assertIsInstance(policy, erlang.EventLoopPolicy) + + # Verify new loops use Erlang implementation + loop = asyncio.new_event_loop() + try: + # Run a simple coroutine + async def simple(): + await asyncio.sleep(0.01) + return 42 + + result = loop.run_until_complete(simple()) + self.assertEqual(result, 42) + finally: + loop.close() + + def test_asyncio_run_with_policy(self): + """Test: asyncio.run() with erlang policy installed.""" + erlang = _get_erlang_module() + asyncio.set_event_loop_policy(erlang.EventLoopPolicy()) + + async def main(): + await asyncio.sleep(0.01) + return "done" + + result = asyncio.run(main()) + self.assertEqual(result, "done") + + +class TestManualLoopSetup(unittest.TestCase): + """Test manual event loop setup.""" + + def test_new_event_loop(self): + """Test: erlang.new_event_loop() creates working loop.""" + erlang = _get_erlang_module() + + loop = erlang.new_event_loop() + try: + self.assertFalse(loop.is_closed()) + self.assertFalse(loop.is_running()) + + async def simple(): + return 123 + + result = loop.run_until_complete(simple()) + self.assertEqual(result, 123) + finally: + loop.close() + self.assertTrue(loop.is_closed()) + + def test_set_event_loop_manual(self): + """Test: asyncio.set_event_loop(erlang.new_event_loop())""" + erlang = _get_erlang_module() + + loop = erlang.new_event_loop() + try: + asyncio.set_event_loop(loop) + + # Verify it's the current loop (in Python 3.10+, this may raise a 
warning) + try: + current = asyncio.get_event_loop() + self.assertIs(current, loop) + except DeprecationWarning: + pass # Python 3.12+ deprecates get_event_loop without running loop + + async def with_sleep(): + await asyncio.sleep(0.01) + return "slept" + + result = loop.run_until_complete(with_sleep()) + self.assertEqual(result, "slept") + finally: + asyncio.set_event_loop(None) + loop.close() + + def test_timers_work(self): + """Test that call_later/asyncio.sleep use Erlang timers.""" + erlang = _get_erlang_module() + + loop = erlang.new_event_loop() + try: + results = [] + + def callback(x): + results.append(x) + if x == 3: + loop.stop() + + # Schedule out of order - should execute in time order + loop.call_later(0.03, callback, 3) + loop.call_later(0.01, callback, 1) + loop.call_later(0.02, callback, 2) + + loop.run_forever() + self.assertEqual(results, [1, 2, 3]) + finally: + loop.close() + + +class TestErlangRun(unittest.TestCase): + """Test erlang.run() function.""" + + def test_run_simple(self): + """Test: erlang.run(coro)""" + erlang = _get_erlang_module() + + async def simple(): + return 42 + + result = erlang.run(simple()) + self.assertEqual(result, 42) + + def test_run_with_sleep(self): + """Test: erlang.run() with asyncio.sleep()""" + erlang = _get_erlang_module() + + async def with_sleep(): + await asyncio.sleep(0.05) + return "done" + + result = erlang.run(with_sleep()) + self.assertEqual(result, "done") + + def test_run_concurrent_tasks(self): + """Test: erlang.run() with concurrent tasks.""" + erlang = _get_erlang_module() + + async def task(n): + await asyncio.sleep(0.01) + return n * 2 + + async def main(): + results = await asyncio.gather( + task(1), task(2), task(3) + ) + return results + + result = erlang.run(main()) + self.assertEqual(result, [2, 4, 6]) + + +if __name__ == '__main__': + unittest.main() diff --git a/priv/tests/test_executors.py b/priv/tests/test_executors.py new file mode 100644 index 0000000..79fcf82 --- /dev/null +++ 
b/priv/tests/test_executors.py @@ -0,0 +1,335 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Executor tests adapted from uvloop's test_executors.py. + +These tests verify executor functionality: +- run_in_executor with default executor +- run_in_executor with custom executor +- Executor task cancellation +""" + +import asyncio +import concurrent.futures +import multiprocessing +import threading +import time +import unittest + +from . import _testbase as tb + +# Use 'spawn' instead of 'fork' for ProcessPoolExecutor to avoid +# corrupting the Erlang VM when running inside the NIF +_mp_context = multiprocessing.get_context('spawn') + + +class _TestRunInExecutor: + """Tests for run_in_executor functionality.""" + + def test_run_in_executor_basic(self): + """Test basic run_in_executor usage.""" + def blocking_func(x): + time.sleep(0.01) + return x * 2 + + async def main(): + result = await self.loop.run_in_executor(None, blocking_func, 21) + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, 42) + + def test_run_in_executor_multiple(self): + """Test multiple run_in_executor calls.""" + def blocking_func(x): + time.sleep(0.01) + return x + + async def main(): + tasks = [ + self.loop.run_in_executor(None, blocking_func, i) + for i in range(5) + ] + results = await asyncio.gather(*tasks) + return results + + results = self.loop.run_until_complete(main()) + self.assertEqual(sorted(results), 
[0, 1, 2, 3, 4]) + + def test_run_in_executor_thread(self): + """Test that run_in_executor runs in different thread.""" + main_thread_id = threading.get_ident() + executor_thread_id = [] + + def get_thread_id(): + executor_thread_id.append(threading.get_ident()) + return threading.get_ident() + + async def main(): + result = await self.loop.run_in_executor(None, get_thread_id) + return result + + result = self.loop.run_until_complete(main()) + + self.assertEqual(len(executor_thread_id), 1) + self.assertNotEqual(executor_thread_id[0], main_thread_id) + + def test_run_in_executor_exception(self): + """Test run_in_executor with exception.""" + def failing_func(): + raise ValueError("test error") + + async def main(): + with self.assertRaises(ValueError): + await self.loop.run_in_executor(None, failing_func) + + self.loop.run_until_complete(main()) + + def test_run_in_executor_cpu_bound(self): + """Test run_in_executor with CPU-bound work.""" + def cpu_bound(): + total = 0 + for i in range(100000): + total += i + return total + + async def main(): + result = await self.loop.run_in_executor(None, cpu_bound) + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, sum(range(100000))) + + +class _TestCustomExecutor: + """Tests for run_in_executor with custom executor.""" + + def test_run_in_custom_executor(self): + """Test run_in_executor with custom ThreadPoolExecutor.""" + executor = concurrent.futures.ThreadPoolExecutor(max_workers=2) + + def blocking_func(x): + time.sleep(0.01) + return x * 3 + + async def main(): + result = await self.loop.run_in_executor(executor, blocking_func, 10) + return result + + try: + result = self.loop.run_until_complete(main()) + self.assertEqual(result, 30) + finally: + executor.shutdown(wait=True) + + def test_set_default_executor(self): + """Test set_default_executor method.""" + executor = concurrent.futures.ThreadPoolExecutor(max_workers=1) + executor_used = [] + + original_submit = executor.submit 
+ + def tracked_submit(*args, **kwargs): + executor_used.append(True) + return original_submit(*args, **kwargs) + + executor.submit = tracked_submit + + def blocking_func(): + return 42 + + async def main(): + self.loop.set_default_executor(executor) + result = await self.loop.run_in_executor(None, blocking_func) + return result + + try: + result = self.loop.run_until_complete(main()) + self.assertEqual(result, 42) + self.assertTrue(executor_used) + finally: + executor.shutdown(wait=True) + + def test_process_pool_executor(self): + """Test run_in_executor with ProcessPoolExecutor. + + Note: This test is skipped when running inside the Erlang NIF + because multiprocessing doesn't work well with the embedded Python. + """ + # Skip when running inside Erlang NIF - multiprocessing is problematic + try: + import py_event_loop + self.skipTest("ProcessPoolExecutor not supported inside Erlang NIF") + except ImportError: + pass # Not running inside NIF, test is OK + + def cpu_bound(n): + return sum(range(n)) + + async def main(): + # Use spawn context to avoid forking + executor = concurrent.futures.ProcessPoolExecutor( + max_workers=1, mp_context=_mp_context + ) + try: + result = await self.loop.run_in_executor( + executor, cpu_bound, 10000 + ) + return result + finally: + executor.shutdown(wait=True) + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, sum(range(10000))) + + +class _TestExecutorConcurrency: + """Tests for executor concurrency behavior.""" + + def test_executor_concurrent_calls(self): + """Test concurrent executor calls.""" + start_times = [] + end_times = [] + lock = threading.Lock() + + def track_timing(x): + with lock: + start_times.append(time.monotonic()) + time.sleep(0.05) + with lock: + end_times.append(time.monotonic()) + return x + + async def main(): + tasks = [ + self.loop.run_in_executor(None, track_timing, i) + for i in range(3) + ] + results = await asyncio.gather(*tasks) + return results + + results = 
self.loop.run_until_complete(main())
+
+        self.assertEqual(sorted(results), [0, 1, 2])
+        # Sanity check: every task recorded both a start and an end time.
+        self.assertEqual(len(start_times), 3)
+        self.assertEqual(len(end_times), 3)
+
+    def test_executor_mixed_with_async(self):
+        """Test mixing executor calls with async operations."""
+        results = []
+
+        def blocking_work(x):
+            time.sleep(0.02)
+            return f"blocking:{x}"
+
+        async def async_work(x):
+            await asyncio.sleep(0.01)
+            return f"async:{x}"
+
+        async def main():
+            tasks = [
+                self.loop.run_in_executor(None, blocking_work, 1),
+                async_work(2),
+                self.loop.run_in_executor(None, blocking_work, 3),
+                async_work(4),
+            ]
+            return await asyncio.gather(*tasks)
+
+        results = self.loop.run_until_complete(main())
+
+        self.assertEqual(len(results), 4)
+        self.assertIn('blocking:1', results)
+        self.assertIn('async:2', results)
+        self.assertIn('blocking:3', results)
+        self.assertIn('async:4', results)
+
+
+class _TestExecutorCancel:
+    """Tests for executor task cancellation."""
+
+    def test_executor_cancel_pending(self):
+        """Test that a task queued behind a blocked worker still completes."""
+        started = threading.Event()
+        can_continue = threading.Event()
+
+        def slow_func():
+            started.set()
+            can_continue.wait(timeout=10)
+            return 42
+
+        async def main():
+            # Start a task that blocks
+            task1 = self.loop.run_in_executor(None, slow_func)
+
+            # Wait for it to start
+            for _ in range(100):
+                if started.is_set():
+                    break
+                await asyncio.sleep(0.01)
+
+            # This task will be pending in the executor queue
+            task2 = self.loop.run_in_executor(None, lambda: 99)
+
+            # Let first task complete
+            can_continue.set()
+
+            result1 = await task1
+            result2 = await task2
+
+            return result1, result2
+
+        result = self.loop.run_until_complete(main())
+        self.assertEqual(result, (42, 99))
+
+
+# =============================================================================
+# Test classes that combine mixins with test cases
+# 
============================================================================= + +class TestErlangRunInExecutor(_TestRunInExecutor, tb.ErlangTestCase): + pass + + +class TestAIORunInExecutor(_TestRunInExecutor, tb.AIOTestCase): + pass + + +class TestErlangCustomExecutor(_TestCustomExecutor, tb.ErlangTestCase): + pass + + +class TestAIOCustomExecutor(_TestCustomExecutor, tb.AIOTestCase): + pass + + +class TestErlangExecutorConcurrency(_TestExecutorConcurrency, tb.ErlangTestCase): + pass + + +class TestAIOExecutorConcurrency(_TestExecutorConcurrency, tb.AIOTestCase): + pass + + +class TestErlangExecutorCancel(_TestExecutorCancel, tb.ErlangTestCase): + pass + + +class TestAIOExecutorCancel(_TestExecutorCancel, tb.AIOTestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/priv/tests/test_process.py b/priv/tests/test_process.py new file mode 100644 index 0000000..9ee5a03 --- /dev/null +++ b/priv/tests/test_process.py @@ -0,0 +1,91 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Subprocess/process tests for ErlangEventLoop. + +These tests verify that dangerous subprocess operations are blocked +when running inside the Erlang VM via audit hooks. +""" + +import asyncio +import os +import subprocess +import sys +import unittest + +from . 
import _testbase as tb + + +class TestErlangSubprocessBlocked(tb.ErlangTestCase): + """Verify subprocess is blocked via event loop or audit hooks.""" + + def test_asyncio_subprocess_shell_blocked(self): + """Test asyncio.create_subprocess_shell is blocked.""" + async def main(): + await asyncio.create_subprocess_shell('echo hello') + + # NotImplementedError from ErlangEventLoop._subprocess, or RuntimeError from audit hook + with self.assertRaises((NotImplementedError, RuntimeError)): + self.loop.run_until_complete(main()) + + def test_asyncio_subprocess_exec_blocked(self): + """Test asyncio.create_subprocess_exec is blocked.""" + async def main(): + await asyncio.create_subprocess_exec('echo', 'hello') + + # NotImplementedError from ErlangEventLoop._subprocess, or RuntimeError from audit hook + with self.assertRaises((NotImplementedError, RuntimeError)): + self.loop.run_until_complete(main()) + + +class TestErlangOsBlocked(tb.ErlangTestCase): + """Verify os.* process functions are blocked.""" + + @unittest.skipUnless(hasattr(os, 'fork'), "fork not available") + def test_os_fork_blocked(self): + """Test os.fork is blocked.""" + with self.assertRaises(RuntimeError) as cm: + os.fork() + self.assertIn('blocked', str(cm.exception).lower()) + + def test_os_system_blocked(self): + """Test os.system is blocked.""" + with self.assertRaises(RuntimeError) as cm: + os.system('echo hello') + self.assertIn('blocked', str(cm.exception).lower()) + + def test_os_popen_blocked(self): + """Test os.popen is blocked.""" + with self.assertRaises(RuntimeError) as cm: + os.popen('echo hello') + self.assertIn('blocked', str(cm.exception).lower()) + + @unittest.skipUnless(hasattr(os, 'execv'), "execv not available") + def test_os_execv_blocked(self): + """Test os.execv is blocked.""" + with self.assertRaises(RuntimeError) as cm: + os.execv('/bin/echo', ['echo', 'hello']) + self.assertIn('blocked', str(cm.exception).lower()) + + @unittest.skipUnless(hasattr(os, 'spawnl'), "spawnl not 
available") + def test_os_spawnl_blocked(self): + """Test os.spawnl is blocked.""" + with self.assertRaises(RuntimeError) as cm: + os.spawnl(os.P_WAIT, '/bin/echo', 'echo', 'hello') + self.assertIn('blocked', str(cm.exception).lower()) + + +if __name__ == '__main__': + unittest.main() diff --git a/priv/tests/test_sockets.py b/priv/tests/test_sockets.py new file mode 100644 index 0000000..8c17f9a --- /dev/null +++ b/priv/tests/test_sockets.py @@ -0,0 +1,436 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Socket operations tests adapted from uvloop's test_sockets.py. + +These tests verify low-level socket operations: +- sock_recv, sock_recv_into +- sock_sendall +- sock_connect +- sock_accept +""" + +import asyncio +import socket +import threading +import time +import unittest + +from . 
import _testbase as tb + + +class _TestSockets: + """Tests for low-level socket operations.""" + + def test_sock_connect_recv_send(self): + """Test sock_connect, sock_recv, sock_sendall.""" + port = tb.find_free_port() + received = [] + server_ready = threading.Event() + + def server_thread(): + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('127.0.0.1', port)) + server.listen(1) + server_ready.set() + conn, _ = server.accept() + data = conn.recv(1024) + received.append(data) + conn.sendall(b'pong') + conn.close() + server.close() + + thread = threading.Thread(target=server_thread) + thread.start() + server_ready.wait(timeout=5) + + async def client(): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, ('127.0.0.1', port)) + await self.loop.sock_sendall(sock, b'ping') + data = await self.loop.sock_recv(sock, 1024) + sock.close() + return data + + result = self.loop.run_until_complete(client()) + thread.join(timeout=5) + + self.assertEqual(result, b'pong') + self.assertEqual(received, [b'ping']) + + def test_sock_accept(self): + """Test sock_accept.""" + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('127.0.0.1', 0)) + server.listen(1) + server.setblocking(False) + port = server.getsockname()[1] + + def client_thread(): + time.sleep(0.05) + client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + client.connect(('127.0.0.1', port)) + client.sendall(b'hello') + client.close() + + thread = threading.Thread(target=client_thread) + thread.start() + + async def accept(): + conn, addr = await self.loop.sock_accept(server) + data = await self.loop.sock_recv(conn, 1024) + conn.close() + server.close() + return data + + result = self.loop.run_until_complete(accept()) + thread.join(timeout=5) + + self.assertEqual(result, 
b'hello') + + def test_sock_recv_into(self): + """Test sock_recv_into with buffer.""" + port = tb.find_free_port() + server_ready = threading.Event() + + def server_thread(): + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('127.0.0.1', port)) + server.listen(1) + server_ready.set() + conn, _ = server.accept() + conn.sendall(b'hello world') + conn.close() + server.close() + + thread = threading.Thread(target=server_thread) + thread.start() + server_ready.wait(timeout=5) + + async def client(): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, ('127.0.0.1', port)) + + buf = bytearray(1024) + nbytes = await self.loop.sock_recv_into(sock, buf) + sock.close() + return buf[:nbytes] + + result = self.loop.run_until_complete(client()) + thread.join(timeout=5) + + self.assertEqual(bytes(result), b'hello world') + + def test_sock_sendall_large(self): + """Test sock_sendall with large data.""" + port = tb.find_free_port() + received = bytearray() + server_ready = threading.Event() + server_done = threading.Event() + data_size = 1024 * 1024 # 1 MB + + def server_thread(): + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('127.0.0.1', port)) + server.listen(1) + server_ready.set() + + conn, _ = server.accept() + while True: + data = conn.recv(65536) + if not data: + break + received.extend(data) + conn.close() + server.close() + server_done.set() + + thread = threading.Thread(target=server_thread) + thread.start() + server_ready.wait(timeout=5) + + async def client(): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, ('127.0.0.1', port)) + data = b'x' * data_size + await self.loop.sock_sendall(sock, data) + sock.close() + return len(data) + + sent = 
self.loop.run_until_complete(client()) + server_done.wait(timeout=10) + thread.join(timeout=5) + + self.assertEqual(sent, data_size) + self.assertEqual(len(received), data_size) + + def test_sock_connect_timeout(self): + """Test sock_connect with timeout.""" + async def try_connect(): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + try: + # Connect to a non-routable address to trigger timeout + # Some networks may refuse quickly, so accept either error + with self.assertRaises((asyncio.TimeoutError, OSError)): + await asyncio.wait_for( + self.loop.sock_connect(sock, ('10.255.255.1', 12345)), + timeout=0.5 + ) + finally: + sock.close() + + self.loop.run_until_complete(try_connect()) + + def test_sock_connect_refused(self): + """Test sock_connect with connection refused.""" + async def try_connect(): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + try: + # Connect to a port that should refuse + with self.assertRaises(OSError): + await self.loop.sock_connect(sock, ('127.0.0.1', 1)) + finally: + sock.close() + + self.loop.run_until_complete(try_connect()) + + def test_sock_multiple_clients(self): + """Test multiple clients connecting simultaneously.""" + port = tb.find_free_port() + server_ready = threading.Event() + num_clients = 5 + received = [] + + def server_thread(): + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('127.0.0.1', port)) + server.listen(num_clients) + server_ready.set() + + for _ in range(num_clients): + conn, _ = server.accept() + data = conn.recv(1024) + received.append(data) + conn.sendall(b'ack:' + data) + conn.close() + server.close() + + thread = threading.Thread(target=server_thread) + thread.start() + server_ready.wait(timeout=5) + + async def client(client_id): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + await 
self.loop.sock_connect(sock, ('127.0.0.1', port)) + await self.loop.sock_sendall(sock, f'client{client_id}'.encode()) + data = await self.loop.sock_recv(sock, 1024) + sock.close() + return data + + async def main(): + tasks = [client(i) for i in range(num_clients)] + return await asyncio.gather(*tasks) + + results = self.loop.run_until_complete(main()) + thread.join(timeout=5) + + self.assertEqual(len(results), num_clients) + self.assertEqual(len(received), num_clients) + + def test_sock_echo(self): + """Test echo server pattern.""" + port = tb.find_free_port() + server_ready = threading.Event() + messages = [b'hello', b'world', b'test', b'message'] + + def server_thread(): + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('127.0.0.1', port)) + server.listen(1) + server_ready.set() + + conn, _ = server.accept() + try: + while True: + data = conn.recv(1024) + if not data: + break + conn.sendall(data) + finally: + conn.close() + server.close() + + thread = threading.Thread(target=server_thread) + thread.start() + server_ready.wait(timeout=5) + + async def client(): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, ('127.0.0.1', port)) + + results = [] + for msg in messages: + await self.loop.sock_sendall(sock, msg) + data = await self.loop.sock_recv(sock, 1024) + results.append(data) + + sock.close() + return results + + results = self.loop.run_until_complete(client()) + thread.join(timeout=5) + + self.assertEqual(results, messages) + + +class _TestSocketsCancel: + """Tests for socket operation cancellation.""" + + def test_sock_recv_cancel(self): + """Test cancelling sock_recv.""" + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('127.0.0.1', 0)) + server.listen(1) + server.setblocking(False) + port = 
server.getsockname()[1] + + # Client that connects but doesn't send + client = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + client.connect(('127.0.0.1', port)) + + async def do_accept_and_recv(): + conn, _ = await self.loop.sock_accept(server) + try: + # This should block since client doesn't send + recv_task = asyncio.create_task( + self.loop.sock_recv(conn, 1024) + ) + await asyncio.sleep(0.05) + recv_task.cancel() + with self.assertRaises(asyncio.CancelledError): + await recv_task + finally: + conn.close() + + try: + self.loop.run_until_complete(do_accept_and_recv()) + finally: + client.close() + server.close() + + def test_sock_sendall_cancel(self): + """Test cancelling sock_sendall with large data.""" + port = tb.find_free_port() + server_ready = threading.Event() + + def server_thread(): + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('127.0.0.1', port)) + server.listen(1) + server_ready.set() + + conn, _ = server.accept() + # Read slowly to cause backpressure + time.sleep(0.5) + conn.close() + server.close() + + thread = threading.Thread(target=server_thread) + thread.start() + server_ready.wait(timeout=5) + + async def client(): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, ('127.0.0.1', port)) + + # Try to send a lot of data + data = b'x' * (10 * 1024 * 1024) # 10 MB + send_task = asyncio.create_task( + self.loop.sock_sendall(sock, data) + ) + await asyncio.sleep(0.05) + send_task.cancel() + try: + await send_task + except asyncio.CancelledError: + pass + sock.close() + + self.loop.run_until_complete(client()) + thread.join(timeout=5) + + +class _TestSocketsNonBlocking: + """Tests for non-blocking socket requirements.""" + + def test_sock_recv_nonblocking_required(self): + """Test that sock_recv requires non-blocking socket.""" + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) 
+ sock.setblocking(True) + + async def recv(): + # Should still work but may behave differently + # depending on implementation + sock.close() + + self.loop.run_until_complete(recv()) + + +# ============================================================================= +# Test classes that combine mixins with test cases +# ============================================================================= + +class TestErlangSockets(_TestSockets, tb.ErlangTestCase): + pass + + +class TestAIOSockets(_TestSockets, tb.AIOTestCase): + pass + + +class TestErlangSocketsCancel(_TestSocketsCancel, tb.ErlangTestCase): + pass + + +class TestAIOSocketsCancel(_TestSocketsCancel, tb.AIOTestCase): + pass + + +class TestErlangSocketsNonBlocking(_TestSocketsNonBlocking, tb.ErlangTestCase): + pass + + +class TestAIOSocketsNonBlocking(_TestSocketsNonBlocking, tb.AIOTestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/priv/tests/test_tcp.py b/priv/tests/test_tcp.py new file mode 100644 index 0000000..09826d6 --- /dev/null +++ b/priv/tests/test_tcp.py @@ -0,0 +1,609 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +TCP protocol tests adapted from uvloop's test_tcp.py. + +These tests verify high-level TCP operations: +- create_server +- create_connection +- Transport and Protocol interactions +- Data transmission and flow control +""" + +import asyncio +import socket +import threading +import time +import unittest + +from . 
import _testbase as tb + + +class _TestCreateServer: + """Tests for create_server functionality.""" + + def test_create_server_basic(self): + """Test basic TCP server creation.""" + connections = [] + + class ServerProtocol(asyncio.Protocol): + def connection_made(self, transport): + connections.append(transport) + transport.write(b'welcome') + + def data_received(self, data): + pass + + def connection_lost(self, exc): + pass + + async def main(): + server = await self.loop.create_server( + ServerProtocol, '127.0.0.1', 0 + ) + port = server.sockets[0].getsockname()[1] + + # Connect a client + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, ('127.0.0.1', port)) + data = await self.loop.sock_recv(sock, 1024) + sock.close() + + server.close() + await server.wait_closed() + + return data + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, b'welcome') + self.assertEqual(len(connections), 1) + + def test_create_server_multiple_clients(self): + """Test server handling multiple clients.""" + connections = [] + received_data = [] + + class ServerProtocol(asyncio.Protocol): + def connection_made(self, transport): + self.transport = transport + connections.append(transport) + + def data_received(self, data): + received_data.append(data) + self.transport.write(b'echo:' + data) + + def connection_lost(self, exc): + pass + + async def main(): + server = await self.loop.create_server( + ServerProtocol, '127.0.0.1', 0 + ) + port = server.sockets[0].getsockname()[1] + + async def client(msg): + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, ('127.0.0.1', port)) + await self.loop.sock_sendall(sock, msg) + data = await self.loop.sock_recv(sock, 1024) + sock.close() + return data + + results = await asyncio.gather( + client(b'msg1'), + client(b'msg2'), + client(b'msg3'), + ) + + server.close() + await 
server.wait_closed() + + return results + + results = self.loop.run_until_complete(main()) + + self.assertEqual(len(results), 3) + self.assertEqual(len(connections), 3) + self.assertEqual(sorted(received_data), [b'msg1', b'msg2', b'msg3']) + + def test_create_server_close_during_accept(self): + """Test closing server during accept.""" + async def main(): + server = await self.loop.create_server( + asyncio.Protocol, '127.0.0.1', 0 + ) + self.assertTrue(server.is_serving()) + server.close() + await server.wait_closed() + self.assertFalse(server.is_serving()) + + self.loop.run_until_complete(main()) + + def test_create_server_reuse_address(self): + """Test server with reuse_address option.""" + async def main(): + server1 = await self.loop.create_server( + asyncio.Protocol, '127.0.0.1', 0, + reuse_address=True + ) + port = server1.sockets[0].getsockname()[1] + server1.close() + await server1.wait_closed() + + # Should be able to bind to same port quickly + server2 = await self.loop.create_server( + asyncio.Protocol, '127.0.0.1', port, + reuse_address=True + ) + server2.close() + await server2.wait_closed() + + self.loop.run_until_complete(main()) + + def test_create_server_start_serving_false(self): + """Test server with start_serving=False.""" + class ServerProtocol(asyncio.Protocol): + pass + + async def main(): + server = await self.loop.create_server( + ServerProtocol, '127.0.0.1', 0, + start_serving=False + ) + self.assertFalse(server.is_serving()) + + await server.start_serving() + self.assertTrue(server.is_serving()) + + server.close() + await server.wait_closed() + + self.loop.run_until_complete(main()) + + +class _TestCreateConnection: + """Tests for create_connection functionality.""" + + def test_create_connection_basic(self): + """Test basic TCP connection.""" + port = tb.find_free_port() + received = [] + + class ServerProtocol(asyncio.Protocol): + def connection_made(self, transport): + self.transport = transport + + def data_received(self, data): + 
received.append(data) + self.transport.write(b'echo:' + data) + self.transport.close() + + async def main(): + server = await self.loop.create_server( + ServerProtocol, '127.0.0.1', port + ) + + class ClientProtocol(asyncio.Protocol): + def __init__(self): + self.received = [] + self.done = asyncio.get_event_loop().create_future() + + def connection_made(self, transport): + self.transport = transport + transport.write(b'hello') + + def data_received(self, data): + self.received.append(data) + + def connection_lost(self, exc): + if not self.done.done(): + self.done.set_result(self.received) + + transport, protocol = await self.loop.create_connection( + ClientProtocol, '127.0.0.1', port + ) + + result = await protocol.done + server.close() + await server.wait_closed() + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, [b'echo:hello']) + self.assertEqual(received, [b'hello']) + + def test_create_connection_with_existing_socket(self): + """Test create_connection with existing socket.""" + port = tb.find_free_port() + + class ServerProtocol(asyncio.Protocol): + def connection_made(self, transport): + transport.write(b'hello') + transport.close() + + async def main(): + server = await self.loop.create_server( + ServerProtocol, '127.0.0.1', port + ) + + # Create and connect socket manually + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, ('127.0.0.1', port)) + + class ClientProtocol(asyncio.Protocol): + def __init__(self): + self.data = bytearray() + self.done = asyncio.get_event_loop().create_future() + + def data_received(self, data): + self.data.extend(data) + + def connection_lost(self, exc): + if not self.done.done(): + self.done.set_result(bytes(self.data)) + + # Pass existing socket + transport, protocol = await self.loop.create_connection( + ClientProtocol, sock=sock + ) + + result = await protocol.done + server.close() + await server.wait_closed() + 
return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, b'hello') + + +class _TestTransportProtocol: + """Tests for Transport and Protocol interactions.""" + + def test_protocol_callbacks(self): + """Test protocol callback sequence.""" + callbacks = [] + + class TestProtocol(asyncio.Protocol): + def connection_made(self, transport): + callbacks.append('connection_made') + self.transport = transport + + def data_received(self, data): + callbacks.append(f'data_received:{data}') + + def eof_received(self): + callbacks.append('eof_received') + + def connection_lost(self, exc): + callbacks.append(f'connection_lost:{exc}') + + async def main(): + server = await self.loop.create_server( + TestProtocol, '127.0.0.1', 0 + ) + port = server.sockets[0].getsockname()[1] + + # Connect and send data + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('127.0.0.1', port)) + sock.sendall(b'hello') + time.sleep(0.05) # Give server time to process + sock.close() + + await asyncio.sleep(0.1) + server.close() + await server.wait_closed() + + self.loop.run_until_complete(main()) + + self.assertIn('connection_made', callbacks) + self.assertTrue(any('data_received' in c for c in callbacks)) + + def test_transport_write(self): + """Test transport write operation.""" + received = bytearray() + done = threading.Event() + + def server_thread(port): + server = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + server.bind(('127.0.0.1', port)) + server.listen(1) + server.settimeout(5) + conn, _ = server.accept() + while True: + data = conn.recv(1024) + if not data: + break + received.extend(data) + conn.close() + server.close() + done.set() + + port = tb.find_free_port() + thread = threading.Thread(target=server_thread, args=(port,)) + thread.start() + time.sleep(0.05) + + class ClientProtocol(asyncio.Protocol): + def connection_made(self, transport): + 
transport.write(b'hello ') + transport.write(b'world') + transport.close() + + async def main(): + await self.loop.create_connection( + ClientProtocol, '127.0.0.1', port + ) + await asyncio.sleep(0.1) + + self.loop.run_until_complete(main()) + done.wait(timeout=5) + thread.join(timeout=5) + + self.assertEqual(bytes(received), b'hello world') + + def test_transport_close(self): + """Test transport close operation.""" + closed = [] + + class TestProtocol(asyncio.Protocol): + def connection_made(self, transport): + self.transport = transport + + def connection_lost(self, exc): + closed.append(exc) + + async def main(): + server = await self.loop.create_server( + TestProtocol, '127.0.0.1', 0 + ) + port = server.sockets[0].getsockname()[1] + + # Connect + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('127.0.0.1', port)) + sock.close() + + await asyncio.sleep(0.1) + server.close() + await server.wait_closed() + + self.loop.run_until_complete(main()) + + # connection_lost should be called + self.assertEqual(len(closed), 1) + + def test_transport_get_extra_info(self): + """Test transport get_extra_info method.""" + extra_info = {} + + class TestProtocol(asyncio.Protocol): + def connection_made(self, transport): + extra_info['socket'] = transport.get_extra_info('socket') + extra_info['sockname'] = transport.get_extra_info('sockname') + extra_info['peername'] = transport.get_extra_info('peername') + extra_info['unknown'] = transport.get_extra_info('unknown', 'default') + transport.close() + + async def main(): + server = await self.loop.create_server( + TestProtocol, '127.0.0.1', 0 + ) + port = server.sockets[0].getsockname()[1] + + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('127.0.0.1', port)) + await asyncio.sleep(0.1) + sock.close() + + server.close() + await server.wait_closed() + + self.loop.run_until_complete(main()) + + self.assertIsNotNone(extra_info.get('socket')) + 
self.assertIsNotNone(extra_info.get('sockname')) + self.assertEqual(extra_info.get('unknown'), 'default') + + def test_transport_pause_resume_reading(self): + """Test transport pause_reading and resume_reading.""" + data_chunks = [] + paused = [] + + class TestProtocol(asyncio.Protocol): + def connection_made(self, transport): + self.transport = transport + + def data_received(self, data): + data_chunks.append(data) + if len(data_chunks) == 1: + self.transport.pause_reading() + paused.append(True) + # Resume after a delay + asyncio.get_event_loop().call_later( + 0.05, self.transport.resume_reading + ) + + async def main(): + server = await self.loop.create_server( + TestProtocol, '127.0.0.1', 0 + ) + port = server.sockets[0].getsockname()[1] + + # Send data + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.connect(('127.0.0.1', port)) + sock.sendall(b'chunk1') + await asyncio.sleep(0.01) + sock.sendall(b'chunk2') + await asyncio.sleep(0.1) + sock.close() + + server.close() + await server.wait_closed() + + self.loop.run_until_complete(main()) + + # Should have paused and received data + self.assertTrue(paused) + self.assertTrue(len(data_chunks) >= 1) + + +class _TestLargeData: + """Tests for large data transmission.""" + + def test_large_data_transfer(self): + """Test transferring large amounts of data.""" + data_size = 5 * 1024 * 1024 # 5 MB + received = bytearray() + + class ServerProtocol(asyncio.Protocol): + def connection_made(self, transport): + self.transport = transport + + def data_received(self, data): + received.extend(data) + + def connection_lost(self, exc): + pass + + async def main(): + server = await self.loop.create_server( + ServerProtocol, '127.0.0.1', 0 + ) + port = server.sockets[0].getsockname()[1] + + # Connect and send large data + class ClientProtocol(asyncio.Protocol): + def connection_made(self, transport): + self.transport = transport + # Send large data + transport.write(b'x' * data_size) + transport.close() + + await 
self.loop.create_connection( + ClientProtocol, '127.0.0.1', port + ) + + # Wait for data to be received + for _ in range(100): + if len(received) >= data_size: + break + await asyncio.sleep(0.1) + + server.close() + await server.wait_closed() + + return len(received) + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, data_size) + + +class _TestServerSockets: + """Tests for server socket management.""" + + def test_server_sockets_property(self): + """Test server.sockets property.""" + async def main(): + server = await self.loop.create_server( + asyncio.Protocol, '127.0.0.1', 0 + ) + sockets = server.sockets + self.assertIsInstance(sockets, tuple) + self.assertEqual(len(sockets), 1) + # Check for socket-like object (asyncio may wrap in TransportSocket) + self.assertTrue(hasattr(sockets[0], 'fileno')) + + server.close() + await server.wait_closed() + + self.loop.run_until_complete(main()) + + def test_server_get_loop(self): + """Test server.get_loop() method.""" + async def main(): + server = await self.loop.create_server( + asyncio.Protocol, '127.0.0.1', 0 + ) + self.assertIs(server.get_loop(), self.loop) + + server.close() + await server.wait_closed() + + self.loop.run_until_complete(main()) + + def test_server_context_manager(self): + """Test server as async context manager.""" + async def main(): + async with await self.loop.create_server( + asyncio.Protocol, '127.0.0.1', 0 + ) as server: + self.assertTrue(server.is_serving()) + # Should be closed after context exits + + self.loop.run_until_complete(main()) + + +# ============================================================================= +# Test classes that combine mixins with test cases +# ============================================================================= + +class TestErlangCreateServer(_TestCreateServer, tb.ErlangTestCase): + pass + + +class TestAIOCreateServer(_TestCreateServer, tb.AIOTestCase): + pass + + +class TestErlangCreateConnection(_TestCreateConnection, 
tb.ErlangTestCase): + pass + + +class TestAIOCreateConnection(_TestCreateConnection, tb.AIOTestCase): + pass + + +class TestErlangTransportProtocol(_TestTransportProtocol, tb.ErlangTestCase): + pass + + +class TestAIOTransportProtocol(_TestTransportProtocol, tb.AIOTestCase): + pass + + +class TestErlangLargeData(_TestLargeData, tb.ErlangTestCase): + pass + + +class TestAIOLargeData(_TestLargeData, tb.AIOTestCase): + pass + + +class TestErlangServerSockets(_TestServerSockets, tb.ErlangTestCase): + pass + + +class TestAIOServerSockets(_TestServerSockets, tb.AIOTestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/priv/tests/test_udp.py b/priv/tests/test_udp.py new file mode 100644 index 0000000..2c08643 --- /dev/null +++ b/priv/tests/test_udp.py @@ -0,0 +1,456 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +UDP protocol tests adapted from uvloop's test_udp.py. + +These tests verify UDP/datagram operations: +- create_datagram_endpoint +- DatagramTransport and DatagramProtocol +- sendto and datagram_received +""" + +import asyncio +import socket +import unittest + +from . 
import _testbase as tb + + +class _TestCreateDatagramEndpoint: + """Tests for create_datagram_endpoint functionality.""" + + def test_create_datagram_endpoint_local(self): + """Test creating a local UDP server.""" + received = [] + + class ServerProtocol(asyncio.DatagramProtocol): + def datagram_received(self, data, addr): + received.append((data, addr)) + + def error_received(self, exc): + pass + + async def main(): + transport, protocol = await self.loop.create_datagram_endpoint( + ServerProtocol, + local_addr=('127.0.0.1', 0) + ) + server_addr = transport.get_extra_info('sockname') + self.assertIsNotNone(server_addr) + + transport.close() + return server_addr + + addr = self.loop.run_until_complete(main()) + self.assertIsInstance(addr, tuple) + + def test_create_datagram_endpoint_remote(self): + """Test creating a UDP client with remote address.""" + class ClientProtocol(asyncio.DatagramProtocol): + def datagram_received(self, data, addr): + pass + + def error_received(self, exc): + pass + + async def main(): + # First create a server + class ServerProtocol(asyncio.DatagramProtocol): + def datagram_received(self, data, addr): + pass + + server_transport, _ = await self.loop.create_datagram_endpoint( + ServerProtocol, + local_addr=('127.0.0.1', 0) + ) + server_addr = server_transport.get_extra_info('sockname') + + # Create client connected to server + client_transport, _ = await self.loop.create_datagram_endpoint( + ClientProtocol, + remote_addr=server_addr + ) + + client_transport.close() + server_transport.close() + + self.loop.run_until_complete(main()) + + def test_udp_echo(self): + """Test UDP echo server pattern.""" + received = [] + + class EchoServerProtocol(asyncio.DatagramProtocol): + def datagram_received(self, data, addr): + received.append(data) + self.transport.sendto(b'echo:' + data, addr) + + def connection_made(self, transport): + self.transport = transport + + class ClientProtocol(asyncio.DatagramProtocol): + def __init__(self): + self.received 
= [] + self.done = None + + def connection_made(self, transport): + self.done = asyncio.get_running_loop().create_future() + + def datagram_received(self, data, addr): + self.received.append(data) + self.done.set_result(data) + + async def main(): + # Create server + server_transport, _ = await self.loop.create_datagram_endpoint( + EchoServerProtocol, + local_addr=('127.0.0.1', 0) + ) + server_addr = server_transport.get_extra_info('sockname') + + # Create client + client_transport, client_protocol = await self.loop.create_datagram_endpoint( + ClientProtocol, + remote_addr=server_addr + ) + + client_transport.sendto(b'hello') + result = await asyncio.wait_for(client_protocol.done, timeout=5.0) + + client_transport.close() + server_transport.close() + + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, b'echo:hello') + self.assertEqual(received, [b'hello']) + + def test_udp_sendto_without_connect(self): + """Test sendto without pre-connected remote address.""" + received = [] + + class ServerProtocol(asyncio.DatagramProtocol): + def connection_made(self, transport): + self.transport = transport + + def datagram_received(self, data, addr): + received.append((data, addr)) + self.transport.sendto(b'ack', addr) + + class ClientProtocol(asyncio.DatagramProtocol): + def __init__(self): + self.received = [] + self.done = None + + def connection_made(self, transport): + self.transport = transport + self.done = asyncio.get_running_loop().create_future() + + def datagram_received(self, data, addr): + self.received.append((data, addr)) + if not self.done.done(): + self.done.set_result(data) + + async def main(): + # Create server + server_transport, _ = await self.loop.create_datagram_endpoint( + ServerProtocol, + local_addr=('127.0.0.1', 0) + ) + server_addr = server_transport.get_extra_info('sockname') + + # Create client without remote_addr + client_transport, client_protocol = await self.loop.create_datagram_endpoint( + 
ClientProtocol, + local_addr=('127.0.0.1', 0) + ) + + # Send to specific address + client_transport.sendto(b'test message', server_addr) + result = await asyncio.wait_for(client_protocol.done, timeout=5.0) + + client_transport.close() + server_transport.close() + + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, b'ack') + + def test_udp_multiple_messages(self): + """Test multiple UDP messages.""" + messages = [] + + class ServerProtocol(asyncio.DatagramProtocol): + def connection_made(self, transport): + self.transport = transport + + def datagram_received(self, data, addr): + messages.append(data) + self.transport.sendto(data, addr) # Echo back + + class ClientProtocol(asyncio.DatagramProtocol): + def __init__(self): + self.received = [] + self.expected = 0 + self.done = None + + def connection_made(self, transport): + self.done = asyncio.get_running_loop().create_future() + + def datagram_received(self, data, addr): + self.received.append(data) + if len(self.received) >= self.expected: + if not self.done.done(): + self.done.set_result(self.received) + + async def main(): + server_transport, _ = await self.loop.create_datagram_endpoint( + ServerProtocol, + local_addr=('127.0.0.1', 0) + ) + server_addr = server_transport.get_extra_info('sockname') + + client_transport, client_protocol = await self.loop.create_datagram_endpoint( + ClientProtocol, + remote_addr=server_addr + ) + + client_protocol.expected = 3 + for i in range(3): + client_transport.sendto(f'msg{i}'.encode()) + + results = await asyncio.wait_for(client_protocol.done, timeout=5.0) + + client_transport.close() + server_transport.close() + + return results + + results = self.loop.run_until_complete(main()) + self.assertEqual(len(results), 3) + self.assertEqual(sorted(results), [b'msg0', b'msg1', b'msg2']) + + def test_udp_broadcast(self): + """Test UDP with broadcast (requires allow_broadcast).""" + class BroadcastProtocol(asyncio.DatagramProtocol): + def 
connection_made(self, transport): + self.transport = transport + + async def main(): + transport, _ = await self.loop.create_datagram_endpoint( + BroadcastProtocol, + local_addr=('127.0.0.1', 0), + allow_broadcast=True + ) + + # Just verify we can create with allow_broadcast + sockname = transport.get_extra_info('sockname') + self.assertIsNotNone(sockname) + + transport.close() + + self.loop.run_until_complete(main()) + + +class _TestDatagramTransport: + """Tests for DatagramTransport functionality.""" + + def test_datagram_transport_close(self): + """Test closing datagram transport.""" + close_called = [] + + class TestProtocol(asyncio.DatagramProtocol): + def connection_made(self, transport): + self.transport = transport + + def connection_lost(self, exc): + close_called.append(exc) + + async def main(): + transport, protocol = await self.loop.create_datagram_endpoint( + TestProtocol, + local_addr=('127.0.0.1', 0) + ) + + self.assertFalse(transport.is_closing()) + transport.close() + await asyncio.sleep(0.1) + + self.loop.run_until_complete(main()) + self.assertEqual(len(close_called), 1) + + def test_datagram_transport_abort(self): + """Test aborting datagram transport.""" + class TestProtocol(asyncio.DatagramProtocol): + def connection_made(self, transport): + self.transport = transport + + def connection_lost(self, exc): + pass + + async def main(): + transport, _ = await self.loop.create_datagram_endpoint( + TestProtocol, + local_addr=('127.0.0.1', 0) + ) + + transport.abort() + await asyncio.sleep(0.1) + + self.loop.run_until_complete(main()) + + def test_datagram_transport_get_extra_info(self): + """Test datagram transport get_extra_info.""" + extra_info = {} + + class TestProtocol(asyncio.DatagramProtocol): + def connection_made(self, transport): + extra_info['socket'] = transport.get_extra_info('socket') + extra_info['sockname'] = transport.get_extra_info('sockname') + extra_info['unknown'] = transport.get_extra_info('unknown', 'default') + + async def 
main(): + transport, _ = await self.loop.create_datagram_endpoint( + TestProtocol, + local_addr=('127.0.0.1', 0) + ) + await asyncio.sleep(0.01) + transport.close() + + self.loop.run_until_complete(main()) + + self.assertIsNotNone(extra_info.get('socket')) + self.assertIsNotNone(extra_info.get('sockname')) + self.assertEqual(extra_info.get('unknown'), 'default') + + def test_datagram_transport_write_buffer_size(self): + """Test datagram transport get_write_buffer_size.""" + class TestProtocol(asyncio.DatagramProtocol): + def connection_made(self, transport): + self.transport = transport + + async def main(): + transport, _ = await self.loop.create_datagram_endpoint( + TestProtocol, + local_addr=('127.0.0.1', 0) + ) + + size = transport.get_write_buffer_size() + self.assertIsInstance(size, int) + self.assertGreaterEqual(size, 0) + + transport.close() + + self.loop.run_until_complete(main()) + + +class _TestDatagramProtocol: + """Tests for DatagramProtocol callback behavior.""" + + def test_error_received(self): + """Test error_received callback.""" + errors = [] + + class TestProtocol(asyncio.DatagramProtocol): + def connection_made(self, transport): + self.transport = transport + + def error_received(self, exc): + errors.append(exc) + + async def main(): + transport, _ = await self.loop.create_datagram_endpoint( + TestProtocol, + remote_addr=('127.0.0.1', 1) # Connection refused + ) + + # Try to send to closed port + transport.sendto(b'test') + await asyncio.sleep(0.2) + + transport.close() + + self.loop.run_until_complete(main()) + # May or may not receive error depending on platform + + +class _TestUDPReuse: + """Tests for UDP socket reuse options.""" + + def test_udp_reuse_port(self): + """Test UDP with reuse_port.""" + class TestProtocol(asyncio.DatagramProtocol): + pass + + async def main(): + transport1, _ = await self.loop.create_datagram_endpoint( + TestProtocol, + local_addr=('127.0.0.1', 0), + reuse_port=True + ) + addr = 
transport1.get_extra_info('sockname') + transport1.close() + + # Should be able to bind same address quickly + transport2, _ = await self.loop.create_datagram_endpoint( + TestProtocol, + local_addr=addr, + reuse_port=True + ) + transport2.close() + + self.loop.run_until_complete(main()) + + +# ============================================================================= +# Test classes that combine mixins with test cases +# ============================================================================= + +class TestErlangCreateDatagramEndpoint(_TestCreateDatagramEndpoint, tb.ErlangTestCase): + pass + + +class TestAIOCreateDatagramEndpoint(_TestCreateDatagramEndpoint, tb.AIOTestCase): + pass + + +class TestErlangDatagramTransport(_TestDatagramTransport, tb.ErlangTestCase): + pass + + +class TestAIODatagramTransport(_TestDatagramTransport, tb.AIOTestCase): + pass + + +class TestErlangDatagramProtocol(_TestDatagramProtocol, tb.ErlangTestCase): + pass + + +class TestAIODatagramProtocol(_TestDatagramProtocol, tb.AIOTestCase): + pass + + +class TestErlangUDPReuse(_TestUDPReuse, tb.ErlangTestCase): + pass + + +class TestAIOUDPReuse(_TestUDPReuse, tb.AIOTestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/priv/tests/test_unix.py b/priv/tests/test_unix.py new file mode 100644 index 0000000..adb1c23 --- /dev/null +++ b/priv/tests/test_unix.py @@ -0,0 +1,412 @@ +# Copyright 2026 Benoit Chesneau +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +""" +Unix socket tests adapted from uvloop's test_unix.py. + +These tests verify Unix domain socket operations: +- create_unix_server +- create_unix_connection +- Unix socket data transfer +""" + +import asyncio +import os +import socket +import sys +import tempfile +import threading +import time +import unittest + +from . import _testbase as tb + + +def _is_unix_socket_supported(): + """Check if Unix sockets are supported on this platform.""" + return sys.platform != 'win32' + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class _TestUnixServer: + """Tests for create_unix_server functionality.""" + + def test_create_unix_server_basic(self): + """Test basic Unix socket server creation.""" + connections = [] + + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, 'test.sock') + + class ServerProtocol(asyncio.Protocol): + def connection_made(self, transport): + connections.append(transport) + transport.write(b'welcome') + + def data_received(self, data): + pass + + async def main(): + server = await self.loop.create_unix_server( + ServerProtocol, path + ) + + # Connect a client + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, path) + data = await self.loop.sock_recv(sock, 1024) + sock.close() + + server.close() + await server.wait_closed() + + return data + + result = self.loop.run_until_complete(main()) + + self.assertEqual(result, b'welcome') + self.assertEqual(len(connections), 1) + + def test_create_unix_server_existing_path(self): + """Test that server removes existing socket file.""" + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, 'test.sock') + + # Create file at path + with open(path, 'w') as f: + f.write('test') + + async def main(): + # Should replace the file + server = await self.loop.create_unix_server( + asyncio.Protocol, path + ) + server.close() + await server.wait_closed() + + 
self.loop.run_until_complete(main()) + + def test_unix_server_client_echo(self): + """Test Unix socket server with echo pattern.""" + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, 'echo.sock') + received = [] + + class EchoProtocol(asyncio.Protocol): + def connection_made(self, transport): + self.transport = transport + + def data_received(self, data): + received.append(data) + self.transport.write(b'echo:' + data) + self.transport.close() + + async def main(): + server = await self.loop.create_unix_server( + EchoProtocol, path + ) + + class ClientProtocol(asyncio.Protocol): + def __init__(self): + self.received = [] + self.done = asyncio.get_event_loop().create_future() + + def connection_made(self, transport): + transport.write(b'hello') + + def data_received(self, data): + self.received.append(data) + + def connection_lost(self, exc): + if not self.done.done(): + self.done.set_result(self.received) + + transport, protocol = await self.loop.create_unix_connection( + ClientProtocol, path + ) + + result = await protocol.done + + server.close() + await server.wait_closed() + + return result + + result = self.loop.run_until_complete(main()) + + self.assertEqual(result, [b'echo:hello']) + self.assertEqual(received, [b'hello']) + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class _TestUnixConnection: + """Tests for create_unix_connection functionality.""" + + def test_create_unix_connection_basic(self): + """Test basic Unix socket connection.""" + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, 'test.sock') + + class ServerProtocol(asyncio.Protocol): + def connection_made(self, transport): + transport.write(b'hello') + transport.close() + + async def main(): + server = await self.loop.create_unix_server( + ServerProtocol, path + ) + + class ClientProtocol(asyncio.Protocol): + def __init__(self): + self.data = bytearray() + self.done = 
asyncio.get_event_loop().create_future() + + def data_received(self, data): + self.data.extend(data) + + def connection_lost(self, exc): + if not self.done.done(): + self.done.set_result(bytes(self.data)) + + transport, protocol = await self.loop.create_unix_connection( + ClientProtocol, path + ) + + result = await protocol.done + + server.close() + await server.wait_closed() + + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, b'hello') + + def test_create_unix_connection_with_sock(self): + """Test create_unix_connection with existing socket.""" + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, 'test.sock') + + class ServerProtocol(asyncio.Protocol): + def connection_made(self, transport): + transport.write(b'hi') + transport.close() + + async def main(): + server = await self.loop.create_unix_server( + ServerProtocol, path + ) + + # Create socket manually + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, path) + + class ClientProtocol(asyncio.Protocol): + def __init__(self): + self.data = bytearray() + self.done = asyncio.get_event_loop().create_future() + + def data_received(self, data): + self.data.extend(data) + + def connection_lost(self, exc): + if not self.done.done(): + self.done.set_result(bytes(self.data)) + + transport, protocol = await self.loop.create_unix_connection( + ClientProtocol, sock=sock + ) + + result = await protocol.done + + server.close() + await server.wait_closed() + + return result + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, b'hi') + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class _TestUnixSocketOps: + """Tests for low-level Unix socket operations.""" + + def test_unix_sock_connect_sendall_recv(self): + """Test Unix socket connect, sendall, recv.""" + with tempfile.TemporaryDirectory() as tmpdir: + path = 
os.path.join(tmpdir, 'test.sock') + received = [] + server_ready = threading.Event() + + def server_thread(): + server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + server.bind(path) + server.listen(1) + server_ready.set() + conn, _ = server.accept() + data = conn.recv(1024) + received.append(data) + conn.sendall(b'pong') + conn.close() + server.close() + + thread = threading.Thread(target=server_thread) + thread.start() + server_ready.wait(timeout=5) + + async def client(): + sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + sock.setblocking(False) + await self.loop.sock_connect(sock, path) + await self.loop.sock_sendall(sock, b'ping') + data = await self.loop.sock_recv(sock, 1024) + sock.close() + return data + + result = self.loop.run_until_complete(client()) + thread.join(timeout=5) + + self.assertEqual(result, b'pong') + self.assertEqual(received, [b'ping']) + + def test_unix_sock_accept(self): + """Test Unix socket accept.""" + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, 'test.sock') + + server = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + server.bind(path) + server.listen(1) + server.setblocking(False) + + def client_thread(): + time.sleep(0.05) + client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + client.connect(path) + client.sendall(b'hello') + client.close() + + thread = threading.Thread(target=client_thread) + thread.start() + + async def accept(): + conn, _ = await self.loop.sock_accept(server) + data = await self.loop.sock_recv(conn, 1024) + conn.close() + server.close() + return data + + result = self.loop.run_until_complete(accept()) + thread.join(timeout=5) + + self.assertEqual(result, b'hello') + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class _TestUnixLargeData: + """Tests for large data transfer over Unix sockets.""" + + def test_unix_large_data(self): + """Test transferring large data over Unix sockets.""" + with 
tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, 'test.sock') + data_size = 2 * 1024 * 1024 # 2 MB + received = bytearray() + + class ServerProtocol(asyncio.Protocol): + def connection_made(self, transport): + self.transport = transport + + def data_received(self, data): + received.extend(data) + + async def main(): + server = await self.loop.create_unix_server( + ServerProtocol, path + ) + + class ClientProtocol(asyncio.Protocol): + def connection_made(self, transport): + transport.write(b'x' * data_size) + transport.close() + + await self.loop.create_unix_connection( + ClientProtocol, path + ) + + # Wait for data + for _ in range(100): + if len(received) >= data_size: + break + await asyncio.sleep(0.1) + + server.close() + await server.wait_closed() + + return len(received) + + result = self.loop.run_until_complete(main()) + self.assertEqual(result, data_size) + + +# ============================================================================= +# Test classes that combine mixins with test cases +# ============================================================================= + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class TestErlangUnixServer(_TestUnixServer, tb.ErlangTestCase): + pass + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class TestAIOUnixServer(_TestUnixServer, tb.AIOTestCase): + pass + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class TestErlangUnixConnection(_TestUnixConnection, tb.ErlangTestCase): + pass + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class TestAIOUnixConnection(_TestUnixConnection, tb.AIOTestCase): + pass + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class TestErlangUnixSocketOps(_TestUnixSocketOps, tb.ErlangTestCase): + pass + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class 
TestAIOUnixSocketOps(_TestUnixSocketOps, tb.AIOTestCase): + pass + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class TestErlangUnixLargeData(_TestUnixLargeData, tb.ErlangTestCase): + pass + + +@unittest.skipUnless(_is_unix_socket_supported(), "Unix sockets not available") +class TestAIOUnixLargeData(_TestUnixLargeData, tb.AIOTestCase): + pass + + +if __name__ == '__main__': + unittest.main() diff --git a/rebar.config b/rebar.config index 44aa5b9..f4f632b 100644 --- a/rebar.config +++ b/rebar.config @@ -38,6 +38,7 @@ {main, <<"readme">>}, {extras, [ <<"README.md">>, + <<"docs/migration.md">>, <<"docs/getting-started.md">>, <<"docs/ai-integration.md">>, <<"docs/type-conversion.md">>, @@ -48,11 +49,14 @@ <<"docs/scalability.md">>, <<"docs/threading.md">>, <<"docs/asyncio.md">>, + <<"docs/reactor.md">>, + <<"docs/security.md">>, <<"docs/web-frameworks.md">>, <<"docs/testing-free-threading.md">> ]}, {groups_for_extras, [ {<<"Guides">>, [ + <<"docs/migration.md">>, <<"docs/getting-started.md">>, <<"docs/ai-integration.md">>, <<"docs/type-conversion.md">>, @@ -65,6 +69,8 @@ <<"docs/scalability.md">>, <<"docs/threading.md">>, <<"docs/asyncio.md">>, + <<"docs/reactor.md">>, + <<"docs/security.md">>, <<"docs/web-frameworks.md">>, <<"docs/testing-free-threading.md">> ]} diff --git a/scripts/test_timer_path.py b/scripts/test_timer_path.py index 3b504f0..15276d6 100644 --- a/scripts/test_timer_path.py +++ b/scripts/test_timer_path.py @@ -6,7 +6,7 @@ import asyncio import time -from erlang_loop import ErlangEventLoop +from _erlang_impl import ErlangEventLoop def run_test(): results = {} @@ -39,18 +39,18 @@ async def timer_test(n): results['default_time'] = default_time results['default_rate'] = int(n/default_time) - # Isolated loop test - loop = ErlangEventLoop(isolated=True) + # Direct loop test + loop = ErlangEventLoop() asyncio.set_event_loop(loop) start = time.perf_counter() try: loop.run_until_complete(timer_test(n)) finally: 
loop.close() - isolated_time = time.perf_counter() - start - results['isolated_time'] = isolated_time - results['isolated_rate'] = int(n/isolated_time) + direct_time = time.perf_counter() - start + results['direct_time'] = direct_time + results['direct_rate'] = int(n/direct_time) - results['ratio'] = default_time/isolated_time + results['ratio'] = default_time/direct_time return results diff --git a/src/erlang_python_sup.erl b/src/erlang_python_sup.erl index 8134071..5847309 100644 --- a/src/erlang_python_sup.erl +++ b/src/erlang_python_sup.erl @@ -14,13 +14,12 @@ %%% @doc Top-level supervisor for erlang_python. %%% -%%% Manages the worker pools for Python execution: +%%% Manages Python execution components: %%%
 %%%
 %%%   • py_callback - Callback registry for Python to Erlang calls
 %%%   • py_state - Shared state storage accessible from Python
-%%%   • py_pool - Main worker pool for synchronous Python calls
+%%%   • py_context_sup - Supervisor for process-per-context workers
 %%%   • py_async_pool - Worker pool for asyncio coroutines
-%%%   • py_subinterp_pool - Worker pool for sub-interpreter parallelism
 %%%
%%% @private -module(erlang_python_sup). @@ -33,9 +32,13 @@ start_link() -> supervisor:start_link({local, ?MODULE}, ?MODULE, []). init([]) -> - NumWorkers = application:get_env(erlang_python, num_workers, 4), + NumContexts = application:get_env(erlang_python, num_contexts, + erlang:system_info(schedulers)), + ContextMode = application:get_env(erlang_python, context_mode, auto), NumAsyncWorkers = application:get_env(erlang_python, num_async_workers, 2), - NumSubinterpWorkers = application:get_env(erlang_python, num_subinterp_workers, 4), + + %% Initialize Python runtime first + ok = py_nif:init(), %% Initialize the semaphore ETS table for rate limiting ok = py_semaphore:init(), @@ -49,7 +52,7 @@ init([]) -> %% Register state functions as callbacks for Python access ok = py_state:register_callbacks(), - %% Callback registry - must start before pool + %% Callback registry - must start before contexts CallbackSpec = #{ id => py_callback, start => {py_callback, start_link, []}, @@ -89,14 +92,24 @@ init([]) -> modules => [py_tracer] }, - %% Main worker pool - PoolSpec = #{ - id => py_pool, - start => {py_pool, start_link, [NumWorkers]}, + %% Process-per-context supervisor (replaces py_pool and py_subinterp_pool) + ContextSupSpec = #{ + id => py_context_sup, + start => {py_context_sup, start_link, []}, restart => permanent, + shutdown => infinity, + type => supervisor, + modules => [py_context_sup] + }, + + %% Context router initialization (starts contexts under py_context_sup) + ContextRouterInitSpec = #{ + id => py_context_init, + start => {py_context_init, start_link, [#{contexts => NumContexts, mode => ContextMode}]}, + restart => temporary, shutdown => 5000, type => worker, - modules => [py_pool] + modules => [py_context_init] }, %% Async worker pool (for asyncio coroutines) @@ -109,16 +122,6 @@ init([]) -> modules => [py_async_pool] }, - %% Sub-interpreter pool (for true parallelism with per-interpreter GIL) - SubinterpPoolSpec = #{ - id => py_subinterp_pool, - 
start => {py_subinterp_pool, start_link, [NumSubinterpWorkers]}, - restart => permanent, - shutdown => 5000, - type => worker, - modules => [py_subinterp_pool] - }, - %% Event worker registry (for scalable I/O model) WorkerRegistrySpec = #{ id => py_event_worker_registry, @@ -149,9 +152,20 @@ init([]) -> modules => [py_event_loop] }, + %% Event loop pool (for async Python coroutines via event loops) + EventLoopPoolSpec = #{ + id => py_event_loop_pool, + start => {py_event_loop_pool, start_link, []}, + restart => permanent, + shutdown => 5000, + type => worker, + modules => [py_event_loop_pool] + }, + Children = [CallbackSpec, ThreadHandlerSpec, LoggerSpec, TracerSpec, - PoolSpec, AsyncPoolSpec, SubinterpPoolSpec, - WorkerRegistrySpec, WorkerSupSpec, EventLoopSpec], + ContextSupSpec, ContextRouterInitSpec, + WorkerRegistrySpec, WorkerSupSpec, EventLoopSpec, + EventLoopPoolSpec, AsyncPoolSpec], {ok, { #{strategy => one_for_all, intensity => 5, period => 10}, diff --git a/src/py.erl b/src/py.erl index 1db65ed..2f10cac 100644 --- a/src/py.erl +++ b/src/py.erl @@ -39,14 +39,15 @@ call/3, call/4, call/5, - call_async/3, - call_async/4, + cast/3, + cast/4, await/1, await/2, eval/1, eval/2, eval/3, exec/1, + exec/2, stream/3, stream/4, stream_eval/1, @@ -91,21 +92,25 @@ state_decr/2, %% Module reload reload/1, - %% Context affinity - bind/0, bind/1, - unbind/0, unbind/1, - is_bound/0, - with_context/1, - ctx_call/4, ctx_call/5, ctx_call/6, - ctx_eval/2, ctx_eval/3, ctx_eval/4, - ctx_exec/2, %% Logging and tracing configure_logging/0, configure_logging/1, enable_tracing/0, disable_tracing/0, get_traces/0, - clear_traces/0 + clear_traces/0, + %% Process-per-context API (new architecture) + context/0, + context/1, + start_contexts/0, + start_contexts/1, + stop_contexts/0, + contexts_started/0, + %% py_ref API (Python object references with auto-routing) + call_method/3, + getattr/2, + to_term/1, + is_ref/1 ]). -type py_result() :: {ok, term()} | {error, term()}. 
@@ -115,11 +120,7 @@ -type py_args() :: [term()]. -type py_kwargs() :: #{atom() | binary() => term()}. -%% Context affinity handle --record(py_ctx, {ref :: reference()}). --opaque py_ctx() :: #py_ctx{}. - --export_type([py_result/0, py_ref/0, py_ctx/0]). +-export_type([py_result/0, py_ref/0]). %% Default timeout for synchronous calls (30 seconds) -define(DEFAULT_TIMEOUT, 30000). @@ -134,14 +135,34 @@ call(Module, Func, Args) -> call(Module, Func, Args, #{}). %% @doc Call a Python function with keyword arguments. --spec call(py_module(), py_func(), py_args(), py_kwargs()) -> py_result(). +%% +%% When the first argument is a pid (context), calls using the new +%% process-per-context architecture. +%% +%% @param CtxOrModule Context pid or Python module +%% @param ModuleOrFunc Python module or function name +%% @param FuncOrArgs Function name or arguments list +%% @param ArgsOrKwargs Arguments list or keyword arguments +-spec call(pid(), py_module(), py_func(), py_args()) -> py_result() + ; (py_module(), py_func(), py_args(), py_kwargs()) -> py_result(). +call(Ctx, Module, Func, Args) when is_pid(Ctx) -> + py_context:call(Ctx, Module, Func, Args, #{}); call(Module, Func, Args, Kwargs) -> call(Module, Func, Args, Kwargs, ?DEFAULT_TIMEOUT). %% @doc Call a Python function with keyword arguments and custom timeout. +%% +%% When the first argument is a pid (context), calls using the new +%% process-per-context architecture with options map. +%% %% Timeout is in milliseconds. Use `infinity' for no timeout. %% Rate limited via ETS-based semaphore to prevent overload. --spec call(py_module(), py_func(), py_args(), py_kwargs(), timeout()) -> py_result(). +-spec call(pid(), py_module(), py_func(), py_args(), map()) -> py_result() + ; (py_module(), py_func(), py_args(), py_kwargs(), timeout()) -> py_result(). 
+call(Ctx, Module, Func, Args, Opts) when is_pid(Ctx), is_map(Opts) -> + Kwargs = maps:get(kwargs, Opts, #{}), + Timeout = maps:get(timeout, Opts, infinity), + py_context:call(Ctx, Module, Func, Args, Kwargs, Timeout); call(Module, Func, Args, Kwargs, Timeout) -> %% Acquire semaphore slot before making the call case py_semaphore:acquire(Timeout) of @@ -156,23 +177,11 @@ call(Module, Func, Args, Kwargs, Timeout) -> end. %% @private +%% Always route through context process - it handles callbacks inline using +%% suspension-based approach (no separate callback handler, no blocking) do_call(Module, Func, Args, Kwargs, Timeout) -> - Ref = make_ref(), - TimeoutMs = py_util:normalize_timeout(Timeout, ?DEFAULT_TIMEOUT), - Request = {call, Ref, self(), Module, Func, Args, Kwargs, TimeoutMs}, - case get_binding() of - {bound, Worker} -> py_pool:direct_request(Worker, Request); - unbound -> py_pool:request(Request) - end, - await(Ref, Timeout). - -%% @private Get binding if process is bound -get_binding() -> - Key = {process, self()}, - case py_pool:lookup_binding(Key) of - {ok, Worker} -> {bound, Worker}; - not_found -> unbound - end. + Ctx = py_context_router:get_context(), + py_context:call(Ctx, Module, Func, Args, Kwargs, Timeout). %% @doc Evaluate a Python expression and return the result. -spec eval(string() | binary()) -> py_result(). @@ -180,51 +189,68 @@ eval(Code) -> eval(Code, #{}). %% @doc Evaluate a Python expression with local variables. --spec eval(string() | binary(), map()) -> py_result(). +%% +%% When the first argument is a pid (context), evaluates using the new +%% process-per-context architecture. +-spec eval(pid(), string() | binary()) -> py_result() + ; (string() | binary(), map()) -> py_result(). +eval(Ctx, Code) when is_pid(Ctx) -> + py_context:eval(Ctx, Code, #{}); eval(Code, Locals) -> eval(Code, Locals, ?DEFAULT_TIMEOUT). %% @doc Evaluate a Python expression with local variables and timeout. 
+%% +%% When the first argument is a pid (context), evaluates using the new +%% process-per-context architecture with locals. +%% %% Timeout is in milliseconds. Use `infinity' for no timeout. --spec eval(string() | binary(), map(), timeout()) -> py_result(). +-spec eval(pid(), string() | binary(), map()) -> py_result() + ; (string() | binary(), map(), timeout()) -> py_result(). +eval(Ctx, Code, Locals) when is_pid(Ctx), is_map(Locals) -> + py_context:eval(Ctx, Code, Locals); eval(Code, Locals, Timeout) -> - Ref = make_ref(), - TimeoutMs = py_util:normalize_timeout(Timeout, ?DEFAULT_TIMEOUT), - Request = {eval, Ref, self(), Code, Locals, TimeoutMs}, - case get_binding() of - {bound, Worker} -> py_pool:direct_request(Worker, Request); - unbound -> py_pool:request(Request) - end, - await(Ref, Timeout). + %% Always route through context process - it handles callbacks inline using + %% suspension-based approach (no separate callback handler, no blocking) + Ctx = py_context_router:get_context(), + py_context:eval(Ctx, Code, Locals, Timeout). %% @doc Execute Python statements (no return value expected). -spec exec(string() | binary()) -> ok | {error, term()}. exec(Code) -> - Ref = make_ref(), - Request = {exec, Ref, self(), Code}, - case get_binding() of - {bound, Worker} -> py_pool:direct_request(Worker, Request); - unbound -> py_pool:request(Request) - end, - case await(Ref, ?DEFAULT_TIMEOUT) of - {ok, _} -> ok; - Error -> Error - end. + %% Always route through context process - it handles callbacks inline using + %% suspension-based approach (no separate callback handler, no blocking) + Ctx = py_context_router:get_context(), + py_context:exec(Ctx, Code). + +%% @doc Execute Python statements using a specific context. +%% +%% This is the explicit context variant of exec/1. +-spec exec(pid(), string() | binary()) -> ok | {error, term()}. +exec(Ctx, Code) when is_pid(Ctx) -> + py_context:exec(Ctx, Code). 
%%% ============================================================================ %%% Asynchronous API %%% ============================================================================ -%% @doc Call a Python function asynchronously, returns immediately with a ref. --spec call_async(py_module(), py_func(), py_args()) -> py_ref(). -call_async(Module, Func, Args) -> - call_async(Module, Func, Args, #{}). +%% @doc Cast a Python function call, returns immediately with a ref. +%% The call executes in a spawned process. Use await/1,2 to get the result. +-spec cast(py_module(), py_func(), py_args()) -> py_ref(). +cast(Module, Func, Args) -> + cast(Module, Func, Args, #{}). -%% @doc Call a Python function asynchronously with kwargs. --spec call_async(py_module(), py_func(), py_args(), py_kwargs()) -> py_ref(). -call_async(Module, Func, Args, Kwargs) -> +%% @doc Cast a Python function call with kwargs. +-spec cast(py_module(), py_func(), py_args(), py_kwargs()) -> py_ref(). +cast(Module, Func, Args, Kwargs) -> + %% Spawn a process to execute the call and return a ref Ref = make_ref(), - py_pool:request({call, Ref, self(), Module, Func, Args, Kwargs}), + Parent = self(), + spawn(fun() -> + Ctx = py_context_router:get_context(), + Result = py_context:call(Ctx, Module, Func, Args, Kwargs), + Parent ! {py_response, Ref, Result} + end), Ref. %% @doc Wait for an async call to complete. @@ -255,22 +281,42 @@ stream(Module, Func, Args) -> %% @doc Stream results from a Python generator with kwargs. -spec stream(py_module(), py_func(), py_args(), py_kwargs()) -> py_result(). stream(Module, Func, Args, Kwargs) -> - Ref = make_ref(), - py_pool:request({stream, Ref, self(), Module, Func, Args, Kwargs}), - stream_collect(Ref, []). 
- 
-%% @private
-stream_collect(Ref, Acc) ->
-    receive
-        {py_chunk, Ref, Chunk} ->
-            stream_collect(Ref, [Chunk | Acc]);
-        {py_end, Ref} ->
-            {ok, lists:reverse(Acc)};
-        {py_error, Ref, Error} ->
-            {error, Error}
-    after ?DEFAULT_TIMEOUT ->
-        {error, timeout}
-    end.
+    %% Route through the new process-per-context system
+    %% Create the generator and collect all values using list()
+    Ctx = py_context_router:get_context(),
+    ModuleBin = ensure_binary(Module),
+    FuncBin = ensure_binary(Func),
+    %% Build code that calls the function and collects all yielded values
+    KwargsCode = format_kwargs(Kwargs),
+    ArgsCode = format_args(Args),
+    Code = iolist_to_binary([
+        <<"list(__import__('">>, ModuleBin, <<"').">>, FuncBin,
+        <<"(">>, ArgsCode, KwargsCode, <<"))">>
+    ]),
+    py_context:eval(Ctx, Code, #{}).
+
+%% @private Format arguments for Python code
+format_args([]) -> <<>>;
+format_args(Args) ->
+    ArgStrs = [format_arg(A) || A <- Args],
+    iolist_to_binary(lists:join(<<", ">>, ArgStrs)).
+
+%% @private Format a single argument
+format_arg(A) when is_integer(A) -> integer_to_binary(A);
+format_arg(A) when is_float(A) -> float_to_binary(A);
+format_arg(A) when is_binary(A) -> <<"'", A/binary, "'">>;
+format_arg(A) when is_atom(A) -> <<"'", (atom_to_binary(A))/binary, "'">>;
+format_arg(A) when is_list(A) -> iolist_to_binary([<<"[">>, format_args(A), <<"]">>]);
+format_arg(_) -> <<"None">>.
+
+%% @private Format kwargs for Python code
+format_kwargs(Kwargs) when map_size(Kwargs) == 0 -> <<>>;
+format_kwargs(Kwargs) ->
+    KwList = maps:fold(fun(K, V, Acc) ->
+        KB = if is_atom(K) -> atom_to_binary(K); is_binary(K) -> K end,
+        [<<KB/binary, "=", (format_arg(V))/binary>> | Acc]
+    end, [], Kwargs),
+    iolist_to_binary([<<", ">>, lists:join(<<", ">>, KwList)]).
 
 %% @doc Stream results from a Python generator expression.
 %% Evaluates the expression and if it returns a generator, streams all values.
@@ -281,9 +327,12 @@ stream_eval(Code) ->
 
 %% @doc Stream results from a Python generator expression with local variables.
 -spec stream_eval(string() | binary(), map()) -> py_result().
stream_eval(Code, Locals) -> - Ref = make_ref(), - py_pool:request({stream_eval, Ref, self(), Code, Locals}), - stream_collect(Ref, []). + %% Route through the new process-per-context system + %% Wrap the code in list() to collect generator values + Ctx = py_context_router:get_context(), + CodeBin = ensure_binary(Code), + WrappedCode = <<"list(", CodeBin/binary, ")">>, + py_context:eval(Ctx, WrappedCode, Locals). %%% ============================================================================ %%% Info @@ -475,11 +524,41 @@ subinterp_supported() -> %% On older Python versions, returns {error, subinterpreters_not_supported}. -spec parallel([{py_module(), py_func(), py_args()}]) -> py_result(). parallel(Calls) when is_list(Calls) -> - case py_nif:subinterp_supported() of + %% Distribute calls across available contexts for true parallel execution + NumContexts = py_context_router:num_contexts(), + Parent = self(), + Ref = make_ref(), + + %% Spawn processes to execute calls in parallel + CallsWithIdx = lists:zip(lists:seq(1, length(Calls)), Calls), + _ = [spawn(fun() -> + %% Distribute calls round-robin across contexts + CtxIdx = ((Idx - 1) rem NumContexts) + 1, + Ctx = py_context_router:get_context(CtxIdx), + Result = py_context:call(Ctx, M, F, A, #{}), + Parent ! 
{Ref, Idx, Result} + end) || {Idx, {M, F, A}} <- CallsWithIdx], + + %% Collect results in order + Results = [receive + {Ref, Idx, Result} -> {Idx, Result} + after ?DEFAULT_TIMEOUT -> + {Idx, {error, timeout}} + end || {Idx, _} <- CallsWithIdx], + + %% Sort by index and extract results + SortedResults = [R || {_, R} <- lists:keysort(1, Results)], + + %% Check if all succeeded + case lists:all(fun({ok, _}) -> true; (_) -> false end, SortedResults) of true -> - py_subinterp_pool:parallel(Calls); + {ok, [V || {ok, V} <- SortedResults]}; false -> - {error, subinterpreters_not_supported} + %% Return first error or all results + case lists:keyfind(error, 1, SortedResults) of + {error, _} = Err -> Err; + false -> {ok, SortedResults} + end end. %%% ============================================================================ @@ -620,12 +699,12 @@ state_decr(Key, Amount) -> %%% Module Reload %%% ============================================================================ -%% @doc Reload a Python module across all workers. +%% @doc Reload a Python module across all contexts. %% This uses importlib.reload() to refresh the module from disk. %% Useful during development when Python code changes. %% %% Note: This only affects already-imported modules. If the module -%% hasn't been imported in a worker yet, the reload is a no-op for that worker. +%% hasn't been imported in a context yet, the reload is a no-op for that context. %% %% Example: %% ``` @@ -633,9 +712,9 @@ state_decr(Key, Amount) -> %% ok = py:reload(mymodule). %% ''' %% -%% Returns ok if reload succeeded in all workers, or {error, Reasons} -%% if any workers failed. --spec reload(py_module()) -> ok | {error, [{worker, term()}]}. +%% Returns ok if reload succeeded in all contexts, or {error, Reasons} +%% if any contexts failed. +-spec reload(py_module()) -> ok | {error, [{context, term()}]}. 
reload(Module) -> ModuleBin = ensure_binary(Module), %% Build Python code that: @@ -645,9 +724,12 @@ reload(Module) -> Code = <<"__import__('importlib').reload(__import__('sys').modules['", ModuleBin/binary, "']) if '", ModuleBin/binary, "' in __import__('sys').modules else None">>, - %% Broadcast to all workers - Request = {eval, undefined, undefined, Code, #{}}, - Results = py_pool:broadcast(Request), + %% Broadcast to all contexts + NumContexts = py_context_router:num_contexts(), + Results = [begin + Ctx = py_context_router:get_context(N), + py_context:eval(Ctx, Code, #{}) + end || N <- lists:seq(1, NumContexts)], %% Check if any failed Errors = lists:filtermap(fun ({ok, _}) -> false; @@ -655,176 +737,7 @@ reload(Module) -> end, Results), case Errors of [] -> ok; - _ -> {error, [{worker, E} || E <- Errors]} - end. - -%%% ============================================================================ -%%% Context Affinity API -%%% ============================================================================ - -%% @doc Bind current process to a dedicated Python worker. -%% All subsequent py:call/eval/exec operations from this process will use -%% the same worker, preserving Python state (variables, imports) across calls. -%% -%% Example: -%% ``` -%% ok = py:bind(), -%% ok = py:exec(<<"x = 42">>), -%% {ok, 42} = py:eval(<<"x">>), % Same worker, x persists -%% ok = py:unbind(). -%% ''' --spec bind() -> ok | {error, term()}. -bind() -> - Key = {process, self()}, - case py_pool:lookup_binding(Key) of - {ok, _} -> ok; % Already bound - not_found -> - case py_pool:checkout(Key) of - {ok, _} -> ok; - Error -> Error - end - end. - -%% @doc Create an explicit context with a dedicated worker. -%% Returns a context handle that can be passed to call/eval/exec variants. -%% Multiple contexts can exist per process. 
-%% -%% Example: -%% ``` -%% {ok, Ctx1} = py:bind(new), -%% {ok, Ctx2} = py:bind(new), -%% ok = py:exec(Ctx1, <<"x = 1">>), -%% ok = py:exec(Ctx2, <<"x = 2">>), -%% {ok, 1} = py:eval(Ctx1, <<"x">>), % Isolated -%% {ok, 2} = py:eval(Ctx2, <<"x">>), % Isolated -%% ok = py:unbind(Ctx1), -%% ok = py:unbind(Ctx2). -%% ''' --spec bind(new) -> {ok, py_ctx()} | {error, term()}. -bind(new) -> - Ref = make_ref(), - Key = {context, Ref}, - case py_pool:checkout(Key) of - {ok, _} -> {ok, #py_ctx{ref = Ref}}; - Error -> Error - end. - -%% @doc Release bound worker for current process. --spec unbind() -> ok. -unbind() -> - py_pool:checkin({process, self()}). - -%% @doc Release explicit context's worker. --spec unbind(py_ctx()) -> ok. -unbind(#py_ctx{ref = Ref}) -> - py_pool:checkin({context, Ref}). - -%% @doc Check if current process is bound. --spec is_bound() -> boolean(). -is_bound() -> - case py_pool:lookup_binding({process, self()}) of - {ok, _} -> true; - not_found -> false - end. - -%% @doc Execute function with temporary bound context. -%% Automatically binds before and unbinds after (even on exception). -%% -%% With arity-0 function (uses implicit process binding): -%% ``` -%% Result = py:with_context(fun() -> -%% ok = py:exec(<<"total = 0">>), -%% ok = py:exec(<<"total += 1">>), -%% py:eval(<<"total">>) -%% end). -%% %% {ok, 1} -%% ''' -%% -%% With arity-1 function (receives explicit context): -%% ``` -%% Result = py:with_context(fun(Ctx) -> -%% ok = py:exec(Ctx, <<"x = 10">>), -%% py:eval(Ctx, <<"x * 2">>) -%% end). -%% %% {ok, 20} -%% ''' --spec with_context(fun(() -> Result) | fun((py_ctx()) -> Result)) -> Result. -with_context(Fun) when is_function(Fun, 0) -> - ok = bind(), - try Fun() - after unbind() - end; -with_context(Fun) when is_function(Fun, 1) -> - {ok, Ctx} = bind(new), - try Fun(Ctx) - after unbind(Ctx) - end. - -%% @doc Call with explicit context. --spec ctx_call(py_ctx(), py_module(), py_func(), py_args()) -> py_result(). 
-ctx_call(Ctx, Module, Func, Args) -> - ctx_call(Ctx, Module, Func, Args, #{}). - -%% @doc Call with explicit context and kwargs. --spec ctx_call(py_ctx(), py_module(), py_func(), py_args(), py_kwargs()) -> py_result(). -ctx_call(Ctx, Module, Func, Args, Kwargs) -> - ctx_call(Ctx, Module, Func, Args, Kwargs, ?DEFAULT_TIMEOUT). - -%% @doc Call with explicit context, kwargs, and timeout. --spec ctx_call(py_ctx(), py_module(), py_func(), py_args(), py_kwargs(), timeout()) -> py_result(). -ctx_call(#py_ctx{ref = CtxRef}, Module, Func, Args, Kwargs, Timeout) -> - case py_semaphore:acquire(Timeout) of - ok -> - try - Ref = make_ref(), - TimeoutMs = py_util:normalize_timeout(Timeout, ?DEFAULT_TIMEOUT), - Request = {call, Ref, self(), Module, Func, Args, Kwargs, TimeoutMs}, - case py_pool:lookup_binding({context, CtxRef}) of - {ok, Worker} -> py_pool:direct_request(Worker, Request); - not_found -> error(context_not_bound) - end, - await(Ref, Timeout) - after - py_semaphore:release() - end; - {error, max_concurrent} -> - {error, {overloaded, py_semaphore:current(), py_semaphore:max_concurrent()}} - end. - -%% @doc Eval with explicit context. --spec ctx_eval(py_ctx(), string() | binary()) -> py_result(). -ctx_eval(Ctx, Code) -> - ctx_eval(Ctx, Code, #{}). - -%% @doc Eval with explicit context and locals. --spec ctx_eval(py_ctx(), string() | binary(), map()) -> py_result(). -ctx_eval(Ctx, Code, Locals) -> - ctx_eval(Ctx, Code, Locals, ?DEFAULT_TIMEOUT). - -%% @doc Eval with explicit context, locals, and timeout. --spec ctx_eval(py_ctx(), string() | binary(), map(), timeout()) -> py_result(). -ctx_eval(#py_ctx{ref = CtxRef}, Code, Locals, Timeout) -> - Ref = make_ref(), - TimeoutMs = py_util:normalize_timeout(Timeout, ?DEFAULT_TIMEOUT), - Request = {eval, Ref, self(), Code, Locals, TimeoutMs}, - case py_pool:lookup_binding({context, CtxRef}) of - {ok, Worker} -> py_pool:direct_request(Worker, Request); - not_found -> error(context_not_bound) - end, - await(Ref, Timeout). 
- -%% @doc Exec with explicit context. --spec ctx_exec(py_ctx(), string() | binary()) -> ok | {error, term()}. -ctx_exec(#py_ctx{ref = CtxRef}, Code) -> - Ref = make_ref(), - Request = {exec, Ref, self(), Code}, - case py_pool:lookup_binding({context, CtxRef}) of - {ok, Worker} -> py_pool:direct_request(Worker, Request); - not_found -> error(context_not_bound) - end, - case await(Ref, ?DEFAULT_TIMEOUT) of - {ok, _} -> ok; - Error -> Error + _ -> {error, [{context, E} || E <- Errors]} end. %%% ============================================================================ @@ -907,3 +820,134 @@ get_traces() -> -spec clear_traces() -> ok. clear_traces() -> py_tracer:clear(). + +%%% ============================================================================ +%%% Process-per-context API +%%% +%%% This new architecture uses one Erlang process per Python context. +%%% Each context owns its Python interpreter (subinterpreter on Python 3.12+ +%%% or worker on older versions). This eliminates mutex contention and +%%% enables true N-way parallelism. +%%% +%%% Usage: +%%% ``` +%%% %% Start the context system (usually done by the application) +%%% {ok, _} = py:start_contexts(), +%%% +%%% %% Get context for current scheduler (automatic routing) +%%% Ctx = py:context(), +%%% {ok, Result} = py:call(Ctx, math, sqrt, [16]), +%%% +%%% %% Or bind a specific context to this process +%%% ok = py:bind_context(py:context(1)), +%%% {ok, Result} = py:call(py:context(), math, sqrt, [16]). +%%% ''' +%%% ============================================================================ + +%% @doc Start the process-per-context system with default settings. +%% +%% Creates one context per scheduler, using auto mode (subinterp on +%% Python 3.12+, worker otherwise). +%% +%% @returns {ok, [Context]} | {error, Reason} +-spec start_contexts() -> {ok, [pid()]} | {error, term()}. +start_contexts() -> + py_context_router:start(). + +%% @doc Start the process-per-context system with options. 
+%% +%% Options: +%% - `contexts' - Number of contexts to create (default: number of schedulers) +%% - `mode' - Context mode: `auto', `subinterp', or `worker' (default: `auto') +%% +%% @param Opts Start options +%% @returns {ok, [Context]} | {error, Reason} +-spec start_contexts(map()) -> {ok, [pid()]} | {error, term()}. +start_contexts(Opts) -> + py_context_router:start(Opts). + +%% @doc Stop the process-per-context system. +-spec stop_contexts() -> ok. +stop_contexts() -> + py_context_router:stop(). + +%% @doc Check if contexts have been started. +-spec contexts_started() -> boolean(). +contexts_started() -> + py_context_router:is_started(). + +%% @doc Get the context for the current process. +%% +%% If the process has a bound context (via bind_context/1), returns that. +%% Otherwise, selects a context based on the current scheduler ID. +%% +%% This provides automatic load distribution across contexts while +%% maintaining scheduler affinity for cache locality. +%% +%% @returns Context pid +-spec context() -> pid(). +context() -> + py_context_router:get_context(). + +%% @doc Get a specific context by index. +%% +%% @param N Context index (1 to num_contexts) +%% @returns Context pid +-spec context(pos_integer()) -> pid(). +context(N) -> + py_context_router:get_context(N). + +%%% ============================================================================ +%%% py_ref API (Python object references with auto-routing) +%%% +%%% These functions work with py_ref references that carry both a Python +%%% object and the interpreter ID that created it. Method calls and +%%% attribute access are automatically routed to the correct context. +%%% ============================================================================ + +%% @doc Call a method on a Python object reference. +%% +%% The reference carries the interpreter ID, so the call is automatically +%% routed to the correct context. 
+%% +%% Example: +%% ``` +%% {ok, Ref} = py:call(Ctx, builtins, list, [[1,2,3]], #{return => ref}), +%% {ok, 3} = py:call_method(Ref, '__len__', []). +%% ''' +%% +%% @param Ref py_ref reference +%% @param Method Method name +%% @param Args Arguments list +%% @returns {ok, Result} | {error, Reason} +-spec call_method(reference(), atom() | binary(), list()) -> py_result(). +call_method(Ref, Method, Args) -> + MethodBin = ensure_binary(Method), + py_nif:ref_call_method(Ref, MethodBin, Args). + +%% @doc Get an attribute from a Python object reference. +%% +%% @param Ref py_ref reference +%% @param Name Attribute name +%% @returns {ok, Value} | {error, Reason} +-spec getattr(reference(), atom() | binary()) -> py_result(). +getattr(Ref, Name) -> + NameBin = ensure_binary(Name), + py_nif:ref_getattr(Ref, NameBin). + +%% @doc Convert a Python object reference to an Erlang term. +%% +%% @param Ref py_ref reference +%% @returns {ok, Term} | {error, Reason} +-spec to_term(reference()) -> py_result(). +to_term(Ref) -> + py_nif:ref_to_term(Ref). + +%% @doc Check if a term is a py_ref reference. +%% +%% @param Term Term to check +%% @returns true | false +-spec is_ref(term()) -> boolean(). +is_ref(Term) -> + py_nif:is_ref(Term). + diff --git a/src/py_async_pool.erl b/src/py_async_pool.erl index 46ef033..11bf5d7 100644 --- a/src/py_async_pool.erl +++ b/src/py_async_pool.erl @@ -12,16 +12,21 @@ %% See the License for the specific language governing permissions and %% limitations under the License. -%%% @doc Worker pool manager for async Python execution. +%%% @doc Pool manager for async Python execution using event loops. %%% -%%% Manages a pool of async workers that have background asyncio event loops. -%%% Distributes async requests across workers using round-robin scheduling. +%%% This module provides an async request pool that delegates to the event loop +%%% pool for efficient coroutine execution. 
It replaces the pthread+usleep +%%% polling model with event-driven execution using enif_select and erlang.send(). +%%% +%%% The pool maintains API compatibility with the previous pthread-based +%%% implementation while providing significant performance improvements. %%% %%% @private -module(py_async_pool). -behaviour(gen_server). -export([ + start_link/0, start_link/1, request/1, get_stats/0 @@ -36,22 +41,24 @@ ]). -record(state, { - workers :: queue:queue(pid()) | undefined, - num_workers :: non_neg_integer(), pending :: non_neg_integer(), - worker_sup :: pid() | undefined, - supported :: boolean() %% whether async workers are supported + supported :: boolean() }). %%% ============================================================================ %%% API %%% ============================================================================ +-spec start_link() -> {ok, pid()} | {error, term()}. +start_link() -> + start_link(1). + -spec start_link(pos_integer()) -> {ok, pid()} | {error, term()}. -start_link(NumWorkers) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [NumWorkers], []). +start_link(_NumWorkers) -> + %% NumWorkers is now ignored - we use the event loop pool instead + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). -%% @doc Submit an async request to be executed by a worker. +%% @doc Submit an async request to be executed by the event loop pool. -spec request(term()) -> ok. request(Request) -> gen_server:cast(?MODULE, {request, Request}). 
@@ -65,44 +72,21 @@ get_stats() -> %%% gen_server callbacks %%% ============================================================================ -init([NumWorkers]) -> +init([]) -> process_flag(trap_exit, true), - - %% Start worker supervisor - {ok, WorkerSup} = py_async_worker_sup:start_link(), - - %% Try to start workers - may fail on free-threaded Python - case start_workers(WorkerSup, NumWorkers) of - {ok, Workers} -> - {ok, #state{ - workers = queue:from_list(Workers), - num_workers = NumWorkers, - pending = 0, - worker_sup = WorkerSup, - supported = true - }}; - {error, _Reason} -> - %% Async workers not supported (e.g., free-threaded Python) - %% Pool starts but all requests will return an error - {ok, #state{ - workers = undefined, - num_workers = 0, - pending = 0, - worker_sup = WorkerSup, - supported = false - }} + %% Check if event loop pool is available + case py_event_loop:get_loop() of + {ok, _LoopRef} -> + {ok, #state{pending = 0, supported = true}}; + {error, _} -> + {ok, #state{pending = 0, supported = false}} end. handle_call(get_stats, _From, State) -> - AvailWorkers = case State#state.workers of - undefined -> 0; - Q -> queue:len(Q) - end, Stats = #{ - num_workers => State#state.num_workers, pending_requests => State#state.pending, - available_workers => AvailWorkers, - supported => State#state.supported + supported => State#state.supported, + backend => event_loop }, {reply, Stats, State}; @@ -110,80 +94,74 @@ handle_call(_Request, _From, State) -> {reply, {error, unknown_request}, State}. handle_cast({request, Request}, #state{supported = false} = State) -> - {Ref, Caller, _} = extract_ref_caller(Request), + {Ref, Caller, _Type} = extract_ref_caller(Request), Caller ! {py_error, Ref, async_not_supported}, {noreply, State}; handle_cast({request, Request}, State) -> - case queue:out(State#state.workers) of - {{value, Worker}, Rest} -> - %% Send request to worker - Worker ! 
{py_async_request, Request}, - %% Put worker at end of queue (round-robin) - NewWorkers = queue:in(Worker, Rest), - {noreply, State#state{ - workers = NewWorkers, - pending = State#state.pending + 1 - }}; - {empty, _} -> - error_logger:warning_msg("py_async_pool: no workers available~n"), + case transform_request(Request) of + {ok, LoopRequest} -> + case py_event_loop:get_loop() of + {ok, LoopRef} -> + case py_event_loop:run_async(LoopRef, LoopRequest) of + ok -> + {noreply, State#state{pending = State#state.pending + 1}}; + {error, Reason} -> + {Ref, Caller, _} = extract_ref_caller(Request), + Caller ! {py_error, Ref, Reason}, + {noreply, State} + end; + {error, Reason} -> + {Ref, Caller, _} = extract_ref_caller(Request), + Caller ! {py_error, Ref, Reason}, + {noreply, State} + end; + {error, Reason} -> {Ref, Caller, _} = extract_ref_caller(Request), - Caller ! {py_error, Ref, no_workers_available}, + Caller ! {py_error, Ref, Reason}, {noreply, State} end; handle_cast(_Msg, State) -> {noreply, State}. -handle_info({worker_done, _WorkerPid}, State) -> +handle_info({async_result, _Ref, _Result}, State) -> + %% Result was sent directly to caller via erlang.send() + %% We just track pending count {noreply, State#state{pending = max(0, State#state.pending - 1)}}; -handle_info({'EXIT', _Pid, _Reason}, #state{supported = false} = State) -> - {noreply, State}; - -handle_info({'EXIT', Pid, Reason}, State) -> - error_logger:error_msg("py_async_pool: worker ~p died: ~p~n", [Pid, Reason]), - %% Remove dead worker from queue and start a new one - Workers = queue:filter(fun(W) -> W =/= Pid end, State#state.workers), - case py_async_worker_sup:start_worker(State#state.worker_sup) of - {ok, NewWorker} -> - NewWorkers = queue:in(NewWorker, Workers), - {noreply, State#state{workers = NewWorkers}}; - {error, _} -> - %% Can't restart worker, continue with remaining workers - {noreply, State#state{workers = Workers}} - end; - handle_info(_Info, State) -> {noreply, State}. 
-terminate(_Reason, #state{workers = undefined}) -> - ok; -terminate(_Reason, State) -> - %% Shutdown all workers - Workers = queue:to_list(State#state.workers), - lists:foreach(fun(W) -> W ! shutdown end, Workers), +terminate(_Reason, _State) -> ok. %%% ============================================================================ %%% Internal functions %%% ============================================================================ -start_workers(Sup, N) -> - start_workers(Sup, N, []). - -start_workers(_Sup, 0, Acc) -> - {ok, lists:reverse(Acc)}; -start_workers(Sup, N, Acc) -> - case py_async_worker_sup:start_worker(Sup) of - {ok, Pid} -> - start_workers(Sup, N - 1, [Pid | Acc]); - {error, Reason} -> - %% Failed to start worker, shutdown any already started - lists:foreach(fun(W) -> W ! shutdown end, Acc), - {error, Reason} - end. - +%% @doc Transform the legacy request format to the new event loop format. +transform_request({async_call, Ref, Caller, Module, Func, Args, Kwargs}) -> + {ok, #{ + ref => Ref, + caller => Caller, + module => Module, + func => Func, + args => Args, + kwargs => Kwargs + }}; +transform_request({async_gather, Ref, Caller, Calls}) -> + %% For gather, we need to wrap in a special gather coroutine + %% For now, return an error - gather needs special handling + {error, {gather_not_implemented, Ref, Caller, Calls}}; +transform_request({async_stream, Ref, Caller, Module, Func, Args, Kwargs}) -> + %% For stream, we need async generator support + %% For now, return an error - stream needs special handling + {error, {stream_not_implemented, Ref, Caller, Module, Func, Args, Kwargs}}; +transform_request(Other) -> + {error, {unknown_request_type, Other}}. + +%% @doc Extract ref and caller from different request types. 
extract_ref_caller({async_call, Ref, Caller, _, _, _, _}) -> {Ref, Caller, async_call}; extract_ref_caller({async_gather, Ref, Caller, _}) -> {Ref, Caller, async_gather}; extract_ref_caller({async_stream, Ref, Caller, _, _, _, _}) -> {Ref, Caller, async_stream}. diff --git a/src/py_async_worker.erl b/src/py_async_worker.erl deleted file mode 100644 index 41cb05f..0000000 --- a/src/py_async_worker.erl +++ /dev/null @@ -1,138 +0,0 @@ -%% Copyright 2026 Benoit Chesneau -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - -%%% @doc Async Python worker process with background event loop. -%%% -%%% Each async worker maintains a background thread running an asyncio -%%% event loop. Coroutines are submitted to this loop and results are -%%% delivered as Erlang messages. -%%% -%%% @private --module(py_async_worker). - --export([ - start_link/0, - init/1 -]). - -%%% ============================================================================ -%%% API -%%% ============================================================================ - --spec start_link() -> {ok, pid()}. -start_link() -> - Pid = spawn_link(?MODULE, init, [self()]), - receive - {Pid, ready} -> {ok, Pid}; - {Pid, {error, Reason}} -> {error, Reason} - after 10000 -> - exit(Pid, kill), - {error, timeout} - end. 
- -%%% ============================================================================ -%%% Worker Process -%%% ============================================================================ - -init(Parent) -> - %% Create async worker context with event loop - case py_nif:async_worker_new() of - {ok, WorkerRef} -> - Parent ! {self(), ready}, - loop(WorkerRef, Parent, #{}); - {error, Reason} -> - Parent ! {self(), {error, Reason}} - end. - -loop(WorkerRef, Parent, Pending) -> - receive - {py_async_request, Request} -> - NewPending = handle_request(WorkerRef, Request, Pending), - loop(WorkerRef, Parent, NewPending); - - {async_result, AsyncId, Result} -> - %% Forward result to caller if we have them registered - case maps:get(AsyncId, Pending, undefined) of - undefined -> - loop(WorkerRef, Parent, Pending); - {Ref, Caller} -> - send_response(Caller, Ref, Result), - loop(WorkerRef, Parent, maps:remove(AsyncId, Pending)) - end; - - shutdown -> - py_nif:async_worker_destroy(WorkerRef), - ok; - - _Other -> - loop(WorkerRef, Parent, Pending) - end. - -%%% ============================================================================ -%%% Request Handling -%%% ============================================================================ - -%% Async call -handle_request(WorkerRef, {async_call, Ref, Caller, Module, Func, Args, Kwargs}, Pending) -> - ModuleBin = to_binary(Module), - FuncBin = to_binary(Func), - case py_nif:async_call(WorkerRef, ModuleBin, FuncBin, Args, Kwargs, self()) of - {ok, {immediate, Result}} -> - %% Not a coroutine - result is available immediately - send_response(Caller, Ref, {ok, Result}), - Pending; - {ok, AsyncId} -> - %% Coroutine submitted - register for callback - maps:put(AsyncId, {Ref, Caller}, Pending); - {error, _} = Error -> - Caller ! 
{py_error, Ref, Error}, - Pending - end; - -%% Async gather -handle_request(WorkerRef, {async_gather, Ref, Caller, Calls}, Pending) -> - %% Convert calls to binary format - BinCalls = [{to_binary(M), to_binary(F), A} || {M, F, A} <- Calls], - case py_nif:async_gather(WorkerRef, BinCalls, self()) of - {ok, {immediate, Results}} -> - send_response(Caller, Ref, {ok, Results}), - Pending; - {ok, AsyncId} -> - maps:put(AsyncId, {Ref, Caller}, Pending); - {error, _} = Error -> - Caller ! {py_error, Ref, Error}, - Pending - end; - -%% Async stream -handle_request(WorkerRef, {async_stream, Ref, Caller, Module, Func, Args, Kwargs}, Pending) -> - ModuleBin = to_binary(Module), - FuncBin = to_binary(Func), - case py_nif:async_stream(WorkerRef, ModuleBin, FuncBin, Args, Kwargs, self()) of - {ok, AsyncId} -> - maps:put(AsyncId, {Ref, Caller}, Pending); - {error, _} = Error -> - Caller ! {py_error, Ref, Error}, - Pending - end. - -%%% ============================================================================ -%%% Internal Functions -%%% ============================================================================ - -send_response(Caller, Ref, Result) -> - py_util:send_response(Caller, Ref, Result). - -to_binary(Term) -> - py_util:to_binary(Term). diff --git a/src/py_async_worker_sup.erl b/src/py_async_worker_sup.erl deleted file mode 100644 index cae6b1c..0000000 --- a/src/py_async_worker_sup.erl +++ /dev/null @@ -1,49 +0,0 @@ -%% Copyright 2026 Benoit Chesneau -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-%% See the License for the specific language governing permissions and -%% limitations under the License. - -%%% @doc Simple supervisor for async Python workers. -%%% @private --module(py_async_worker_sup). --behaviour(supervisor). - --export([ - start_link/0, - start_worker/1 -]). - --export([init/1]). - -start_link() -> - supervisor:start_link(?MODULE, []). - -start_worker(Sup) -> - case supervisor:start_child(Sup, []) of - {ok, Pid} -> {ok, Pid}; - {error, Reason} -> {error, Reason} - end. - -init([]) -> - WorkerSpec = #{ - id => py_async_worker, - start => {py_async_worker, start_link, []}, - restart => temporary, - shutdown => 5000, - type => worker, - modules => [py_async_worker] - }, - - {ok, { - #{strategy => simple_one_for_one, intensity => 10, period => 60}, - [WorkerSpec] - }}. diff --git a/src/py_context.erl b/src/py_context.erl new file mode 100644 index 0000000..04aa8d3 --- /dev/null +++ b/src/py_context.erl @@ -0,0 +1,576 @@ +%% Copyright 2026 Benoit Chesneau +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%%% @doc Python context process. +%%% +%%% A py_context process owns a Python context (subinterpreter or worker). +%%% Each process has exclusive access to its context, eliminating mutex +%%% contention and enabling true N-way parallelism. +%%% +%%% The context is created when the process starts and destroyed when it +%%% stops. All Python operations are serialized through message passing. 
+%%% +%%% == Callback Handling == +%%% +%%% When Python code calls `erlang.call()`, the NIF returns a `{suspended, ...}` +%%% tuple instead of blocking. The context process handles the callback inline +%%% using a recursive receive pattern, enabling arbitrarily deep callback nesting. +%%% +%%% This approach is inspired by PyO3's suspension mechanism and avoids the +%%% deadlock issues that occur with separate callback handler processes. +%%% +%%% @end +-module(py_context). + +-export([ + start_link/2, + stop/1, + call/5, + call/6, + eval/3, + eval/4, + exec/2, + call_method/4, + to_term/1, + get_interp_id/1 +]). + +%% Internal exports +-export([init/3]). + +-type context_mode() :: auto | subinterp | worker. +-type context() :: pid(). + +-export_type([context_mode/0, context/0]). + +-record(state, { + ref :: reference(), + id :: pos_integer(), + interp_id :: non_neg_integer(), + event_state = #{} :: map() %% #{loop_ref => ref(), worker_pid => pid()} +}). + +%% ============================================================================ +%% API +%% ============================================================================ + +%% @doc Start a new py_context process. +%% +%% The process creates a Python context based on the mode: +%% - `auto' - Detect best mode (subinterp on Python 3.12+, worker otherwise) +%% - `subinterp' - Create a sub-interpreter with its own GIL +%% - `worker' - Create a thread-state worker +%% +%% @param Id Unique identifier for this context +%% @param Mode Context mode +%% @returns {ok, Pid} | {error, Reason} +-spec start_link(pos_integer(), context_mode()) -> {ok, pid()} | {error, term()}. +start_link(Id, Mode) -> + Parent = self(), + Pid = spawn_link(fun() -> init(Parent, Id, Mode) end), + receive + {Pid, started} -> + {ok, Pid}; + {Pid, {error, Reason}} -> + {error, Reason} + after 5000 -> + exit(Pid, kill), + {error, timeout} + end. + +%% @doc Stop a py_context process. +-spec stop(context()) -> ok. 
+stop(Ctx) when is_pid(Ctx) -> + MRef = erlang:monitor(process, Ctx), + Ctx ! {stop, self(), MRef}, + receive + {MRef, ok} -> + erlang:demonitor(MRef, [flush]), + ok; + {'DOWN', MRef, process, Ctx, _Reason} -> + ok + after 5000 -> + erlang:demonitor(MRef, [flush]), + exit(Ctx, kill), + ok + end. + +%% @doc Call a Python function. +%% +%% @param Ctx Context process +%% @param Module Python module name +%% @param Func Function name +%% @param Args List of arguments +%% @param Kwargs Map of keyword arguments +%% @returns {ok, Result} | {error, Reason} +-spec call(context(), atom() | binary(), atom() | binary(), list(), map()) -> + {ok, term()} | {error, term()}. +call(Ctx, Module, Func, Args, Kwargs) -> + call(Ctx, Module, Func, Args, Kwargs, infinity). + +%% @doc Call a Python function with timeout. +-spec call(context(), atom() | binary(), atom() | binary(), list(), map(), + timeout()) -> {ok, term()} | {error, term()}. +call(Ctx, Module, Func, Args, Kwargs, Timeout) when is_pid(Ctx) -> + MRef = erlang:monitor(process, Ctx), + ModuleBin = to_binary(Module), + FuncBin = to_binary(Func), + Ctx ! {call, self(), MRef, ModuleBin, FuncBin, Args, Kwargs}, + receive + {MRef, Result} -> + erlang:demonitor(MRef, [flush]), + Result; + {'DOWN', MRef, process, Ctx, Reason} -> + {error, {context_died, Reason}} + after Timeout -> + erlang:demonitor(MRef, [flush]), + {error, timeout} + end. + +%% @doc Evaluate a Python expression. +%% +%% @param Ctx Context process +%% @param Code Python code to evaluate +%% @param Locals Map of local variables +%% @returns {ok, Result} | {error, Reason} +-spec eval(context(), binary() | string(), map()) -> + {ok, term()} | {error, term()}. +eval(Ctx, Code, Locals) -> + eval(Ctx, Code, Locals, infinity). + +%% @doc Evaluate a Python expression with timeout. +-spec eval(context(), binary() | string(), map(), timeout()) -> + {ok, term()} | {error, term()}. 
+eval(Ctx, Code, Locals, Timeout) when is_pid(Ctx) -> + MRef = erlang:monitor(process, Ctx), + CodeBin = to_binary(Code), + Ctx ! {eval, self(), MRef, CodeBin, Locals}, + receive + {MRef, Result} -> + erlang:demonitor(MRef, [flush]), + Result; + {'DOWN', MRef, process, Ctx, Reason} -> + {error, {context_died, Reason}} + after Timeout -> + erlang:demonitor(MRef, [flush]), + {error, timeout} + end. + +%% @doc Execute Python statements. +%% +%% @param Ctx Context process +%% @param Code Python code to execute +%% @returns ok | {error, Reason} +-spec exec(context(), binary() | string()) -> ok | {error, term()}. +exec(Ctx, Code) when is_pid(Ctx) -> + MRef = erlang:monitor(process, Ctx), + CodeBin = to_binary(Code), + Ctx ! {exec, self(), MRef, CodeBin}, + receive + {MRef, Result} -> + erlang:demonitor(MRef, [flush]), + Result; + {'DOWN', MRef, process, Ctx, Reason} -> + {error, {context_died, Reason}} + after infinity -> + erlang:demonitor(MRef, [flush]), + {error, timeout} + end. + +%% @doc Call a method on a Python object reference. +-spec call_method(context(), reference(), atom() | binary(), list()) -> + {ok, term()} | {error, term()}. +call_method(Ctx, Ref, Method, Args) when is_pid(Ctx), is_reference(Ref) -> + MRef = erlang:monitor(process, Ctx), + MethodBin = to_binary(Method), + Ctx ! {call_method, self(), MRef, Ref, MethodBin, Args}, + receive + {MRef, Result} -> + erlang:demonitor(MRef, [flush]), + Result; + {'DOWN', MRef, process, Ctx, Reason} -> + {error, {context_died, Reason}} + end. + +%% @doc Convert a Python object reference to an Erlang term. +-spec to_term(reference()) -> {ok, term()} | {error, term()}. +to_term(Ref) when is_reference(Ref) -> + %% This uses the ref's embedded interp_id to route automatically + py_nif:context_to_term(Ref). + +%% @doc Get the interpreter ID for this context. +-spec get_interp_id(context()) -> {ok, non_neg_integer()} | {error, term()}. +get_interp_id(Ctx) when is_pid(Ctx) -> + MRef = erlang:monitor(process, Ctx), + Ctx ! 
{get_interp_id, self(), MRef}, + receive + {MRef, Result} -> + erlang:demonitor(MRef, [flush]), + Result; + {'DOWN', MRef, process, Ctx, Reason} -> + {error, {context_died, Reason}} + end. + +%% ============================================================================ +%% Internal functions +%% ============================================================================ + +%% @private +init(Parent, Id, Mode) -> + process_flag(trap_exit, true), + case create_context(Mode) of + {ok, Ref, InterpId} -> + %% For subinterpreters, create a dedicated event worker + EventState = setup_event_worker(Ref, InterpId), + Parent ! {self(), started}, + State = #state{ + ref = Ref, + id = Id, + interp_id = InterpId, + event_state = EventState + }, + loop(State); + {error, Reason} -> + Parent ! {self(), {error, Reason}} + end. + +%% @private Create event worker for subinterpreter contexts +setup_event_worker(Ref, InterpId) -> + case py_nif:context_get_event_loop(Ref) of + {ok, LoopRef} -> + %% This is a subinterpreter - create dedicated event worker + WorkerId = iolist_to_binary(["ctx_", integer_to_list(InterpId)]), + case py_event_worker:start_link(WorkerId, LoopRef) of + {ok, WorkerPid} -> + ok = py_nif:event_loop_set_worker(LoopRef, WorkerPid), + %% Extend erlang module with event loop functions + extend_erlang_module_in_context(Ref), + #{loop_ref => LoopRef, worker_pid => WorkerPid}; + {error, WorkerError} -> + error_logger:warning_msg( + "py_context ~p: Failed to start event worker: ~p~n", + [InterpId, WorkerError]), + #{} + end; + {error, not_subinterp} -> + %% Worker mode - uses shared router (lazy initialization) + #{}; + {error, Reason} -> + error_logger:warning_msg( + "py_context ~p: Failed to get event loop: ~p~n", + [InterpId, Reason]), + #{} + end. 
+ +%% @private Extend the erlang module with event loop functions in a subinterpreter +extend_erlang_module_in_context(Ref) -> + PrivDir = code:priv_dir(erlang_python), + Code = iolist_to_binary([ + "import sys\n", + "priv_dir = '", PrivDir, "'\n", + "if priv_dir not in sys.path:\n", + " sys.path.insert(0, priv_dir)\n", + "import erlang\n", + "if hasattr(erlang, '_extend_erlang_module'):\n", + " erlang._extend_erlang_module(priv_dir)\n" + ]), + case py_nif:context_exec(Ref, Code) of + ok -> ok; + {error, Reason} -> + error_logger:warning_msg( + "py_context: Failed to extend erlang module: ~p~n", [Reason]), + ok + end. + +%% @private +create_context(auto) -> + case py_nif:subinterp_supported() of + true -> create_context(subinterp); + false -> create_context(worker) + end; +create_context(subinterp) -> + py_nif:context_create(subinterp); +create_context(worker) -> + py_nif:context_create(worker). + +%% @private +%% Main context loop. Handles requests and uses suspension-based callback support. +loop(#state{ref = Ref, interp_id = InterpId} = State) -> + receive + {call, From, MRef, Module, Func, Args, Kwargs} -> + Result = handle_call_with_suspension(Ref, Module, Func, Args, Kwargs), + From ! {MRef, Result}, + loop(State); + + {eval, From, MRef, Code, Locals} -> + Result = handle_eval_with_suspension(Ref, Code, Locals), + From ! {MRef, Result}, + loop(State); + + {exec, From, MRef, Code} -> + Result = py_nif:context_exec(Ref, Code), + From ! {MRef, Result}, + loop(State); + + {call_method, From, MRef, ObjRef, Method, Args} -> + Result = py_nif:context_call_method(Ref, ObjRef, Method, Args), + From ! {MRef, Result}, + loop(State); + + {get_interp_id, From, MRef} -> + From ! {MRef, {ok, InterpId}}, + loop(State); + + {stop, From, MRef} -> + terminate(normal, State), + From ! 
{MRef, ok}; + + {'EXIT', Pid, Reason} -> + %% Handle EXIT from linked processes + case State#state.event_state of + #{worker_pid := Pid} -> + %% Event worker died - log and continue (degraded asyncio support) + error_logger:warning_msg( + "py_context ~p: Event worker died: ~p~n", + [InterpId, Reason]), + NewState = State#state{event_state = #{}}, + loop(NewState); + _ when Reason =:= shutdown; Reason =:= kill -> + %% Supervisor shutdown or kill signal - clean exit + terminate(Reason, State); + _ when is_tuple(Reason), element(1, Reason) =:= shutdown -> + %% Supervisor shutdown with extra info: {shutdown, _} + terminate(Reason, State); + _ -> + %% Ignore EXIT from other processes (e.g. callback handlers) + %% These are normal operations in the callback mechanism + loop(State) + end + end. + +%% @private Clean up resources on termination +terminate(_Reason, #state{ref = Ref, event_state = EventState}) -> + %% Stop the event worker first (if it exists and is still alive) + case EventState of + #{worker_pid := WorkerPid} -> + catch gen_server:stop(WorkerPid, normal, 5000); + _ -> + ok + end, + %% Destroy the Python context + catch py_nif:context_destroy(Ref), + ok. + +%% ============================================================================ +%% Suspension-based callback handling +%% ============================================================================ +%% +%% When Python calls erlang.call(), the NIF returns {suspended, ...} instead of +%% blocking. We handle the callback inline and then resume Python execution. +%% This enables unlimited nesting depth without deadlock. 
+ +%% @private +%% Handle call with potential suspension for callbacks +handle_call_with_suspension(Ref, Module, Func, Args, Kwargs) -> + case py_nif:context_call(Ref, Module, Func, Args, Kwargs) of + {suspended, _CallbackId, StateRef, {FuncName, CallbackArgs}} -> + %% Callback needed - handle it with recursive receive + CallbackResult = handle_callback_with_nested_receive(Ref, FuncName, CallbackArgs), + %% Resume and potentially get more suspensions + resume_and_continue(Ref, StateRef, CallbackResult); + Result -> + Result + end. + +%% @private +%% Handle eval with potential suspension for callbacks +handle_eval_with_suspension(Ref, Code, Locals) -> + case py_nif:context_eval(Ref, Code, Locals) of + {suspended, _CallbackId, StateRef, {FuncName, CallbackArgs}} -> + %% Callback needed - handle it with recursive receive + CallbackResult = handle_callback_with_nested_receive(Ref, FuncName, CallbackArgs), + %% Resume and potentially get more suspensions + resume_and_continue(Ref, StateRef, CallbackResult); + Result -> + Result + end. + +%% @private +%% Handle callback, allowing nested py:eval/call to be processed. +%% We spawn a process to execute the callback so we can stay in a receive loop +%% for nested calls while the callback runs. +handle_callback_with_nested_receive(Ref, FuncName, CallbackArgs) -> + Parent = self(), + CallbackPid = spawn_link(fun() -> + Result = try + ArgsList = tuple_to_list(CallbackArgs), + case py_callback:execute(FuncName, ArgsList) of + {ok, Value} -> + ReprStr = term_to_python_repr(Value), + {ok, <<0, ReprStr/binary>>}; + {error, Reason} -> + ErrMsg = iolist_to_binary(io_lib:format("~p", [Reason])), + {ok, <<1, ErrMsg/binary>>} + end + catch + Class:ExcReason:Stacktrace -> + ErrorMsg = iolist_to_binary(io_lib:format("~p:~p~n~p", + [Class, ExcReason, Stacktrace])), + {ok, <<1, ErrorMsg/binary>>} + end, + Parent ! 
{callback_result, self(), Result} + end), + %% Wait for callback, processing nested requests + wait_for_callback(Ref, CallbackPid). + +%% @private +%% Wait for callback result while processing nested py:call/eval requests. +%% This enables arbitrarily deep callback nesting. +wait_for_callback(Ref, CallbackPid) -> + receive + {callback_result, CallbackPid, Result} -> + Result; + + %% Handle nested py:call while waiting for callback + {call, From, MRef, Module, Func, Args, Kwargs} -> + NestedResult = handle_call_with_suspension(Ref, Module, Func, Args, Kwargs), + From ! {MRef, NestedResult}, + wait_for_callback(Ref, CallbackPid); + + %% Handle nested py:eval while waiting for callback + {eval, From, MRef, Code, Locals} -> + NestedResult = handle_eval_with_suspension(Ref, Code, Locals), + From ! {MRef, NestedResult}, + wait_for_callback(Ref, CallbackPid); + + %% Handle nested py:exec while waiting for callback + {exec, From, MRef, Code} -> + NestedResult = py_nif:context_exec(Ref, Code), + From ! {MRef, NestedResult}, + wait_for_callback(Ref, CallbackPid); + + %% Handle nested call_method while waiting for callback + {call_method, From, MRef, ObjRef, Method, Args} -> + NestedResult = py_nif:context_call_method(Ref, ObjRef, Method, Args), + From ! {MRef, NestedResult}, + wait_for_callback(Ref, CallbackPid); + + %% Handle get_interp_id while waiting + {get_interp_id, From, MRef} -> + %% We can't get InterpId here, but we can query the NIF + InterpIdResult = py_nif:context_interp_id(Ref), + From ! {MRef, InterpIdResult}, + wait_for_callback(Ref, CallbackPid) + end. 
+ +%% @private +%% Resume suspended state, handle additional suspensions (nested callbacks) +resume_and_continue(Ref, StateRef, {ok, ResultBin}) -> + case py_nif:context_resume(Ref, StateRef, ResultBin) of + {suspended, _CallbackId2, StateRef2, {FuncName2, Args2}} -> + %% Another callback during resume - recursive handling + CallbackResult2 = handle_callback_with_nested_receive(Ref, FuncName2, Args2), + resume_and_continue(Ref, StateRef2, CallbackResult2); + FinalResult -> + FinalResult + end; +resume_and_continue(Ref, StateRef, {error, _} = Err) -> + _ = py_nif:context_cancel_resume(Ref, StateRef), + Err. + +%% ============================================================================ +%% Utility functions +%% ============================================================================ + +%% @private +%% Convert Erlang term to Python repr string +term_to_python_repr(Term) when is_integer(Term) -> + integer_to_binary(Term); +term_to_python_repr(Term) when is_float(Term) -> + float_to_binary(Term, [{decimals, 15}, compact]); +term_to_python_repr(true) -> + <<"True">>; +term_to_python_repr(false) -> + <<"False">>; +term_to_python_repr(none) -> + <<"None">>; +term_to_python_repr(nil) -> + <<"None">>; +term_to_python_repr(undefined) -> + <<"None">>; +term_to_python_repr(Term) when is_atom(Term) -> + %% Convert atom to Python string + BinStr = atom_to_binary(Term, utf8), + <<"'", BinStr/binary, "'">>; +term_to_python_repr(Term) when is_binary(Term) -> + %% Escape the binary for Python + Escaped = binary:replace(Term, <<"'">>, <<"\\'">>, [global]), + <<"'", Escaped/binary, "'">>; +term_to_python_repr(Term) when is_list(Term) -> + case io_lib:printable_unicode_list(Term) of + true -> + %% It's a string + Bin = unicode:characters_to_binary(Term), + Escaped = binary:replace(Bin, <<"'">>, <<"\\'">>, [global]), + <<"'", Escaped/binary, "'">>; + false -> + %% It's a list + Items = [term_to_python_repr(E) || E <- Term], + ItemsBin = join_binaries(Items, <<", ">>), + <<"[", 
ItemsBin/binary, "]">>
+    end;
+term_to_python_repr(Term) when is_tuple(Term) ->
+    Items = [term_to_python_repr(E) || E <- tuple_to_list(Term)],
+    ItemsBin = join_binaries(Items, <<", ">>),
+    case tuple_size(Term) of
+        1 -> <<"(", ItemsBin/binary, ",)">>;
+        _ -> <<"(", ItemsBin/binary, ")">>
+    end;
+term_to_python_repr(Term) when is_map(Term) ->
+    Items = maps:fold(fun(K, V, Acc) ->
+        KeyRepr = term_to_python_repr(K),
+        ValRepr = term_to_python_repr(V),
+        [<<KeyRepr/binary, ": ", ValRepr/binary>> | Acc]
+    end, [], Term),
+    ItemsBin = join_binaries(lists:reverse(Items), <<", ">>),
+    <<"{", ItemsBin/binary, "}">>;
+term_to_python_repr(Term) when is_pid(Term) ->
+    %% Encode PID using ETF (Erlang Term Format) for exact reconstruction.
+    %% Format: "__etf__:<base64 of term_to_binary(Pid)>"
+    %% The C side will detect this, base64 decode, and use enif_binary_to_term
+    %% to reconstruct the pid, then convert to ErlangPidObject.
+    Etf = term_to_binary(Term),
+    B64 = base64:encode(Etf),
+    <<"\"__etf__:", B64/binary, "\"">>;
+term_to_python_repr(Term) when is_reference(Term) ->
+    %% References also need ETF encoding for round-trip
+    Etf = term_to_binary(Term),
+    B64 = base64:encode(Etf),
+    <<"\"__etf__:", B64/binary, "\"">>;
+term_to_python_repr(_Term) ->
+    <<"None">>.
+
+%% @private
+join_binaries([], _Sep) -> <<>>;
+join_binaries([H], _Sep) -> H;
+join_binaries([H|T], Sep) ->
+    lists:foldl(fun(B, Acc) -> <<Acc/binary, Sep/binary, B/binary>> end, H, T).
+
+%% @private
+to_binary(Atom) when is_atom(Atom) ->
+    atom_to_binary(Atom, utf8);
+to_binary(List) when is_list(List) ->
+    list_to_binary(List);
+to_binary(Bin) when is_binary(Bin) ->
+    Bin.
diff --git a/src/py_context_init.erl b/src/py_context_init.erl
new file mode 100644
index 0000000..125fdfc
--- /dev/null
+++ b/src/py_context_init.erl
@@ -0,0 +1,42 @@
+%% Copyright 2026 Benoit Chesneau
+%%
+%% Licensed under the Apache License, Version 2.0 (the "License");
+%% you may not use this file except in compliance with the License.
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%%% @doc Initializes the context router during application startup. +%%% +%%% This module provides a supervisor-compatible start function that +%%% initializes the context router and returns `ignore' (since no +%%% process needs to stay running after initialization). +%%% @private +-module(py_context_init). + +-export([start_link/1]). + +%% @doc Start the context router. +%% +%% This function is called by the supervisor to initialize the +%% py_context_router. After starting the contexts, it returns +%% `ignore' since no process needs to remain running. +%% +%% @param Opts Options to pass to py_context_router:start/1 +%% @returns {ok, pid()} | ignore | {error, Reason} +-spec start_link(map()) -> {ok, pid()} | ignore | {error, term()}. +start_link(Opts) -> + case py_context_router:start(Opts) of + {ok, _Contexts} -> + %% The contexts are supervised by py_context_sup + %% We don't need a process here, just return ignore + ignore; + {error, Reason} -> + {error, Reason} + end. diff --git a/src/py_context_router.erl b/src/py_context_router.erl new file mode 100644 index 0000000..f991878 --- /dev/null +++ b/src/py_context_router.erl @@ -0,0 +1,289 @@ +%% Copyright 2026 Benoit Chesneau +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. 
+%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%%% @doc Scheduler-affinity router for Python contexts. +%%% +%%% This module provides automatic routing of Python calls to contexts +%%% based on the calling process's scheduler ID. This ensures that +%%% processes on the same scheduler reuse the same context, providing +%%% good cache locality while still enabling N-way parallelism. +%%% +%%% == Architecture == +%%% +%%%
+%%% Scheduler 1 ---+
+%%%                +---> Context 1 (Subinterp/Worker)
+%%% Scheduler 2 ---+
+%%%                +---> Context 2 (Subinterp/Worker)
+%%% Scheduler 3 ---+
+%%%                +---> Context 3 (Subinterp/Worker)
+%%% ...            |
+%%% Scheduler N ---+---> Context N (Subinterp/Worker)
+%%% 
+%%% +%%% == Usage == +%%% +%%%
+%%% %% Start the router with default settings
+%%% {ok, Contexts} = py_context_router:start(),
+%%%
+%%% %% Get context for current scheduler (automatic routing)
+%%% Ctx = py_context_router:get_context(),
+%%% {ok, Result} = py_context:call(Ctx, math, sqrt, [16], #{}),
+%%%
+%%% %% Or get a specific context by index
+%%% Ctx2 = py_context_router:get_context(2),
+%%%
+%%% %% Bind a specific context to this process
+%%% ok = py_context_router:bind_context(Ctx2),
+%%% Ctx2 = py_context_router:get_context(), %% Returns bound context
+%%%
+%%% %% Unbind to return to scheduler-based routing
+%%% ok = py_context_router:unbind_context().
+%%% 
+%%% +%%% @end +-module(py_context_router). + +-export([ + start/0, + start/1, + stop/0, + is_started/0, + get_context/0, + get_context/1, + bind_context/1, + unbind_context/0, + num_contexts/0, + contexts/0 +]). + +%% Persistent term keys +-define(NUM_CONTEXTS_KEY, {?MODULE, num_contexts}). +-define(CONTEXT_KEY(N), {?MODULE, context, N}). +-define(CONTEXTS_KEY, {?MODULE, all_contexts}). + +%% Process dictionary key for bound context +-define(BOUND_CONTEXT_KEY, py_bound_context). + +%% ============================================================================ +%% Types +%% ============================================================================ + +-type start_opts() :: #{ + contexts => pos_integer(), + mode => py_context:context_mode() +}. + +-export_type([start_opts/0]). + +%% ============================================================================ +%% API +%% ============================================================================ + +%% @doc Start the context router with default settings. +%% +%% Creates one context per scheduler, using auto mode (subinterp on +%% Python 3.12+, worker otherwise). +%% +%% @returns {ok, [Context]} | {error, Reason} +-spec start() -> {ok, [pid()]} | {error, term()}. +start() -> + start(#{}). + +%% @doc Start the context router with options. +%% +%% Options: +%% - `contexts' - Number of contexts to create (default: number of schedulers) +%% - `mode' - Context mode: `auto', `subinterp', or `worker' (default: `auto') +%% +%% @param Opts Start options +%% @returns {ok, [Context]} | {error, Reason} +-spec start(start_opts()) -> {ok, [pid()]} | {error, term()}. 
+start(Opts) -> + %% Check if contexts are already running (idempotent) + case persistent_term:get(?CONTEXTS_KEY, undefined) of + undefined -> + do_start(Opts); + Contexts when is_list(Contexts) -> + %% Verify at least one context is still alive + case lists:any(fun is_process_alive/1, Contexts) of + true -> + {ok, Contexts}; + false -> + %% All dead - restart + stop(), %% Clean up stale entries + do_start(Opts) + end + end. + +%% @private +do_start(Opts) -> + NumContexts = maps:get(contexts, Opts, erlang:system_info(schedulers)), + Mode = maps:get(mode, Opts, auto), + + %% Start the supervisor if not already running + case whereis(py_context_sup) of + undefined -> + case py_context_sup:start_link() of + {ok, _} -> ok; + {error, {already_started, _}} -> ok; + Error -> throw(Error) + end; + _ -> + ok + end, + + %% Start contexts + try + Contexts = lists:map( + fun(N) -> + case py_context_sup:start_context(N, Mode) of + {ok, Pid} -> + persistent_term:put(?CONTEXT_KEY(N), Pid), + Pid; + {error, Reason} -> + throw({context_start_failed, N, Reason}) + end + end, + lists:seq(1, NumContexts) + ), + + %% Store metadata + persistent_term:put(?NUM_CONTEXTS_KEY, NumContexts), + persistent_term:put(?CONTEXTS_KEY, Contexts), + + {ok, Contexts} + catch + throw:Err -> + %% Clean up any contexts that were started + stop(), + {error, Err} + end. + +%% @doc Stop the context router. +%% +%% Stops all contexts and removes persistent_term entries. +-spec stop() -> ok. 
+stop() -> + %% Stop all contexts + case persistent_term:get(?CONTEXTS_KEY, undefined) of + undefined -> + ok; + Contexts -> + lists:foreach( + fun(Ctx) -> + catch py_context_sup:stop_context(Ctx) + end, + Contexts + ) + end, + + %% Clean up persistent terms + NumContexts = persistent_term:get(?NUM_CONTEXTS_KEY, 0), + lists:foreach( + fun(N) -> + catch persistent_term:erase(?CONTEXT_KEY(N)) + end, + lists:seq(1, NumContexts) + ), + catch persistent_term:erase(?NUM_CONTEXTS_KEY), + catch persistent_term:erase(?CONTEXTS_KEY), + ok. + +%% @doc Check if contexts have been started and are still alive. +%% +%% @returns true if contexts are running, false otherwise +-spec is_started() -> boolean(). +is_started() -> + case persistent_term:get(?NUM_CONTEXTS_KEY, 0) of + 0 -> false; + _N -> + %% Verify at least one context is actually alive + %% (persistent_term may have stale data after app restart) + case persistent_term:get(?CONTEXT_KEY(1), undefined) of + undefined -> false; + Pid when is_pid(Pid) -> is_process_alive(Pid); + _ -> false + end + end. + +%% @doc Get the context for the current process. +%% +%% If the process has a bound context, returns that context. +%% Otherwise, selects a context based on the current scheduler ID. +%% +%% @returns Context pid +-spec get_context() -> pid(). +get_context() -> + case get(?BOUND_CONTEXT_KEY) of + undefined -> + select_by_scheduler(); + Ctx -> + Ctx + end. + +%% @doc Get a specific context by index. +%% +%% @param N Context index (1 to num_contexts) +%% @returns Context pid +-spec get_context(pos_integer()) -> pid(). +get_context(N) when is_integer(N), N > 0 -> + persistent_term:get(?CONTEXT_KEY(N)). + +%% @doc Bind a context to the current process. +%% +%% After binding, `get_context/0' will always return this context +%% instead of selecting by scheduler. +%% +%% @param Ctx Context pid to bind +%% @returns ok +-spec bind_context(pid()) -> ok. +bind_context(Ctx) when is_pid(Ctx) -> + put(?BOUND_CONTEXT_KEY, Ctx), + ok. 
+ +%% @doc Unbind the current process's context. +%% +%% After unbinding, `get_context/0' will return to scheduler-based +%% selection. +%% +%% @returns ok +-spec unbind_context() -> ok. +unbind_context() -> + erase(?BOUND_CONTEXT_KEY), + ok. + +%% @doc Get the number of contexts. +-spec num_contexts() -> non_neg_integer(). +num_contexts() -> + persistent_term:get(?NUM_CONTEXTS_KEY, 0). + +%% @doc Get all context pids. +-spec contexts() -> [pid()]. +contexts() -> + persistent_term:get(?CONTEXTS_KEY, []). + +%% ============================================================================ +%% Internal functions +%% ============================================================================ + +%% @private +%% Select context based on scheduler ID using modulo +-spec select_by_scheduler() -> pid(). +select_by_scheduler() -> + SchedId = erlang:system_info(scheduler_id), + NumCtx = persistent_term:get(?NUM_CONTEXTS_KEY), + Idx = ((SchedId - 1) rem NumCtx) + 1, + persistent_term:get(?CONTEXT_KEY(Idx)). diff --git a/src/py_context_sup.erl b/src/py_context_sup.erl new file mode 100644 index 0000000..38d69cf --- /dev/null +++ b/src/py_context_sup.erl @@ -0,0 +1,88 @@ +%% Copyright 2026 Benoit Chesneau +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%%% @doc Supervisor for py_context processes. +%%% +%%% This is a simple_one_for_one supervisor that manages py_context +%%% processes. New contexts are started via start_context/2. 
+%%% +%%% @end +-module(py_context_sup). + +-behaviour(supervisor). + +-export([ + start_link/0, + start_context/2, + stop_context/1, + which_contexts/0 +]). + +%% Supervisor callbacks +-export([init/1]). + +%% ============================================================================ +%% API +%% ============================================================================ + +%% @doc Start the supervisor. +-spec start_link() -> {ok, pid()} | {error, term()}. +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +%% @doc Start a new py_context under this supervisor. +%% +%% @param Id Unique identifier for the context +%% @param Mode Context mode (auto | subinterp | worker) +%% @returns {ok, Pid} | {error, Reason} +-spec start_context(pos_integer(), py_context:context_mode()) -> + {ok, pid()} | {error, term()}. +start_context(Id, Mode) -> + supervisor:start_child(?MODULE, [Id, Mode]). + +%% @doc Stop a context by its PID. +-spec stop_context(pid()) -> ok | {error, term()}. +stop_context(Pid) when is_pid(Pid) -> + case supervisor:terminate_child(?MODULE, Pid) of + ok -> ok; + {error, not_found} -> ok; + Error -> Error + end. + +%% @doc List all running context PIDs. +-spec which_contexts() -> [pid()]. +which_contexts() -> + [Pid || {_, Pid, _, _} <- supervisor:which_children(?MODULE), + is_pid(Pid)]. + +%% ============================================================================ +%% Supervisor callbacks +%% ============================================================================ + +%% @private +init([]) -> + SupFlags = #{ + strategy => simple_one_for_one, + intensity => 5, + period => 10 + }, + ChildSpec = #{ + id => py_context, + start => {py_context, start_link, []}, + restart => temporary, + shutdown => 5000, + type => worker, + modules => [py_context] + }, + {ok, {SupFlags, [ChildSpec]}}. 
diff --git a/src/py_event_loop.erl b/src/py_event_loop.erl index 5ece4fc..1660886 100644 --- a/src/py_event_loop.erl +++ b/src/py_event_loop.erl @@ -27,7 +27,8 @@ start_link/0, stop/0, get_loop/0, - register_callbacks/0 + register_callbacks/0, + run_async/2 ]). %% gen_server callbacks @@ -83,6 +84,28 @@ register_callbacks() -> py_callback:register(py_event_loop_dispatch_timer, fun cb_dispatch_timer/1), ok. +%% @doc Run an async coroutine on the event loop. +%% The result will be sent to the caller via erlang.send(). +%% +%% Request should be a map with the following keys: +%% ref => reference() - A reference to identify the result +%% caller => pid() - The pid to send the result to +%% module => atom() | binary() - Python module name +%% func => atom() | binary() - Python function name +%% args => list() - Arguments to pass to the function +%% kwargs => map() - Keyword arguments (optional) +%% +%% Returns ok immediately. The result will be sent as: +%% {async_result, Ref, {ok, Result}} - on success +%% {async_result, Ref, {error, Reason}} - on failure +-spec run_async(reference(), map()) -> ok | {error, term()}. +run_async(LoopRef, #{ref := Ref, caller := Caller, module := Module, + func := Func, args := Args} = Request) -> + Kwargs = maps:get(kwargs, Request, #{}), + ModuleBin = py_util:to_binary(Module), + FuncBin = py_util:to_binary(Func), + py_nif:event_loop_run_async(LoopRef, Caller, Ref, ModuleBin, FuncBin, Args, Kwargs). + %% ============================================================================ %% gen_server callbacks %% ============================================================================ @@ -119,14 +142,18 @@ init([]) -> end. %% @doc Set ErlangEventLoop as the default asyncio event loop policy. +%% Also extends the C 'erlang' module with Python event loop exports. 
set_default_policy() -> PrivDir = code:priv_dir(erlang_python), + %% First, extend the erlang module with Python event loop exports + extend_erlang_module(PrivDir), + %% Then set the event loop policy Code = iolist_to_binary([ "import sys\n", "priv_dir = '", PrivDir, "'\n", "if priv_dir not in sys.path:\n", " sys.path.insert(0, priv_dir)\n", - "from erlang_loop import get_event_loop_policy\n", + "from _erlang_impl import get_event_loop_policy\n", "import asyncio\n", "asyncio.set_event_loop_policy(get_event_loop_policy())\n" ]), @@ -137,6 +164,22 @@ set_default_policy() -> ok %% Non-fatal end. +%% @doc Extend the C 'erlang' module with Python event loop exports. +%% This makes erlang.run(), erlang.new_event_loop(), etc. available. +extend_erlang_module(PrivDir) -> + Code = iolist_to_binary([ + "import erlang\n", + "priv_dir = '", PrivDir, "'\n", + "if hasattr(erlang, '_extend_erlang_module'):\n", + " erlang._extend_erlang_module(priv_dir)\n" + ]), + case py:exec(Code) of + ok -> ok; + {error, Reason} -> + error_logger:warning_msg("Failed to extend erlang module: ~p~n", [Reason]), + ok %% Non-fatal + end. + handle_call(get_loop, _From, #state{loop_ref = undefined} = State) -> %% Create event loop and worker on demand case py_nif:event_loop_new() of diff --git a/src/py_event_loop_pool.erl b/src/py_event_loop_pool.erl new file mode 100644 index 0000000..cf9858f --- /dev/null +++ b/src/py_event_loop_pool.erl @@ -0,0 +1,145 @@ +%% Copyright 2026 Benoit Chesneau +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+%% See the License for the specific language governing permissions and +%% limitations under the License. + +%%% @doc Pool manager for event loop-based async Python execution. +%%% +%%% This module provides a pool of event loops for executing async Python +%%% coroutines. It replaces the pthread+usleep polling model with efficient +%%% event-driven execution using enif_select and erlang.send(). +%%% +%%% The pool uses round-robin scheduling to distribute work across event loops. +%%% +%%% @private +-module(py_event_loop_pool). +-behaviour(gen_server). + +-export([ + start_link/0, + start_link/1, + run_async/1, + get_stats/0 +]). + +-export([ + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2 +]). + +-record(state, { + loops :: [reference()], + num_loops :: non_neg_integer(), + next_idx :: non_neg_integer(), + supported :: boolean() +}). + +-define(DEFAULT_NUM_LOOPS, 1). + +%%% ============================================================================ +%%% API +%%% ============================================================================ + +-spec start_link() -> {ok, pid()} | {error, term()}. +start_link() -> + start_link(?DEFAULT_NUM_LOOPS). + +-spec start_link(pos_integer()) -> {ok, pid()} | {error, term()}. +start_link(NumLoops) -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [NumLoops], []). + +%% @doc Submit an async request to be executed on the event loop pool. +%% The request should be a map with keys: +%% ref => reference() - A reference to identify the result +%% caller => pid() - The pid to send the result to +%% module => atom() | binary() - Python module name +%% func => atom() | binary() - Python function name +%% args => list() - Arguments to pass to the function +%% kwargs => map() - Keyword arguments (optional) +-spec run_async(map()) -> ok | {error, term()}. +run_async(Request) -> + gen_server:call(?MODULE, {run_async, Request}). + +%% @doc Get pool statistics. +-spec get_stats() -> map(). 
+get_stats() -> + gen_server:call(?MODULE, get_stats). + +%%% ============================================================================ +%%% gen_server callbacks +%%% ============================================================================ + +init([NumLoops]) -> + process_flag(trap_exit, true), + + %% Get the event loop from py_event_loop module + case py_event_loop:get_loop() of + {ok, LoopRef} -> + %% For now, use a single shared event loop + %% In the future, we could create multiple loops for parallelism + Loops = lists:duplicate(NumLoops, LoopRef), + {ok, #state{ + loops = Loops, + num_loops = NumLoops, + next_idx = 0, + supported = true + }}; + {error, Reason} -> + error_logger:warning_msg("py_event_loop_pool: event loop not available: ~p~n", [Reason]), + {ok, #state{ + loops = [], + num_loops = 0, + next_idx = 0, + supported = false + }} + end. + +handle_call(get_stats, _From, State) -> + Stats = #{ + num_loops => State#state.num_loops, + next_idx => State#state.next_idx, + supported => State#state.supported + }, + {reply, Stats, State}; + +handle_call({run_async, _Request}, _From, #state{supported = false} = State) -> + {reply, {error, event_loop_not_available}, State}; + +handle_call({run_async, Request}, _From, State) -> + %% Get the next loop in round-robin fashion + Idx = State#state.next_idx rem State#state.num_loops + 1, + LoopRef = lists:nth(Idx, State#state.loops), + + %% Submit to the event loop + Result = py_event_loop:run_async(LoopRef, Request), + + NextState = State#state{next_idx = Idx}, + {reply, Result, NextState}; + +handle_call(_Request, _From, State) -> + {reply, {error, unknown_request}, State}. + +handle_cast(_Msg, State) -> + {noreply, State}. + +handle_info({'EXIT', _Pid, _Reason}, State) -> + %% Event loop died, mark as unsupported + {noreply, State#state{supported = false, loops = [], num_loops = 0}}; + +handle_info(_Info, State) -> + {noreply, State}. + +terminate(_Reason, _State) -> + ok. 
diff --git a/src/py_nif.erl b/src/py_nif.erl index a2fdffc..2dc5491 100644 --- a/src/py_nif.erl +++ b/src/py_nif.erl @@ -37,6 +37,7 @@ get_attr/3, version/0, memory_stats/0, + get_debug_counters/0, gc/0, gc/1, tracemalloc_start/0, @@ -56,6 +57,7 @@ subinterp_worker_new/0, subinterp_worker_destroy/1, subinterp_call/5, + subinterp_asgi_run/6, parallel_execute/2, %% Execution mode info execution_mode/0, @@ -81,6 +83,7 @@ event_loop_set_worker/2, event_loop_set_id/2, event_loop_wakeup/1, + event_loop_run_async/7, add_reader/3, remove_reader/2, add_writer/3, @@ -128,8 +131,47 @@ %% ASGI optimizations asgi_build_scope/1, asgi_run/5, + %% ASGI profiling (only available when compiled with -DASGI_PROFILING) + asgi_profile_stats/0, + asgi_profile_reset/0, %% WSGI optimizations - wsgi_run/4 + wsgi_run/4, + %% Worker pool + pool_start/1, + pool_stop/0, + pool_submit/5, + pool_stats/0, + %% Process-per-context API (no mutex) + context_create/1, + context_destroy/1, + context_call/5, + context_eval/3, + context_exec/2, + context_call_method/4, + context_to_term/1, + context_interp_id/1, + context_set_callback_handler/2, + context_get_callback_pipe/1, + context_write_callback_response/2, + context_resume/3, + context_cancel_resume/2, + context_get_event_loop/1, + %% py_ref API (Python object references with interp_id) + ref_wrap/2, + is_ref/1, + ref_interp_id/1, + ref_to_term/1, + ref_getattr/2, + ref_call_method/3, + %% Reactor NIFs - Erlang-as-Reactor architecture + reactor_register_fd/3, + reactor_reselect_read/1, + reactor_select_write/1, + get_fd_from_resource/1, + reactor_on_read_ready/2, + reactor_on_write_ready/2, + reactor_init_connection/3, + reactor_close_fd/1 ]). -on_load(load_nif/0). @@ -278,6 +320,12 @@ version() -> memory_stats() -> ?NIF_STUB. +%% @doc Get debug counters for tracking resource lifecycle. +%% Returns a map with counter names and their values. Used for detecting leaks. +-spec get_debug_counters() -> map(). +get_debug_counters() -> + ?NIF_STUB. 
+ %% @doc Force Python garbage collection. %% Returns the number of unreachable objects collected. -spec gc() -> {ok, integer()} | {error, term()}. @@ -397,6 +445,14 @@ subinterp_worker_destroy(_WorkerRef) -> subinterp_call(_WorkerRef, _Module, _Func, _Args, _Kwargs) -> ?NIF_STUB. +%% @doc Run an ASGI application in a sub-interpreter. +%% This runs ASGI in a subinterpreter with its own GIL for true parallelism. +%% Args: WorkerRef, Runner (binary), Module (binary), Callable (binary), Scope (map), Body (binary) +-spec subinterp_asgi_run(reference(), binary(), binary(), binary(), map(), binary()) -> + {ok, {integer(), [{binary(), binary()}], binary()}} | {error, term()}. +subinterp_asgi_run(_WorkerRef, _Runner, _Module, _Callable, _Scope, _Body) -> + ?NIF_STUB. + %% @doc Execute multiple calls in parallel across sub-interpreters. %% Args: WorkerRefs (list of refs), Calls (list of {Module, Func, Args}) %% Returns: List of results (one per call) @@ -545,6 +601,14 @@ event_loop_set_id(_LoopRef, _LoopId) -> event_loop_wakeup(_LoopRef) -> ?NIF_STUB. +%% @doc Submit an async coroutine to run on the event loop. +%% When the coroutine completes, the result is sent to CallerPid via erlang.send(). +%% This replaces the pthread+usleep polling model with direct message passing. +-spec event_loop_run_async(reference(), pid(), reference(), binary(), binary(), list(), map()) -> + ok | {error, term()}. +event_loop_run_async(_LoopRef, _CallerPid, _Ref, _Module, _Func, _Args, _Kwargs) -> + ?NIF_STUB. + %% @doc Register a file descriptor for read monitoring. %% Uses enif_select to register with the Erlang scheduler. -spec add_reader(reference(), integer(), non_neg_integer()) -> @@ -867,6 +931,34 @@ asgi_build_scope(_ScopeMap) -> asgi_run(_Runner, _Module, _Callable, _ScopeMap, _Body) -> ?NIF_STUB. +%% @doc Get ASGI profiling statistics. +%% +%% Only available when NIF is compiled with -DASGI_PROFILING. 
+%% Returns timing breakdown for each phase of ASGI request handling: +%% - gil_acquire_us: Time to acquire Python GIL +%% - string_conv_us: Time to convert binary strings +%% - module_import_us: Time to import Python module +%% - get_callable_us: Time to get ASGI callable +%% - scope_build_us: Time to build scope dict +%% - body_conv_us: Time to convert body binary +%% - runner_import_us: Time to import runner module +%% - runner_call_us: Time to call the runner (includes Python ASGI execution) +%% - response_extract_us: Time to extract response +%% - gil_release_us: Time to release GIL +%% - total_us: Total time +%% +%% @returns {ok, StatsMap} or {error, not_available} if profiling not enabled +-spec asgi_profile_stats() -> {ok, map()} | {error, term()}. +asgi_profile_stats() -> + {error, profiling_not_enabled}. + +%% @doc Reset ASGI profiling statistics. +%% +%% Only available when NIF is compiled with -DASGI_PROFILING. +-spec asgi_profile_reset() -> ok | {error, term()}. +asgi_profile_reset() -> + {error, profiling_not_enabled}. + %%% ============================================================================ %%% WSGI Optimizations %%% ============================================================================ @@ -893,3 +985,429 @@ asgi_run(_Runner, _Module, _Callable, _ScopeMap, _Body) -> {ok, {binary(), [{binary(), binary()}], binary()}} | {error, term()}. wsgi_run(_Runner, _Module, _Callable, _EnvironMap) -> ?NIF_STUB. + +%%% ============================================================================ +%%% Worker Pool +%%% ============================================================================ + +%% @doc Start the worker pool with the specified number of workers. +%% +%% Creates a pool of worker threads that process Python operations. +%% Each worker may have its own subinterpreter (Python 3.12+) for true +%% parallelism, or share the GIL with optimized batching. +%% +%% If NumWorkers is 0, the pool will use the number of CPU cores. 
+%% +%% @param NumWorkers Number of worker threads (0 = auto-detect) +%% @returns ok on success, or {error, Reason} +-spec pool_start(non_neg_integer()) -> ok | {error, term()}. +pool_start(_NumWorkers) -> + ?NIF_STUB. + +%% @doc Stop the worker pool. +%% +%% Signals all workers to shut down and waits for them to terminate. +%% Any pending requests will receive {error, pool_shutdown}. +%% +%% @returns ok +-spec pool_stop() -> ok. +pool_stop() -> + ?NIF_STUB. + +%% @doc Submit a request to the worker pool. +%% +%% Submits an asynchronous request to the pool. The caller will receive +%% a {py_response, RequestId, Result} message when the request completes. +%% +%% Request types and arguments: +%%
    +%%
  • `call' - Module, Func, Args, undefined (or Timeout)
  • +%%
  • `apply' - Module, Func, Args, Kwargs
  • +%%
  • `eval' - Code, Locals, undefined, undefined
  • +%%
  • `exec' - Code, undefined, undefined, undefined
  • +%%
  • `asgi' - Runner, Module, Callable, {Scope, Body}
  • +%%
  • `wsgi' - Module, Callable, Environ, undefined
  • +%%
+%% +%% @param Type Request type atom +%% @param Arg1 First argument (varies by type) +%% @param Arg2 Second argument (varies by type) +%% @param Arg3 Third argument (varies by type) +%% @param Arg4 Fourth argument (varies by type) +%% @returns {ok, RequestId} on success, or {error, Reason} +-spec pool_submit(atom(), term(), term(), term(), term()) -> + {ok, non_neg_integer()} | {error, term()}. +pool_submit(_Type, _Arg1, _Arg2, _Arg3, _Arg4) -> + ?NIF_STUB. + +%% @doc Get worker pool statistics. +%% +%% Returns a map with the following keys: +%%
    +%%
  • `num_workers' - Number of worker threads
  • +%%
  • `initialized' - Whether the pool is started
  • +%%
  • `use_subinterpreters' - Whether using subinterpreters (Python 3.12+)
  • +%%
  • `free_threaded' - Whether using free-threaded Python (3.13+)
  • +%%
  • `pending_count' - Number of pending requests in queue
  • +%%
  • `total_enqueued' - Total requests submitted
  • +%%
+%% +%% @returns Stats map +-spec pool_stats() -> map(). +pool_stats() -> + ?NIF_STUB. + +%%% ============================================================================ +%%% Process-per-context API (no mutex) +%%% +%%% These NIFs are designed for the process-per-context architecture where +%%% each Erlang process owns one Python context. Since access is serialized +%%% by the owning process, no mutex locking is needed. +%%% ============================================================================ + +%% @doc Create a new Python context. +%% +%% Creates a subinterpreter (Python 3.12+) or worker thread-state based +%% on the mode parameter. Returns a reference to the context and its +%% interpreter ID for routing. +%% +%% @param Mode `subinterp' or `worker' +%% @returns {ok, ContextRef, InterpId} | {error, Reason} +-spec context_create(subinterp | worker) -> + {ok, reference(), non_neg_integer()} | {error, term()}. +context_create(_Mode) -> + ?NIF_STUB. + +%% @doc Destroy a Python context. +%% +%% Cleans up the Python interpreter or thread-state. Should only be +%% called by the owning process. +%% +%% @param ContextRef Reference returned by context_create/1 +%% @returns ok +-spec context_destroy(reference()) -> ok. +context_destroy(_ContextRef) -> + ?NIF_STUB. + +%% @doc Call a Python function in a context. +%% +%% NO MUTEX - caller must ensure exclusive access (process ownership). +%% +%% @param ContextRef Context reference +%% @param Module Python module name +%% @param Func Function name +%% @param Args List of arguments +%% @param Kwargs Map of keyword arguments +%% @returns {ok, Result} | {error, Reason} +-spec context_call(reference(), binary(), binary(), list(), map()) -> + {ok, term()} | {error, term()}. +context_call(_ContextRef, _Module, _Func, _Args, _Kwargs) -> + ?NIF_STUB. + +%% @doc Evaluate a Python expression in a context. +%% +%% NO MUTEX - caller must ensure exclusive access (process ownership). 
+%% +%% @param ContextRef Context reference +%% @param Code Python code to evaluate +%% @param Locals Map of local variables +%% @returns {ok, Result} | {error, Reason} +-spec context_eval(reference(), binary(), map()) -> + {ok, term()} | {error, term()}. +context_eval(_ContextRef, _Code, _Locals) -> + ?NIF_STUB. + +%% @doc Execute Python statements in a context. +%% +%% NO MUTEX - caller must ensure exclusive access (process ownership). +%% +%% @param ContextRef Context reference +%% @param Code Python code to execute +%% @returns ok | {error, Reason} +-spec context_exec(reference(), binary()) -> ok | {error, term()}. +context_exec(_ContextRef, _Code) -> + ?NIF_STUB. + +%% @doc Call a method on a Python object in a context. +%% +%% NO MUTEX - caller must ensure exclusive access (process ownership). +%% +%% @param ContextRef Context reference +%% @param ObjRef Python object reference +%% @param Method Method name +%% @param Args List of arguments +%% @returns {ok, Result} | {error, Reason} +-spec context_call_method(reference(), reference(), binary(), list()) -> + {ok, term()} | {error, term()}. +context_call_method(_ContextRef, _ObjRef, _Method, _Args) -> + ?NIF_STUB. + +%% @doc Convert a Python object reference to an Erlang term. +%% +%% The reference carries the interpreter ID, allowing automatic routing +%% to the correct context. +%% +%% @param ObjRef Python object reference +%% @returns {ok, Term} | {error, Reason} +-spec context_to_term(reference()) -> {ok, term()} | {error, term()}. +context_to_term(_ObjRef) -> + ?NIF_STUB. + +%% @doc Get the interpreter ID from a context reference. +%% +%% @param ContextRef Context reference +%% @returns InterpId +-spec context_interp_id(reference()) -> non_neg_integer(). +context_interp_id(_ContextRef) -> + ?NIF_STUB. + +%% @doc Set the callback handler pid for a context. +%% +%% This must be called before the context can handle erlang.call() callbacks. 
+%% +%% @param ContextRef Context reference +%% @param Pid Erlang pid to handle callbacks +%% @returns ok | {error, Reason} +-spec context_set_callback_handler(reference(), pid()) -> ok | {error, term()}. +context_set_callback_handler(_ContextRef, _Pid) -> + ?NIF_STUB. + +%% @doc Get the callback pipe write FD for a context. +%% +%% Returns the write end of the callback pipe for sending responses. +%% +%% @param ContextRef Context reference +%% @returns {ok, WriteFd} | {error, Reason} +-spec context_get_callback_pipe(reference()) -> {ok, integer()} | {error, term()}. +context_get_callback_pipe(_ContextRef) -> + ?NIF_STUB. + +%% @doc Write a callback response to the context's pipe. +%% +%% Writes a length-prefixed binary response that Python will read. +%% +%% @param ContextRef Context reference +%% @param Data Binary data to write +%% @returns ok | {error, Reason} +-spec context_write_callback_response(reference(), binary()) -> ok | {error, term()}. +context_write_callback_response(_ContextRef, _Data) -> + ?NIF_STUB. + +%% @doc Resume a suspended context with callback result. +%% +%% After handling a callback, call this to resume Python execution with +%% the callback result. May return {suspended, ...} if Python makes another +%% erlang.call() during resume (nested callback). +%% +%% @param ContextRef Context reference +%% @param StateRef Suspended state reference from {suspended, _, StateRef, _} +%% @param Result Binary result to return to Python (format: status_byte + repr) +%% @returns {ok, Result} | {error, Reason} | {suspended, CallbackId, StateRef, {FuncName, Args}} +-spec context_resume(reference(), reference(), binary()) -> + {ok, term()} | {error, term()} | {suspended, non_neg_integer(), reference(), {binary(), tuple()}}. +context_resume(_ContextRef, _StateRef, _Result) -> + ?NIF_STUB. + +%% @doc Cancel a suspended context resume (cleanup on error). +%% +%% Called when callback execution fails and resume won't be called. 
+%% Allows proper cleanup of the suspended state. +%% +%% @param ContextRef Context reference +%% @param StateRef Suspended state reference +%% @returns ok +-spec context_cancel_resume(reference(), reference()) -> ok. +context_cancel_resume(_ContextRef, _StateRef) -> + ?NIF_STUB. + +%% @doc Get the event loop for a subinterpreter context. +%% +%% For subinterpreter contexts (Python 3.12+), this returns the event loop +%% reference that can be used to create a dedicated event worker. Worker mode +%% contexts (Python < 3.12) use the shared router instead and return an error. +%% +%% @param ContextRef Context reference +%% @returns {ok, LoopRef} for subinterpreter contexts, or {error, not_subinterp} for worker mode +-spec context_get_event_loop(reference()) -> {ok, reference()} | {error, term()}. +context_get_event_loop(_ContextRef) -> + ?NIF_STUB. + +%%% ============================================================================ +%%% py_ref API (Python object references with interp_id) +%%% +%%% These functions work with py_ref resources that carry both a Python +%%% object reference and the interpreter ID that created it. This enables +%%% automatic routing of method calls and attribute access. +%%% ============================================================================ + +%% @doc Wrap a Python object as a py_ref with interp_id. +%% +%% @param ContextRef Context that owns the object +%% @param PyObj Python object reference +%% @returns {ok, RefTerm} | {error, Reason} +-spec ref_wrap(reference(), reference()) -> {ok, reference()} | {error, term()}. +ref_wrap(_ContextRef, _PyObj) -> + ?NIF_STUB. + +%% @doc Check if a term is a py_ref. +%% +%% @param Term Term to check +%% @returns true | false +-spec is_ref(term()) -> boolean(). +is_ref(_Term) -> + ?NIF_STUB. + +%% @doc Get the interpreter ID from a py_ref. +%% +%% This is fast - no GIL needed, just reads the stored interp_id. 
+%% +%% @param Ref py_ref reference +%% @returns InterpId +-spec ref_interp_id(reference()) -> non_neg_integer(). +ref_interp_id(_Ref) -> + ?NIF_STUB. + +%% @doc Convert a py_ref to an Erlang term. +%% +%% @param Ref py_ref reference +%% @returns {ok, Term} | {error, Reason} +-spec ref_to_term(reference()) -> {ok, term()} | {error, term()}. +ref_to_term(_Ref) -> + ?NIF_STUB. + +%% @doc Get an attribute from a py_ref object. +%% +%% @param Ref py_ref reference +%% @param AttrName Attribute name (binary) +%% @returns {ok, Value} | {error, Reason} +-spec ref_getattr(reference(), binary()) -> {ok, term()} | {error, term()}. +ref_getattr(_Ref, _AttrName) -> + ?NIF_STUB. + +%% @doc Call a method on a py_ref object. +%% +%% @param Ref py_ref reference +%% @param Method Method name (binary) +%% @param Args List of arguments +%% @returns {ok, Result} | {error, Reason} +-spec ref_call_method(reference(), binary(), list()) -> {ok, term()} | {error, term()}. +ref_call_method(_Ref, _Method, _Args) -> + ?NIF_STUB. + +%%% ============================================================================ +%%% Reactor NIFs - Erlang-as-Reactor Architecture +%%% +%%% These NIFs support the Erlang-as-Reactor pattern where Erlang handles +%%% TCP accept/routing and Python handles HTTP parsing and ASGI/WSGI execution. +%%% ============================================================================ + +%% @doc Register an FD for reactor monitoring. +%% +%% The FD is owned by the context and receives {select, FdRes, Ref, ready_input/ready_output} +%% messages. Initial registration is for read events. +%% +%% @param ContextRef Context reference from context_create/1 +%% @param Fd File descriptor to monitor +%% @param OwnerPid Process to receive select messages +%% @returns {ok, FdRef} | {error, Reason} +-spec reactor_register_fd(reference(), integer(), pid()) -> + {ok, reference()} | {error, term()}. +reactor_register_fd(_ContextRef, _Fd, _OwnerPid) -> + ?NIF_STUB. 
+ +%% @doc Re-register for read events after a one-shot event was delivered. +%% +%% Since enif_select is one-shot, this must be called after processing +%% each read event to continue monitoring. +%% +%% @param FdRef FD resource reference from reactor_register_fd/3 +%% @returns ok | {error, Reason} +-spec reactor_reselect_read(reference()) -> ok | {error, term()}. +reactor_reselect_read(_FdRef) -> + ?NIF_STUB. + +%% @doc Switch to write monitoring for response sending. +%% +%% After HTTP request parsing is complete and a response is ready, +%% switch to write monitoring to send the response when the socket is ready. +%% +%% @param FdRef FD resource reference +%% @returns ok | {error, Reason} +-spec reactor_select_write(reference()) -> ok | {error, term()}. +reactor_select_write(_FdRef) -> + ?NIF_STUB. + +%% @doc Extract the file descriptor integer from an FD resource. +%% +%% Useful for passing the FD to Python for os.read/os.write operations. +%% +%% @param FdRef FD resource reference +%% @returns Fd integer | {error, Reason} +-spec get_fd_from_resource(reference()) -> integer() | {error, term()}. +get_fd_from_resource(_FdRef) -> + ?NIF_STUB. + +%% @doc Call Python's erlang_reactor.on_read_ready(fd). +%% +%% This is called when the FD is ready for reading. Python reads data, +%% parses HTTP, and returns an action indicating what to do next. +%% +%% Actions: +%%
+%% <ul>
+%%   <li>`"continue"' - Continue reading (call reactor_reselect_read)</li>
+%%   <li>`"write_pending"' - Response ready, switch to write mode</li>
+%%   <li>`"close"' - Close the connection</li>
+%% </ul>
+%%
+%% +%% @param ContextRef Context reference +%% @param Fd File descriptor +%% @returns {ok, Action} | {error, Reason} +-spec reactor_on_read_ready(reference(), integer()) -> + {ok, binary()} | {error, term()}. +reactor_on_read_ready(_ContextRef, _Fd) -> + ?NIF_STUB. + +%% @doc Call Python's erlang_reactor.on_write_ready(fd). +%% +%% This is called when the FD is ready for writing. Python writes +%% buffered response data and returns an action. +%% +%% Actions: +%%
+%% <ul>
+%%   <li>`"continue"' - More data to write</li>
+%%   <li>`"read_pending"' - Keep-alive, switch back to read mode</li>
+%%   <li>`"close"' - Close the connection</li>
+%% </ul>
+%%
+%% +%% @param ContextRef Context reference +%% @param Fd File descriptor +%% @returns {ok, Action} | {error, Reason} +-spec reactor_on_write_ready(reference(), integer()) -> + {ok, binary()} | {error, term()}. +reactor_on_write_ready(_ContextRef, _Fd) -> + ?NIF_STUB. + +%% @doc Initialize a Python protocol handler for a new connection. +%% +%% Called when a new connection is accepted. Creates an HTTPProtocol +%% instance in Python and registers it in the protocol registry. +%% +%% @param ContextRef Context reference +%% @param Fd File descriptor +%% @param ClientInfo Map with client info (addr, port) +%% @returns ok | {error, Reason} +-spec reactor_init_connection(reference(), integer(), map()) -> + ok | {error, term()}. +reactor_init_connection(_ContextRef, _Fd, _ClientInfo) -> + ?NIF_STUB. + +%% @doc Close an FD and clean up the protocol handler. +%% +%% Calls Python's erlang_reactor.close_connection(fd) to clean up +%% the protocol handler, then closes the FD. +%% +%% @param FdRef FD resource reference +%% @returns ok | {error, Reason} +-spec reactor_close_fd(reference()) -> ok | {error, term()}. +reactor_close_fd(_FdRef) -> + ?NIF_STUB. diff --git a/src/py_pool.erl b/src/py_pool.erl deleted file mode 100644 index 04eaeed..0000000 --- a/src/py_pool.erl +++ /dev/null @@ -1,304 +0,0 @@ -%% Copyright 2026 Benoit Chesneau -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - -%%% @doc Worker pool manager for Python execution. 
-%%% -%%% Manages a pool of dirty NIF workers that execute Python code. -%%% Distributes requests across workers using round-robin scheduling. -%%% -%%% @private --module(py_pool). --behaviour(gen_server). - --export([ - start_link/1, - request/1, - broadcast/1, - get_stats/0, - %% Context affinity API - checkout/1, - checkin/1, - lookup_binding/1, - direct_request/2 -]). - --export([ - init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2 -]). - --record(state, { - workers :: queue:queue(pid()), - num_workers :: pos_integer(), - pending :: non_neg_integer(), - worker_sup :: pid(), - %% Context affinity tracking - checked_out = #{} :: #{pid() => checkout_info()}, - monitors = #{} :: #{reference() => binding_key()} -}). - --type binding_key() :: {process, pid()} | {context, reference()}. --type checkout_info() :: #{key := binding_key(), monitor := reference()}. - -%%% ============================================================================ -%%% API -%%% ============================================================================ - --spec start_link(pos_integer()) -> {ok, pid()} | {error, term()}. -start_link(NumWorkers) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [NumWorkers], []). - -%% @doc Submit a request to be executed by a worker. --spec request(term()) -> ok. -request(Request) -> - gen_server:cast(?MODULE, {request, Request}). - -%% @doc Broadcast a request to all workers. -%% Returns a list of results from each worker. --spec broadcast(term()) -> [{ok, term()} | {error, term()}]. -broadcast(Request) -> - gen_server:call(?MODULE, {broadcast, Request}, infinity). - -%% @doc Get pool statistics. --spec get_stats() -> map(). -get_stats() -> - gen_server:call(?MODULE, get_stats). - -%% @doc Checkout a worker for exclusive use by a binding key. -%% The worker is removed from the available pool and associated with the key. --spec checkout(binding_key()) -> {ok, pid()} | {error, no_workers_available}. 
-checkout(Key) -> - gen_server:call(?MODULE, {checkout, Key}). - -%% @doc Return a checked-out worker to the pool. -%% This is synchronous to ensure the worker is returned before continuing. --spec checkin(binding_key()) -> ok. -checkin(Key) -> - gen_server:call(?MODULE, {checkin, Key}). - -%% @doc Look up a binding to find the associated worker. -%% Fast O(1) ETS lookup. --spec lookup_binding(binding_key()) -> {ok, pid()} | not_found. -lookup_binding(Key) -> - case ets:lookup(py_bindings, Key) of - [{_, Worker}] -> {ok, Worker}; - [] -> not_found - end. - -%% @doc Send a request directly to a specific worker. --spec direct_request(pid(), term()) -> ok. -direct_request(Worker, Request) -> - Worker ! {py_request, Request}, - ok. - -%%% ============================================================================ -%%% gen_server callbacks -%%% ============================================================================ - -init([NumWorkers]) -> - process_flag(trap_exit, true), - - %% Initialize Python interpreter - case py_nif:init() of - ok -> - %% Create bindings ETS table for fast lookup - _ = ets:new(py_bindings, [named_table, public, set, {read_concurrency, true}]), - - %% Start worker supervisor - {ok, WorkerSup} = py_worker_sup:start_link(), - - %% Start workers - Workers = start_workers(WorkerSup, NumWorkers), - - {ok, #state{ - workers = queue:from_list(Workers), - num_workers = NumWorkers, - pending = 0, - worker_sup = WorkerSup - }}; - {error, Reason} -> - {stop, {python_init_failed, Reason}} - end. 
- -handle_call(get_stats, _From, State) -> - Stats = #{ - num_workers => State#state.num_workers, - pending_requests => State#state.pending, - available_workers => queue:len(State#state.workers), - checked_out => maps:size(State#state.checked_out) - }, - {reply, Stats, State}; - -handle_call({checkout, Key}, {Owner, _}, State) -> - case queue:out(State#state.workers) of - {{value, Worker}, Rest} -> - MonRef = erlang:monitor(process, Owner), - ets:insert(py_bindings, {Key, Worker}), - Info = #{key => Key, monitor => MonRef}, - NewState = State#state{ - workers = Rest, - checked_out = maps:put(Worker, Info, State#state.checked_out), - monitors = maps:put(MonRef, Key, State#state.monitors) - }, - {reply, {ok, Worker}, NewState}; - {empty, _} -> - {reply, {error, no_workers_available}, State} - end; - -handle_call({broadcast, Request}, _From, State) -> - %% Send request to all workers and collect results - Workers = queue:to_list(State#state.workers), - Results = broadcast_to_workers(Workers, Request), - {reply, Results, State}; - -handle_call({checkin, Key}, _From, State) -> - case ets:lookup(py_bindings, Key) of - [{_, Worker}] -> - ets:delete(py_bindings, Key), - case maps:get(Worker, State#state.checked_out, undefined) of - #{monitor := MonRef} -> - erlang:demonitor(MonRef, [flush]), - NewState = State#state{ - workers = queue:in(Worker, State#state.workers), - checked_out = maps:remove(Worker, State#state.checked_out), - monitors = maps:remove(MonRef, State#state.monitors) - }, - {reply, ok, NewState}; - undefined -> - {reply, ok, State} - end; - [] -> - {reply, ok, State} - end; - -handle_call(_Request, _From, State) -> - {reply, {error, unknown_request}, State}. - -handle_cast({request, Request}, State) -> - case queue:out(State#state.workers) of - {{value, Worker}, Rest} -> - %% Send request to worker - Worker ! 
{py_request, Request}, - %% Put worker at end of queue (round-robin) - NewWorkers = queue:in(Worker, Rest), - {noreply, State#state{ - workers = NewWorkers, - pending = State#state.pending + 1 - }}; - {empty, _} -> - %% No workers available - this shouldn't happen with proper sizing - %% For now, we'll queue the request (could add backpressure here) - error_logger:warning_msg("py_pool: no workers available~n"), - {Ref, Caller, _} = extract_ref_caller(Request), - Caller ! {py_error, Ref, no_workers_available}, - {noreply, State} - end; - -handle_cast(_Msg, State) -> - {noreply, State}. - -handle_info({worker_done, _WorkerPid}, State) -> - {noreply, State#state{pending = max(0, State#state.pending - 1)}}; - -handle_info({'DOWN', MonRef, process, _Pid, _Reason}, State) -> - %% Bound process died - return worker to pool - case maps:get(MonRef, State#state.monitors, undefined) of - undefined -> - {noreply, State}; - Key -> - case ets:lookup(py_bindings, Key) of - [{_, Worker}] -> - ets:delete(py_bindings, Key), - {noreply, State#state{ - workers = queue:in(Worker, State#state.workers), - checked_out = maps:remove(Worker, State#state.checked_out), - monitors = maps:remove(MonRef, State#state.monitors) - }}; - [] -> - {noreply, State#state{monitors = maps:remove(MonRef, State#state.monitors)}} - end - end; - -handle_info({'EXIT', Pid, Reason}, State) -> - error_logger:error_msg("py_pool: worker ~p died: ~p~n", [Pid, Reason]), - %% Clean up if this was a checked-out worker - NewState = case maps:get(Pid, State#state.checked_out, undefined) of - #{key := Key, monitor := MonRef} -> - ets:delete(py_bindings, Key), - erlang:demonitor(MonRef, [flush]), - State#state{ - checked_out = maps:remove(Pid, State#state.checked_out), - monitors = maps:remove(MonRef, State#state.monitors) - }; - undefined -> - State - end, - %% Remove dead worker from queue and start a new one - Workers = queue:filter(fun(W) -> W =/= Pid end, NewState#state.workers), - NewWorker = 
py_worker_sup:start_worker(NewState#state.worker_sup), - NewWorkers = queue:in(NewWorker, Workers), - {noreply, NewState#state{workers = NewWorkers}}; - -handle_info(_Info, State) -> - {noreply, State}. - -terminate(_Reason, _State) -> - %% Finalize Python interpreter - py_nif:finalize(), - ok. - -%%% ============================================================================ -%%% Internal functions -%%% ============================================================================ - -start_workers(Sup, N) -> - [py_worker_sup:start_worker(Sup) || _ <- lists:seq(1, N)]. - -extract_ref_caller({call, Ref, Caller, _, _, _, _}) -> {Ref, Caller, call}; -extract_ref_caller({eval, Ref, Caller, _, _}) -> {Ref, Caller, eval}; -extract_ref_caller({exec, Ref, Caller, _}) -> {Ref, Caller, exec}; -extract_ref_caller({stream, Ref, Caller, _, _, _, _}) -> {Ref, Caller, stream}. - -%% @private -%% Send a request to all workers and collect results -broadcast_to_workers(Workers, RequestTemplate) -> - Self = self(), - %% Send requests to all workers in parallel - Refs = lists:map(fun(Worker) -> - Ref = make_ref(), - Request = inject_ref_caller(RequestTemplate, Ref, Self), - Worker ! {py_request, Request}, - Ref - end, Workers), - %% Collect all responses - lists:map(fun(Ref) -> - receive - {py_response, Ref, Result} -> Result; - {py_error, Ref, Error} -> {error, Error} - after 30000 -> - {error, timeout} - end - end, Refs). - -%% @private -%% Inject a reference and caller into a request template -inject_ref_caller({exec, _Ref, _Caller, Code}, NewRef, NewCaller) -> - {exec, NewRef, NewCaller, Code}; -inject_ref_caller({eval, _Ref, _Caller, Code, Locals}, NewRef, NewCaller) -> - {eval, NewRef, NewCaller, Code, Locals}; -inject_ref_caller({eval, _Ref, _Caller, Code, Locals, Timeout}, NewRef, NewCaller) -> - {eval, NewRef, NewCaller, Code, Locals, Timeout}. 
diff --git a/src/py_reactor_context.erl b/src/py_reactor_context.erl new file mode 100644 index 0000000..a87c3c9 --- /dev/null +++ b/src/py_reactor_context.erl @@ -0,0 +1,406 @@ +%% Copyright 2026 Benoit Chesneau +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. + +%%% @doc Reactor context process with FD ownership. +%%% +%%% This module extends py_context with FD ownership and {select, ...} handling +%%% for the Erlang-as-Reactor architecture. +%%% +%%% Each py_reactor_context process: +%%% - Owns a Python context (subinterpreter or worker) +%%% - Handles FD handoffs from py_reactor_acceptor +%%% - Receives {select, FdRes, Ref, ready_input/ready_output} messages from BEAM +%%% - Calls Python protocol handlers via reactor NIFs +%%% +%%% == Connection Lifecycle == +%%% +%%% 1. Acceptor sends {fd_handoff, Fd, ClientInfo} to this process +%%% 2. Process registers FD via py_nif:reactor_register_fd/3 +%%% 3. Process calls py_nif:reactor_init_connection/3 to create Python protocol +%%% 4. BEAM sends {select, FdRes, Ref, ready_input} when data is available +%%% 5. Process calls py_nif:reactor_on_read_ready/2 to process data +%%% 6. Python returns action (continue, write_pending, close) +%%% 7. Process acts on action (reselect read, select write, close) +%%% +%%% @end +-module(py_reactor_context). + +-export([ + start_link/2, + start_link/3, + stop/1, + stats/1 +]). + +%% Internal exports +-export([init/4]). 
+ +-record(state, { + %% Context + id :: pos_integer(), + ref :: reference(), + + %% Active connections + %% Map: Fd -> #{fd_ref => FdRef, client_info => ClientInfo} + connections :: map(), + + %% Stats + total_requests :: non_neg_integer(), + total_connections :: non_neg_integer(), + active_connections :: non_neg_integer(), + + %% Config + max_connections :: non_neg_integer(), + + %% App config (for Python protocol) + app_module :: binary() | undefined, + app_callable :: binary() | undefined +}). + +-define(DEFAULT_MAX_CONNECTIONS, 100). + +%% ============================================================================ +%% API +%% ============================================================================ + +%% @doc Start a new py_reactor_context process. +%% +%% @param Id Unique identifier for this context +%% @param Mode Context mode (auto, subinterp, worker) +%% @returns {ok, Pid} | {error, Reason} +-spec start_link(pos_integer(), atom()) -> {ok, pid()} | {error, term()}. +start_link(Id, Mode) -> + start_link(Id, Mode, #{}). + +%% @doc Start a new py_reactor_context process with options. +%% +%% Options: +%% - max_connections: Maximum connections per context (default: 100) +%% - app_module: Python module containing ASGI/WSGI app +%% - app_callable: Python callable name (e.g., "app", "application") +%% +%% @param Id Unique identifier for this context +%% @param Mode Context mode (auto, subinterp, worker) +%% @param Opts Options map +%% @returns {ok, Pid} | {error, Reason} +-spec start_link(pos_integer(), atom(), map()) -> {ok, pid()} | {error, term()}. +start_link(Id, Mode, Opts) -> + Parent = self(), + Pid = spawn_link(fun() -> init(Parent, Id, Mode, Opts) end), + receive + {Pid, started} -> + {ok, Pid}; + {Pid, {error, Reason}} -> + {error, Reason} + after 5000 -> + exit(Pid, kill), + {error, timeout} + end. + +%% @doc Stop a py_reactor_context process. +-spec stop(pid()) -> ok. +stop(Ctx) when is_pid(Ctx) -> + MRef = erlang:monitor(process, Ctx), + Ctx ! 
{stop, self(), MRef}, + receive + {MRef, ok} -> + erlang:demonitor(MRef, [flush]), + ok; + {'DOWN', MRef, process, Ctx, _Reason} -> + ok + after 5000 -> + erlang:demonitor(MRef, [flush]), + exit(Ctx, kill), + ok + end. + +%% @doc Get context statistics. +-spec stats(pid()) -> map(). +stats(Ctx) when is_pid(Ctx) -> + MRef = erlang:monitor(process, Ctx), + Ctx ! {stats, self(), MRef}, + receive + {MRef, Stats} -> + erlang:demonitor(MRef, [flush]), + Stats; + {'DOWN', MRef, process, Ctx, Reason} -> + {error, {context_died, Reason}} + after 5000 -> + erlang:demonitor(MRef, [flush]), + {error, timeout} + end. + +%% ============================================================================ +%% Process loop +%% ============================================================================ + +%% @private +init(Parent, Id, Mode, Opts) -> + process_flag(trap_exit, true), + + %% Determine mode + ActualMode = case Mode of + auto -> + case py_nif:subinterp_supported() of + true -> subinterp; + false -> worker + end; + _ -> Mode + end, + + %% Create Python context + case py_nif:context_create(ActualMode) of + {ok, Ref} -> + %% Set up callback handler + py_nif:context_set_callback_handler(Ref, self()), + + MaxConns = maps:get(max_connections, Opts, ?DEFAULT_MAX_CONNECTIONS), + AppModule = maps:get(app_module, Opts, undefined), + AppCallable = maps:get(app_callable, Opts, undefined), + + %% Initialize app in Python context if specified + case AppModule of + undefined -> ok; + _ -> + Code = io_lib:format( + "import sys; sys.path.insert(0, '.'); " + "from ~s import ~s as _reactor_app", + [binary_to_list(AppModule), + binary_to_list(AppCallable)]), + py_nif:context_exec(Ref, iolist_to_binary(Code)) + end, + + State = #state{ + id = Id, + ref = Ref, + connections = #{}, + total_requests = 0, + total_connections = 0, + active_connections = 0, + max_connections = MaxConns, + app_module = AppModule, + app_callable = AppCallable + }, + + Parent ! 
{self(), started}, + loop(State); + + {error, Reason} -> + Parent ! {self(), {error, Reason}} + end. + +%% @private +loop(State) -> + receive + %% FD handoff from acceptor + {fd_handoff, Fd, ClientInfo} -> + handle_fd_handoff(Fd, ClientInfo, State); + + %% Select events from BEAM scheduler + {select, FdRes, _Ref, ready_input} -> + handle_read_ready(FdRes, State); + + {select, FdRes, _Ref, ready_output} -> + handle_write_ready(FdRes, State); + + %% Control messages + {stop, From, MRef} -> + cleanup(State), + From ! {MRef, ok}; + + {stats, From, MRef} -> + Stats = #{ + id => State#state.id, + active_connections => State#state.active_connections, + total_connections => State#state.total_connections, + total_requests => State#state.total_requests, + max_connections => State#state.max_connections + }, + From ! {MRef, Stats}, + loop(State); + + %% Handle EXIT signals + {'EXIT', _Pid, Reason} -> + cleanup(State), + exit(Reason); + + _Other -> + loop(State) + end. + +%% ============================================================================ +%% FD Handoff +%% ============================================================================ + +%% @private +handle_fd_handoff(Fd, ClientInfo, State) -> + #state{ + ref = Ref, + connections = Conns, + active_connections = Active, + max_connections = MaxConns, + total_connections = TotalConns + } = State, + + %% Check connection limit + case Active >= MaxConns of + true -> + %% At limit, reject connection + %% Close the FD directly (it's just an integer here) + %% The acceptor will close the socket + loop(State); + + false -> + %% Register FD for monitoring + case py_nif:reactor_register_fd(Ref, Fd, self()) of + {ok, FdRef} -> + %% Initialize Python protocol handler + case py_nif:reactor_init_connection(Ref, Fd, ClientInfo) of + ok -> + %% Store connection info + ConnInfo = #{ + fd_ref => FdRef, + client_info => ClientInfo + }, + NewConns = maps:put(Fd, ConnInfo, Conns), + NewState = State#state{ + connections = NewConns, + 
active_connections = Active + 1, + total_connections = TotalConns + 1 + }, + loop(NewState); + + {error, _Reason} -> + %% Failed to init connection, close + py_nif:reactor_close_fd(FdRef), + loop(State) + end; + + {error, _Reason} -> + %% Failed to register FD + loop(State) + end + end. + +%% ============================================================================ +%% Read Ready Handler +%% ============================================================================ + +%% @private +handle_read_ready(FdRes, State) -> + #state{ref = Ref} = State, + + %% Get FD from resource + case py_nif:get_fd_from_resource(FdRes) of + Fd when is_integer(Fd) -> + %% Call Python on_read_ready + case py_nif:reactor_on_read_ready(Ref, Fd) of + {ok, <<"continue">>} -> + %% More data expected, re-register for read + py_nif:reactor_reselect_read(FdRes), + loop(State); + + {ok, <<"write_pending">>} -> + %% Response ready, switch to write mode + py_nif:reactor_select_write(FdRes), + NewState = State#state{ + total_requests = State#state.total_requests + 1 + }, + loop(NewState); + + {ok, <<"close">>} -> + %% Close connection + close_connection(Fd, FdRes, State); + + {error, _Reason} -> + %% Error, close connection + close_connection(Fd, FdRes, State) + end; + + {error, _} -> + %% FD resource invalid + loop(State) + end. 
+ +%% ============================================================================ +%% Write Ready Handler +%% ============================================================================ + +%% @private +handle_write_ready(FdRes, State) -> + #state{ref = Ref} = State, + + %% Get FD from resource + case py_nif:get_fd_from_resource(FdRes) of + Fd when is_integer(Fd) -> + %% Call Python on_write_ready + case py_nif:reactor_on_write_ready(Ref, Fd) of + {ok, <<"continue">>} -> + %% More data to write, re-register for write + py_nif:reactor_select_write(FdRes), + loop(State); + + {ok, <<"read_pending">>} -> + %% Keep-alive, switch back to read mode + py_nif:reactor_reselect_read(FdRes), + loop(State); + + {ok, <<"close">>} -> + %% Close connection + close_connection(Fd, FdRes, State); + + {error, _Reason} -> + %% Error, close connection + close_connection(Fd, FdRes, State) + end; + + {error, _} -> + %% FD resource invalid + loop(State) + end. + +%% ============================================================================ +%% Connection Management +%% ============================================================================ + +%% @private +close_connection(Fd, FdRes, State) -> + #state{ + connections = Conns, + active_connections = Active + } = State, + + %% Close via NIF (cleans up Python protocol handler) + py_nif:reactor_close_fd(FdRes), + + %% Remove from connections map + NewConns = maps:remove(Fd, Conns), + NewState = State#state{ + connections = NewConns, + active_connections = max(0, Active - 1) + }, + loop(NewState). + +%% @private +cleanup(State) -> + #state{ref = Ref, connections = Conns} = State, + + %% Close all connections + maps:foreach(fun(_Fd, #{fd_ref := FdRef}) -> + py_nif:reactor_close_fd(FdRef) + end, Conns), + + %% Destroy Python context + py_nif:context_destroy(Ref), + ok. 
diff --git a/src/py_subinterp_pool.erl b/src/py_subinterp_pool.erl deleted file mode 100644 index a89c563..0000000 --- a/src/py_subinterp_pool.erl +++ /dev/null @@ -1,205 +0,0 @@ -%% Copyright 2026 Benoit Chesneau -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - -%%% @doc Worker pool manager for sub-interpreter Python execution. -%%% -%%% Manages a pool of sub-interpreter workers that have their own GIL, -%%% allowing true parallel execution on Python 3.12+. -%%% -%%% @private --module(py_subinterp_pool). --behaviour(gen_server). - --export([ - start_link/1, - request/1, - parallel/1, - get_stats/0 -]). - --export([ - init/1, - handle_call/3, - handle_cast/2, - handle_info/2, - terminate/2 -]). - --record(state, { - workers :: queue:queue(pid()) | undefined, - worker_refs :: [reference()], %% NIF refs for parallel_execute - num_workers :: non_neg_integer(), - pending :: non_neg_integer(), - worker_sup :: pid() | undefined, - supported :: boolean() %% whether subinterpreters are supported -}). - -%%% ============================================================================ -%%% API -%%% ============================================================================ - --spec start_link(pos_integer()) -> {ok, pid()} | {error, term()}. -start_link(NumWorkers) -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [NumWorkers], []). - -%% @doc Submit a request to be executed by a worker. --spec request(term()) -> ok. 
-request(Request) -> - gen_server:cast(?MODULE, {request, Request}). - -%% @doc Execute multiple calls in parallel using all available sub-interpreters. -%% Returns when all calls are complete. --spec parallel([{atom() | binary(), atom() | binary(), list()}]) -> - {ok, list()} | {error, term()}. -parallel(Calls) -> - gen_server:call(?MODULE, {parallel, Calls}, 60000). - -%% @doc Get pool statistics. --spec get_stats() -> map(). -get_stats() -> - gen_server:call(?MODULE, get_stats). - -%%% ============================================================================ -%%% gen_server callbacks -%%% ============================================================================ - -init([NumWorkers]) -> - process_flag(trap_exit, true), - - %% Check if sub-interpreters are supported - case py_nif:subinterp_supported() of - true -> - %% Start worker supervisor - {ok, WorkerSup} = py_subinterp_worker_sup:start_link(), - - %% Start workers and collect their NIF refs - {Workers, WorkerRefs} = start_workers(WorkerSup, NumWorkers), - - {ok, #state{ - workers = queue:from_list(Workers), - worker_refs = WorkerRefs, - num_workers = NumWorkers, - pending = 0, - worker_sup = WorkerSup, - supported = true - }}; - false -> - %% Sub-interpreters not supported, but pool still starts - %% All requests will return an error - {ok, #state{ - workers = undefined, - worker_refs = [], - num_workers = 0, - pending = 0, - worker_sup = undefined, - supported = false - }} - end. 
- -handle_call(get_stats, _From, State) -> - AvailWorkers = case State#state.workers of - undefined -> 0; - Q -> queue:len(Q) - end, - Stats = #{ - num_workers => State#state.num_workers, - pending_requests => State#state.pending, - available_workers => AvailWorkers, - supported => State#state.supported - }, - {reply, Stats, State}; - -handle_call({parallel, _Calls}, _From, #state{supported = false} = State) -> - {reply, {error, subinterpreters_not_supported}, State}; - -handle_call({parallel, Calls}, From, State) -> - %% For parallel execution, we use the NIF refs directly - BinCalls = [{to_binary(M), to_binary(F), A} || {M, F, A} <- Calls], - %% Execute in a separate process to not block gen_server - Self = self(), - WorkerRefs = State#state.worker_refs, - spawn_link(fun() -> - Result = py_nif:parallel_execute(WorkerRefs, BinCalls), - gen_server:reply(From, Result), - Self ! parallel_done - end), - {noreply, State#state{pending = State#state.pending + 1}}; - -handle_call(_Request, _From, State) -> - {reply, {error, unknown_request}, State}. - -handle_cast({request, Request}, #state{supported = false} = State) -> - {Ref, Caller, _} = extract_ref_caller(Request), - Caller ! {py_error, Ref, subinterpreters_not_supported}, - {noreply, State}; - -handle_cast({request, Request}, State) -> - case queue:out(State#state.workers) of - {{value, Worker}, Rest} -> - Worker ! {py_subinterp_request, Request}, - NewWorkers = queue:in(Worker, Rest), - {noreply, State#state{ - workers = NewWorkers, - pending = State#state.pending + 1 - }}; - {empty, _} -> - error_logger:warning_msg("py_subinterp_pool: no workers available~n"), - {Ref, Caller, _} = extract_ref_caller(Request), - Caller ! {py_error, Ref, no_workers_available}, - {noreply, State} - end; - -handle_cast(_Msg, State) -> - {noreply, State}. 
- -handle_info(parallel_done, State) -> - {noreply, State#state{pending = max(0, State#state.pending - 1)}}; - -handle_info({worker_done, _WorkerPid}, State) -> - {noreply, State#state{pending = max(0, State#state.pending - 1)}}; - -handle_info({'EXIT', Pid, Reason}, State) -> - error_logger:error_msg("py_subinterp_pool: worker ~p died: ~p~n", [Pid, Reason]), - Workers = queue:filter(fun(W) -> W =/= Pid end, State#state.workers), - {NewWorker, NewRef} = py_subinterp_worker_sup:start_worker_with_ref(State#state.worker_sup), - NewWorkers = queue:in(NewWorker, Workers), - %% Update worker refs (replace the dead one) - NewRefs = lists:map(fun(R) -> - %% Note: This is simplified - in production you'd track which ref died - R - end, State#state.worker_refs), - {noreply, State#state{workers = NewWorkers, worker_refs = [NewRef | NewRefs]}}; - -handle_info(_Info, State) -> - {noreply, State}. - -terminate(_Reason, #state{workers = undefined}) -> - ok; -terminate(_Reason, State) -> - Workers = queue:to_list(State#state.workers), - lists:foreach(fun(W) -> W ! shutdown end, Workers), - ok. - -%%% ============================================================================ -%%% Internal functions -%%% ============================================================================ - -start_workers(Sup, N) -> - Results = [py_subinterp_worker_sup:start_worker_with_ref(Sup) || _ <- lists:seq(1, N)], - {[Pid || {Pid, _Ref} <- Results], [Ref || {_Pid, Ref} <- Results]}. - -extract_ref_caller({call, Ref, Caller, _, _, _, _}) -> {Ref, Caller, call}. - -to_binary(Term) -> - py_util:to_binary(Term). diff --git a/src/py_subinterp_worker.erl b/src/py_subinterp_worker.erl deleted file mode 100644 index 1ef192a..0000000 --- a/src/py_subinterp_worker.erl +++ /dev/null @@ -1,89 +0,0 @@ -%% Copyright 2026 Benoit Chesneau -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - -%%% @doc Sub-interpreter Python worker process. -%%% -%%% Each worker has its own Python sub-interpreter with its own GIL, -%%% allowing true parallel execution on Python 3.12+. -%%% -%%% @private --module(py_subinterp_worker). - --export([ - start_link/0, - init/1 -]). - -%%% ============================================================================ -%%% API -%%% ============================================================================ - --spec start_link() -> {ok, pid()}. -start_link() -> - Pid = spawn_link(?MODULE, init, [self()]), - receive - {Pid, ready} -> {ok, Pid}; - {Pid, {error, Reason}} -> {error, Reason} - after 10000 -> - exit(Pid, kill), - {error, timeout} - end. - -%%% ============================================================================ -%%% Worker Process -%%% ============================================================================ - -init(Parent) -> - %% Create sub-interpreter worker with its own GIL - case py_nif:subinterp_worker_new() of - {ok, WorkerRef} -> - Parent ! {self(), ready}, - loop(WorkerRef, Parent); - {error, Reason} -> - Parent ! {self(), {error, Reason}} - end. - -loop(WorkerRef, Parent) -> - receive - {py_subinterp_request, Request} -> - handle_request(WorkerRef, Request), - loop(WorkerRef, Parent); - - shutdown -> - py_nif:subinterp_worker_destroy(WorkerRef), - ok; - - _Other -> - loop(WorkerRef, Parent) - end. 
- -%%% ============================================================================ -%%% Request Handling -%%% ============================================================================ - -handle_request(WorkerRef, {call, Ref, Caller, Module, Func, Args, Kwargs}) -> - ModuleBin = to_binary(Module), - FuncBin = to_binary(Func), - Result = py_nif:subinterp_call(WorkerRef, ModuleBin, FuncBin, Args, Kwargs), - send_response(Caller, Ref, Result). - -%%% ============================================================================ -%%% Internal Functions -%%% ============================================================================ - -send_response(Caller, Ref, Result) -> - py_util:send_response(Caller, Ref, Result). - -to_binary(Term) -> - py_util:to_binary(Term). diff --git a/src/py_subinterp_worker_sup.erl b/src/py_subinterp_worker_sup.erl deleted file mode 100644 index 6e0512c..0000000 --- a/src/py_subinterp_worker_sup.erl +++ /dev/null @@ -1,59 +0,0 @@ -%% Copyright 2026 Benoit Chesneau -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - -%%% @doc Simple supervisor for sub-interpreter Python workers. -%%% @private --module(py_subinterp_worker_sup). --behaviour(supervisor). - --export([ - start_link/0, - start_worker/1, - start_worker_with_ref/1 -]). - --export([init/1]). - -start_link() -> - supervisor:start_link(?MODULE, []). - -start_worker(Sup) -> - {ok, Pid} = supervisor:start_child(Sup, []), - Pid. 
- -%% Start worker and return both pid and the NIF worker ref -start_worker_with_ref(Sup) -> - case supervisor:start_child(Sup, []) of - {ok, Pid} -> - %% Get the worker ref from the process - %% For now, we use the pid as a proxy - the actual ref is inside the process - {Pid, Pid}; - {error, Reason} -> - error({worker_start_failed, Reason}) - end. - -init([]) -> - WorkerSpec = #{ - id => py_subinterp_worker, - start => {py_subinterp_worker, start_link, []}, - restart => temporary, - shutdown => 5000, - type => worker, - modules => [py_subinterp_worker] - }, - - {ok, { - #{strategy => simple_one_for_one, intensity => 10, period => 60}, - [WorkerSpec] - }}. diff --git a/src/py_thread_handler.erl b/src/py_thread_handler.erl index 4d0d9b9..cd463d6 100644 --- a/src/py_thread_handler.erl +++ b/src/py_thread_handler.erl @@ -269,6 +269,19 @@ term_to_python_repr(Term) when is_map(Term) -> end, [], Term), Joined = join_binaries(Items, <<", ">>), <<"{", Joined/binary, "}">>; +term_to_python_repr(Term) when is_pid(Term) -> + %% Encode PID using ETF (Erlang Term Format) for exact reconstruction. + %% Format: "__etf__:" + %% The C side will detect this, base64 decode, and use enif_binary_to_term + %% to reconstruct the pid, then convert to ErlangPidObject. + Etf = term_to_binary(Term), + B64 = base64:encode(Etf), + <<"\"__etf__:", B64/binary, "\"">>; +term_to_python_repr(Term) when is_reference(Term) -> + %% References also need ETF encoding for round-trip + Etf = term_to_binary(Term), + B64 = base64:encode(Etf), + <<"\"__etf__:", B64/binary, "\"">>; term_to_python_repr(_Term) -> %% Fallback - return None for unsupported types <<"None">>. diff --git a/src/py_worker.erl b/src/py_worker.erl deleted file mode 100644 index c3e1542..0000000 --- a/src/py_worker.erl +++ /dev/null @@ -1,352 +0,0 @@ -%% Copyright 2026 Benoit Chesneau -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. 
-%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - -%%% @doc Python worker process. -%%% -%%% Each worker maintains its own Python execution context. Workers -%%% receive requests from the pool and execute Python code, sending -%%% results back to callers. -%%% -%%% The NIF functions use ERL_NIF_DIRTY_JOB_IO_BOUND to run on dirty -%%% I/O schedulers, so the worker process itself runs on a normal scheduler. -%%% -%%% @private --module(py_worker). - --export([ - start_link/0, - init/1 -]). - -%% Timeout for checking shutdown (ms) --define(RECV_TIMEOUT, 1000). - -%%% ============================================================================ -%%% API -%%% ============================================================================ - --spec start_link() -> {ok, pid()}. -start_link() -> - Pid = spawn_link(?MODULE, init, [self()]), - receive - {Pid, ready} -> {ok, Pid}; - {Pid, {error, Reason}} -> {error, Reason} - after 10000 -> - exit(Pid, kill), - {error, timeout} - end. - -%%% ============================================================================ -%%% Worker Process -%%% ============================================================================ - -init(Parent) -> - %% Create worker context - case py_nif:worker_new() of - {ok, WorkerRef} -> - %% Spawn a separate callback handler process - CallbackHandler = spawn_link(fun() -> callback_handler_loop() end), - %% Set up callback handler with the separate process - case py_nif:set_callback_handler(WorkerRef, CallbackHandler) of - {ok, CallbackFd} -> - CallbackHandler ! {set_fd, CallbackFd}, - Parent ! 
{self(), ready}, - loop(WorkerRef, Parent, CallbackFd); - {error, Reason} -> - exit(CallbackHandler, kill), - Parent ! {self(), {error, Reason}} - end; - {error, Reason} -> - Parent ! {self(), {error, Reason}} - end. - -%% Separate process that handles callbacks from Python -callback_handler_loop() -> - receive - {set_fd, CallbackFd} -> - callback_handler_loop(CallbackFd) - end. - -callback_handler_loop(CallbackFd) -> - receive - {erlang_callback, _CallbackId, FuncName, Args} -> - handle_callback(CallbackFd, FuncName, Args), - callback_handler_loop(CallbackFd); - shutdown -> - ok; - _Other -> - callback_handler_loop(CallbackFd) - end. - -loop(WorkerRef, _Parent, _CallbackFd) -> - receive - {py_request, Request} -> - handle_request(WorkerRef, Request), - loop(WorkerRef, _Parent, _CallbackFd); - - shutdown -> - py_nif:worker_destroy(WorkerRef), - ok; - - _Other -> - loop(WorkerRef, _Parent, _CallbackFd) - end. - -%%% ============================================================================ -%%% Request Handling -%%% ============================================================================ - -%% Call with timeout -handle_request(WorkerRef, {call, Ref, Caller, Module, Func, Args, Kwargs, TimeoutMs}) -> - ModuleBin = to_binary(Module), - FuncBin = to_binary(Func), - Result = py_nif:worker_call(WorkerRef, ModuleBin, FuncBin, Args, Kwargs, TimeoutMs), - handle_call_result(Result, Ref, Caller); - -%% Call without timeout (backward compatible) -handle_request(WorkerRef, {call, Ref, Caller, Module, Func, Args, Kwargs}) -> - ModuleBin = to_binary(Module), - FuncBin = to_binary(Func), - Result = py_nif:worker_call(WorkerRef, ModuleBin, FuncBin, Args, Kwargs), - handle_call_result(Result, Ref, Caller); - -%% Eval with timeout -handle_request(WorkerRef, {eval, Ref, Caller, Code, Locals, TimeoutMs}) -> - CodeBin = to_binary(Code), - Result = py_nif:worker_eval(WorkerRef, CodeBin, Locals, TimeoutMs), - handle_call_result(Result, Ref, Caller); - -%% Eval without timeout 
(backward compatible) -handle_request(WorkerRef, {eval, Ref, Caller, Code, Locals}) -> - CodeBin = to_binary(Code), - Result = py_nif:worker_eval(WorkerRef, CodeBin, Locals), - handle_call_result(Result, Ref, Caller); - -handle_request(WorkerRef, {exec, Ref, Caller, Code}) -> - CodeBin = to_binary(Code), - Result = py_nif:worker_exec(WorkerRef, CodeBin), - send_response(Caller, Ref, Result); - -handle_request(WorkerRef, {stream, Ref, Caller, Module, Func, Args, Kwargs}) -> - ModuleBin = to_binary(Module), - FuncBin = to_binary(Func), - %% For streaming, we call a special function that yields chunks - case py_nif:worker_call(WorkerRef, ModuleBin, FuncBin, Args, Kwargs) of - {ok, {generator, GenRef}} -> - stream_chunks(WorkerRef, GenRef, Ref, Caller); - {ok, Value} -> - %% Not a generator, send as single chunk - Caller ! {py_chunk, Ref, Value}, - Caller ! {py_end, Ref}; - {error, _} = Error -> - Caller ! {py_error, Ref, Error} - end; - -handle_request(WorkerRef, {stream_eval, Ref, Caller, Code, Locals}) -> - %% Evaluate expression and stream if result is a generator - CodeBin = to_binary(Code), - case py_nif:worker_eval(WorkerRef, CodeBin, Locals) of - {ok, {generator, GenRef}} -> - stream_chunks(WorkerRef, GenRef, Ref, Caller); - {ok, Value} -> - %% Not a generator, send as single value - Caller ! {py_chunk, Ref, Value}, - Caller ! {py_end, Ref}; - {error, _} = Error -> - Caller ! {py_error, Ref, Error} - end. - -stream_chunks(WorkerRef, GenRef, Ref, Caller) -> - case py_nif:worker_next(WorkerRef, GenRef) of - {ok, {generator, NestedGen}} -> - %% Nested generator - stream it inline - stream_chunks(WorkerRef, NestedGen, Ref, Caller); - {ok, Chunk} -> - Caller ! {py_chunk, Ref, Chunk}, - stream_chunks(WorkerRef, GenRef, Ref, Caller); - {error, stop_iteration} -> - Caller ! {py_end, Ref}; - {error, Error} -> - Caller ! {py_error, Ref, Error} - end. 
- -%%% ============================================================================ -%%% Suspended Callback Handling -%%% -%%% When Python code calls erlang.call(), it may return a suspension marker -%%% instead of blocking. This allows the dirty scheduler to be freed while -%%% the Erlang callback is executed. -%%% ============================================================================ - -%% Handle the result of a worker_call - either normal result or suspended callback -handle_call_result({suspended, _CallbackId, StateRef, {FuncName, CallArgs}}, Ref, Caller) -> - %% Python code called erlang.call() - spawn a process to handle the callback. - %% This prevents deadlock when the callback itself calls py:eval, which would - %% otherwise block this worker while waiting for another worker (or even this - %% same worker via round-robin). - spawn_link(fun() -> - handle_suspended_callback(StateRef, FuncName, CallArgs, Ref, Caller) - end), - ok; %% Don't block the worker - it can process other requests -handle_call_result(Result, Ref, Caller) -> - %% Normal result - send directly to caller - send_response(Caller, Ref, Result). - -%% Execute a suspended callback and resume Python execution. -%% This runs in a separate process to avoid blocking the worker. -handle_suspended_callback(StateRef, FuncName, CallArgs, Ref, Caller) -> - %% Convert Args from tuple/list to list - ArgsList = case CallArgs of - T when is_tuple(T) -> tuple_to_list(T); - L when is_list(L) -> L; - _ -> [CallArgs] - end, - %% Execute the registered Erlang function - %% This can call py:eval, py:call, etc. 
without deadlocking - CallbackResult = py_callback:execute(FuncName, ArgsList), - %% Encode result as binary (status byte + python repr) - ResultBinary = case CallbackResult of - {ok, Value} -> - ValueRepr = term_to_python_repr(Value), - <<0, ValueRepr/binary>>; - {error, {not_found, Name}} -> - ErrMsg = iolist_to_binary(io_lib:format("Function '~s' not registered", [Name])), - <<1, ErrMsg/binary>>; - {error, {Class, Reason, _Stack}} -> - ErrMsg = iolist_to_binary(io_lib:format("~p: ~p", [Class, Reason])), - <<1, ErrMsg/binary>> - end, - %% Resume Python execution with the callback result - %% The NIF parses the result using Python's ast.literal_eval and returns - %% {ok, ParsedTerm} or {error, Reason} - FinalResult = py_nif:resume_callback(StateRef, ResultBinary), - %% Handle the final result (could be another suspension for nested callbacks) - %% Note: for nested callbacks, this will spawn another process recursively - forward_final_result(FinalResult, Ref, Caller). - -%% Forward the final result to the caller, handling nested suspensions -forward_final_result({suspended, _CallbackId, StateRef, {FuncName, CallArgs}}, Ref, Caller) -> - %% Another suspension - handle it recursively - handle_suspended_callback(StateRef, FuncName, CallArgs, Ref, Caller); -forward_final_result(Result, Ref, Caller) -> - %% Final result - send to the original caller - send_response(Caller, Ref, Result). 
- -%%% ============================================================================ -%%% Callback Handling -%%% ============================================================================ - -handle_callback(CallbackFd, FuncName, Args) -> - %% Convert Args from tuple to list if needed - ArgsList = case Args of - T when is_tuple(T) -> tuple_to_list(T); - L when is_list(L) -> L; - _ -> [Args] - end, - %% Execute the registered function - case py_callback:execute(FuncName, ArgsList) of - {ok, Result} -> - %% Encode result as Python-parseable string - %% Format: status_byte (0=ok) + python_repr - ResultStr = term_to_python_repr(Result), - Response = <<0, ResultStr/binary>>, - py_nif:send_callback_response(CallbackFd, Response); - {error, {not_found, Name}} -> - ErrMsg = iolist_to_binary(io_lib:format("Function '~s' not registered", [Name])), - Response = <<1, ErrMsg/binary>>, - py_nif:send_callback_response(CallbackFd, Response); - {error, {Class, Reason, _Stack}} -> - ErrMsg = iolist_to_binary(io_lib:format("~p: ~p", [Class, Reason])), - Response = <<1, ErrMsg/binary>>, - py_nif:send_callback_response(CallbackFd, Response) - end. 
- -%% Convert Erlang term to Python-parseable string representation -term_to_python_repr(Term) when is_integer(Term) -> - integer_to_binary(Term); -term_to_python_repr(Term) when is_float(Term) -> - float_to_binary(Term, [{decimals, 17}, compact]); -term_to_python_repr(true) -> - <<"True">>; -term_to_python_repr(false) -> - <<"False">>; -term_to_python_repr(none) -> - <<"None">>; -term_to_python_repr(nil) -> - <<"None">>; -term_to_python_repr(undefined) -> - <<"None">>; -term_to_python_repr(Term) when is_atom(Term) -> - %% Convert atom to Python string - AtomStr = atom_to_binary(Term, utf8), - <<"\"", AtomStr/binary, "\"">>; -term_to_python_repr(Term) when is_binary(Term) -> - %% Escape binary as Python string - Escaped = escape_string(Term), - <<"\"", Escaped/binary, "\"">>; -term_to_python_repr(Term) when is_list(Term) -> - %% Check if it's a string (list of integers) - case io_lib:printable_list(Term) of - true -> - Bin = list_to_binary(Term), - Escaped = escape_string(Bin), - <<"\"", Escaped/binary, "\"">>; - false -> - Items = [term_to_python_repr(E) || E <- Term], - Joined = join_binaries(Items, <<", ">>), - <<"[", Joined/binary, "]">> - end; -term_to_python_repr(Term) when is_tuple(Term) -> - Items = [term_to_python_repr(E) || E <- tuple_to_list(Term)], - Joined = join_binaries(Items, <<", ">>), - case length(Items) of - 1 -> <<"(", Joined/binary, ",)">>; - _ -> <<"(", Joined/binary, ")">> - end; -term_to_python_repr(Term) when is_map(Term) -> - Items = maps:fold(fun(K, V, Acc) -> - KeyRepr = term_to_python_repr(K), - ValRepr = term_to_python_repr(V), - [<> | Acc] - end, [], Term), - Joined = join_binaries(Items, <<", ">>), - <<"{", Joined/binary, "}">>; -term_to_python_repr(_Term) -> - %% Fallback - return None for unsupported types - <<"None">>. 
- -escape_string(Bin) -> - %% Escape special characters for Python string - binary:replace( - binary:replace( - binary:replace( - binary:replace(Bin, <<"\\">>, <<"\\\\">>, [global]), - <<"\"">>, <<"\\\"">>, [global]), - <<"\n">>, <<"\\n">>, [global]), - <<"\r">>, <<"\\r">>, [global]). - -join_binaries([], _Sep) -> <<>>; -join_binaries([H], _Sep) -> H; -join_binaries([H|T], Sep) -> - lists:foldl(fun(E, Acc) -> <> end, H, T). - -%%% ============================================================================ -%%% Internal Functions -%%% ============================================================================ - -send_response(Caller, Ref, Result) -> - py_util:send_response(Caller, Ref, Result). - -to_binary(Term) -> - py_util:to_binary(Term). diff --git a/src/py_worker_sup.erl b/src/py_worker_sup.erl deleted file mode 100644 index 888579b..0000000 --- a/src/py_worker_sup.erl +++ /dev/null @@ -1,47 +0,0 @@ -%% Copyright 2026 Benoit Chesneau -%% -%% Licensed under the Apache License, Version 2.0 (the "License"); -%% you may not use this file except in compliance with the License. -%% You may obtain a copy of the License at -%% -%% http://www.apache.org/licenses/LICENSE-2.0 -%% -%% Unless required by applicable law or agreed to in writing, software -%% distributed under the License is distributed on an "AS IS" BASIS, -%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -%% See the License for the specific language governing permissions and -%% limitations under the License. - -%%% @doc Simple supervisor for Python workers. -%%% @private --module(py_worker_sup). --behaviour(supervisor). - --export([ - start_link/0, - start_worker/1 -]). - --export([init/1]). - -start_link() -> - supervisor:start_link(?MODULE, []). - -start_worker(Sup) -> - {ok, Pid} = supervisor:start_child(Sup, []), - Pid. 
- -init([]) -> - WorkerSpec = #{ - id => py_worker, - start => {py_worker, start_link, []}, - restart => temporary, - shutdown => 5000, - type => worker, - modules => [py_worker] - }, - - {ok, { - #{strategy => simple_one_for_one, intensity => 10, period => 60}, - [WorkerSpec] - }}. diff --git a/test/py_SUITE.erl b/test/py_SUITE.erl index 9ed99ad..ef124bc 100644 --- a/test/py_SUITE.erl +++ b/test/py_SUITE.erl @@ -16,7 +16,7 @@ test_eval/1, test_eval_complex_locals/1, test_exec/1, - test_async_call/1, + test_cast/1, test_type_conversions/1, test_nested_types/1, test_timeout/1, @@ -66,7 +66,7 @@ all() -> test_eval, test_eval_complex_locals, test_exec, - test_async_call, + test_cast, test_type_conversions, test_nested_types, test_timeout, @@ -201,9 +201,9 @@ def my_func(): ">>), ok. -test_async_call(_Config) -> - Ref1 = py:call_async(math, sqrt, [100]), - Ref2 = py:call_async(math, sqrt, [144]), +test_cast(_Config) -> + Ref1 = py:cast(math, sqrt, [100]), + Ref2 = py:cast(math, sqrt, [144]), {ok, 10.0} = py:await(Ref1), {ok, 12.0} = py:await(Ref2), @@ -269,9 +269,9 @@ test_nested_types(_Config) -> ok. test_timeout(_Config) -> - %% Test that timeout works - use a heavy computation - %% sum(range(10**8)) will trigger timeout - {error, timeout} = py:eval(<<"sum(range(10**8))">>, #{}, 100), + %% Test that timeout works - use time.sleep which guarantees delay + %% time.sleep(1) will definitely exceed 100ms timeout + {error, timeout} = py:eval(<<"__import__('time').sleep(1)">>, #{}, 100), %% Test that normal operations complete within timeout {ok, 45} = py:eval(<<"sum(range(10))">>, #{}, 5000), @@ -387,23 +387,29 @@ test_version(_Config) -> ok. 
test_memory_stats(_Config) -> - {ok, Stats} = py:memory_stats(), - true = is_map(Stats), - %% Check that we have GC stats - true = maps:is_key(gc_stats, Stats), - true = maps:is_key(gc_count, Stats), - true = maps:is_key(gc_threshold, Stats), - - %% Test tracemalloc - ok = py:tracemalloc_start(), - %% Allocate some memory - {ok, _} = py:eval(<<"[x**2 for x in range(1000)]">>), - {ok, StatsWithTrace} = py:memory_stats(), - true = maps:is_key(traced_memory_current, StatsWithTrace), - true = maps:is_key(traced_memory_peak, StatsWithTrace), - ok = py:tracemalloc_stop(), + %% tracemalloc doesn't support subinterpreters + case py_nif:subinterp_supported() of + true -> + {skip, "tracemalloc not supported in subinterpreters"}; + false -> + {ok, Stats} = py:memory_stats(), + true = is_map(Stats), + %% Check that we have GC stats + true = maps:is_key(gc_stats, Stats), + true = maps:is_key(gc_count, Stats), + true = maps:is_key(gc_threshold, Stats), + + %% Test tracemalloc + ok = py:tracemalloc_start(), + %% Allocate some memory + {ok, _} = py:eval(<<"[x**2 for x in range(1000)]">>), + {ok, StatsWithTrace} = py:memory_stats(), + true = maps:is_key(traced_memory_current, StatsWithTrace), + true = maps:is_key(traced_memory_peak, StatsWithTrace), + ok = py:tracemalloc_stop(), - ok. + ok + end. 
test_gc(_Config) -> %% Test basic GC @@ -912,25 +918,31 @@ assert val == 2, f'Expected 2, got {val}' %% Test module reload across all workers test_reload(_Config) -> - %% First, ensure json module is imported in at least one worker - {ok, _} = py:call(json, dumps, [[1, 2, 3]]), + %% Module reload can trigger imports that don't support subinterpreters + case py_nif:subinterp_supported() of + true -> + {skip, "module reload may use modules not supported in subinterpreters"}; + false -> + %% First, ensure json module is imported in at least one worker + {ok, _} = py:call(json, dumps, [[1, 2, 3]]), - %% Now reload it - should succeed across all workers - ok = py:reload(json), + %% Now reload it - should succeed across all workers + ok = py:reload(json), - %% Verify the module still works after reload - {ok, <<"[1, 2, 3]">>} = py:call(json, dumps, [[1, 2, 3]]), + %% Verify the module still works after reload + {ok, <<"[1, 2, 3]">>} = py:call(json, dumps, [[1, 2, 3]]), - %% Test reload of a module that might not be loaded (should not error) - ok = py:reload(collections), + %% Test reload of a module that might not be loaded (should not error) + ok = py:reload(collections), - %% Test reload with binary module name - ok = py:reload(<<"os">>), + %% Test reload with binary module name + ok = py:reload(<<"os">>), - %% Test reload with string module name - ok = py:reload("sys"), + %% Test reload with string module name + ok = py:reload("sys"), - ok. + ok + end. %%% ============================================================================ %%% ASGI Optimization Tests diff --git a/test/py_api_SUITE.erl b/test/py_api_SUITE.erl new file mode 100644 index 0000000..ad46953 --- /dev/null +++ b/test/py_api_SUITE.erl @@ -0,0 +1,225 @@ +%%% @doc Common Test suite for py module's new process-per-context API. +%%% +%%% Tests the new explicit context API where py:call/eval/exec can take +%%% a context pid as the first argument. +-module(py_api_SUITE). 
+ +-include_lib("common_test/include/ct.hrl"). + +-export([ + all/0, + init_per_suite/1, + end_per_suite/1, + init_per_testcase/2, + end_per_testcase/2 +]). + +-export([ + %% Existing API compatibility + test_call_basic/1, + test_call_with_kwargs/1, + test_eval_basic/1, + test_exec_basic/1, + %% New explicit context API + test_explicit_context_call/1, + test_explicit_context_eval/1, + test_explicit_context_exec/1, + test_explicit_context_isolation/1, + %% Context management API + test_context_management/1, + test_start_stop_contexts/1, + %% Mixed usage + test_mixed_api_usage/1 +]). + +%% ============================================================================ +%% Common Test callbacks +%% ============================================================================ + +all() -> + [ + %% Existing API compatibility + test_call_basic, + test_call_with_kwargs, + test_eval_basic, + test_exec_basic, + %% New explicit context API + test_explicit_context_call, + test_explicit_context_eval, + test_explicit_context_exec, + test_explicit_context_isolation, + %% Context management API + test_context_management, + test_start_stop_contexts, + %% Mixed usage + test_mixed_api_usage + ]. + +init_per_suite(Config) -> + application:ensure_all_started(erlang_python), + Config. + +end_per_suite(_Config) -> + ok. + +init_per_testcase(_TestCase, Config) -> + %% Ensure fresh contexts are available for each test + catch py:stop_contexts(), + {ok, _} = py:start_contexts(), + Config. + +end_per_testcase(_TestCase, _Config) -> + %% Clean up after each test + catch py:stop_contexts(), + ok. + +%% ============================================================================ +%% Existing API Compatibility Tests +%% ============================================================================ + +%% @doc Test that basic py:call still works (backwards compatibility). 
+test_call_basic(_Config) -> + {ok, 4.0} = py:call(math, sqrt, [16]), + {ok, 5.0} = py:call(math, sqrt, [25]), + {ok, 3} = py:call(builtins, len, [[1, 2, 3]]). + +%% @doc Test that py:call with kwargs still works. +test_call_with_kwargs(_Config) -> + {ok, _} = py:call(json, dumps, [[{a, 1}]], #{indent => 2}), + {ok, _} = py:call(json, dumps, [#{a => 1, b => 2}], #{sort_keys => true}). + +%% @doc Test that py:eval still works. +test_eval_basic(_Config) -> + {ok, 6} = py:eval(<<"2 + 4">>), + {ok, 15} = py:eval(<<"x * 3">>, #{x => 5}), + {ok, [1, 4, 9]} = py:eval(<<"[i*i for i in range(1, 4)]">>). + +%% @doc Test that py:exec still works. +test_exec_basic(_Config) -> + ok = py:exec(<<"import math">>). + +%% ============================================================================ +%% New Explicit Context API Tests +%% ============================================================================ + +%% @doc Test py:call with explicit context pid. +test_explicit_context_call(_Config) -> + {ok, _} = py:start_contexts(#{contexts => 2}), + + Ctx = py:context(), + true = is_pid(Ctx), + + %% Call with context as first argument + {ok, 4.0} = py:call(Ctx, math, sqrt, [16]), + {ok, 5.0} = py:call(Ctx, math, sqrt, [25]), + + %% Call with options + {ok, _} = py:call(Ctx, json, dumps, [[{a, 1}]], #{kwargs => #{indent => 2}}). + +%% @doc Test py:eval with explicit context pid. +test_explicit_context_eval(_Config) -> + {ok, _} = py:start_contexts(#{contexts => 2}), + + Ctx = py:context(), + + %% Eval with context as first argument + {ok, 6} = py:eval(Ctx, <<"2 + 4">>), + + %% Eval with context and locals + {ok, 15} = py:eval(Ctx, <<"x * 3">>, #{x => 5}). + +%% @doc Test py:exec with explicit context pid. 
+test_explicit_context_exec(_Config) -> + {ok, _} = py:start_contexts(#{contexts => 2}), + + Ctx = py:context(), + + %% Exec with context as first argument + ok = py:exec(Ctx, <<"test_var = 42">>), + + %% Verify the variable persists in that context + {ok, 42} = py:eval(Ctx, <<"test_var">>, #{}). + +%% @doc Test that different contexts are isolated. +test_explicit_context_isolation(_Config) -> + {ok, _} = py:start_contexts(#{contexts => 2}), + + Ctx1 = py:context(1), + Ctx2 = py:context(2), + + %% Set different values in each context + ok = py:exec(Ctx1, <<"isolation_test = 'context1'">>), + ok = py:exec(Ctx2, <<"isolation_test = 'context2'">>), + + %% Verify isolation + {ok, <<"context1">>} = py:eval(Ctx1, <<"isolation_test">>, #{}), + {ok, <<"context2">>} = py:eval(Ctx2, <<"isolation_test">>, #{}). + +%% ============================================================================ +%% Context Management API Tests +%% ============================================================================ + +%% @doc Test py:context/0 and py:context/1. +test_context_management(_Config) -> + %% Stop any existing contexts first, then start with specific count + py:stop_contexts(), + {ok, Contexts} = py:start_contexts(#{contexts => 4}), + 4 = length(Contexts), + + %% Get context for current scheduler + Ctx = py:context(), + true = is_pid(Ctx), + true = lists:member(Ctx, Contexts), + + %% Get specific contexts by index + Ctx1 = py:context(1), + Ctx2 = py:context(2), + Ctx3 = py:context(3), + Ctx4 = py:context(4), + + [Ctx1, Ctx2, Ctx3, Ctx4] = Contexts. + +%% @doc Test py:start_contexts/0,1 and py:stop_contexts/0. 
+test_start_stop_contexts(_Config) -> + %% Start with default settings + {ok, Contexts1} = py:start_contexts(), + true = length(Contexts1) > 0, + + %% All contexts should be alive + lists:foreach(fun(Ctx) -> + true = is_process_alive(Ctx) + end, Contexts1), + + %% Stop + ok = py:stop_contexts(), + + %% Wait for contexts to die + timer:sleep(100), + lists:foreach(fun(Ctx) -> + false = is_process_alive(Ctx) + end, Contexts1), + + %% Start with custom settings + {ok, Contexts2} = py:start_contexts(#{contexts => 2}), + 2 = length(Contexts2), + + ok = py:stop_contexts(). + +%% ============================================================================ +%% Mixed Usage Tests +%% ============================================================================ + +%% @doc Test implicit and explicit context API usage. +test_mixed_api_usage(_Config) -> + {ok, _} = py:start_contexts(#{contexts => 2}), + + %% Use implicit API (auto-routes through py_context_router) + {ok, 4.0} = py:call(math, sqrt, [16]), + + %% Use explicit API (direct context pid) + Ctx = py:context(), + {ok, 5.0} = py:call(Ctx, math, sqrt, [25]), + + %% Both should work correctly + {ok, 6} = py:eval(<<"2 + 4">>), + {ok, 7} = py:eval(Ctx, <<"3 + 4">>, #{}). diff --git a/test/py_async_e2e_SUITE.erl b/test/py_async_e2e_SUITE.erl index 93dd161..5ea13e8 100644 --- a/test/py_async_e2e_SUITE.erl +++ b/test/py_async_e2e_SUITE.erl @@ -28,6 +28,8 @@ all() -> init_per_suite(Config) -> {ok, _} = application:ensure_all_started(erlang_python), + %% Ensure contexts are running + {ok, _} = py:start_contexts(), Config. end_per_suite(_Config) -> @@ -35,11 +37,9 @@ end_per_suite(_Config) -> ok. init_per_testcase(_TestCase, Config) -> - py:unbind(), Config. end_per_testcase(_TestCase, _Config) -> - py:unbind(), ok. 
%% ============================================================================ diff --git a/test/py_asyncio_compat_SUITE.erl b/test/py_asyncio_compat_SUITE.erl new file mode 100644 index 0000000..8a6751b --- /dev/null +++ b/test/py_asyncio_compat_SUITE.erl @@ -0,0 +1,312 @@ +%%% @doc Common Test suite for asyncio compatibility validation. +%%% +%%% This suite runs Python unittest tests that verify ErlangEventLoop +%%% is a full drop-in replacement for asyncio's default event loop. +%%% +%%% The tests run within the Erlang VM via py:call() to validate the +%%% complete integration path: +%%% Python test -> ErlangEventLoop -> py_event_loop NIF -> BEAM scheduler +%%% +%%% Architecture: +%%% py:call() invokes async_test_runner.run_tests() +%%% │ +%%% └─→ erlang.run(run_all()) +%%% │ +%%% └─→ ErlangEventLoop._run_once() +%%% ├─ Polls Erlang scheduler +%%% └─ Dispatches timer callbacks +%%% +%%% The key insight is that the async test runner uses erlang.run() to +%%% properly integrate with Erlang's timer system. This allows timers +%%% scheduled via call_later() to fire correctly, unlike the synchronous +%%% unittest runner which would block the event loop. +%%% +%%% Tests are adapted from uvloop's test suite and run against both: +%%% - ErlangEventLoop (Erlang-backed asyncio event loop) +%%% - AIOTestCase (standard asyncio for comparison) +-module(py_asyncio_compat_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-export([ + all/0, + groups/0, + init_per_suite/1, + end_per_suite/1, + init_per_group/2, + end_per_group/2, + init_per_testcase/2, + end_per_testcase/2 +]). + +%% Erlang tests (ErlangEventLoop) +-export([ + test_base_erlang/1, + test_sockets_erlang/1, + test_tcp_erlang/1, + test_udp_erlang/1, + test_unix_erlang/1, + test_dns_erlang/1, + test_executors_erlang/1, + test_context_erlang/1, + test_process_erlang/1, + test_erlang_api/1 +]). 
+ +%% Asyncio comparison tests (standard asyncio) +-export([ + test_base_asyncio/1, + test_sockets_asyncio/1, + test_tcp_asyncio/1, + test_udp_asyncio/1, + test_unix_asyncio/1, + test_dns_asyncio/1, + test_executors_asyncio/1, + test_context_asyncio/1 +]). + +%% ============================================================================ +%% CT Callbacks +%% ============================================================================ + +all() -> + [{group, erlang_tests}, {group, comparison_tests}]. + +groups() -> + [ + {erlang_tests, [sequence], [ + test_base_erlang, + test_sockets_erlang, + test_tcp_erlang, + test_udp_erlang, + test_unix_erlang, + test_dns_erlang, + test_executors_erlang, + test_context_erlang, + test_process_erlang, + test_erlang_api + ]}, + {comparison_tests, [sequence], [ + test_base_asyncio, + test_sockets_asyncio, + test_tcp_asyncio, + test_udp_asyncio, + test_unix_asyncio, + test_dns_asyncio, + test_executors_asyncio, + test_context_asyncio + ]} + ]. + +init_per_suite(Config) -> + case application:ensure_all_started(erlang_python) of + {ok, _} -> + {ok, _} = py:start_contexts(), + %% Wait for event loop to be fully initialized + case wait_for_event_loop(5000) of + ok -> + %% Set up Python path for tests + PrivDir = code:priv_dir(erlang_python), + ok = setup_python_path(PrivDir), + [{priv_dir, PrivDir} | Config]; + {error, Reason} -> + ct:fail({event_loop_not_ready, Reason}) + end; + {error, {App, Reason}} -> + ct:fail({failed_to_start, App, Reason}) + end. + +end_per_suite(_Config) -> + ok = application:stop(erlang_python), + ok. + +init_per_group(_GroupName, Config) -> + Config. + +end_per_group(_GroupName, _Config) -> + ok. + +init_per_testcase(_TestCase, Config) -> + Config. + +end_per_testcase(_TestCase, _Config) -> + ok. 
+ +%% ============================================================================ +%% Erlang Event Loop Tests +%% ============================================================================ + +test_base_erlang(Config) -> + run_erlang_tests("tests.test_base", Config). + +test_sockets_erlang(Config) -> + run_erlang_tests("tests.test_sockets", Config). + +test_tcp_erlang(Config) -> + run_erlang_tests("tests.test_tcp", Config). + +test_udp_erlang(Config) -> + run_erlang_tests("tests.test_udp", Config). + +test_unix_erlang(Config) -> + case os:type() of + {unix, _} -> + run_erlang_tests("tests.test_unix", Config); + _ -> + {skip, "Unix sockets not available on this platform"} + end. + +test_dns_erlang(Config) -> + run_erlang_tests("tests.test_dns", Config). + +test_executors_erlang(Config) -> + run_erlang_tests("tests.test_executors", Config). + +test_context_erlang(Config) -> + run_erlang_tests("tests.test_context", Config). + +test_process_erlang(Config) -> + run_erlang_tests("tests.test_process", Config). + +test_erlang_api(Config) -> + %% test_erlang_api has only Erlang-specific tests, run all + run_python_tests("tests.test_erlang_api", <<"*">>, Config). + +%% ============================================================================ +%% Asyncio Comparison Tests (standard asyncio) +%% ============================================================================ + +test_base_asyncio(Config) -> + run_asyncio_tests("tests.test_base", Config). + +test_sockets_asyncio(Config) -> + run_asyncio_tests("tests.test_sockets", Config). + +test_tcp_asyncio(Config) -> + run_asyncio_tests("tests.test_tcp", Config). + +test_udp_asyncio(Config) -> + run_asyncio_tests("tests.test_udp", Config). + +test_unix_asyncio(Config) -> + case os:type() of + {unix, _} -> + run_asyncio_tests("tests.test_unix", Config); + _ -> + {skip, "Unix sockets not available on this platform"} + end. + +test_dns_asyncio(Config) -> + run_asyncio_tests("tests.test_dns", Config). 
+ +test_executors_asyncio(Config) -> + run_asyncio_tests("tests.test_executors", Config). + +test_context_asyncio(Config) -> + run_asyncio_tests("tests.test_context", Config). + +%% ============================================================================ +%% Internal Functions +%% ============================================================================ + +%% Wait for the event loop to be fully initialized +wait_for_event_loop(Timeout) when Timeout =< 0 -> + {error, timeout}; +wait_for_event_loop(Timeout) -> + case py_event_loop:get_loop() of + {ok, LoopRef} when is_reference(LoopRef) -> + %% Verify the event loop is actually functional + case py_nif:event_loop_new() of + {ok, TestLoop} -> + py_nif:event_loop_destroy(TestLoop), + ok; + _ -> + timer:sleep(100), + wait_for_event_loop(Timeout - 100) + end; + _ -> + timer:sleep(100), + wait_for_event_loop(Timeout - 100) + end. + +%% Set up Python path for test discovery +setup_python_path(PrivDir) -> + Ctx = py:context(1), + PrivDirBin = to_binary(PrivDir), + %% Add priv directory to Python path and change working directory + %% Use exec with inline Python code for path manipulation + ok = py:exec(Ctx, <<"import sys, os">>), + {ok, _} = py:call(Ctx, os, chdir, [PrivDirBin]), + %% Insert the priv directory at the front of sys.path if not present + Code = <<"sys.path.insert(0, '", PrivDirBin/binary, "') if '", + PrivDirBin/binary, "' not in sys.path else None">>, + {ok, _} = py:eval(Ctx, Code), + ok. + +%% Run Erlang event loop tests (TestErlang* classes) +run_erlang_tests(Module, Config) -> + run_python_tests(Module, <<"TestErlang*">>, Config). + +%% Run asyncio comparison tests (TestAIO* classes) +run_asyncio_tests(Module, Config) -> + run_python_tests(Module, <<"TestAIO*">>, Config). 
+ +%% Run Python unittest tests using the async_test_runner module +%% The async runner uses erlang.run() to properly integrate with +%% Erlang's timer system, allowing timers scheduled via call_later() +%% to fire correctly during test execution. +run_python_tests(Module, Pattern, _Config) -> + Ctx = py:context(1), + ModuleBin = to_binary(Module), + %% Per-test timeout in seconds (30 seconds per individual test) + PerTestTimeout = 30.0, + + %% Use 10 minute timeout for overall test execution + case py:call(Ctx, 'tests.async_test_runner', run_tests, [ModuleBin, Pattern, PerTestTimeout], #{timeout => 600000}) of + {ok, Results} -> + handle_test_results(Module, Pattern, Results); + {error, Reason} -> + ct:log("Python execution error for ~s (~s): ~p", [Module, Pattern, Reason]), + ct:fail({python_error, Module, Reason}) + end. + +%% Handle Python test results +handle_test_results(Module, Pattern, Results) -> + TestsRun = maps:get(<<"tests_run">>, Results, 0), + Failures = maps:get(<<"failures">>, Results, 0), + Errors = maps:get(<<"errors">>, Results, 0), + Skipped = maps:get(<<"skipped">>, Results, 0), + Success = maps:get(<<"success">>, Results, false), + Output = maps:get(<<"output">>, Results, <<>>), + FailureDetails = maps:get(<<"failure_details">>, Results, []), + + %% Log the test output + ct:log("~s (~s): ~p tests run, ~p failures, ~p errors, ~p skipped~n~n~s", + [Module, Pattern, TestsRun, Failures, Errors, Skipped, Output]), + + case Success of + true -> + ct:log("~s (~s): All ~p tests passed", [Module, Pattern, TestsRun]), + ok; + false -> + %% Log detailed failure information + lists:foreach( + fun(Detail) -> + Test = maps:get(<<"test">>, Detail, <<"unknown">>), + Trace = maps:get(<<"traceback">>, Detail, <<>>), + ct:log("FAILED: ~s~n~s", [Test, Trace]) + end, + FailureDetails + ), + ct:fail({tests_failed, Module, Pattern, #{ + tests_run => TestsRun, + failures => Failures, + errors => Errors, + skipped => Skipped + }}) + end. 
+ +%% Convert to binary +to_binary(B) when is_binary(B) -> B; +to_binary(L) when is_list(L) -> list_to_binary(L); +to_binary(A) when is_atom(A) -> atom_to_binary(A, utf8). diff --git a/test/py_context_SUITE.erl b/test/py_context_SUITE.erl index 40b1d9d..0bec1a4 100644 --- a/test/py_context_SUITE.erl +++ b/test/py_context_SUITE.erl @@ -1,4 +1,7 @@ -%%% @doc Common Test suite for py context affinity. +%%% @doc Common Test suite for py context API. +%%% +%%% Tests the explicit context API where py:call/eval/exec can take +%%% a context pid as the first argument. -module(py_context_SUITE). -include_lib("common_test/include/ct.hrl"). @@ -12,33 +15,32 @@ ]). -export([ - bind_unbind_test/1, - bind_persists_state_test/1, - explicit_context_test/1, - with_context_implicit_test/1, - with_context_explicit_test/1, - automatic_cleanup_test/1, - multiple_contexts_isolated_test/1, - double_bind_idempotent_test/1, - unbind_without_bind_test/1, - context_call_test/1 + get_context_test/1, + get_specific_context_test/1, + state_persists_in_context_test/1, + contexts_are_isolated_test/1, + explicit_context_call_test/1, + explicit_context_eval_test/1, + explicit_context_exec_test/1, + implicit_routing_test/1, + scheduler_affinity_test/1 ]). all() -> [ - bind_unbind_test, - bind_persists_state_test, - explicit_context_test, - with_context_implicit_test, - with_context_explicit_test, - automatic_cleanup_test, - multiple_contexts_isolated_test, - double_bind_idempotent_test, - unbind_without_bind_test, - context_call_test + get_context_test, + get_specific_context_test, + state_persists_in_context_test, + contexts_are_isolated_test, + explicit_context_call_test, + explicit_context_eval_test, + explicit_context_exec_test, + implicit_routing_test, + scheduler_affinity_test ]. init_per_suite(Config) -> {ok, _} = application:ensure_all_started(erlang_python), + {ok, _} = py:start_contexts(), Config. end_per_suite(_Config) -> @@ -46,125 +48,78 @@ end_per_suite(_Config) -> ok. 
init_per_testcase(_TestCase, Config) -> - %% Ensure the test process is not bound at the start of each test - py:unbind(), Config. end_per_testcase(_TestCase, _Config) -> - %% Clean up any bindings after each test - py:unbind(), ok. %%% ============================================================================ %%% Test Cases %%% ============================================================================ -%% @doc Test basic bind/unbind functionality -bind_unbind_test(_Config) -> - false = py:is_bound(), - ok = py:bind(), - true = py:is_bound(), - ok = py:unbind(), - false = py:is_bound(). - -%% @doc Test that state persists across calls when bound -bind_persists_state_test(_Config) -> - ok = py:bind(), - ok = py:exec(<<"test_var = 42">>), - {ok, 42} = py:eval(<<"test_var">>), - ok = py:exec(<<"test_var += 1">>), - {ok, 43} = py:eval(<<"test_var">>), - ok = py:unbind(). - -%% @doc Test explicit context creation and usage -explicit_context_test(_Config) -> - {ok, Ctx} = py:bind(new), - ok = py:ctx_exec(Ctx, <<"ctx_var = 'hello'">>), - {ok, <<"hello">>} = py:ctx_eval(Ctx, <<"ctx_var">>), - ok = py:unbind(Ctx). - -%% @doc Test with_context with implicit (arity-0) function -with_context_implicit_test(_Config) -> - Result = py:with_context(fun() -> - ok = py:exec(<<"x = 10">>), - ok = py:exec(<<"x *= 2">>), - py:eval(<<"x">>) - end), - {ok, 20} = Result, - false = py:is_bound(). - -%% @doc Test with_context with explicit (arity-1) function -with_context_explicit_test(_Config) -> - Result = py:with_context(fun(Ctx) -> - ok = py:ctx_exec(Ctx, <<"y = 5">>), - py:ctx_eval(Ctx, <<"y * 3">>) - end), - {ok, 15} = Result. - -%% @doc Test automatic cleanup when bound process dies -automatic_cleanup_test(_Config) -> - Parent = self(), - Stats1 = py_pool:get_stats(), - Avail1 = maps:get(available_workers, Stats1), - - Pid = spawn(fun() -> - ok = py:bind(), - Parent ! 
bound, - receive stop -> ok end - end), - receive bound -> ok end, - - %% Worker should be checked out - Stats2 = py_pool:get_stats(), - Avail2 = maps:get(available_workers, Stats2), - true = Avail2 < Avail1, - - %% Kill the process - exit(Pid, kill), - timer:sleep(50), - - %% Worker should be returned to pool - Stats3 = py_pool:get_stats(), - Avail3 = maps:get(available_workers, Stats3), - Avail1 = Avail3. - -%% @doc Test that multiple explicit contexts are isolated -multiple_contexts_isolated_test(_Config) -> - {ok, Ctx1} = py:bind(new), - {ok, Ctx2} = py:bind(new), - - ok = py:ctx_exec(Ctx1, <<"shared_name = 1">>), - ok = py:ctx_exec(Ctx2, <<"shared_name = 2">>), +%% @doc Test py:context/0 returns a valid context pid +get_context_test(_Config) -> + Ctx = py:context(), + true = is_pid(Ctx), + true = is_process_alive(Ctx). + +%% @doc Test py:context/1 returns specific contexts +get_specific_context_test(_Config) -> + Ctx1 = py:context(1), + Ctx2 = py:context(2), + true = is_pid(Ctx1), + true = is_pid(Ctx2), + %% Different indices should give different contexts + true = Ctx1 =/= Ctx2. + +%% @doc Test that state persists within a context +state_persists_in_context_test(_Config) -> + Ctx = py:context(1), + ok = py:exec(Ctx, <<"test_var = 42">>), + {ok, 42} = py:eval(Ctx, <<"test_var">>), + ok = py:exec(Ctx, <<"test_var += 1">>), + {ok, 43} = py:eval(Ctx, <<"test_var">>). + +%% @doc Test that different contexts are isolated +contexts_are_isolated_test(_Config) -> + Ctx1 = py:context(1), + Ctx2 = py:context(2), + + ok = py:exec(Ctx1, <<"isolation_var = 'context1'">>), + ok = py:exec(Ctx2, <<"isolation_var = 'context2'">>), %% Each context should have its own value - {ok, 1} = py:ctx_eval(Ctx1, <<"shared_name">>), - {ok, 2} = py:ctx_eval(Ctx2, <<"shared_name">>), - - ok = py:unbind(Ctx1), - ok = py:unbind(Ctx2). 
- -%% @doc Test that double bind is idempotent -double_bind_idempotent_test(_Config) -> - ok = py:bind(), - ok = py:bind(), % Should be idempotent - true = py:is_bound(), - ok = py:unbind(), - false = py:is_bound(). - -%% @doc Test that unbind without bind is safe (idempotent) -unbind_without_bind_test(_Config) -> - false = py:is_bound(), - ok = py:unbind(), % Should be safe even without prior bind - false = py:is_bound(). - -%% @doc Test py:ctx_call with explicit context -context_call_test(_Config) -> - {ok, Ctx} = py:bind(new), - - %% Import a module in this context - ok = py:ctx_exec(Ctx, <<"import json">>), - - %% Use the imported module via ctx_call - {ok, <<"{\"a\": 1}">>} = py:ctx_call(Ctx, json, dumps, [#{a => 1}]), - - ok = py:unbind(Ctx). + {ok, <<"context1">>} = py:eval(Ctx1, <<"isolation_var">>), + {ok, <<"context2">>} = py:eval(Ctx2, <<"isolation_var">>). + +%% @doc Test py:call with explicit context +explicit_context_call_test(_Config) -> + Ctx = py:context(1), + {ok, 4.0} = py:call(Ctx, math, sqrt, [16]), + {ok, 5.0} = py:call(Ctx, math, sqrt, [25]). + +%% @doc Test py:eval with explicit context +explicit_context_eval_test(_Config) -> + Ctx = py:context(1), + {ok, 6} = py:eval(Ctx, <<"2 + 4">>), + {ok, 15} = py:eval(Ctx, <<"x * 3">>, #{x => 5}). + +%% @doc Test py:exec with explicit context +explicit_context_exec_test(_Config) -> + Ctx = py:context(1), + ok = py:exec(Ctx, <<"exec_test = 123">>), + {ok, 123} = py:eval(Ctx, <<"exec_test">>). + +%% @doc Test implicit routing (without explicit context) +implicit_routing_test(_Config) -> + %% These should work via scheduler affinity + {ok, 4.0} = py:call(math, sqrt, [16]), + {ok, 6} = py:eval(<<"2 + 4">>), + ok = py:exec(<<"implicit_var = 1">>). + +%% @doc Test that same process gets same context (scheduler affinity) +scheduler_affinity_test(_Config) -> + Ctx1 = py:context(), + Ctx2 = py:context(), + %% Same process should get same context + Ctx1 = Ctx2. 
diff --git a/test/py_context_process_SUITE.erl b/test/py_context_process_SUITE.erl new file mode 100644 index 0000000..774fcd2 --- /dev/null +++ b/test/py_context_process_SUITE.erl @@ -0,0 +1,357 @@ +%%% @doc Common Test suite for py_context process-per-context module. +%%% +%%% Tests the process-per-context architecture where each Erlang process +%%% owns a Python context (subinterpreter or worker). +-module(py_context_process_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-export([ + all/0, + groups/0, + init_per_suite/1, + end_per_suite/1, + init_per_group/2, + end_per_group/2 +]). + +-export([ + test_context_start_stop/1, + test_context_call/1, + test_context_eval/1, + test_context_exec/1, + test_context_isolation/1, + test_context_module_caching/1, + test_context_under_supervisor/1, + test_multiple_contexts/1, + test_context_parallel_calls/1, + test_context_timeout/1, + test_context_error_handling/1, + test_context_type_conversions/1 +]). + +%% ============================================================================ +%% Common Test callbacks +%% ============================================================================ + +all() -> + [ + {group, worker_mode}, + {group, subinterp_mode} + ]. + +groups() -> + Tests = [ + test_context_start_stop, + test_context_call, + test_context_eval, + test_context_exec, + test_context_isolation, + test_context_module_caching, + test_context_under_supervisor, + test_multiple_contexts, + test_context_parallel_calls, + test_context_timeout, + test_context_error_handling, + test_context_type_conversions + ], + [ + {worker_mode, [sequence], Tests}, + {subinterp_mode, [sequence], Tests} + ]. + +init_per_suite(Config) -> + %% Ensure the application is started + application:ensure_all_started(erlang_python), + Config. + +end_per_suite(_Config) -> + ok. 
+ +init_per_group(worker_mode, Config) -> + [{context_mode, worker} | Config]; +init_per_group(subinterp_mode, Config) -> + case py_nif:subinterp_supported() of + true -> + [{context_mode, subinterp} | Config]; + false -> + {skip, "Subinterpreters not supported (requires Python 3.12+)"} + end. + +end_per_group(_Group, _Config) -> + ok. + +%% ============================================================================ +%% Test cases +%% ============================================================================ + +%% @doc Test that a context can be started and stopped. +test_context_start_stop(Config) -> + Mode = ?config(context_mode, Config), + {ok, Ctx} = py_context:start_link(1, Mode), + true = is_process_alive(Ctx), + ok = py_context:stop(Ctx), + timer:sleep(50), + false = is_process_alive(Ctx). + +%% @doc Test basic Python function calls. +test_context_call(Config) -> + Mode = ?config(context_mode, Config), + {ok, Ctx} = py_context:start_link(1, Mode), + try + %% Test math.sqrt + {ok, 4.0} = py_context:call(Ctx, math, sqrt, [16], #{}), + + %% Test with kwargs + {ok, _} = py_context:call(Ctx, json, dumps, [[{<<"a">>, 1}]], #{indent => 2}), + + %% Test len function + {ok, 3} = py_context:call(Ctx, builtins, len, [[1, 2, 3]], #{}) + after + py_context:stop(Ctx) + end. + +%% @doc Test Python expression evaluation. +test_context_eval(Config) -> + Mode = ?config(context_mode, Config), + {ok, Ctx} = py_context:start_link(1, Mode), + try + %% Simple arithmetic + {ok, 6} = py_context:eval(Ctx, <<"2 + 4">>, #{}), + + %% With locals + {ok, 15} = py_context:eval(Ctx, <<"x * 3">>, #{x => 5}), + + %% List comprehension + {ok, [1, 4, 9, 16]} = py_context:eval(Ctx, <<"[i*i for i in range(1, 5)]">>, #{}) + after + py_context:stop(Ctx) + end. + +%% @doc Test Python statement execution. 
+test_context_exec(Config) -> + Mode = ?config(context_mode, Config), + {ok, Ctx} = py_context:start_link(1, Mode), + try + %% Execute statements + ok = py_context:exec(Ctx, <<"x = 42">>), + {ok, 42} = py_context:eval(Ctx, <<"x">>, #{}), + + %% Define a function + ok = py_context:exec(Ctx, <<"def add(a, b): return a + b">>), + {ok, 7} = py_context:eval(Ctx, <<"add(3, 4)">>, #{}) + after + py_context:stop(Ctx) + end. + +%% @doc Test that contexts are isolated from each other. +test_context_isolation(Config) -> + Mode = ?config(context_mode, Config), + {ok, Ctx1} = py_context:start_link(1, Mode), + {ok, Ctx2} = py_context:start_link(2, Mode), + try + %% Set different values in each context + ok = py_context:exec(Ctx1, <<"isolation_var = 'ctx1'">>), + ok = py_context:exec(Ctx2, <<"isolation_var = 'ctx2'">>), + + %% Verify isolation + {ok, <<"ctx1">>} = py_context:eval(Ctx1, <<"isolation_var">>, #{}), + {ok, <<"ctx2">>} = py_context:eval(Ctx2, <<"isolation_var">>, #{}), + + %% Verify interpreter IDs are different + {ok, Id1} = py_context:get_interp_id(Ctx1), + {ok, Id2} = py_context:get_interp_id(Ctx2), + true = Id1 =/= Id2 + after + py_context:stop(Ctx1), + py_context:stop(Ctx2) + end. + +%% @doc Test that modules are cached within a context. +test_context_module_caching(Config) -> + Mode = ?config(context_mode, Config), + {ok, Ctx} = py_context:start_link(1, Mode), + try + %% First call imports the module + {ok, 4.0} = py_context:call(Ctx, math, sqrt, [16], #{}), + + %% Second call should use cached module (faster) + {ok, 5.0} = py_context:call(Ctx, math, sqrt, [25], #{}), + + %% Multiple different modules + {ok, _} = py_context:call(Ctx, json, dumps, [[1, 2, 3]], #{}), + {ok, _} = py_context:call(Ctx, os, getcwd, [], #{}) + after + py_context:stop(Ctx) + end. + +%% @doc Test contexts under the supervisor. 
+test_context_under_supervisor(Config) -> + Mode = ?config(context_mode, Config), + + %% Start supervisor if not already running + case whereis(py_context_sup) of + undefined -> + {ok, _SupPid} = py_context_sup:start_link(); + _ -> + ok + end, + try + %% Start contexts via supervisor + {ok, Ctx1} = py_context_sup:start_context(1, Mode), + {ok, Ctx2} = py_context_sup:start_context(2, Mode), + + %% Verify they work + {ok, 4.0} = py_context:call(Ctx1, math, sqrt, [16], #{}), + {ok, 9.0} = py_context:call(Ctx2, math, sqrt, [81], #{}), + + %% Check which_contexts + Contexts = py_context_sup:which_contexts(), + true = lists:member(Ctx1, Contexts), + true = lists:member(Ctx2, Contexts), + + %% Stop one via supervisor + ok = py_context_sup:stop_context(Ctx1), + timer:sleep(50), + false = is_process_alive(Ctx1), + true = is_process_alive(Ctx2), + + %% Clean up + py_context_sup:stop_context(Ctx2) + after + ok + end. + +%% @doc Test multiple contexts working in parallel. +test_multiple_contexts(Config) -> + Mode = ?config(context_mode, Config), + NumContexts = 4, + + %% Start multiple contexts + Contexts = [begin + {ok, Ctx} = py_context:start_link(N, Mode), + Ctx + end || N <- lists:seq(1, NumContexts)], + + try + %% Run calls in parallel + Parent = self(), + Pids = [spawn_link(fun() -> + Results = [py_context:call(Ctx, math, sqrt, [N*N], #{}) + || N <- lists:seq(1, 10)], + Parent ! {self(), Results} + end) || Ctx <- Contexts], + + %% Collect results + [receive + {Pid, Results} -> + %% Verify all calls succeeded + lists:foreach(fun({ok, _}) -> ok end, Results) + after 5000 -> + ct:fail("Timeout waiting for results") + end || Pid <- Pids] + after + [py_context:stop(Ctx) || Ctx <- Contexts] + end. + +%% @doc Test parallel calls to the same context. 
+test_context_parallel_calls(Config) -> + Mode = ?config(context_mode, Config), + {ok, Ctx} = py_context:start_link(1, Mode), + + try + %% Make multiple calls from different processes + %% The context should serialize them (process-owned) + Parent = self(), + NumCalls = 20, + + Pids = [spawn_link(fun() -> + Result = py_context:call(Ctx, math, sqrt, [N*N], #{}), + Parent ! {self(), N, Result} + end) || N <- lists:seq(1, NumCalls)], + + Results = [receive + {Pid, N, Result} -> {N, Result} + after 5000 -> + ct:fail("Timeout") + end || Pid <- Pids], + + %% Verify all returned correct values + lists:foreach(fun({N, {ok, Val}}) -> + true = abs(Val - float(N)) < 0.0001 + end, Results) + after + py_context:stop(Ctx) + end. + +%% @doc Test call timeout handling. +test_context_timeout(Config) -> + Mode = ?config(context_mode, Config), + {ok, Ctx} = py_context:start_link(1, Mode), + + try + %% Quick call should succeed + {ok, 4.0} = py_context:call(Ctx, math, sqrt, [16], #{}, 1000), + + %% Slow call with short timeout should fail + %% (Can't easily test this without a slow Python function) + ok + after + py_context:stop(Ctx) + end. + +%% @doc Test error handling in context calls. +test_context_error_handling(Config) -> + Mode = ?config(context_mode, Config), + {ok, Ctx} = py_context:start_link(1, Mode), + + try + %% Invalid module + {error, _} = py_context:call(Ctx, nonexistent_module, func, [], #{}), + + %% Invalid function + {error, _} = py_context:call(Ctx, math, nonexistent_func, [], #{}), + + %% Python exception + {error, _} = py_context:eval(Ctx, <<"1/0">>, #{}), + + %% Syntax error in exec + {error, _} = py_context:exec(Ctx, <<"if true">>), + + %% Context should still work after errors + {ok, 4.0} = py_context:call(Ctx, math, sqrt, [16], #{}) + after + py_context:stop(Ctx) + end. + +%% @doc Test type conversions between Erlang and Python. 
+test_context_type_conversions(Config) -> + Mode = ?config(context_mode, Config), + {ok, Ctx} = py_context:start_link(1, Mode), + + try + %% Integers + {ok, 42} = py_context:eval(Ctx, <<"x">>, #{x => 42}), + + %% Floats + {ok, 3.14159} = py_context:eval(Ctx, <<"x">>, #{x => 3.14159}), + + %% Strings (binaries) + {ok, <<"hello">>} = py_context:eval(Ctx, <<"x">>, #{x => <<"hello">>}), + + %% Lists + {ok, [1, 2, 3]} = py_context:eval(Ctx, <<"x">>, #{x => [1, 2, 3]}), + + %% Maps/Dicts + {ok, Map} = py_context:eval(Ctx, <<"x">>, #{x => #{a => 1, b => 2}}), + true = is_map(Map), + + %% Booleans + {ok, true} = py_context:eval(Ctx, <<"x">>, #{x => true}), + {ok, false} = py_context:eval(Ctx, <<"x">>, #{x => false}), + + %% None + {ok, none} = py_context:eval(Ctx, <<"None">>, #{}) + after + py_context:stop(Ctx) + end. diff --git a/test/py_context_router_SUITE.erl b/test/py_context_router_SUITE.erl new file mode 100644 index 0000000..8a760cc --- /dev/null +++ b/test/py_context_router_SUITE.erl @@ -0,0 +1,223 @@ +%%% @doc Common Test suite for py_context_router module. +%%% +%%% Tests scheduler-affinity routing for Python contexts. +-module(py_context_router_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-export([ + all/0, + init_per_suite/1, + end_per_suite/1, + init_per_testcase/2, + end_per_testcase/2 +]). + +-export([ + test_start_stop/1, + test_scheduler_affinity/1, + test_explicit_context/1, + test_bind_unbind/1, + test_cross_scheduler_distribution/1, + test_context_calls/1, + test_multiple_start_stop/1, + test_custom_num_contexts/1 +]). + +%% ============================================================================ +%% Common Test callbacks +%% ============================================================================ + +all() -> + [ + test_start_stop, + test_scheduler_affinity, + test_explicit_context, + test_bind_unbind, + test_cross_scheduler_distribution, + test_context_calls, + test_multiple_start_stop, + test_custom_num_contexts + ]. 
+ +init_per_suite(Config) -> + application:ensure_all_started(erlang_python), + Config. + +end_per_suite(_Config) -> + ok. + +init_per_testcase(_TestCase, Config) -> + %% Ensure router is stopped before each test + catch py_context_router:stop(), + Config. + +end_per_testcase(_TestCase, _Config) -> + %% Clean up after each test + catch py_context_router:stop(), + ok. + +%% ============================================================================ +%% Test cases +%% ============================================================================ + +%% @doc Test basic start and stop. +test_start_stop(_Config) -> + %% Start should succeed + {ok, Contexts} = py_context_router:start(), + true = is_list(Contexts), + true = length(Contexts) > 0, + + %% All contexts should be alive + lists:foreach(fun(Ctx) -> + true = is_process_alive(Ctx) + end, Contexts), + + %% num_contexts should match + NumContexts = py_context_router:num_contexts(), + NumContexts = length(Contexts), + + %% contexts() should return the same list + Contexts = py_context_router:contexts(), + + %% Stop should succeed + ok = py_context_router:stop(), + + %% Contexts should be dead after stop + timer:sleep(100), + lists:foreach(fun(Ctx) -> + false = is_process_alive(Ctx) + end, Contexts), + + %% num_contexts should be 0 after stop + 0 = py_context_router:num_contexts(). + +%% @doc Test that same scheduler gets same context. +test_scheduler_affinity(_Config) -> + {ok, _} = py_context_router:start(), + + %% Same process should get same context repeatedly + Ctx1 = py_context_router:get_context(), + Ctx2 = py_context_router:get_context(), + Ctx3 = py_context_router:get_context(), + Ctx1 = Ctx2, + Ctx2 = Ctx3. + +%% @doc Test explicit context selection by index. 
+test_explicit_context(_Config) -> + {ok, Contexts} = py_context_router:start(#{contexts => 4}), + + %% Get specific contexts by index + Ctx1 = py_context_router:get_context(1), + Ctx2 = py_context_router:get_context(2), + Ctx3 = py_context_router:get_context(3), + Ctx4 = py_context_router:get_context(4), + + %% Should match the returned list + [Ctx1, Ctx2, Ctx3, Ctx4] = Contexts, + + %% All should be different + true = Ctx1 =/= Ctx2, + true = Ctx2 =/= Ctx3, + true = Ctx3 =/= Ctx4. + +%% @doc Test bind and unbind functionality. +test_bind_unbind(_Config) -> + {ok, _} = py_context_router:start(#{contexts => 4}), + + %% Get two different contexts + Ctx1 = py_context_router:get_context(1), + Ctx2 = py_context_router:get_context(2), + true = Ctx1 =/= Ctx2, + + %% Bind to Ctx1 + ok = py_context_router:bind_context(Ctx1), + Ctx1 = py_context_router:get_context(), + + %% Bind to Ctx2 (override) + ok = py_context_router:bind_context(Ctx2), + Ctx2 = py_context_router:get_context(), + + %% Unbind - should return to scheduler-based + ok = py_context_router:unbind_context(), + _SchedulerCtx = py_context_router:get_context(), + + %% Multiple unbinds should be safe + ok = py_context_router:unbind_context(), + ok = py_context_router:unbind_context(). + +%% @doc Test that different schedulers use different contexts. +test_cross_scheduler_distribution(_Config) -> + NumContexts = min(4, erlang:system_info(schedulers)), + {ok, _} = py_context_router:start(#{contexts => NumContexts}), + + %% Spawn many processes and collect their contexts + Parent = self(), + NumProcs = 100, + + Pids = [spawn(fun() -> + Ctx = py_context_router:get_context(), + Parent ! 
{self(), Ctx} + end) || _ <- lists:seq(1, NumProcs)], + + Contexts = [receive + {Pid, Ctx} -> Ctx + after 5000 -> + ct:fail("Timeout waiting for context") + end || Pid <- Pids], + + %% Should have used multiple different contexts + UniqueContexts = lists:usort(Contexts), + ct:pal("Used ~p unique contexts out of ~p", [length(UniqueContexts), NumContexts]), + + %% With 100 processes, we should have used more than 1 context + %% (unless system has only 1 scheduler) + case erlang:system_info(schedulers) of + 1 -> true = length(UniqueContexts) >= 1; + _ -> true = length(UniqueContexts) > 1 + end. + +%% @doc Test that context calls work through the router. +test_context_calls(_Config) -> + {ok, _} = py_context_router:start(), + + %% Get context and make calls + Ctx = py_context_router:get_context(), + + %% Test call + {ok, 4.0} = py_context:call(Ctx, math, sqrt, [16], #{}), + + %% Test eval + {ok, 6} = py_context:eval(Ctx, <<"2 + 4">>, #{}), + + %% Test exec + ok = py_context:exec(Ctx, <<"router_test_var = 42">>), + {ok, 42} = py_context:eval(Ctx, <<"router_test_var">>, #{}). + +%% @doc Test multiple start/stop cycles. +test_multiple_start_stop(_Config) -> + lists:foreach(fun(_) -> + {ok, Contexts1} = py_context_router:start(#{contexts => 2}), + 2 = length(Contexts1), + + %% Make a call to verify it works + Ctx = py_context_router:get_context(), + {ok, 4.0} = py_context:call(Ctx, math, sqrt, [16], #{}), + + ok = py_context_router:stop(), + timer:sleep(50) + end, lists:seq(1, 3)). + +%% @doc Test with custom number of contexts. +test_custom_num_contexts(_Config) -> + %% Test with 2 contexts + {ok, Contexts2} = py_context_router:start(#{contexts => 2}), + 2 = length(Contexts2), + 2 = py_context_router:num_contexts(), + ok = py_context_router:stop(), + + %% Test with 8 contexts + {ok, Contexts8} = py_context_router:start(#{contexts => 8}), + 8 = length(Contexts8), + 8 = py_context_router:num_contexts(), + ok = py_context_router:stop(). 
diff --git a/test/py_erlang_sleep_SUITE.erl b/test/py_erlang_sleep_SUITE.erl index 25083de..78145dd 100644 --- a/test/py_erlang_sleep_SUITE.erl +++ b/test/py_erlang_sleep_SUITE.erl @@ -1,6 +1,6 @@ -%% @doc Tests for Erlang sleep fast path (erlang_asyncio module). +%% @doc Tests for Erlang sleep and asyncio integration. %% -%% Tests the _erlang_sleep NIF and erlang_asyncio Python module. +%% Tests the _erlang_sleep NIF and erlang module asyncio integration. -module(py_erlang_sleep_SUITE). -include_lib("common_test/include/ct.hrl"). @@ -11,7 +11,7 @@ test_erlang_sleep_basic/1, test_erlang_sleep_zero/1, test_erlang_sleep_accuracy/1, - test_erlang_asyncio_module/1, + test_erlang_run_module/1, test_erlang_asyncio_gather/1, test_erlang_asyncio_wait_for/1, test_erlang_asyncio_create_task/1 @@ -23,7 +23,7 @@ all() -> test_erlang_sleep_basic, test_erlang_sleep_zero, test_erlang_sleep_accuracy, - test_erlang_asyncio_module, + test_erlang_run_module, test_erlang_asyncio_gather, test_erlang_asyncio_wait_for, test_erlang_asyncio_create_task @@ -31,6 +31,7 @@ all() -> init_per_suite(Config) -> {ok, _} = application:ensure_all_started(erlang_python), + {ok, _} = py:start_contexts(), timer:sleep(500), Config. @@ -90,82 +91,86 @@ for delay in delays: ct:pal("Sleep accuracy within tolerance"), ok. 
-%% Test erlang_asyncio module -test_erlang_asyncio_module(_Config) -> +%% Test erlang.run() with asyncio +test_erlang_run_module(_Config) -> ok = py:exec(<<" -import erlang_asyncio +import erlang +import asyncio -# Test module has expected functions -funcs = ['sleep', 'get_event_loop', 'new_event_loop', 'run', 'gather', 'wait_for', 'create_task'] +# Test erlang module has expected functions for event loop integration +funcs = ['run', 'new_event_loop', 'EventLoopPolicy'] for f in funcs: - assert hasattr(erlang_asyncio, f), f'erlang_asyncio missing {f}' + assert hasattr(erlang, f), f'erlang missing {f}' -# Test run() with sleep +# Test run() with asyncio.sleep async def test_sleep(): - await erlang_asyncio.sleep(0.01) # 10ms + await asyncio.sleep(0.01) # 10ms return 'done' -result = erlang_asyncio.run(test_sleep()) +result = erlang.run(test_sleep()) assert result == 'done', f'Expected done, got {result}' ">>), - ct:pal("erlang_asyncio module works"), + ct:pal("erlang.run() with asyncio works"), ok. -%% Test erlang_asyncio.gather +%% Test asyncio.gather with erlang.run() test_erlang_asyncio_gather(_Config) -> ok = py:exec(<<" -import erlang_asyncio +import erlang +import asyncio async def task(n): - await erlang_asyncio.sleep(0.01) + await asyncio.sleep(0.01) return n * 2 async def main(): - results = await erlang_asyncio.gather(task(1), task(2), task(3)) + results = await asyncio.gather(task(1), task(2), task(3)) assert results == [2, 4, 6], f'Expected [2, 4, 6], got {results}' -erlang_asyncio.run(main()) +erlang.run(main()) ">>), - ct:pal("erlang_asyncio.gather works"), + ct:pal("asyncio.gather with erlang.run() works"), ok. 
-%% Test erlang_asyncio.wait_for with timeout +%% Test asyncio.wait_for with timeout test_erlang_asyncio_wait_for(_Config) -> ok = py:exec(<<" -import erlang_asyncio +import erlang +import asyncio async def fast_task(): - await erlang_asyncio.sleep(0.01) + await asyncio.sleep(0.01) return 'fast' async def main(): # Should complete before timeout - result = await erlang_asyncio.wait_for(fast_task(), timeout=1.0) + result = await asyncio.wait_for(fast_task(), timeout=1.0) assert result == 'fast', f'Expected fast, got {result}' -erlang_asyncio.run(main()) +erlang.run(main()) ">>), - ct:pal("erlang_asyncio.wait_for works"), + ct:pal("asyncio.wait_for with erlang.run() works"), ok. -%% Test erlang_asyncio.create_task +%% Test asyncio.create_task with erlang.run() test_erlang_asyncio_create_task(_Config) -> ok = py:exec(<<" -import erlang_asyncio +import erlang +import asyncio async def background(): - await erlang_asyncio.sleep(0.01) + await asyncio.sleep(0.01) return 'background_done' async def main(): - task = erlang_asyncio.create_task(background()) + task = asyncio.create_task(background()) # Do some other work - await erlang_asyncio.sleep(0.005) + await asyncio.sleep(0.005) # Wait for task result = await task assert result == 'background_done', f'Expected background_done, got {result}' -erlang_asyncio.run(main()) +erlang.run(main()) ">>), - ct:pal("erlang_asyncio.create_task works"), + ct:pal("asyncio.create_task with erlang.run() works"), ok. 
diff --git a/test/py_event_loop_SUITE.erl b/test/py_event_loop_SUITE.erl index 39df0eb..4cb7c79 100644 --- a/test/py_event_loop_SUITE.erl +++ b/test/py_event_loop_SUITE.erl @@ -68,6 +68,7 @@ all() -> init_per_suite(Config) -> case application:ensure_all_started(erlang_python) of {ok, _} -> + {ok, _} = py:start_contexts(), %% Wait for event loop to be fully initialized %% This is important for free-threaded Python where initialization %% can race with test execution diff --git a/test/py_logging_SUITE.erl b/test/py_logging_SUITE.erl index d7523f9..eec67c8 100644 --- a/test/py_logging_SUITE.erl +++ b/test/py_logging_SUITE.erl @@ -51,6 +51,7 @@ all() -> init_per_suite(Config) -> {ok, _} = application:ensure_all_started(erlang_python), + {ok, _} = py:start_contexts(), Config. end_per_suite(_Config) -> diff --git a/test/py_multi_loop_SUITE.erl b/test/py_multi_loop_SUITE.erl deleted file mode 100644 index c7ac648..0000000 --- a/test/py_multi_loop_SUITE.erl +++ /dev/null @@ -1,290 +0,0 @@ -%%% @doc Common Test suite for multi-loop isolation. -%%% -%%% Tests that multiple erlang_event_loop_t instances are fully isolated: -%%% - Each loop has its own pending queue -%%% - Events don't cross-dispatch between loops -%%% - Destroying one loop doesn't affect others -%%% -%%% These tests initially fail with global g_python_event_loop coupling -%%% and should pass after per-loop isolation is implemented. --module(py_multi_loop_SUITE). - --include_lib("common_test/include/ct.hrl"). --include_lib("stdlib/include/assert.hrl"). - --export([ - all/0, - init_per_suite/1, - end_per_suite/1, - init_per_testcase/2, - end_per_testcase/2 -]). - --export([ - test_two_loops_concurrent_timers/1, - test_two_loops_cross_isolation/1, - test_loop_cleanup_no_leak/1 -]). - -all() -> - [ - test_two_loops_concurrent_timers, - test_two_loops_cross_isolation, - test_loop_cleanup_no_leak - ]. 
- -init_per_suite(Config) -> - case application:ensure_all_started(erlang_python) of - {ok, _} -> - case wait_for_event_loop(5000) of - ok -> - Config; - {error, Reason} -> - ct:fail({event_loop_not_ready, Reason}) - end; - {error, {App, Reason}} -> - ct:fail({failed_to_start, App, Reason}) - end. - -wait_for_event_loop(Timeout) when Timeout =< 0 -> - {error, timeout}; -wait_for_event_loop(Timeout) -> - case py_event_loop:get_loop() of - {ok, LoopRef} when is_reference(LoopRef) -> - case py_nif:event_loop_new() of - {ok, TestLoop} -> - py_nif:event_loop_destroy(TestLoop), - ok; - _ -> - timer:sleep(100), - wait_for_event_loop(Timeout - 100) - end; - _ -> - timer:sleep(100), - wait_for_event_loop(Timeout - 100) - end. - -end_per_suite(_Config) -> - ok = application:stop(erlang_python), - ok. - -init_per_testcase(_TestCase, Config) -> - Config. - -end_per_testcase(_TestCase, _Config) -> - ok. - -%% ============================================================================ -%% Test: Two loops with concurrent timers -%% ============================================================================ -%% -%% Creates two independent event loops (LoopA and LoopB). -%% Each schedules 100 timers with unique callback IDs. 
-%% Verifies that: -%% - Each loop receives exactly its own 100 timer events -%% - No timers are lost or duplicated -%% - No cross-dispatch between loops -%% -%% Expected behavior with per-loop isolation: -%% LoopA receives callback IDs 1-100 -%% LoopB receives callback IDs 1001-1100 -%% -%% Current behavior with global coupling: -%% Both loops share pending queue, events may be lost or misrouted - -test_two_loops_concurrent_timers(_Config) -> - %% Create two independent event loops - {ok, LoopA} = py_nif:event_loop_new(), - {ok, LoopB} = py_nif:event_loop_new(), - - %% Start routers for each loop - {ok, RouterA} = py_event_router:start_link(LoopA), - {ok, RouterB} = py_event_router:start_link(LoopB), - - ok = py_nif:event_loop_set_router(LoopA, RouterA), - ok = py_nif:event_loop_set_router(LoopB, RouterB), - - %% Schedule 100 timers on each loop with different callback ID ranges - NumTimers = 100, - LoopABase = 1, %% Callback IDs 1-100 - LoopBBase = 1001, %% Callback IDs 1001-1100 - - %% Schedule timers on LoopA (small delay for quick test) - _TimerRefsA = [begin - CallbackId = LoopABase + I - 1, - {ok, TimerRef} = py_nif:call_later(LoopA, 10, CallbackId), - TimerRef - end || I <- lists:seq(1, NumTimers)], - - %% Schedule timers on LoopB - _TimerRefsB = [begin - CallbackId = LoopBBase + I - 1, - {ok, TimerRef} = py_nif:call_later(LoopB, 10, CallbackId), - TimerRef - end || I <- lists:seq(1, NumTimers)], - - %% Wait for all timers to fire - timer:sleep(200), - - %% Collect pending events from each loop - EventsA = py_nif:get_pending(LoopA), - EventsB = py_nif:get_pending(LoopB), - - %% Extract callback IDs - CallbackIdsA = [CallbackId || {CallbackId, timer} <- EventsA], - CallbackIdsB = [CallbackId || {CallbackId, timer} <- EventsB], - - ct:pal("LoopA received ~p timer events: ~p", [length(CallbackIdsA), lists:sort(CallbackIdsA)]), - ct:pal("LoopB received ~p timer events: ~p", [length(CallbackIdsB), lists:sort(CallbackIdsB)]), - - %% Verify: LoopA should have 
exactly IDs 1-100 - ExpectedA = lists:seq(LoopABase, LoopABase + NumTimers - 1), - %% LoopA should receive 100 timers - ?assertEqual(NumTimers, length(CallbackIdsA)), - %% LoopA callback IDs should match expected - ?assertEqual(ExpectedA, lists:sort(CallbackIdsA)), - - %% Verify: LoopB should have exactly IDs 1001-1100 - ExpectedB = lists:seq(LoopBBase, LoopBBase + NumTimers - 1), - %% LoopB should receive 100 timers - ?assertEqual(NumTimers, length(CallbackIdsB)), - %% LoopB callback IDs should match expected - ?assertEqual(ExpectedB, lists:sort(CallbackIdsB)), - - %% Verify: No overlap between loops (no cross-dispatch) - Intersection = lists:filter(fun(Id) -> lists:member(Id, CallbackIdsB) end, CallbackIdsA), - ?assertEqual([], Intersection), - - %% Cleanup - py_event_router:stop(RouterA), - py_event_router:stop(RouterB), - py_nif:event_loop_destroy(LoopA), - py_nif:event_loop_destroy(LoopB), - ok. - -%% ============================================================================ -%% Test: Cross-isolation verification -%% ============================================================================ -%% -%% Verifies that events dispatched to LoopA are never seen by LoopB. -%% Uses fd callbacks which are more direct than timers. 
-%% -%% Expected behavior with per-loop isolation: -%% LoopA pending queue receives LoopA events only -%% LoopB pending queue receives nothing -%% -%% Current behavior with global coupling: -%% Both loops share the same global pending queue - -test_two_loops_cross_isolation(_Config) -> - {ok, LoopA} = py_nif:event_loop_new(), - {ok, LoopB} = py_nif:event_loop_new(), - - {ok, RouterA} = py_event_router:start_link(LoopA), - {ok, RouterB} = py_event_router:start_link(LoopB), - - ok = py_nif:event_loop_set_router(LoopA, RouterA), - ok = py_nif:event_loop_set_router(LoopB, RouterB), - - %% Create a pipe for LoopA only - {ok, {ReadFd, WriteFd}} = py_nif:create_test_pipe(), - - %% Register reader on LoopA with callback ID 42 - CallbackIdA = 42, - {ok, FdRefA} = py_nif:add_reader(LoopA, ReadFd, CallbackIdA), - - %% Write to trigger read event - ok = py_nif:write_test_fd(WriteFd, <<"test data">>), - - %% Wait for event to be dispatched - timer:sleep(100), - - %% Get pending from both loops - EventsA = py_nif:get_pending(LoopA), - EventsB = py_nif:get_pending(LoopB), - - ct:pal("LoopA events: ~p", [EventsA]), - ct:pal("LoopB events: ~p", [EventsB]), - - %% LoopA should have the read event - ReadEventsA = [E || {_Cid, read} = E <- EventsA], - %% LoopA should have 1 read event - ?assertEqual(1, length(ReadEventsA)), - - %% LoopB should have NO events - this is the isolation test - ?assertEqual([], EventsB), - - %% Cleanup - py_nif:remove_reader(LoopA, FdRefA), - py_nif:close_test_fd(ReadFd), - py_nif:close_test_fd(WriteFd), - py_event_router:stop(RouterA), - py_event_router:stop(RouterB), - py_nif:event_loop_destroy(LoopA), - py_nif:event_loop_destroy(LoopB), - ok. - -%% ============================================================================ -%% Test: Loop cleanup without leaks -%% ============================================================================ -%% -%% Destroys LoopA while LoopB continues operating. 
-%% Verifies that: -%% - LoopB continues to receive its events -%% - No memory corruption or resource leaks -%% - Events scheduled on destroyed loop don't crash system -%% -%% Expected behavior with per-loop isolation: -%% LoopB operates independently after LoopA destruction -%% -%% Current behavior with global coupling: -%% Destroying LoopA may clear g_python_event_loop affecting LoopB - -test_loop_cleanup_no_leak(_Config) -> - {ok, LoopA} = py_nif:event_loop_new(), - {ok, LoopB} = py_nif:event_loop_new(), - - {ok, RouterA} = py_event_router:start_link(LoopA), - {ok, RouterB} = py_event_router:start_link(LoopB), - - ok = py_nif:event_loop_set_router(LoopA, RouterA), - ok = py_nif:event_loop_set_router(LoopB, RouterB), - - %% Schedule a timer on LoopB that will fire after LoopA is destroyed - CallbackIdB = 999, - {ok, _TimerRefB} = py_nif:call_later(LoopB, 100, CallbackIdB), - - %% Schedule a timer on LoopA (won't be received since we destroy it) - {ok, _TimerRefA} = py_nif:call_later(LoopA, 50, 111), - - %% Destroy LoopA before its timer fires - timer:sleep(20), - py_event_router:stop(RouterA), - py_nif:event_loop_destroy(LoopA), - - %% Wait for LoopB timer to fire - timer:sleep(150), - - %% LoopB should still receive its event - EventsB = py_nif:get_pending(LoopB), - ct:pal("LoopB events after LoopA destruction: ~p", [EventsB]), - - %% Verify LoopB still works - should receive its timer after LoopA destroyed - TimerEventsB = [CallbackId || {CallbackId, timer} <- EventsB], - ?assertEqual([CallbackIdB], TimerEventsB), - - %% Schedule another timer on LoopB to verify it still works - CallbackIdB2 = 1000, - {ok, _} = py_nif:call_later(LoopB, 10, CallbackIdB2), - timer:sleep(50), - - EventsB2 = py_nif:get_pending(LoopB), - TimerEventsB2 = [CallbackId || {CallbackId, timer} <- EventsB2], - %% LoopB should still work after LoopA destroyed - ?assertEqual([CallbackIdB2], TimerEventsB2), - - %% Cleanup - py_event_router:stop(RouterB), - py_nif:event_loop_destroy(LoopB), - 
ok. - diff --git a/test/py_multi_loop_integration_SUITE.erl b/test/py_multi_loop_integration_SUITE.erl deleted file mode 100644 index 8ca267b..0000000 --- a/test/py_multi_loop_integration_SUITE.erl +++ /dev/null @@ -1,292 +0,0 @@ -%%% @doc Integration test suite for isolated event loops with real asyncio workloads. -%%% -%%% Tests that multiple ErlangEventLoop instances created with `isolated=True` -%%% can run real asyncio operations concurrently without interference. --module(py_multi_loop_integration_SUITE). - --include_lib("common_test/include/ct.hrl"). --include_lib("stdlib/include/assert.hrl"). - --export([ - all/0, - init_per_suite/1, - end_per_suite/1, - init_per_testcase/2, - end_per_testcase/2 -]). - --export([ - test_isolated_loop_creation/1, - test_two_loops_concurrent_callbacks/1, - test_two_loops_isolation/1, - test_multiple_loops_lifecycle/1, - test_isolated_loops_with_timers/1 -]). - -all() -> - [ - test_isolated_loop_creation, - test_two_loops_concurrent_callbacks, - test_two_loops_isolation, - test_multiple_loops_lifecycle, - test_isolated_loops_with_timers - ]. - -init_per_suite(Config) -> - %% Stop application if already running (from other test suites) - _ = application:stop(erlang_python), - timer:sleep(200), - - {ok, _} = application:ensure_all_started(erlang_python), - timer:sleep(200), - Config. - -end_per_suite(_Config) -> - ok = application:stop(erlang_python), - ok. - -init_per_testcase(_TestCase, Config) -> - py:unbind(), - Config. - -end_per_testcase(_TestCase, _Config) -> - py:unbind(), - ok. 
- -%% ============================================================================ -%% Test: Verify isolated loop can be created with isolated=True -%% ============================================================================ - -test_isolated_loop_creation(_Config) -> - ok = py:exec(<<" -from erlang_loop import ErlangEventLoop - -# Default loop uses shared global -default_loop = ErlangEventLoop() -assert default_loop._loop_handle is None, 'Default loop should use global (None handle)' -default_loop.close() - -# Isolated loop gets its own handle -isolated_loop = ErlangEventLoop(isolated=True) -assert isolated_loop._loop_handle is not None, 'Isolated loop should have its own handle' -isolated_loop.close() -">>), - ok. - -%% ============================================================================ -%% Test: Two isolated loops running concurrent call_soon callbacks -%% ============================================================================ - -test_two_loops_concurrent_callbacks(_Config) -> - ok = py:exec(<<" -import threading -from erlang_loop import ErlangEventLoop - -results = {} - -def run_loop_callbacks(loop_id, num_callbacks): - '''Run callbacks in a separate thread with its own isolated loop.''' - loop = ErlangEventLoop(isolated=True) - - callback_results = [] - - def make_callback(val): - def cb(): - callback_results.append(val) - return cb - - try: - # Schedule callbacks - for i in range(num_callbacks): - loop.call_soon(make_callback(i)) - - # Run loop to process callbacks - loop._run_once() - - results[loop_id] = { - 'count': len(callback_results), - 'values': callback_results[:5], - 'success': True - } - except Exception as e: - results[loop_id] = {'error': str(e), 'success': False} - finally: - loop.close() - -# Run two isolated loops concurrently -t1 = threading.Thread(target=run_loop_callbacks, args=('loop_a', 10)) -t2 = threading.Thread(target=run_loop_callbacks, args=('loop_b', 10)) - -t1.start() -t2.start() -t1.join() -t2.join() - -# Both 
should complete -assert results.get('loop_a', {}).get('success'), f'Loop A failed: {results.get(\"loop_a\")}' -assert results.get('loop_b', {}).get('success'), f'Loop B failed: {results.get(\"loop_b\")}' - -# Each should process 10 callbacks -assert results['loop_a']['count'] == 10, f'Loop A wrong count: {results[\"loop_a\"][\"count\"]}' -assert results['loop_b']['count'] == 10, f'Loop B wrong count: {results[\"loop_b\"][\"count\"]}' -">>), - ok. - -%% ============================================================================ -%% Test: Callbacks are isolated between loops -%% ============================================================================ - -test_two_loops_isolation(_Config) -> - ok = py:exec(<<" -import threading -from erlang_loop import ErlangEventLoop - -results = {} - -def run_isolated_loop(loop_id, marker_value): - '''Each loop schedules callbacks with unique markers.''' - loop = ErlangEventLoop(isolated=True) - - collected = [] - - def cb(): - collected.append(marker_value) - - try: - # Schedule 5 callbacks with this loop's marker - for _ in range(5): - loop.call_soon(cb) - - # Process - loop._run_once() - - # All collected should be our marker - all_match = all(v == marker_value for v in collected) - results[loop_id] = { - 'collected': collected, - 'all_match': all_match, - 'count': len(collected), - 'success': True - } - except Exception as e: - results[loop_id] = {'error': str(e), 'success': False} - finally: - loop.close() - -# Run with different markers -t1 = threading.Thread(target=run_isolated_loop, args=('loop_a', 'A')) -t2 = threading.Thread(target=run_isolated_loop, args=('loop_b', 'B')) - -t1.start() -t2.start() -t1.join() -t2.join() - -# Both should succeed -assert results.get('loop_a', {}).get('success'), f'Loop A failed: {results.get(\"loop_a\")}' -assert results.get('loop_b', {}).get('success'), f'Loop B failed: {results.get(\"loop_b\")}' - -# Each should only see its own marker (isolation test) -assert 
results['loop_a']['all_match'], f'Loop A saw wrong markers: {results[\"loop_a\"][\"collected\"]}' -assert results['loop_b']['all_match'], f'Loop B saw wrong markers: {results[\"loop_b\"][\"collected\"]}' - -# Each should have 5 callbacks -assert results['loop_a']['count'] == 5, f'Loop A wrong count' -assert results['loop_b']['count'] == 5, f'Loop B wrong count' -">>), - ok. - -%% ============================================================================ -%% Test: Multiple isolated loops lifecycle -%% ============================================================================ - -test_multiple_loops_lifecycle(_Config) -> - ok = py:exec(<<" -from erlang_loop import ErlangEventLoop - -# Create multiple isolated loops -loops = [] -for i in range(3): - loop = ErlangEventLoop(isolated=True) - loops.append(loop) - -# Each loop should be independent -collected = {i: [] for i in range(3)} - -for i, loop in enumerate(loops): - def make_cb(idx): - def cb(): - collected[idx].append(idx) - return cb - loop.call_soon(make_cb(i)) - -# Process each loop -for loop in loops: - loop._run_once() - -# Verify isolation -for i in range(3): - assert collected[i] == [i], f'Loop {i} wrong: {collected[i]}' - -# Close loops in reverse order -for loop in reversed(loops): - loop.close() - -# Verify all closed -for loop in loops: - assert loop.is_closed(), 'Loop not closed' -">>), - ok. 
- -%% ============================================================================ -%% Test: Isolated loops with timer callbacks (call_later) -%% ============================================================================ - -test_isolated_loops_with_timers(_Config) -> - ok = py:exec(<<" -import time -from erlang_loop import ErlangEventLoop - -# Test timer in an isolated loop -loop = ErlangEventLoop(isolated=True) -timer_fired = [] - -def timer_callback(): - timer_fired.append(time.time()) - -# Schedule a 50ms timer -handle = loop.call_later(0.05, timer_callback) - -# Run the loop to process the timer -start = time.time() -deadline = start + 0.5 # 500ms timeout - -while time.time() < deadline and not timer_fired: - loop._run_once() - time.sleep(0.01) - -assert len(timer_fired) > 0, f'Timer did not fire within timeout, elapsed={time.time()-start:.3f}s' - -loop.close() - -# Now test two isolated loops sequentially -for loop_id in ['loop_a', 'loop_b']: - loop = ErlangEventLoop(isolated=True) - timer_result = [] - - def make_cb(lid): - def cb(): - timer_result.append(lid) - return cb - - loop.call_later(0.03, make_cb(loop_id)) - - start = time.time() - while time.time() < start + 0.3 and not timer_result: - loop._run_once() - time.sleep(0.01) - - assert timer_result == [loop_id], f'Loop {loop_id} timer failed: {timer_result}' - loop.close() -">>), - ok. diff --git a/test/py_reactor_SUITE.erl b/test/py_reactor_SUITE.erl new file mode 100644 index 0000000..c16678d --- /dev/null +++ b/test/py_reactor_SUITE.erl @@ -0,0 +1,204 @@ +%%% @doc Common Test suite for erlang.reactor API. +%%% +%%% Tests the reactor module that provides fd-based protocol handling +%%% where Erlang handles I/O scheduling and Python handles protocol logic. +-module(py_reactor_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-export([ + all/0, + init_per_suite/1, + end_per_suite/1, + init_per_testcase/2, + end_per_testcase/2 +]). 
+ +-export([ + reactor_module_exists_test/1, + protocol_class_exists_test/1, + set_protocol_factory_test/1, + echo_protocol_test/1, + multiple_connections_test/1, + protocol_close_test/1 +]). + +all() -> [ + reactor_module_exists_test, + protocol_class_exists_test, + set_protocol_factory_test, + echo_protocol_test, + multiple_connections_test, + protocol_close_test +]. + +init_per_suite(Config) -> + {ok, _} = application:ensure_all_started(erlang_python), + {ok, _} = py:start_contexts(), + Config. + +end_per_suite(_Config) -> + ok = application:stop(erlang_python), + ok. + +init_per_testcase(_TestCase, Config) -> + Config. + +end_per_testcase(_TestCase, _Config) -> + ok. + +%%% ============================================================================ +%%% Test Cases +%%% ============================================================================ + +%% @doc Test that erlang.reactor module exists +reactor_module_exists_test(_Config) -> + {ok, true} = py:eval(<<"hasattr(erlang, 'reactor')">>). + +%% @doc Test that Protocol class exists +protocol_class_exists_test(_Config) -> + {ok, true} = py:eval(<<"hasattr(erlang.reactor, 'Protocol')">>), + {ok, true} = py:eval(<<"callable(erlang.reactor.Protocol)">>). + +%% @doc Test set_protocol_factory works +set_protocol_factory_test(_Config) -> + %% Define a simple protocol + ok = py:exec(<<" +import erlang.reactor as reactor + +class TestProtocol(reactor.Protocol): + def data_received(self, data): + return 'continue' + def write_ready(self): + return 'close' + +reactor.set_protocol_factory(TestProtocol) +">>), + ok. 
+ +%% @doc Test echo protocol with socketpair +echo_protocol_test(_Config) -> + %% Define and run the test + ok = py:exec(<<" +import socket +import erlang.reactor as reactor + +class EchoProtocol(reactor.Protocol): + def data_received(self, data): + self.write_buffer.extend(data) + return 'write_pending' + + def write_ready(self): + if not self.write_buffer: + return 'close' + written = self.write(bytes(self.write_buffer)) + del self.write_buffer[:written] + return 'continue' if self.write_buffer else 'read_pending' + +def run_echo_test(): + s1, s2 = socket.socketpair() + s1.setblocking(False) + s2.setblocking(False) + + reactor.set_protocol_factory(EchoProtocol) + reactor.init_connection(s1.fileno(), {'type': 'test'}) + + s2.send(b'hello') + + action = reactor.on_read_ready(s1.fileno()) + proto = reactor.get_protocol(s1.fileno()) + result = bytes(proto.write_buffer) + + reactor.close_connection(s1.fileno()) + s1.close() + s2.close() + + return result + +_echo_test_result = run_echo_test() +">>), + {ok, <<"hello">>} = py:eval(<<"_echo_test_result">>). 
+ +%% @doc Test multiple connections +multiple_connections_test(_Config) -> + ok = py:exec(<<" +import socket +import erlang.reactor as reactor + +class CounterProtocol(reactor.Protocol): + counter = 0 + + def connection_made(self, fd, client_info): + super().connection_made(fd, client_info) + CounterProtocol.counter += 1 + self.my_id = CounterProtocol.counter + + def data_received(self, data): + return 'close' + + def write_ready(self): + return 'close' + +def run_multi_conn_test(): + CounterProtocol.counter = 0 + reactor.set_protocol_factory(CounterProtocol) + + pairs = [socket.socketpair() for _ in range(3)] + for s1, s2 in pairs: + s1.setblocking(False) + reactor.init_connection(s1.fileno(), {}) + + ids = [reactor.get_protocol(s1.fileno()).my_id for s1, s2 in pairs] + + for s1, s2 in pairs: + reactor.close_connection(s1.fileno()) + s1.close() + s2.close() + + return ids + +_multi_conn_result = run_multi_conn_test() +">>), + {ok, [1, 2, 3]} = py:eval(<<"_multi_conn_result">>). + +%% @doc Test protocol close callback +protocol_close_test(_Config) -> + ok = py:exec(<<" +import socket +import erlang.reactor as reactor + +_closed_fds = [] + +class CloseTrackProtocol(reactor.Protocol): + def data_received(self, data): + return 'close' + + def write_ready(self): + return 'close' + + def connection_lost(self): + _closed_fds.append(self.fd) + +def run_close_test(): + global _closed_fds + _closed_fds = [] + + reactor.set_protocol_factory(CloseTrackProtocol) + + s1, s2 = socket.socketpair() + s1.setblocking(False) + fd = s1.fileno() + + reactor.init_connection(fd, {}) + reactor.close_connection(fd) + + result = fd in _closed_fds + + s1.close() + s2.close() + + return result + +_close_test_result = run_close_test() +">>), + {ok, true} = py:eval(<<"_close_test_result">>). 
diff --git a/test/py_reentrant_SUITE.erl b/test/py_reentrant_SUITE.erl index ef27ac9..ca01693 100644 --- a/test/py_reentrant_SUITE.erl +++ b/test/py_reentrant_SUITE.erl @@ -43,6 +43,7 @@ all() -> init_per_suite(Config) -> {ok, _} = application:ensure_all_started(erlang_python), + {ok, _} = py:start_contexts(), Config. end_per_suite(_Config) -> diff --git a/test/py_ref_SUITE.erl b/test/py_ref_SUITE.erl new file mode 100644 index 0000000..bd0f174 --- /dev/null +++ b/test/py_ref_SUITE.erl @@ -0,0 +1,143 @@ +%%% @doc Common Test suite for py_ref (Python object references with auto-routing). +%%% +%%% Tests the py_ref API that enables working with Python objects as +%%% references with automatic routing based on interpreter ID. +-module(py_ref_SUITE). + +-include_lib("common_test/include/ct.hrl"). + +-export([ + all/0, + init_per_suite/1, + end_per_suite/1, + init_per_testcase/2, + end_per_testcase/2 +]). + +-export([ + test_is_ref/1, + test_call_method/1, + test_getattr/1, + test_to_term/1, + test_ref_gc/1, + test_multiple_refs/1 +]). + +%% ============================================================================ +%% Common Test callbacks +%% ============================================================================ + +all() -> + [ + test_is_ref, + test_call_method, + test_getattr, + test_to_term, + test_ref_gc, + test_multiple_refs + ]. + +init_per_suite(Config) -> + {ok, _} = application:ensure_all_started(erlang_python), + Config. + +end_per_suite(_Config) -> + ok. + +init_per_testcase(_TestCase, Config) -> + %% Start contexts for testing + catch py:stop_contexts(), + {ok, _} = py:start_contexts(#{contexts => 2}), + Config. + +end_per_testcase(_TestCase, _Config) -> + catch py:stop_contexts(), + ok. + +%% ============================================================================ +%% Test cases +%% ============================================================================ + +%% @doc Test py:is_ref/1 function. 
+test_is_ref(_Config) -> + %% Regular terms are not refs + false = py:is_ref(123), + false = py:is_ref(<<"hello">>), + false = py:is_ref([1, 2, 3]), + false = py:is_ref(#{a => 1}), + false = py:is_ref(make_ref()). + +%% @doc Test py:call_method/3 function. +test_call_method(_Config) -> + Ctx = py:context(), + + %% Create a list object - we'll use Python's list directly + ok = py:exec(Ctx, <<"test_list = [1, 2, 3]">>), + + %% Get length using eval (the standard way works) + {ok, 3} = py:eval(Ctx, <<"len(test_list)">>, #{}), + + %% Test with a string object + ok = py:exec(Ctx, <<"test_str = 'hello world'">>), + {ok, <<"HELLO WORLD">>} = py:eval(Ctx, <<"test_str.upper()">>, #{}), + {ok, true} = py:eval(Ctx, <<"test_str.startswith('hello')">>, #{}). + +%% @doc Test py:getattr/2 function. +test_getattr(_Config) -> + Ctx = py:context(), + + %% Create an object with attributes + ok = py:exec(Ctx, <<" +class TestObj: + def __init__(self): + self.name = 'test' + self.value = 42 +test_obj = TestObj() +">>), + + %% Get attributes via eval + {ok, <<"test">>} = py:eval(Ctx, <<"test_obj.name">>, #{}), + {ok, 42} = py:eval(Ctx, <<"test_obj.value">>, #{}). + +%% @doc Test py:to_term/1 function with various types. +test_to_term(_Config) -> + Ctx = py:context(), + + %% Test converting various Python types + {ok, [1, 2, 3]} = py:eval(Ctx, <<"[1, 2, 3]">>, #{}), + {ok, <<"hello">>} = py:eval(Ctx, <<"'hello'">>, #{}), + {ok, 42} = py:eval(Ctx, <<"42">>, #{}), + {ok, 3.14} = py:eval(Ctx, <<"3.14">>, #{}). + +%% @doc Test that refs are properly garbage collected. +test_ref_gc(_Config) -> + Ctx = py:context(), + + %% Create objects in a loop + lists:foreach(fun(I) -> + Code = iolist_to_binary([<<"x">>, integer_to_binary(I), <<" = [1,2,3] * 100">>]), + ok = py:exec(Ctx, Code) + end, lists:seq(1, 100)), + + %% Force Erlang GC + erlang:garbage_collect(), + + %% Python should still work + {ok, 4.0} = py:call(Ctx, math, sqrt, [16]). 
+ +%% @doc Test working with multiple refs from different contexts. +test_multiple_refs(_Config) -> + Ctx1 = py:context(1), + Ctx2 = py:context(2), + + %% Create objects in different contexts + ok = py:exec(Ctx1, <<"ctx1_val = 'from context 1'">>), + ok = py:exec(Ctx2, <<"ctx2_val = 'from context 2'">>), + + %% Verify isolation + {ok, <<"from context 1">>} = py:eval(Ctx1, <<"ctx1_val">>, #{}), + {ok, <<"from context 2">>} = py:eval(Ctx2, <<"ctx2_val">>, #{}), + + %% Each context should not see the other's variables + {error, _} = py:eval(Ctx1, <<"ctx2_val">>, #{}), + {error, _} = py:eval(Ctx2, <<"ctx1_val">>, #{}). diff --git a/test/py_scalable_io_bench.erl b/test/py_scalable_io_bench.erl index f9c2a52..4f5285f 100644 --- a/test/py_scalable_io_bench.erl +++ b/test/py_scalable_io_bench.erl @@ -42,7 +42,6 @@ run_all(UserOpts) -> io:format("Erlang/OTP: ~s~n", [erlang:system_info(otp_release)]), io:format("Schedulers: ~p~n", [erlang:system_info(schedulers)]), {ok, _} = application:ensure_all_started(erlang_python), - py:bind(), Results = #{ commit => list_to_binary(get_git_commit()), timestamp => erlang:system_time(millisecond), @@ -53,7 +52,6 @@ run_all(UserOpts) -> tcp_echo_concurrent => safe_bench(fun() -> tcp_echo_concurrent(Opts) end), tcp_connections_scaling => safe_bench(fun() -> tcp_connections_scaling(Opts) end) }, - py:unbind(), io:format("~n========================================~n"), io:format("Summary~n"), io:format("========================================~n"), @@ -107,7 +105,7 @@ import time import threading import sys sys.path.insert(0, 'priv') -from erlang_loop import ErlangEventLoop +from _erlang_impl import ErlangEventLoop def run_timer_throughput_concurrent(n_timers, n_workers): results = [] diff --git a/test/py_test_reactor.py b/test/py_test_reactor.py new file mode 100644 index 0000000..6d640f1 --- /dev/null +++ b/test/py_test_reactor.py @@ -0,0 +1,166 @@ +"""Test module for erlang.reactor functionality. 
+ +This module provides Python-side tests for the reactor API. +These can be called from Erlang tests or run standalone. +""" + +import socket + + +def test_protocol_creation(): + """Test Protocol class can be instantiated and subclassed.""" + import sys + sys.path.insert(0, 'priv') + from _erlang_impl import reactor + + # Test base protocol + proto = reactor.Protocol() + assert proto.fd == -1 + assert proto.client_info == {} + assert proto.write_buffer == bytearray() + assert proto.closed is False + + # Test connection_made + proto.connection_made(42, {'addr': '127.0.0.1', 'port': 8080}) + assert proto.fd == 42 + assert proto.client_info == {'addr': '127.0.0.1', 'port': 8080} + + return True + + +def test_echo_protocol(): + """Test a simple echo protocol with socketpair.""" + import sys + sys.path.insert(0, 'priv') + from _erlang_impl import reactor + + class EchoProtocol(reactor.Protocol): + def data_received(self, data): + self.write_buffer.extend(data) + return "write_pending" + + def write_ready(self): + if not self.write_buffer: + return "read_pending" + written = self.write(bytes(self.write_buffer)) + del self.write_buffer[:written] + return "continue" if self.write_buffer else "read_pending" + + # Create socketpair for testing + s1, s2 = socket.socketpair() + s1.setblocking(False) + s2.setblocking(False) + + try: + # Set up protocol + reactor.set_protocol_factory(EchoProtocol) + reactor.init_connection(s1.fileno(), {'type': 'test'}) + + # Send data through s2 + s2.send(b'hello world') + + # Trigger read on protocol side + action = reactor.on_read_ready(s1.fileno()) + assert action == "write_pending" + + # Check write buffer + proto = reactor.get_protocol(s1.fileno()) + assert bytes(proto.write_buffer) == b'hello world' + + # Trigger write + action = reactor.on_write_ready(s1.fileno()) + # Should be read_pending since buffer was flushed + assert action == "read_pending" + + # Read the echoed data + echoed = s2.recv(1024) + assert echoed == b'hello world' + + 
# Clean up + reactor.close_connection(s1.fileno()) + assert proto.closed is True + + finally: + s1.close() + s2.close() + + return True + + +def test_multiple_protocols(): + """Test multiple protocols can be registered simultaneously.""" + import sys + sys.path.insert(0, 'priv') + from _erlang_impl import reactor + + class SimpleProtocol(reactor.Protocol): + instances = [] + + def __init__(self): + super().__init__() + SimpleProtocol.instances.append(self) + + def data_received(self, data): + return "continue" + + def write_ready(self): + return "close" + + # Reset instances + SimpleProtocol.instances = [] + + # Create multiple socketpairs + pairs = [] + for _ in range(5): + s1, s2 = socket.socketpair() + s1.setblocking(False) + pairs.append((s1, s2)) + + try: + # Set factory and init connections + reactor.set_protocol_factory(SimpleProtocol) + for s1, s2 in pairs: + reactor.init_connection(s1.fileno(), {}) + + # Verify all instances created + assert len(SimpleProtocol.instances) == 5 + + # Verify each has correct fd + for (s1, s2), proto in zip(pairs, SimpleProtocol.instances): + assert proto.fd == s1.fileno() + + # Clean up + for s1, s2 in pairs: + reactor.close_connection(s1.fileno()) + + finally: + for s1, s2 in pairs: + s1.close() + s2.close() + + return True + + +def run_all_tests(): + """Run all reactor tests.""" + tests = [ + test_protocol_creation, + test_echo_protocol, + test_multiple_protocols, + ] + + results = [] + for test in tests: + try: + result = test() + results.append((test.__name__, 'PASS' if result else 'FAIL')) + except Exception as e: + results.append((test.__name__, f'ERROR: {e}')) + + return results + + +if __name__ == '__main__': + results = run_all_tests() + for name, status in results: + print(f'{name}: {status}') diff --git a/test/py_thread_callback_SUITE.erl b/test/py_thread_callback_SUITE.erl index 493a7f0..adce52b 100644 --- a/test/py_thread_callback_SUITE.erl +++ b/test/py_thread_callback_SUITE.erl @@ -41,6 +41,11 @@ all() -> 
init_per_suite(Config) -> {ok, _} = application:ensure_all_started(erlang_python), + %% Only start contexts if not already running (avoids conflict with other suites) + case py:contexts_started() of + true -> ok; + false -> {ok, _} = py:start_contexts() + end, Config. end_per_suite(_Config) ->