close
Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Lib/test/audit-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,16 @@ def rl(name):
else:
return None

try:
import _remote_debugging
except ImportError:
_remote_debugging = None

def rd(name):
if _remote_debugging:
return getattr(_remote_debugging, name, None)
return None

# Try a range of "open" functions.
# All of them should fail
with TestHook(raise_on_events={"open"}) as hook:
Expand All @@ -225,6 +235,8 @@ def rl(name):
(rl("append_history_file"), 0, None),
(rl("read_init_file"), testfn),
(rl("read_init_file"), None),
(rd("BinaryWriter"), testfn, 1000, 0),
(rd("BinaryReader"), testfn),
]:
if not fn:
continue
Expand Down Expand Up @@ -258,6 +270,8 @@ def rl(name):
("~/.history", "a") if rl("append_history_file") else None,
(testfn, "r") if readline else None,
("<readline_init_file>", "r") if readline else None,
(testfn, "wb") if rd("BinaryWriter") else None,
(testfn, "rb") if rd("BinaryReader") else None,
]
if i is not None
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import json
import os
import pathlib
import random
import struct
import tempfile
Expand Down Expand Up @@ -814,6 +815,35 @@ def test_invalid_file_path(self):
with BinaryReader("/nonexistent/path/file.bin") as reader:
reader.replay_samples(RawCollector())

def test_path_arguments_round_trip(self):
"""Reader and writer accept str, bytes or os.PathLike."""
with tempfile.NamedTemporaryFile(suffix=".bin", delete=False) as f:
filename = f.name
self.temp_files.append(filename)

for path_arg in (filename, os.fsencode(filename), pathlib.Path(filename)):
with self.subTest(path_type=type(path_arg).__name__):
writer = _remote_debugging.BinaryWriter(path_arg, 1000, 0)
writer.finalize()
reader = _remote_debugging.BinaryReader(path_arg)
info = reader.get_info()
reader.close()
self.assertEqual(info["sample_count"], 0)

def test_rejects_non_pathlike(self):
"""Reader and writer raise TypeError on non-path-like filenames."""
with self.assertRaises(TypeError):
_remote_debugging.BinaryWriter(123, 1000, 0)
with self.assertRaises(TypeError):
_remote_debugging.BinaryReader(123)

def test_invalid_path_error_preserves_pathlib(self):
"""Missing path: OSError carries the original path object, not a string."""
missing = pathlib.Path("/i/do/not/exist")
with self.assertRaises(FileNotFoundError) as cm:
_remote_debugging.BinaryReader(missing)
self.assertEqual(os.fspath(cm.exception.filename), os.fspath(missing))

def test_writer_handles_empty_stack_first_sample(self):
"""BinaryWriter.write_sample tolerates an empty stack on a fresh thread.

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix the binary writer in :mod:`profiling.sampling` not firing the audit event
(:pep:`578`) when creating the output file. The writer and the reader now
accept any path-like object. Patch by Maurycy Pawłowski-Wieroński.
12 changes: 4 additions & 8 deletions Modules/_remote_debugging/binary_io.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,6 @@ typedef struct {
/* Main binary writer structure */
typedef struct {
FILE *fp;
char *filename;

/* Write buffer for batched I/O */
uint8_t *write_buffer;
Expand Down Expand Up @@ -311,10 +310,7 @@ typedef struct {

/* Main binary reader structure */
typedef struct {
char *filename;

#if USE_MMAP
int fd;
uint8_t *mapped_data;
size_t mapped_size;
#else
Expand Down Expand Up @@ -522,7 +518,7 @@ grow_array_inplace(void **ptr_addr, size_t count, size_t *capacity, size_t elem_
* Create a new binary writer.
*
* Arguments:
* filename: Path to output file
* path: Path to output file
* sample_interval_us: Sampling interval in microseconds
* compression_type: COMPRESSION_NONE or COMPRESSION_ZSTD
* start_time_us: Start timestamp in microseconds (from time.monotonic() * 1e6)
Expand All @@ -531,7 +527,7 @@ grow_array_inplace(void **ptr_addr, size_t count, size_t *capacity, size_t elem_
* New BinaryWriter* on success, NULL on failure (PyErr set)
*/
BinaryWriter *binary_writer_create(
const char *filename,
PyObject *path,
uint64_t sample_interval_us,
int compression_type,
uint64_t start_time_us
Expand Down Expand Up @@ -583,12 +579,12 @@ void binary_writer_destroy(BinaryWriter *writer);
* Open a binary file for reading.
*
* Arguments:
* filename: Path to input file
* path: Path to input file
*
* Returns:
* New BinaryReader* on success, NULL on failure (PyErr set)
*/
BinaryReader *binary_reader_open(const char *filename);
BinaryReader *binary_reader_open(PyObject *path);

/*
* Replay samples from binary file through a collector.
Expand Down
45 changes: 15 additions & 30 deletions Modules/_remote_debugging/binary_io_reader.c
Original file line number Diff line number Diff line change
Expand Up @@ -358,37 +358,26 @@ reader_parse_frame_table(BinaryReader *reader, const uint8_t *data, size_t file_
}

BinaryReader *
binary_reader_open(const char *filename)
binary_reader_open(PyObject *path)
{
BinaryReader *reader = PyMem_Calloc(1, sizeof(BinaryReader));
if (!reader) {
PyErr_NoMemory();
return NULL;
}

#if USE_MMAP
reader->fd = -1; /* Explicit initialization for cleanup safety */
#endif

reader->filename = PyMem_Malloc(strlen(filename) + 1);
Comment thread
pablogsal marked this conversation as resolved.
if (!reader->filename) {
PyMem_Free(reader);
PyErr_NoMemory();
return NULL;
}
strcpy(reader->filename, filename);

#if USE_MMAP
/* Open with mmap on Unix */
reader->fd = open(filename, O_RDONLY);
if (reader->fd < 0) {
PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
FILE *fp = Py_fopen(path, "rb");
if (!fp) {
goto error;
}
int fd = fileno(fp);

struct stat st;
if (fstat(reader->fd, &st) < 0) {
if (fstat(fd, &st) < 0) {
PyErr_SetFromErrno(PyExc_IOError);
Py_fclose(fp);
goto error;
}
reader->mapped_size = st.st_size;
Expand All @@ -400,14 +389,15 @@ binary_reader_open(const char *filename)
*/
#ifdef __linux__
reader->mapped_data = mmap(NULL, reader->mapped_size, PROT_READ,
MAP_PRIVATE | MAP_POPULATE, reader->fd, 0);
MAP_PRIVATE | MAP_POPULATE, fd, 0);
#else
reader->mapped_data = mmap(NULL, reader->mapped_size, PROT_READ,
MAP_PRIVATE, reader->fd, 0);
MAP_PRIVATE, fd, 0);
#endif
if (reader->mapped_data == MAP_FAILED) {
reader->mapped_data = NULL;
PyErr_SetFromErrno(PyExc_IOError);
Py_fclose(fp);
goto error;
}

Expand All @@ -428,19 +418,20 @@ binary_reader_open(const char *filename)

/* Add file descriptor-level hints for better kernel I/O scheduling */
#if defined(__linux__) && defined(POSIX_FADV_SEQUENTIAL)
(void)posix_fadvise(reader->fd, 0, 0, POSIX_FADV_SEQUENTIAL);
(void)posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: os_posix_fadvise_impl has two implementation details that this place does not: Py_BEGIN_ALLOW_THREADS is set and async_err = PyErr_CheckSignals() is checked.

Do we need to do this here?

Copy link
Copy Markdown
Member

@pablogsal pablogsal May 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don’t think we need it here. These calls are best-effort performance hints and intentionally ignore all posix_fadvise() failures, unlike os.posix_fadvise where the result is user-visible and must handle EINTR/signals correctly. PyErr_CheckSignals() would make an advisory hint observable by turning some interruptions into failures, which does not match the surrounding madvise()/posix_fadvise() handling.

Py_BEGIN_ALLOW_THREADS also is not needed for correctness here; if we wanted to tune this path, we would probably need to look at the whole block, since mmap(... MAP_POPULATE) and madvise(... MADV_WILLNEED) are also currently called with the GIL held.

if (reader->mapped_size > (64 * 1024 * 1024)) {
(void)posix_fadvise(reader->fd, 0, 0, POSIX_FADV_WILLNEED);
(void)posix_fadvise(fd, 0, 0, POSIX_FADV_WILLNEED);
}
#endif

(void)Py_fclose(fp);

uint8_t *data = reader->mapped_data;
size_t file_size = reader->mapped_size;
#else
/* Use stdio on Windows */
reader->fp = fopen(filename, "rb");
reader->fp = Py_fopen(path, "rb");
if (!reader->fp) {
PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
goto error;
}

Expand Down Expand Up @@ -1263,8 +1254,6 @@ binary_reader_close(BinaryReader *reader)
return;
}

PyMem_Free(reader->filename);

#if USE_MMAP
if (reader->mapped_data) {
munmap(reader->mapped_data, reader->mapped_size);
Expand All @@ -1274,13 +1263,9 @@ binary_reader_close(BinaryReader *reader)
/* Clear sample_data which may point into the now-unmapped region */
reader->sample_data = NULL;
reader->sample_data_size = 0;
if (reader->fd >= 0) {
close(reader->fd);
reader->fd = -1; /* Mark as closed */
}
#else
if (reader->fp) {
fclose(reader->fp);
Py_fclose(reader->fp);
reader->fp = NULL;
}
if (reader->file_data) {
Expand Down
18 changes: 4 additions & 14 deletions Modules/_remote_debugging/binary_io_writer.c
Original file line number Diff line number Diff line change
Expand Up @@ -717,7 +717,7 @@ write_sample_with_encoding(BinaryWriter *writer, ThreadEntry *entry,
}

BinaryWriter *
binary_writer_create(const char *filename, uint64_t sample_interval_us, int compression_type,
binary_writer_create(PyObject *path, uint64_t sample_interval_us, int compression_type,
uint64_t start_time_us)
{
BinaryWriter *writer = PyMem_Calloc(1, sizeof(BinaryWriter));
Expand All @@ -726,14 +726,6 @@ binary_writer_create(const char *filename, uint64_t sample_interval_us, int comp
return NULL;
}

writer->filename = PyMem_Malloc(strlen(filename) + 1);
Comment thread
pablogsal marked this conversation as resolved.
if (!writer->filename) {
PyMem_Free(writer);
PyErr_NoMemory();
return NULL;
}
strcpy(writer->filename, filename);

writer->start_time_us = start_time_us;
writer->sample_interval_us = sample_interval_us;
writer->compression_type = compression_type;
Expand Down Expand Up @@ -799,9 +791,8 @@ binary_writer_create(const char *filename, uint64_t sample_interval_us, int comp
}
}

writer->fp = fopen(filename, "wb");
writer->fp = Py_fopen(path, "wb");
if (!writer->fp) {
PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
goto error;
}

Expand Down Expand Up @@ -1193,7 +1184,7 @@ binary_writer_finalize(BinaryWriter *writer)
return -1;
}

if (fclose(writer->fp) != 0) {
if (Py_fclose(writer->fp) != 0) {
writer->fp = NULL;
PyErr_SetFromErrno(PyExc_IOError);
return -1;
Expand All @@ -1211,10 +1202,9 @@ binary_writer_destroy(BinaryWriter *writer)
}

if (writer->fp) {
fclose(writer->fp);
Py_fclose(writer->fp);
Copy link
Copy Markdown
Member

@sobolevn sobolevn May 8, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: why don't we check the return code here? Because the returned type is void?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is nothing we can do if close fails, we could log perhaps...

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this case I would suggest that we explicitly suppress the returned value and add a comment. We can also raise an unraisable exception (which I do in curses)

}

PyMem_Free(writer->filename);
PyMem_Free(writer->write_buffer);

#ifdef HAVE_ZSTD
Expand Down
38 changes: 7 additions & 31 deletions Modules/_remote_debugging/clinic/module.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading