Skip to content

Commit

Permalink
pythongh-129205: Add os.readinto() API for reading data into a caller…
Browse files Browse the repository at this point in the history
… provided buffer (python#129211)

Add a new OS API which will read data directly into a caller provided
writeable buffer protocol object.

Co-authored-by: Bénédikt Tran <[email protected]>
Co-authored-by: Victor Stinner <[email protected]>
  • Loading branch information
3 people authored Jan 26, 2025
1 parent 0ef8d47 commit 1ed4487
Show file tree
Hide file tree
Showing 7 changed files with 267 additions and 1 deletion.
27 changes: 27 additions & 0 deletions Doc/library/os.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1659,6 +1659,33 @@ or `the MSDN <https://msdn.microsoft.com/en-us/library/z0kc8e3z.aspx>`_ on Windo
:exc:`InterruptedError` exception (see :pep:`475` for the rationale).


.. function:: readinto(fd, buffer, /)

Read from a file descriptor *fd* into a mutable
:ref:`buffer object <bufferobjects>` *buffer*.

The *buffer* should be mutable and :term:`bytes-like <bytes-like object>`. On
success, returns the number of bytes read. Less bytes may be read than the
size of the buffer. The underlying system call will be retried when
interrupted by a signal, unless the signal handler raises an exception.
Other errors will not be retried and an error will be raised.

Returns 0 if *fd* is at end of file or if the provided *buffer* has
length 0 (which can be used to check for errors without reading data).
Never returns negative.

.. note::

This function is intended for low-level I/O and must be applied to a file
descriptor as returned by :func:`os.open` or :func:`os.pipe`. To read a
"file object" returned by the built-in function :func:`open`, or
:data:`sys.stdin`, use its member functions, for example
:meth:`io.BufferedIOBase.readinto`, :meth:`io.BufferedIOBase.read`, or
:meth:`io.TextIOBase.read`

.. versionadded:: next


.. function:: sendfile(out_fd, in_fd, offset, count)
sendfile(out_fd, in_fd, offset, count, headers=(), trailers=(), flags=0)

Expand Down
4 changes: 4 additions & 0 deletions Doc/whatsnew/3.14.rst
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,10 @@ os
to the :mod:`os` module.
(Contributed by James Roy in :gh:`127688`.)

* Add the :func:`os.readinto` function to read into a
:ref:`buffer object <bufferobjects>` from a file descriptor.
(Contributed by Cody Maloney in :gh:`129205`.)


pathlib
-------
Expand Down
31 changes: 31 additions & 0 deletions Lib/test/_test_eintr.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,37 @@ def test_read(self):
self.assertEqual(data, os.read(rd, len(data)))
self.assertEqual(proc.wait(), 0)

def test_readinto(self):
rd, wr = os.pipe()
self.addCleanup(os.close, rd)
# wr closed explicitly by parent

# the payload below are smaller than PIPE_BUF, hence the writes will be
# atomic
datas = [b"hello", b"world", b"spam"]

code = '\n'.join((
'import os, sys, time',
'',
'wr = int(sys.argv[1])',
'datas = %r' % datas,
'sleep_time = %r' % self.sleep_time,
'',
'for data in datas:',
' # let the parent block on read()',
' time.sleep(sleep_time)',
' os.write(wr, data)',
))

proc = self.subprocess(code, str(wr), pass_fds=[wr])
with kill_on_error(proc):
os.close(wr)
for data in datas:
buffer = bytearray(len(data))
self.assertEqual(os.readinto(rd, buffer), len(data))
self.assertEqual(buffer, data)
self.assertEqual(proc.wait(), 0)

def test_write(self):
rd, wr = os.pipe()
self.addCleanup(os.close, wr)
Expand Down
114 changes: 114 additions & 0 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,93 @@ def test_read(self):
self.assertEqual(type(s), bytes)
self.assertEqual(s, b"spam")

def test_readinto(self):
with open(os_helper.TESTFN, "w+b") as fobj:
fobj.write(b"spam")
fobj.flush()
fd = fobj.fileno()
os.lseek(fd, 0, 0)
# Oversized so readinto without hitting end.
buffer = bytearray(7)
s = os.readinto(fd, buffer)
self.assertEqual(type(s), int)
self.assertEqual(s, 4)
# Should overwrite the first 4 bytes of the buffer.
self.assertEqual(buffer[:4], b"spam")

# Readinto at EOF should return 0 and not touch buffer.
buffer[:] = b"notspam"
s = os.readinto(fd, buffer)
self.assertEqual(type(s), int)
self.assertEqual(s, 0)
self.assertEqual(bytes(buffer), b"notspam")
s = os.readinto(fd, buffer)
self.assertEqual(s, 0)
self.assertEqual(bytes(buffer), b"notspam")

# Readinto a 0 length bytearray when at EOF should return 0
self.assertEqual(os.readinto(fd, bytearray()), 0)

# Readinto a 0 length bytearray with data available should return 0.
os.lseek(fd, 0, 0)
self.assertEqual(os.readinto(fd, bytearray()), 0)

@unittest.skipUnless(hasattr(os, 'get_blocking'),
'needs os.get_blocking() and os.set_blocking()')
@unittest.skipUnless(hasattr(os, "pipe"), "requires os.pipe()")
def test_readinto_non_blocking(self):
# Verify behavior of a readinto which would block on a non-blocking fd.
r, w = os.pipe()
try:
os.set_blocking(r, False)
with self.assertRaises(BlockingIOError):
os.readinto(r, bytearray(5))

# Pass some data through
os.write(w, b"spam")
self.assertEqual(os.readinto(r, bytearray(4)), 4)

# Still don't block or return 0.
with self.assertRaises(BlockingIOError):
os.readinto(r, bytearray(5))

# At EOF should return size 0
os.close(w)
w = None
self.assertEqual(os.readinto(r, bytearray(5)), 0)
self.assertEqual(os.readinto(r, bytearray(5)), 0) # Still EOF

finally:
os.close(r)
if w is not None:
os.close(w)

def test_readinto_badarg(self):
with open(os_helper.TESTFN, "w+b") as fobj:
fobj.write(b"spam")
fobj.flush()
fd = fobj.fileno()
os.lseek(fd, 0, 0)

for bad_arg in ("test", bytes(), 14):
with self.subTest(f"bad buffer {type(bad_arg)}"):
with self.assertRaises(TypeError):
os.readinto(fd, bad_arg)

with self.subTest("doesn't work on file objects"):
with self.assertRaises(TypeError):
os.readinto(fobj, bytearray(5))

# takes two args
with self.assertRaises(TypeError):
os.readinto(fd)

# No data should have been read with the bad arguments.
buffer = bytearray(4)
s = os.readinto(fd, buffer)
self.assertEqual(s, 4)
self.assertEqual(buffer, b"spam")

@support.cpython_only
# Skip the test on 32-bit platforms: the number of bytes must fit in a
# Py_ssize_t type
Expand All @@ -249,6 +336,29 @@ def test_large_read(self, size):
# operating system is free to return less bytes than requested.
self.assertEqual(data, b'test')


@support.cpython_only
# Skip the test on 32-bit platforms: the number of bytes must fit in a
# Py_ssize_t type
@unittest.skipUnless(INT_MAX < PY_SSIZE_T_MAX,
"needs INT_MAX < PY_SSIZE_T_MAX")
@support.bigmemtest(size=INT_MAX + 10, memuse=1, dry_run=False)
def test_large_readinto(self, size):
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
create_file(os_helper.TESTFN, b'test')

# Issue #21932: For readinto the buffer contains the length rather than
# a length being passed explicitly to read, should still get capped to a
# valid size / not raise an OverflowError for sizes larger than INT_MAX.
buffer = bytearray(INT_MAX + 10)
with open(os_helper.TESTFN, "rb") as fp:
length = os.readinto(fp.fileno(), buffer)

# The test does not try to read more than 2 GiB at once because the
# operating system is free to return less bytes than requested.
self.assertEqual(length, 4)
self.assertEqual(buffer[:4], b'test')

def test_write(self):
# os.write() accepts bytes- and buffer-like objects but not strings
fd = os.open(os_helper.TESTFN, os.O_CREAT | os.O_WRONLY)
Expand Down Expand Up @@ -2467,6 +2577,10 @@ def test_lseek(self):
def test_read(self):
self.check(os.read, 1)

@unittest.skipUnless(hasattr(os, 'readinto'), 'test needs os.readinto()')
def test_readinto(self):
self.check(os.readinto, bytearray(5))

@unittest.skipUnless(hasattr(os, 'readv'), 'test needs os.readv()')
def test_readv(self):
buf = bytearray(10)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add :func:`os.readinto` to read into a :ref:`buffer object <bufferobjects>` from a file descriptor.
58 changes: 57 additions & 1 deletion Modules/clinic/posixmodule.c.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

33 changes: 33 additions & 0 deletions Modules/posixmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -11433,6 +11433,38 @@ os_read_impl(PyObject *module, int fd, Py_ssize_t length)
return buffer;
}

/*[clinic input]
os.readinto -> Py_ssize_t
fd: int
buffer: Py_buffer(accept={rwbuffer})
/
Read into a buffer object from a file descriptor.
The buffer should be mutable and bytes-like. On success, returns the number of
bytes read. Less bytes may be read than the size of the buffer. The underlying
system call will be retried when interrupted by a signal, unless the signal
handler raises an exception. Other errors will not be retried and an error will
be raised.
Returns 0 if *fd* is at end of file or if the provided *buffer* has length 0
(which can be used to check for errors without reading data). Never returns
negative.
[clinic start generated code]*/

static Py_ssize_t
os_readinto_impl(PyObject *module, int fd, Py_buffer *buffer)
/*[clinic end generated code: output=8091a3513c683a80 input=d40074d0a68de575]*/
{
assert(buffer->len >= 0);
Py_ssize_t result = _Py_read(fd, buffer->buf, buffer->len);
/* Ensure negative is never returned without an error. Simplifies calling
code. _Py_read should succeed, possibly reading 0 bytes, _or_ set an
error. */
assert(result >= 0 || (result == -1 && PyErr_Occurred()));
return result;
}

#if (defined(HAVE_SENDFILE) && (defined(__FreeBSD__) || defined(__DragonFly__) \
|| defined(__APPLE__))) \
|| defined(HAVE_READV) || defined(HAVE_PREADV) || defined (HAVE_PREADV2) \
Expand Down Expand Up @@ -16973,6 +17005,7 @@ static PyMethodDef posix_methods[] = {
OS_LOCKF_METHODDEF
OS_LSEEK_METHODDEF
OS_READ_METHODDEF
OS_READINTO_METHODDEF
OS_READV_METHODDEF
OS_PREAD_METHODDEF
OS_PREADV_METHODDEF
Expand Down

0 comments on commit 1ed4487

Please sign in to comment.