Source code for hbutils.binary.str

"""
C-style string types for binary I/O operations.

This module provides implementations of C-style string handling for binary
streams, including null-terminated strings (``char *`` / ``char[]`` with a
terminator) and fixed-size buffers (``char s[size]``). The types are designed
to operate on binary file objects such as :class:`io.BytesIO` and other
:class:`typing.BinaryIO`-compatible streams.

The module contains the following public components:

* :class:`CStringType` - Null-terminated string type (terminated by ``\\x00``).
* :class:`CSizedStringType` - Fixed-size string type with a specified buffer size.
* :data:`c_str` - Default :class:`CStringType` instance for convenient use.
* :func:`c_sized_str` - Factory function for creating sized-string types.

Example::

    >>> import io
    >>> from hbutils.binary import c_str, c_sized_str
    >>>
    >>> with io.BytesIO(b'hello\\x00world\\x00') as file:
    ...     print(c_str.read(file))
    ...     print(c_str.read(file))
    hello
    world
    >>> with io.BytesIO() as file:
    ...     c_sized_str(8).write(file, "hi")
    ...     print(file.getvalue())
    b'hi\\x00\\x00\\x00\\x00\\x00\\x00'

.. note::
   The :class:`CSizedStringType` decoder currently uses automatic encoding
   detection during reads, regardless of the instance's ``encoding`` attribute.
"""

import io
from typing import BinaryIO, Optional

from .base import CIOType, CFixedType
from .buffer import c_buffer
from ..encoding import auto_decode

__all__ = [
    'CStringType',
    'c_str',
    'CSizedStringType',
    'c_sized_str',
]


def _auto_encode(s: str, encoding: Optional[str]) -> bytes:
    """
    Encode a string to bytes using the specified encoding or UTF-8 as default.

    :param s: The string to encode.
    :type s: str
    :param encoding: The encoding to use, or ``None`` for UTF-8 default.
    :type encoding: Optional[str]
    :return: The encoded bytes.
    :rtype: bytes
    """
    return s.encode(encoding if encoding is not None else 'utf-8')


[docs] class CStringType(CIOType): """ Simple null-terminated string type. This class represents C-style strings that end with a single ``\\x00`` byte, which is the standard string format in the C language. :param encoding: Encoding type, default is ``None`` which enables auto-detection during decoding and UTF-8 during encoding. :type encoding: Optional[str] :ivar encoding: Encoding type configured for the instance. :vartype encoding: Optional[str] """
[docs] def __init__(self, encoding: Optional[str] = None): """ Constructor of :class:`CStringType`. :param encoding: Encoding type, default is ``None`` which means auto-detect the encodings. :type encoding: Optional[str] """ self.__encoding = encoding
@property def encoding(self) -> Optional[str]: """ Get the encoding type used for string conversion. :return: The encoding type, or ``None`` for auto-detection. :rtype: Optional[str] """ return self.__encoding
[docs] def read(self, file: BinaryIO) -> str: """ Read a null-terminated string value from a binary file. Reads bytes from the file until a null byte (``\\x00``) is encountered, then decodes the bytes to a string using the specified or auto-detected encoding. :param file: Binary file object to read from; ``io.BytesIO`` is supported. :type file: BinaryIO :return: The decoded string value (without the null terminator). :rtype: str :raises IndexError: If EOF is reached before a null terminator is found. """ b = bytearray() while True: bt = file.read(1)[0] if bt: b.append(bt) else: break return auto_decode(bytes(b), self.__encoding)
[docs] def write(self, file: BinaryIO, val: str) -> None: """ Write a null-terminated string value to a binary IO object. Encodes the string using the specified or default encoding and appends a null byte (``\\x00``) at the end. :param file: Binary file object to write to; ``io.BytesIO`` is supported. :type file: BinaryIO :param val: String value to write. :type val: str :raises TypeError: If ``val`` is not a string. Example:: >>> import io >>> from hbutils.binary import c_str >>> with io.BytesIO() as file: ... c_str.write(file, "hello") ... print(file.getvalue()) b'hello\\x00' """ if not isinstance(val, str): raise TypeError(f'String value expected, but {repr(val)} found.') file.write(_auto_encode(val, self.__encoding) + b'\x00')
c_str = CStringType() """ Default instance for reading and writing null-terminated strings. This instance provides convenient access to :class:`CStringType` functionality with the default encoding (auto-detection). Examples:: >>> import io >>> from hbutils.binary import c_str >>> >>> with io.BytesIO( ... b'kdsfjldsjflkdsmgds\\x00' ... b'\\xd0\\x94\\xd0\\xbe\\xd0\\xb1\\xd1\\x80\\xd1\\x8b\\xd0\\xb9 \\xd0' ... b'\\xb2\\xd0\\xb5\\xd1\\x87\\xd0\\xb5\\xd1\\x80\\x00' ... b'\\xa4\\xb3\\xa4\\xf3\\xa4\\xd0\\xa4\\xf3\\xa4\\xcf\\x00' ... b'\\xcd\\xed\\xc9\\xcf\\xba\\xc3\\x00' ... ) as file: ... print(c_str.read(file)) ... print(c_str.read(file)) ... print(c_str.read(file)) ... print(c_str.read(file)) kdsfjldsjflkdsmgds Добрый вечер こんばんは 晚上好 >>> with io.BytesIO() as file: ... c_str.write(file, "kdsfjld") ... c_str.write(file, "Добрый") ... print(file.getvalue()) b'kdsfjld\\x00\\xd0\\x94\\xd0\\xbe\\xd0\\xb1\\xd1\\x80\\xd1\\x8b\\xd0\\xb9\\x00' """
[docs] class CSizedStringType(CFixedType): """ Fixed-size string type. This class represents C-style fixed-size character arrays, defined like ``char s[size]`` in the C language. The string occupies a fixed amount of space regardless of its actual content length. :param size: Size of the string's buffer space in bytes. :type size: int :param encoding: Encoding type, default is ``None`` which means auto-detect the encodings during decoding and UTF-8 during encoding. :type encoding: Optional[str] :ivar encoding: Encoding type configured for the instance. :vartype encoding: Optional[str] :ivar size: Size of the fixed buffer in bytes. :vartype size: int """
[docs] def __init__(self, size: int, encoding: Optional[str] = None): """ Constructor of :class:`CSizedStringType`. :param size: Size of the string's buffer space in bytes. :type size: int :param encoding: Encoding type, default is ``None`` which means auto-detect the encodings. :type encoding: Optional[str] """ CFixedType.__init__(self, size) self.__encoding = encoding self._buffer = c_buffer(size)
@property def encoding(self) -> Optional[str]: """ Get the encoding type used for string conversion. :return: The encoding type, or ``None`` for auto-detection. :rtype: Optional[str] """ return self.__encoding
[docs] def read(self, file: BinaryIO) -> str: """ Read a fixed-size string value from a binary file. Reads exactly the specified number of bytes from the file, then decodes them as a null-terminated string using auto-detected encoding. The current implementation uses :data:`c_str` internally, so any ``encoding`` configured on this instance is not applied during reads. :param file: Binary file object to read from; ``io.BytesIO`` is supported. :type file: BinaryIO :return: The decoded string value (without padding or null terminator). :rtype: str """ bytes_ = self._buffer.read(file) with io.BytesIO(bytes_ + b'\x00') as bf: return c_str.read(bf)
[docs] def write(self, file: BinaryIO, val: str) -> None: """ Write a fixed-size string value to a binary IO object. Encodes the string and writes it to the file, padding with null bytes if necessary to fill the fixed-size buffer. :param file: Binary file object to write to; ``io.BytesIO`` is supported. :type file: BinaryIO :param val: String value to write. :type val: str :raises TypeError: If ``val`` is not a string. """ if not isinstance(val, str): raise TypeError(f'String value expected, but {repr(val)} found.') self._buffer.write(file, _auto_encode(val, self.__encoding))
[docs] def c_sized_str(size: int) -> CSizedStringType: """ Factory function to create a fixed-size string type. Creates a :class:`CSizedStringType` instance that reads and writes strings occupying a fixed amount of space, similar to C ``char`` arrays. :param size: Size of the string's buffer space in bytes. :type size: int :return: A :class:`CSizedStringType` instance with the specified size. :rtype: CSizedStringType Examples:: >>> import io >>> from hbutils.binary import c_sized_str >>> >>> with io.BytesIO( ... b'kdsfjld\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00' ... b'\\xd0\\x94\\xd0\\xbe\\xd0\\xb1\\xd1\\x80\\xd1\\x8b\\xd0\\xb9\\x00\\x00\\x00' ... ) as file: ... print(c_sized_str(15).read(file)) ... print(c_sized_str(15).read(file)) kdsfjld Добрый >>> with io.BytesIO() as file: ... c_sized_str(15).write(file, "kdsfjld") ... c_sized_str(15).write(file, "Добрый") ... print(file.getvalue()) b'kdsfjld\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\x00\\xd0\\x94\\xd0\\xbe\\xd0\\xb1\\xd1\\x80\\xd1\\x8b\\xd0\\xb9\\x00\\x00\\x00' """ return CSizedStringType(size)