# Source code for hbutils.system.filesystem.binary

"""
Overview:
    This module provides functions to check if a file is binary or text.
    It uses a heuristic approach by sampling the first portion of the file
    and checking for the presence of binary characters.
"""
# Public API of this module: only the two file-level predicates are exported.
__all__ = [
    'is_binary_file', 'is_text_file',
]

# Byte values that count as "text":
#   * the control codes BEL(7), BS(8), TAB(9), LF(10), FF(12), CR(13), ESC(27);
#   * every byte from 0x20 up to 0xFF, except DEL (0x7F).
# Set difference binds tighter than union, so the grouping is spelled out.
_TEXT_CHARS = bytearray(
    {7, 8, 9, 10, 12, 13, 27}
    | (set(range(0x20, 0x100)) - {0x7f})
)


def _is_binary_string(data: bytes) -> bool:
    """
    Tell whether a chunk of bytes holds anything that is not a text byte.

    :param data: The raw bytes to inspect.
    :type data: bytes

    :return: True when at least one byte is outside ``_TEXT_CHARS``,
        False otherwise (including for empty input).
    :rtype: bool
    """
    # Delete every byte listed in _TEXT_CHARS; whatever survives is non-text.
    leftover = data.translate(None, _TEXT_CHARS)
    return len(leftover) > 0


def _take_sample(filename, size: int = 1024) -> bytes:
    """
    Return the leading bytes of a file.

    :param filename: Path of the file to read from.
    :type filename: str
    :param size: Maximum number of bytes to read. Defaults to 1024.
    :type size: int

    :return: The bytes read from the start of the file (fewer than ``size``
        if the file is shorter).
    :rtype: bytes
    """
    # Binary mode: the raw bytes are wanted, with no decoding applied.
    with open(filename, mode='rb') as stream:
        head = stream.read(size)
    return head


def is_binary_file(filename) -> bool:
    """
    Check if the given file is a binary file.

    This function reads a sample from the beginning of the file and checks
    if it contains binary characters. Files are considered binary if they
    contain characters outside the standard text character set.

    :param filename: The path to the file to check.
    :type filename: str

    :return: True if the file is binary, False otherwise.
    :rtype: bool
    :raises OSError: If the file cannot be opened for reading.

    Examples::
        >>> from hbutils.system import is_binary_file
        >>> is_binary_file('rar_template-simple.rar')
        True
        >>> is_binary_file('README.md')
        False

    .. note::
        Empty files will be treated as text files.

    .. note::
        Only the leading sample of the file (1024 bytes, the default of
        ``_take_sample``) is inspected, so non-text bytes that appear later
        in the file are not detected.
    """
    return _is_binary_string(_take_sample(filename))
def is_text_file(filename) -> bool:
    """
    Check if the given file is a text file.

    This function reads a sample from the beginning of the file and checks
    if it contains only text characters. Files are considered text if they
    do not contain binary characters.

    :param filename: The path to the file to check.
    :type filename: str

    :return: True if the file is text, False otherwise.
    :rtype: bool
    :raises OSError: If the file cannot be opened for reading.

    Examples::
        >>> from hbutils.system import is_text_file
        >>> is_text_file('rar_template-simple.rar')
        False
        >>> is_text_file('README.md')
        True

    .. note::
        Empty files will be treated as text files.

    .. note::
        Only the leading sample of the file (1024 bytes, the default of
        ``_take_sample``) is inspected, so non-text bytes that appear later
        in the file are not detected.
    """
    return not _is_binary_string(_take_sample(filename))