Source code for megadetector.utils.string_utils

"""

string_utils.py

Miscellaneous string utilities.

"""

#%% Imports

import re


#%% Functions


[docs]
def is_float(s):
    """
    Checks whether [s] is an object (typically a string) that can be cast to a float

    Never raises for uncastable input: None, containers, and arbitrary objects
    all yield False.  Strings that float() itself accepts, such as "nan" and "inf",
    yield True.

    Args:
        s (object): object to evaluate

    Returns:
        bool: True if s successfully casts to a float, otherwise False
    """

    try:
        float(s)
    except (ValueError, TypeError, OverflowError):
        # ValueError: malformed string (e.g. "abc", "")
        # TypeError: unsupported type (e.g. None, a list)
        # OverflowError: an int too large to be represented as a float
        return False
    return True




[docs]
def is_int(s):
    """
    Checks whether [s] is an object (typically a string) that can be cast to an int

    Never raises for uncastable input: None, containers, and arbitrary objects
    all yield False.  Note that this mirrors int() itself, so a float value like 1.5
    yields True (int() truncates it), while the string "1.5" yields False.

    Args:
        s (object): object to evaluate

    Returns:
        bool: True if s successfully casts to an int, otherwise False
    """

    try:
        int(s)
    except (ValueError, TypeError, OverflowError):
        # ValueError: malformed string (e.g. "abc", "1.5") or a NaN float
        # TypeError: unsupported type (e.g. None, a list)
        # OverflowError: an infinite float
        return False
    return True




[docs]
def human_readable_to_bytes(size):
    """
    Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
    returns the number of bytes.  Will return 0 if the argument has
    unexpected form.

    Whitespace anywhere in the string is ignored, a single trailing "B" is
    optional, and the recognized unit letters are the upper-case K, M, G, and T,
    each a successive power of 1024.

    https://gist.github.com/beugley/ccd69945346759eb6142272a6d69b4e0

    Args:
        size (str): string representing a size

    Returns:
        int or float: the corresponding size in bytes; an int when the numeric
        portion consists only of digits (e.g. "10KB"), a float otherwise
        (e.g. "2.5K").  0 if [size] can't be parsed, which includes non-finite
        values such as "nan" and "inf".
    """

    multipliers = {
        'K': 1024,
        'M': 1024 ** 2,
        'G': 1024 ** 3,
        'T': 1024 ** 4,
    }

    size = re.sub(r'\s+', '', size)

    # Only one trailing 'B' is dropped, so "1KBB" still fails the unit lookup below
    if size.endswith('B'):
        size = size[:-1]

    # Covers both the empty string and a bare "B"
    if not size:
        return 0

    # A bare number (no unit letter) is already a byte count
    bare_value = _parse_finite_number(size)
    if bare_value is not None:
        return bare_value

    # Split into a numeric prefix and the trailing run of letters (the unit)
    unit_start = len(size)
    while unit_start > 0 and size[unit_start - 1].isalpha():
        unit_start -= 1
    numeric_part = size[:unit_start]
    unit = size[unit_start:]

    # Rejects a missing unit ("K1"), an unknown unit ("1X"), and a doubled
    # suffix ("1KBB", "1BB")
    multiplier = multipliers.get(unit)
    if multiplier is None:
        return 0

    # Rejects a missing or malformed numeric prefix ("abc", "1.2.3K")
    value = _parse_finite_number(numeric_part)
    if value is None:
        return 0

    return value * multiplier


def _parse_finite_number(text):
    """
    Parses [text] as an int (when it consists only of digits) or as a float,
    returning None if it is not a finite number.
    """

    try:
        # str.isdigit() accepts some characters int() rejects (e.g. superscript
        # digits), so the int() call has to be guarded too.
        value = int(text) if text.isdigit() else float(text)
    except ValueError:
        return None

    # float() accepts "nan" and "inf", which are not meaningful sizes.  NaN is
    # the only value that is unequal to itself.
    if value != value or value in (float('inf'), float('-inf')):
        return None

    return value




[docs]
def remove_ansi_codes(s):
    """
    Strips ANSI escape sequences out of a string.

    https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python#14693789

    Args:
        s (str): the string to clean

    Returns:
        str: [s] with every ANSI escape sequence deleted
    """

    # ESC followed by either a single character in the ranges @-Z or \-_, or by
    # a "[" sequence: parameter characters (0-?), intermediate characters
    # (space through /), and one final character (@-~).
    escape_pattern = r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])'
    return re.sub(escape_pattern, '', s)



#%% Tests



[docs]
class TestStringUtils:
    """
    Unit tests for the helpers in string_utils.py
    """

    def test_is_float(self):
        """
        Check is_float against inputs that should and should not cast.
        """

        castable = ["1.23", "-0.5", "0", 1.23, 0]
        not_castable = ["abc", "1.2.3", "", None, "1,23"]

        for value in castable:
            assert is_float(value)

        for value in not_castable:
            assert not is_float(value)


    def test_human_readable_to_bytes(self):
        """
        Check human_readable_to_bytes against valid and invalid size strings.
        """

        kb = 1024
        mb = 1024*1024
        gb = 1024*1024*1024
        tb = 1024*1024*1024*1024

        expectations = [
            # Plain byte counts and each unit, with and without the "B" suffix
            ("10B", 10),
            ("10", 10),
            ("1K", kb),
            ("1KB", kb),
            ("1M", mb),
            ("1MB", mb),
            ("1G", gb),
            ("1GB", gb),
            ("1T", tb),
            ("1TB", tb),
            # Fractional values
            ("2.5K", 2.5 * 1024),
            ("0.5MB", 0.5 * 1024 * 1024),
            # Embedded whitespace
            (" 2 G ", 2 * gb),
            ("500 KB", 500 * 1024),
            # Malformed input always maps to zero
            ("abc", 0),
            ("1X", 0),
            ("1KBB", 0),
            ("K1", 0),
            ("", 0),
            ("1.2.3K", 0),
            ("B", 0),
        ]

        for text, expected in expectations:
            assert human_readable_to_bytes(text) == expected


    def test_remove_ansi_codes(self):
        """
        Check remove_ansi_codes against plain and escape-laden strings.
        """

        expectations = [
            ("text without codes", "text without codes"),
            ("\x1b[31mRed text\x1b[0m", "Red text"),
            ("\x1b[1m\x1b[4mBold and Underline\x1b[0m", "Bold and Underline"),
            ("Mixed \x1b[32mgreen\x1b[0m and normal", "Mixed green and normal"),
            ("", ""),
            # Cursor-movement and screen-clearing sequences
            ("text\x1b[1Aup", "textup"),
            ("\x1b[2Jclearscreen", "clearscreen"),
        ]

        for raw, cleaned in expectations:
            assert remove_ansi_codes(raw) == cleaned





[docs]
def test_string_utils():
    """
    Entry point that executes each TestStringUtils test method in turn.
    """

    suite = TestStringUtils()
    test_methods = (
        suite.test_is_float,
        suite.test_human_readable_to_bytes,
        suite.test_remove_ansi_codes,
    )
    for run_test in test_methods:
        run_test()


# from IPython import embed; embed()
# test_string_utils()