"""
string_utils.py
Miscellaneous string utilities.
"""
#%% Imports
import re
#%% Functions

def is_float(s):
    """
    Checks whether [s] is an object (typically a string) that can be cast to a float.

    Anything float() accepts counts as castable, so strings such as "nan" and "inf"
    return True.  None is always rejected.

    Args:
        s (object): object to evaluate

    Returns:
        bool: True if s successfully casts to a float, otherwise False
    """

    if s is None:
        return False

    try:
        float(s)
    except (ValueError, TypeError, OverflowError):
        # ValueError: a string that doesn't parse as a number
        # TypeError: a type float() doesn't support at all (e.g. a list or dict)
        # OverflowError: an int too large to be represented as a float
        return False

    return True
[docs]
def is_int(s):
    """
    Checks whether [s] is an object (typically a string) that can be cast to an int.

    The check is "does int(s) succeed", so a float such as 1.5 returns True (int()
    truncates it), while the string "1.5" returns False.  None is always rejected.

    Args:
        s (object): object to evaluate

    Returns:
        bool: True if s successfully casts to an int, otherwise False
    """

    if s is None:
        return False

    try:
        int(s)
    except (ValueError, TypeError, OverflowError):
        # ValueError: a string that isn't an integer literal, or a NaN float
        # TypeError: a type int() doesn't support at all (e.g. a list or dict)
        # OverflowError: an infinite float
        return False

    return True
[docs]
def human_readable_to_bytes(size):
    """
    Given a human-readable byte string (e.g. 2G, 10GB, 30MB, 20KB),
    returns the number of bytes.  Will return 0 if the argument has
    unexpected form.

    All whitespace in [size] is ignored.  The recognized unit letters are the
    upper-case K, M, G, and T, interpreted as powers of 1024, optionally followed
    by a single "B".

    https://gist.github.com/beugley/ccd69945346759eb6142272a6d69b4e0

    Args:
        size (str): string representing a size

    Returns:
        int or float: the corresponding size in bytes.  An int when [size] is just a
        run of decimal digits (optionally followed by "B"), otherwise a float.  0 when
        [size] can't be parsed or does not describe a finite number.
    """

    import math

    multipliers = {
        'K': 1024,
        'M': 1024 * 1024,
        'G': 1024 * 1024 * 1024,
        'T': 1024 * 1024 * 1024 * 1024,
    }

    size = re.sub(r'\s+', '', size)

    # A single trailing "B" is optional: "10GB" == "10G", "10B" == "10"
    if size.endswith('B'):
        size = size[:-1]

    # Covers both the empty string and a bare "B"
    if not size:
        return 0

    # isdecimal() rather than isdigit(): isdigit() is also True for characters like
    # superscript digits, which int() refuses to parse.
    if size.isdecimal():
        return int(size)

    # Split off the unit letter, if any.  Anything else left at the end of the string
    # (a second "B", an unknown unit, ...) makes float() fail below, which is how
    # malformed input ends up as 0.
    if size[-1] in multipliers:
        factor = multipliers[size[-1]]
        numeric_part = size[:-1]
    else:
        factor = 1
        numeric_part = size

    try:
        value = float(numeric_part)
    except ValueError:
        return 0

    n_bytes = value * factor

    # float() happily parses "nan", "inf", and out-of-range literals like "1e999";
    # none of those are meaningful sizes.
    if not math.isfinite(n_bytes):
        return 0

    return n_bytes
[docs]
def remove_ansi_codes(s):
    """
    Strips ANSI escape sequences out of a string.

    A sequence is the ESC character (0x1B) followed either by a single character
    in the ranges "@"-"Z" or "\\"-"_", or by a "[" sequence: optional characters in
    "0"-"?", then optional characters in " "-"/", then one final character in "@"-"~".

    https://stackoverflow.com/questions/14693701/how-can-i-remove-the-ansi-escape-sequences-from-a-string-in-python#14693789

    Args:
        s (str): the string to de-ANSI-i-fy

    Returns:
        str: A copy of [s] without ANSI codes
    """

    return re.sub(r'\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])', '', s)
#%% Tests

class TestStringUtils:
    """
    Tests for string_utils.py
    """

    def test_is_float(self):
        """
        Checks is_float against values that should and should not cast to float.
        """

        castable = ["1.23", "-0.5", "0", 1.23, 0]
        not_castable = ["abc", "1.2.3", "", None, "1,23"]

        for value in castable:
            assert is_float(value)

        for value in not_castable:
            assert not is_float(value)


    def test_human_readable_to_bytes(self):
        """
        Checks human_readable_to_bytes against well-formed and malformed size strings.
        """

        expected_sizes = [
            ("10B", 10),
            ("10", 10),
            ("1K", 1024),
            ("1KB", 1024),
            ("1M", 1024*1024),
            ("1MB", 1024*1024),
            ("1G", 1024*1024*1024),
            ("1GB", 1024*1024*1024),
            ("1T", 1024*1024*1024*1024),
            ("1TB", 1024*1024*1024*1024),
            ("2.5K", 2.5 * 1024),
            ("0.5MB", 0.5 * 1024 * 1024),
            # Whitespace is ignored wherever it appears
            (" 2 G ", 2 * 1024*1024*1024),
            ("500 KB", 500 * 1024),
        ]

        for size_string, n_bytes in expected_sizes:
            assert human_readable_to_bytes(size_string) == n_bytes

        # Malformed inputs all map to zero
        malformed = ["abc", "1X", "1KBB", "K1", "", "1.2.3K", "B"]

        for size_string in malformed:
            assert human_readable_to_bytes(size_string) == 0


    def test_remove_ansi_codes(self):
        """
        Checks remove_ansi_codes on plain text, color/style codes, and cursor/screen codes.
        """

        raw_and_clean = [
            ("text without codes", "text without codes"),
            ("\x1b[31mRed text\x1b[0m", "Red text"),
            ("\x1b[1m\x1b[4mBold and Underline\x1b[0m", "Bold and Underline"),
            ("Mixed \x1b[32mgreen\x1b[0m and normal", "Mixed green and normal"),
            ("", ""),
            # Codes other than color/style: cursor movement and clear-screen
            ("text\x1b[1Aup", "textup"),
            ("\x1b[2Jclearscreen", "clearscreen"),
        ]

        for raw, clean in raw_and_clean:
            assert remove_ansi_codes(raw) == clean
[docs]
def test_string_utils():
    """
    Instantiates TestStringUtils and runs each of its test methods in turn.
    """

    suite = TestStringUtils()

    for run_test in (suite.test_is_float,
                     suite.test_human_readable_to_bytes,
                     suite.test_remove_ansi_codes):
        run_test()
# from IPython import embed; embed()
# test_string_utils()