Source code for OCDocker.Toolbox.IO

#!/usr/bin/env python3

# Description
###############################################################################
'''
Sets of classes and functions that are used to handle I/O operations.

Usage:

import OCDocker.Toolbox.IO as ocio
'''

# Imports
###############################################################################
import mmap
import os

from typing import Generator

# License
###############################################################################
'''
OCDocker
Authors: Rossi, A.D.; Monachesi, M.C.E.; Spelta, G.I.; Torres, P.H.M.
Federal University of Rio de Janeiro
Carlos Chagas Filho Institute of Biophysics
Laboratory for Molecular Modeling and Dynamics

This program is proprietary software owned by the Federal University of Rio de Janeiro (UFRJ),
developed by Rossi, A.D.; Monachesi, M.C.E.; Spelta, G.I.; Torres, P.H.M., and protected under Brazilian Law No. 9,609/1998.
All rights reserved. Use, reproduction, modification, and distribution are allowed under this UFRJ license,
provided this copyright notice is preserved. See the LICENSE file for details.

Contact: Artur Duque Rossi - arturossi10@gmail.com
'''

# Classes
###############################################################################

# Functions
###############################################################################
## Private ##

## Public ##

def lazyread(file_name: str, decode: str = "utf-8") -> Generator[str, None, None]:
    '''Lazily yield the lines of a file from top to bottom.

    The file is opened in binary mode and every line is decoded on its own,
    so only one line is held in memory at a time. Line terminators are kept
    in the yielded strings.

    Parameters
    ----------
    file_name : str
        Path of the file to be read.
    decode : str, optional
        Name of the codec used to decode each line, by default "utf-8"

    Yields
    ------
    str
        The next decoded line of the file, in sequential order.
    '''

    with open(file_name, 'rb') as handle:
        # A binary file object is its own line iterator
        for raw_line in handle:
            yield raw_line.decode(decode)
def lazyread_mmap(file_name: str, decode: str = "utf-8") -> Generator[str, None, None]:
    '''Read a file in sequential order using mmap.

    Line terminators are kept in the yielded strings. An empty file yields
    nothing.

    Parameters
    ----------
    file_name : str
        The file to be read.
    decode : str, optional
        The decode to be used, by default "utf-8"

    Returns
    -------
    Generator[str, None, None]
        A generator with the lines of the file in sequential order.
    '''

    # Open file for reading in binary mode
    with open(file_name, 'rb') as read_obj:
        # mmap refuses to map a zero-length file (raises ValueError); an empty
        # file simply has no lines to yield
        if os.fstat(read_obj.fileno()).st_size == 0:
            return

        with mmap.mmap(read_obj.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
            # readline returns b'' once the end of the map is reached
            for line in iter(mmap_obj.readline, b''):
                yield line.decode(decode)
def lazyread_reverse_order(file_name: str, decode: str = "utf-8") -> Generator[str, None, None]:
    '''Read a file in reverse order.

    Lines are split on the b'\\n' byte and yielded from the last one to the
    first one. The yielded strings do not contain the line terminator and
    empty lines are skipped.

    Each line is decoded as a whole, in its natural byte order, so multi-byte
    characters are decoded correctly.

    Parameters
    ----------
    file_name : str
        The file to be read.
    decode : str, optional
        The decode to be used, by default "utf-8"

    Returns
    -------
    Generator[str, None, None]
        A generator with the lines of the file in reverse order.
    '''

    # Number of bytes fetched per backwards step
    chunk_size = 8192

    # Open file for reading in binary mode
    with open(file_name, 'rb') as read_obj:
        # Move the cursor to the end of the file and get its position (eof)
        read_obj.seek(0, os.SEEK_END)
        pointer_location = read_obj.tell()

        # Bytes of the line whose beginning has not been found yet
        pending = b''

        # Walk backwards, one chunk at a time, until the top of the file
        while pointer_location > 0:
            step = min(chunk_size, pointer_location)
            pointer_location -= step

            # Seek every time, the position is not trusted across yields
            read_obj.seek(pointer_location)
            pieces = (read_obj.read(step) + pending).split(b'\n')

            # The first piece may continue into the previous chunk, keep it
            pending = pieces[0]

            # Every other piece is a complete line, emit them bottom-up
            for piece in reversed(pieces[1:]):
                # Skip empty lines (this includes the one after a trailing newline)
                if piece:
                    yield piece.decode(decode)

        # As file is read completely, what is left is the first line
        if pending:
            yield pending.decode(decode)
def lazyread_reverse_order_mmap(file_name: str, decode: str = "utf-8") -> Generator[str, None, None]:
    '''Read a file in reverse order using mmap.

    Lines are split on the b'\\n' byte and yielded from the last one to the
    first one. The yielded strings do not contain the line terminator and
    empty lines are skipped. An empty file yields nothing.

    Each line is decoded as a whole, in its natural byte order, so multi-byte
    characters are decoded correctly.

    Parameters
    ----------
    file_name : str
        The file to be read.
    decode : str, optional
        The decode to be used, by default "utf-8"

    Returns
    -------
    Generator[str, None, None]
        A generator with the lines of the file in reverse order.
    '''

    # Open file for reading in binary mode
    with open(file_name, 'rb') as read_obj:
        # mmap refuses to map a zero-length file (raises ValueError); an empty
        # file simply has no lines to yield
        if os.fstat(read_obj.fileno()).st_size == 0:
            return

        with mmap.mmap(read_obj.fileno(), 0, access=mmap.ACCESS_READ) as mmap_obj:
            # Exclusive end offset of the line currently being looked for
            line_end = len(mmap_obj)

            # Loop till the top of the file is reached
            while line_end > 0:
                # rfind returns -1 when there is no newline before line_end,
                # which places the start of the line at offset 0
                line_start = mmap_obj.rfind(b'\n', 0, line_end) + 1

                # Skip empty lines (this includes the one after a trailing newline)
                if line_start < line_end:
                    yield mmap_obj[line_start:line_end].decode(decode)

                # Step over the newline that precedes the line just handled
                line_end = line_start - 1