# =========================================================================
# Module: ioapps/tarfile_interface.py
# Author: Henry R. Winterbottom
# Email: henry.winterbottom@noaa.gov
# This program is free software: you can redistribute it and/or modify
# it under the terms of the respective public license published by the
# Free Software Foundation and included with the repository within
# which this application is contained.
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# =========================================================================
"""
Module
------
tarfile_interface.py
Description
-----------
This module contains functions to create and read local host
tarball files.
Functions
---------
read_tarfile(path, tarball_path, mode=None, filelist=None):
This function parses a specified tarball archive and extracts
the specified files; if `filelist` is NoneType on entry, all
files within the archive will be extracted.
write_tarfile(path, tarball_path, filelist=None, filedict=None,
ref_local=False, gzip=False, compresslevel=1)
This function creates a POSIX compliant tarball file and
appends the files within the specified filelist to the
respective tarball file.
Author(s)
---------
Henry R. Winterbottom; 25 September 2022
History
-------
2022-09-25: Henry Winterbottom -- Initial implementation.
"""
# ----
# pylint: disable=broad-except
# pylint: disable=consider-using-with
# pylint: disable=raise-missing-from
# pylint: disable=too-many-arguments
# ----
# Module authorship, maintainer, and contact metadata.
__author__ = "Henry R. Winterbottom"
__maintainer__ = "Henry R. Winterbottom"
__email__ = "henry.winterbottom@noaa.gov"
# ----
import os
import tarfile
from typing import Dict, List
from tools import parser_interface
from utils.exceptions_interface import TarFileInterfaceError
from utils.logger_interface import Logger
# ----
# Define all available functions; these are the names exported by
# a wildcard import of this module.
__all__ = ["read_tarfile", "write_tarfile"]
# ----
# Module-level logger used by the functions of this module; it is
# constructed with this module's name as the caller name.
logger = Logger(caller_name=__name__)
# ----
def read_tarfile(
    path: str, tarball_path: str, mode: str = None, filelist: List = None
) -> None:
    """
    Description
    -----------

    This function parses a specified tarball archive and extracts the
    specified files; if filelist is NoneType upon entry, all files
    within the archive will be extracted.

    Parameters
    ----------

    path: str

        A Python string specifying the path beneath which the contents
        of the tarball are to be extracted to.

    tarball_path: str

        A Python string specifying the path to the tarball to be read.

    Keywords
    --------

    mode: str, optional

        A Python string specifying the mode to be used to open the
        tarball archive; a complete list of combinations can be found
        at https://docs.python.org/3/library/tarfile.html#tarfile.open;
        if NoneType upon entry, the mode "r" is used.

    filelist: List, optional

        A Python list of member files within the tarball archive to be
        extracted; an empty list results in no files being extracted.

    Raises
    ------

    TarFileInterfaceError:

        - raised if an exception is encountered while extracting files
          from the tarball file path specified upon entry.

        - raised if an exception is encountered while extracting a
          specified file from the tarball file path specified upon
          entry.

    Notes
    -----

    - This function changes the working directory of the calling
      process to `path` and does not restore it upon exit; the
      tarball is opened after the directory change, so a relative
      `tarball_path` is resolved beneath `path`.

    - The member paths stored within the archive are used as-is when
      extracting; only archives from trusted sources should be passed
      to this function.

    """

    # Move to the working directory within which to extract the files
    # within the tarball archive; the working directory is not
    # restored when the function exits.
    msg = (
        f"The contents of tarball path {tarball_path} will be extracted to path {path}."
    )
    logger.warn(msg=msg)
    os.chdir(path)

    # Open the existing tarball.
    msg = f"Opening tarball file {tarball_path}."
    logger.info(msg=msg)
    if mode is None:
        mode = "r"

    # The context manager guarantees that the tarball archive is
    # closed, including when an extraction error is raised below.
    with tarfile.open(tarball_path, mode) as tarball:

        if filelist is None:

            # Extract all files in the tarball; proceed accordingly.
            try:
                msg = f"Extracting all files from tarball path {tarball_path}."
                logger.info(msg=msg)
                tarball.extractall()

            except Exception as errmsg:
                msg = (
                    f"The extraction of all files from tarball path {tarball_path} "
                    f"failed with error {errmsg}. Aborting!!!"
                )
                # Chain the original exception so that the root cause
                # remains available in the traceback.
                raise TarFileInterfaceError(msg=msg) from errmsg

        else:

            # Extract only the files specified within the filelist
            # attribute upon entry; the first failure aborts the
            # extraction of any remaining files.
            for filename in filelist:
                try:
                    msg = f"Determining tarball object path for file {filename}."
                    logger.info(msg=msg)
                    tarball_obj = tarball.getmember(filename)

                    msg = f"Extracting file {filename} from tarball path {tarball_path}."
                    logger.info(msg=msg)
                    tarball.extract(tarball_obj)

                except Exception as errmsg:
                    msg = (
                        f"The extraction of file {filename} from tarball path {tarball_path} "
                        f"failed with error {errmsg}. Aborting!!!"
                    )
                    raise TarFileInterfaceError(msg=msg) from errmsg
# ----
def write_tarfile(
    path: str,
    tarball_path: str,
    filelist: List = None,
    filedict: Dict = None,
    ref_local: bool = False,
    gzip: bool = False,
    compresslevel: int = 1,
) -> None:
    """
    Description
    -----------

    This function creates a POSIX compliant tarball file and appends
    the files within the specified filelist (or filedict) to the
    respective tarball file.

    Parameters
    ----------

    path: str

        A Python string specifying the path beneath which the files to
        be archived (see `filelist`) exist on the local host.

    tarball_path: str

        A Python string specifying the path to the tarball, containing
        the contents of path, to be written.

    Keywords
    --------

    filelist: List, optional

        A Python list of filename and/or paths beneath the path to be
        archived (see `path`, above); if provided, only the files
        contained within the list will be written to tarball archive.

    filedict: Dict, optional

        A Python dictionary containing the key and value pairs to be
        used to construct the tarball file; the key values are the
        local filename path and the corresponding values are the
        tarball file member file paths.

    ref_local: bool, optional

        A Python boolean variable specifying whether to define the
        archive path, within the user-specified tarball, relative to
        the working directory (`path`); if True, the archive member
        names are prefixed with `./`.

    gzip: bool, optional

        A Python boolean variable specifying whether to apply gzip
        compression to the tarball.

    compresslevel: int, optional

        A Python integer value specifying the compression level for
        the archive; it is used only if `gzip` is True.

    Raises
    ------

    TarFileInterfaceError:

        - raised if both the `filelist` and `filedict` keyword
          parameters are specified upon entry.

    Notes
    -----

    - This function changes the working directory of the calling
      process to `path` and does not restore it upon exit; the
      tarball is opened after the directory change, so a relative
      `tarball_path` is resolved beneath `path`.

    - If neither `filelist` nor `filedict` is specified, a warning is
      logged and the tarball is opened and closed without any files
      being added.

    """

    # Check that the attributes provided upon entry are valid.
    if (filelist is None) and (filedict is None):
        msg = (
            "Neither the filelist or filedict keyword parameters have "
            "been specified upon entry; this may cause this method "
            "to (not) produce the expected results."
        )
        logger.warn(msg=msg)

    if (filelist is not None) and (filedict is not None):
        msg = (
            "The write_tarfile method does not support file name "
            "lists (filelist) and file and archive file mapping "
            "names (filedict) simultaneously. Aborting!!!"
        )
        raise TarFileInterfaceError(msg=msg)

    # Define the tarball archive attributes accordingly; the
    # compression level is passed only for gzip archives.
    if gzip:
        mode = "w:gz"
        open_kwargs = {"compresslevel": compresslevel}
    else:
        mode = "w"
        open_kwargs = {}

    # Move to the working directory and open the tarball archive; the
    # context manager guarantees that the archive file is closed,
    # including when adding a member file raises an exception.
    os.chdir(path)
    with tarfile.open(tarball_path, mode, **open_kwargs) as tarball:

        if filelist is not None:

            # Write the respective file names to the tarball archive
            # path; proceed accordingly.
            for filename in filelist:
                msg = f"Adding file {filename} to tarball file {tarball_path}."
                logger.info(msg=msg)

                # Define the tarball archive member file name
                # relative to the working directory when requested.
                tarball.add(f"./{filename}" if ref_local else filename)

        if filedict is not None:

            # Write the respective file names to the tarball archive
            # path; proceed accordingly.
            for filename in filedict:

                # Define the tarball archive member file name.
                arcname = parser_interface.dict_key_value(
                    dict_in=filedict, key=filename, no_split=True
                )
                msg = f"Adding file {filename} to tarball file {tarball_path} as {arcname}."
                logger.info(msg=msg)

                # Define the tarball archive member file name
                # relative to the working directory when requested.
                tarball.add(
                    filename, arcname=f"./{arcname}" if ref_local else arcname
                )