Source code for kvalikirstu2.utils

""" Module for small utility functions used in various places.

This module contains many small functions that are used in various places of Kvalikirstu.

Attributes
----------
    HEADER_LINE_REGEX(str): Contains the regex used for detecting header lines from files.

    SUPPORTED_FORMATS(list(str)): Contains a list of strings that are the file formats supported by Kvalikirstu.
"""

from ast import literal_eval
import errno
import glob
import json
import logging
import os
import re
import shutil
import uuid
from collections import OrderedDict
import jsonpickle
from natsort import natsorted
from kvalikirstu2 import argument_parser
from kvalikirstu2.localization import _

# Extensions (dot included) accepted by is_valid_text_format.
SUPPORTED_FORMATS = ['.txt', '.odt']
# Extensions (dot included) that get_convertable_files searches for.
CONVERTABLE_FORMATS = ['.docx', '.doc', '.rtf']
# NOTE(review): presumably the source formats that can be converted to .txt;
# not referenced inside this module -- confirm against callers.
CONVERTABLE_TO_TXT = ['.odt', '.docx', '.doc', '.rtf']
# NOTE(review): presumably the source formats that can be converted to .odt;
# not referenced inside this module -- confirm against callers.
CONVERTABLE_TO_ODT = ['.txt', '.docx', '.doc', '.rtf']
# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class InvalidPathException(Exception):
    """Signals that the user provided an invalid path."""
def natsorted_glob(pathname, extensions=None, files_only=True):
    """Expand a glob pattern and return the matches in natural sort order.

    The pattern is expanded with ``glob.glob(pathname, recursive=True)``, so ``**``
    also descends into subdirectories.

    :param pathname: The glob pattern to expand.
    :param extensions: Collection of accepted extensions, compared against the result of
        ``get_path_extension``. ``None`` (or an empty collection) accepts any extension.
    :param files_only: When true, matches for which ``os.path.isfile`` is false are dropped.
    :return: A list of the matching paths in natural sort order.
    """
    matches = []
    for candidate in glob.glob(pathname, recursive=True):
        # Skip directories and other non-file matches when only files are wanted.
        if files_only and not os.path.isfile(candidate):
            continue
        # An empty/None filter means "no filtering by extension".
        if extensions and get_path_extension(candidate) not in extensions:
            continue
        matches.append(candidate)
    return natsorted(matches)
def parse_header(line: str):
    """Split a header line into its name and its value.

    The line is matched against the ``header_regex`` taken from the parsed arguments;
    capture group 1 is used as the header name and capture group 2 as the value.

    :param str line: The line to be parsed
    :rtype: (str, str)
    :return: Header name and value pair, both stripped of surrounding whitespace
    """
    header_regex = argument_parser.get_args().header_regex
    found = re.match(header_regex, line)
    # NOTE: when the line does not match, ``found`` is None and the access below
    # raises AttributeError; callers are expected to pass header lines only.
    name = found.group(1).strip()
    value = found.group(2).strip()
    return name, value
def is_valid_header(header):
    """Check whether a header name would be recognised as a header.

    A sample line of the form ``"<header>: value"`` is built and passed to
    ``is_header_line``.

    :param header: The header to be tested for validity.
    :return: The result of ``is_header_line`` for the sample line.
    """
    sample_line = "%s: value" % header
    return is_header_line(sample_line)
def is_header_line(line: str, selected_headers: dict = None):
    """Decide whether a line is a header line.

    A line counts as a header when the configured ``header_regex`` matches the
    entire line, the captured header name is not longer than ``max_header_length``
    and, if ``selected_headers`` is given and non-empty, the header name maps to a
    truthy value in it.

    :param str line: The line to be parsed
    :param selected_headers: A dictionary from header names to booleans that determines
        if they are selected. ``None`` or an empty dictionary accepts every header.
    :rtype: bool
    :return: True if line is a header, False otherwise
    """
    args = argument_parser.get_args()
    found = re.match(args.header_regex, line)
    # The regex has to consume the whole line, not just a prefix of it.
    if not found or found.group(0) != line:
        return False

    header = found.group(1)
    if len(header) > args.max_header_length:
        return False

    # Without a selection every syntactically valid header is accepted.
    if not selected_headers:
        return True
    return bool(header in selected_headers and selected_headers[header])
def get_formatted_header_line(header, value):
    """Render a header/value pair with the configured header line format.

    The ``header_line_format`` from the parsed arguments is used as a
    ``str.format`` template with the keyword fields ``header`` and ``value``.

    :param str header: The header part of the line
    :param str value: The value part of the line
    :rtype: str
    :return: The formatted header line.
    """
    template = argument_parser.get_args().header_line_format
    return template.format(header=header, value=value)
def is_temp_file(path: str):
    """Check whether the file name looks like a temporary document file.

    Only the last path component is inspected; it is treated as a temporary file
    when it contains the marker ``~$``.

    :param str path: The filename of the file. Can be the full path or just the name of the file
    :rtype: bool
    :return: True if the file is a temp file, False otherwise
    """
    filename = os.path.basename(path)
    return filename.find("~$") != -1
def is_valid_text_format(filename: str):
    """Check whether the file has a text format supported by Kvalikirstu.

    Temporary files (see ``is_temp_file``) are never valid; otherwise the
    extension has to be listed in ``SUPPORTED_FORMATS`` (exact, case-sensitive match).

    :param str filename: The filename to be checked
    :rtype: bool
    :return: True if text format is valid, False otherwise
    """
    stem, suffix = os.path.splitext(filename)
    if is_temp_file(stem):
        return False
    return suffix in SUPPORTED_FORMATS
def is_convertable_format(filename: str, formats):
    """Check whether the file's extension is one of the given formats.

    The comparison is an exact (case-sensitive) membership test on the
    extension, dot included.

    :param str filename: The filename to be checked
    :param formats: The convertable formats.
    :rtype: bool
    :return: True if the extension is in ``formats``, False otherwise
    """
    suffix = os.path.splitext(filename)[1]
    return suffix in formats
def json_serialize(obj, path: str):
    """Serializes (saves) any object to a file.

    The object is encoded with ``jsonpickle`` and then re-parsed as plain JSON so
    that it can be written pretty-printed (4-space indent, sorted keys, non-ASCII
    characters kept as-is) in UTF-8. The parent folder of ``path`` is created when
    it does not exist yet.

    :param obj: Any python object you want to serialize
    :param str path: Where the file should be saved
    """
    logger.debug('Saving %s to path %s', obj, path)
    encoded = jsonpickle.encode(obj)
    # Round-trip through json to get a stable, human-readable layout.
    output = json.dumps(json.loads(encoded), indent=4, ensure_ascii=False, sort_keys=True).encode('utf8')

    # A bare filename has no directory part; os.makedirs('') would raise, and the
    # current working directory already exists, so there is nothing to create.
    folder = os.path.dirname(path)
    if folder:
        create_folder_if_not_exists(folder)

    with open(path, 'wb') as file_stream:
        file_stream.write(output)
def json_deserialize(path: str):
    """Deserializes (loads) an object from a file.

    The file is read as UTF-8 text and decoded with ``jsonpickle``.

    :param str path: Where the saved file is located
    :return: The deserialized object
    """
    logger.debug('Unserializing object from path %s', path)
    with open(path, 'r', encoding='utf-8') as source:
        serialized = source.read()
    # NOTE(review): jsonpickle.decode can instantiate classes named in the payload;
    # only load files that come from a trusted location.
    return jsonpickle.decode(serialized)
def get_path_without_extension(path: str):
    """Return the path with its last file extension removed.

    :param str path: The filepath.
    :rtype: str
    :return: Everything before the final extension, as split by ``os.path.splitext``.
    """
    root, _extension = os.path.splitext(path)
    return root
def create_folder_if_not_exists(path: str):
    """Creates a folder (including missing parents) if one does not already exist.

    Nothing happens when ``path`` is empty, already is a directory, or names an
    existing file.

    :param str path: The path of the folder.
    """
    # An empty path (e.g. the dirname of a bare filename) refers to the current
    # working directory, which exists; os.makedirs('') would raise.
    if not path:
        return
    if os.path.isdir(path) or os.path.isfile(path):
        return
    # exist_ok guards against the folder being created by someone else between
    # the check above and this call.
    os.makedirs(path, exist_ok=True)
    logger.debug("Created folder %s", path)
def get_folder(path):
    """Return the folder a path belongs to.

    An existing directory is returned unchanged; for anything else the parent
    directory part of the path is returned.

    :param str path: The path to the folder/file.
    :rtype: str
    """
    return path if os.path.isdir(path) else os.path.dirname(path)
def _not_in_html_folder(folder_path, file_path):
    """Tell whether a file lies outside the top-level ``html`` subfolder.

    The file path is made relative to ``folder_path``; the file is considered to
    be in the html folder when the first component of that relative path is
    exactly ``html``.

    :param folder_path: The base folder. ``None`` yields False.
    :param file_path: The path of the file. An empty value yields False.
    :rtype: bool
    """
    if folder_path is None or not file_path:
        return False
    relative = os.path.normpath(os.path.relpath(file_path, folder_path))
    parts = relative.split(os.path.sep)
    return not parts or parts[0] != 'html'
def get_supported_files(path):
    """Collect the supported text files found under a path.

    For a directory, every file below it (recursively) that passes
    ``is_valid_text_format`` and is not inside the top-level ``html`` subfolder is
    returned. For a single file, the file itself is returned if it has a supported
    format. Any other path yields an empty list.

    :param str path: The path to search for files.
    :rtype: list
    :return: A list of files in natural sort order
    """
    if os.path.isdir(path):
        found = [
            candidate
            for candidate in natsorted_glob(os.path.join(path, '**'))
            if is_valid_text_format(candidate) and _not_in_html_folder(path, candidate)
        ]
    elif os.path.isfile(path) and is_valid_text_format(path):
        found = [path]
    else:
        found = []
    return natsorted(found)
def get_convertable_files(path):
    """Collect the files under a folder whose format is convertable.

    All files in the folder and its subfolders whose extension is listed in
    ``CONVERTABLE_FORMATS`` are matched.

    :param str path: The path to search for files.
    :rtype: list
    :return: A list of files in natural sort order
    """
    # Same search as get_files_with_extensions, fixed to the convertable formats.
    return get_files_with_extensions(path, CONVERTABLE_FORMATS)
def get_files_with_extensions(path, formats):
    """Collect the files under a folder that have one of the given extensions.

    The folder is searched recursively.

    :param path: The folder path.
    :param formats: List of file extensions (dot included).
    :rtype: list
    :return: A list of files in natural sort order
    """
    search_pattern = os.path.join(path, '**')
    matching = [
        candidate
        for candidate in natsorted_glob(search_pattern)
        if is_convertable_format(candidate, formats)
    ]
    return natsorted(matching)
def is_image_file(path: str):
    """Tests if the file in the path is an image file, judged by its extension.

    The extension is compared case-insensitively, so ``photo.PNG`` is recognised
    as well as ``photo.png``. Both common spellings of the JPEG and TIFF
    extensions are accepted.

    :param str path: The path to the file.
    :rtype: bool
    :return: True if the extension is a known image extension, False otherwise
    """
    formats = ('.png', '.svg', '.bmp', '.tif', '.tiff', '.jp2', '.jpg', '.jpeg', '.gif')
    _, extension = os.path.splitext(path)
    return extension.lower() in formats
def get_temp_path(folder_path: str, extension: str):
    """Build a path for a new temporary file inside a folder.

    The file name is a random UUID followed by ``extension``; a new name is drawn
    until one is found that does not refer to an existing file. The file itself is
    not created.

    :param str folder_path: The path of the folder where the temp file is created in.
    :param str extension: The file extension of the file.
    :rtype: str
    """
    while True:
        candidate = os.path.join(folder_path, str(uuid.uuid4()) + extension)
        if not os.path.isfile(candidate):
            return candidate
def delete_file_if_exists(path: str):
    """Delete the file at the given path, if there is one.

    Paths that do not refer to an existing file are left untouched.

    :param str path: The filepath to be deleted.
    """
    logger.debug('Delete file if exists %s', path)
    if not os.path.isfile(path):
        logger.debug('Doesnt exist')
        return
    logger.debug('Deleting')
    os.unlink(path)
def delete_folder_if_exists(path: str):
    """Deletes a folder and its contents if it exists.

    Paths that do not refer to an existing directory (including paths that point
    at a regular file) are left untouched.

    :param path: The path of the folder.
    """
    logger.debug('Delete folder if exists %s', path)
    # Check for a directory rather than mere existence: shutil.rmtree raises
    # NotADirectoryError when handed a regular file.
    if os.path.isdir(path):
        logger.debug('Deleting folder.')
        shutil.rmtree(path)
    else:
        logger.debug('Folder does not exist.')
def get_list_without_duplicates(items: list):
    """Return the items with duplicates removed, keeping first occurrences in order.

    The items have to be hashable.

    :param items: List to be pruned of duplicates.
    :rtype: list
    :returns: List without duplicates.
    """
    seen = set()
    unique = []
    for item in items:
        if item not in seen:
            seen.add(item)
            unique.append(item)
    return unique
def get_path_extension(path: str):
    """Return the file extension of a path.

    The extension includes the leading dot; an empty string is returned when the
    path has no extension.

    :param str path: The filepath.
    :rtype: str
    """
    _root, extension = os.path.splitext(path)
    return extension
def number_of_digits(number):
    """Returns the number of digits in the integer part of the given number.

    The sign is ignored, so ``-123`` has three digits. Zero has one digit.

    :param int number: The number whose digit number is being examined.
    :rtype: int
    """
    # Count digits of the magnitude; without abs() every negative number would
    # be reported as having a single digit.
    magnitude = abs(number)
    power = 1
    while magnitude >= 10 ** power:
        power += 1
    logger.debug('Number %s has %s digits', number, power)
    return power
def is_windows():
    """Tell whether the program is running on a Windows system.

    The decision is based on ``os.name`` being ``'nt'``.

    :rtype: bool
    """
    windows_os_name = 'nt'
    return os.name == windows_os_name
def folder_modified_timestamp(folder):
    """Return the most recent modification time found in a folder.

    The modification time of the folder itself and of every file and subfolder
    below it (recursively) is considered.

    :param str folder: The folder to get the timestamp for.
    :return: The largest ``os.path.getmtime`` value among the examined paths.
    """
    timestamps = [os.path.getmtime(folder)]
    for entry in natsorted_glob(os.path.join(folder, '**'), files_only=False):
        timestamps.append(os.path.getmtime(entry))
    return max(timestamps)
def get_folder_size(folder):
    """Return the combined size of all files in a folder.

    Files in subfolders are included.

    :param folder: The path of the folder to be examined.
    :return: The sum of ``os.path.getsize`` over the files found (in bytes).
    """
    search_pattern = os.path.join(folder, '**')
    return sum(os.path.getsize(entry) for entry in natsorted_glob(search_pattern))
def check_if_any_file_in_use(path: str, file_extensions):
    """Checks if any file is in use in the folder.

    Every file below ``path`` (recursively) whose lower-cased extension is in
    ``file_extensions`` is opened in append mode and closed again without
    writing anything. Presumably the open fails when another application holds
    the file, which is what this check relies on.

    :param path: Folder path.
    :param file_extensions: List of file extensions (lower-case, dot included).
        An empty or ``None`` value checks every file.
    :raises OSError: With ``errno.EACCES`` and a localized message when a file
        cannot be opened; the original error is attached as the cause.
    """
    try:
        for filepath in natsorted_glob(os.path.join(path, '**')):
            extension = os.path.splitext(filepath)[1].lower()
            if not file_extensions or extension in file_extensions:
                # Opening for append probes write access without touching the content.
                with open(filepath, 'a'):
                    pass
    except OSError as exc:
        raise OSError(errno.EACCES,
                      _('Cannot edit file, could be already open in another application'),
                      exc.filename) from exc
def get_filesize(path):
    """Return the size of a file in kilobytes, rounded to one decimal.

    One kilobyte is counted as 1024 bytes. Paths that do not refer to an existing
    file give 0.

    :param path: The filepath of the entry.
    """
    if os.path.isfile(path):
        size_in_bytes = os.stat(path).st_size
        return round(size_in_bytes / 1024, 1)
    return 0
def get_pair_out_of_string(string_pair):
    """Parse a pair written as a Python literal inside a string.

    The string is evaluated with ``ast.literal_eval``, so only literals are accepted.

    :param string_pair: A pair inside a string. Expected format is "('key', 'value')"
    :return: The parsed value.
    """
    parsed = literal_eval(string_pair)
    return parsed
def add_language_code_to_path(path, code, codes):
    """Build the path that carries the given language code.

    The code is inserted as ``_<code>`` between the file name and its extension.
    When the file name already ends in one of the valid language codes, that
    existing suffix is replaced.

    :param str path: The old filepath.
    :param str code: The language code.
    :param list codes: Valid language codes.
    :raises ValueError: If ``code`` is not one of ``codes``.
    """
    if code not in codes:
        raise ValueError('Code %s was not in codes %s' % (code, codes))

    stem, extension = os.path.splitext(path)
    if get_language_code_from_path(path, codes):
        # Drop the existing "_xxx" suffix before appending the new code.
        stem = re.match('(.*)_...$', stem).group(1)
    return "%s_%s%s" % (stem, code, extension)
def get_language_code_from_path(filepath, codes=None):
    """Extract the language code from a file path.

    The code is the three characters that follow the last underscore, right
    before the file extension (``name_xxx.ext``).

    :param str filepath: The path of the file.
    :param list codes: Valid language codes. When given and non-empty, only codes
        contained in it are returned.
    :return: The language code, or an empty string when there is none or it is not valid.
    """
    stem, _ext = os.path.splitext(filepath)
    found = re.match('.*_(...)$', stem)
    if found is None:
        return ''
    code = str(found.group(1))
    # Without a list of valid codes any three-character suffix is accepted.
    if not codes or code in codes:
        return code
    return ''