# Source code for kvalikirstu2.study_reader

""" A module for generating a study out of data files.
"""

import logging
import os
from kvalikirstu2 import study
from kvalikirstu2 import paragraph_analyzer
from kvalikirstu2 import utils
from kvalikirstu2.localization import _


# Module-level logger named after this module; used by StudyReader.get_subjects
# to warn about data files that yielded no subjects.
logger = logging.getLogger(__name__)


class NoSubjectsException(Exception):
    """Study contained no subjects.

    Raised by :meth:`StudyReader.get_study` when no subjects were found in
    any of the data files of the study.
    """
class StudyReader:
    """A class for reading studies from a folder.

    The reader looks up the data files of a study (either a single file or
    every supported file under the data folder), reads the subjects out of
    each file and collects them into a ``study.Study`` object.
    """

    # pylint: disable=R0913
    def __init__(self,
                 study_id: str,
                 study_title: str,
                 overwrite_temp: bool = True,
                 selected_headers: dict = None,
                 study_path: str = None,
                 data_folder_name: str = None,
                 data_file: str = None):
        """Constructor

        :param str study_id: The id of the study.
        :param str study_title: The title of the study.
        :param bool overwrite_temp: Overwrite temporary files if they exist.
        :param dict selected_headers: A dictionary of selected headers.
        :param study_path: The path of the study.
        :param data_folder_name: The path of the data folder for the study
                                 (usually the data folder inside the study folder).
        :param data_file: A single data file to be read (can be used instead of
                          data_folder_name with single files).
        """
        self.study_path = study_path
        self.data_folder_name = data_folder_name
        self.study_id = study_id
        self.study_title = study_title
        self.overwrite_temp = overwrite_temp
        self.selected_headers = selected_headers
        self.data_file = data_file
    # pylint: enable=R0913

    def get_data_path(self):
        """Returns the data path.

        :return: ``study_path`` joined with ``data_folder_name`` when a data
                 folder name is set, otherwise ``study_path`` as it is.
        """
        if self.data_folder_name:
            return os.path.join(self.study_path, self.data_folder_name)
        return self.study_path

    def get_files(self) -> list:
        """Get all the files in the folder and subfolders.

        If a single data file was given to the constructor, only that file is
        returned. Otherwise all the files under the data path that have a
        compatible file format are matched.

        :rtype: list
        :return: A list of files
        :raises utils.InvalidPathException: If no supported data files were
            found in the data path.
        """
        # An explicitly given data file takes precedence over the data folder.
        if self.data_file:
            return [self.data_file]

        path = self.get_data_path()
        output = utils.get_supported_files(path)
        if not output:
            raise utils.InvalidPathException(
                _("Path %s is not a data file nor a folder of data files. It could be that the format"
                  " of the data file(s) is not supported.") % path)
        return output

    def get_subjects(self) -> list:
        """Gets all subjects from the folder.

        Every data file is read with a ``paragraph_analyzer.ParagraphReader``.
        Files that produce no subjects are reported with a single warning in
        the log instead of failing the whole read.

        :rtype: list
        :return: A list of subjects.
        """
        all_subjects = []
        files_without_subjects = []

        for file_path in self.get_files():
            reader = paragraph_analyzer.ParagraphReader(file_path,
                                                        self.overwrite_temp,
                                                        self.selected_headers,
                                                        study_path=self.study_path)
            subjects = reader.read_subjects()
            if not subjects:
                # Nothing to collect from this file; skipping also keeps a
                # None result from being passed to list.extend below.
                files_without_subjects.append(file_path)
                continue
            all_subjects.extend(subjects)

        if files_without_subjects:
            logger.warning(_("Files %s had no subjects! Headers may be incorrectly formatted."),
                           ', '.join(files_without_subjects))
        return all_subjects

    def get_study(self):
        """Gets a study for a given folder

        :rtype: Study
        :return: The study that was read.
        :raises NoSubjectsException: If none of the data files contained
            any subjects.
        """
        output = study.Study(self.study_id, self.study_path, self.study_title, self.get_data_path())
        subjects = self.get_subjects()
        if not subjects:
            raise NoSubjectsException(_("The study has no subjects! The data folder could be empty."))

        for subject in subjects:
            output.add_subject(subject)
        return output