# Source code for kvalikirstu2.study_reader
""" A module for generating a study out of data files.
"""
import logging
import os
from kvalikirstu2 import study
from kvalikirstu2 import paragraph_analyzer
from kvalikirstu2 import utils
from kvalikirstu2.localization import _
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class NoSubjectsException(Exception):
    """Study contained no subjects.

    Raised by ``StudyReader.get_study`` when reading the data yields no subjects at all.
    """
class StudyReader:
    """A class for reading studies from a folder or from a single data file.

    The reader only stores its settings on construction; files are located and parsed when
    ``get_files``, ``get_subjects`` or ``get_study`` is called.
    """

    # pylint: disable=R0913
    def __init__(self, study_id: str, study_title: str, overwrite_temp: bool = True,
                 selected_headers: dict = None, study_path: str = None, data_folder_name: str = None,
                 data_file: str = None):
        """Constructor

        :param str study_id: The id of the study.
        :param str study_title: The title of the study.
        :param bool overwrite_temp: Overwrite temporary files if they exist.
        :param dict selected_headers: A dictionary of selected headers.
        :param study_path: The path of the study.
        :param data_folder_name: The path of the data folder for the study (usually the data folder inside the
            study folder).
        :param data_file: A single data file to be read (can be used instead of data_folder_name with single
            files).
        """
        self.study_path = study_path
        self.data_folder_name = data_folder_name
        self.study_id = study_id
        self.study_title = study_title
        self.overwrite_temp = overwrite_temp
        self.selected_headers = selected_headers
        self.data_file = data_file
    # pylint: enable=R0913

    def get_data_path(self):
        """Returns the data path.

        :rtype: str
        :return: ``data_folder_name`` joined onto ``study_path`` when a data folder name is set, otherwise
            ``study_path`` itself.
        """
        if self.data_folder_name:
            return os.path.join(self.study_path, self.data_folder_name)
        return self.study_path

    def get_files(self):
        """Get the data files of the study.

        When ``data_file`` is set it is returned as the only entry. Otherwise the files are whatever
        ``utils.get_supported_files`` reports for the data path.

        :rtype: list
        :return: A list of files
        :raises utils.InvalidPathException: If no supported files were found in the data path.
        """
        if self.data_file:
            return [self.data_file]

        path = self.get_data_path()
        output = utils.get_supported_files(path)
        if not output:
            raise utils.InvalidPathException(
                _("Path %s is not a data file nor a folder of data files. It could be that the format"
                  " of the data file(s) is not supported.") % path)
        return output

    def get_subjects(self):
        """Gets all subjects from the data files.

        Every file returned by ``get_files`` is read with a ``paragraph_analyzer.ParagraphReader``. Files
        that yield no subjects are collected and reported in a single warning after all files were read.

        :rtype: list
        :return: A list of subjects.
        """
        all_subjects = []
        files_without_subjects = []
        for file in self.get_files():
            reader = paragraph_analyzer.ParagraphReader(file, self.overwrite_temp, self.selected_headers,
                                                        study_path=self.study_path)
            subjects = reader.read_subjects()
            if not subjects:
                # Nothing to add. Skipping the extend also keeps a None result (if read_subjects can
                # ever return one) from raising a TypeError and hiding the warning below.
                files_without_subjects.append(file)
                continue
            all_subjects.extend(subjects)

        if files_without_subjects:
            # str() keeps the join working should the file entries be path-like objects instead of strings.
            logger.warning(_("Files %s had no subjects! Headers may be incorrectly formatted."),
                           ', '.join(str(file) for file in files_without_subjects))
        return all_subjects

    def get_study(self):
        """Gets a study for the configured study path and data files.

        :rtype: Study
        :return: The study that was read.
        :raises NoSubjectsException: If no subjects were found in the data files.
        """
        output = study.Study(self.study_id, self.study_path, self.study_title, self.get_data_path())
        subjects = self.get_subjects()
        if not subjects:
            raise NoSubjectsException(_("The study has no subjects! The data folder could be empty."))

        for subject in subjects:
            output.add_subject(subject)
        return output