Source code for kvalikirstu2.study

""" Contains the Study and HeaderInfo classes for storing data about a qualitative study.
"""

import logging
import os
from kvalikirstu2 import argument_parser
from kvalikirstu2 import utils
from kvalikirstu2 import paragraph_analyzer


logger = logging.getLogger(__name__)


class Align:
    """Namespace for the untranslated alignment strings.

    The values are plain strings; ``HeaderInfo`` stores them in its
    ``header_alignment`` mapping.
    """

    # Align cell content to the left edge.
    LEFT = "left"
    # Center cell content.
    CENTER = "center"
    # Align cell content to the right edge.
    RIGHT = "right"
class HeaderInfo:
    """Header bookkeeping for a study.

    All mappings are keyed by the header name.

    :type header_mapping: dict(string, string)
    :var header_mapping: A mapping for headers from name to name
    :var header_indexes: A mapping from header to index
    :var value_mapping: Maps (header, value) pairs to new values
    :var header_enabled: A mapping from header to bool
    :var header_alignment: A mapping from header to alignment
    :var selected_headers: A mapping from header to booleans
    :var deleted_headers: A mapping from header to booleans
    :var daf_header: The data file header, initialised from the
        ``separate_daf_symbol`` argument
    :var table_width: Initialised from the ``default_table_width`` argument
    """

    def __init__(self):
        # Defaults come from the parsed program arguments.
        args = argument_parser.get_args()
        self.header_indexes = {}
        self.header_mapping = {}
        self.value_mapping = {}
        self.header_enabled = {}
        self.header_alignment = {}
        self.selected_headers = {}
        self.deleted_headers = {}
        self.daf_header = args.separate_daf_symbol
        self.table_width = args.default_table_width

    def get_original_headers(self):
        """Return the original header names ordered by their index."""
        # sorted() is stable, so headers sharing an index keep insertion order.
        return sorted(self.header_indexes, key=self.header_indexes.get)

    def set_daf_header(self, header):
        """Set the data file header and mark that header as disabled.

        :param header: The name of the header.
        """
        self.daf_header = header
        self.header_enabled[header] = False

    def __eq__(self, other):
        """Test for equality.

        Only the index, enabled and name mappings take part in the comparison.
        """
        if not isinstance(other, HeaderInfo):
            return False
        return (self.header_indexes == other.header_indexes
                and self.header_enabled == other.header_enabled
                and self.header_mapping == other.header_mapping)

    def is_ordinary_header(self, header):
        """Is the header something other than a built-in header?

        Also returns false if header not in study.

        :param header: The name of the header.
        """
        # Built-in headers are never entered into header_mapping.
        return header in self.header_mapping

    def _enabled_original_headers(self):
        """Return the original headers, in index order, that are enabled."""
        return [name for name in self.get_original_headers()
                if self.header_enabled[name]]

    def get_header_alignments(self):
        """Get the header alignments for the index.

        :return: A dict from index header name (renamed for ordinary headers)
            to its alignment.
        """
        enabled = self._enabled_original_headers()
        alignments = {}
        for name in enabled:
            # Ordinary headers appear under their remapped name in the index.
            shown_as = (self.header_mapping[name]
                        if self.is_ordinary_header(name) else name)
            alignments[shown_as] = self.header_alignment[name]
        return alignments

    def get_index_headers(self):
        """Get the remapped headers for the index file.

        :return: The enabled headers, with ordinary headers replaced by their
            mapped names, passed through ``utils.get_list_without_duplicates``.
        """
        remapped = [
            self.header_mapping[name] if self.is_ordinary_header(name) else name
            for name in self._enabled_original_headers()
        ]
        return utils.get_list_without_duplicates(remapped)

    def add_header(self, header):
        """Add a new header for the study.

        The header is appended after the current headers, mapped to itself,
        enabled, left-aligned and selected.

        :param header: The header to be added.
        """
        next_index = len(self.header_indexes)
        self.header_indexes[header] = next_index
        self.header_mapping[header] = header
        self.header_enabled[header] = True
        self.header_alignment[header] = Align.LEFT
        self.selected_headers[header] = True

    def _add_builtin_header(self, header, enabled, index=None):
        """Add a built-in header.

        :param header: The name of the header.
        :param enabled: Should the header be enabled.
        :param index: The index of the header, determines the ordering of
            headers. When None, the current number of headers is used.
        """
        self.header_enabled[header] = enabled
        position = len(self.header_indexes) if index is None else index
        self.header_indexes[header] = position
        self.header_alignment[header] = Align.LEFT

    def init_builtin_headers(self):
        """Initialise the built-in headers."""
        self._add_builtin_header("Folder", False)
        self._add_builtin_header("daF", True)
        # Index -1 sorts this header before the others added here.
        self._add_builtin_header("HTML", True, -1)
        self._add_builtin_header("Text-data", True)
        self._add_builtin_header("Size", False)

    def update_headers(self):
        """Update all headers whose mapped name differs from their own name."""
        # Snapshot the renamed headers first: _remap_header mutates the mapping.
        renamed = [old for old, new in self.header_mapping.items()
                   if new != old]
        for old in renamed:
            self._remap_header(old)

    def _remap_header(self, header):
        """Replace a key in all header info attributes.

        Used to preserve values when user renames headers. Entries of
        ``value_mapping`` whose key pair starts with the old header get an
        additional entry keyed by ``str((new_header, mapped_value))``; the
        existing entries are left in place.

        :param header: Old header name.
        """
        new_header = self.header_mapping[header]
        logger.debug('Renaming header %s to %s', header, new_header)

        self.header_indexes[new_header] = self.header_indexes.pop(header)
        self.header_enabled[new_header] = self.header_enabled.pop(header)
        self.header_alignment[new_header] = self.header_alignment.pop(header)
        self.selected_headers[new_header] = self.selected_headers.pop(header)
        self.header_mapping[new_header] = self.header_mapping.pop(header)

        matching = {}
        for key_pair_str, mapped in self.value_mapping.items():
            # Cheap substring pre-check: the header has to occur in the key
            # string for it to be the header of the pair, so parsing can be
            # skipped otherwise.
            if header not in key_pair_str:
                continue
            pair = utils.get_pair_out_of_string(key_pair_str)
            if pair[0] == header:
                matching[key_pair_str] = mapped

        for mapped in matching.values():
            self.value_mapping[str((new_header, mapped))] = mapped
class Study:
    """A study containing qualitative data.

    :var str study_id: The ID of the study
    :var str study_title: The title of the study
    :var subjects: Subjects and their data
    :var study_path: The folder the study is located in
    :var data_folder: The folder containing the study data, or None when
        neither a data folder nor a study path was given
    """

    def __init__(self, study_id: str, study_path: str, study_title: str, data_folder=None):
        """Constructor

        :param str study_id: Contains an identifier for the study.
            This is used in creating the index and it also determines the
            filenames for the temporary files
        :param str study_path: The folder the study is located in.
        :param str study_title: The title of the study.
        :param data_folder: Optional explicit data folder. When omitted, it is
            derived from the study path and the ``study_data_folder`` argument.
        """
        self.study_id = study_id
        self.study_title = study_title
        self.subjects = []  # Subjects and their data
        self.study_path = study_path
        self._resolve_data_folder(data_folder)

    def _resolve_data_folder(self, data_folder):
        """Set ``self.data_folder``.

        An explicit ``data_folder`` wins; otherwise the folder is built from
        ``study_path`` and the configured data folder name; with neither
        available it is None.

        :param data_folder: The explicitly requested data folder, if any.
        """
        if data_folder:
            self.data_folder = data_folder
        elif self.study_path:
            args = argument_parser.get_args()
            self.data_folder = os.path.join(self.study_path, args.study_data_folder)
        else:
            self.data_folder = None

    def get_relative_path(self, path):
        """Get the given path relative to the data folder.

        :param path: The path to convert.
        :return: The relative path.
        :rtype: str
        """
        return os.path.relpath(path, self.data_folder)

    def get_data_folder_path(self):
        """Gets the data folder path."""
        return self.data_folder

    def __eq__(self, other):
        """Test for equality

        Two studies are equal when their id, title and subjects are equal.

        :param Study other: The right hand side of the equals sign.
        :return: Whether or not the objects are equal.
        :rtype: bool
        """
        if isinstance(other, Study):
            return (self.study_id == other.study_id
                    and self.study_title == other.study_title
                    and self.subjects == other.subjects)
        return False

    def get_full_path(self):
        """The location where the data for the study is stored in.

        Falls back to the current working directory when the study has no
        study path.

        :return: The path of the full object.
        :rtype: str
        """
        if self.study_path:
            path = self.study_path
        else:
            path = os.getcwd()
        return os.path.join(path, "temp", self.study_id + "_full.json")

    def write_to_file(self):
        """Writes study to file"""
        # Serialize headers separately as well, option to modify headers
        logger.debug('Writing study to file %s', self.get_full_path())
        utils.json_serialize(self, self.get_full_path())

    def load_from_file(self):
        """Loads the study from file

        Only the subjects and the title are taken from the stored study.
        """
        logger.debug('Loading study from file %s', self.get_full_path())
        # Serialize subjects and headers separately, option to modify headers
        study_copy = utils.json_deserialize(self.get_full_path())
        # The study id is deliberately not copied: the user might change the
        # filename.
        self.subjects = study_copy.subjects
        self.study_title = study_copy.study_title

    def remap_headers(self, header_info):
        """Remap the headers after user has had a chance to give feedback.

        For all subjects all the headers are mapped into their new values.
        Values are remapped before the headers themselves.

        :param header_info: The header info used for the remapping process.
        """
        logger.debug('Remapping study headers')
        for subject in self.subjects:
            subject.remap_values(header_info.value_mapping)
            subject.remap(header_info)

    # The annotation is a string so that defining the class does not need to
    # evaluate paragraph_analyzer.Subject.
    def add_subject(self, subject: "paragraph_analyzer.Subject"):
        """Add a new subject to the study

        Subjects without headers are silently ignored.

        :param Subject subject: The subject to be added.
        """
        # Do not add empty subjects
        if not subject.has_headers():
            return

        subject.check_validity()
        self.subjects.append(subject)

    def get_language_code(self, language_codes):
        """Gets the language code for the whole study.

        :param language_codes: The set of possible language codes.
        :return: None when no subject reports a language, the language code
            when all reporting subjects agree on one, otherwise ``'mul'``.
        """
        language_set = set()
        for subject in self.subjects:
            lang = subject.get_language_code(language_codes)
            if lang:
                language_set.add(lang)

        if not language_set:
            return None
        if len(language_set) == 1:
            # Take the value from the set rather than the loop variable: the
            # last subject visited may have had no language at all.
            return next(iter(language_set))
        return 'mul'

    def get_subject_language_code(self, index, language_codes):
        """Gets the language code for the given subject.

        :param index: The index of the subject.
        :param language_codes: List of valid language codes.
        """
        return self.subjects[index].get_language_code(language_codes)