# Source code for kvalikirstu2.study
""" Contains the Study and HeaderInfo classes for storing data about a qualitative study.
"""
import logging
import os
from kvalikirstu2 import argument_parser
from kvalikirstu2 import utils
from kvalikirstu2 import paragraph_analyzer
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class Align:
    """Contains the untranslated alignment strings.

    The attributes are plain strings, so they can be compared with and
    stored as ordinary text.
    """

    LEFT = "left"
    CENTER = "center"
    RIGHT = "right"
class HeaderInfo:
    """Contains the header information about the study.

    :type header_mapping: dict(string, string)
    :var header_mapping: A mapping for headers from name to name
    :var header_indexes: A mapping from header to index
    :var value_mapping: Maps (header, value) pairs to new values
    :var header_enabled: A mapping from header to bool
    :var header_alignment: A mapping from header to alignment
    :var selected_headers: A mapping from header to booleans
    :var deleted_headers: A mapping from header to booleans
    :var daf_header: The data file header, initialised from the
                     ``separate_daf_symbol`` argument
    :var table_width: Initialised from the ``default_table_width`` argument
    """

    def __init__(self):
        args = argument_parser.get_args()
        self.header_indexes = {}
        self.header_mapping = {}
        self.value_mapping = {}
        self.header_enabled = {}
        self.header_alignment = {}
        self.selected_headers = {}
        self.deleted_headers = {}
        self.daf_header = args.separate_daf_symbol
        self.table_width = args.default_table_width

    def get_original_headers(self):
        """ Get the original headers.

        :return: The keys of ``header_indexes`` ordered by their index.
        :rtype: list
        """
        # sorted() is stable, so headers sharing an index keep insertion order.
        return sorted(self.header_indexes, key=self.header_indexes.get)

    def set_daf_header(self, header):
        """Sets the data file header.

        The given header is also marked as disabled in ``header_enabled``.

        :param header: The name of the header.
        """
        self.daf_header = header
        self.header_enabled[header] = False

    def __eq__(self, other):
        """Tests for equality.

        Only ``header_indexes``, ``header_enabled`` and ``header_mapping`` are compared.

        :param other: The right hand side of the equals sign.
        :return: Whether or not the objects are equal.
        :rtype: bool
        """
        if not isinstance(other, HeaderInfo):
            return False
        return (self.header_indexes == other.header_indexes and
                self.header_enabled == other.header_enabled and
                self.header_mapping == other.header_mapping)

    def is_ordinary_header(self, header):
        """Is the header something other than a built-in header?

        Also returns false if the header is not in the study. Built-in headers are
        never added to ``header_mapping``, which is what this check relies on.

        :param header: The name of the header.
        :rtype: bool
        """
        return header in self.header_mapping

    def get_header_alignments(self):
        """ Gets the header alignments for the index.

        :return: A mapping from the (remapped) name of each enabled header to its alignment.
        :rtype: dict
        """
        alignments = {}
        for header in self.get_original_headers():
            if not self.header_enabled[header]:
                continue
            # Ordinary headers are reported under their remapped name,
            # built-in headers under their own name.
            shown_name = self.header_mapping[header] if self.is_ordinary_header(header) else header
            alignments[shown_name] = self.header_alignment[header]
        return alignments

    def get_index_headers(self):
        """ Gets the remapped headers for the index file.

        :return: The enabled headers in index order, ordinary ones replaced by their
                 remapped name, passed through ``utils.get_list_without_duplicates``.
        """
        remapped = [
            self.header_mapping[header] if self.is_ordinary_header(header) else header
            for header in self.get_original_headers()
            if self.header_enabled[header]
        ]
        return utils.get_list_without_duplicates(remapped)

    def add_header(self, header):
        """ Adds a new header for the study.

        The header is placed after all existing headers, mapped to itself, enabled,
        left-aligned and selected.

        :param header: The header to be added.
        """
        self.header_indexes[header] = len(self.header_indexes)
        self.header_mapping[header] = header
        self.header_enabled[header] = True
        self.header_alignment[header] = Align.LEFT
        self.selected_headers[header] = True

    def _add_builtin_header(self, header, enabled, index=None):
        """Adds a built-in header.

        Unlike :meth:`add_header`, nothing is written to ``header_mapping`` or
        ``selected_headers``.

        :param header: The name of the header.
        :param enabled: Should the header be enabled.
        :param index: The index of the header, determines the ordering of headers.
                      When ``None`` the header is placed after the existing ones.
        """
        self.header_enabled[header] = enabled
        if index is None:
            index = len(self.header_indexes)
        self.header_indexes[header] = index
        self.header_alignment[header] = Align.LEFT

    def init_builtin_headers(self):
        """Inits built-in headers.
        """
        self._add_builtin_header("Folder", False)
        self._add_builtin_header("daF", True)
        # Index -1 sorts "HTML" before every header whose index comes from len().
        self._add_builtin_header("HTML", True, -1)
        self._add_builtin_header("Text-data", True)
        self._add_builtin_header("Size", False)

    def update_headers(self):
        """Updates all headers that changed.

        A header counts as changed when ``header_mapping`` maps it to a different name.
        """
        # Collect first: _remap_header mutates header_mapping while we work.
        changed_headers = [old for old, new in self.header_mapping.items() if new != old]
        for header in changed_headers:
            self._remap_header(header)

    def _remap_header(self, header):
        """Replaces a key in all header info attributes.

        Used to preserve values when user renames headers.

        :param header: Old header name.
        """
        new_header = self.header_mapping[header]
        logger.debug('Renaming header %s to %s', header, new_header)

        self.header_indexes[new_header] = self.header_indexes.pop(header)
        self.header_enabled[new_header] = self.header_enabled.pop(header)
        self.header_alignment[new_header] = self.header_alignment.pop(header)
        self.selected_headers[new_header] = self.selected_headers.pop(header)
        self.header_mapping[new_header] = self.header_mapping.pop(header)

        # Gather the mapped values first; value_mapping must not be modified
        # while it is being iterated.
        mapped_values = []
        for key_pair_str, value in self.value_mapping.items():
            # header has to be in key_pair_str to be the header
            # do this check to improve performance
            if header not in key_pair_str:
                continue
            if utils.get_pair_out_of_string(key_pair_str)[0] == header:
                mapped_values.append(value)

        # NOTE(review): the new key is built from the *mapped* value rather than the
        # original value of the pair, and entries for the old header name are kept.
        # Presumably intentional (values are remapped before headers) - confirm.
        for value in mapped_values:
            self.value_mapping[str((new_header, value))] = value
class Study:
    """A study containing qualitative data.

    :var str study_id: The ID of the study
    :var str study_title: The title of the study
    :var subjects: Subjects and their data
    :var study_path: The folder the study is located in
    :var data_folder: The folder relative paths are computed against, may be ``None``
    """

    def __init__(self, study_id: str, study_path: str, study_title: str, data_folder=None):
        """ Constructor

        :param str study_id: Contains an identifier for the study. This is used in creating the index
                             and it also determines the filenames for the temporary files
        :param str study_path: The folder the study is located in.
        :param str study_title: The title of the study.
        :param data_folder: Explicit data folder. When omitted it is derived from ``study_path``.
        """
        self.study_id = study_id
        self.study_title = study_title
        self.subjects = []  # Subjects and their data
        self.study_path = study_path
        self._resolve_data_folder(data_folder)

    def _resolve_data_folder(self, data_folder):
        """Sets ``self.data_folder``.

        An explicit ``data_folder`` wins; otherwise the ``study_data_folder`` argument is
        joined onto ``study_path``; with neither available the folder is ``None``.

        :param data_folder: The explicitly requested data folder, or a falsy value.
        """
        if data_folder:
            self.data_folder = data_folder
        elif self.study_path:
            args = argument_parser.get_args()
            self.data_folder = os.path.join(self.study_path, args.study_data_folder)
        else:
            self.data_folder = None

    def get_relative_path(self, path):
        """ Get relative path.

        :param path: The path to convert.
        :return: ``path`` expressed relative to ``self.data_folder``.
        """
        return os.path.relpath(path, self.data_folder)

    def __eq__(self, other):
        """Test for equality

        Compares the id, the title and the subjects of the studies.

        :param Study other: The right hand side of the equals sign.
        :return: Whether or not the objects are equal.
        :rtype: bool
        """
        if not isinstance(other, Study):
            return False
        return (self.study_id == other.study_id and
                self.study_title == other.study_title and
                self.subjects == other.subjects)

    def get_full_path(self):
        """The location where the data for the study is stored in.

        :return: The path of the full object.
        :rtype: str
        """
        # Fall back to the current working directory when no study path is set.
        base = self.study_path if self.study_path else os.getcwd()
        return os.path.join(base, "temp", self.study_id + "_full.json")

    def write_to_file(self):
        """Writes study to file
        """
        # Serialize headers separately as well, option to modify headers
        target = self.get_full_path()
        logger.debug('Writing study to file %s', target)
        utils.json_serialize(self, target)

    def load_from_file(self):
        """Loads the study from file

        Only the subjects and the title are taken from the stored copy.
        """
        source = self.get_full_path()
        logger.debug('Loading study from file %s', source)
        # Serialize subjects and headers separately, option to modify headers
        study_copy = utils.json_deserialize(source)
        # The study id is deliberately not copied, user might change filename.
        self.subjects = study_copy.subjects
        self.study_title = study_copy.study_title

    def remap_headers(self, header_info):
        """Remap the headers after user has had a chance to give feedback. For all subjects all the headers are mapped
        into their new values.

        :param header_info: The header info used for the remapping process.
        """
        logger.debug('Remapping study headers')
        for subject in self.subjects:
            # Values are remapped first, then the headers themselves.
            subject.remap_values(header_info.value_mapping)
            subject.remap(header_info)

    def add_subject(self, subject: "paragraph_analyzer.Subject"):
        """ Add a new subject to the study

        Subjects without headers are silently ignored; all others are validity-checked
        before being appended.

        :param Subject subject: The subject to be added.
        """
        # Do not add empty subjects
        if not subject.has_headers():
            return
        subject.check_validity()
        self.subjects.append(subject)

    def get_language_code(self, language_codes):
        """ Gets the language code for the whole study.

        :param language_codes: The set of possible language codes.
        :return: ``None`` when no subject yields a language code, the shared code when
                 all detected codes agree, and ``'mul'`` when several codes occur.
        """
        found_codes = set()
        for subject in self.subjects:
            code = subject.get_language_code(language_codes)
            if code:
                found_codes.add(code)

        if not found_codes:
            return None
        if len(found_codes) == 1:
            # Take the code from the set itself: the loop variable holds the result of
            # the *last* subject, which may be empty even though a code was found.
            return next(iter(found_codes))
        return 'mul'

    def get_subject_language_code(self, index, language_codes):
        """Gets the language code for the given subject.

        :param index: The index of the subject.
        :param language_codes: List of valid language codes.
        """
        return self.subjects[index].get_language_code(language_codes)