# Source code for kvalikirstu2.csv_generator
"""A module for generating .csv files out of studies."""
import csv
import logging
import os
from io import StringIO
from kvalikirstu2 import study
from kvalikirstu2 import paragraph_analyzer
from kvalikirstu2 import localization
from kvalikirstu2 import utils
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
def _extract_all_text(subject: paragraph_analyzer.Subject):
    """Extracts the data text for the subject with line breaks replaced by spaces.

    Every paragraph of the subject whose ``par_type`` is ``ParagraphType.DATA`` is written, in order, to an
    in-memory stream. Line breaks (``\\r\\n`` and ``\\n``) in the collected text are then replaced with single
    spaces and trailing whitespace is stripped.

    :param subject: The subject from which the data is extracted from.
    :return: The collected data text of the subject.
    """
    with StringIO() as stream:
        for paragraph in subject.paragraphs:
            if paragraph.par_type == paragraph_analyzer.ParagraphType.DATA:
                paragraph.write_to_stream(stream)
        # Read the buffer before the context manager closes the stream.
        text = stream.getvalue()

    # StringIO performs no newline translation by default, so in-memory text normally uses '\n' on every
    # platform, whereas os.linesep is '\r\n' on Windows. Replacing only os.linesep would leave '\n' breaks
    # untouched there, so both conventions are handled explicitly ('\r\n' first to avoid a stray '\r').
    return text.replace('\r\n', ' ').replace('\n', ' ').rstrip()
def create_csv(loaded_study: study.Study, language, encoding, csv_filename, header_info, language_codes=None):
    """Writes the study out as a semicolon-delimited .csv file in the study's data folder.

    This function assumes that headers have already been remapped prior to calling this function.

    The first row holds the ordinary index headers followed by the translated "Data file" and "Text" columns.
    Each following row holds one subject: its header values, the relative path of its data file and its
    extracted data text.

    :param loaded_study: The study from which the .csv file is generated from.
    :param language: The language of the .csv file.
    :param encoding: The encoding for the .csv file.
    :param csv_filename: The filename for the csv file.
    :param header_info: The header information for the study.
    :param language_codes: The set of possible language codes.
    """
    logger.info('Creating .csv to file %s, encoding %s', csv_filename, encoding)

    # Only headers that header_info reports as ordinary become columns.
    columns = [column for column in header_info.get_index_headers() if header_info.is_ordinary_header(column)]

    # Kept under the conventional name ``_`` so that translatable strings stay recognisable as such.
    _ = localization.get_translation_func(language)

    target_path = os.path.join(loaded_study.get_data_folder_path(), csv_filename)
    language_code = loaded_study.get_language_code(language_codes)
    if language_code:
        target_path = utils.add_language_code_to_path(target_path, language_code, language_codes)

    # newline='' lets the csv module control line endings itself.
    with open(target_path, 'w', encoding=encoding, newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=';')
        writer.writerow([*columns, _("Data file"), _("Text")])

        for subject in loaded_study.subjects:
            header_values = [subject.get_header_value(column) for column in columns]
            relative_path = loaded_study.get_relative_path(subject.data_file)
            writer.writerow([*header_values, relative_path, _extract_all_text(subject)])