# Source code for kvalikirstu2.csv_generator

"""A module for generating .csv files out of studies."""

import csv
import logging
import os
from io import StringIO
from kvalikirstu2 import study
from kvalikirstu2 import paragraph_analyzer
from kvalikirstu2 import localization
from kvalikirstu2 import utils


logger = logging.getLogger(__name__)


def _extract_all_text(subject: paragraph_analyzer.Subject):
    """Collects the text of the subject's data paragraphs into one string.

    Only paragraphs whose ``par_type`` is ``ParagraphType.DATA`` are written
    out; every other paragraph is skipped. In the collected text each
    occurrence of ``os.linesep`` is replaced with a single space and trailing
    whitespace is stripped.

    :param subject: The subject whose paragraphs are read.
    :return: The flattened data text of the subject.

    """
    data_kind = paragraph_analyzer.ParagraphType.DATA
    data_paragraphs = (par for par in subject.paragraphs if par.par_type == data_kind)

    buffer = StringIO()
    try:
        for par in data_paragraphs:
            par.write_to_stream(buffer)
        collected = buffer.getvalue()
    finally:
        buffer.close()

    # NOTE(review): os.linesep is platform-specific, so only that exact
    # sequence is flattened here -- confirm write_to_stream emits os.linesep.
    flattened = collected.replace(os.linesep, ' ')
    return flattened.rstrip()


def create_csv(loaded_study: study.Study, language, encoding, csv_filename, header_info, language_codes=None):
    """Creates a .csv file from the study.

    This function assumes that headers have already been remapped prior to
    calling this function.

    The file is written into the data folder of the study using ``;`` as the
    delimiter. The first row holds the ordinary index headers followed by the
    translated "Data file" and "Text" column titles; after that one row is
    written per subject of the study.

    :param loaded_study: The study from which the .csv file is generated from.
    :param language: The language of the .csv file.
    :param encoding: The encoding for the .csv file.
    :param csv_filename: The filename for the csv file.
    :param header_info: The header information for the study.
    :param language_codes: The set of possible language codes.

    """
    logger.info('Creating .csv to file %s, encoding %s', csv_filename, encoding)

    # Only the headers that header_info reports as ordinary become columns.
    headers = [header for header in header_info.get_index_headers()
               if header_info.is_ordinary_header(header)]

    # Translation function for the column titles, bound to the usual gettext alias.
    _ = localization.get_translation_func(language)

    csv_path = os.path.join(loaded_study.get_data_folder_path(), csv_filename)
    language_code = loaded_study.get_language_code(language_codes)
    if language_code:
        csv_path = utils.add_language_code_to_path(csv_path, language_code, language_codes)

    # newline='' lets the csv module control line endings itself.
    with open(csv_path, 'w', encoding=encoding, newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=';')
        writer.writerow([*headers, _("Data file"), _("Text")])

        for subject in loaded_study.subjects:
            data_row = [subject.get_header_value(header) for header in headers]
            data_row.append(loaded_study.get_relative_path(subject.data_file))
            data_row.append(_extract_all_text(subject))
            writer.writerow(data_row)