# csv_tools.py

import os
import pandas as pd
import numpy as np


def csv_combine(files, identifiers, identifiercolumn="df_id"):
    """
    Stack several csv tables row-wise, tagging every row with the label of the table it came from.

    The row index of each table is kept as read from its file, so index
    values can repeat in the combined result.

    :param files: list of paths to csv files
    :param identifiers: list of labels, one per file, in the same order as files
    :param identifiercolumn: name of the column that receives the label
    :return: pandas DataFrame with the rows of all files plus the identifier column
    :raises TypeError: if files or identifiers is not a list
    :raises ValueError: if files is empty or both lists differ in length
    :raises FileNotFoundError: if one of the files does not exist
    """

    if not isinstance(files, list) or not isinstance(identifiers, list):
        raise TypeError("Either files or identifiers is not a list")
    if not files:
        raise ValueError("files is empty, there is nothing to combine")
    # zip() would silently drop the surplus files or labels, so refuse a mismatch.
    if len(files) != len(identifiers):
        raise ValueError(
            f"Got {len(files)} files but {len(identifiers)} identifiers"
        )
    # Check every path up front so nothing is read when one file is missing.
    for file in files:
        if not os.path.exists(file):
            raise FileNotFoundError(f"{file} was not found!")

    tables = []
    for file, identifier in zip(files, identifiers):
        table = pd.read_csv(file)
        table[identifiercolumn] = identifier
        tables.append(table)

    # DataFrame.append was removed in pandas 2.0; one concat over all tables
    # gives the same result without re-copying the growing frame per file.
    return pd.concat(tables)


def mnp_csvlister(folder, notwanted=False, extension=False):
    """
    List the species output files in an MNP result folder.

    Only entries whose name starts with "S0" are returned. The first nine
    characters of a file name are treated as its species code, both for the
    exclusion check and for the returned codes.

    :param folder: MNP result folder
    :param notwanted: species codes to exclude, either a single code or a
        collection of codes; False (the default) or None excludes nothing
    :param extension: if True return the full file names, otherwise only the
        species codes
    :return: sorted list of unique file names or species codes
    :raises OSError: if folder cannot be listed (e.g. it does not exist)
    """
    # Normalise notwanted to a set once, so each file costs one O(1) lookup.
    if notwanted is None or isinstance(notwanted, bool):
        excluded = frozenset()
    elif isinstance(notwanted, str):
        # A bare string is one species code, not an iterable of characters.
        excluded = {notwanted}
    else:
        excluded = set(notwanted)

    # Errors from os.listdir propagate to the caller: swallowing them would
    # leave nothing to return and only hide the real cause.
    names = [
        name
        for name in os.listdir(folder)
        if name.startswith("S0") and name[:9] not in excluded
    ]

    if not extension:
        names = [name[:9] for name in names]

    # Several files can share one species code, so de-duplicate before sorting.
    return sorted(set(names))