# csv_tools.py
# Author: Hans Roelofsen
import os
import pandas as pd
import numpy as np
def csv_combine(files, identifiers, identifiercolumn="df_id"):
    """
    Read several csv files and stack them into one table, adding a column
    that records which file each row came from.

    :param files: list of paths to csv files
    :param identifiers: list of labels, one per file, in the same order as files
    :param identifiercolumn: name of the column that receives the label
    :return: pandas DataFrame with the rows of all files; every file keeps its
        own row index, so index values can repeat in the result
    :raises TypeError: if files or identifiers is not a list
    :raises ValueError: if the lists are empty or differ in length
    :raises FileNotFoundError: if one of the files does not exist
    """
    if not isinstance(files, list) or not isinstance(identifiers, list):
        raise TypeError("Either files or identifiers is not a list")
    if len(files) != len(identifiers):
        # zip() would silently drop the surplus files, losing data unnoticed.
        raise ValueError(
            f"Got {len(files)} files but {len(identifiers)} identifiers"
        )
    if not files:
        raise ValueError("No files to combine")

    # Check every path up front so nothing is read when one is missing.
    for path in files:
        if not os.path.exists(path):
            raise FileNotFoundError(f"{path} was not found!")

    frames = []
    for path, label in zip(files, identifiers):
        frame = pd.read_csv(path)
        frame[identifiercolumn] = label
        frames.append(frame)

    # DataFrame.append was removed in pandas 2.0; a single concat gives the
    # same stacked result and avoids re-copying the table for every file.
    return pd.concat(frames)
def mnp_csvlister(folder, notwanted=False, extension=False):
    """
    List the species output files in an MNP result folder.

    Only entries whose name starts with "S0" are returned, and the first nine
    characters of a name are treated as the species code. You can provide
    species codes that you want to exclude from the returned list.

    :param folder: MNP result folder
    :param notwanted: species code, or collection of species codes, to
        exclude; leave as False (or None) to exclude nothing
    :param extension: if True return the full file names, otherwise return the
        unique nine-character species codes
    :return: sorted list containing filenames or species codes
    :raises OSError: if the folder cannot be listed (e.g. it does not exist)
    """
    if notwanted is None or isinstance(notwanted, bool):
        excluded = set()
    elif isinstance(notwanted, str):
        # A lone code must not be split into its characters.
        excluded = {notwanted}
    else:
        # Flatten lists, sets, arrays or Series into plain Python values once,
        # so the membership test below is a cheap set lookup per file.
        excluded = set(np.ravel(list(notwanted)).tolist())

    # Errors from os.listdir propagate on purpose: swallowing them used to
    # leave the result undefined and fail later with an unrelated error.
    names = sorted(
        name
        for name in os.listdir(folder)
        if name.startswith("S0") and name[:9] not in excluded
    )

    if extension:
        return names
    # Several files can belong to one species; report each code once.
    return sorted({name[:9] for name in names})