Skip to content
Snippets Groups Projects
Commit cbba1075 authored by Noordijk, Ben
Browse files

Train_test split now moves both train and test files

parent fb27c1ca
No related branches found
No related tags found
1 merge request: !3 "Added data preparation, hyperparameter optimisation, benchmarking code and k-mer library visualisation"
......@@ -42,12 +42,15 @@ def main():
df = pd.read_csv(args.ground_truth)
df.dropna(inplace=True)
# Set aside 10% for hyperparameter optimisation:
_, set_aside_for_hyperparam = train_test_split(df, test_size=args.test_size,
train_data, test_data = train_test_split(df, test_size=args.test_size,
stratify=df['species'])
set_aside_files = list(set_aside_for_hyperparam['file name'])
train_files = list(train_data['file name'])
test_files = list(test_data['file name'])
move_files_from_list(set_aside_files, in_directory=args.in_dir,
out_directory=args.out_dir)
move_files_from_list(train_files, in_directory=args.in_dir,
out_directory=os.path.join(args.out_dir, 'train'))
move_files_from_list(test_files, in_directory=args.in_dir,
out_directory=os.path.join(args.out_dir, 'test'))
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment