From 7d32c6de36f2ec98e1ae04026af46dd4ef3b0c30 Mon Sep 17 00:00:00 2001
From: bvn13
Date: Sun, 3 Nov 2024 10:53:10 +0300
Subject: [PATCH] dataset combiner

---
Reviewer note: src/combine-dataset.py below was revised during review; the
blob id on its "index" line still refers to the original 25-line version.

 src/combine-dataset.py | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/model/updater.py   |  2 --
 2 files changed, 58 insertions(+), 2 deletions(-)
 create mode 100644 src/combine-dataset.py

diff --git a/src/combine-dataset.py b/src/combine-dataset.py
new file mode 100644
index 0000000..4a208ed
--- /dev/null
+++ b/src/combine-dataset.py
@@ -0,0 +1,58 @@
+"""Combine several two-column CSV datasets into one CSV file.
+
+The first row of every input file is treated as a header and dropped; the
+first two columns of each remaining row are copied to the output, which
+starts with a single ``label,text`` header row.
+"""
+import argparse
+import csv
+import sys
+
+
+def raise_field_size_limit():
+    """Raise the csv field size limit as far as the platform allows."""
+    limit = sys.maxsize
+    while True:
+        try:
+            csv.field_size_limit(limit)
+            return
+        except OverflowError:
+            # sys.maxsize does not fit into a C long on some platforms
+            # (e.g. 64-bit Windows); halve the limit until it is accepted.
+            limit //= 2
+
+
+def combine(inputs, output):
+    """Copy the first two columns of every data row in *inputs* to *output*.
+
+    inputs -- paths of the CSV files to read, each starting with a header row
+    output -- path of the CSV file to create (overwritten if it exists)
+    """
+    # newline='' leaves line-ending handling to the csv module, as its
+    # documentation requires; otherwise rows end in '\r\r\n' on Windows and
+    # newlines embedded in quoted fields are not interpreted correctly.
+    with open(output, 'w', newline='') as dst:
+        writer = csv.writer(dst, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
+        writer.writerow(['label', 'text'])
+        for path in inputs:
+            with open(path, 'r', newline='') as src:
+                reader = csv.reader(src, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
+                next(reader, None)  # drop this file's header row
+                for row in reader:
+                    if not row:
+                        continue  # blank line: csv.reader yields [] for it
+                    writer.writerow([row[0], row[1]])
+
+
+def main():
+    """Parse the command line and combine the given datasets."""
+    parser = argparse.ArgumentParser(prog='combine-dataset.py')
+    parser.add_argument('-i', '--input', nargs='+', required=True, help='multiple input CSV')
+    parser.add_argument('-o', '--output', required=True, help='output CSV')
+    args = parser.parse_args()
+    raise_field_size_limit()
+    combine(args.input, args.output)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/model/updater.py b/src/model/updater.py
index 7e165a3..338c80e 100644
--- a/src/model/updater.py
+++ b/src/model/updater.py
@@ -1,7 +1,5 @@
 import csv
 import datetime as dt
-from threading import Thread
-
 from src.l.logger import logger
 import src.model.trainer as trainer
 from scheduler import Scheduler