dataset combiner

This commit is contained in:
bvn13 2024-11-03 10:53:10 +03:00
parent b10d600532
commit 7d32c6de36
2 changed files with 25 additions and 2 deletions

25
src/combine-dataset.py Normal file
View File

@ -0,0 +1,25 @@
import argparse
import csv
import sys
# Command-line interface: one or more input CSV files and a single output CSV.
parser = argparse.ArgumentParser(prog='combine-dataset.py')
parser.add_argument('-i', '--input', nargs='+', required=True, help='multiple input CSV')
parser.add_argument('-o', '--output', required=True, help='output CSV')


def raise_field_size_limit():
    """Raise the csv module's field size limit as high as the platform allows.

    ``csv.field_size_limit(sys.maxsize)`` raises ``OverflowError`` where the
    value does not fit into a C ``long``, so the limit is shrunk until the
    call is accepted.

    Returns:
        The limit that was finally installed.
    """
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
            return limit
        except OverflowError:
            limit //= 10


def combine_datasets(input_paths, output_path):
    """Concatenate the first two columns of several CSV files into one CSV.

    The output starts with a ``label,text`` header row. For every input file
    the first row is treated as a header and dropped; from each remaining row
    only the first two columns are copied. Completely blank lines are skipped.

    Args:
        input_paths: Iterable of paths of the CSV files to read, in order.
        output_path: Path of the CSV file to create (overwritten if present).

    Raises:
        ValueError: If a non-blank data row has fewer than two columns.
    """
    # newline='' is required by the csv module so that it controls line
    # endings itself (no '\r\r\n' on Windows, embedded newlines preserved).
    with open(output_path, 'w', newline='') as out_file:
        writer = csv.writer(out_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['label', 'text'])
        for path in input_paths:
            with open(path, 'r', newline='') as in_file:
                reader = csv.reader(in_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                # Drop the per-file header; the default keeps empty files harmless.
                next(reader, None)
                for row in reader:
                    if not row:
                        # csv.reader yields [] for blank lines (e.g. a trailing one).
                        continue
                    if len(row) < 2:
                        raise ValueError(
                            f'{path}:{reader.line_num}: expected at least 2 columns, got {len(row)}'
                        )
                    writer.writerow(row[:2])


if __name__ == '__main__':
    # Parse arguments only when run as a script, so importing has no side effects.
    args = parser.parse_args()
    raise_field_size_limit()
    combine_datasets(args.input, args.output)

View File

@ -1,7 +1,5 @@
import csv
import datetime as dt
from threading import Thread
from src.l.logger import logger
import src.model.trainer as trainer
from scheduler import Scheduler