dataset combiner

This commit is contained in:
bvn13 2024-11-03 10:53:10 +03:00
parent b10d600532
commit 7d32c6de36
2 changed files with 25 additions and 2 deletions

25
src/combine-dataset.py Normal file
View File

@ -0,0 +1,25 @@
import argparse
import csv
import sys
# Command-line interface: one or more input CSV files and a single output CSV.
parser = argparse.ArgumentParser(prog='combine-dataset.py')
parser.add_argument('-i', '--input', nargs='+', required=True, help='multiple input CSV')
parser.add_argument('-o', '--output', required=True, help='output CSV')


def _raise_field_size_limit():
    """Raise the csv module's field size limit as high as the platform allows.

    ``csv.field_size_limit(sys.maxsize)`` raises ``OverflowError`` where the
    value does not fit into a C long, so the limit is halved until accepted.
    """
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
            return
        except OverflowError:
            limit //= 2


def combine_datasets(input_paths, output_path):
    """Concatenate the first two columns of several CSV files into one CSV.

    The output starts with a fixed ``label,text`` header. The first row of
    every input file is treated as its header and dropped; from each
    remaining row only the first two columns are copied, in input order.

    Args:
        input_paths: iterable of paths to the input CSV files.
        output_path: path of the CSV file to create (overwritten if present).

    Raises:
        ValueError: if a non-blank data row has fewer than two columns.
    """
    # newline='' is required by the csv module on both sides: it keeps
    # newlines embedded in quoted fields intact when reading and prevents
    # '\r\r\n' line endings when writing on Windows.
    with open(output_path, 'w', newline='') as out_file:
        writer = csv.writer(out_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(['label', 'text'])
        for path in input_paths:
            with open(path, 'r', newline='') as in_file:
                reader = csv.reader(in_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
                # Drop this file's header row; the default keeps an empty
                # file from raising StopIteration.
                next(reader, None)
                for row in reader:
                    if not row:
                        # csv.reader yields [] for blank lines; nothing to copy.
                        continue
                    if len(row) < 2:
                        raise ValueError(
                            f'{path}:{reader.line_num}: expected at least 2 columns, got {len(row)}'
                        )
                    writer.writerow(row[:2])


if __name__ == '__main__':
    # Parse arguments only when run as a script, so importing this module
    # has no side effects.
    args = parser.parse_args()
    _raise_field_size_limit()
    combine_datasets(args.input, args.output)

View File

@ -1,7 +1,5 @@
import csv import csv
import datetime as dt import datetime as dt
from threading import Thread
from src.l.logger import logger from src.l.logger import logger
import src.model.trainer as trainer import src.model.trainer as trainer
from scheduler import Scheduler from scheduler import Scheduler