75 lines
2.3 KiB
Python
75 lines
2.3 KiB
Python
# https://thepythoncode.com/article/translate-text-in-python
|
|
from os import close
|
|
|
|
from googletrans import Translator
|
|
import csv
|
|
from tqdm import tqdm
|
|
import argparse
|
|
import sys
|
|
import os.path
|
|
import shutil
|
|
|
|
# Raise the csv module's per-field size cap (131072 characters by default) as
# far as the platform allows, so very long fields can be parsed.
# sys.maxsize does not fit in a C long on some platforms (e.g. 64-bit
# Windows), where field_size_limit() raises OverflowError; shrink the
# requested limit until it is accepted.
_field_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_limit)
        break
    except OverflowError:
        _field_limit //= 10
|
|
|
|
# Command-line interface.  Both paths are mandatory: the rest of the script
# opens args.input and probes args.output unconditionally, so a missing
# option should produce a usage error instead of a TypeError on open(None).
parser = argparse.ArgumentParser(prog='translate-dataset')
parser.add_argument('-i', '--input', required=True, help='Source file')
parser.add_argument('-o', '--output', required=True, help='Destination file')
args = parser.parse_args()
# Example input (presumably the original author's local path):
#   /home/bvn13/develop/spam-detector-1/spam.csv
|
|
|
|
# A single Translator instance, reused for every translation request made by
# this script.
translator = Translator()

# Smoke test: translate a fixed phrase into Russian and echo the original
# text, the detected source language, the translated text and the target
# language before any file is touched.
translation = translator.translate("Hola Mundo", dest="ru")
smoke_test_line = (
    f"{translation.origin} ({translation.src}) --> {translation.text} ({translation.dest})"
)
print(smoke_test_line)
|
|
|
|
# First pass over the input: count every CSV record (the header row included)
# so the progress bar can be created with a known total.
with open(args.input, "r") as f:
    total = sum(1 for _ in csv.reader(f))
|
|
# Resume support.  When the destination file already exists, copy it to
# "<output>.bup" and count its CSV records (header included); that count is
# later used to decide how many input rows are replayed from the backup
# instead of being translated again.
bup = f"{args.output}.bup" if os.path.exists(args.output) else None
skip = 0
if bup is not None:
    shutil.copyfile(args.output, bup)
    with open(args.output, "r") as previous_output:
        skip = sum(1 for _ in csv.reader(previous_output))
|
|
|
|
# Second pass: write a fresh output file.  Rows that a previous run already
# translated are replayed from the backup copy; every remaining input row is
# translated into Russian and appended.
progress = tqdm(total=total, unit='row', unit_scale=2)
# newline="" is what the csv module expects for files it reads or writes;
# without it the writer can emit "\r\r\n" line endings (blank rows) on
# platforms that translate "\n" on output.
with open(args.input, "r", newline="") as f:
    with open(args.output, "w", newline="") as tf:
        bupf = None
        bupcsv = None
        try:
            if bup is not None:
                bupf = open(bup, "r", newline="")
                bupcsv = csv.reader(bupf)
                # Drop the backup's header row; the default keeps an empty
                # backup file from raising StopIteration.
                next(bupcsv, None)

            reader = csv.reader(f)
            progress.update(1)  # accounts for the input header row
            ru = csv.writer(tf, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
            ru.writerow(['label', 'text'])
            # Discard the input header; a fixed header was written above.
            next(reader, None)

            # Starts at 1 because `skip` also counted the old output's header.
            skipped = 1
            for row in reader:
                progress.update(1)
                decision = row[0]
                text = row[1]
                if skipped < skip:
                    # Already translated in a previous run: copy it verbatim.
                    skipped += 1
                    ru.writerow(next(bupcsv))
                else:
                    try:
                        translated_text = translator.translate(text, dest='ru')
                        ru.writerow([decision, translated_text.text])
                    except Exception as e:
                        # Best effort: report the failure and move on.
                        # NOTE(review): the row is not written, so on a later
                        # resume the replay offset (counted from output rows)
                        # no longer lines up with the input rows.
                        print(f"Skipping line: {e}")
        except Exception as e:
            # Any other failure (e.g. a row with fewer than two columns)
            # aborts the loop; the error is reported and files are closed.
            print(e)
        finally:
            if bupf is not None:
                # Close the file object itself: os.close() takes an integer
                # descriptor and raises TypeError when given a file object.
                bupf.close()