# https://thepythoncode.com/article/translate-text-in-python
"""Translate the text column of a labelled CSV dataset into Russian.

The input CSV is expected to have a header row followed by rows whose first
column is a label and whose second column is the text to translate.  The
output CSV gets a ``label,text`` header followed by one row per successfully
translated input row.

If the output file already exists, it is copied to ``<output>.bup`` and its
rows are reused instead of being translated again, so an interrupted run can
be resumed by re-running the same command.

Usage:
    translate-dataset -i INPUT.csv -o OUTPUT.csv
"""

import argparse
import contextlib
import csv
import os.path
import shutil
import sys
from os import close  # noqa: F401 -- pre-existing import, kept; files are closed via ExitStack
from typing import Optional, Sequence

from googletrans import Translator
from tqdm import tqdm

TARGET_LANGUAGE = "ru"
OUTPUT_HEADER = ["label", "text"]
# The output contains Cyrillic text, so do not depend on the locale encoding.
ENCODING = "utf-8"

# Example input: /home/bvn13/develop/spam-detector-1/spam.csv


def _raise_field_size_limit() -> None:
    """Raise the csv field size limit as high as the platform allows."""
    limit = sys.maxsize
    while True:
        try:
            csv.field_size_limit(limit)
            return
        except OverflowError:
            # sys.maxsize does not fit in a C long on some platforms;
            # back off until the csv module accepts the value.
            limit //= 10


def _parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command line; both the input and output paths are required."""
    parser = argparse.ArgumentParser(prog='translate-dataset')
    parser.add_argument('-i', '--input', required=True, help='Source file')
    parser.add_argument('-o', '--output', required=True, help='Destination file')
    return parser.parse_args(argv)


def _open_csv(path: str, mode: str):
    """Open *path* the way the csv module requires (``newline=''``)."""
    return open(path, mode, newline="", encoding=ENCODING)


def _count_rows(path: str) -> int:
    """Return the number of CSV rows in *path*, header included."""
    with _open_csv(path, "r") as f:
        return sum(1 for _ in csv.reader(f))


def _translate_rows(
    translator: Translator,
    input_path: str,
    output_path: str,
    backup_path: Optional[str],
    skip: int,
    total: int,
) -> None:
    """Write the translated dataset to *output_path*.

    Args:
        translator: googletrans translator used for rows not yet translated.
        input_path: source CSV (header row, then ``label, text, ...`` rows).
        output_path: destination CSV; it is truncated and rewritten.
        backup_path: copy of the previous output whose data rows are reused,
            or ``None`` when there is no previous output.
        skip: number of rows (header included) found in the previous output.
        total: number of rows in the input, used to size the progress bar.

    NOTE(review): rows that fail to translate are not written to the output,
    so after a resume the row count of the previous output can be smaller than
    the number of input rows already processed, and a few rows may be
    translated a second time.
    """
    with contextlib.ExitStack() as stack:
        src = stack.enter_context(_open_csv(input_path, "r"))
        dst = stack.enter_context(_open_csv(output_path, "w"))

        backup_rows = None
        if backup_path is not None:
            backup_file = stack.enter_context(_open_csv(backup_path, "r"))
            backup_rows = csv.reader(backup_file)
            # Drop the backup's header row; the backup may be an empty file.
            next(backup_rows, None)

        # unit_scale=True gives SI prefixes; any other number would multiply
        # the displayed counts by that number.
        progress = stack.enter_context(
            tqdm(total=total, unit='row', unit_scale=True)
        )

        reader = csv.reader(src)
        writer = csv.writer(
            dst, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL
        )

        try:
            progress.update(1)  # accounts for the header row
            writer.writerow(OUTPUT_HEADER)
            if next(reader, None) is None:
                # Empty input: nothing beyond the header to write.
                return

            skipped = 1  # the header row of the previous output
            for row in reader:
                progress.update(1)

                if skipped < skip:
                    # Already translated in a previous run: reuse that row.
                    skipped += 1
                    writer.writerow(next(backup_rows))
                    continue

                try:
                    decision, text = row[0], row[1]
                    translated = translator.translate(text, dest=TARGET_LANGUAGE)
                    writer.writerow([decision, translated.text])
                except Exception as e:
                    # Best effort: a malformed row or a failed translation
                    # request must not abort the whole run.
                    print(f"Skipping line: {e}")
        except Exception as e:
            # Top-level boundary: report the problem; the ExitStack still
            # closes every file and the progress bar.
            print(e)


def main(argv: Optional[Sequence[str]] = None) -> None:
    """Run the translate-dataset command."""
    args = _parse_args(argv)
    _raise_field_size_limit()

    translator = Translator()
    # Smoke test so connectivity problems show up before any file is touched.
    translation = translator.translate("Hola Mundo", dest="ru")
    print(f"{translation.origin} ({translation.src}) --> {translation.text} ({translation.dest})")

    total = _count_rows(args.input)

    skip = 0
    bup = None
    if os.path.exists(args.output):
        # Keep the previous output: the output file is truncated below.
        bup = f"{args.output}.bup"
        shutil.copyfile(args.output, bup)
        skip = _count_rows(args.output)

    _translate_rows(translator, args.input, args.output, bup, skip, total)


if __name__ == "__main__":
    main()