updated translation
This commit is contained in:
parent
24b6a2a8a7
commit
1ebe2e1212
|
@ -17,14 +17,18 @@ model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
||||||
|
|
||||||
# Function to translate text
|
# Function to translate text
|
||||||
def translate(text):
|
def translate(text):
|
||||||
|
if pd.isna(text) or text == '':
|
||||||
|
return '' # Return an empty string for NaN or empty string inputs
|
||||||
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=512)
|
||||||
translated = model.generate(**inputs)
|
translated = model.generate(**inputs)
|
||||||
return tokenizer.decode(translated[0], skip_special_tokens=True)
|
return tokenizer.decode(translated[0], skip_special_tokens=True)
|
||||||
|
|
||||||
|
|
||||||
# Translate 'text' and 'title' columns
|
# Translate 'text' and 'title' columns
|
||||||
tqdm.pandas()
|
tqdm.pandas()
|
||||||
df_sample['title_de'] = df_sample['title'].progress_apply(translate)
|
df_sample['title_de'] = df_sample['title'].fillna('').progress_apply(translate)
|
||||||
df_sample['text_de'] = df_sample['text'].progress_apply(translate)
|
df_sample['text_de'] = df_sample['text'].fillna('').progress_apply(translate)
|
||||||
|
|
||||||
|
|
||||||
# Calculate the new serial numbers
|
# Calculate the new serial numbers
|
||||||
max_serial = df['Serial'].max()
|
max_serial = df['Serial'].max()
|
||||||
|
|
Reference in New Issue