Use the 'Unnamed: 0' column instead of '#' as the serial number in the translator
This commit is contained in:
parent
55a50276fa
commit
dc3e8ba73c
|
@ -13,9 +13,9 @@ except FileNotFoundError:
|
|||
|
||||
print("Columns in the DataFrame:", df.columns)
|
||||
|
||||
# Ensure the '#' column exists
|
||||
if '#' not in df.columns:
|
||||
print("'#' column not found. Please check your CSV file.")
|
||||
# Ensure the 'Unnamed: 0' column exists
|
||||
if 'Unnamed: 0' not in df.columns:
|
||||
print("'Unnamed: 0' column not found. Please check your CSV file.")
|
||||
exit(1)
|
||||
|
||||
# Take a sample of 10 entries
|
||||
|
@ -41,23 +41,23 @@ df_sample['title_de'] = df_sample['title'].fillna('').progress_apply(translate)
|
|||
df_sample['text_de'] = df_sample['text'].fillna('').progress_apply(translate)
|
||||
|
||||
# Calculate the new serial numbers
|
||||
max_serial = df['#'].max()
|
||||
df_sample['#_de'] = df_sample['#'].apply(lambda x: x + max_serial + 1)
|
||||
max_serial = df['Unnamed: 0'].max()
|
||||
df_sample['Unnamed: 0_de'] = df_sample['Unnamed: 0'].apply(lambda x: x + max_serial + 1)
|
||||
|
||||
# Create new rows with translated content
|
||||
df_translated = df_sample.copy()
|
||||
df_translated['#'] = df_translated['#_de']
|
||||
df_translated['Unnamed: 0'] = df_translated['Unnamed: 0_de']
|
||||
df_translated['title'] = df_translated['title_de']
|
||||
df_translated['text'] = df_translated['text_de']
|
||||
|
||||
# Drop the temporary columns
|
||||
df_translated = df_translated.drop(['#_de', 'title_de', 'text_de'], axis=1)
|
||||
df_translated = df_translated.drop(['Unnamed: 0_de', 'title_de', 'text_de'], axis=1)
|
||||
|
||||
# Combine original and translated DataFrames
|
||||
df_combined = pd.concat([df, df_translated], ignore_index=True)
|
||||
|
||||
# Sort by '#' (serial) number
|
||||
df_combined = df_combined.sort_values('#').reset_index(drop=True)
|
||||
# Sort by 'Unnamed: 0' (serial) number
|
||||
df_combined = df_combined.sort_values('Unnamed: 0').reset_index(drop=True)
|
||||
|
||||
# Save as parquet
|
||||
df_combined.to_parquet('combined_with_translations_10_samples.parquet', index=False)
|
||||
|
|
Reference in New Issue