-
Notifications
You must be signed in to change notification settings - Fork 52
FEATURE: Translate categories and tags #269
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
54849f1
0836516
0e8c3e2
84b4ea6
298f834
b9a8d19
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,10 +24,10 @@ def fetch_untranslated_model_ids(model, content_column, limit, target_locale) | |
SELECT m.id | ||
FROM #{model.table_name} m | ||
#{limit_to_public_clause(model)} | ||
WHERE m.deleted_at IS NULL | ||
AND m.#{content_column} != '' | ||
AND m.user_id > 0 | ||
#{max_age_clause} | ||
WHERE m.#{content_column} != '' | ||
#{not_deleted_clause(model)} | ||
#{non_bot_clause(model)} | ||
#{max_age_clause(model)} | ||
Comment on lines
+28
to
+30
Comment: I'm wondering if it makes sense to just make this [inline code lost in extraction]
Reply: Actually, the current query looks very neat as it is, instead of having to repeat the clause for every model.
I would prefer to optimize next time, if it gets more complex. |
||
ORDER BY m.updated_at DESC | ||
) | ||
EXCEPT | ||
|
@@ -87,26 +87,35 @@ def translate_records(type, record_ids, target_locale) | |
|
||
def process_batch | ||
records_to_translate = SiteSetting.automatic_translation_backfill_rate | ||
backfill_locales.each_with_index do |target_locale, i| | ||
topic_ids = | ||
fetch_untranslated_model_ids(Topic, "title", records_to_translate, target_locale) | ||
post_ids = fetch_untranslated_model_ids(Post, "raw", records_to_translate, target_locale) | ||
|
||
next if topic_ids.empty? && post_ids.empty? | ||
|
||
DiscourseTranslator::VerboseLogger.log( | ||
"Translating #{topic_ids.size} topics and #{post_ids.size} posts to #{target_locale}", | ||
) | ||
backfill_locales.each do |target_locale| | ||
[ | ||
[Topic, "title"], | ||
[Post, "raw"], | ||
[Category, "name"], | ||
Comment: There is [inline code lost in extraction]
Reply: Not yet. Category descriptions are currently topics, so we will ignore them for now. |
||
[Tag, "name"], | ||
].each do |model, content_column| | ||
ids = | ||
fetch_untranslated_model_ids(model, content_column, records_to_translate, target_locale) | ||
|
||
next if ids.empty? | ||
|
||
DiscourseTranslator::VerboseLogger.log( | ||
"Translating #{ids.size} #{model.name} to #{target_locale}", | ||
) | ||
|
||
translate_records(Topic, topic_ids, target_locale) | ||
translate_records(Post, post_ids, target_locale) | ||
translate_records(model, ids, target_locale) | ||
end | ||
end | ||
end | ||
|
||
def max_age_clause | ||
def max_age_clause(model) | ||
return "" if SiteSetting.automatic_translation_backfill_max_age_days <= 0 | ||
|
||
"AND m.created_at > NOW() - INTERVAL '#{SiteSetting.automatic_translation_backfill_max_age_days} days'" | ||
if model == Post || model == Topic | ||
"AND m.created_at > NOW() - INTERVAL '#{SiteSetting.automatic_translation_backfill_max_age_days} days'" | ||
else | ||
"" | ||
end | ||
end | ||
|
||
def limit_to_public_clause(model) | ||
|
@@ -130,5 +139,15 @@ def limit_to_public_clause(model) | |
|
||
limit_to_public_clause | ||
end | ||
|
||
# Builds the SQL fragment that restricts the backfill query to records whose
# author id is positive (i.e. not created by a bot/system user).
#
# Only Post and Topic are filtered this way; categories and tags cannot be
# created by a bot, so every other model gets an empty fragment.
#
# @param model [Class] the model class being queried (aliased as `m` in the SQL)
# @return [String] an `AND ...` clause, or "" when no filter applies
def non_bot_clause(model)
  return "AND m.user_id > 0" if model == Post || model == Topic

  ""
end
|
||
# Builds the SQL fragment that excludes soft-deleted rows from the backfill
# query.
#
# Only Post and Topic are filtered on `deleted_at`; categories and tags do not
# have that column, so every other model gets an empty fragment.
#
# @param model [Class] the model class being queried (aliased as `m` in the SQL)
# @return [String] an `AND ...` clause, or "" when no filter applies
def not_deleted_clause(model)
  return "AND m.deleted_at IS NULL" if model == Post || model == Topic

  ""
end
end | ||
end |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# frozen_string_literal: true | ||
|
||
module DiscourseTranslator
  # Records the locale detected for a category.
  #
  # Backed by the `discourse_translator_category_locales` table; each row
  # points at one category and stores its `detected_locale`.
  class CategoryLocale < ActiveRecord::Base
    self.table_name = "discourse_translator_category_locales"

    belongs_to :category

    # Both columns are NOT NULL in the table, so reject blanks up front.
    validates :category_id, :detected_locale, presence: true
  end
end
|
||
# == Schema Information | ||
# | ||
# Table name: discourse_translator_category_locales | ||
# | ||
# id :bigint not null, primary key | ||
# category_id :integer not null | ||
# detected_locale :string(20) not null | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# frozen_string_literal: true | ||
|
||
module DiscourseTranslator | ||
class CategoryTranslation < ActiveRecord::Base | ||
Comment: Do we need to differentiate between [inline code lost in extraction]
Reply: No. This current implementation makes use of the [inline code lost in extraction] |
||
self.table_name = "discourse_translator_category_translations" | ||
|
||
belongs_to :category | ||
|
||
validates :category_id, presence: true | ||
validates :locale, presence: true | ||
validates :translation, presence: true | ||
validates :locale, uniqueness: { scope: :category_id } | ||
end | ||
end | ||
|
||
# == Schema Information | ||
# | ||
# Table name: discourse_translator_category_translations | ||
# | ||
# id :bigint not null, primary key | ||
# category_id :integer not null | ||
# locale :string not null | ||
# translation :text not null | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# | ||
# Indexes | ||
# | ||
# idx_category_translations_on_category_id_and_locale (category_id,locale) UNIQUE | ||
# |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# frozen_string_literal: true | ||
|
||
module DiscourseTranslator
  # Records the locale detected for a tag.
  #
  # Backed by the `discourse_translator_tag_locales` table; each row points at
  # one tag and stores its `detected_locale`.
  class TagLocale < ActiveRecord::Base
    self.table_name = "discourse_translator_tag_locales"

    belongs_to :tag

    # Both columns are NOT NULL in the table, so reject blanks up front.
    validates :tag_id, :detected_locale, presence: true
  end
end
|
||
# == Schema Information | ||
# | ||
# Table name: discourse_translator_tag_locales | ||
# | ||
# id :bigint not null, primary key | ||
# tag_id :integer not null | ||
# detected_locale :string(20) not null | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
# frozen_string_literal: true | ||
|
||
module DiscourseTranslator
  # A translated tag name for one target locale.
  #
  # Backed by the `discourse_translator_tag_translations` table. A tag can
  # have at most one translation per locale.
  class TagTranslation < ActiveRecord::Base
    self.table_name = "discourse_translator_tag_translations"

    belongs_to :tag

    # All three columns are NOT NULL in the table.
    validates :tag_id, :locale, :translation, presence: true
    # Mirrors the unique (tag_id, locale) index on the table.
    validates :locale, uniqueness: { scope: :tag_id }
  end
end
|
||
# == Schema Information | ||
# | ||
# Table name: discourse_translator_tag_translations | ||
# | ||
# id :bigint not null, primary key | ||
# tag_id :integer not null | ||
# locale :string not null | ||
# translation :text not null | ||
# created_at :datetime not null | ||
# updated_at :datetime not null | ||
# | ||
# Indexes | ||
# | ||
# idx_tag_translations_on_tag_id_and_locale (tag_id,locale) UNIQUE | ||
# |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# frozen_string_literal: true | ||
|
||
module DiscourseAi
  # Translator specialised for forum category names.
  #
  # Supplies the prompt that BaseTranslator uses through the private
  # `prompt_template` hook; all other behaviour is inherited.
  class CategoryTranslator < BaseTranslator
    # Instructions sent to the model: translate a category name into the
    # requested target_language and answer as {"translation": "..."} JSON.
    PROMPT_TEMPLATE = <<~TEXT.freeze
      You are a translation service specializing in translating forum category names to the asked target_language. Your task is to provide accurate and contextually appropriate translations while adhering to the following guidelines:

      1. Translate the category name to target_language asked
      2. Keep proper nouns and technical terms in their original language
      3. Keep the translated category name length short, and close to the original length
      4. Ensure the translation maintains the original meaning

      Provide your translation in the following JSON format:

      <output>
      {"translation": "Your target_language translation here"}
      </output>

      Here are three examples of correct translation

      Original: {"name":"Cats and Dogs", "target_language":"Chinese"}
      Correct translation: {"translation": "猫和狗"}

      Original: {"name":"General", "target_language":"French"}
      Correct translation: {"translation": "Général"}

      Original: {"name": "Q&A", "target_language": "Portuguese"}
      Correct translation: {"translation": "Perguntas e Respostas"}

      Remember to keep proper nouns like "Minecraft" and "Toyota" in their original form. Translate the category name now and provide your answer in the specified JSON format.
    TEXT

    # @return [String] the category-name translation prompt
    private def prompt_template
      PROMPT_TEMPLATE
    end
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# frozen_string_literal: true | ||
|
||
module DiscourseAi
  # Translator specialised for forum tags.
  #
  # Supplies the prompt that BaseTranslator uses through the private
  # `prompt_template` hook; all other behaviour is inherited.
  class TagTranslator < BaseTranslator
    # Instructions sent to the model: translate a tag into the requested
    # target_language, in lowercase, and answer as {"translation": "..."} JSON.
    # The guideline list is numbered 1-5 (the last item was previously a
    # duplicate "4.").
    PROMPT_TEMPLATE = <<~TEXT.freeze
      You are a translation service specializing in translating forum tags to the asked target_language. Your task is to provide accurate and contextually appropriate translations while adhering to the following guidelines:

      1. Translate the tags to target_language asked
      2. Keep proper nouns and technical terms in their original language
      3. Keep the translated tags short, close to the original length
      4. Ensure the translation maintains the original meaning
      5. Translated tags will be in lowercase

      Provide your translation in the following JSON format:

      <output>
      {"translation": "your target_language translation here"}
      </output>

      Here are three examples of correct translation

      Original: {"name":"solved", "target_language":"Chinese"}
      Correct translation: {"translation": "已解决"}

      Original: {"name":"General", "target_language":"French"}
      Correct translation: {"translation": "général"}

      Original: {"name": "Q&A", "target_language": "Portuguese"}
      Correct translation: {"translation": "perguntas e respostas"}

      Remember to keep proper nouns like "minecraft" and "toyota" in their original form. Translate the tag now and provide your answer in the specified JSON format.
    TEXT

    # @return [String] the tag translation prompt
    private def prompt_template
      PROMPT_TEMPLATE
    end
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# frozen_string_literal: true | ||
|
||
# Creates the two tables that back category translation:
#
# * discourse_translator_category_locales      - detected locale per category
# * discourse_translator_category_translations - translated name per
#   (category, locale), enforced unique by an index
class CreateCategoryTranslationTable < ActiveRecord::Migration[7.2]
  def change
    create_table :discourse_translator_category_locales do |t|
      t.integer :category_id, null: false
      t.string :detected_locale, limit: 20, null: false
      t.timestamps
    end

    create_table :discourse_translator_category_translations do |t|
      t.integer :category_id, null: false
      t.string :locale, null: false
      t.text :translation, null: false
      t.timestamps
    end

    # One translation per category and locale; the explicit name keeps the
    # index identifier short.
    add_index :discourse_translator_category_translations,
              %i[category_id locale],
              unique: true,
              name: "idx_category_translations_on_category_id_and_locale"
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# frozen_string_literal: true | ||
|
||
# Creates the two tables that back tag translation:
#
# * discourse_translator_tag_locales      - detected locale per tag
# * discourse_translator_tag_translations - translated name per
#   (tag, locale), enforced unique by an index
class CreateTagTranslationTable < ActiveRecord::Migration[7.2]
  def change
    create_table :discourse_translator_tag_locales do |t|
      t.integer :tag_id, null: false
      t.string :detected_locale, limit: 20, null: false
      t.timestamps
    end

    create_table :discourse_translator_tag_translations do |t|
      t.integer :tag_id, null: false
      t.string :locale, null: false
      t.text :translation, null: false
      t.timestamps
    end

    # One translation per tag and locale; the explicit name keeps the index
    # identifier short.
    add_index :discourse_translator_tag_translations,
              %i[tag_id locale],
              unique: true,
              name: "idx_tag_translations_on_tag_id_and_locale"
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# frozen_string_literal: true | ||
|
||
module DiscourseTranslator
  module Extensions
    # Concern meant to be prepended onto the category model.
    #
    # When prepended, it registers a before_update callback that calls
    # `clear_translations` whenever the name has changed, and it mixes in
    # Translatable.
    # NOTE(review): `clear_translations` is presumably provided by
    # Translatable - confirm against that module.
    module CategoryExtension
      extend ActiveSupport::Concern
      prepended { before_update :clear_translations, if: :name_changed? }
      include Translatable
    end
  end
end
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# frozen_string_literal: true | ||
|
||
module DiscourseTranslator
  module Extensions
    # Concern meant to be prepended onto the tag model.
    #
    # When prepended, it registers a before_update callback that calls
    # `clear_translations` whenever the name has changed, and it mixes in
    # Translatable.
    # NOTE(review): `clear_translations` is presumably provided by
    # Translatable - confirm against that module.
    module TagExtension
      extend ActiveSupport::Concern
      prepended { before_update :clear_translations, if: :name_changed? }
      include Translatable
    end
  end
end
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unlike posts and topics, categories and tags do not have a `deleted_at` column, nor can they be created by a bot.