Spaces:

Aeon-Avinash
/

GenAI_Multi_Language_Translator

Runtime error

App Files Files Community

Aeon-Avinash committed on May 20, 2024

Commit

98fefd6

verified ·

1 Parent(s): abf61f0

Create lang_codes.py

Browse files

Files changed (1) hide show

lang_codes.py +72 -0

lang_codes.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import requests
+import json
+def generate_lang_code_file():
+    # URL of the Flores README containing the language codes
+    url = 'https://raw.githubusercontent.com/openlanguagedata/flores/main/README.md'
+    # Fetch the page content
+    response = requests.get(url)
+    content = response.text
+    # Extract the table content by parsing the plain text
+    lines = content.split('\n')
+    # Initialize a flag to start capturing data
+    languages = []
+    start_parsing = False
+    for line in lines:
+        if "Language coverage" in line:
+            start_parsing = True
+            continue
+        if start_parsing:
+            if line.strip() == "":
+                continue
+            if '|' not in line:
+                continue
+            parts = line.split('|')
+            if len(parts) >= 2:
+                code = parts[1].strip()[1:-1]
+                identifier = parts[2].strip()[1:-1]
+                name = parts[3].strip()
+                languages.append({"code": code, "identifier": identifier, "name": name})
+    # Omit the labels and divider
+    languages = languages[2:]
+    # Convert to JSON
+    json_data = json.dumps(languages, indent=4)
+    # Save the JSON data to a file
+    file_path = '/teamspace/studios/this_studio/multi-lang-translator/flores_language_codes.json'
+    with open(file_path, 'w') as file:
+        file.write(json_data)
+    print(f"JSON data saved to {file_path}")
+# generate_lang_code_file()
+def get_language_code(language_name,
+                      json_file_path='/teamspace/studios/this_studio/multi-lang-translator/flores_language_codes.json'):
+    # Load the JSON data from the file
+    with open(json_file_path, 'r') as file:
+        languages = json.load(file)
+    # Search for the language code by language name
+    for language in languages:
+        if language['name'].lower() == language_name.lower():
+            return language['code']
+    return None  # Return None if the language name is not found
+def get_language_list(
+                json_file_path='/teamspace/studios/this_studio/multi-lang-translator/flores_language_codes.json'):
+    # Load the JSON data from the file
+    with open(json_file_path, 'r') as file:
+        languages = json.load(file)
+    # extract language name
+    language_names = [language['name'] for language in languages]
+    return language_names