Skip to content

Commit

Permalink
Created diffing algorithm draft
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexLuo602 committed Nov 14, 2024
1 parent 48e1220 commit 0ae27c8
Show file tree
Hide file tree
Showing 13 changed files with 219 additions and 43 deletions.
120 changes: 99 additions & 21 deletions i18nilize/src/internationalize/diffing_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,15 @@
import json
from dirsync import sync

# Diffing Processor Class
# Extension appended to a language name to build its translation file name.
JSON_EXTENSION = ".json"

# Keys of the per-language result dictionary built by compare_language.
CREATED = "created"
MODIFIED = "modified"
DELETED = "deleted"

"""
Diffing Processor Class
"""
class DiffingProcessor():
def __init__(self, curr_translations_dir):
self.diff_state_root_dir = "diff_state"
Expand Down Expand Up @@ -42,25 +50,6 @@ def update_to_current_state(self, changed_files_list, hash_dict):
self.update_metadata(changed_files_list, hash_dict)
self.sync_translations()

"""
Gets differences between old and new translations and sets new state
of translations.
"""
def diff(self):
    """
    Record the current translation files as the new stored state.

    Draft implementation: the hashes of the current translation files are
    computed and passed to update_to_current_state together with an empty
    list of changed files. Per-file diffing (added / modified / deleted
    translations) is not implemented yet, so nothing is returned.
    """
    # compute_hashes is a module-level helper, not a method, so it must not
    # be looked up on self.
    new_hashes_dict = compute_hashes(self.curr_translation_files_dir)

    # TODO: get the files that changed by comparing hashes
    changed_files_list = []

    # TODO: perform diffing on the changed files and collect
    # added, modified, deleted

    # Update metadata and old state
    self.update_to_current_state(changed_files_list, new_hashes_dict)

    # TODO: return added, modified, deleted

def update_metadata(self, changed_files_list, hash_dict):
metadata = {}
with open(self.metadata_file_dir) as file:
Expand All @@ -76,11 +65,85 @@ def update_metadata(self, changed_files_list, hash_dict):
def sync_translations(self):
    """
    Sync the current translation files into the diff-state directory.

    Runs dirsync's sync in "sync" mode with purge=True, using the current
    translations directory as the source and the diff-state files
    directory as the target.
    """
    source_dir = self.curr_translation_files_dir
    target_dir = self.diff_state_files_dir
    sync(source_dir, target_dir, "sync", purge=True)

"""
Returns a list of all the files that have been modified
"""
def get_changed_files(self):
    """
    List the translation files that differ from the stored state.

    The hashes of the current translation files are compared against the
    hashes loaded from the metadata file. Files that are new or whose hash
    differs come first (in the iteration order of the current hashes),
    followed by files that only exist in the stored metadata.

    Returns:
        A list of file names (language name plus JSON_EXTENSION).
    """
    current_hashes = compute_hashes(self.curr_translation_files_dir)

    with open(self.metadata_file_dir, "r") as metadata_file:
        original_hashes = json.load(metadata_file)

    # Languages that were added to, or modified in, the current translations.
    added_or_modified = [
        name + JSON_EXTENSION
        for name, digest in current_hashes.items()
        if not (name in original_hashes and original_hashes[name] == digest)
    ]

    # Languages that are recorded in the metadata but no longer present.
    removed = [
        name + JSON_EXTENSION
        for name in original_hashes
        if name not in current_hashes
    ]

    return added_or_modified + removed

"""
Gets differences between old and new translations
"""
def get_changed_translations(self):
    """
    Collect the translation differences for every changed language file.

    Returns:
        A dictionary mapping each language name (the changed file's name
        without its extension) to the result of compare_language for that
        file.
    """
    # Local import keeps this method self-contained regardless of the
    # module's import block.
    import os

    changed_translations = {}

    for file_name in self.get_changed_files():
        # Strip only the final extension so language names that contain a
        # dot (e.g. "pt.BR.json" -> "pt.BR") are not truncated.
        language = os.path.splitext(file_name)[0]
        changed_translations[language] = self.compare_language(file_name)

    # NOTE: metadata and the stored old state are intentionally not updated
    # here for now.
    return changed_translations

"""
Gets differences between old and new translations for one language
"""
def compare_language(self, file_name):
    """
    Compute the translation differences for a single language file.

    Args:
        file_name: Name of the language file, looked up in both the
            diff-state directory (old version) and the current
            translations directory (new version).

    Returns:
        A dictionary with the keys CREATED, MODIFIED and DELETED. CREATED
        and MODIFIED map words to their current translation; DELETED maps
        removed words to their previous translation.
    """
    # Local import keeps this method self-contained regardless of the
    # module's import block.
    import os

    # os.path.join instead of a hard-coded "\\" so the paths are valid on
    # every platform, not only on Windows.
    original_language_location = os.path.join(self.diff_state_files_dir, file_name)
    current_language_location = os.path.join(self.curr_translation_files_dir, file_name)

    # A newly added language has no stored copy and a removed language has
    # no current copy; treat the missing side as having no translations so
    # every word is reported as created or deleted respectively.
    original_language = (
        read_language(original_language_location)
        if os.path.exists(original_language_location)
        else {}
    )
    current_language = (
        read_language(current_language_location)
        if os.path.exists(current_language_location)
        else {}
    )

    created = {
        word: translation
        for word, translation in current_language.items()
        if word not in original_language
    }
    modified = {
        word: translation
        for word, translation in current_language.items()
        if word in original_language and translation != original_language[word]
    }
    deleted = {
        word: translation
        for word, translation in original_language.items()
        if word not in current_language
    }

    return {CREATED: created, MODIFIED: modified, DELETED: deleted}



"""
Helper functions
"""

def compute_hash(file_content):
hash = hashlib.sha256()
hash.update(file_content)
Expand All @@ -100,3 +163,18 @@ def compute_hashes(directory):
hash_dict[file_name_no_ext] = file_hash

return hash_dict

"""
Reads a language file given the directory and returns json object
"""
def read_language(directory):
    """
    Read a language file and return its parsed JSON content.

    Args:
        directory: Path of the JSON language file to read (despite the
            parameter name, this is passed to open() as a file path).

    Returns:
        The object produced by json.load for the file.

    Raises:
        FileNotFoundError: If the file does not exist (re-raised after
            printing a message).
        IOError: If another I/O error occurs while reading (re-raised
            after printing a message).
    """
    try:
        # Translation files routinely contain non-ASCII text; decode them
        # as UTF-8 explicitly instead of relying on the platform default.
        with open(directory, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"File not found: {directory}")
        raise
    except IOError:
        print(f"An error occurred while trying to read the file: {directory}")
        raise
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "bonjour",
"thanks": "merci"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"hello": "bonjourno",
"thanks": "grazie",
"welcome": "benvenuto"
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{
"language": "Spanish",
"hello": "hola",
"thanks": "gracias"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "bonjourno",
"thanks": "La ringrazio"
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
{
"language": "Spanish",
"hello": "hola",
"hello": "holi",
"thanks": "gracias",
"welcome": "bienvenido"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "bonjour",
"thanks": "merci"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"hello": "bonjourno",
"thanks": "grazie",
"welcome": "benvenuto"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "hola",
"thanks": "gracias"
}
5 changes: 0 additions & 5 deletions i18nilize/tests/resources/modified_translations/italian.json

This file was deleted.

5 changes: 0 additions & 5 deletions i18nilize/tests/resources/test_translations/italian.json

This file was deleted.

57 changes: 50 additions & 7 deletions i18nilize/tests/test_diffing.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,24 @@
import filecmp
import json
import shutil
from tests.util.test_diffing_util import DiffingTestUtil
from src.internationalize.diffing_processor import compute_hashes, DiffingProcessor

class TestDiffing(unittest.TestCase):

def setUp(self):
    # Prepare the directories, test data and DiffingProcessor used by the tests.
    # NOTE(review): test_translations_dir is assigned twice below; the later
    # assignment is the one in effect afterwards.
    self.test_translations_dir = "tests/resources/test_translations"
    self.modified_translations_dir = "tests/resources/modified_translations"
    self.test_translations_dir = "tests/resources/diffing_algorithm/test_translations/"
    self.basic_data_location = "tests/resources/diffing_algorithm/basic_initial_translations/"
    self.basic_modified_data_location = "tests/resources/diffing_algorithm/basic_modified_translations/"

    # initialize util class and populate the test directory from the
    # basic initial translations
    self.util = DiffingTestUtil(self.test_translations_dir)
    self.util.initialize_test_data(self.basic_data_location)

    # initialize diffing processor on top of the freshly created test data
    self.dp = DiffingProcessor(self.test_translations_dir)
    self.dp.setup()

# tear down diffing processor instance
def tearDown(self):
if os.path.exists(self.dp.diff_state_root_dir):
shutil.rmtree(self.dp.diff_state_root_dir)
Expand All @@ -38,9 +44,45 @@ def test_initialization(self):
self.assertTrue(len(mismatch) == 0)
self.assertTrue(len(errors) == 0)


def test_find_changed_files_basic(self):
    """
    After overwriting the test data with the modified translations,
    get_changed_files must report exactly the Italian and Spanish files.
    """
    self.util.bulk_modify_test_data(self.basic_modified_data_location)
    expected_changed_files = ["italian.json", "spanish.json"]
    changed_files = self.dp.get_changed_files()
    # The result order follows the iteration order of the hash dictionary,
    # which nothing here pins down, so compare the contents without
    # depending on order (duplicates are still detected).
    self.assertCountEqual(changed_files, expected_changed_files)


def test_find_changed_translations_basic(self):
    """
    After overwriting the test data with the modified translations,
    get_changed_translations must report the expected created, modified
    and deleted entries for Italian and Spanish.
    """
    self.util.bulk_modify_test_data(self.basic_modified_data_location)

    italian_changes = {
        "created": {},
        "modified": {"thanks": "La ringrazio"},
        "deleted": {"welcome": "benvenuto"},
    }
    spanish_changes = {
        "created": {"welcome": "bienvenido"},
        "modified": {"hello": "holi"},
        "deleted": {},
    }
    expected = {"italian": italian_changes, "spanish": spanish_changes}

    actual = self.dp.get_changed_translations()
    self.assertEqual(actual, expected)



def test_updating_state(self):
hashes = compute_hashes(self.modified_translations_dir)
changed_files = ["spanish.json"]
hashes = compute_hashes(self.test_translations_dir)
changed_files = ["italian.json", "spanish.json"]
self.dp.update_to_current_state(changed_files, hashes)

updated_metadata = {}
Expand All @@ -64,5 +106,6 @@ def test_updating_state(self):
# print(match)
# self.assertTrue(len(match) == 2)

if __name__ == '__main__':
unittest.main()

if __name__ == '__main__':
unittest.main()
41 changes: 41 additions & 0 deletions i18nilize/tests/util/test_diffing_util.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import json
import os
import shutil
from src.internationalize.diffing_processor import read_language

class DiffingTestUtil():
    """
    Helper that manages a scratch directory of language files for the
    diffing tests.
    """

    def __init__(self, test_directory):
        # Directory that holds the language files the tests operate on.
        self.test_directory = test_directory

    def initialize_test_data(self, directory):
        """
        Initialize test data for the diffing algorithm.

        Removes any existing test directory, recreates it and copies every
        language file found in `directory` into it.
        """
        self.clear_test_data()
        # makedirs also creates missing parent directories, so the test
        # directory may be nested in a path that does not exist yet.
        os.makedirs(self.test_directory)

        # this will create all the language files
        self.bulk_modify_test_data(directory)

    def bulk_modify_test_data(self, directory):
        """
        Overwrite the test data with the language files from `directory`.

        Every file in `directory` is read and written into the test
        directory under the same name. Doesn't support removing a language.
        """
        for file_name in os.listdir(directory):
            # os.path.join works whether or not the directories were given
            # with a trailing separator.
            language_data = read_language(os.path.join(directory, file_name))
            target_path = os.path.join(self.test_directory, file_name)
            with open(target_path, "w", encoding="utf-8") as json_file:
                json.dump(language_data, json_file, indent=4)

    def modify_test_data(self, language, word, translation):
        """
        Modify a translation based on the given language, word and
        translation. Not implemented yet; might already be implemented in
        the PIP package.
        """
        pass

    def clear_test_data(self):
        """Delete the test directory and its contents if it exists."""
        if os.path.exists(self.test_directory):
            shutil.rmtree(self.test_directory)

0 comments on commit 0ae27c8

Please sign in to comment.