Skip to content

Commit

Permalink
Merge pull request #13 from ubclaunchpad/diffing_algorithm
Browse files Browse the repository at this point in the history
Diffing Algorithm
  • Loading branch information
kevinrczhang authored Nov 22, 2024
2 parents 86cca58 + d7257af commit 8812162
Show file tree
Hide file tree
Showing 61 changed files with 634 additions and 13 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/django.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,4 @@ jobs:
- name: Run PIP Tests
run: |
cd i18nilize
python3 -m tests.test_read_file
python3 -m tests.test_parse_json
python3 -m tests.test_cli
python3 -m tests.test_api_helpers
python -m unittest discover tests
1 change: 1 addition & 0 deletions i18nilize/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dirsync==2.2.5
1 change: 1 addition & 0 deletions i18nilize/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ install_requires =
geocoder>=1.38.1
geopy>=2.2.0
Babel>=2.9.1
dirsync >= 2.2.5

[options.packages.find]
where = src
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
157 changes: 157 additions & 0 deletions i18nilize/src/internationalize/diffing_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import os
import hashlib
import json
from dirsync import sync
from src.internationalize.helpers import compute_hash, compute_hashes, read_json_file

# Suffix appended to a language name to form its translation file name.
JSON_EXTENSION = ".json"

# Key in a per-language change template that stores the kind of change.
TYPE = "type"
# Change kinds; used both as TYPE values and as keys that group changed
# files / changed translations.
CREATED = "created"
MODIFIED = "modified"
DELETED = "deleted"

class DiffingProcessor():
    """
    Diffing Processor Class

    Keeps a snapshot ("old state") of the translation files under
    ``diff_state/translations`` together with a ``diff_state/metadata.json``
    file of content hashes, and reports what changed between that snapshot
    and the current translations directory.
    """

    def __init__(self, curr_translations_dir):
        """
        Records the paths used for diffing; does not touch the filesystem.

        curr_translations_dir: directory holding the current translation files.
        """
        self.diff_state_root_dir = "diff_state"
        self.diff_state_files_dir = os.path.join(self.diff_state_root_dir, "translations")
        self.metadata_file_dir = os.path.join(self.diff_state_root_dir, "metadata.json")
        self.curr_translation_files_dir = curr_translations_dir

    def setup(self):
        """
        Initializes the old state of translations when package is first installed.

        Creates the snapshot directories and an empty metadata file, syncs the
        current translations into the snapshot, then stores the snapshot's
        hashes in the metadata. Failures are printed rather than raised.
        """
        try:
            # os.mkdir raises FileExistsError when the state already exists,
            # which is how a repeated setup is detected below.
            os.mkdir(self.diff_state_root_dir)
            os.mkdir(self.diff_state_files_dir)
            with open(self.metadata_file_dir, "w") as outfile:
                json.dump({}, outfile)

            # sync folders
            self.sync_translations()

            # Compute all file hashes and store hashes in metadata
            all_file_hashes = compute_hashes(self.diff_state_files_dir)
            self.update_metadata(all_file_hashes)
        except FileExistsError:
            print("Old translations directory has already been created.")
        except PermissionError:
            print("Permission denied: unable to setup old translation state.")
        except Exception as e:
            print(f"An exception occured: {e}")

    def update_to_current_state(self, hash_dict):
        """
        Updates translation files with new changes and updates hashes in metadata.

        hash_dict: mapping written to the metadata file as the new hashes.
        """
        # Sync first: if the sync fails, the metadata must not already claim
        # hashes for a snapshot that was never updated.
        self.sync_translations()
        self.update_metadata(hash_dict)

    def update_metadata(self, hash_dict):
        """Overwrites the metadata file with hash_dict serialized as JSON."""
        with open(self.metadata_file_dir, "w") as outfile:
            json.dump(hash_dict, outfile)

    def sync_translations(self):
        """
        Syncs the current translations directory into the snapshot directory
        using dirsync's "sync" action with purge=True.
        """
        # NOTE(review): purge=True is expected to remove snapshot files that no
        # longer exist in the source directory - confirm against dirsync docs.
        sync(self.curr_translation_files_dir, self.diff_state_files_dir, "sync", purge=True)

    def get_changed_files(self):
        """
        Returns the translation files that differ from the stored hashes.

        Returns a dict with the keys CREATED, MODIFIED and DELETED, each
        mapping to a list of file names (language name + JSON_EXTENSION).
        """
        # Initialize hashes
        current_hashes = compute_hashes(self.curr_translation_files_dir)

        with open(self.metadata_file_dir, "r") as file:
            original_hashes = json.load(file)

        changed_files = {
            CREATED: [],
            MODIFIED: [],
            DELETED: [],
        }

        # Find any languages that were either modified or added in the current PIP package
        for language, current_hash in current_hashes.items():
            file_name = language + JSON_EXTENSION
            if language not in original_hashes:
                changed_files[CREATED].append(file_name)
            elif original_hashes[language] != current_hash:
                changed_files[MODIFIED].append(file_name)

        # Find files that were removed from PIP package
        for language in original_hashes:
            if language not in current_hashes:
                changed_files[DELETED].append(language + JSON_EXTENSION)

        return changed_files

    def get_changed_translations(self):
        """
        Gets differences between old and new translations.

        Returns a dict keyed by language; each value is a change template
        (TYPE plus CREATED / MODIFIED / DELETED word maps). Deleted languages
        get a template whose word maps are left empty.
        """
        changed_files = self.get_changed_files()
        changed_translations = {}

        for change_type, file_names in changed_files.items():
            for file_name in file_names:
                language = file_name.split(".")[0]
                template = self.__initialize_changed_template(change_type)

                if change_type == MODIFIED:
                    # fetch modified translations
                    template = self.compare_language(file_name, template)
                elif change_type == CREATED:
                    template = self.add_language(file_name, template)

                changed_translations[language] = template

        return changed_translations

    def compare_language(self, file_name, changed_translations):
        """
        Gets differences between old and new translations for one language.

        file_name: translation file name present in both the snapshot and the
            current translations directory.
        changed_translations: change template to fill; it is mutated in place
            and also returned.
        """
        original_language_location = os.path.join(self.diff_state_files_dir, file_name)
        current_language_location = os.path.join(self.curr_translation_files_dir, file_name)

        original_language = read_json_file(original_language_location)
        current_language = read_json_file(current_language_location)

        # find modified and newly added translations
        for word, translation in current_language.items():
            if word not in original_language:
                changed_translations[CREATED][word] = translation
            elif translation != original_language[word]:
                changed_translations[MODIFIED][word] = translation

        # find removed translations
        for word, translation in original_language.items():
            if word not in current_language:
                changed_translations[DELETED][word] = translation

        return changed_translations

    def add_language(self, file_name, changed_translations):
        """
        Records every translation of a newly created language file as CREATED.

        file_name: translation file name in the current translations directory.
        changed_translations: change template to fill; it is mutated in place
            and also returned.
        """
        current_language_location = os.path.join(self.curr_translation_files_dir, file_name)
        current_language = read_json_file(current_language_location)

        changed_translations[CREATED].update(current_language)

        return changed_translations

    def __initialize_changed_template(self, change_type):
        """
        Create empty JSON template to show modifications from a language.

        change_type: value stored under TYPE (CREATED, MODIFIED or DELETED).
        """
        return {
            TYPE: change_type,
            CREATED: {},
            MODIFIED: {},
            DELETED: {},
        }
44 changes: 44 additions & 0 deletions i18nilize/src/internationalize/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import sys
import os
import hashlib
import requests
from . import globals

Expand Down Expand Up @@ -138,3 +139,46 @@ def make_translation_map(data):
def get_translation(translations_map, language):
    """
    Looks up the translation stored for `language` in `translations_map`,
    returning a placeholder message when the language has no entry.
    """
    fallback = "Translation not found"
    return translations_map.get(language, fallback)

def compute_hash(file_content):
    """
    Computes 256-bit hash for given content.

    file_content: bytes-like object to hash.
    Returns the SHA-256 digest as a hexadecimal string.
    """
    # Named `digest` rather than `hash` so the builtin is not shadowed.
    digest = hashlib.sha256()
    digest.update(file_content)
    return digest.hexdigest()

def compute_hashes(directory):
    """
    Computes hashes for all files in a directory.

    directory: path of the directory whose files are hashed.
    Returns a dict mapping each file's name up to its first "." to the hash
    of the file's content. Entries that are not regular files are skipped.
    """
    hash_dict = {}
    for file_name in os.listdir(directory):
        path = os.path.join(directory, file_name)

        # A sub-directory cannot be opened for reading; skip it instead of
        # failing the whole scan.
        if not os.path.isfile(path):
            continue

        # Read file as byte buffer for hashing
        with open(path, "rb") as file:
            file_content = file.read()

        file_name_no_ext = file_name.split(".")[0]
        hash_dict[file_name_no_ext] = compute_hash(file_content)

    return hash_dict

def read_json_file(directory):
    """
    Reads a JSON file and returns the parsed object.
    Expects file to be in json format.

    directory: path of the file to read (despite the name, a file path).
    Returns the object produced by json.load.
    Raises FileNotFoundError / IOError when the file cannot be read, and
    re-raises any other error (for example invalid JSON) after printing it.
    """
    try:
        # JSON text is read as UTF-8 instead of the platform default encoding.
        with open(directory, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"File not found: {directory}")
        raise
    except IOError:
        print(f"An error occurred while trying to read the file: {directory}")
        raise
    except Exception as e:
        print(f"An exception occured: {e}")
        # Re-raise like the handlers above; returning None here would only
        # defer the failure to the caller's first use of the result.
        raise
3 changes: 3 additions & 0 deletions i18nilize/src/internationalize/languages/chinese.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"thank you": "\u8c22\u8c22"
}
3 changes: 3 additions & 0 deletions i18nilize/src/internationalize/languages/german.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"thank you": "danke"
}
4 changes: 3 additions & 1 deletion i18nilize/src/internationalize/languages/korean.json
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
{}
{
"welcome": "\ud658\uc601\ud569\ub2c8\ub2e4"
}
Binary file removed i18nilize/tests/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file removed i18nilize/tests/__pycache__/__init__.cpython-311.pyc
Binary file not shown.
Binary file removed i18nilize/tests/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file removed i18nilize/tests/__pycache__/test.cpython-310.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"italian": {
"type": "created",
"created": {
"hello": "bonjourno",
"thanks": "grazie",
"welcome": "benvenuto"
},
"modified": {},
"deleted": {}
},
"spanish": {
"type": "created",
"created": {
"hello": "hola",
"thanks": "gracias"
},
"modified": {},
"deleted": {}
},
"french": {
"type": "created",
"created": {
"hello": "bonjour",
"thanks": "merci"
},
"modified": {},
"deleted": {}
},
"portugese": {
"type": "created",
"created": {
"hello": "ola",
"thanks": "obrigado",
"welcome": "Bem-vindo"
},
"modified": {},
"deleted": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "bonjour",
"thanks": "merci"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"hello": "bonjourno",
"thanks": "grazie",
"welcome": "benvenuto"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"hello": "ola",
"thanks": "obrigado",
"welcome": "Bem-vindo"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "hola",
"thanks": "gracias"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"italian": {
"type": "deleted",
"created": {},
"modified": {},
"deleted": {}
},
"spanish": {
"type": "deleted",
"created": {},
"modified": {},
"deleted": {}
},
"french": {
"type": "deleted",
"created": {},
"modified": {},
"deleted": {}
},
"portugese": {
"type": "deleted",
"created": {},
"modified": {},
"deleted": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "bonjour",
"thanks": "merci"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"hello": "bonjourno",
"thanks": "grazie",
"welcome": "benvenuto"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"hello": "ola",
"thanks": "obrigado",
"welcome": "Bem-vindo"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "hola",
"thanks": "gracias"
}
Loading

0 comments on commit 8812162

Please sign in to comment.