Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Diffing Algorithm #13

Merged
merged 36 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
d12f125
added stub functions
andrew-fenton Nov 5, 2024
e0d2f88
implemented setup
andrew-fenton Nov 6, 2024
3129226
abstracted out hashing functions
andrew-fenton Nov 6, 2024
c1b2892
cleaned up file
andrew-fenton Nov 6, 2024
c9ce287
skeleton for diffing algo
andrew-fenton Nov 7, 2024
b74659c
changed function name
andrew-fenton Nov 7, 2024
7f1a2b8
changed order of functions for readability
andrew-fenton Nov 7, 2024
9ac2fdc
added tests for updating metadata and state
andrew-fenton Nov 7, 2024
7eaefca
code refactoring, align diffing_processor with new engineering tasks
AlexLuo602 Nov 9, 2024
8db0141
fixed test bug
andrew-fenton Nov 11, 2024
578df7b
remove pycache
andrew-fenton Nov 11, 2024
1b1933a
changed directory names
andrew-fenton Nov 11, 2024
48e1220
Merge branch 'main' of https://github.com/ubclaunchpad/localization i…
AlexLuo602 Nov 14, 2024
0ae27c8
Created diffing algorithm draft
AlexLuo602 Nov 14, 2024
40f8f39
Changed formatting of diff results
AlexLuo602 Nov 15, 2024
6b29f5d
standardized diffing test folders to allow for more complex tests to …
AlexLuo602 Nov 15, 2024
22c194f
remove excessive folders after diffing test
AlexLuo602 Nov 15, 2024
c4c21db
Move the diff test folders into its own folder to prevent clashing wi…
AlexLuo602 Nov 15, 2024
6ffc187
dump all newly computed hashes instead of only pushing updates to met…
andrew-fenton Nov 15, 2024
dcdf23d
automated tests
AlexLuo602 Nov 16, 2024
fc74577
disabled dirsync logging
andrew-fenton Nov 16, 2024
8b6d913
added dummy folders for testing
andrew-fenton Nov 16, 2024
0f8eae8
revert dummy folders
andrew-fenton Nov 16, 2024
8d0d90e
added dummy test folders (this time they work)
andrew-fenton Nov 16, 2024
c6855ff
added updating metadata tests
andrew-fenton Nov 19, 2024
9910118
added sync test
andrew-fenton Nov 20, 2024
b361e56
Merge branch 'main' into diffing_algorithm
andrew-fenton Nov 20, 2024
624f42b
Merge branch 'main' into diffing_algorithm
kevinrczhang Nov 20, 2024
23ef39a
merge into main, update ci yaml
kevinrczhang Nov 20, 2024
a4ca66e
update pipeline
kevinrczhang Nov 20, 2024
3f114cc
repair tests
kevinrczhang Nov 20, 2024
6882334
add workaround for broken test
kevinrczhang Nov 20, 2024
853de6c
add dirsync to cfg
kevinrczhang Nov 20, 2024
8dcab79
fixed failing test
andrew-fenton Nov 20, 2024
7282622
moved read_json_file to helpers
andrew-fenton Nov 20, 2024
d7257af
moved diff_processor helper functions to helper.py
andrew-fenton Nov 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions .github/workflows/django.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,4 @@ jobs:
- name: Run PIP Tests
run: |
cd i18nilize
python3 -m tests.test_read_file
python3 -m tests.test_parse_json
python3 -m tests.test_cli
python3 -m tests.test_api_helpers
python -m unittest discover tests
1 change: 1 addition & 0 deletions i18nilize/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dirsync==2.2.5
1 change: 1 addition & 0 deletions i18nilize/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ install_requires =
geocoder>=1.38.1
geopy>=2.2.0
Babel>=2.9.1
dirsync >= 2.2.5

[options.packages.find]
where = src
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
157 changes: 157 additions & 0 deletions i18nilize/src/internationalize/diffing_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
import os
import hashlib
import json
from dirsync import sync
from src.internationalize.helpers import compute_hash, compute_hashes, read_json_file

# Suffix appended to a language name to form its translation file name.
JSON_EXTENSION = ".json"

# Keys of a per-language diff report. CREATED / MODIFIED / DELETED are also
# used as the change kinds that group changed files.
TYPE = "type"
CREATED, MODIFIED, DELETED = "created", "modified", "deleted"

class DiffingProcessor:
    """Detect changes between current translation files and a stored snapshot.

    The snapshot lives under ``diff_state``: a copy of the translation files
    in ``diff_state/translations`` and a mapping of language name to content
    hash in ``diff_state/metadata.json``. Both paths are relative, so they
    resolve against the process's current working directory.
    """

    def __init__(self, curr_translations_dir):
        """Record the snapshot locations and the live translations directory.

        Args:
            curr_translations_dir: Directory holding the current translation
                files that are compared against the snapshot.
        """
        self.diff_state_root_dir = "diff_state"
        self.diff_state_files_dir = os.path.join(self.diff_state_root_dir, "translations")
        # Despite the ``_dir`` suffix this is the path of the metadata *file*.
        self.metadata_file_dir = os.path.join(self.diff_state_root_dir, "metadata.json")
        self.curr_translation_files_dir = curr_translations_dir

    def setup(self):
        """Initialize the snapshot state the first time the package is used.

        Creates the snapshot directories and an empty metadata file, copies
        the current translations into the snapshot, then stores the hashes of
        the copied files as metadata.

        This is best-effort: every failure (including the snapshot directory
        already existing) is reported on stdout and not re-raised.
        """
        try:
            os.mkdir(self.diff_state_root_dir)
            os.mkdir(self.diff_state_files_dir)
            with open(self.metadata_file_dir, "w", encoding="utf-8") as outfile:
                json.dump({}, outfile)

            # Copy the current translations into the snapshot directory.
            self.sync_translations()

            # Hash the snapshot copies and persist the hashes as metadata.
            all_file_hashes = compute_hashes(self.diff_state_files_dir)
            self.update_metadata(all_file_hashes)
        except FileExistsError:
            print("Old translations directory has already been created.")
        except PermissionError:
            print("Permission denied: unable to setup old translation state.")
        except Exception as e:
            print(f"An exception occured: {e}")

    def update_to_current_state(self, hash_dict):
        """Bring the snapshot in line with the current translations.

        Args:
            hash_dict: Mapping written verbatim to the metadata file before
                the translation files are synced.
        """
        self.update_metadata(hash_dict)
        self.sync_translations()

    def update_metadata(self, hash_dict):
        """Overwrite the metadata file with ``hash_dict`` serialized as JSON."""
        with open(self.metadata_file_dir, "w", encoding="utf-8") as outfile:
            json.dump(hash_dict, outfile)

    def sync_translations(self):
        """Sync the current translations directory into the snapshot directory.

        Delegates to ``dirsync.sync`` with the "sync" action and
        ``purge=True``.
        NOTE(review): purge presumably removes snapshot files that no longer
        exist in the source directory - confirm against the dirsync docs.
        """
        sync(self.curr_translation_files_dir, self.diff_state_files_dir, "sync", purge=True)

    def get_changed_files(self):
        """Classify translation files as created, modified or deleted.

        Hashes of the current translation files are compared with the hashes
        stored in the metadata file.

        Returns:
            A dict with the keys ``CREATED``, ``MODIFIED`` and ``DELETED``,
            each mapping to a list of file names (language name plus
            ``JSON_EXTENSION``).
        """
        current_hashes = compute_hashes(self.curr_translation_files_dir)

        with open(self.metadata_file_dir, "r", encoding="utf-8") as file:
            original_hashes = json.load(file)

        return {
            # Languages present now but absent from the stored metadata.
            CREATED: [
                language + JSON_EXTENSION
                for language in current_hashes
                if language not in original_hashes
            ],
            # Languages present in both whose content hash differs.
            MODIFIED: [
                language + JSON_EXTENSION
                for language, current_hash in current_hashes.items()
                if language in original_hashes
                and original_hashes[language] != current_hash
            ],
            # Languages recorded in the metadata that no longer exist.
            DELETED: [
                language + JSON_EXTENSION
                for language in original_hashes
                if language not in current_hashes
            ],
        }

    def get_changed_translations(self):
        """Build a per-language report of translation changes.

        Returns:
            A dict keyed by language name (the file name up to its first
            dot). Each value is a report with ``TYPE`` plus ``CREATED``,
            ``MODIFIED`` and ``DELETED`` sub-dicts. Modified files get an
            entry-level diff, created files list every entry under
            ``CREATED``, and deleted files carry only their type.
        """
        changed_files = self.get_changed_files()
        changed_translations = {}

        for change_type, file_names in changed_files.items():
            for file_name in file_names:
                language = file_name.split(".")[0]
                report = self.__initialize_changed_template(change_type)

                if change_type == MODIFIED:
                    report = self.compare_language(file_name, report)
                elif change_type == CREATED:
                    report = self.add_language(file_name, report)

                changed_translations[language] = report

        return changed_translations

    def compare_language(self, file_name, changed_translations):
        """Diff one language file between the snapshot and the current state.

        Args:
            file_name: Name of the translation file, looked up in both the
                snapshot directory and the current translations directory.
            changed_translations: Report dict to fill in; mutated in place.

        Returns:
            The same ``changed_translations`` dict. New and changed entries
            hold the current translation; deleted entries hold the old one.
        """
        original_language_location = os.path.join(self.diff_state_files_dir, file_name)
        current_language_location = os.path.join(self.curr_translation_files_dir, file_name)

        original_language = read_json_file(original_language_location)
        current_language = read_json_file(current_language_location)

        # Entries that are new or whose translation changed.
        for word, translation in current_language.items():
            if word not in original_language:
                changed_translations[CREATED][word] = translation
            elif translation != original_language[word]:
                changed_translations[MODIFIED][word] = translation

        # Entries that existed in the snapshot but were removed.
        for word, translation in original_language.items():
            if word not in current_language:
                changed_translations[DELETED][word] = translation

        return changed_translations

    def add_language(self, file_name, changed_translations):
        """Report every entry of a newly created language file as created.

        Args:
            file_name: Name of the file inside the current translations
                directory.
            changed_translations: Report dict to fill in; mutated in place.

        Returns:
            The same ``changed_translations`` dict.
        """
        current_language_location = os.path.join(self.curr_translation_files_dir, file_name)
        current_language = read_json_file(current_language_location)

        changed_translations[CREATED].update(current_language.items())

        return changed_translations

    # The parameter name shadows the builtin but is kept so the signature
    # stays unchanged.
    def __initialize_changed_template(self, type):
        """Return an empty change report for one language, tagged with ``type``."""
        return {
            TYPE: type,
            CREATED: {},
            MODIFIED: {},
            DELETED: {},
        }
44 changes: 44 additions & 0 deletions i18nilize/src/internationalize/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import sys
import os
import hashlib
import requests
from . import globals

Expand Down Expand Up @@ -138,3 +139,46 @@ def make_translation_map(data):
def get_translation(translations_map, language):
    """Look up ``language`` in ``translations_map``.

    Returns the mapped value, or the string "Translation not found" when the
    language has no entry.
    """
    fallback = "Translation not found"
    translation = translations_map.get(language, fallback)
    return translation

def compute_hash(file_content):
    """Return the SHA-256 digest of ``file_content`` as a hex string.

    Args:
        file_content: Bytes-like object to hash.
    """
    return hashlib.sha256(file_content).hexdigest()


def compute_hashes(directory):
    """Hash every regular file directly inside ``directory``.

    Subdirectories and other non-file entries are skipped; opening them as
    files would raise and abort the whole scan.

    Args:
        directory: Path of the directory to scan (not recursive).

    Returns:
        A dict mapping each file's name up to its first dot to the SHA-256
        hex digest of the file's bytes. Files sharing that prefix (for
        example ``a.json`` and ``a.bak``) map to the same key, so a later
        entry overwrites an earlier one.
    """
    hash_dict = {}
    for file_name in os.listdir(directory):
        path = os.path.join(directory, file_name)
        if not os.path.isfile(path):
            continue

        # Read as raw bytes so the hash does not depend on text decoding.
        with open(path, "rb") as file:
            file_content = file.read()

        file_name_no_ext = file_name.split(".")[0]
        hash_dict[file_name_no_ext] = compute_hash(file_content)

    return hash_dict

def read_json_file(directory):
    """Read a JSON file and return the parsed object.

    Args:
        directory: Path of the JSON *file* to read (the parameter name is
            kept for backward compatibility).

    Returns:
        The parsed JSON value, or ``None`` when an error other than an I/O
        failure occurs (for example malformed JSON); that error is reported
        on stdout.

    Raises:
        FileNotFoundError: If the file does not exist.
        IOError: If the file cannot be read.
    """
    try:
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(directory, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"File not found: {directory}")
        raise
    except IOError:
        print(f"An error occurred while trying to read the file: {directory}")
        raise
    except Exception as e:
        print(f"An exception occured: {e}")
        # Existing contract: non-I/O failures are reported, not raised.
        return None
3 changes: 3 additions & 0 deletions i18nilize/src/internationalize/languages/chinese.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"thank you": "\u8c22\u8c22"
}
3 changes: 3 additions & 0 deletions i18nilize/src/internationalize/languages/german.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"thank you": "danke"
}
4 changes: 3 additions & 1 deletion i18nilize/src/internationalize/languages/korean.json
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
{}
{
"welcome": "\ud658\uc601\ud569\ub2c8\ub2e4"
kevinrczhang marked this conversation as resolved.
Show resolved Hide resolved
}
Binary file removed i18nilize/tests/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file removed i18nilize/tests/__pycache__/__init__.cpython-311.pyc
Binary file not shown.
Binary file removed i18nilize/tests/__pycache__/__init__.cpython-312.pyc
Binary file not shown.
Binary file removed i18nilize/tests/__pycache__/test.cpython-310.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"italian": {
"type": "created",
"created": {
"hello": "bonjourno",
"thanks": "grazie",
"welcome": "benvenuto"
},
"modified": {},
"deleted": {}
},
"spanish": {
"type": "created",
"created": {
"hello": "hola",
"thanks": "gracias"
},
"modified": {},
"deleted": {}
},
"french": {
"type": "created",
"created": {
"hello": "bonjour",
"thanks": "merci"
},
"modified": {},
"deleted": {}
},
"portugese": {
"type": "created",
"created": {
"hello": "ola",
"thanks": "obrigado",
"welcome": "Bem-vindo"
},
"modified": {},
"deleted": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "bonjour",
"thanks": "merci"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"hello": "bonjourno",
"thanks": "grazie",
"welcome": "benvenuto"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"hello": "ola",
"thanks": "obrigado",
"welcome": "Bem-vindo"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "hola",
"thanks": "gracias"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"italian": {
"type": "deleted",
"created": {},
"modified": {},
"deleted": {}
},
"spanish": {
"type": "deleted",
"created": {},
"modified": {},
"deleted": {}
},
"french": {
"type": "deleted",
"created": {},
"modified": {},
"deleted": {}
},
"portugese": {
"type": "deleted",
"created": {},
"modified": {},
"deleted": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "bonjour",
"thanks": "merci"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"hello": "bonjourno",
"thanks": "grazie",
"welcome": "benvenuto"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"hello": "ola",
"thanks": "obrigado",
"welcome": "Bem-vindo"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"hello": "hola",
"thanks": "gracias"
}
Loading
Loading