Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parse DataPack on backend #232

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
/coverage

# production
/build
build/

# misc
.DS_Store
Expand All @@ -36,4 +36,5 @@ simple-backend/db-old.sqlite3

.eslintcache
stave.iml
package-lock.json
package-lock.json
*.egg-info
6 changes: 2 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,9 @@
'requests==2.25.1',
'django>=3.0.4',
'django-guardian==2.3.0',
'tornado==6.1'
'tornado==6.1',
'forte',
],
extras_require={
"forte": ["forte"],
},
entry_points={
'console_scripts':[
'stave = stave_backend.lib.stave_cli:main'
Expand Down
6 changes: 6 additions & 0 deletions simple-backend/sample_sql/stave_backend_project.sql
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,12 @@ INSERT INTO "stave_backend_project" ("id","name","ontology","user_id","config",

"name": "all_ontology",

"additional_prefixes": [

"edu.cmu"

],

"definitions": [

{
Expand Down
93 changes: 26 additions & 67 deletions simple-backend/stave_backend/handlers/document.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from django.contrib import admin
from django.urls import include, path
from django.http import HttpResponse, JsonResponse, Http404
from django.forms import model_to_dict
import uuid
import json
from django.contrib.auth.decorators import permission_required
from guardian.decorators import permission_required_or_403
from ..models import Document, User, Project, Job
from ..models import Document, Project, Job
from ..lib.require_login import require_login
from ..lib.utils import fetch_doc_check_perm, check_perm_project, fetch_job
from ..lib.utils import (
fetch_doc_check_perm, check_perm_project, fetch_job
)
from ..lib.stave_pack_parser import StavePackParser

@require_login
@permission_required('stave_backend.view_document', raise_exception=True)
Expand Down Expand Up @@ -228,18 +228,15 @@ def new_annotation(request, document_id):
# }

doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.edit_annotation")

docJson = model_to_dict(doc)
textPackJson = json.loads(docJson['textPack'])

annotation_id = uuid.uuid4().int

received_json_data = json.loads(request.body)
annotation = received_json_data.get('data')
annotation["py/state"]['_tid'] = annotation_id

textPackJson['py/state']['annotations'].append(annotation)
doc.textPack = json.dumps(textPackJson)
doc.textPack = StavePackParser(
raw_pack=doc.textPack, raw_ontology=doc.project.ontology
).add_entry_to_doc(entry_dict=annotation)
doc.save()

return JsonResponse({"id": str(annotation_id)}, safe=False)
Expand Down Expand Up @@ -270,18 +267,9 @@ def edit_annotation(request, document_id, annotation_id):
OK if succeeded, otherwise forbidden or not found
"""
doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.edit_annotation")

received_json_data = json.loads(request.body)
annotation = received_json_data.get('data')

docJson = model_to_dict(doc)
textPackJson = json.loads(docJson['textPack'])

for index, item in enumerate(textPackJson['py/state']['annotations']):
if item["py/state"]['_tid'] == annotation_id:
textPackJson['py/state']['annotations'][index] = annotation

doc.textPack = json.dumps(textPackJson)
doc.textPack = StavePackParser(
raw_pack=doc.textPack, raw_ontology=doc.project.ontology
).edit_entry_in_doc(entry_dict=json.loads(request.body).get('data'))
doc.save()

return HttpResponse('OK')
Expand Down Expand Up @@ -310,17 +298,9 @@ def delete_annotation(request, document_id, annotation_id):

# if doc doesn't exist
doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.edit_annotation")

docJson = model_to_dict(doc)
textPackJson = json.loads(docJson['textPack'])

deleteIndex = -1
for index, item in enumerate(textPackJson['py/state']['annotations']):
if item["py/state"]['_tid'] == annotation_id:
deleteIndex = index

del textPackJson['py/state']['annotations'][deleteIndex]
doc.textPack = json.dumps(textPackJson)
doc.textPack = StavePackParser(
raw_pack=doc.textPack, raw_ontology=doc.project.ontology
).delete_annotation_from_doc(entry_tid=annotation_id)
doc.save()

return HttpResponse('OK')
Expand Down Expand Up @@ -371,10 +351,9 @@ def new_link(request, document_id):
link = received_json_data.get('data')
link["py/state"]['_tid'] = link_id
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we are using a data structure here that is very similar to the Forte format, and that relies on some understanding of its internals. I feel this would still leave the system dependent on a particular Forte version.


docJson = model_to_dict(doc)
textPackJson = json.loads(docJson['textPack'])
textPackJson['py/state']['links'].append(link)
doc.textPack = json.dumps(textPackJson)
doc.textPack = StavePackParser(
raw_pack=doc.textPack, raw_ontology=doc.project.ontology
).add_entry_to_doc(entry_dict=link)
doc.save()

return JsonResponse({"id": str(link_id)}, safe=False)
Expand All @@ -397,18 +376,9 @@ def edit_link(request, document_id, link_id):
OK if succeeded, otherwise forbidden or not found.
"""
doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.edit_annotation")

received_json_data = json.loads(request.body)
link = received_json_data.get('data')

docJson = model_to_dict(doc)
textPackJson = json.loads(docJson['textPack'])

for index, item in enumerate(textPackJson['py/state']['links']):
if item["py/state"]['_tid'] == link_id:
textPackJson['py/state']['links'][index] = link

doc.textPack = json.dumps(textPackJson)
doc.textPack = StavePackParser(
raw_pack=doc.textPack, raw_ontology=doc.project.ontology
).edit_entry_in_doc(entry_dict=json.loads(request.body).get('data'))
doc.save()

return HttpResponse('OK')
Expand All @@ -431,17 +401,9 @@ def delete_link(request, document_id, link_id):
OK if succeeded, otherwise forbidden or not found.
"""
doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.edit_annotation")

docJson = model_to_dict(doc)
textPackJson = json.loads(docJson['textPack'])

deleteIndex = -1
for index, item in enumerate(textPackJson['py/state']['links']):
if item["py/state"]['_tid'] == link_id:
deleteIndex = index

del textPackJson['py/state']['links'][deleteIndex]
doc.textPack = json.dumps(textPackJson)
doc.textPack = StavePackParser(
raw_pack=doc.textPack, raw_ontology=doc.project.ontology
).delete_link_from_doc(entry_tid=link_id)
doc.save()

return HttpResponse('OK')
Expand All @@ -468,14 +430,11 @@ def get_doc_ontology_pack(request, document_id):
"""
doc = fetch_doc_check_perm(document_id, request.user, "stave_backend.read_project")

# Convert every large integer to string to prevent precision loss
# In javascript, integers are accurate up to 15 digits.
textPackJson = json.loads(doc.textPack,
parse_int=lambda si: int(si) if len(si) < 15 else si)

docJson = {
'id': document_id,
'textPack': json.dumps(textPackJson),
'textPack': json.dumps(StavePackParser(
raw_pack=doc.textPack, raw_ontology=doc.project.ontology
).transform_pack()),
'ontology': doc.project.ontology
}

Expand Down
11 changes: 7 additions & 4 deletions simple-backend/stave_backend/handlers/nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from ..models import Document
from ..lib.require_login import require_login
from ..lib.stave_pack_parser import StavePackParser

forte_msg = "Forte is not installed or imported successfully. To get NLP support from Forte, install it from https://github.com/asyml/forte"
forte_installed = False
Expand Down Expand Up @@ -110,12 +111,14 @@ def run_pipeline(request, document_id: int):
if pipeline:
processedPack = pipeline.process([docJson['textPack']])
doc.textPack = processedPack.to_string(True)
doc.save()
response = JsonResponse(model_to_dict(doc), safe=False)
doc.save()
docJson = model_to_dict(doc)
else:
logging.error(
f"The NLP model of name {model_name} is not "
f"loaded, please check the log for possible reasons."
)
response = JsonResponse(docJson, safe=False)
return response
docJson["textPack"] = json.dumps(StavePackParser(
raw_pack=doc.textPack, raw_ontology=doc.project.ontology
).transform_pack())
return JsonResponse(docJson, safe=False)
Loading