From a0dc465b128c7b5967560d3199b72a41639269a2 Mon Sep 17 00:00:00 2001
From: Viswanatha Reddy Gajjala <viswanatha.g@aganitha.ai>
Date: Sun, 27 Jun 2021 01:30:58 +0530
Subject: [PATCH] Added num2words

---
 transformations/num2words/README.md         | 19 ++++++
 transformations/num2words/__init__.py       |  1 +
 transformations/num2words/test.json         | 50 ++++++++++++++++
 transformations/num2words/transformation.py | 66 +++++++++++++++++++++
 4 files changed, 136 insertions(+)
 create mode 100644 transformations/num2words/README.md
 create mode 100644 transformations/num2words/__init__.py
 create mode 100644 transformations/num2words/test.json
 create mode 100644 transformations/num2words/transformation.py

diff --git a/transformations/num2words/README.md b/transformations/num2words/README.md
new file mode 100644
index 000000000..6eac4c3fc
--- /dev/null
+++ b/transformations/num2words/README.md
@@ -0,0 +1,19 @@
+# Numbers2Words Transformation 🦎  + ⌨️ → 🐍
+This transformation spells out the numbers (integers and decimals) in a given sentence or paragraph as words.
+
+Author name: Viswanatha Reddy Gajjala
+Author email: viswanatha.g15@iiits.in
+
+## What type of a transformation is this?
+This transformation acts as a perturbation for testing robustness.
+- Input: `2 times 2 is 4.`
+- Output: `two times two is four.`
+
+## What tasks does it intend to benefit?
+This perturbation would benefit all tasks which have a sentence/paragraph/document as input like text classification, 
+text generation, etc. 
+
+This transformation can be used to augment datasets that contain numerical values. It helps to analyze a model's performance on questions which require numerical understanding.
+
+## What are the limitations of this transformation?
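+The transformation's outputs are too simple to be used for data augmentation. Unlike a paraphraser, it is not capable of generating linguistically diverse text. It only rewrites numbers that spaCy tags as `CARDINAL` entities, and decimals are rounded to two places and written as a fraction (e.g. `0.25` becomes `zero and 25/100`).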
\ No newline at end of file
diff --git a/transformations/num2words/__init__.py b/transformations/num2words/__init__.py
new file mode 100644
index 000000000..0a79241bb
--- /dev/null
+++ b/transformations/num2words/__init__.py
@@ -0,0 +1 @@
+from .transformation import *
\ No newline at end of file
diff --git a/transformations/num2words/test.json b/transformations/num2words/test.json
new file mode 100644
index 000000000..7720f2151
--- /dev/null
+++ b/transformations/num2words/test.json
@@ -0,0 +1,50 @@
+{
+  "type": "num2words",
+  "test_cases": [
+    {
+      "class": "num2words",
+      "inputs": {
+        "sentence": "He ate 0.25 of the pizza within the first 5 minutes."
+      },
+      "outputs": [{
+        "sentence": "He ate zero and 25/100 of the pizza within the first 5 minutes."
+      }]
+    },
+    {
+      "class": "num2words",
+      "inputs": {
+        "sentence": "He has returned from his office."
+      },
+      "outputs": [{
+        "sentence": "He has returned from his office."
+      }]
+    },
+    {
+      "class": "num2words",
+      "inputs": {
+        "sentence": "She has bought 100 apples."
+      },
+      "outputs": [{
+        "sentence": "She has bought one hundred apples."
+      }]
+    },
+    {
+      "class": "num2words",
+      "inputs": {
+        "sentence": "2 times 2 is 4."
+      },
+      "outputs": [{
+        "sentence": "two times two is four."
+      }]
+    },
+    {
+      "class": "num2words",
+      "inputs": {
+        "sentence": "Neuroplasticity is a continuous processing allowing short-term, medium-term, and long-term remodeling of the neuronosynaptic organization."
+      },
+      "outputs": [{
+        "sentence": "Neuroplasticity is a continuous processing allowing short-term, medium-term, and long-term remodeling of the neuronosynaptic organization."
+      }]
+    }
+  ]
+}
diff --git a/transformations/num2words/transformation.py b/transformations/num2words/transformation.py
new file mode 100644
index 000000000..9f7e8e892
--- /dev/null
+++ b/transformations/num2words/transformation.py
@@ -0,0 +1,66 @@
+import re
+import spacy
+from interfaces.SentenceOperation import SentenceOperation
+from tasks.TaskTypes import TaskType
+import inflect 
+
+
+class Numbers2Words:
+    """Spell out the numeric CARDINAL entities that spaCy finds in English text."""
+    nlp = None  # class-level placeholder; __init__ sets the pipeline per instance
+
+    def __init__(self):
+        self.nlp = spacy.load("en_core_web_sm")
+
+    @staticmethod
+    def int2words(n, p=inflect.engine()):
+        """Return integer ``n`` in words via inflect engine ``p`` (shared default)."""
+        return ' '.join(p.number_to_words(n, wantlist=True, andword=' '))
+
+    def float2words(self, float_value):
+        """Return the number in words, with hundredths appended as " and NN/100"."""
+        # Fixed two decimals: str(round(x, 2)) used to turn 0.5 into "5/100".
+        integer, _, decimal = "{:.2f}".format(float(float_value)).partition('.')
+        fraction = " and {}/100".format(decimal) if int(decimal) else ''
+        return self.int2words(int(integer)) + fraction
+
+    def __call__(self, input_text: str):
+        """Return ``input_text`` with its numeric CARDINAL entities spelled out."""
+        words = {}  # entity text as it appears in the input -> spelled-out form
+        for entity in self.nlp(input_text).ents:
+            number = entity.text.replace(",", "")
+            if (entity.label_ == "CARDINAL" and not re.search(r"[_\-:/()]", number)
+                    and (number.isdigit() or '.' in number)):
+                try:
+                    words[entity.text] = self.float2words(number)
+                except ValueError:  # has a dot or digit but is not a number
+                    pass
+        if not words:
+            return input_text
+        # One pass over the original text; never match inside a longer number.
+        found = "|".join(map(re.escape, sorted(words, key=len, reverse=True)))
+        pattern = r"(?<!\d)(?<!\d[.,])(?:" + found + r")(?!\d|[.,]\d)"
+        return re.sub(pattern, lambda m: words[m.group(0)], input_text)
+
+
+class Num2Words(SentenceOperation):  # spells out numbers via Numbers2Words
+    languages = ["en"]
+    tasks = [TaskType.TEXT_CLASSIFICATION, TaskType.TEXT_TO_TEXT_GENERATION]
+
+    def __init__(self, verbose=False):
+        super().__init__(verbose=verbose)
+        self.transform = Numbers2Words()  # callable: sentence in, sentence out
+
+    def generate(self, sentence: str):  # returns a one-element list
+        outputs = [self.transform(sentence)]
+        if self.verbose:
+            print(f"Perturbed Input from {self.name()} : {outputs[0]}")
+        return outputs
+
+"""
+# Sample code to demonstrate usage. Can also assist in adding test cases.
+if __name__ == '__main__':
+    Num2Words(verbose=True).generate('she has bought hundred apples.')
+    Num2Words(verbose=True).generate('she has bought 100 apples.')
+    Num2Words(verbose=True).generate('she has bought 100.55 apples.')
+"""
\ No newline at end of file