Merge remote-tracking branch 'origin/master'

Machyne · Mar 15, 2015 · b858514 · b858514
2 parents fe06984 + cfe9bae
commit b858514
Show file tree

Hide file tree

Showing 33 changed files with 397 additions and 849 deletions.
diff --git a/README.md b/README.md
@@ -1,9 +1,98 @@
 # PAL
 ---------
 
+## Installation & Running
+--------------------------------
+### Requirements
+- Python 2.7 with `pip`
+    - Required Python packages in requirements.txt
+- PostgreSQL 9.3 required for directory service
+- Node.js > v0.10 for pizza service
+- Web server capable of running wsgi applications for deployment
+
+### Base Installation
+We recommend installing PAL in a python virtual environment by using the `virtualenv` package.
+Use pip to install virtualenv, then create a new virtualenv in the pal directory
+
+`virtualenv env`
+
+and activate the virtual environment
+
+`source env/bin/activate`
+
+If you are not using the directory service, or PostgreSQL is not installed, remove the line containing
+`psycopg2` from the `requirements.txt` file. Install the required packages
+
+`pip install -r requirements.txt`
+
+Next, install the required `nltk` libraries. Open a python shell and type
+
+~~~
+>>> import nltk
+>>> nltk.download()
+~~~
+
+and install `maxent_ne_chunker`, `maxent_treebank_pos_tagger`, `punkt`, `qc`, and `words`.
+
+Obtain API keys for TMDB, Wolfram, and Yelp and insert them into `config.py`.
+Obtain a Facebook app ID and insert it into `static/home.js`.
+
+PAL is now ready to run locally. Start it by running `python server.py`.
+PAL can be accessed by connecting to `localhost:5000` in a browser.
+
+### Hill Climb Instructions
+To ensure that queries are sorted into the correct services,
+hill climbing should be run. From the base directory, run
+
+`python -m pal.heuristics.hill_climb.hill_climb`
+
+After running hill climbing, updated heuristics values can be found in the
+`pal/heuristics/hill_climb/climbed_values` directory, and will be automatically
+referenced by the rest of PAL. Hill climbing should be re-run whenever a new
+service is added to PAL.
+
+### Pizza Service Installation
+The pizza service requires Node.js or io.js. Install one of these and `npm`.
+Navigate to the `api/dominos` directory and run
+
+`npm install`
+
+Next, install `forever` with
+
+`npm install -g forever`
+
+Start the pizza server with
+
+`forever start server.js`
+
+### Scraping the Directory
+In order for the directory service to function, the directory must be
+pre-scraped. Navigate to the `api/directory` directory. Insert the
+username and password for the database in the `stalkernet_scraper.py`
+file with the format `postgresql://username:password@database_url/database_name`
+
+Create the database schema by running
+
+`python models.py`
+
+Populate the buildings, majors, and departments tables by running
+
+`python stalkernet_scraper.py -b buildings.txt`
+`python stalkernet_scraper.py -m majors.txt`
+`python stalkernet_scraper.py -d depts.txt`
+
+Finally, scrape the directory with
+
+`python stalkernet_scraper.py -s`
+
+### Installing in Apache
+PAL can be run by Apache HTTP Server by using the `mod_wsgi` module.
+The Flask documentation has pretty detailed instructions on how to set
+this up. Please see `http://flask.pocoo.org/docs/0.10/deploying/mod_wsgi/`
+
 ## Services
 - Bon Appetit (actively scraped from [Bon Appetit website]
-    (http://carleton.cafebonappetit.com/cafe/))
+        (http://carleton.cafebonappetit.com/cafe/))
     - [x] Single dining hall, single day, any or all meals
 
 - Dictionary/Thesaurus (actively scraped from [dictionary]
@@ -39,10 +128,11 @@
         - [ ] order multiple different pizzas
 
 - Facebook (requests to [Facebook Graph API]
-    (https://developers.facebook.com/docs/graph-api))
+        (https://developers.facebook.com/docs/graph-api))
     - [x] Post to timeline on behalf of user
 
-- Movies (using the TMDB API)
+- Movies (using the [TMDB API]
+        (https://www.themoviedb.org/documentation/api))
     - [x] What movies was this person involved in (acting, directing, etc.)
     - [x] Was this person involved in this movie?
     - [x] How many movies was this person involved in?
@@ -51,7 +141,7 @@
     - [ ] Who played this character in this movie?
 
 - Translations (requests through [UltraLingua REST API]
-    (http://api.ultralingua.com/ulapi/rest)
+        (http://api.ultralingua.com/ulapi/rest))
     - [x] From English to {Spanish, French, German, Italian, Portuguese}
     - [x] From {Spanish, French, German, Italian, Portuguese} to English
     - [x] Between {Spanish, French, German, Italian, Portuguese}
@@ -65,12 +155,12 @@
     - [x] Geolocation
 
 - Wolfram|Alpha (requests using [Wolfram|Alpha's API]
-    (http://products.wolframalpha.com/api/))
+        (http://products.wolframalpha.com/api/))
     - [x] Run queries on natural language and get numerical output
     - [ ] Keep track of our limited number of queries (difficult due to concurrency issues)
 
 - Yelp (requests to [Yelp API]
-    (http://www.yelp.com/developers/documentation))
+        (http://www.yelp.com/developers/documentation))
     - [x] Businesses by search terms
     - [x] Ratings, URL, Phone Number
     - [x] Find by location

diff --git a/config_example.py b/config_example.py
@@ -0,0 +1,12 @@
+GOOGLE_GEOCODE_KEY = 'key'
+
+YELP_CONSUMER_KEY = 'key'
+YELP_CONSUMER_SECRET = 'key'
+YELP_TOKEN = 'key'
+YELP_TOKEN_SECRET = 'key'
+
+TMDB_KEY = 'key'
+
+WA_KEY = 'key'
+
+UL_KEY = 'key'
diff --git a/flashy.gif b/flashy.gif
diff --git a/pal.wsgi b/pal.wsgi
diff --git a/pal/heuristics/heuristic.py b/pal/heuristics/heuristic.py
@@ -25,23 +25,20 @@ def _get_score(self, word):
 
     # Returns a heuristic value for a list of keywords
     def run_heuristic(self, keywords):
-        return sum(self._get_score(word) for word in keywords)
+        kws = keywords + ['BIAS']
+        return sum(self._get_score(word) for word in kws)
 
     def read_input_file(self):
         # read input file into dictionary with keyword as key and
         # heuristic score as value
-        file_ = path.realpath(
-            path.join(
-                path.dirname(__file__),
-                'values',
-                self._name + '_values.txt'))
+        fname = self.climbed_file_name
         lines = []
         try:
-            with open(self.climb_file_name, 'rb') as input_file:
+            with open(fname, 'rb') as input_file:
                 lines = input_file.readlines()
-            file_ = self.climb_file_name
-        except Exception:
-            with open(file_, 'rb') as input_file:
+        except IOError:
+            fname = self.unclimbed_file_name
+            with open(fname, 'rb') as input_file:
                 lines = input_file.readlines()
         dummy_count = 0
         in_list = False
@@ -51,7 +48,7 @@ def read_input_file(self):
                 if in_list:
                     raise SyntaxError(
                         'File "{}", line {}, nested lists not supported'
-                        .format(file_, i + 1))
+                        .format(fname, i + 1))
                 in_list = True
                 dummy_count += 1
 
@@ -68,6 +65,21 @@ def read_input_file(self):
                 cur_line = map(str.strip, line.split(','))
                 self._variables[cur_line[0]] = int(cur_line[1])
 
+    def write_to_file(self, fname):
+        with open(fname, 'w') as file_:
+            in_dummy = False
+            for k, v in self._variables.iteritems():
+                if isinstance(v, int) and not in_dummy:
+                    file_.write('{}, {}\n'.format(k, v))
+                elif isinstance(v, str):
+                    if not in_dummy:
+                        file_.write('[\n')
+                        in_dummy = True
+                    file_.write('    {}\n'.format(k))
+                else:
+                    file_.write('], {}\n'.format(v))
+                    in_dummy = False
+
     def get_input_list_values(self):
         return filter(lambda x: isinstance(x, int),
                       self._variables.itervalues())
@@ -91,11 +103,21 @@ def get_input_list_keywords(self):
         return map(self._get_key_or_list, keys)
 
     @property
-    def climb_file_name(self):
+    def climbed_file_name(self):
+        return path.realpath(
+            path.join(
+                path.dirname(__file__),
+                'hill_climb',
+                'climbed_values',
+                '{}_climbed_values.txt'.format(self._name)))
+
+    @property
+    def unclimbed_file_name(self):
         return path.realpath(
             path.join(
                 path.dirname(__file__),
-                'climbed_{}_values.txt'.format(self._name)))
+                'values',
+                '{}_values.txt'.format(self._name)))
 
 if __name__ == '__main__':
     my_heur = Heuristic('movie')

diff --git a/pal/heuristics/hill_climb/climbed_values/.gitignore b/pal/heuristics/hill_climb/climbed_values/.gitignore
@@ -0,0 +1 @@
+*_climbed_values.txt