Skip to content

Commit

Permalink
Merge pull request #53 from FriendlyUser/feature/extract-from-listing
Browse files Browse the repository at this point in the history
Feature/extract from listing
  • Loading branch information
FriendlyUser authored Aug 8, 2021
2 parents 183c9f6 + 0c51f0d commit 5f703ff
Show file tree
Hide file tree
Showing 18 changed files with 853 additions and 472 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,5 @@ RUN apt-get update && apt-get -y dist-upgrade \
ENV DEBIAN_FRONTEND=dialog

USER vscode
RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python - --version=0.12.17
RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python -
ENV PATH "/home/vscode/.poetry/bin:$PATH"
24 changes: 24 additions & 0 deletions .github/workflows/cron.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name: Update CRON
# Make update to public file to keep workflows running
on:
schedule:
# * is a special character in YAML so you have to quote this string
- cron: '0 0 2 * *'


jobs:
update_cron:
name: Generate Report
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Update Cron
run: |
echo "c\n" >> CRON.md
- name: Update resources
uses: test-room-7/action-update-file@v1
with:
file-path: CRON.md
commit-msg: Update CRON.md
github-token: ${{ secrets.GITHUB_TOKEN }}
22 changes: 5 additions & 17 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@ on:
schedule:
# * is a special character in YAML so you have to quote this string
- cron: '30 13 * * 1-5'

# Add cron job to ensure external webpages + apis have no changed
jobs:
run_tests:
strategy:
fail-fast: false
matrix:
python-version: [3.6.7, 3.7, 3.8]
poetry-version: [1.1.2]
os: [ubuntu-20.04, windows-latest]
python-version: [3.9]
poetry-version: [1.1.7]
os: [ubuntu-20.04]
runs-on: ${{ matrix.os }}
name: coverage
env:
Expand All @@ -34,24 +35,11 @@ jobs:
- name: Install Dependencies
run: poetry install

- name: Dataclasses for python 3.6 for linux
if: startsWith(matrix.os,'ubuntu')
run: |
ver=$(python -V 2>&1 | sed 's/.* \([0-9]\).\([0-9]\).*/\1\2/')
echo $ver
if [ "$ver" -eq "36" ]; then
poetry run python -m pip install dataclasses
fi
- name: Dataclasses for python 3.6 for windows
if: startsWith(matrix.os,'windows')
run: |
poetry run python -m pip install dataclasses
- name: Coverage Report
run: poetry run python -m pytest --cov=./ --cov-report=xml

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1.0.15
uses: codecov/codecov-action@v2
with:
file: ./coverage.xml
flags: ga_ci
Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ formats:

# Optionally set the version of Python and requirements required to build your docs
python:
version: 3.7
version: 3.9
install:
- requirements: requirements.txt
2 changes: 2 additions & 0 deletions ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Function to extract exchange data from the cse and tsx websites and various othe

The entire 0.2.x version of tsx functions are now deprecated.

Tsx has switched to graphql which I like.


### How to run tests

Expand Down
19 changes: 18 additions & 1 deletion cad_tickers/exchanges/cse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from bs4 import BeautifulSoup
from cad_tickers.util import (
parse_description_tags,
extract_recent_news_links,
make_cse_path,
cse_ticker_to_webmoney,
)
Expand Down Expand Up @@ -130,7 +131,7 @@ def get_cse_tickers_df() -> pd.DataFrame:
urls url to listing on cse website
========== ====================================================================
"""
URL = f"https://www.thecse.com/export-listings/xlsx?f=" + r"{}"
URL = "https://www.thecse.com/export-listings/xlsx?f=" + r"{}"
r = requests.get(URL)
responseHeaders = r.headers
if "text/html" in responseHeaders["Content-Type"]:
Expand Down Expand Up @@ -220,6 +221,22 @@ def add_descriptions_to_df(df: pd.DataFrame, max_workers: int = 16) -> pd.DataFr
df["description"] = descriptions
return df

def get_recent_docs_from_url(url: str) -> list:
    """Fetch a CSE listing page and extract its recent filing/news links.

    Parameters:
        url - link to ticker page on thecse.com; may be an empty string
    Returns:
        list - document urls scraped from the page (empty list when url is "")
    """
    if url == "":
        # Bug fix: previously returned "" here, contradicting the declared
        # `-> list` return type and the docstring; callers iterating the
        # result expect a list, so return an empty one.
        return []
    r = requests.get(url)
    html_content = r.text
    soup = BeautifulSoup(html_content, "lxml")
    # NOTE(review): this selector contains no '.'/'#' markers, so it matches
    # literal element names ("group-cse-filings-content", "item-link", ...)
    # rather than CSS classes -- presumably these should be class selectors;
    # verify against the live CSE page markup.
    news_model = "group-cse-filings-content > view-listing-views item-link > a"
    description_tags = soup.select(news_model)
    return extract_recent_news_links(description_tags)


if __name__ == "__main__":
from datetime import datetime
Expand Down
3 changes: 3 additions & 0 deletions cad_tickers/exchanges/tsx/get_ticker_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from typing import Union


# TODO fix this later
# unlikely need this data, I think yahoo finance is good enough
# more consistent api
def get_ticker_data(symbol=str) -> Union[dict, None]:
"""
Parameters:
Expand Down
41 changes: 41 additions & 0 deletions cad_tickers/exchanges/tsx/gql_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,46 @@ class GQL:
"query": quote_by_symbol_query,
}

get_company_news_events_query = """query getNewsAndEvents(
$symbol: String!,
$page: Int!,
$limit: Int!,
$locale: String!
) {
news: getNewsForSymbol(
symbol: $symbol,
page: $page,
limit: $limit,
locale: $locale
) {
headline
datetime
source
newsid
summary
__typename
}
events: getUpComingEventsForSymbol(symbol: $symbol, locale: $locale) {
title
date
status
type
__typename
}
}
"""

get_company_news_events_payload = {
"operationName": "getNewsAndEvents",
"variables": {
"symbol": "ART",
"page": 1,
"limit": 100,
"locale": "en"
},
"query": get_company_news_events_query,
}

get_company_filings_query = """query getCompanyFilings(
$symbol: String!
$fromDate: String
Expand Down Expand Up @@ -160,6 +200,7 @@ class GQL:
"symbol": "ART",
"fromDate": "2020-09-01",
"toDate": "2020-09-30",
"limit": 100,
},
"query": get_company_filings_query,
}
62 changes: 56 additions & 6 deletions cad_tickers/sedar/tsx.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import requests
import json
from datetime import datetime
from cad_tickers.exchanges.tsx.gql_data import GQL
from typing import Union
Expand Down Expand Up @@ -27,24 +28,73 @@ def get_ticker_filings(
url = "https://app-money.tmx.com/graphql"
r = requests.post(
url,
json=payload,
data=json.dumps(payload),
headers={
"authority": "app-money.tmx.com",
"referer": f"https://money.tmx.com/en/quote/{symbol.upper()}",
"locale": "en",
"Content-Type": "application/json"
},
)
allData = r.json()
try:
data = allData["data"]
return data
if r.status_code == 403:
print(r.text)
return {}
else:
allData = r.json()
print(allData)
data = allData["data"]
return data
except KeyError as _e:
print(_e, symbol)
pass

# TODO rename this later
def get_news_and_events(
    symbol: str,
    page: int = 1,
    limit: int = 100,
    locale: str = "en",
) -> Union[dict, None]:
    """Fetch recent news items and upcoming events for a TSX ticker via GraphQL.

    Parameters:
        symbol - ticker symbol from tsx, no prefix
        page - page number of news results to fetch
        limit - max number of news items to retrieve
        locale - language code, e.g. "en"
    Returns:
        dict - "data" section of the GraphQL response; {} on 403 or when the
        response body lacks a "data" key
    """
    # Bug fix: build a fresh payload instead of mutating the shared
    # class-level GQL.get_company_news_events_payload dict in place --
    # the old code leaked symbol/page/limit/locale between calls.
    template = GQL.get_company_news_events_payload
    payload = dict(template)
    payload["variables"] = {
        **template["variables"],
        "symbol": symbol,
        "page": page,
        "limit": limit,
        "locale": locale,
    }
    url = "https://app-money.tmx.com/graphql"
    r = requests.post(
        url,
        data=json.dumps(payload),
        headers={
            "authority": "app-money.tmx.com",
            "referer": f"https://money.tmx.com/en/quote/{symbol.upper()}",
            "locale": "en",
            "Content-Type": "application/json"
        },
    )
    try:
        # TMX answers 403 with an HTML body when the request is blocked;
        # surface the body for debugging and return an empty dict.
        if r.status_code == 403:
            print(r.text)
            return {}
        else:
            allData = r.json()
            data = allData["data"]
            return data
    except KeyError as _e:
        return {}

if __name__ == "__main__":
art = get_ticker_filings(
"ART", start_date="2015-11-11", end_date="2020-11-11", limit=108
art = get_news_and_events(
"PKK.CN", 1, 108
)
print(art)
1 change: 1 addition & 0 deletions cad_tickers/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from cad_tickers.util.utils import (
transform_name_to_slug,
parse_description_tags,
extract_recent_news_links,
make_cse_path,
read_df_from_file,
tickers_to_ytickers,
Expand Down
11 changes: 11 additions & 0 deletions cad_tickers/util/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ def parse_description_tags(description_tags: List[bs4.element.Tag]) -> str:
return ""


def extract_recent_news_links(modal_tags: List[bs4.element.Tag]) -> List[str]:
    """Pull the href target out of each CSE news-release anchor tag."""
    return [tag.attrs["href"] for tag in modal_tags]


def make_cse_path(raw_ticker: str, raw_industry: str) -> str:
"""makes slug for ticker for the cse
Expand Down
Loading

0 comments on commit 5f703ff

Please sign in to comment.