Skip to content

Commit

Permalink
Merge pull request #53 from FriendlyUser/feature/extract-from-listing
Browse files Browse the repository at this point in the history
Feature/extract from listing
  • Loading branch information
FriendlyUser authored Aug 8, 2021
2 parents 183c9f6 + 0c51f0d commit 5f703ff
Show file tree
Hide file tree
Showing 18 changed files with 853 additions and 472 deletions.
2 changes: 1 addition & 1 deletion .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -57,5 +57,5 @@ RUN apt-get update && apt-get -y dist-upgrade \
ENV DEBIAN_FRONTEND=dialog

USER vscode
RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python - --version=0.12.17
RUN curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/get-poetry.py | python -
ENV PATH "/home/vscode/.poetry/bin:$PATH"
24 changes: 24 additions & 0 deletions .github/workflows/cron.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name: Update CRON
# Make update to public file to keep workflows running
on:
schedule:
# * is a special character in YAML so you have to quote this string
- cron: '0 0 2 * *'


jobs:
update_cron:
name: Generate Report
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Update Cron
run: |
echo "c\n" >> CRON.md
- name: Update resources
uses: test-room-7/action-update-file@v1
with:
file-path: CRON.md
commit-msg: Update CRON.md
github-token: ${{ secrets.GITHUB_TOKEN }}
22 changes: 5 additions & 17 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@ on:
schedule:
# * is a special character in YAML so you have to quote this string
- cron: '30 13 * * 1-5'

# Add cron job to ensure external webpages + apis have no changed
jobs:
run_tests:
strategy:
fail-fast: false
matrix:
python-version: [3.6.7, 3.7, 3.8]
poetry-version: [1.1.2]
os: [ubuntu-20.04, windows-latest]
python-version: [3.9]
poetry-version: [1.1.7]
os: [ubuntu-20.04]
runs-on: ${{ matrix.os }}
name: coverage
env:
Expand All @@ -34,24 +35,11 @@ jobs:
- name: Install Dependencies
run: poetry install

- name: Dataclasses for python 3.6 for linux
if: startsWith(matrix.os,'ubuntu')
run: |
ver=$(python -V 2>&1 | sed 's/.* \([0-9]\).\([0-9]\).*/\1\2/')
echo $ver
if [ "$ver" -eq "36" ]; then
poetry run python -m pip install dataclasses
fi
- name: Dataclasses for python 3.6 for windows
if: startsWith(matrix.os,'windows')
run: |
poetry run python -m pip install dataclasses
- name: Coverage Report
run: poetry run python -m pytest --cov=./ --cov-report=xml

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1.0.15
uses: codecov/codecov-action@v2
with:
file: ./coverage.xml
flags: ga_ci
Expand Down
2 changes: 1 addition & 1 deletion .readthedocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,6 @@ formats:

# Optionally set the version of Python and requirements required to build your docs
python:
version: 3.7
version: 3.9
install:
- requirements: requirements.txt
2 changes: 2 additions & 0 deletions ReadMe.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Function to extract exchange data from the cse and tsx websites and various othe

The entire 0.2.x version of tsx functions are now deprecated.

Tsx has switched to graphql which I like.


### How to run tests

Expand Down
19 changes: 18 additions & 1 deletion cad_tickers/exchanges/cse.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from bs4 import BeautifulSoup
from cad_tickers.util import (
parse_description_tags,
extract_recent_news_links,
make_cse_path,
cse_ticker_to_webmoney,
)
Expand Down Expand Up @@ -130,7 +131,7 @@ def get_cse_tickers_df() -> pd.DataFrame:
urls url to listing on cse website
========== ====================================================================
"""
URL = f"https://www.thecse.com/export-listings/xlsx?f=" + r"{}"
URL = "https://www.thecse.com/export-listings/xlsx?f=" + r"{}"
r = requests.get(URL)
responseHeaders = r.headers
if "text/html" in responseHeaders["Content-Type"]:
Expand Down Expand Up @@ -220,6 +221,22 @@ def add_descriptions_to_df(df: pd.DataFrame, max_workers: int = 16) -> pd.DataFr
df["description"] = descriptions
return df

def get_recent_docs_from_url(url: str) -> list:
    """Fetch a CSE listing page and extract its recent filing/news links.

    Parameters:
        url - link to ticker page on thecse.com; may be an empty string
    Returns:
        list - document urls scraped from the page (empty list when url is "")
    """
    if url == "":
        # Bug fix: previously returned "" here, contradicting the declared
        # `-> list` return type and the docstring; callers iterating the
        # result expect a list, so return an empty one.
        return []
    r = requests.get(url)
    html_content = r.text
    soup = BeautifulSoup(html_content, "lxml")
    # NOTE(review): this selector contains no '.'/'#' markers, so it matches
    # literal element names ("group-cse-filings-content", "item-link", ...)
    # rather than CSS classes -- presumably these should be class selectors;
    # verify against the live CSE page markup.
    news_model = "group-cse-filings-content > view-listing-views item-link > a"
    description_tags = soup.select(news_model)
    return extract_recent_news_links(description_tags)


if __name__ == "__main__":
from datetime import datetime
Expand Down
3 changes: 3 additions & 0 deletions cad_tickers/exchanges/tsx/get_ticker_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from typing import Union


# TODO fix this later
# unlikely need this data, I think yahoo finance is good enough
# more consistent api
def get_ticker_data(symbol=str) -> Union[dict, None]:
"""
Parameters:
Expand Down
41 changes: 41 additions & 0 deletions cad_tickers/exchanges/tsx/gql_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,46 @@ class GQL:
"query": quote_by_symbol_query,
}

get_company_news_events_query = """query getNewsAndEvents(
$symbol: String!,
$page: Int!,
$limit: Int!,
$locale: String!
) {
news: getNewsForSymbol(
symbol: $symbol,
page: $page,
limit: $limit,
locale: $locale
) {
headline
datetime
source
newsid
summary
__typename
}
events: getUpComingEventsForSymbol(symbol: $symbol, locale: $locale) {
title
date
status
type
__typename
}
}
"""

get_company_news_events_payload = {
"operationName": "getNewsAndEvents",
"variables": {
"symbol": "ART",
"page": 1,
"limit": 100,
"locale": "en"
},
"query": get_company_news_events_query,
}

get_company_filings_query = """query getCompanyFilings(
$symbol: String!
$fromDate: String
Expand Down Expand Up @@ -160,6 +200,7 @@ class GQL:
"symbol": "ART",
"fromDate": "2020-09-01",
"toDate": "2020-09-30",
"limit": 100,
},
"query": get_company_filings_query,
}
62 changes: 56 additions & 6 deletions cad_tickers/sedar/tsx.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import requests
import json
from datetime import datetime
from cad_tickers.exchanges.tsx.gql_data import GQL
from typing import Union
Expand Down Expand Up @@ -27,24 +28,73 @@ def get_ticker_filings(
url = "https://app-money.tmx.com/graphql"
r = requests.post(
url,
json=payload,
data=json.dumps(payload),
headers={
"authority": "app-money.tmx.com",
"referer": f"https://money.tmx.com/en/quote/{symbol.upper()}",
"locale": "en",
"Content-Type": "application/json"
},
)
allData = r.json()
try:
data = allData["data"]
return data
if r.status_code == 403:
print(r.text)
return {}
else:
allData = r.json()
print(allData)
data = allData["data"]
return data
except KeyError as _e:
print(_e, symbol)
pass

# TODO rename this later
def get_news_and_events(
    symbol: str,
    page: int = 1,
    limit: int = 100,
    locale: str = "en",
) -> Union[dict, None]:
    """Fetch recent news items and upcoming events for a TSX ticker via GraphQL.

    Parameters:
        symbol - ticker symbol from tsx, no prefix
        page - page number of news results to fetch
        limit - max number of news items to retrieve
        locale - language code, e.g. "en"
    Returns:
        dict - "data" section of the GraphQL response; {} on 403 or when the
        response body lacks a "data" key
    """
    # Bug fix: build a fresh payload instead of mutating the shared
    # class-level GQL.get_company_news_events_payload dict in place --
    # the old code leaked symbol/page/limit/locale between calls.
    template = GQL.get_company_news_events_payload
    payload = dict(template)
    payload["variables"] = {
        **template["variables"],
        "symbol": symbol,
        "page": page,
        "limit": limit,
        "locale": locale,
    }
    url = "https://app-money.tmx.com/graphql"
    r = requests.post(
        url,
        data=json.dumps(payload),
        headers={
            "authority": "app-money.tmx.com",
            "referer": f"https://money.tmx.com/en/quote/{symbol.upper()}",
            "locale": "en",
            "Content-Type": "application/json"
        },
    )
    try:
        # TMX answers 403 with an HTML body when the request is blocked;
        # surface the body for debugging and return an empty dict.
        if r.status_code == 403:
            print(r.text)
            return {}
        else:
            allData = r.json()
            data = allData["data"]
            return data
    except KeyError as _e:
        return {}

if __name__ == "__main__":
art = get_ticker_filings(
"ART", start_date="2015-11-11", end_date="2020-11-11", limit=108
art = get_news_and_events(
"PKK.CN", 1, 108
)
print(art)
1 change: 1 addition & 0 deletions cad_tickers/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from cad_tickers.util.utils import (
transform_name_to_slug,
parse_description_tags,
extract_recent_news_links,
make_cse_path,
read_df_from_file,
tickers_to_ytickers,
Expand Down
11 changes: 11 additions & 0 deletions cad_tickers/util/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,17 @@ def parse_description_tags(description_tags: List[bs4.element.Tag]) -> str:
return ""


def extract_recent_news_links(modal_tags: List[bs4.element.Tag]) -> List[str]:
    """Pull the href target out of each CSE news-release anchor tag."""
    return [tag.attrs["href"] for tag in modal_tags]


def make_cse_path(raw_ticker: str, raw_industry: str) -> str:
"""makes slug for ticker for the cse
Expand Down
Loading

0 comments on commit 5f703ff

Please sign in to comment.