-
Notifications
You must be signed in to change notification settings - Fork 0
/
pipeline_gurufocus.py
81 lines (66 loc) · 2.79 KB
/
pipeline_gurufocus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import numpy as np
import pandas as pd
import requests
import os
from datetime import datetime
from gurufocus import get_all_ratio
from gurufocus import get_GF_Value
# Date stamp (YYYY-MM-DD) of this run; used later in the output file name.
current_datetime = datetime.now().strftime("%Y-%m-%d")

# Create the output folder. exist_ok=True replaces the previous
# "check, then create" pair, which could fail if the folder appeared
# between the check and the creation.
os.makedirs('gurufocus', exist_ok=True)

# Build the ticker list, keeping only rows whose 'Market Capitalization'
# is above 1e9.
DFUSA = pd.read_csv('america_2023-09-16.csv')
tickerlst = list(DFUSA.query('`Market Capitalization`>1e9').Ticker)
print(f"Number of Tickers: {len(tickerlst)}")
# Main loop: fetch the ratio table of every ticker and collect the results.
dfs = []
for counter, ticker in enumerate(tickerlst, start=1):
    print(f'{counter} out of {len(tickerlst)} {ticker}')
    try:
        dftemp = get_all_ratio(ticker)
        # Tag every row with its ticker so rows stay identifiable after concat.
        dftemp['Ticker'] = ticker
    except Exception as exc:
        # Best effort: one failing ticker must not abort the whole run.
        # Catching Exception (instead of a bare except) still lets
        # KeyboardInterrupt / SystemExit stop the script, and the reason
        # for the failure is reported rather than discarded.
        print(f"could not retrieve data for {ticker}: {exc!r}")
    else:
        dfs.append(dftemp)

# pd.concat raises a generic ValueError on an empty list; fail with a
# message that explains what actually went wrong.
if not dfs:
    raise ValueError("no ratio data could be retrieved for any ticker")

# Stack the per-ticker tables into a single DataFrame.
DFtotal = pd.concat(dfs, ignore_index=True)
# Helper used to detect (and later drop) rows whose 'Name' is really a number.
def is_convertible_to_number(value):
    """Return True if ``float(value)`` succeeds, False otherwise.

    Values that ``float`` cannot even attempt to parse (``None``, lists,
    other non-string / non-numeric objects) raise ``TypeError`` rather than
    ``ValueError``; both are treated as "not convertible" so that applying
    this function to a column with mixed content never crashes.

    Note that anything ``float`` accepts counts as convertible, including
    ``"nan"``, ``"inf"`` and a float NaN.
    """
    try:
        float(value)
    except (TypeError, ValueError):
        return False
    return True
# Drop rows whose 'Name' parses as a number (the original author notes that
# the first table produced such rows), then keep a single row per
# (Ticker, Name) pair so the pivot below has unique index/column pairs.
_name_is_numeric = DFtotal['Name'].apply(is_convertible_to_number)
df_filtered = DFtotal[~_name_is_numeric]
df_filtered = df_filtered.drop_duplicates(subset=['Ticker', 'Name'])

# Reshape to one row per ticker and one column per 'Name', filled with the
# 'Current' value; reset_index turns 'Ticker' back into a regular column.
DFfinal = df_filtered.pivot(
    index='Ticker',
    columns='Name',
    values='Current',
).reset_index()

# Attach market cap, industry and sector from the source ticker table.
_company_columns = ['Ticker', 'Market Capitalization', 'Industry', 'Sector']
DFfinal_merged = pd.merge(DFfinal, DFUSA[_company_columns], on='Ticker')
#Fixing non-value items
# Replace strings that do not parse as a float with NaN.
def replace_non_float_with_nan(value):
    """Return ``np.nan`` for strings that are not valid floats, else *value*.

    Non-string values are returned unchanged. A string is kept (still as a
    string) when ``float(value)`` accepts it, which covers negative numbers
    such as ``"-3.2"`` and scientific notation such as ``"1e-3"``. The
    previous digit-only check rejected both and silently turned valid
    negative values into NaN.
    """
    if not isinstance(value, str):
        return value
    try:
        float(value)
    except ValueError:
        return np.nan
    return value
# Clean every column except the purely textual ones: values that
# replace_non_float_with_nan rejects become NaN, everything else is kept.
_text_columns = ['Ticker', 'Sector', 'Industry']
df = DFfinal_merged.copy()
_columns_to_clean = [name for name in DFfinal_merged if name not in _text_columns]
for _name in _columns_to_clean:
    df[_name] = DFfinal_merged[_name].apply(replace_non_float_with_nan)

# Write the cleaned table into the output folder, stamped with the run date.
_output_path = f'gurufocus/GuruFocus_merged_{current_datetime}.csv'
df.to_csv(_output_path, index=False)