-
Notifications
You must be signed in to change notification settings - Fork 0
/
companies.py
82 lines (68 loc) · 2.29 KB
/
companies.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import requests
import pandas as pd
import time
from dotenv import load_dotenv
import os
# Load environment variables from .env file
load_dotenv()
# Configuration
API_URL = "https://api.dotfile.com/v1/companies"  # Dotfile companies endpoint (v1)
API_KEY = os.getenv("API_KEY")  # read from .env; None if unset — API calls will be rejected without it
DATE_FILTER = "2024-12-10T00:00:00Z" # Replace with last sync date
OUTPUT_CSV = "outputs/companies.csv"  # destination path for the flattened export
RATE_LIMIT = 150 # 150 requests per minute
def fetch_companies():
    """Fetch all companies with activity since DATE_FILTER, following pagination.

    Pages through the companies endpoint `limit` records at a time until a
    short page is returned or the reported total has been collected.

    Returns:
        list[dict]: raw company summary records from the API's 'data' field
        (possibly empty if the first request fails).
    """
    companies = []
    page = 1
    limit = 100  # maximum page size the API allows
    # Headers are identical for every page — build them once, outside the loop.
    headers = {
        "X-DOTFILE-API-KEY": API_KEY,
        "Content-Type": "application/json"
    }
    while True:
        params = {
            "last_activity_at.gte": DATE_FILTER,
            "page": page,
            "limit": limit,
        }
        # timeout prevents a hung connection from stalling the sync forever
        response = requests.get(API_URL, headers=headers, params=params, timeout=30)
        if response.status_code != 200:
            print(f"Error: {response.status_code}, {response.text}")
            break
        data = response.json()
        batch = data.get('data', [])  # tolerate a missing 'data' field instead of raising KeyError
        companies.extend(batch)
        pagination = data.get('pagination', {})
        # Stop on a short/empty page, or once we've collected the reported total.
        if len(batch) < limit or pagination.get('count', 0) <= len(companies):
            break
        page += 1
        time.sleep(60 / RATE_LIMIT)  # stay under the per-minute rate limit
    return companies
def fetch_case_details(case_id):
    """Fetch the full record for one company by id.

    Args:
        case_id: the company's API identifier (appended to API_URL).

    Returns:
        dict | None: the decoded JSON detail record, or None when the
        request does not return HTTP 200 (the error is printed).
    """
    url = f"{API_URL}/{case_id}"
    headers = {
        "X-DOTFILE-API-KEY": API_KEY,
        "Content-Type": "application/json"
    }
    # timeout prevents a single hung detail request from blocking the whole run
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error fetching details for case {case_id}: {response.status_code}, {response.text}")
        return None
def save_to_csv(companies):
    """Flatten the company records and write them to OUTPUT_CSV.

    Args:
        companies: list of (possibly nested) company dicts; nested fields
        are flattened into dotted column names.
    """
    # Fix: pandas.to_csv raises FileNotFoundError if the target directory
    # (e.g. 'outputs/') does not exist — create it first.
    out_dir = os.path.dirname(OUTPUT_CSV)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df = pd.json_normalize(companies)  # flatten nested JSON data
    df.to_csv(OUTPUT_CSV, index=False)
    print(f"Data saved to {OUTPUT_CSV}")
def main():
    """Entry point: pull the company list, enrich each record with its
    full details, then export everything to CSV."""
    summaries = fetch_companies()
    enriched = []
    for summary in summaries:
        details = fetch_case_details(summary['id'])
        if details:
            enriched.append(details)
        time.sleep(60 / RATE_LIMIT)  # keep the detail calls under the rate limit
    save_to_csv(enriched)


if __name__ == "__main__":
    main()