Skip to content

Commit

Permalink
minor tuning para arreglar claves mal guardadas
Browse files Browse the repository at this point in the history
  • Loading branch information
daniel_shazura committed Nov 22, 2021
1 parent b35f839 commit 516b5ea
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 10 deletions.
19 changes: 19 additions & 0 deletions data_gathering/cities/get_nifs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from pymongo import MongoClient

mongo = MongoClient()["dme"]
contratos = mongo["contracts"]

# Select only the contracts that have an "adjudicacion" (award) field.
FILTRO_ADJUDICADOS = {"adjudicacion": {"$exists": True}}

# Cursor.count() was deprecated in PyMongo 3.7 and removed in 4.0;
# Collection.count_documents() is the supported way to count matches.
total_adjudicados = contratos.count_documents(FILTRO_ADJUDICADOS)
print(f"Hay {total_adjudicados} contratos adjudicados")

adjudicados = contratos.find(FILTRO_ADJUDICADOS)

# Collect the awardee tax id ("nif adjudicatario") of every awarded contract.
nifs = []
for contrato in adjudicados:
    try:
        nifs.append(contrato['adjudicacion']['nif adjudicatario'])
    except (KeyError, TypeError) as exc:
        # KeyError: the award dict lacks the expected key.
        # TypeError: the award field is not a mapping at all.
        # Fail with enough context to locate the document instead of
        # dropping into an interactive debugger.
        raise ValueError(
            f"Contract {contrato.get('_id')!r} has no usable "
            "'nif adjudicatario' under 'adjudicacion'"
        ) from exc

print(len(nifs))
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def contract_scrapping(self, response):
adjudicacion = {}
for col, val in zip(columnas, valores):
col_nombre = col.xpath("strong").extract_first().strip()
col_nombre = col_nombre.lower().replace("º", "-")
# Lower-case the column name, swap "º" for "-", and strip the <strong> markup.
# str.replace() requires both arguments: the closing tag must be replaced by "".
col_nombre = col_nombre.lower().replace("º", "-").replace("<strong>", "").replace("</strong>", "")
col_val = val.xpath("text()").extract_first()
if col_val:
col_val = col_val.strip()
Expand Down
21 changes: 12 additions & 9 deletions website/app/models/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,18 @@ def contract_file_to_json(file_path, file_name):
items = json_file.items()
for k, v in list(items):
if "esupuesto" in k and (' euros' in v or '€' in v or 'euros ' in v):
try:
# if "porte total" in v or "estimado" in v:
v = v.split("euros")[0]
numero = "".join([letra for i, letra in enumerate(v) if (letra == "," and v[i-1].isdigit()) or letra.isdigit()])
numero = numero.replace(",", ".")
json_file['presupuesto'] = float(numero)
except Exception as e:
import pdb; pdb.set_trace()
raise e
# if "porte total" in v or "estimado" in v:
v = v.split("euros")[0]
numero = "".join([letra for i, letra in enumerate(v) if (letra == "," and v[i-1].isdigit()) or letra.isdigit()])
numero = numero.replace(",", ".")
json_file['presupuesto'] = float(numero)
if k == 'adjudicacion':
    if isinstance(v, dict) and v:
        # Rebuild the award dict with any "<strong>"/"</strong>" markup
        # removed from its keys; values are kept untouched.
        json_file[k] = {
            kk.replace("<strong>", "").replace("</strong>", ""): val
            for kk, val in v.items()
        }
    else:
        # Anything that is not a non-empty dict is malformed (or empty),
        # so the field is dropped.
        json_file.pop(k)

json_file['categoria'] = file_path.split('/')[-1]
json_file.update({MONGODB_ID: file_name.split(".")[0]})
Expand Down

0 comments on commit 516b5ea

Please sign in to comment.