Skip to content

Commit

Permalink
add kwargs to inference api
Browse files (browse the repository at this point in the history)
  • Loading branch information
theblackcat102 committed Oct 30, 2022
1 parent 97b5d2b commit b1360a8
Showing 1 changed file with 14 additions and 5 deletions.
19 changes: 14 additions & 5 deletions extractnet/pipeline.py
Original file line number | Diff line number | Diff line change
@@ -53,7 +53,14 @@ def extract_one_meta(document):
def __call__(self, html, **kwargs):
return self.extract(html, **kwargs)

def extract(self, html, encoding=None, as_blocks=False, extract_target=None, debug=True, metadata_mining=True):
def extract(self, html,
encoding=None,
as_blocks=False,
extract_target=None,
debug=False,
metadata_mining=True,
**kwargs):

if isinstance(html, (str, bytes, unicode_, np.unicode_)):
documents_meta_data = {}
if metadata_mining:
@@ -78,11 +85,11 @@ def extract(self, html, encoding=None, as_blocks=False, extract_target=None, deb

output = self.content_extractor.predict(html)
if isinstance(output, dict):
return self.postprocess(html, output, documents_meta_data)
return self.postprocess(html, output, documents_meta_data, **kwargs)

return [ self.postprocess(h, o, meta) for h, o, meta in zip(html, output, documents_meta_data)]
return [ self.postprocess(h, o, meta, **kwargs) for h, o, meta in zip(html, output, documents_meta_data)]

def postprocess(self, html, output, meta):
def postprocess(self, html, output, meta, **kwargs):
results = {}
if 'author' in output and len(output['author']) > 0:
author_text, confidence = output['author'][0]
@@ -119,7 +126,9 @@ def postprocess(self, html, output, meta):
results = priority_merge(post_ml_results_, results)

sanity_check_params = {}
if 'url' in results:
if 'url' in kwargs:
sanity_check_params['url'] = kwargs['url']
elif 'url' in results:
sanity_check_params['url'] = results['url']

return attribute_sanity_check(results, **sanity_check_params)

0 comments on commit b1360a8

Please sign in to comment.