You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
We are trying to use the Interactive Metamap API to get tags for a set of patents. We have a total of about 400k patents. While running the code, the api breaks down randomly. I would like to know if you have any built-in restrictions we have to follow while using the code.
I had also tried running the specific input texts at which the code was breaking to see if there were some character issues throwing the error, but the code works fine for the texts.
PFB the code used:
def flow_from_df(dataframe: pd.DataFrame, chunk_size: int = 400):
for start_row in range(0, dataframe.shape[0], chunk_size):
end_row = min(start_row + chunk_size, dataframe.shape[0])
yield dataframe.iloc[start_row:end_row, :]
get_chunk = flow_from_df(df)
list_mesh = []
for index, indi_chunks in enumerate(chunk_list):
print('Working on Chunk number: {}'.format(index))
print('Date and Time of start of the chunk: {}'.format(datetime.datetime.now()))
for idx, rows in indi_chunks.iterrows():
input_id = df.iloc[idx]['patent_id']
input_text = df.iloc[idx]['merged_claim_text']
inst.init_mti_interactive(input_text, args='-opt1L_DCMS')
response = inst.submit()
print('response status: {}'.format(response.status_code))
resp_dec = response.content.decode()
list_mesh_3.append([input_id, input_text, resp_dec])
print('Currently Processed: {}'.format(len(list_mesh_3)))
#print('Date and Time of completion of the above patent: {}'.format(datetime.datetime.now()))
print('Date and Time of completion of the chunk: {}'.format(datetime.datetime.now()))
sleep(60*60)
ValueError: invalid literal for int() with base 16: b''
During handling of the above exception, another exception occurred:
InvalidChunkLength Traceback (most recent call last)
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:438, in HTTPResponse._error_catcher(self)
437 try:
--> 438 yield
440 except SocketTimeout:
441 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
442 # there is yet no clean way to get at it from this context.
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:764, in HTTPResponse.read_chunked(self, amt, decode_content)
763 while True:
--> 764 self._update_chunk_length()
765 if self.chunk_left == 0:
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\requests\models.py:760, in Response.iter_content..generate()
759 try:
--> 760 for chunk in self.raw.stream(chunk_size, decode_content=True):
761 yield chunk
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:572, in HTTPResponse.stream(self, amt, decode_content)
571 if self.chunked and self.supports_chunked_reads():
--> 572 for line in self.read_chunked(amt, decode_content=decode_content):
573 yield line
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:793, in HTTPResponse.read_chunked(self, amt, decode_content)
792 if self._original_response:
--> 793 self._original_response.close()
File c:\users\somas326055\appdata\local\programs\python\python38\lib\contextlib.py:131, in _GeneratorContextManager.exit(self, type, value, traceback)
130 try:
--> 131 self.gen.throw(type, value, traceback)
132 except StopIteration as exc:
133 # Suppress StopIteration unless it's the same exception that
134 # was passed to throw(). This prevents a StopIteration
135 # raised inside the "with" statement from being suppressed.
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:455, in HTTPResponse._error_catcher(self)
453 except (HTTPException, SocketError) as e:
454 # This includes IncompleteRead.
--> 455 raise ProtocolError("Connection broken: %r" % e, e)
457 # If no exception is thrown, we should avoid cleaning up
458 # unnecessarily.
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\requests\sessions.py:687, in Session.send(self, request, **kwargs)
684 pass
686 if not stream:
--> 687 r.content
689 return r
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\requests\models.py:838, in Response.content(self)
836 self._content = None
837 else:
--> 838 self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
840 self._content_consumed = True
841 # don't need to release the connection; that's been handled by urllib3
842 # since we exhausted the data.
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\requests\models.py:763, in Response.iter_content..generate()
761 yield chunk
762 except ProtocolError as e:
--> 763 raise ChunkedEncodingError(e)
764 except DecodeError as e:
765 raise ContentDecodingError(e)
Hi,
We are trying to use the Interactive Metamap API to get tags for a set of patents. We have a total of about 400k patents. While running the code, the api breaks down randomly. I would like to know if you have any built-in restrictions we have to follow while using the code.
I had also tried running the specific input texts at which the code was breaking to see if there were some character issues throwing the error, but the code works fine for the texts.
PFB the code used:
def flow_from_df(dataframe: pd.DataFrame, chunk_size: int = 400):
for start_row in range(0, dataframe.shape[0], chunk_size):
end_row = min(start_row + chunk_size, dataframe.shape[0])
yield dataframe.iloc[start_row:end_row, :]
get_chunk = flow_from_df(df)
list_mesh = []
for index, indi_chunks in enumerate(chunk_list):
print('Working on Chunk number: {}'.format(index))
print('Date and Time of start of the chunk: {}'.format(datetime.datetime.now()))
for idx, rows in indi_chunks.iterrows():
input_id = df.iloc[idx]['patent_id']
input_text = df.iloc[idx]['merged_claim_text']
inst.init_mti_interactive(input_text, args='-opt1L_DCMS')
response = inst.submit()
print('response status: {}'.format(response.status_code))
resp_dec = response.content.decode()
list_mesh_3.append([input_id, input_text, resp_dec])
print('Currently Processed: {}'.format(len(list_mesh_3)))
#print('Date and Time of completion of the above patent: {}'.format(datetime.datetime.now()))
print('Date and Time of completion of the chunk: {}'.format(datetime.datetime.now()))
sleep(60*60)
Error received:
ValueError Traceback (most recent call last)
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:697, in HTTPResponse._update_chunk_length(self)
696 try:
--> 697 self.chunk_left = int(line, 16)
698 except ValueError:
699 # Invalid chunked protocol response, abort.
ValueError: invalid literal for int() with base 16: b''
During handling of the above exception, another exception occurred:
InvalidChunkLength Traceback (most recent call last)
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:438, in HTTPResponse._error_catcher(self)
437 try:
--> 438 yield
440 except SocketTimeout:
441 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
442 # there is yet no clean way to get at it from this context.
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:764, in HTTPResponse.read_chunked(self, amt, decode_content)
763 while True:
--> 764 self._update_chunk_length()
765 if self.chunk_left == 0:
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:701, in HTTPResponse._update_chunk_length(self)
700 self.close()
--> 701 raise InvalidChunkLength(self, line)
InvalidChunkLength: InvalidChunkLength(got length b'', 0 bytes read)
During handling of the above exception, another exception occurred:
ProtocolError Traceback (most recent call last)
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\requests\models.py:760, in Response.iter_content..generate()
759 try:
--> 760 for chunk in self.raw.stream(chunk_size, decode_content=True):
761 yield chunk
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:572, in HTTPResponse.stream(self, amt, decode_content)
571 if self.chunked and self.supports_chunked_reads():
--> 572 for line in self.read_chunked(amt, decode_content=decode_content):
573 yield line
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:793, in HTTPResponse.read_chunked(self, amt, decode_content)
792 if self._original_response:
--> 793 self._original_response.close()
File c:\users\somas326055\appdata\local\programs\python\python38\lib\contextlib.py:131, in _GeneratorContextManager.exit(self, type, value, traceback)
130 try:
--> 131 self.gen.throw(type, value, traceback)
132 except StopIteration as exc:
133 # Suppress StopIteration unless it's the same exception that
134 # was passed to throw(). This prevents a StopIteration
135 # raised inside the "with" statement from being suppressed.
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\urllib3\response.py:455, in HTTPResponse._error_catcher(self)
453 except (HTTPException, SocketError) as e:
454 # This includes IncompleteRead.
--> 455 raise ProtocolError("Connection broken: %r" % e, e)
457 # If no exception is thrown, we should avoid cleaning up
458 # unnecessarily.
ProtocolError: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))
During handling of the above exception, another exception occurred:
ChunkedEncodingError Traceback (most recent call last)
Input In [12], in <cell line: 3>()
8 input_text = df.iloc[idx]['merged_claim_text']
9 inst.init_mti_interactive(input_text, args='-opt1L_DCMS')
---> 10 response = inst.submit()
11 print('response status: {}'.format(response.status_code))
12 resp_dec = response.content.decode()
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\skr_web_api_init_.py:103, in Submission.submit(self)
101 if response.status_code == 302:
102 newurl = s.get_redirect_target(response)
--> 103 response = s.post(newurl,
104 self.form, files=self.files,
105 headers=headers, params=params,
106 allow_redirects=False)
107 return response
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\requests\sessions.py:577, in Session.post(self, url, data, json, **kwargs)
566 def post(self, url, data=None, json=None, **kwargs):
567 r"""Sends a POST request. Returns :class:
Response
object.568
569 :param url: URL for the new :class:
Request
object.(...)
574 :rtype: requests.Response
575 """
--> 577 return self.request('POST', url, data=data, json=json, **kwargs)
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\requests\sessions.py:529, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
524 send_kwargs = {
525 'timeout': timeout,
526 'allow_redirects': allow_redirects,
527 }
528 send_kwargs.update(settings)
--> 529 resp = self.send(prep, **send_kwargs)
531 return resp
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\requests\sessions.py:687, in Session.send(self, request, **kwargs)
684 pass
686 if not stream:
--> 687 r.content
689 return r
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\requests\models.py:838, in Response.content(self)
836 self._content = None
837 else:
--> 838 self._content = b''.join(self.iter_content(CONTENT_CHUNK_SIZE)) or b''
840 self._content_consumed = True
841 # don't need to release the connection; that's been handled by urllib3
842 # since we exhausted the data.
File c:\users\somas326055\appdata\local\programs\python\python38\lib\site-packages\requests\models.py:763, in Response.iter_content..generate()
761 yield chunk
762 except ProtocolError as e:
--> 763 raise ChunkedEncodingError(e)
764 except DecodeError as e:
765 raise ContentDecodingError(e)
ChunkedEncodingError: ("Connection broken: InvalidChunkLength(got length b'', 0 bytes read)", InvalidChunkLength(got length b'', 0 bytes read))
The text was updated successfully, but these errors were encountered: