Skip to content

Commit

Permalink
fix: get note detail (#137)
Browse files Browse the repository at this point in the history
  • Loading branch information
vritser authored Nov 26, 2024
1 parent 4b55f82 commit 4cdfeaf
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 8 deletions.
2 changes: 1 addition & 1 deletion example/basic_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def sign(uri, data=None, a1="", web_session=""):
for _ in range(10):
# 即便上面做了重试,还是有可能会遇到签名失败的情况,重试即可
try:
note = xhs_client.get_note_by_id("6505318c000000001f03c5a6")
note = xhs_client.get_note_by_id("6505318c000000001f03c5a6", "xsec_token of the note")
print(json.dumps(note, indent=4))
print(help.get_imgs_url_from_note(note))
break
Expand Down
20 changes: 13 additions & 7 deletions xhs/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,18 +203,25 @@ def post(self, uri: str, data: dict | None, is_creator: bool = False, is_custome
else:
return self.request(method="POST", url=f"{endpoint}{uri}", **kwargs)

def get_note_by_id(self, note_id: str):
def get_note_by_id(self, note_id: str, xsec_token: str, xsec_source: str = "pc_feed"):
"""
:param note_id: note_id you want to fetch
:type note_id: str
:rtype: dict
"""
data = {"source_note_id": note_id, "image_scenes": ["CRD_WM_WEBP"]}

data = {
"source_note_id": note_id,
"image_formats": ["jpg", "webp", "avif"],
"extra": {"need_body_topic": 1},
"xsec_source": xsec_source,
"xsec_token": xsec_token
}
uri = "/api/sns/web/v1/feed"
res = self.post(uri, data)
return res["items"][0]["note_card"]

def get_note_by_id_from_html(self, note_id: str):
def get_note_by_id_from_html(self, note_id: str, xsec_token: str, xsec_source: str = "pc_feed"):
"""get note info from "https://www.xiaohongshu.com/explore/" + note_id,
and the return obj is equal to get_note_by_id
Expand Down Expand Up @@ -245,7 +252,7 @@ def transform_json_keys(json_data):
dict_new[new_key] = value
return dict_new

url = "https://www.xiaohongshu.com/explore/" + note_id
url = f"https://www.xiaohongshu.com/explore/{note_id}?xsec_token={xsec_token}&xsec_source={xsec_source}"
res = self.session.get(url, headers={"user-agent": self.user_agent, "referer": "https://www.xiaohongshu.com/"})
html = res.text
state = re.findall(r"window.__INITIAL_STATE__=({.*})</script>", html)[0].replace("undefined", '""')
Expand Down Expand Up @@ -463,11 +470,10 @@ def get_user_all_notes(self, user_id: str, crawl_interval: int = 1):
res = self.get_user_notes(user_id, cursor)
has_more = res["has_more"]
cursor = res["cursor"]
note_ids = map(lambda item: item["note_id"], res["notes"])

for note_id in note_ids:
for item in res["notes"]:
try:
note = self.get_note_by_id(note_id)
note = self.get_note_by_id(item["note_id"], item["xsec_token"])
except DataFetchError as e:
if ErrorEnum.NOTE_ABNORMAL.value.msg in e.__repr__() or ErrorEnum.NOTE_SECRETE_FAULT.value.msg in e.__repr__():
continue
Expand Down

0 comments on commit 4cdfeaf

Please sign in to comment.