fix: get note detail (#137)

ReaJason · Nov 26, 2024 · 4cdfeaf
1 parent 4b55f82
commit 4cdfeaf
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 8 deletions.
diff --git a/example/basic_usage.py b/example/basic_usage.py
@@ -47,7 +47,7 @@ def sign(uri, data=None, a1="", web_session=""):
     for _ in range(10):
         # 即便上面做了重试，还是有可能会遇到签名失败的情况，重试即可
         try:
-            note = xhs_client.get_note_by_id("6505318c000000001f03c5a6")
+            note = xhs_client.get_note_by_id("6505318c000000001f03c5a6", "xsec_token of the note")
             print(json.dumps(note, indent=4))
             print(help.get_imgs_url_from_note(note))
             break

diff --git a/xhs/core.py b/xhs/core.py
@@ -203,18 +203,25 @@ def post(self, uri: str, data: dict | None, is_creator: bool = False, is_custome
         else:
             return self.request(method="POST", url=f"{endpoint}{uri}", **kwargs)
 
-    def get_note_by_id(self, note_id: str):
+    def get_note_by_id(self, note_id: str, xsec_token: str, xsec_source: str = "pc_feed"):
         """
         :param note_id: note_id you want to fetch
         :type note_id: str
         :rtype: dict
         """
-        data = {"source_note_id": note_id, "image_scenes": ["CRD_WM_WEBP"]}
+
+        data = {
+            "source_note_id": note_id,
+            "image_formats": ["jpg", "webp", "avif"],
+            "extra": {"need_body_topic": 1},
+            "xsec_source": xsec_source,
+            "xsec_token": xsec_token
+        }
         uri = "/api/sns/web/v1/feed"
         res = self.post(uri, data)
         return res["items"][0]["note_card"]
 
-    def get_note_by_id_from_html(self, note_id: str):
+    def get_note_by_id_from_html(self, note_id: str, xsec_token: str, xsec_source: str = "pc_feed"):
         """get note info from "https://www.xiaohongshu.com/explore/" + note_id,
         and the return obj is equal to get_note_by_id
 
@@ -245,7 +252,7 @@ def transform_json_keys(json_data):
                     dict_new[new_key] = value
             return dict_new
 
-        url = "https://www.xiaohongshu.com/explore/" + note_id
+        url = f"https://www.xiaohongshu.com/explore/{note_id}?xsec_token={xsec_token}&xsec_source={xsec_source}"
         res = self.session.get(url, headers={"user-agent": self.user_agent, "referer": "https://www.xiaohongshu.com/"})
         html = res.text
         state = re.findall(r"window.__INITIAL_STATE__=({.*})</script>", html)[0].replace("undefined", '""')
@@ -463,11 +470,10 @@ def get_user_all_notes(self, user_id: str, crawl_interval: int = 1):
             res = self.get_user_notes(user_id, cursor)
             has_more = res["has_more"]
             cursor = res["cursor"]
-            note_ids = map(lambda item: item["note_id"], res["notes"])
 
-            for note_id in note_ids:
+            for item in res["notes"]:
                 try:
-                    note = self.get_note_by_id(note_id)
+                    note = self.get_note_by_id(item["note_id"], item["xsec_token"])
                 except DataFetchError as e:
                     if ErrorEnum.NOTE_ABNORMAL.value.msg in e.__repr__() or ErrorEnum.NOTE_SECRETE_FAULT.value.msg in e.__repr__():
                         continue