fix bing

liberize · May 7, 2017 · 9fa0d9f · 9fa0d9f
1 parent a4c73d1
commit 9fa0d9f
Show file tree

Hide file tree

Showing 3 changed files with 24 additions and 53 deletions.
diff --git a/README.md b/README.md
@@ -85,8 +85,6 @@ update  | 修改配置文件的某些项后需要更新才能生效
         * "parse_html": 使用 html 或纯文本格式作为查询结果，理论上解析 html 更准确，默认为 "true"。
     * "dictcn": 海词词典选项：
         * "wap_page": 是否使用 wap 页面查词，wap 页面信息较少，默认为 "false"。
-    * "bing": 必应词典选项：
-        * "wap_page": 是否使用 wap 页面查词，wap 页面速度更快，默认为 "true"。
 * "cache": 缓存相关的设置。
     * "enable": 打开或关闭缓存，默认为 "true"。
     * "expire": 缓存失效时间，以小时为单位，默认为 "24"。

diff --git a/cndict/bing.py b/cndict/bing.py
@@ -9,14 +9,9 @@
 from utils import *
 
 
-def lookup(word, wap_page=True, *args):
+def lookup(word, *args):
     params = {'q': word}
-    if wap_page:
-        params['view'] = 'wap'
-        url = 'http://dict.bing.com.cn/'
-    else:
-        url = 'http://www.bing.com/dict/search'
-    url = '{}?{}'.format(url, urllib.urlencode(params))
+    url = '{}?{}'.format('http://www.bing.com/dict/search', urllib.urlencode(params))
     try:
         request = urllib2.Request(url)
         request.add_header('Accept-Encoding', 'gzip')
@@ -32,49 +27,31 @@ def lookup(word, wap_page=True, *args):
     result = []
     is_eng = is_english(word)
 
-    if wap_page:
-        match = re.search(r'<div><b>.*?</b>{}<br />(.*?)<br />web\.<br />'.format(
-                          r'(.*?US:\[(.*?)\])?(.*?UK:\[(.*?)\])?.*?'
-                          if is_eng else
-                          r'.*?'), data)
-        if match:
-            phonetic = (match.group(2) or match.group(4)) if is_eng else ''
-            result.append('{}{}'.format(word, ' /{}/'.format(phonetic) if phonetic else ''))
-            definition = match.group(5 if is_eng else 1)
-            items = definition.replace('&nbsp;', '').replace('&bull;', '').split('<br />')
-            part = ''
+    # no need to use BeautifulSoup, just extract definition from meta tag
+    match = re.search(r'<meta name="description" content="(.*?)" />', data)
+    if match is None:
+        raise DictLookupError('failed to find meta tag.')
+    description = match.group(1)
+
+    match = re.match(r'^必应词典为您提供.*?的释义{}，(.*?)； 网络释义：.*$'.format(
+                     r'(，美\[(.*?)\])?(，英\[(.*?)\])?'
+                     if is_eng else
+                     r'，拼音\[(.*)\]'), description)
+    if match:
+        phonetic = (match.group(2) or match.group(4)) if is_eng else match.group(1)
+        result.append('{}{}'.format(word, ' /{}/'.format(phonetic) if phonetic else ''))
+        items = match.group(5 if is_eng else 2).split('； ')
+        if is_eng:
+            for item in items:
+                if item != '':
+                    result.append(item)
+        else:
             for item in items:
-                match = re.match(r'^([a-z]+\.)$', item)
+                match = re.match(r'([a-z]+\.) (.+)', item)
                 if match:
                     part = match.group(1)
-                    continue
-                result.append('{} {}'.format(part, item))
-    else:
-        # no need to use BeautifulSoup, just extract definition from meta tag
-        match = re.search(r'<meta name="description" content="(.*?)"/>', data)
-        if match is None:
-            raise DictLookupError('failed to find meta tag.')
-        description = match.group(1)
-
-        match = re.match(r'^必应词典为您提供.*?的释义{}，(.*?)； 网络释义：.*$'.format(
-                         r'(，美\[(.*?)\])?(，英\[(.*?)\])?'
-                         if is_eng else
-                         r'，拼音\[(.*)\]'), description)
-        if match:
-            phonetic = (match.group(2) or match.group(4)) if is_eng else match.group(1)
-            result.append('{}{}'.format(word, ' /{}/'.format(phonetic) if phonetic else ''))
-            items = match.group(5 if is_eng else 2).split('； ')
-            if is_eng:
-                for item in items:
-                    if item != '':
-                        result.append(item)
-            else:
-                for item in items:
-                    match = re.match(r'([a-z]+\.) (.+)', item)
-                    if match:
-                        part = match.group(1)
-                        for new_item in match.group(2).split('; '):
-                            result.append('{} {}'.format(part, new_item))
+                    for new_item in match.group(2).split('; '):
+                        result.append('{} {}'.format(part, new_item))
     return result
 
 

diff --git a/config.json b/config.json
@@ -39,10 +39,6 @@
             "parse_html": true
         },
         "dictcn":
-        {
-            "wap_page": false
-        },
-        "bing":
         {
             "wap_page": true
         }