add details

This commit is contained in:
tianyu 2016-09-27 01:03:20 +08:00
parent 1049136022
commit c5049f8d38
5 changed files with 75 additions and 4 deletions

3
kindle/.gitignore vendored
View File

@ -1,3 +1,6 @@
kindle.json
.idea
__pycache__
venv
config.py
cache

View File

@ -1,8 +1,19 @@
# Kindle
## 配置
参考 `config.py.example` ,修改 `config.py` 文件,填写 `API key` 请在 [Amazon](https://console.aws.amazon.com/iam/home#security_credential) 获取。
```shell
AWS_ACCESS_KEY_ID = "xxx"
AWS_SECRET_ACCESS_KEY = "xxx"
AWS_ASSOCIATE_TAG = "xxx"
```
## 运行
```shell
virtualenv -p python3 venv
source venv/bin/activate
pip install -r requirements.txt -I
@ -11,6 +22,6 @@ python kindle.py
**crontab**
```shell
5 0 * * * /path/to/kindle/cron.sh >> /var/log/kindle.log 2>&1
```

3
kindle/config.py.example Normal file
View File

@ -0,0 +1,3 @@
AWS_ACCESS_KEY_ID = "xxx"
AWS_SECRET_ACCESS_KEY = "xxx"
AWS_ASSOCIATE_TAG = "xxx"

View File

@ -1,13 +1,42 @@
#!/usr/bin/env python3
import io
import json
import os
import re
import time as t
from urllib.error import HTTPError
from amazon.api import AmazonAPI
import requests
from bs4 import Tag
import config
from book import Book
cache_dir = 'cache/'


def write_query_to_db(cache_url, data):
    """Persist a raw Amazon API response to the on-disk XML cache.

    The cache key is the ``ItemId`` extracted from *cache_url*; the payload
    is written to ``cache/<ItemId>.xml``.

    :param cache_url: Product Advertising API request URL containing an
        ``ItemId=...&Operation`` query fragment.
    :param data: raw response bytes to store.
    """
    # exist_ok avoids the check-then-create race of the original
    # os.path.exists() + os.mkdir() pair.
    os.makedirs(cache_dir, exist_ok=True)
    item_id = re.match(r'.*ItemId=(.*)&Operation', cache_url).group(1)
    # Context manager closes the handle even on error; the original opened
    # the file and never closed it.
    with open(cache_dir + item_id + '.xml', 'wb') as f:
        f.write(data)
def read_query_from_db(cache_url):
    """Return cached response bytes for *cache_url*, or ``None``.

    A cache entry is used only when the file exists and is fresher than the
    maximum age; otherwise the caller (the API wrapper) re-fetches.

    :param cache_url: Product Advertising API request URL containing an
        ``ItemId=...&Operation`` query fragment.
    :return: cached bytes on a fresh hit, ``None`` on miss or stale entry.
    """
    item_id = re.match(r'.*ItemId=(.*)&Operation', cache_url).group(1)
    file = cache_dir + item_id + '.xml'
    # 20 hours expressed in seconds, matching time.time()/getmtime() units.
    # The original multiplied by an extra 1000 (a milliseconds mix-up),
    # which kept entries "fresh" for roughly 2.3 years.
    max_age = 20 * 60 * 60
    if os.path.exists(file) and os.path.getmtime(file) > t.time() - max_age:
        # Context manager closes the handle; the original leaked it.
        with open(file, 'rb') as f:
            return f.read()
    return None
# Product Advertising API client for the China region.  MaxQPS throttles the
# request rate below 1 req/s; CacheReader/CacheWriter plug the on-disk XML
# cache helpers above into the wrapper so repeated lookups skip the network.
amazon = AmazonAPI(config.AWS_ACCESS_KEY_ID, config.AWS_SECRET_ACCESS_KEY, config.AWS_ASSOCIATE_TAG,
region='CN', MaxQPS=0.9, CacheReader=read_query_from_db, CacheWriter=write_query_to_db)
# Desktop Chrome User-Agent so the scraped site serves its normal page.
# NOTE(review): missing space before "Chrome/..." in the UA string — looks
# accidental, but it is runtime data, so it is left untouched here.
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/50.0.2661.75 Safari/537.36'
header = {'User-Agent': user_agent}
@ -37,6 +66,7 @@ def fetch(url, headers, cookies):
if isinstance(a, Tag):
book.url = 'https' + re.match('http(.*)/ref', a.get('href')).group(1)
book.item_id = re.match('.*product/(.*)/ref', a.get('href')).group(1)
book.title = a.get('title')
matches = re.match('.*历史均价:¥(.*),现价:¥(.*)作者:(.*),评分:(.*),历史最低价:¥(.*)', book_item.text)
@ -47,7 +77,30 @@ def fetch(url, headers, cookies):
book.score = matches.group(4)
book.min = matches.group(5)
kindle['books'].append(book)
while True:
try:
product = amazon.lookup(ItemId=book.item_id)
book.author = product.author
book.pages = product.pages
book.publisher = product.publisher
book.brand = product.brand
book.asin = product.asin
book.binding = product.binding
book.edition = product.edition
book.editorial_reviews = product.editorial_reviews
book.isbn = product.isbn
book.large_image_url = product.large_image_url
book.region = product.region
book.release_date = product.release_date.strftime("%Y-%m-%d")
book.sales_rank = product.sales_rank
kindle['books'].append(book)
print('cached: ' + book.item_id + ' -> ' + book.title)
break
except HTTPError:
t.sleep(2)
pass
with io.open('kindle.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(kindle, default=lambda o: o.__dict__, indent=2, ensure_ascii=False, sort_keys=True))

View File

@ -1,3 +1,4 @@
lxml == 3.5.0
requests == 2.9.1
AmazonAPIWrapper == 0.0.11
beautifulsoup4 == 4.5.1