add details

This commit is contained in:
tianyu 2016-09-27 01:03:20 +08:00
parent 1049136022
commit c5049f8d38
5 changed files with 75 additions and 4 deletions

5
kindle/.gitignore vendored
View File

@ -1,3 +1,6 @@
kindle.json kindle.json
.idea .idea
__pycache__ __pycache__
venv
config.py
cache

View File

@ -1,8 +1,19 @@
# Kindle # Kindle
## 配置
参考 `config.py.example`,创建并修改 `config.py` 文件,填写 `API key`。`API key` 请在 [Amazon](https://console.aws.amazon.com/iam/home#security_credential) 获取。
```python
AWS_ACCESS_KEY_ID = "xxx"
AWS_SECRET_ACCESS_KEY = "xxx"
AWS_ASSOCIATE_TAG = "xxx"
```
## 运行 ## 运行
``` ```shell
virtualenv -p python3 venv virtualenv -p python3 venv
source venv/bin/activate source venv/bin/activate
pip install -r requirements.txt -I pip install -r requirements.txt -I
@ -11,6 +22,6 @@ python kindle.py
**crontab** **crontab**
``` ```shell
5 0 * * * /path/to/kindle/cron.sh >> /var/log/kindle.log 2>&1 5 0 * * * /path/to/kindle/cron.sh >> /var/log/kindle.log 2>&1
``` ```

3
kindle/config.py.example Normal file
View File

@ -0,0 +1,3 @@
AWS_ACCESS_KEY_ID = "xxx"
AWS_SECRET_ACCESS_KEY = "xxx"
AWS_ASSOCIATE_TAG = "xxx"

View File

@ -1,13 +1,42 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import io import io
import json import json
import os
import re import re
import time as t
from urllib.error import HTTPError
from amazon.api import AmazonAPI
import requests import requests
from bs4 import Tag from bs4 import Tag
import config
from book import Book from book import Book
# Relative directory prefix under which cached query responses are stored as '<ItemId>.xml' files.
cache_dir = 'cache/'
def write_query_to_db(cache_url, data):
    """Store a raw query response in the on-disk cache.

    The cache file is named after the ``ItemId`` query parameter found in
    *cache_url* and is written below ``cache_dir`` with an ``.xml`` suffix.

    Args:
        cache_url: Request URL; must contain ``ItemId=<id>&Operation``.
        data: Response body as bytes (the file is opened in binary mode).

    Raises:
        AttributeError: If *cache_url* does not contain the expected
            ``ItemId=...&Operation`` fragment.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(cache_dir, exist_ok=True)
    cache_path = cache_dir + re.match('.*ItemId=(.*)&Operation', cache_url).group(1) + '.xml'
    # Close the handle deterministically so the data is flushed to disk
    # before any later cache read.
    with open(cache_path, 'wb') as f:
        f.write(data)
def read_query_from_db(cache_url):
    """Return the cached response for *cache_url*, or ``None`` on a miss.

    A cache entry counts as a hit only if its file exists and was modified
    within the last 20 hours.

    Args:
        cache_url: Request URL; must contain ``ItemId=<id>&Operation``.

    Returns:
        The cached response as bytes, or ``None`` if there is no fresh entry.

    Raises:
        AttributeError: If *cache_url* does not contain the expected
            ``ItemId=...&Operation`` fragment.
    """
    cache_path = cache_dir + re.match('.*ItemId=(.*)&Operation', cache_url).group(1) + '.xml'
    # time.time() and os.path.getmtime() are both expressed in seconds, so the
    # freshness window must be in seconds too (no millisecond factor).
    max_age_seconds = 20 * 60 * 60
    if os.path.exists(cache_path) and os.path.getmtime(cache_path) > t.time() - max_age_seconds:
        with open(cache_path, 'rb') as f:
            return f.read()
    return None
# Module-level Amazon API client: credentials come from config.py, and the
# file-based cache helpers are plugged in as the reader/writer callbacks.
amazon = AmazonAPI(
    config.AWS_ACCESS_KEY_ID,
    config.AWS_SECRET_ACCESS_KEY,
    config.AWS_ASSOCIATE_TAG,
    region='CN',
    MaxQPS=0.9,
    CacheReader=read_query_from_db,
    CacheWriter=write_query_to_db,
)
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/50.0.2661.75 Safari/537.36' user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/50.0.2661.75 Safari/537.36'
header = {'User-Agent': user_agent} header = {'User-Agent': user_agent}
@ -37,6 +66,7 @@ def fetch(url, headers, cookies):
if isinstance(a, Tag): if isinstance(a, Tag):
book.url = 'https' + re.match('http(.*)/ref', a.get('href')).group(1) book.url = 'https' + re.match('http(.*)/ref', a.get('href')).group(1)
book.item_id = re.match('.*product/(.*)/ref', a.get('href')).group(1)
book.title = a.get('title') book.title = a.get('title')
matches = re.match('.*历史均价:¥(.*),现价:¥(.*)作者:(.*),评分:(.*),历史最低价:¥(.*)', book_item.text) matches = re.match('.*历史均价:¥(.*),现价:¥(.*)作者:(.*),评分:(.*),历史最低价:¥(.*)', book_item.text)
@ -47,7 +77,30 @@ def fetch(url, headers, cookies):
book.score = matches.group(4) book.score = matches.group(4)
book.min = matches.group(5) book.min = matches.group(5)
kindle['books'].append(book) while True:
try:
product = amazon.lookup(ItemId=book.item_id)
book.author = product.author
book.pages = product.pages
book.publisher = product.publisher
book.brand = product.brand
book.asin = product.asin
book.binding = product.binding
book.edition = product.edition
book.editorial_reviews = product.editorial_reviews
book.isbn = product.isbn
book.large_image_url = product.large_image_url
book.region = product.region
book.release_date = product.release_date.strftime("%Y-%m-%d")
book.sales_rank = product.sales_rank
kindle['books'].append(book)
print('cached: ' + book.item_id + ' -> ' + book.title)
break
except HTTPError:
t.sleep(2)
pass
with io.open('kindle.json', 'w', encoding='utf-8') as f: with io.open('kindle.json', 'w', encoding='utf-8') as f:
f.write(json.dumps(kindle, default=lambda o: o.__dict__, indent=2, ensure_ascii=False, sort_keys=True)) f.write(json.dumps(kindle, default=lambda o: o.__dict__, indent=2, ensure_ascii=False, sort_keys=True))

View File

@ -1,3 +1,4 @@
lxml == 3.5.0 lxml == 3.5.0
requests == 2.9.1 requests == 2.9.1
AmazonAPIWrapper == 0.0.11
beautifulsoup4 == 4.5.1 beautifulsoup4 == 4.5.1