add details
This commit is contained in:
parent
1049136022
commit
c5049f8d38
|
@ -1,3 +1,6 @@
|
||||||
kindle.json
|
kindle.json
|
||||||
.idea
|
.idea
|
||||||
__pycache__
|
__pycache__
|
||||||
|
venv
|
||||||
|
config.py
|
||||||
|
cache
|
|
@ -1,8 +1,19 @@
|
||||||
# Kindle
|
# Kindle
|
||||||
|
|
||||||
|
## 配置
|
||||||
|
|
||||||
|
参考 `config.py.example`,创建并修改 `config.py` 文件,填写 API key。请在 [Amazon](https://console.aws.amazon.com/iam/home#security_credential) 获取。
|
||||||
|
|
||||||
|
```python
|
||||||
|
AWS_ACCESS_KEY_ID = "xxx"
|
||||||
|
AWS_SECRET_ACCESS_KEY = "xxx"
|
||||||
|
AWS_ASSOCIATE_TAG = "xxx"
|
||||||
|
```
|
||||||
|
|
||||||
## 运行
|
## 运行
|
||||||
|
|
||||||
```
|
```shell
|
||||||
virtualenv -p python3 venv
|
virtualenv -p python3 venv
|
||||||
source venv/bin/activate
|
source venv/bin/activate
|
||||||
pip install -r requirements.txt -I
|
pip install -r requirements.txt -I
|
||||||
|
@ -11,6 +22,6 @@ python kindle.py
|
||||||
|
|
||||||
**crontab**
|
**crontab**
|
||||||
|
|
||||||
```
|
```shell
|
||||||
5 0 * * * /path/to/kindle/cron.sh >> /var/log/kindle.log 2>&1
|
5 0 * * * /path/to/kindle/cron.sh >> /var/log/kindle.log 2>&1
|
||||||
```
|
```
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
AWS_ACCESS_KEY_ID = "xxx"
|
||||||
|
AWS_SECRET_ACCESS_KEY = "xxx"
|
||||||
|
AWS_ASSOCIATE_TAG = "xxx"
|
|
@ -1,13 +1,42 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
|
import time as t
|
||||||
|
from urllib.error import HTTPError
|
||||||
|
|
||||||
|
from amazon.api import AmazonAPI
|
||||||
import requests
|
import requests
|
||||||
from bs4 import Tag
|
from bs4 import Tag
|
||||||
|
|
||||||
|
import config
|
||||||
from book import Book
|
from book import Book
|
||||||
|
|
||||||
|
cache_dir = 'cache/'
|
||||||
|
|
||||||
|
|
||||||
|
def write_query_to_db(cache_url, data):
    """Store a raw API response in the on-disk cache.

    The cache file is named after the ``ItemId`` query parameter found in
    ``cache_url`` and written to ``<cache_dir><ItemId>.xml``.

    Args:
        cache_url: Request URL; expected to contain ``ItemId=<id>&Operation``.
        data: Response body as bytes (the file is opened in binary mode).

    Returns:
        None. URLs without an ``ItemId`` cannot be keyed and are not cached.
    """
    matched = re.match('.*ItemId=(.*)&Operation', cache_url)
    if matched is None:
        # Nothing to key the entry on; skip caching instead of crashing on
        # ``None.group``.
        return

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(cache_dir, exist_ok=True)

    path = cache_dir + matched.group(1) + '.xml'
    # The context manager guarantees the data is flushed and the handle is
    # closed before a later read of the same entry.
    with open(path, 'wb') as f:
        f.write(data)
|
||||||
|
|
||||||
|
|
||||||
|
def read_query_from_db(cache_url):
    """Return the cached response body for ``cache_url``, or None on a miss.

    The cache entry is looked up at ``<cache_dir><ItemId>.xml``, where
    ``ItemId`` is taken from the query string of ``cache_url``. An entry is
    only served while its modification time is less than 20 hours old.

    Args:
        cache_url: Request URL; expected to contain ``ItemId=<id>&Operation``.

    Returns:
        The cached bytes, or None when the URL has no ``ItemId``, the entry
        does not exist, or the entry is older than the freshness window.
    """
    matched = re.match('.*ItemId=(.*)&Operation', cache_url)
    if matched is None:
        # A URL that cannot be keyed is simply a cache miss.
        return None

    path = cache_dir + matched.group(1) + '.xml'
    # time.time() and getmtime() are both in seconds, so the window must be
    # in seconds too (the former extra "* 1000" stretched it to ~833 days).
    max_age_seconds = 20 * 60 * 60

    try:
        if os.path.getmtime(path) <= t.time() - max_age_seconds:
            return None
        with open(path, 'rb') as f:
            return f.read()
    except FileNotFoundError:
        # No entry yet (or it vanished between the stat and the open).
        return None
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level API client. Credentials come from config.py; responses are
# read from and written to the file cache through the two helpers passed as
# CacheReader / CacheWriter.
# NOTE(review): MaxQPS=0.9 presumably throttles to under one request per
# second -- confirm against the client library's documentation.
amazon = AmazonAPI(
    config.AWS_ACCESS_KEY_ID,
    config.AWS_SECRET_ACCESS_KEY,
    config.AWS_ASSOCIATE_TAG,
    region='CN',
    MaxQPS=0.9,
    CacheReader=read_query_from_db,
    CacheWriter=write_query_to_db,
)
|
||||||
|
|
||||||
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/50.0.2661.75 Safari/537.36'
|
user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/50.0.2661.75 Safari/537.36'
|
||||||
|
|
||||||
header = {'User-Agent': user_agent}
|
header = {'User-Agent': user_agent}
|
||||||
|
@ -37,6 +66,7 @@ def fetch(url, headers, cookies):
|
||||||
|
|
||||||
if isinstance(a, Tag):
|
if isinstance(a, Tag):
|
||||||
book.url = 'https' + re.match('http(.*)/ref', a.get('href')).group(1)
|
book.url = 'https' + re.match('http(.*)/ref', a.get('href')).group(1)
|
||||||
|
book.item_id = re.match('.*product/(.*)/ref', a.get('href')).group(1)
|
||||||
book.title = a.get('title')
|
book.title = a.get('title')
|
||||||
|
|
||||||
matches = re.match('.*历史均价:¥(.*),现价:¥(.*)作者:(.*),评分:(.*),历史最低价:¥(.*)', book_item.text)
|
matches = re.match('.*历史均价:¥(.*),现价:¥(.*)作者:(.*),评分:(.*),历史最低价:¥(.*)', book_item.text)
|
||||||
|
@ -47,7 +77,30 @@ def fetch(url, headers, cookies):
|
||||||
book.score = matches.group(4)
|
book.score = matches.group(4)
|
||||||
book.min = matches.group(5)
|
book.min = matches.group(5)
|
||||||
|
|
||||||
kindle['books'].append(book)
|
while True:
|
||||||
|
try:
|
||||||
|
product = amazon.lookup(ItemId=book.item_id)
|
||||||
|
|
||||||
|
book.author = product.author
|
||||||
|
book.pages = product.pages
|
||||||
|
book.publisher = product.publisher
|
||||||
|
book.brand = product.brand
|
||||||
|
book.asin = product.asin
|
||||||
|
book.binding = product.binding
|
||||||
|
book.edition = product.edition
|
||||||
|
book.editorial_reviews = product.editorial_reviews
|
||||||
|
book.isbn = product.isbn
|
||||||
|
book.large_image_url = product.large_image_url
|
||||||
|
book.region = product.region
|
||||||
|
book.release_date = product.release_date.strftime("%Y-%m-%d")
|
||||||
|
book.sales_rank = product.sales_rank
|
||||||
|
|
||||||
|
kindle['books'].append(book)
|
||||||
|
print('cached: ' + book.item_id + ' -> ' + book.title)
|
||||||
|
break
|
||||||
|
except HTTPError:
|
||||||
|
t.sleep(2)
|
||||||
|
pass
|
||||||
|
|
||||||
with io.open('kindle.json', 'w', encoding='utf-8') as f:
|
with io.open('kindle.json', 'w', encoding='utf-8') as f:
|
||||||
f.write(json.dumps(kindle, default=lambda o: o.__dict__, indent=2, ensure_ascii=False, sort_keys=True))
|
f.write(json.dumps(kindle, default=lambda o: o.__dict__, indent=2, ensure_ascii=False, sort_keys=True))
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
lxml == 3.5.0
|
lxml == 3.5.0
|
||||||
requests == 2.9.1
|
requests == 2.9.1
|
||||||
|
AmazonAPIWrapper == 0.0.11
|
||||||
beautifulsoup4 == 4.5.1
|
beautifulsoup4 == 4.5.1
|
||||||
|
|
Loading…
Reference in New Issue