add kindle

This commit is contained in:
tianyu 2016-09-25 00:52:16 +08:00
parent e0ed298860
commit 3d880bb187
5 changed files with 91 additions and 0 deletions

3
Kindle/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
kindle.json
.idea
__pycache__

13
Kindle/README.md Normal file
View File

@ -0,0 +1,13 @@
# Kindle
## Install dependencies
```shell
pip3 install -r requirements.txt
```
## Run
```shell
python3 kindle.py
```

16
Kindle/book.py Normal file
View File

@ -0,0 +1,16 @@
import json
class Book:
    """One Kindle e-book deal record.

    Every field has a default that is copied onto the instance at
    construction time.  This matters because serialization goes through
    ``__dict__``, which only contains *instance* attributes: without the
    copy, a field that was never assigned would be missing from the JSON
    output (and a fresh ``Book().json()`` would be ``{}``).
    """

    # Class-level defaults.  Kept on the class so ``Book.title`` and friends
    # still resolve for any code that reads them directly.
    title = ''
    average = 0
    price = 0
    author = ''
    min = 0
    score = 0
    url = ''
    min_day = ''

    # Names of the serializable fields, in declaration order.
    _FIELDS = ('title', 'average', 'price', 'author',
               'min', 'score', 'url', 'min_day')

    def __init__(self):
        # Materialize each default as an instance attribute so that it shows
        # up in ``self.__dict__`` and therefore in the JSON output.
        for name in self._FIELDS:
            setattr(self, name, getattr(type(self), name))

    def json(self):
        """Return this book as a JSON string.

        The output uses a 2-space indent, sorted keys, and keeps non-ASCII
        characters as-is instead of escaping them.  Nested objects that the
        encoder does not know are serialized via their ``__dict__``.
        """
        return json.dumps(self, default=lambda o: o.__dict__, indent=2, ensure_ascii=False, sort_keys=True)

56
Kindle/kindle.py Executable file
View File

@ -0,0 +1,56 @@
#!/usr/bin/env python3
import io
import json
import re
import requests
from bs4 import Tag
from book import Book
# Browser-like User-Agent sent with every request.
# NOTE(review): there is no space between "Gecko)" and "Chrome" below; real
# Chrome UA strings have one. Kept byte-for-byte here -- confirm whether it
# is intentional before changing it.
user_agent = (
    'Mozilla/5.0 (X11; Linux x86_64) '
    'AppleWebKit/537.36 (KHTML, like Gecko)'
    'Chrome/50.0.2661.75 Safari/537.36'
)

# Default HTTP headers handed to fetch() by the script entry point.
header = {
    'User-Agent': user_agent,
}
def fetch(url, headers, cookies):
    """Scrape the deals page at *url* and write the result to ``kindle.json``.

    The page is downloaded with ``requests``, parsed with BeautifulSoup
    (lxml parser), and every product ``div`` is turned into a ``Book``.
    The output file, written to the current working directory as UTF-8,
    is a JSON object with two keys: ``time`` (the "data updated at" text
    taken from the page) and ``books`` (the list of parsed books).

    Args:
        url: Address of the page to scrape.
        headers: HTTP headers passed through to ``requests.get``.
        cookies: Cookies passed through to ``requests.get``.

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-success HTTP status.
        AttributeError: if the page does not have the expected layout
            (an element lookup or a regex match came back as ``None``).
    """
    # Imported here, as in the original: the module top level only imports
    # ``Tag`` from bs4.
    from bs4 import BeautifulSoup

    # Without a timeout a stalled server would hang the script forever, and
    # without the status check an HTTP error page would be parsed as if it
    # were the deals page.
    r = requests.get(url, headers=headers, cookies=cookies, timeout=30)
    r.raise_for_status()
    bs = BeautifulSoup(r.text, 'lxml')

    # Compile every pattern once instead of on each loop iteration.
    updated_re = re.compile('数据更新于:(.*)')
    min_day_title_re = re.compile('最近在')
    min_day_re = re.compile('最近在(.*)达到最低价')
    # ``https?`` so that a link which is already HTTPS is not rewritten to
    # the invalid scheme "httpss".
    link_re = re.compile('https?(.*)/ref')
    details_re = re.compile('.*历史均价:¥(.*),现价:¥(.*)作者:(.*),评分:(.*),历史最低价:¥(.*)')

    updated_at = updated_re.match(bs.find('span', style='color:#FFF9A8').text).group(1)
    kindle = {'time': updated_at, 'books': []}

    for book_item in bs.find_all('div', style='margin-bottom: 0.9em;'):
        book = Book()
        if isinstance(book_item, Tag):
            # The span's title attribute holds the date of the lowest price.
            min_day = book_item.find('span', title=min_day_title_re).get('title')
            book.min_day = min_day_re.match(min_day).group(1)

            a = book_item.find('a')
            if isinstance(a, Tag):
                # Drop the "/ref..." tracking suffix and force HTTPS.
                book.url = 'https' + link_re.match(a.get('href')).group(1)
                book.title = a.get('title')

            # Prices, author and score come from the div's flattened text,
            # so they are parsed whether or not an anchor was found.
            matches = details_re.match(book_item.text)
            book.average = matches.group(1)
            book.price = matches.group(2)
            book.author = matches.group(3)
            book.score = matches.group(4)
            book.min = matches.group(5)
        kindle['books'].append(book)

    with io.open('kindle.json', 'w', encoding='utf-8') as f:
        # Book objects are serialized through their ``__dict__``.
        f.write(json.dumps(kindle, default=lambda o: o.__dict__, indent=2, ensure_ascii=False, sort_keys=True))
if __name__ == '__main__':
    # Script entry point: scrape the bookdna page with the default headers
    # and an empty cookie set.
    fetch(url='http://t.bookdna.cn', headers=header, cookies={})

3
Kindle/requirements.txt Normal file
View File

@ -0,0 +1,3 @@
lxml == 3.5.0
requests == 2.9.1
beautifulsoup4 == 4.5.1