From 3d880bb1875d2af1787d6da403a66759b1394c74 Mon Sep 17 00:00:00 2001
From: tianyu <xdtianyu@gmail.com>
Date: Sun, 25 Sep 2016 00:52:16 +0800
Subject: [PATCH] add kindle

---
 Kindle/.gitignore       |  3 +++
 Kindle/README.md        | 13 ++++++++++
 Kindle/book.py          | 16 ++++++++++++
 Kindle/kindle.py        | 56 +++++++++++++++++++++++++++++++++++++++++
 Kindle/requirements.txt |  3 +++
 5 files changed, 91 insertions(+)
 create mode 100644 Kindle/.gitignore
 create mode 100644 Kindle/README.md
 create mode 100644 Kindle/book.py
 create mode 100755 Kindle/kindle.py
 create mode 100644 Kindle/requirements.txt

diff --git a/Kindle/.gitignore b/Kindle/.gitignore
new file mode 100644
index 0000000..eeffe73
--- /dev/null
+++ b/Kindle/.gitignore
@@ -0,0 +1,3 @@
+kindle.json
+.idea
+__pycache__
\ No newline at end of file
diff --git a/Kindle/README.md b/Kindle/README.md
new file mode 100644
index 0000000..db7b4fd
--- /dev/null
+++ b/Kindle/README.md
@@ -0,0 +1,13 @@
+# Kindle
+
+## Install dependencies
+
+```shell
+pip3 install -r requirements.txt
+```
+
+## Run
+
+```shell
+python3 kindle.py
+```
\ No newline at end of file
diff --git a/Kindle/book.py b/Kindle/book.py
new file mode 100644
index 0000000..12a33a7
--- /dev/null
+++ b/Kindle/book.py
@@ -0,0 +1,16 @@
+import json
+
+
+class Book:
+    """One scraped book; every field lives on the instance so ``__dict__`` always holds all of them."""
+
+    def __init__(self):
+        # json() (like any ``default=lambda o: o.__dict__`` encoder) only sees instance
+        # attributes, so class-level defaults would silently vanish from the JSON output.
+        self.title = self.author = self.url = self.min_day = ''
+        self.average = self.price = self.min = self.score = 0
+
+    def json(self):
+        """Return the book as indented, key-sorted JSON text with non-ASCII characters kept."""
+        return json.dumps(self, default=lambda o: o.__dict__, indent=2, ensure_ascii=False, sort_keys=True)
+
diff --git a/Kindle/kindle.py b/Kindle/kindle.py
new file mode 100755
index 0000000..f99ab6d
--- /dev/null
+++ b/Kindle/kindle.py
@@ -0,0 +1,56 @@
+#!/usr/bin/env python3
+import io
+import json
+import re
+
+import requests
+from bs4 import Tag
+
+from book import Book
+
+# Desktop-Chrome style User-Agent; browsers put a space between "(KHTML, like Gecko)" and "Chrome/".
+user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36'
+header = {'User-Agent': user_agent}
+
+
+def fetch(url, headers, cookies):
+    """Scrape the book list at ``url`` (requested with ``headers``/``cookies``) into ``kindle.json``.
+
+    Raises ``requests.RequestException`` on a timeout or an HTTP error status.
+    """
+    from bs4 import BeautifulSoup
+    import lxml
+
+    # Without a timeout requests can block forever on a stalled server.
+    r = requests.get(url, headers=headers, cookies=cookies, timeout=30)
+    r.raise_for_status()
+    bs = BeautifulSoup(r.text, lxml.__name__)
+
+    time = re.match('数据更新于：(.*)', bs.find('span', style='color:#FFF9A8').text).group(1)
+    kindle = {'time': time, 'books': []}
+
+    for book_item in bs.find_all('div', style='margin-bottom: 0.9em;'):
+        matches = re.match('.*历史均价：￥(.*)，现价：￥(.*)作者：(.*)，评分：(.*)，历史最低价：￥(.*)', book_item.text)
+        if matches is None:
+            continue  # no price summary in this div: skip it instead of aborting the whole run
+        book = Book()
+        book.average, book.price, book.author, book.score, book.min = matches.groups()
+
+        span = book_item.find('span', title=re.compile('最近在'))
+        min_day = re.match('最近在(.*)达到最低价', span.get('title')) if span is not None else None
+        if min_day is not None:
+            book.min_day = min_day.group(1)
+
+        a = book_item.find('a')
+        link = re.match('http(.*)/ref', a.get('href') or '') if a is not None else None
+        if link is not None:
+            book.url = 'https' + link.group(1)
+        if a is not None:
+            book.title = a.get('title')
+        kindle['books'].append(book)
+
+    with io.open('kindle.json', 'w', encoding='utf-8') as f:
+        f.write(json.dumps(kindle, default=lambda o: o.__dict__, indent=2, ensure_ascii=False, sort_keys=True))
+
+if __name__ == '__main__':  # scrape only when run as a script, not on import
+    fetch(url='http://t.bookdna.cn', headers=header, cookies={})
diff --git a/Kindle/requirements.txt b/Kindle/requirements.txt
new file mode 100644
index 0000000..1c47764
--- /dev/null
+++ b/Kindle/requirements.txt
@@ -0,0 +1,3 @@
+lxml == 3.5.0
+requests == 2.9.1
+beautifulsoup4 == 4.5.1