move kindle to new repo

2016-10-15 18:06:33 +08:00 · 2016-10-15 18:06:33 +08:00 · 4817808e5e
parent fd2cb0ed9f
commit 4817808e5e
11 changed files with 0 additions and 341 deletions
--- a/kindle/.gitignore
+++ b/kindle/.gitignore
@ -1,7 +0,0 @@
-kindle.json
-.idea
-__pycache__
-venv
-config.py
-cache
-page
--- a/kindle/README.md
+++ b/kindle/README.md
@ -1,27 +0,0 @@
-# Kindle
-
-## 配置
-
-参考 `config.py.example` ，修改 `config.py` 文件，填写 `API key`， 请在 [Amazon](https://console.aws.amazon.com/iam/home#security_credential
-) 获取。 
-
-```shell
-AWS_ACCESS_KEY_ID = "xxx"
-AWS_SECRET_ACCESS_KEY = "xxx"
-AWS_ASSOCIATE_TAG = "xxx"
-```
-
-## 运行
-
-```shell
-virtualenv -p python3 venv
-source venv/bin/activate
-pip install -r requirements.txt -I
-python kindle.py
-```
-
-**crontab**
-
-```shell
-5 0 * * * /path/to/kindle/cron.sh >> /var/log/kindle.log 2>&1
-```
--- a/kindle/amz.py
+++ b/kindle/amz.py
@ -1,81 +0,0 @@
-#!/usr/bin/env python3
-import time as t
-import os
-import re
-from urllib.error import HTTPError
-
-from amazon.api import AmazonAPI
-
-import config
-from node import Node
-
-cache_dir = 'cache/'
-
-
-def write_query_to_db(cache_url, data):
-    if not os.path.exists(cache_dir):
-        os.mkdir(cache_dir)
-
-    file = cache_dir + re.match('.*ItemId=(.*)&Operation', cache_url).group(1) + '.xml'
-    f = open(file, 'wb')
-    f.write(data)
-
-
-def read_query_from_db(cache_url):
-    file = cache_dir + re.match('.*ItemId=(.*)&Operation', cache_url).group(1) + '.xml'
-    if os.path.exists(file) and t.time() - os.path.getmtime(file) < 100 * 24 * 60 * 60 * 1000:
-        f = open(file, 'rb')
-        return f.read()
-    return None
-
-
-amazon = AmazonAPI(config.KEY_ID, config.SECRET_KEY, config.TAG,
-                   region='CN', MaxQPS=0.9, CacheReader=read_query_from_db, CacheWriter=write_query_to_db)
-
-
-def lookup(book):
-    while True:
-        try:
-            product = amazon.lookup(ItemId=book.item_id)
-
-            book.author = product.author
-            book.pages = product.pages
-            book.publisher = product.publisher
-            book.brand = product.brand
-            book.asin = product.asin
-            book.binding = product.binding
-            book.edition = product.edition
-            book.editorial_review = product.editorial_review
-            book.isbn = product.isbn
-            book.large_image_url = product.large_image_url
-            book.region = product.region
-            book.release_date = product.release_date.strftime("%Y-%m-%d")
-            if product.publication_date:
-                book.publication_date = product.publication_date.strftime("%Y-%m-%d")
-            book.sales_rank = product.sales_rank
-            book.medium_image_url = product.medium_image_url
-            book.small_image_url = product.small_image_url
-            if product.languages:
-                book.languages = list(product.languages)
-
-            book.nodes = []
-            for browse_node in product.browse_nodes:
-                node = Node()
-                book.nodes.append(node)
-                while True:
-                    node.id = browse_node.id
-                    node.name = str(browse_node.name)
-                    if not browse_node.is_category_root:
-                        node.node = Node()
-                        node = node.node
-                        browse_node = browse_node.ancestor
-                    else:
-                        node.is_root = True
-                        break
-
-            print('cached: ' + book.item_id + ' -> ' + book.title)
-            break
-        except HTTPError as e:
-            print(e)
-            t.sleep(3)
-            pass
--- a/kindle/book.py
+++ b/kindle/book.py
@ -1,43 +0,0 @@
-import json
-
-
-class Book:
-    title = ''
-    average = 0
-    price = 0
-    author = ''
-    min = 0
-    score = 0
-    url = ''
-    min_day = ''
-
-    item_id = None
-    pages = None
-    publisher = None
-    brand = None
-    asin = None
-    binding = None
-    edition = None
-    editorial_review = None
-    isbn = None
-    large_image_url = None
-    region = None
-    release_date = None
-    sales_rank = None
-    medium_image_url = None
-    publication_date = None
-    small_image_url = None
-    languages = None
-    nodes = None
-
-    def json(self):
-        return json.dumps(self, default=lambda o: o.__dict__, indent=2, ensure_ascii=False, sort_keys=True)
-
-    def dump(self):
-        return clean_dict(self.__dict__)
-
-
-def clean_dict(d):
-    if not isinstance(d, dict):
-        return d
-    return dict((k, clean_dict(v)) for k, v in d.items() if v is not None)
--- a/kindle/config.py.example
+++ b/kindle/config.py.example
@ -1,6 +0,0 @@
-user_agent = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko)Chrome/50.0.2661.75 Safari/537.36'
-header = {'User-Agent': user_agent}
-
-KEY_ID = "xxx"
-SECRET_KEY = "xxx"
-TAG = "xxx"
--- a/kindle/cron.sh
+++ b/kindle/cron.sh
@ -1,11 +0,0 @@
-#!/bin/bash
-
-echo $(date)
-
-PWD="$(dirname $0)"
-
-echo "$PWD"
-
-cd "$PWD" || exit 1
-
-PYTHONIOENCODING=utf-8:surrogateescape venv/bin/python kindle.py
--- a/kindle/free.sh
+++ b/kindle/free.sh
@ -1,11 +0,0 @@
-#!/bin/bash
-
-echo $(date)
-
-PWD="$(dirname $0)"
-
-echo "$PWD"
-
-cd "$PWD" || exit 1
-
-PYTHONIOENCODING=utf-8:surrogateescape venv/bin/python free_book.py
--- a/kindle/free_book.py
+++ b/kindle/free_book.py
@ -1,74 +0,0 @@
-#!/usr/bin/env python3
-import io
-import json
-import os
-import re
-
-import requests
-
-from book import Book
-import config
-
-cn_url = 'https://www.amazon.cn/s/?rh=n:116087071,n:!116088071,n:116169071,p_36:159125071&page='
-en_url = 'https://www.amazon.cn/s/?rh=n:116087071,n:!116088071,n:116169071,n:116170071,p_36:159125071&page='
-base_url = 'https://www.amazon.cn/gp/product/'
-page_dir = 'page/'
-
-
-def fetch_free_books(url, page):
-    r = requests.get(url + str(page), headers=config.header)
-    from bs4 import BeautifulSoup, Tag
-    import lxml
-
-    bs = BeautifulSoup(r.text, lxml.__name__)
-    items = bs.find_all('li', attrs={'class': 's-result-item celwidget'})
-
-    kindle = {'books': []}
-
-    for item in items:
-        if isinstance(item, Tag):
-            book = Book()
-            book.title = item.find('h2').text
-            # book.item_id = item.find('span', attrs={'name': re.compile('.*')}).get('name')
-            book.item_id = item.get('data-asin')
-            book.url = base_url + book.item_id
-            book.average = 0
-            book.price = 0
-            book.min = 0
-            score = item.find('span', attrs={'class': 'a-icon-alt'})
-            if score:
-                book.score = re.match('平均(.*) 星', score.text).group(1)
-
-            import amz
-            amz.lookup(book)
-
-            kindle['books'].append(book)
-
-    kindle['count'] = len(kindle['books'])
-    kindle['page'] = page
-    return kindle
-
-
-def get_free_cn_books(page):
-    kindle = fetch_free_books(cn_url, page)
-    with io.open(page_dir + 'kindle_free_books_cn_' + str(page) + '.json', 'w', encoding='utf-8') as f:
-        f.write(json.dumps(kindle, default=lambda o: o.dump(), indent=2, ensure_ascii=False, sort_keys=True))
-
-
-def get_free_en_books(page):
-    kindle = fetch_free_books(en_url, page)
-    with io.open(page_dir + 'kindle_free_books_en_' + str(page) + '.json', 'w', encoding='utf-8') as f:
-        f.write(json.dumps(kindle, default=lambda o: o.dump(), indent=2, ensure_ascii=False, sort_keys=True))
-
-
-def get_free_books():
-    if not os.path.exists(page_dir):
-        os.mkdir(page_dir)
-
-    for page in range(1, 400):
-        get_free_cn_books(page)
-
-    for page in range(1, 400):
-        get_free_en_books(page)
-
-get_free_books()
--- a/kindle/kindle.py
+++ b/kindle/kindle.py
@ -1,57 +0,0 @@
-#!/usr/bin/env python3
-import io
-import json
-import re
-
-import requests
-from bs4 import Tag
-
-import config
-from book import Book
-
-
-def fetch(url, headers, cookies):
-    r = requests.get(url, headers=headers, cookies=cookies)
-    from bs4 import BeautifulSoup
-    import lxml
-
-    bs = BeautifulSoup(r.text, lxml.__name__)
-
-    time = re.match('数据更新于：(.*)', bs.find('span', style='color:#FFF9A8').text).group(1)
-
-    kindle = {'time': time, 'books': []}
-
-    book_items = bs.find_all('div', style='margin-bottom: 0.9em;')
-
-    for book_item in book_items:
-
-        book = Book()
-
-        if isinstance(book_item, Tag):
-            a = book_item.find('a')
-            min_day = book_item.find('span', title=re.compile('最近在')).get('title')
-            book.min_day = re.match('最近在(.*)达到最低价', min_day).group(1)
-
-            if isinstance(a, Tag):
-                book.url = 'https' + re.match('http(.*)/ref', a.get('href')).group(1)
-                book.item_id = re.match('.*product/(.*)/ref', a.get('href')).group(1)
-                book.title = a.get('title')
-
-            matches = re.match('.*历史均价：￥(.*)，现价：￥(.*)作者：(.*)，评分：(.*)，历史最低价：￥(.*)', book_item.text)
-
-            book.average = matches.group(1)
-            book.price = matches.group(2)
-            book.author = matches.group(3)
-            book.score = matches.group(4)
-            book.min = matches.group(5)
-
-            import amz
-            amz.lookup(book)
-
-            kindle['books'].append(book)
-
-    with io.open('kindle.json', 'w', encoding='utf-8') as f:
-        f.write(json.dumps(kindle, default=lambda o: o.dump(), indent=2, ensure_ascii=False, sort_keys=True))
-
-if __name__ == '__main__':
-    fetch('http://t.bookdna.cn', config.header, {})
--- a/kindle/node.py
+++ b/kindle/node.py
@ -1,17 +0,0 @@
-
-class Node:
-    node = None  # ancestor
-    id = None
-    is_root = False
-    name = None
-
-    def dump(self):
-        return clean_dict(self.__dict__)
-
-
-def clean_dict(d):
-    if not isinstance(d, dict):
-        return d
-    return dict((k, clean_dict(v)) for k, v in d.items() if v is not None)
-
-
--- a/kindle/requirements.txt
+++ b/kindle/requirements.txt
@ -1,7 +0,0 @@
-beautifulsoup4==4.5.1
-bottlenose==1.0.1
-lxml==3.5.0
-python-amazon-simple-product-api==2.1.0
-python-dateutil==2.5.3
-requests==2.9.1
-six==1.10.0