Commit 46f0add4 authored by 许晓宇

update code

parent a28978ef
Pipeline #366 failed with stages
# Default ignored files
/shelf/
/workspace.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP Client requests
/httpRequests/
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="jdk" jdkName="Python 3.6 (crawler_main)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<profile version="1.0">
<option name="myName" value="Project Default" />
<inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
<option name="ignoredErrors">
<list>
<option value="N806" />
</list>
</option>
</inspection_tool>
</profile>
</component>
\ No newline at end of file
<component name="InspectionProjectProfileManager">
<settings>
<option name="USE_PROJECT_PROFILE" value="false" />
<version value="1.0" />
</settings>
</component>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6 (crawler_main)" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/currency_follow.iml" filepath="$PROJECT_DIR$/.idea/currency_follow.iml" />
</modules>
</component>
</project>
\ No newline at end of file
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class BitcoinTokenholdersTop100Item(scrapy.Item):
holder_list = scrapy.Field()
bitcoin_tokenholders = scrapy.Field()
class BitcoinTokenHolderItem(scrapy.Item):
holders = scrapy.Field()
holder_chart_url = scrapy.Field()
contract_address = scrapy.Field()
currency_project_id = scrapy.Field()
utc0_date = scrapy.Field()
class DefiLockedInfoItem(scrapy.Item):
name = scrapy.Field()
id = scrapy.Field()
contract_address = scrapy.Field()
lockup_category_name = scrapy.Field()
volume_24h_usd = scrapy.Field()
percent_change_24h = scrapy.Field()
proportion = scrapy.Field()
currency_id = scrapy.Field()
# item_num = scrapy.Field()
data = scrapy.Field()
ts = scrapy.Field()
volume_usd = scrapy.Field()
class GrayscaleTrustItem(scrapy.Item):
currency_data = scrapy.Field()
currency_name = scrapy.Field()
currency_pet_name = scrapy.Field()
currency_id = scrapy.Field()
currency_etf = scrapy.Field()
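# A minimal population sketch (illustrative only; mirrors what the spiders below do):
#   item = GrayscaleTrustItem()
#   item['currency_id'] = 28
#   item['currency_etf'] = 0.0
#   yield item  # handed to CurrencyFollowPipeline via ITEM_PIPELINES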
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# useful for handling different item types with a single interface
from scrapy.http import HtmlResponse
class CloudScraperMiddleware:
def process_response(self, request, response, spider):
if response.status == 403:
if spider.name == "grayscale_trust":
url = request.url
req = spider.scraper.get(url, headers={'referer': url})
# time.sleep(5)
return HtmlResponse(url=url, body=req.text, encoding="utf-8", request=request)
return response
\ No newline at end of file
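# Note: this middleware assumes the spider exposes a `scraper` attribute
# (GrayscaleTrustSpider builds one with cloudscraper.create_scraper()), and it is
# registered in settings.py via
#   DOWNLOADER_MIDDLEWARES = {'currency_follow.middlewares.CloudScraperMiddleware': 543}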
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
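# In this project the pipeline is registered in settings.py as
#   ITEM_PIPELINES = {'currency_follow.pipelines.CurrencyFollowPipeline': 300}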
import redis
from . import utils
from .utils import update_grayscale_trust
class CurrencyFollowPipeline:
def __init__(self):
super(CurrencyFollowPipeline, self).__init__()
self.eth20_redis = redis.StrictRedis(host='mt-cache.redis.rds.aliyuncs.com', db=0)
# self.eth20_redis = redis.StrictRedis(host='localhost', db=0)
def process_item(self, item, spider):
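# Dispatch on the spider name; `env` decides whether an item is just logged ('demo')
# or uploaded through the helpers in utils.py ('online').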
if spider.name.startswith('grayscale'):
self.process_grayscale_trust(item, spider)
return item
if spider.name in ['defi_coming_eth', 'defi_coming_heco', 'defi_coming_bsc']:
self.process_defi_overview(item, spider)
if spider.name in ['defi_locked_eth', 'defi_locked_heco']:
self.process_defi_locked_info(item, spider)
if spider.name in ['bitcoin_tokenholders_top100', 'tokenholders_STD_top100']:
self.process_bitcoin_tokenholders_top100(item, spider)
return item
@staticmethod
def process_bitcoin_tokenholders_top100(item, spider):
if spider.env == 'demo':
spider.logger.info(item)
if spider.env == 'online':
utils.update_bitcoin_top100(item)
def process_grayscale_trust(self, item, spider):
if spider.env == 'demo':
print(item)
if spider.env == 'online':
self.eth20_redis.set("gray:grayscale_trust_" + item['currency_pet_name'], str(item['currency_etf']))
utils.update_grayscale_trust(item)
# print("成功post", item)
@staticmethod
def process_defi_overview(item, spider):
if spider.env == 'demo':
spider.logger.info(item)
# utils.write_csv(item)
if spider.env == 'online':
utils.update_defi_overview(item)
@staticmethod
def process_defi_locked_info(item, spider):
if spider.env == 'demo':
spider.logger.info(item)
# utils.write_csv(item)
if spider.env == 'online':
utils.update_defi_locked_info(item)
\ No newline at end of file
from scrapy import cmdline
name = "tokenholders_STD_top100"
# cmd = 'scrapy crawl {0} -a env=demo'.format(name)
cmd = 'scrapy crawl {0} -a env=online'.format(name)
cmdline.execute(cmd.split())
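# Running this script is equivalent to the CLI call
#   scrapy crawl tokenholders_STD_top100 -a env=online
# the `-a env=...` argument reaches the spider's __init__ as a keyword argument.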
# Scrapy settings for currency_follow project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
import os
BOT_NAME = 'currency_follow'
SPIDER_MODULES = ['currency_follow.spiders']
NEWSPIDER_MODULE = 'currency_follow.spiders'
REDIS_HOST_DOCKER = os.getenv('REDIS_HOST', 'r-j6cd64854a7eed94.redis.rds.aliyuncs.com')
REDIS_CACHE = 'mt-core-data.redis.rds.aliyuncs.com'
REDIS_PORT = '6379'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/90.0.4430.85 Safari/537.36 '
# Obey robots.txt rules
ROBOTSTXT_OBEY = False
LOG_LEVEL = 'INFO'
class ApiUrl(object):
API_BASE = os.getenv('API_BASE_HOST', 'http://172.16.0.147:12306')  # production environment
REDIS_CORE = os.getenv('REDIS_CORE', 'mt-core-data.redis.rds.aliyuncs.com')
API_007 = os.getenv('API_NEW_ALFRED_HOST', 'http://dataapi.mytokenapi.com')
UPLOAD_GRAYSCALE_TRUST = '{}/marketindex/create'.format(API_BASE)
UPLOAD_BITCOIN_MININ_DATA = '{}/mining/create'.format('http://172.16.6.98:12306')
# API_BASE = "http://beta.internal.mytokenio.com" # 办公司访问 香港阿里云测试环境
API_BASE_BETA = 'http://172.17.1.63:12306' # 香港阿里云测试环境
API_BASE_PRE = 'http://172.16.6.98:12306' # 香港阿里云PRE环境
# API_BASE = "http://beta.internal.mytoken-local.com" # 香港阿里云测试环境
# API_BASE = 'http://hkbeta.manageapi.mytoken.org' # 香港阿里云测试环境
# API_BASE = 'http://172.16.0.147:12306' # 香港阿里云正式环境(备份)
# API_BASE = 'http://52.56.166.200:12306' # AWS正式环境(废弃)
# API_BASE = 'http://35.176.110.161:12306' # AWS测试环境(废弃)
# 上传新闻资讯接口:
UPLOAD_FOREIGN_MEDIA = '{}/news/create'.format(API_BASE)
# Endpoint for fetching currency (project) info:
GET_SOCIAL_TARGET = API_BASE + '/currencyproject/list?page_num={}&page_limit=200'
# Fetch currency_detail
GET_CURRENCY_DETAIL = API_BASE + '/currency/currencydetail?market_id=1303&currency_id={}'
# Upload currency (project team) social media data:
UPLOAD_SOCIAL_INFO = '{}/currencysummary/create'.format(API_BASE)
# Endpoint for fetching exchange info:
# UPLOAD_USDT_VOL = '{}/usdtvol/updateorinsert'.format(API_BASE)
UPLOAD_USDT_VOL = '{}/usdtvol/insert'.format(API_BASE)
MARKET_LIST = '{}/market/marketlist?source=script'.format(API_BASE)
# Upload exchange ranking base data
UPLOAD_MARKET_RANK = '{}/marketrank/bcreate'.format(API_BASE)
# Upload DeFi data
UPLOAD_DEFI_LOCKED_INFO = '{}/defi/lockupupsert'.format(API_BASE)
UPLOAD_DEFI_OVERVIEW = '{}/deficity/currencycreate'.format(API_BASE_PRE)
UPLOAD_ETHERSCAN_TOP100 = '{}/currencyethereumaddress/create'.format(API_BASE)
UPLOAD_ETHERSCAN_INFO = '{}/currencyproject/blockchainupdate'.format(API_BASE)
UPLOAD_DEFI_DEX = '{}/defi/dexupupsert'.format(API_BASE)
UPLOAD_DEFI_DEBT = '{}/defi/loanupupsert'.format(API_BASE)
UPLOAD_DEFI_RATE = '{}/defi/ratecreate'.format(API_BASE)
# Upload NFT data
UPLOAD_NFT_INFO = '{}/nft/create'.format(API_BASE_PRE)
# Fetch mytoken
GET_MYTOKEN = "{}/common/getqntoken".format(API_BASE)
# Upload news flashes
UPLOAD_NEWS_FLASH = "{}/newsflash/add".format(API_BASE)
GET_ETHERSCAN_TARGET = API_BASE + '/currencyproject/list?page_limit=100&page_num={}'
# Redis connection settings
REDIS_DATA = os.getenv("REDIS_DATA", 'r-j6cd64854a7eed94.redis.rds.aliyuncs.com')
REDIS_DATA_DB = int(os.getenv("REDIS_DATA_DB", 7))
REDIS_TTL = int(os.getenv("REDIS_TTL", 900))
REDIS_KEY = os.getenv("REDIS_KEY", "media_cache_{}:{}")
ERROR_ACCOUNT_KEY = os.getenv("ERROR_ACCOUNT_KEY", "error_account:{}:{}")
UPLOAD_ETHERSCAN_ADDRESS_OVERVIEW = '{}/exchangebalancesnapshot/create'.format(API_BASE)
GET_ALL_ADDRESS = 'http://172.16.3.188:8001/getAllAddresses?type={}&enabled=1'
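# The URL templates above are filled in at call time in utils.py, e.g.
#   requests.get(ApiUrl.GET_CURRENCY_DETAIL.format(currency_id))
# endpoints built from API_BASE_PRE point at the pre-release host rather than production.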
# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False
# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
# }
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
# 'currency_follow.middlewares.CurrencyFollowSpiderMiddleware': 543,
# }
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
'currency_follow.middlewares.CloudScraperMiddleware': 543,
}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
# }
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'currency_follow.pipelines.CurrencyFollowPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
import json
import logging
import requests
import scrapy
import xmltodict
from ..items import DefiLockedInfoItem
all_info_data = []
item = DefiLockedInfoItem()
class DefiComingBscSpider(scrapy.Spider):
name = 'defi_coming_bsc'
# allowed_domains = ['www.xx.com']
# start_urls = ['http://www.xx.com/']
def __init__(self, *args, **kwargs):
super(DefiComingBscSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.start_num = 0
self.dynamic_url = 'https://www.defibox.com/dgg/ranks/v3/all?lang=cn'
self.headers = {
"content-type": "application/json;charset=UTF-8"
}
self.page = 0
self.request_payload_info = {
"page": self.page,
"size": 20,
"chain": "bsc",
"vtoken": "0bca0f1778027e0626d56f4be3a9bb0a",
"field": "locked",
"direction": "DESC"
}
def start_requests(self):
yield scrapy.Request(url=self.dynamic_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
def parse(self, response):
if response.status == 200:
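# The ranks endpoint sometimes returns XML-wrapped JSON; try xmltodict first and,
# if that fails, fall back to a plain requests POST that returns JSON directly.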
try:
converted_json = xmltodict.parse(response.text, encoding='utf-8')
data_json = json.dumps(converted_json)
data_dict = json.loads(data_json)
data_list = data_dict.get('R').get('data')
except Exception as e:
logging.info("*** Malformed response, parse failed; falling back to requests to fetch the JSON ***")
resp = requests.post(url=self.dynamic_url, json=self.request_payload_info, headers=self.headers)
data_dict = resp.json()
data_list = data_dict.get('data')
if data_list:
# print(json.dumps(data_list))
for data_item in data_list:
defi_item = dict()
defi_item['name'] = data_item['name']
try:
category = data_item.get('category')
if category is None:
defi_item['chain_category_name'] = '其他'
else:
category_name = data_item.get('category').get('category').get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
except Exception as e:
category_name = data_item.get('category')[0].get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
defi_item['address_24h_num'] = data_item.get('address24')
defi_item['address_change_24h'] = data_item.get('address24Pct')
defi_item['volume_24h_usd'] = float(data_item.get('volume24'))
defi_item['volume_change_24h'] = data_item.get('volume24Pct')
defi_item['locked_usd'] = data_item.get('locked')
defi_item['locked_change_24h'] = data_item.get('lockedPct')
defi_item['chain_currency_id'] = 4
defi_item['website'] = data_item.get('website')
all_info_data.append(defi_item)
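# A full page of 20 rows means more pages may follow: bump the page number and
# re-POST; otherwise emit everything collected so far as a single item.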
if len(data_list) == 20:
self.page += 1
self.request_payload_info = {
"page": self.page,
"size": 20,
"chain": "bsc",
"vtoken": "0bca0f1778027e0626d56f4be3a9bb0a",
"field": "locked",
"direction": "DESC"
}
yield scrapy.Request(url=self.dynamic_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
else:
item['data'] = all_info_data
yield item
else:
item['data'] = all_info_data
yield item
import json
import logging
import time
import requests
from ..items import DefiLockedInfoItem
import scrapy
import xmltodict
import pprint
all_info_data = []
item = DefiLockedInfoItem()
class DefiComingEthSpider(scrapy.Spider):
name = 'defi_coming_eth'
# allowed_domains = ['www.xx.com']
# start_urls = ['http://www.xx.com/']
def __init__(self, *args, **kwargs):
super(DefiComingEthSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.start_num = 0
self.dynamic_url = 'https://www.defibox.com/dgg/ranks/v3/all?lang=cn'
self.headers = {
"content-type": "application/json;charset=UTF-8"
}
self.page = 0
self.request_payload_info = {
"page": self.page,
"size": 20,
"chain": "eth",
"vtoken": "0bca0f1778027e0626d56f4be3a9bb0a",
"field": "locked",
"direction": "DESC"
}
def start_requests(self):
yield scrapy.Request(url=self.dynamic_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
def parse(self, response):
if response.status == 200:
try:
converted_json = xmltodict.parse(response.text, encoding='utf-8')
data_json = json.dumps(converted_json)
data_dict = json.loads(data_json)
data_list = data_dict.get('R').get('data')
except Exception as e:
logging.info("*** Malformed response, parse failed; falling back to requests to fetch the JSON ***")
resp = requests.post(url=self.dynamic_url, json=self.request_payload_info, headers=self.headers)
data_dict = resp.json()
data_list = data_dict.get('data')
if data_list:
for data_item in data_list:
defi_item = dict()
defi_item['name'] = data_item['name']
try:
category_name = data_item.get('category').get('category').get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
except Exception as e:
category_name = data_item.get('category')[0].get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
defi_item['address_24h_num'] = data_item.get('address24')
defi_item['address_change_24h'] = data_item.get('address24Pct')
defi_item['volume_24h_usd'] = float(data_item.get('volume24'))
defi_item['volume_change_24h'] = data_item.get('volume24Pct')
defi_item['locked_usd'] = data_item.get('locked')
defi_item['locked_change_24h'] = data_item.get('lockedPct')
defi_item['chain_currency_id'] = 2
defi_item['website'] = data_item.get('website')
all_info_data.append(defi_item)
if len(data_list) == 20:
self.page += 1
self.request_payload_info = {
"page": self.page,
"size": 20,
"chain": "eth",
"vtoken": "0bca0f1778027e0626d56f4be3a9bb0a",
"field": "locked",
"direction": "DESC"
}
yield scrapy.Request(url=self.dynamic_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
else:
item['data'] = all_info_data
yield item
else:
item['data'] = all_info_data
yield item
import logging
import scrapy
import json
import requests
import pprint
from ..items import DefiLockedInfoItem
import xmltodict
all_info_data = []
item = DefiLockedInfoItem()
class DefiComingHecoSpider(scrapy.Spider):
name = 'defi_coming_heco'
# allowed_domains = ['www.xx.com']
# start_urls = ['http://www.xx.com/']
def __init__(self, *args, **kwargs):
super(DefiComingHecoSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.start_num = 0
# self.base_url = 'https://m.defibox.com/-/x/dgg/quote/v2/deficoin/list?lang=cn'
self.base_url = 'https://www.defibox.com/dgg/ranks/v3/all?lang=cn'
self.page = 0
self.request_payload_info = {
'chain': "heco",
'direction': "DESC",
'field': "locked",
'page': self.page,
'size': 20,
'vtoken': "0bca0f1778027e0626d56f4be3a9bb0a"
}
self.headers = {
"content-type": "application/json"
}
def start_requests(self):
yield scrapy.FormRequest(url=self.base_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
def parse(self, response):
if response.status == 200:
try:
converted_json = xmltodict.parse(response.text, encoding='utf-8')
data_json = json.dumps(converted_json)
data_dict = json.loads(data_json)
data_list = data_dict.get('R').get('data')
except Exception as e:
logging.info("*** Malformed response, parse failed; falling back to requests to fetch the JSON ***")
resp = requests.post(url=self.base_url, json=self.request_payload_info, headers=self.headers)
data_dict = resp.json()
data_list = data_dict.get('data')
if data_list:
# print(json.dumps(data_list))
for data_item in data_list:
defi_item = dict()
defi_item['name'] = data_item['name']
try:
if data_item.get('category') is None:
defi_item['chain_category_name'] = '其他'
else:
category_name = data_item.get('category').get('category').get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
except Exception as e:
category_name = data_item.get('category')[0].get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
defi_item['address_24h_num'] = data_item.get('address24')
defi_item['address_change_24h'] = data_item.get('address24Pct')
defi_item['volume_24h_usd'] = float(data_item.get('volume24'))
defi_item['volume_change_24h'] = data_item.get('volume24Pct')
defi_item['locked_usd'] = data_item.get('locked')
defi_item['locked_change_24h'] = data_item.get('lockedPct')
defi_item['chain_currency_id'] = 3
defi_item['website'] = data_item.get('website')
all_info_data.append(defi_item)
if len(data_list) == 20:
self.page += 1
self.request_payload_info = {
"page": self.page,
"size": 20,
"chain": "heco",
"vtoken": "0bca0f1778027e0626d56f4be3a9bb0a",
"field": "locked",
"direction": "DESC"
}
yield scrapy.Request(url=self.base_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
else:
item['data'] = all_info_data
yield item
else:
item['data'] = all_info_data
yield item
import scrapy
import json
from ..items import DefiLockedInfoItem
import requests
all_info_data = []
volume_24h_total = []
class DefiLockedEthSpider(scrapy.Spider):
name = 'defi_locked_eth'
start_num = 0
# start_urls = ['https://static1.debank.com/tvl/projects-v2.json']
def __init__(self, *args, **kwargs):
super(DefiLockedEthSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.tag_url = "https://api.debank.com/project/tag_list"
self.base_url = "https://static1.debank.com/tvl/projects-v2.json"
def start_requests(self):
yield scrapy.Request(url=self.tag_url, callback=self.parse_tag)
def parse_tag(self, response):
if response.status == 200:
tag_dict = {}
tag_dict_list = json.loads(response.text)['data']['tag_list']
for i in tag_dict_list:
if i['id']:
tag_dict[i['id']] = i['name']['ch']
yield scrapy.Request(url=self.base_url, callback=self.parse, meta={'tag_dict': tag_dict})
def parse(self, response):
if response.status == 200:
data_dict = json.loads(response.text)
tag_dict = response.meta['tag_dict']
for data_item in data_dict:
item = DefiLockedInfoItem()
defi_item = dict()
# if data_item['chain']['id'] == "eth":
if data_item['chain'] == "eth":
defi_item['name'] = data_item['name']['en']
defi_item['contract_address'] = data_item.get('id') if data_item.get('id').startswith(
'0x') else '' if data_item.get('platform_token_id') is None else data_item.get(
'platform_token_id')
defi_item['lockup_category_name'] = '其他' if data_item.get('tag_ids') is None else '其他' if len(
data_item.get('tag_ids')) == 0 else tag_dict.get(
data_item.get('tag_ids')[0])
defi_item['volume_24h_usd'] = float(data_item.get('data')[-1].get('locked_usd_value'))
last_locked_amount = data_item.get('data')[-2].get('locked_usd_value')
if not last_locked_amount == 0:
defi_item['percent_change_24h'] = (defi_item[
'volume_24h_usd'] - last_locked_amount) / last_locked_amount
else:
defi_item['percent_change_24h'] = 0
defi_item['lock_currency_id'] = 2
all_info_data.append(defi_item)
volume_24h_total.append(defi_item['volume_24h_usd'])
self.start_num += 1
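# 174 appears to be the expected number of entries in projects-v2.json; the aggregated
# item is only returned once that count is reached, so it must track the upstream file.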
if self.start_num == 174:
item['data'] = all_info_data
# print(item)
# print('total:', sum(volume_24h_total))
return item
import scrapy
import json
import xmltodict
from ..items import DefiLockedInfoItem
import pprint
all_info_data = []
all_info_dict = {}
class DefiLockedHecoSpider(scrapy.Spider):
name = 'defi_locked_heco'
# allowed_domains = ['www.xx.com']
# start_urls = ['http://www.xx.com/']
def __init__(self, *args, **kwargs):
super(DefiLockedHecoSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.start_num = 0
self.base_url = 'https://www.defibox.com/dgg/ranks/v3/all?lang=cn'
self.new_url = 'https://m.defibox.com/-/x/dgg/quote/v2/deficoin/list?lang=cn'
self.request_payload_info = {
"page": 0,
"size": 100,
"chain": "heco",
"vtoken": None,
"field": "locked",
"direction": "DESC"
}
self.payload = {"chain": "heco", "rule": "top", "size": 500}
self.headers = {
"content-type": "application/json;charset=UTF-8",
}
def start_requests(self):
yield scrapy.FormRequest(url=self.base_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
def parse(self, response):
if response.status == 200:
converted_json = xmltodict.parse(response.text, encoding='utf-8')
data_json = json.dumps(converted_json)
data_dict = json.loads(data_json)
# pprint.pprint(data_dict)
data_list = data_dict['R']['data']
for data_item in data_list:
extId = data_item['extId']
# defi_item = dict()
name = data_item['name']
# defi_item['contract_address'] = data_item.get('id') if data_item.get('id').startswith(
# '0x') else '' if data_item.get('platform_token_id') is None else data_item.get(
# 'platform_token_id')
try:
lockup_category_name = '其他' if data_item.get('category').get('category').get(
'zh') is None else data_item.get('category').get('category').get('zh')
except:
lockup_category_name = '其他'
all_info_dict[extId] = {'name': name, 'lockup_category_name': lockup_category_name}
# defi_item['volume_24h_usd'] = float(data_item.get('locked'))
# # last_locked_amount = data_item.get('locked')
# defi_item['percent_change_24h'] = data_item['lockedPct']
# defi_item['lock_currency_id'] = 3
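# Second request: the deficoin list supplies locked volume and 24h change;
# parse_data joins it to this name/category lookup by extId.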
yield scrapy.FormRequest(url=self.new_url, method='POST', callback=self.parse_data,
body=json.dumps(self.payload), headers=self.headers)
def parse_data(self, response):
if response.status == 200:
converted_json = xmltodict.parse(response.text, encoding='utf-8')
data_json = json.dumps(converted_json)
data_dict = json.loads(data_json)
# pprint.pprint(data_dict)
data_list = data_dict['R']['data']
for data_item in data_list:
defi_info_item = dict()
item = DefiLockedInfoItem()
extId = data_item['extId']
extId_1 = all_info_dict.get(extId)
if extId_1:
defi_info_item['name'] = all_info_dict.get(extId)['name']
defi_info_item['lockup_category_name'] = all_info_dict[extId]['lockup_category_name']
else:
defi_info_item['name'] = data_item['name'].capitalize()
defi_info_item['lockup_category_name'] = '其他'
defi_info_item['volume_24h_usd'] = float(data_item.get('locked'))
defi_info_item['percent_change_24h'] = data_item['priceChangePercentage24h']
defi_info_item['lock_currency_id'] = 3
all_info_data.append(defi_info_item)
self.start_num += 1
if self.start_num == len(data_list):
item['data'] = all_info_data
# print(sum([data.get('volume_24h_usd') for data in all_info_data]))
# print(item)
return item
import json
import logging
import re
import cloudscraper
import scrapy
from ..items import GrayscaleTrustItem
class GrayscaleTrustSpider(scrapy.Spider):
name = 'grayscale_trust'
# allowed_domains = ['www.cx.com']
# start_urls = ['http://www.cx.com/']
def __init__(self, *args, **kwargs):
super(GrayscaleTrustSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.base_url = "https://grayscale.co/{}-trust/"
self.currency_info_list = [("basic-attention-token", 37, "bat"), ("bitcoin", 28, "btc"),
("bitcoin-cash", 33, "bch"),
("chainlink", 38, "link"), ('decentraland', 40, "mana"), ("ethereum", 29, "eth"),
("ethereum-classic", 32, "etc"), ('filecoin', 39, "fil"), ("horizen", 34, "zen"),
("litecoin", 30, "ltc"), ('livepeer', 41, "lpt"), ("stellar-lumens", 36, "lum"),
("zcash", 35, "zch")]
self.scraper = cloudscraper.create_scraper()
# self.scraper = cfscrape.create_scraper()
def start_requests(self):
for currency_info in self.currency_info_list:
currency_re_url = self.base_url.format(currency_info[0])
yield scrapy.Request(url=currency_re_url, callback=self.parse,
meta={"currency_name": currency_info[0], "currency_id": currency_info[1],
"pet_name": currency_info[2]})
def parse(self, response):
currency_name = response.meta['currency_name']
currency_id = response.meta['currency_id']
pet_name = response.meta['pet_name']
if response.status == 200:
item = GrayscaleTrustItem()
try:
shares_Outstanding = response.xpath(
'//span[@data-title="Shares Outstanding"]/text()').get()[:-1]
currency_per_Share = response.xpath(
'//span[@data-title="Token per Share"]/text()').get()[:-1]
shares_Outstanding = int(''.join(shares_Outstanding.split(',')))
currency_per_Share = float(currency_per_Share)
# json_data = json.loads(
# re.search(r'chart_data_(.*?)window', response.text, re.S | re.I).group(1).split('=')[-1][:-2])
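# Pull the JSON blob assigned to window['chart_data_*']; the last Benchmark point
# is used as the latest market price per share.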
json_str = re.search(r'window\[\'chart_data_.*?\'] = (.*?);', response.text, re.S | re.I).group(1)
json_data = json.loads(json_str)
market_price_per_share = json_data["Benchmark"][-1]['y']
if currency_id in (28, 29, 30, 33, 32):
currency_etf = float(market_price_per_share) / float(currency_per_Share)
else:
currency_etf = 0
except Exception as e:
# print('-----------------------------------------', currency_name)
logging.error("{} page failed to parse, reason: {}".format(currency_name, e))
else:
item['currency_id'] = currency_id
item['currency_name'] = currency_name
item['currency_pet_name'] = pet_name
item['currency_data'] = shares_Outstanding * currency_per_Share
item['currency_etf'] = currency_etf
yield item
import json
import time
import scrapy
from currency_follow import utils
from currency_follow.items import BitcoinTokenholdersTop100Item, BitcoinTokenHolderItem
class TokenholdersStdTop100Spider(scrapy.Spider):
name = 'tokenholders_STD_top100'
start_urls = ['http://47.57.70.99:8080/std/api/getTop100']
def __init__(self, **kwargs):
super(TokenholdersStdTop100Spider, self).__init__(**kwargs)
self.env = kwargs.get('env', 'online')
self.cpid, self.cid = 11750, 356455
def parse(self, response):
data_raws = json.loads(response.body.decode())
item = BitcoinTokenholdersTop100Item()
cpid, cid = self.cpid, self.cid
bitem = BitcoinTokenHolderItem()
holder_list = []
rank = 1
currency_price_usd = utils.get_currency_detail(cid).get('price_usd')
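# Convert each holder's balance to USD using the current price; note that
# get_currency_detail returns None on a failed API call, which would raise here.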
for row in data_raws['data']['data']:
row_dict = dict()
row_dict['holder_rank'] = rank
row_dict['address'] = row['address']
row_dict['quantity'] = row['balance']
row_dict['percentage'] = row['percent']
row_dict['currency_id'] = cid
row_dict['source_id'] = 3
row_dict['note'] = ''
row_dict['value'] = row['balance'] * currency_price_usd
holder_list.append(row_dict)
rank += 1
item['holder_list'] = holder_list
# Number of token-holding addresses
bitem['contract_address'] = 'address'
bitem['holders'] = data_raws['data']['holders']
bitem['holder_chart_url'] = 'https://explorer.standardchain.info/'
bitem['currency_project_id'] = cpid
bitem['utc0_date'] = int(time.time())
item['bitcoin_tokenholders'] = bitem
yield item
import json
import logging
import time
import grequests as grequests
import requests
from scrapy.utils.project import get_project_settings
from .settings import ApiUrl
# For Bitcoin
def update_bitcoin_top100(item):
holder_list = item['holder_list']
# This indirectly calls the method that updates the number of holder addresses.
bitcoin_tokenholders = item['bitcoin_tokenholders']
if bitcoin_tokenholders:
update_etherscan_holders_info(bitcoin_tokenholders)
if len(holder_list) > 0:
request_list = []
for row_dict in holder_list:
rs = grequests.post(ApiUrl.UPLOAD_ETHERSCAN_TOP100, data=dict(row_dict))
request_list.append(rs)
try:
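# POST the holder rows concurrently (at most 50 in flight); request-level failures
# are routed to exception_handler.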
ret = grequests.map(request_list, size=50, exception_handler=exception_handler)
success_count = 0
failed_count = 0
for r in ret:
if r.status_code == 200 and r.json()['code'] == 0:
success_count += 1
else:
failed_count += 1
logging.info(
"***update_bitcoin_top100 failed! currency_id:{}, len_holder_list: {},ret_data:{}".format(
holder_list[0]['currency_id'],
len(holder_list),
r.content.decode()))
logging.info(
'***update_bitcoin_top100 status: currency_id:{}, len_holder_list: {}, success_count:{}, '
'failed_count:{}'.format(
holder_list[0]['currency_id'], len(holder_list), success_count, failed_count))
except Exception as e:
logging.info("***update_bitcoin_top100 Exception:{}, holder_list:{}".format(e, holder_list))
def update_etherscan_holders_info(item):
r = requests.post(ApiUrl.UPLOAD_ETHERSCAN_INFO, data=dict(item))
if r.status_code == 200 and r.json()['code'] == 0:
logging.info('***etherscan_holders update success: item: {}, ret_data:{}'.format(
item, r.content.decode()))
def get_currency_detail(currency_id):
r = requests.get(ApiUrl.GET_CURRENCY_DETAIL.format(currency_id))
if r.status_code == 200 and r.json().get('code') == 0:
return r.json().get('data')
def update_defi_locked_info(item):
r = requests.post(ApiUrl.UPLOAD_DEFI_LOCKED_INFO, data=json.dumps(item['data']))
if r.status_code == 200 and r.json().get('code') == 0:
logging.info('***defi_locked_info update success: ret_data:{}'.format(r.content.decode()))
else:
post_to_wexin_qwxchat('Project defi_locked_info update failed, ret_data:{}'.format(r.content.decode()))
def update_defi_overview(item):
up_json = json.dumps(item['data'])
# pprint.pprint(up_json)
# print(up_json)
r = requests.post(ApiUrl.UPLOAD_DEFI_OVERVIEW, data=up_json)
if r.status_code == 200 and r.json().get('code') == 0:
print('***defi_overview_info update success: ret_data:{}'.format(r.content.decode()))
logging.info('***defi_overview_info update success: ret_data:{}'.format(r.content.decode()))
else:
print('***Project defi_overview_info update failed, ret_data:{}'.format(r.content.decode()))
post_to_wexin_qwxchat('Project defi_overview_info update failed, ret_data:{}'.format(r.content.decode()))
def update_grayscale_trust(item):
try:
ts = int(time.time()) - 86400
r = requests.post(
ApiUrl.UPLOAD_GRAYSCALE_TRUST + "?ts={}&c={}&market_index_id={}".format(ts, item['currency_data'],
item['currency_id']))
logging.info(
'***update success:currency:{}, ret_data:{}, item:{}'.format(item['currency_name'], r.json(), item))
except Exception as e:
logging.info('***Exception:{}, item:{}'.format(e, item))
def post_to_wexin_qwxchat(content):
url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send"
querystring = {"key": "67e59f56-81c7-4bf7-b35d-3bc7068981a6"}
payload = {
"msgtype": "text",
"text": {
"content": content,
"mentioned_list": [],
"mentioned_mobile_list": []
}
}
headers = {
'Content-Type': "application/json",
'Cache-Control': "no-cache",
'Host': "qyapi.weixin.qq.com",
'Accept-Encoding': "gzip, deflate"
}
resp = requests.request("POST", url, data=json.dumps(payload), headers=headers, params=querystring)
if resp.json()['errmsg'] == 'ok':
print('sent successfully')
def exception_handler(request, exception):
logging.info(
"***exception_handler,update_etherscan_top100 faile, exception:{}".format(exception))
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class BitcoinTokenholdersTop100Item(scrapy.Item):
holder_list = scrapy.Field()
bitcoin_tokenholders = scrapy.Field()
class BitcoinTokenHolderItem(scrapy.Item):
holders = scrapy.Field()
holder_chart_url = scrapy.Field()
contract_address = scrapy.Field()
currency_project_id = scrapy.Field()
utc0_date = scrapy.Field()
class DefiLockedInfoItem(scrapy.Item):
name = scrapy.Field()
id = scrapy.Field()
contract_address = scrapy.Field()
lockup_category_name = scrapy.Field()
volume_24h_usd = scrapy.Field()
percent_change_24h = scrapy.Field()
proportion = scrapy.Field()
currency_id = scrapy.Field()
# item_num = scrapy.Field()
data = scrapy.Field()
ts = scrapy.Field()
volume_usd = scrapy.Field()
class GrayscaleTrustItem(scrapy.Item):
currency_data = scrapy.Field()
currency_name = scrapy.Field()
currency_pet_name = scrapy.Field()
currency_id = scrapy.Field()
currency_etf = scrapy.Field()
# Define here the models for your spider middleware
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# useful for handling different item types with a single interface
from scrapy.http import HtmlResponse
class CloudScraperMiddleware:
def process_response(self, request, response, spider):
if response.status == 403:
if spider.name == "grayscale_trust":
url = request.url
req = spider.scraper.get(url, headers={'referer': url})
# time.sleep(5)
return HtmlResponse(url=url, body=req.text, encoding="utf-8", request=request)
return response
\ No newline at end of file
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
# useful for handling different item types with a single interface
import redis
from . import utils
from .utils import update_grayscale_trust
class CurrencyFollowPipeline:
def __init__(self):
super(CurrencyFollowPipeline, self).__init__()
self.eth20_redis = redis.StrictRedis(host='mt-cache.redis.rds.aliyuncs.com', db=0)
# self.eth20_redis = redis.StrictRedis(host='localhost', db=0)
def process_item(self, item, spider):
if spider.name.startswith('grayscale'):
self.process_grayscale_trust(item, spider)
return item
if spider.name in ['defi_coming_eth', 'defi_coming_heco', 'defi_coming_bsc']:
self.process_defi_overview(item, spider)
if spider.name in ['defi_locked_eth', 'defi_locked_heco']:
self.process_defi_locked_info(item, spider)
if spider.name in ['bitcoin_tokenholders_top100', 'tokenholders_STD_top100']:
self.process_bitcoin_tokenholders_top100(item, spider)
@staticmethod
def process_bitcoin_tokenholders_top100(item, spider):
if spider.env == 'demo':
spider.logger.info(item)
if spider.env == 'online':
utils.update_bitcoin_top100(item)
def process_grayscale_trust(self, item, spider):
if spider.env == 'demo':
print(item)
if spider.env == 'online':
self.eth20_redis.set("gray:grayscale_trust_" + item['currency_pet_name'], str(item['currency_etf']))
utils.update_grayscale_trust(item)
# print("成功post", item)
@staticmethod
def process_defi_overview(item, spider):
if spider.env == 'demo':
spider.logger.info(item)
# utils.write_csv(item)
if spider.env == 'online':
utils.update_defi_overview(item)
@staticmethod
def process_defi_locked_info(item, spider):
if spider.env == 'demo':
spider.logger.info(item)
# utils.write_csv(item)
if spider.env == 'online':
utils.update_defi_locked_info(item)
\ No newline at end of file
from scrapy import cmdline
name = "tokenholders_STD_top100"
cmd = 'scrapy crawl {0} -a env=demo'.format(name)
# cmd = 'scrapy crawl {0} -a env=online'.format(name)
cmdline.execute(cmd.split())
# Scrapy settings for currency_follow project
#
# For simplicity, this file contains only settings considered important or
# commonly used. You can find more settings consulting the documentation:
#
# https://docs.scrapy.org/en/latest/topics/settings.html
# https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
# https://docs.scrapy.org/en/latest/topics/spider-middleware.html
import os
BOT_NAME = 'currency_follow'
SPIDER_MODULES = ['currency_follow.spiders']
NEWSPIDER_MODULE = 'currency_follow.spiders'
REDIS_HOST_DOCKER = os.getenv('REDIS_HOST', 'r-j6cd64854a7eed94.redis.rds.aliyuncs.com')
REDIS_CACHE = 'mt-core-data.redis.rds.aliyuncs.com'
REDIS_PORT = '6379'
# Crawl responsibly by identifying yourself (and your website) on the user-agent
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) ' \
'Chrome/90.0.4430.85 Safari/537.36 '
# Obey robots.txt rules
ROBOTSTXT_OBEY = False
LOG_LEVEL = 'INFO'
class ApiUrl(object):
API_BASE = os.getenv('API_BASE_HOST', 'http://172.16.0.147:12306') # 正式环境
REDIS_CORE = os.getenv('REDIS_CORE', 'mt-core-data.redis.rds.aliyuncs.com')
API_007 = os.getenv('API_NEW_ALFRED_HOST', 'http://dataapi.mytokenapi.com')
UPLOAD_GRAYSCALE_TRUST = '{}/marketindex/create'.format(API_BASE)
UPLOAD_BITCOIN_MININ_DATA = '{}/mining/create'.format('http://172.16.6.98:12306')
# API_BASE = "http://beta.internal.mytokenio.com" # 办公司访问 香港阿里云测试环境
API_BASE_BETA = 'http://172.17.1.63:12306' # 香港阿里云测试环境
API_BASE_PRE = 'http://172.16.6.98:12306' # 香港阿里云PRE环境
# API_BASE = "http://beta.internal.mytoken-local.com" # 香港阿里云测试环境
# API_BASE = 'http://hkbeta.manageapi.mytoken.org' # 香港阿里云测试环境
# API_BASE = 'http://172.16.0.147:12306' # 香港阿里云正式环境(备份)
# API_BASE = 'http://52.56.166.200:12306' # AWS正式环境(废弃)
# API_BASE = 'http://35.176.110.161:12306' # AWS测试环境(废弃)
# 上传新闻资讯接口:
UPLOAD_FOREIGN_MEDIA = '{}/news/create'.format(API_BASE)
# 获取currency(项目)信息接口:
GET_SOCIAL_TARGET = API_BASE + '/currencyproject/list?page_num={}&page_limit=200'
# 获取currency_detail
GET_CURRENCY_DETAIL = API_BASE + '/currency/currencydetail?market_id=1303&currency_id={}'
# 上传currency(项目方)社交媒体数据:
UPLOAD_SOCIAL_INFO = '{}/currencysummary/create'.format(API_BASE)
# 获取交易所信息接口:
# UPLOAD_USDT_VOL = '{}/usdtvol/updateorinsert'.format(API_BASE)
UPLOAD_USDT_VOL = '{}/usdtvol/insert'.format(API_BASE)
MARKET_LIST = '{}/market/marketlist?source=script'.format(API_BASE)
# 上传交易所排名基础数据
UPLOAD_MARKET_RANK = '{}/marketrank/bcreate'.format(API_BASE)
# 上传defi数据
UPLOAD_DEFI_LOCKED_INFO = '{}/defi/lockupupsert'.format(API_BASE)
UPLOAD_DEFI_OVERVIEW = '{}/deficity/currencycreate'.format(API_BASE_PRE)
UPLOAD_ETHERSCAN_TOP100 = '{}/currencyethereumaddress/create'.format(API_BASE)
UPLOAD_ETHERSCAN_INFO = '{}/currencyproject/blockchainupdate'.format(API_BASE)
UPLOAD_DEFI_DEX = '{}/defi/dexupupsert'.format(API_BASE)
UPLOAD_DEFI_DEBT = '{}/defi/loanupupsert'.format(API_BASE)
UPLOAD_DEFI_RATE = '{}/defi/ratecreate'.format(API_BASE)
# 上传NFT数据
UPLOAD_NFT_INFO = '{}/nft/create'.format(API_BASE_PRE)
# 获取mytoken
GET_MYTOKEN = "{}/common/getqntoken".format(API_BASE)
# 上传快讯
UPLOAD_NEWS_FLASH = "{}/newsflash/add".format(API_BASE)
GET_ETHERSCAN_TARGET = API_BASE + '/currencyproject/list?page_limit=100&page_num={}'
# 链接redis
REDIS_DATA = os.getenv("REDIS_DATA", 'r-j6cd64854a7eed94.redis.rds.aliyuncs.com')
REDIS_DATA_DB = int(os.getenv("REDIS_DATA_DB", 7))
REDIS_TTL = int(os.getenv("REDIS_TTL", 900))
REDIS_KEY = os.getenv("REDIS_KEY", "media_cache_{}:{}")
ERROR_ACCOUNT_KEY = os.getenv("ERROR_ACCOUNT_KEY", "error_account:{}:{}")
UPLOAD_ETHERSCAN_ADDRESS_OVERVIEW = '{}/exchangebalancesnapshot/create'.format(API_BASE)
GET_ALL_ADDRESS = 'http://172.16.3.188:8001/getAllAddresses?type={}&enabled=1'
# Configure maximum concurrent requests performed by Scrapy (default: 16)
# CONCURRENT_REQUESTS = 32
# Configure a delay for requests for the same website (default: 0)
# See https://docs.scrapy.org/en/latest/topics/settings.html#download-delay
# See also autothrottle settings and docs
# DOWNLOAD_DELAY = 3
# The download delay setting will honor only one of:
# CONCURRENT_REQUESTS_PER_DOMAIN = 16
# CONCURRENT_REQUESTS_PER_IP = 16
# Disable cookies (enabled by default)
COOKIES_ENABLED = False
# Disable Telnet Console (enabled by default)
# TELNETCONSOLE_ENABLED = False
# Override the default request headers:
# DEFAULT_REQUEST_HEADERS = {
# 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
# 'Accept-Language': 'en',
# }
# Enable or disable spider middlewares
# See https://docs.scrapy.org/en/latest/topics/spider-middleware.html
# SPIDER_MIDDLEWARES = {
# 'currency_follow.middlewares.CurrencyFollowSpiderMiddleware': 543,
# }
# Enable or disable downloader middlewares
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html
DOWNLOADER_MIDDLEWARES = {
'currency_follow.middlewares.CloudScraperMiddleware': 543,
}
# Enable or disable extensions
# See https://docs.scrapy.org/en/latest/topics/extensions.html
# EXTENSIONS = {
# 'scrapy.extensions.telnet.TelnetConsole': None,
# }
# Configure item pipelines
# See https://docs.scrapy.org/en/latest/topics/item-pipeline.html
ITEM_PIPELINES = {
'currency_follow.pipelines.CurrencyFollowPipeline': 300,
}
# Enable and configure the AutoThrottle extension (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/autothrottle.html
# AUTOTHROTTLE_ENABLED = True
# The initial download delay
# AUTOTHROTTLE_START_DELAY = 5
# The maximum download delay to be set in case of high latencies
# AUTOTHROTTLE_MAX_DELAY = 60
# The average number of requests Scrapy should be sending in parallel to
# each remote server
# AUTOTHROTTLE_TARGET_CONCURRENCY = 1.0
# Enable showing throttling stats for every response received:
# AUTOTHROTTLE_DEBUG = False
# Enable and configure HTTP caching (disabled by default)
# See https://docs.scrapy.org/en/latest/topics/downloader-middleware.html#httpcache-middleware-settings
# HTTPCACHE_ENABLED = True
# HTTPCACHE_EXPIRATION_SECS = 0
# HTTPCACHE_DIR = 'httpcache'
# HTTPCACHE_IGNORE_HTTP_CODES = []
# HTTPCACHE_STORAGE = 'scrapy.extensions.httpcache.FilesystemCacheStorage'
# This package will contain the spiders of your Scrapy project
#
# Please refer to the documentation for information on how to create and manage
# your spiders.
import json
import logging
import requests
import scrapy
import xmltodict
from ..items import DefiLockedInfoItem
all_info_data = []
item = DefiLockedInfoItem()
class DefiComingBscSpider(scrapy.Spider):
name = 'defi_coming_bsc'
# allowed_domains = ['www.xx.com']
# start_urls = ['http://www.xx.com/']
def __init__(self, *args, **kwargs):
super(DefiComingBscSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.start_num = 0
self.dynamic_url = 'https://www.defibox.com/dgg/ranks/v3/all?lang=cn'
self.headers = {
"content-type": "application/json;charset=UTF-8"
}
self.page = 0
self.request_payload_info = {
"page": self.page,
"size": 20,
"chain": "bsc",
"vtoken": "0bca0f1778027e0626d56f4be3a9bb0a",
"field": "locked",
"direction": "DESC"
}
def start_requests(self):
yield scrapy.Request(url=self.dynamic_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
def parse(self, response):
if response.status == 200:
try:
coverte_json = xmltodict.parse(response.text, encoding='utf-8')
data_json = json.dumps(coverte_json)
data_dict = json.loads(data_json)
data_list = data_dict.get('R').get('data')
except Exception as e:
logging.info("***数据格式错误,解析异常,使用requests抓取json数据 ******")
resp = requests.post(url=self.dynamic_url, json=self.request_payload_info, headers=self.headers)
data_dict = resp.json()
data_list = data_dict.get('data')
if data_list:
# print(json.dumps(data_list))
for data_item in data_list:
defi_item = dict()
defi_item['name'] = data_item['name']
try:
category = data_item.get('category')
if category is None:
defi_item['chain_category_name'] = '其他'
else:
category_name = data_item.get('category').get('category').get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
except Exception as e:
category_name = data_item.get('category')[0].get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
defi_item['address_24h_num'] = data_item.get('address24')
defi_item['address_change_24h'] = data_item.get('address24Pct')
defi_item['volume_24h_usd'] = float(data_item.get('volume24'))
defi_item['volume_change_24h'] = data_item.get('volume24Pct')
defi_item['locked_usd'] = data_item.get('locked')
defi_item['locked_change_24h'] = data_item.get('lockedPct')
defi_item['chain_currency_id'] = 4
defi_item['website'] = data_item.get('website')
all_info_data.append(defi_item)
if len(data_list) == 20:
self.page += 1
self.request_payload_info = {
"page": self.page,
"size": 20,
"chain": "bsc",
"vtoken": "0bca0f1778027e0626d56f4be3a9bb0a",
"field": "locked",
"direction": "DESC"
}
yield scrapy.Request(url=self.dynamic_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
else:
item['data'] = all_info_data
yield item
else:
item['data'] = all_info_data
yield item
import json
import logging
import time
import requests
from ..items import DefiLockedInfoItem
import scrapy
import xmltodict
import pprint
all_info_data = []
item = DefiLockedInfoItem()
class DefiComingEthSpider(scrapy.Spider):
name = 'defi_coming_eth'
# allowed_domains = ['www.xx.com']
# start_urls = ['http://www.xx.com/']
def __init__(self, *args, **kwargs):
super(DefiComingEthSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.start_num = 0
self.dynamic_url = 'https://www.defibox.com/dgg/ranks/v3/all?lang=cn'
self.headers = {
"content-type": "application/json;charset=UTF-8"
}
self.page = 0
self.request_payload_info = {
"page": self.page,
"size": 20,
"chain": "eth",
"vtoken": "0bca0f1778027e0626d56f4be3a9bb0a",
"field": "locked",
"direction": "DESC"
}
def start_requests(self):
yield scrapy.Request(url=self.dynamic_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
def parse(self, response):
if response.status == 200:
try:
coverte_json = xmltodict.parse(response.text, encoding='utf-8')
data_json = json.dumps(coverte_json)
data_dict = json.loads(data_json)
data_list = data_dict.get('R').get('data')
except Exception as e:
logging.info("***数据格式错误,解析异常,使用requests抓取json数据 ******")
resp = requests.post(url=self.dynamic_url, json=self.request_payload_info, headers=self.headers)
data_dict = resp.json()
data_list = data_dict.get('data')
if data_list:
for data_item in data_list:
defi_item = dict()
defi_item['name'] = data_item['name']
try:
category_name = data_item.get('category').get('category').get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
except Exception as e:
category_name = data_item.get('category')[0].get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
defi_item['address_24h_num'] = data_item.get('address24')
defi_item['address_change_24h'] = data_item.get('address24Pct')
defi_item['volume_24h_usd'] = float(data_item.get('volume24'))
defi_item['volume_change_24h'] = data_item.get('volume24Pct')
defi_item['locked_usd'] = data_item.get('locked')
defi_item['locked_change_24h'] = data_item.get('lockedPct')
defi_item['chain_currency_id'] = 2
defi_item['website'] = data_item.get('website')
all_info_data.append(defi_item)
if len(data_list) == 20:
self.page += 1
self.request_payload_info = {
"page": self.page,
"size": 20,
"chain": "eth",
"vtoken": "0bca0f1778027e0626d56f4be3a9bb0a",
"field": "locked",
"direction": "DESC"
}
yield scrapy.Request(url=self.dynamic_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
else:
item['data'] = all_info_data
yield item
else:
item['data'] = all_info_data
yield item
import logging
import scrapy
import json
import requests
import pprint
from ..items import DefiLockedInfoItem
import xmltodict
all_info_data = []
item = DefiLockedInfoItem()
class DefiComingHecoSpider(scrapy.Spider):
name = 'defi_coming_heco'
# allowed_domains = ['www.xx.com']
# start_urls = ['http://www.xx.com/']
def __init__(self, *args, **kwargs):
super(DefiComingHecoSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.start_num = 0
# self.base_url = 'https://m.defibox.com/-/x/dgg/quote/v2/deficoin/list?lang=cn'
self.base_url = 'https://www.defibox.com/dgg/ranks/v3/all?lang=cn'
self.page = 0
self.request_payload_info = {
'chain': "heco",
'direction': "DESC",
'field': "locked",
'page': self.page,
'size': 20,
'vtoken': "0bca0f1778027e0626d56f4be3a9bb0a"
}
self.headers = {
"content-type": "application/json"
}
def start_requests(self):
yield scrapy.FormRequest(url=self.base_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
def parse(self, response):
if response.status == 200:
try:
coverte_json = xmltodict.parse(response.text, encoding='utf-8')
data_json = json.dumps(coverte_json)
data_dict = json.loads(data_json)
data_list = data_dict.get('R').get('data')
except Exception as e:
logging.info("***数据格式错误,解析异常,使用requests抓取json数据 ******")
resp = requests.post(url=self.base_url, json=self.request_payload_info, headers=self.headers)
data_dict = resp.json()
data_list = data_dict.get('data')
if data_list:
# print(json.dumps(data_list))
for data_item in data_list:
defi_item = dict()
defi_item['name'] = data_item['name']
try:
if data_item.get('category') is None:
defi_item['chain_category_name'] = '其他'
else:
category_name = data_item.get('category').get('category').get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
except Exception as e:
category_name = data_item.get('category')[0].get('zh')
if category_name is None:
defi_item['chain_category_name'] = '其他'
elif category_name == '其他项目':
defi_item['chain_category_name'] = '其他'
else:
defi_item['chain_category_name'] = category_name
defi_item['address_24h_num'] = data_item.get('address24')
defi_item['address_change_24h'] = data_item.get('address24Pct')
defi_item['volume_24h_usd'] = float(data_item.get('volume24'))
defi_item['volume_change_24h'] = data_item.get('volume24Pct')
defi_item['locked_usd'] = data_item.get('locked')
defi_item['locked_change_24h'] = data_item.get('lockedPct')
defi_item['chain_currency_id'] = 3
defi_item['website'] = data_item.get('website')
all_info_data.append(defi_item)
if len(data_list) == 20:
self.page += 1
self.request_payload_info = {
"page": self.page,
"size": 20,
"chain": "heco",
"vtoken": "0bca0f1778027e0626d56f4be3a9bb0a",
"field": "locked",
"direction": "DESC"
}
yield scrapy.Request(url=self.base_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
else:
item['data'] = all_info_data
yield item
else:
item['data'] = all_info_data
yield item
import scrapy
import json
from ..items import DefiLockedInfoItem
import requests
all_info_data = []
volume_24h_total = []
class DefiLockedEthSpider(scrapy.Spider):
name = 'defi_locked_eth'
start_num = 0
# start_urls = ['https://static1.debank.com/tvl/projects-v2.json']
def __init__(self, *args, **kwargs):
super(DefiLockedEthSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.tag_url = "https://api.debank.com/project/tag_list"
self.base_url = "https://static1.debank.com/tvl/projects-v2.json"
def start_requests(self):
yield scrapy.Request(url=self.tag_url, callback=self.parse_tag)
def parse_tag(self, response):
if response.status == 200:
tag_dict = {}
tag_dict_list = json.loads(response.text)['data']['tag_list']
for i in tag_dict_list:
if i['id']:
tag_dict[i['id']] = i['name']['ch']
yield scrapy.Request(url=self.base_url, callback=self.parse, meta={'tag_dict': tag_dict})
def parse(self, response):
if response.status == 200:
data_dict = json.loads(response.text)
tag_dict = response.meta['tag_dict']
for data_item in data_dict:
item = DefiLockedInfoItem()
defi_item = dict()
# if data_item['chain']['id'] == "eth":
if data_item['chain'] == "eth":
defi_item['name'] = data_item['name']['en']
defi_item['contract_address'] = data_item.get('id') if data_item.get('id').startswith(
'0x') else '' if data_item.get('platform_token_id') is None else data_item.get(
'platform_token_id')
defi_item['lockup_category_name'] = '其他' if data_item.get('tag_ids') is None else '其他' if len(
data_item.get('tag_ids')) == 0 else tag_dict.get(
data_item.get('tag_ids')[0])
defi_item['volume_24h_usd'] = float(data_item.get('data')[-1].get('locked_usd_value'))
last_locked_amount = data_item.get('data')[-2].get('locked_usd_value')
if not last_locked_amount == 0:
defi_item['percent_change_24h'] = (defi_item[
'volume_24h_usd'] - last_locked_amount) / last_locked_amount
else:
defi_item['percent_change_24h'] = 0
defi_item['lock_currency_id'] = 2
all_info_data.append(defi_item)
volume_24h_total.append(defi_item['volume_24h_usd'])
self.start_num += 1
if self.start_num == 174:
item['data'] = all_info_data
# print(item)
# print('total:', sum(volume_24h_total))
return item
import scrapy
import json
import xmltodict
from ..items import DefiLockedInfoItem
import pprint
all_info_data = []
all_info_dict = {}
class DefiLockedHecoSpider(scrapy.Spider):
name = 'defi_locked_heco'
# allowed_domains = ['www.xx.com']
# start_urls = ['http://www.xx.com/']
def __init__(self, *args, **kwargs):
super(DefiLockedHecoSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.start_num = 0
self.base_url = 'https://www.defibox.com/dgg/ranks/v3/all?lang=cn'
self.new_url = 'https://m.defibox.com/-/x/dgg/quote/v2/deficoin/list?lang=cn'
self.request_payload_info = {
"page": 0,
"size": 100,
"chain": "heco",
"vtoken": None,
"field": "locked",
"direction": "DESC"
}
self.payload = {"chain": "heco", "rule": "top", "size": 500}
self.headers = {
"content-type": "application/json;charset=UTF-8",
}
def start_requests(self):
yield scrapy.FormRequest(url=self.base_url, callback=self.parse, method='POST',
body=json.dumps(self.request_payload_info), headers=self.headers)
    def parse(self, response):
        if response.status == 200:
            # The ranks endpoint answers with XML; convert it into plain dicts.
            converted = xmltodict.parse(response.text, encoding='utf-8')
            data_dict = json.loads(json.dumps(converted))
            data_list = data_dict['R']['data']
            # First pass: cache name and category per extId for the second request.
            for data_item in data_list:
                ext_id = data_item['extId']
                name = data_item['name']
                try:
                    lockup_category_name = data_item['category']['category']['zh'] or '其他'
                except (KeyError, TypeError):
                    lockup_category_name = '其他'
                all_info_dict[ext_id] = {'name': name, 'lockup_category_name': lockup_category_name}
            yield scrapy.FormRequest(url=self.new_url, method='POST', callback=self.parse_data,
                                     body=json.dumps(self.payload), headers=self.headers)
    def parse_data(self, response):
        if response.status == 200:
            converted = xmltodict.parse(response.text, encoding='utf-8')
            data_dict = json.loads(json.dumps(converted))
            data_list = data_dict['R']['data']
            for data_item in data_list:
                defi_info_item = dict()
                ext_id = data_item['extId']
                cached = all_info_dict.get(ext_id)
                if cached:
                    defi_info_item['name'] = cached['name']
                    defi_info_item['lockup_category_name'] = cached['lockup_category_name']
                else:
                    defi_info_item['name'] = data_item['name'].capitalize()
                    defi_info_item['lockup_category_name'] = '其他'
                defi_info_item['volume_24h_usd'] = float(data_item.get('locked'))
                defi_info_item['percent_change_24h'] = data_item['priceChangePercentage24h']
                defi_info_item['lock_currency_id'] = 3
                all_info_data.append(defi_info_item)
            # Emit a single aggregate item once every row has been collected.
            item = DefiLockedInfoItem()
            item['data'] = all_info_data
            return item
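# Editor's note: a minimal, self-contained illustration of the xmltodict
# round-trip used by both callbacks above — the defibox endpoints answer with
# XML, which is converted to plain dicts before field access.  The sample XML
# below is invented for demonstration only.
def _xmltodict_roundtrip_demo():
    sample = ('<R><data><extId>1</extId><name>alpha</name><locked>10.5</locked></data>'
              '<data><extId>2</extId><name>beta</name><locked>3.25</locked></data></R>')
    # xmltodict yields OrderedDicts; the json round-trip flattens them to plain dicts.
    result = json.loads(json.dumps(xmltodict.parse(sample)))
    # With more than one <data> element, result['R']['data'] is a list,
    # which is exactly the shape parse() and parse_data() iterate over.
    return [row['name'] for row in result['R']['data']]  # -> ['alpha', 'beta']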
import json
import logging
import re
import cloudscraper
import scrapy
from ..items import GrayscaleTrustItem
class GrayscaleTrustSpider(scrapy.Spider):
name = 'grayscale_trust'
# allowed_domains = ['www.cx.com']
# start_urls = ['http://www.cx.com/']
def __init__(self, *args, **kwargs):
super(GrayscaleTrustSpider, self).__init__(*args, **kwargs)
self.env = kwargs.get('env', 'online')
self.base_url = "https://grayscale.co/{}-trust/"
self.currency_info_list = [("basic-attention-token", 37, "bat"), ("bitcoin", 28, "btc"),
("bitcoin-cash", 33, "bch"),
("chainlink", 38, "link"), ('decentraland', 40, "mana"), ("ethereum", 29, "eth"),
("ethereum-classic", 32, "etc"), ('filecoin', 39, "fil"), ("horizen", 34, "zen"),
("litecoin", 30, "ltc"), ('livepeer', 41, "lpt"), ("stellar-lumens", 36, "lum"),
("zcash", 35, "zch")]
self.scraper = cloudscraper.create_scraper()
# self.scraper = cfscrape.create_scraper()
def start_requests(self):
for currency_info in self.currency_info_list:
currency_re_url = self.base_url.format(currency_info[0])
yield scrapy.Request(url=currency_re_url, callback=self.parse,
meta={"currency_name": currency_info[0], "currency_id": currency_info[1],
"pet_name": currency_info[2]})
def parse(self, response):
currency_name = response.meta['currency_name']
currency_id = response.meta['currency_id']
pet_name = response.meta['pet_name']
if response.status == 200:
item = GrayscaleTrustItem()
            try:
                # Strip the trailing character and thousands separators before
                # converting the two figures pulled from the fund page.
                shares_Outstanding = response.xpath(
                    '//span[@data-title="Shares Outstanding"]/text()').get()[:-1]
                currency_per_Share = response.xpath(
                    '//span[@data-title="Token per Share"]/text()').get()[:-1]
                shares_Outstanding = int(shares_Outstanding.replace(',', ''))
                currency_per_Share = float(currency_per_Share)
                # The benchmark series is embedded in the page as an inline
                # "window['chart_data_*'] = {...};" script; extract its JSON blob.
                json_str = re.search(r'window\[\'chart_data_.*?\'] = (.*?);', response.text, re.S | re.I).group(1)
                json_data = json.loads(json_str)
market_price_per_share = json_data["Benchmark"][-1]['y']
if currency_id in (28, 29, 30, 33, 32):
currency_etf = float(market_price_per_share) / float(currency_per_Share)
else:
currency_etf = 0
            except Exception as e:
                logging.error("Failed to parse the {} page: {}".format(currency_name, e))
else:
item['currency_id'] = currency_id
item['currency_name'] = currency_name
item['currency_pet_name'] = pet_name
item['currency_data'] = shares_Outstanding * currency_per_Share
item['currency_etf'] = currency_etf
yield item
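# Editor's note: a tiny, self-contained demo of the chart-data extraction used
# in parse() above, run against an invented inline-script snippet rather than
# real Grayscale page HTML.
def _chart_data_regex_demo():
    sample_html = 'window[\'chart_data_btc\'] = {"Benchmark": [{"y": 12.34}]}; window.other = 1;'
    # Non-greedy capture stops at the first ';', leaving just the JSON blob.
    blob = re.search(r'window\[\'chart_data_.*?\'] = (.*?);', sample_html, re.S | re.I).group(1)
    return json.loads(blob)["Benchmark"][-1]['y']  # -> 12.34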
import json
import time
import scrapy
from currency_follow import utils
from currency_follow.items import BitcoinTokenholdersTop100Item, BitcoinTokenHolderItem
class TokenholdersStdTop100Spider(scrapy.Spider):
name = 'tokenholders_STD_top100'
start_urls = ['http://47.57.70.99:8080/std/api/getTop100']
def __init__(self, **kwargs):
super(TokenholdersStdTop100Spider, self).__init__(**kwargs)
self.env = kwargs.get('env', 'online')
self.cpid, self.cid = 11750, 356455
def parse(self, response):
data_raws = json.loads(response.body.decode())
item = BitcoinTokenholdersTop100Item()
cpid, cid = self.cpid, self.cid
bitem = BitcoinTokenHolderItem()
        holder_list = []
        # USD price used to value each holder balance; fall back to 0 when the
        # currency-detail API returns nothing.
        currency_detail = utils.get_currency_detail(cid) or {}
        currency_price_usd = currency_detail.get('price_usd') or 0
        for rank, row in enumerate(data_raws['data']['data'], start=1):
            row_dict = dict()
            row_dict['holder_rank'] = rank
            row_dict['address'] = row['address']
            row_dict['quantity'] = row['balance']
            row_dict['percentage'] = row['percent']
            row_dict['currency_id'] = cid
            row_dict['source_id'] = 3
            row_dict['note'] = ''
            row_dict['value'] = float(row['balance']) * currency_price_usd
            holder_list.append(row_dict)
        item['holder_list'] = holder_list
        # Holder-count summary (number of holding addresses) from the same payload.
bitem['contract_address'] = 'address'
bitem['holders'] = data_raws['data']['holders']
bitem['holder_chart_url'] = 'https://explorer.standardchain.info/'
bitem['currency_project_id'] = cpid
bitem['utc0_date'] = int(time.time())
item['bitcoin_tokenholders'] = bitem
yield item
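# Editor's note: an invented example of the response shape parse() above
# expects from the getTop100 endpoint, inferred only from the fields it reads;
# the real API may carry additional fields and a different address format.
EXAMPLE_TOP100_RESPONSE = {
    "data": {
        "holders": 12345,
        "data": [
            {"address": "EXAMPLE_ADDRESS_1",
             "balance": 1000.0,
             "percent": 0.05},
        ],
    },
}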
import json
import logging
import time
import grequests
import requests
from .settings import ApiUrl
# Bitcoin: push the scraped top-100 holder list (and holder-count summary) to the backend.
def update_bitcoin_top100(item):
holder_list = item['holder_list']
    # This also triggers the holder-count (number of holding addresses) update below.
bitcoin_tokenholders = item['bitcoin_tokenholders']
if bitcoin_tokenholders:
update_etherscan_holders_info(bitcoin_tokenholders)
if len(holder_list) > 0:
request_list = []
for row_dict in holder_list:
rs = grequests.post(ApiUrl.UPLOAD_ETHERSCAN_TOP100, data=dict(row_dict))
request_list.append(rs)
try:
ret = grequests.map(request_list, size=50, exception_handler=exception_handler)
success_count = 0
failed_count = 0
for r in ret:
if r.status_code == 200 and r.json()['code'] == 0:
success_count += 1
else:
failed_count += 1
logging.info(
"***update_bitcoin_top100 failed! currency_id:{}, len_holder_list: {},ret_data:{}".format(
holder_list[0]['currency_id'],
len(holder_list),
r.content.decode()))
logging.info(
'***update_bitcoin_top100 status: currency_id:{}, len_holder_list: {}, success_count:{}, '
'failed_count:{}'.format(
holder_list[0]['currency_id'], len(holder_list), success_count, failed_count))
except Exception as e:
logging.info("***update_bitcoin_top100 Exception:{}, holder_list:{}".format(e, holder_list))
def update_etherscan_holders_info(item):
r = requests.post(ApiUrl.UPLOAD_ETHERSCAN_INFO, data=dict(item))
if r.status_code == 200 and r.json()['code'] == 0:
logging.info('***etherscan_holders update success: item: {}, ret_data:{}'.format(
item, r.content.decode()))
def get_currency_detail(currency_id):
r = requests.get(ApiUrl.GET_CURRENCY_DETAIL.format(currency_id))
if r.status_code == 200 and r.json().get('code') == 0:
return r.json().get('data')
def update_defi_locked_info(item):
r = requests.post(ApiUrl.UPLOAD_DEFI_LOCKED_INFO, data=json.dumps(item['data']))
if r.status_code == 200 and r.json().get('code') == 0:
logging.info('***defi_locked_info update success: ret_data:{}'.format(r.content.decode()))
else:
        post_to_wexin_qwxchat('Project defi_locked_info update failed, ret_data:{}'.format(r.content.decode()))
def update_defi_overview(item):
up_json = json.dumps(item['data'])
# pprint.pprint(up_json)
# print(up_json)
r = requests.post(ApiUrl.UPLOAD_DEFI_OVERVIEW, data=up_json)
if r.status_code == 200 and r.json().get('code') == 0:
print('***defi_overview_info update success: ret_data:{}'.format(r.content.decode()))
logging.info('***defi_overview_info update success: ret_data:{}'.format(r.content.decode()))
else:
        print('***Project defi_overview_info update failed, ret_data:{}'.format(r.content.decode()))
        post_to_wexin_qwxchat('Project defi_overview_info update failed, ret_data:{}'.format(r.content.decode()))
def update_grayscale_trust(item):
try:
ts = int(time.time()) - 86400
r = requests.post(
ApiUrl.UPLOAD_GRAYSCALE_TRUST + "?ts={}&c={}&market_index_id={}".format(ts, item['currency_data'],
item['currency_id']))
logging.info(
'***update success:currency:{}, ret_data:{}, item:{}'.format(item['currency_name'], r.json(), item))
except Exception as e:
logging.info('***Exception:{}, item:{}'.format(e, item))
def post_to_wexin_qwxchat(content):
url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send"
querystring = {"key": "67e59f56-81c7-4bf7-b35d-3bc7068981a6"}
payload = {
"msgtype": "text",
"text": {
"content": content,
"mentioned_list": [],
"mentioned_mobile_list": []
}
}
    headers = {
        'Content-Type': "application/json",
        'Cache-Control': "no-cache",
        # Host, Accept-Encoding and Content-Length are filled in by requests itself;
        # hard-coding Content-Length would send a wrong value for most payloads.
    }
resp = requests.request("POST", url, data=json.dumps(payload), headers=headers, params=querystring)
if resp.json()['errmsg'] == 'ok':
        print('WeChat alert sent successfully')
def exception_handler(request, exception):
logging.info(
"***exception_handler,update_etherscan_top100 faile, exception:{}".format(exception))
Metadata-Version: 2.1
Name: project
Version: 1.0
Summary: UNKNOWN
Home-page: UNKNOWN
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
setup.py
currency_follow/__init__.py
currency_follow/items.py
currency_follow/middlewares.py
currency_follow/pipelines.py
currency_follow/run.py
currency_follow/settings.py
currency_follow/utils.py
currency_follow/spiders/__init__.py
currency_follow/spiders/defi_coming_bsc.py
currency_follow/spiders/defi_coming_eth.py
currency_follow/spiders/defi_coming_heco.py
currency_follow/spiders/defi_locked_eth.py
currency_follow/spiders/defi_locked_heco.py
currency_follow/spiders/grayscale_trust.py
currency_follow/spiders/tokenholders_STD_top100.py
project.egg-info/PKG-INFO
project.egg-info/SOURCES.txt
project.egg-info/dependency_links.txt
project.egg-info/entry_points.txt
project.egg-info/top_level.txt
\ No newline at end of file
[scrapy]
settings = currency_follow.settings
# Automatically created by: scrapy startproject
#
# For more information about the [deploy] section see:
# https://scrapyd.readthedocs.io/en/latest/deploy.html
[settings]
default = currency_follow.settings
[deploy]
#url = http://localhost:6800/
project = currency_follow
# Automatically created by: scrapyd-deploy
from setuptools import setup, find_packages
setup(
name = 'project',
version = '1.0',
packages = find_packages(),
entry_points = {'scrapy': ['settings = currency_follow.settings']},
)
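# Editor's note: a minimal sketch of driving these spiders locally with
# Scrapy's CrawlerProcess.  It is illustrative only and independent of the
# project's own currency_follow/run.py; the spider names and the ``env``
# kwarg are taken from the spider definitions above.
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings

def run_all(env='online'):
    # scrapy.cfg points settings at currency_follow.settings, so running this
    # from the repository root picks up the project spiders automatically.
    process = CrawlerProcess(get_project_settings())
    for spider_name in ('defi_locked_eth', 'defi_locked_heco',
                        'grayscale_trust', 'tokenholders_STD_top100'):
        process.crawl(spider_name, env=env)
    process.start()

if __name__ == '__main__':
    run_all()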