Skip to content

Commit

Permalink
v0.1.4
Browse files Browse the repository at this point in the history
加入 workflows
  • Loading branch information
QIN2DIM committed Oct 7, 2021
1 parent 73a91fb commit 70f5c96
Show file tree
Hide file tree
Showing 26 changed files with 767 additions and 599 deletions.
72 changes: 72 additions & 0 deletions .github/workflows/automated.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
name: V2RSS Mining

on:
  push:
    branches: [ main ]
  # Run twice a day
  # schedule:
  #   - cron: '25 */12 * * *'

jobs:
  build:

    runs-on: ubuntu-latest
    env:
      TZ: "Asia/Shanghai"

    steps:
      # ============================================
      # TODO [√] Check out the working branch and prepare the workflow environment
      # ============================================
      - uses: actions/checkout@v2

      # ============================================
      # TODO [√] Set up a Python 3.6+ environment
      # ============================================
      - name: Set up Python 3.8
        uses: actions/setup-python@v2
        with:
          # Quoted so YAML never reinterprets the version as a float.
          python-version: "3.8"

      # ============================================
      # TODO [√] Install the project's third-party dependencies
      # ============================================
      # Install requirements
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      # 1. Fetch the latest Chrome and the matching ChromeDriver
      # 2. Initialise the working directory
      - name: v2rss build
        run: |
          python build.py

      # ============================================
      # TODO [√] Test the scaffold commands
      # ============================================
      - name: v2rss mining
        run: |
          python main.py
      #
      # # Run one cleaning job to check that the subscription-monitoring module works
      # - name: v2rss overdue/decouple
      #   run: |
      #     python main.py overdue
      #     python main.py decouple
      #
      # # Run one collection job to check that the collector module works
      # - name: v2rss spawn
      #   run: |
      #     python main.py spawn

      # ============================================
      # TODO [√] Update repository data
      # ============================================
      - name: Setup GIT user
        uses: fregante/setup-git-user@v1

      - name: v2rss push
        run: |
          git add --all
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the whole job on a run that produced no new data.
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "Automated deployment @ $(date '+%Y-%m-%d %H:%M:%S') ${{ env.TZ }}"
            git push --force origin main
          fi
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -128,4 +128,4 @@ dmypy.json
# Pyre type checker
.pyre/
.idea/

./chromedriver.exe
97 changes: 97 additions & 0 deletions build.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# -*- coding: utf-8 -*-
# Time : 2021/10/6 18:38
# Author : QIN2DIM
# Github : https://github.com/QIN2DIM
# Description:
import os
import shlex

import requests
from bs4 import BeautifulSoup

# Directory that set_chromedriver() scans for the extracted "chromedriver" file.
THIS_WALK = "."
# Destination that run() passes to set_chromedriver() as the install path.
CHROMEDRIVER_UNZIP_PATH = "./chromedriver"


def shell_echo(cmd: str, mode="default"):
    """
    Run a command line and return its exit status.

    :param cmd: the command line to execute.
    :param mode: ``"default"`` hands ``cmd`` to the system shell through
        ``os.system``, so pipes and redirections are interpreted.
        ``"safe"`` tokenises ``cmd`` with ``shlex.split`` and executes it
        without a shell, so shell metacharacters (``>``, ``|``, ``;`` ...)
        are passed to the program as literal arguments instead of being
        interpreted.
    :return: ``0`` on success and a non-zero value on failure. In default
        mode this is whatever ``os.system`` returns; in safe mode it is the
        process return code (``127`` when the program cannot be started).
    :raises ValueError: if ``mode`` is neither ``"default"`` nor ``"safe"``.
    """
    if mode == "default":
        return os.system(cmd)
    if mode == "safe":
        # Imported locally so the block does not depend on a new file-level import.
        import subprocess

        # Quoting the *whole* command (the previous behaviour) turned it into a
        # single word, so the shell looked for a program literally named
        # e.g. "ls -l" and always failed. Split into argv and skip the shell.
        argv = shlex.split(cmd)
        if not argv:
            # Nothing to run; mirror os.system(""), which reports success.
            return 0
        try:
            return subprocess.call(argv)
        except OSError:
            # Same status a shell reports for "command not found".
            return 127
    raise ValueError(f"unsupported mode: {mode!r} (expected 'default' or 'safe')")


def set_google_chrome():
    """
    Make sure ``google-chrome`` is available, installing it when it is missing.

    The installer package is chosen from the package manager found on PATH:
    the ``.deb`` build with ``apt-get`` when that exists, otherwise the
    ``.rpm`` build with ``yum localinstall``. (``apt`` has no ``localinstall``
    subcommand and cannot install an ``.rpm``, so the previous command could
    never succeed.)

    :return: True if ``google-chrome --version`` succeeds (either up front or
        after the installation attempt), otherwise False.
    """
    # Imported locally so the block does not depend on a new file-level import.
    import shutil

    # Google-chrome already exists in the current environment
    if shell_echo("google-chrome --version") == 0:
        return True

    download_root = "https://dl.google.com/linux/direct"
    if shutil.which("apt-get"):
        # Debian / Ubuntu (this is what the ubuntu-latest CI runner uses).
        package = "google-chrome-stable_current_amd64.deb"
        install_cmd = f"sudo apt-get install -y ./{package} >/dev/null"
    else:
        # RPM-based distributions such as CentOS 7.
        package = "google-chrome-stable_current_x86_64.rpm"
        install_cmd = f"sudo yum localinstall -y {package} >/dev/null"

    shell_echo(f"wget {download_root}/{package} >/dev/null")
    shell_echo(install_cmd)
    # Remove the installer here so cleanup does not depend on which package was used.
    shell_echo(f"rm -f {package}")

    # Report whether the installation actually produced a working browser.
    return shell_echo("google-chrome --version") == 0


def _version_key(version):
    """Convert a dotted version string into a tuple of ints for numeric ordering."""
    return tuple(int(part) if part.isdigit() else 0 for part in version.split("."))


def _pick_chromedriver_version(chrome_version, listing_text):
    """Pick the newest chromedriver release in ``listing_text`` that matches ``chrome_version``."""
    parts = chrome_version.split(".")
    major_prefix = parts[0] + "."
    build = ".".join(parts[:3])

    # Each listing line looks like "<version>/ ..."; keep the directory name only.
    names = [line.strip().split("/")[0].strip() for line in listing_text.split("\n")]
    same_major = [name for name in names if name.startswith(major_prefix)]
    # Prefer a driver built from the same major.minor.build as the browser.
    same_build = [name for name in same_major
                  if name == build or name.startswith(build + ".")]

    candidates = same_build or same_major
    if not candidates:
        raise ValueError(f"no chromedriver release found for Chrome {chrome_version}")
    # Compare numerically: as plain strings "94.0.4606.41" sorts above "94.0.4606.113".
    return max(candidates, key=_version_key)


def set_chromedriver(unzip_path=None):
    """
    Download the chromedriver matching the installed Chrome and install it.

    :param unzip_path: destination path of the chromedriver executable;
        defaults to ``/usr/bin/chromedriver`` when None.
    :raises RuntimeError: if ``google-chrome --version`` prints nothing, or if
        the ``chromedriver`` file is missing after extraction.
    :raises ValueError: if the mirror lists no release for the installed Chrome.
    :raises requests.HTTPError: if the mirror answers with an error status.
    """
    # Install location of chromedriver
    unzip_path = "/usr/bin/chromedriver" if unzip_path is None else unzip_path

    # Read the google-chrome release version, such as 89.0.4389.23
    with os.popen("google-chrome --version") as pipe:
        version_output = pipe.read().strip()
    if not version_output:
        # An empty version would match every line of the mirror listing.
        raise RuntimeError("google-chrome is not installed or did not report a version")
    chrome_version = version_output.split(" ")[-1]

    # Query the chromedriver mirror; the timeout keeps CI from hanging on a dead host
    res = requests.get("http://npm.taobao.org/mirrors/chromedriver", timeout=30)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')

    # Locate the release to download from the directory names in the listing
    chromedriver_version = _pick_chromedriver_version(chrome_version, soup.text)

    # Fetch chromedriver; -O overwrites a stale archive instead of creating "*.zip.1"
    shell_echo("wget -O chromedriver_linux64.zip "
               f"http://npm.taobao.org/mirrors/chromedriver/{chromedriver_version}"
               "/chromedriver_linux64.zip >/dev/null")

    # Extract chromedriver; -o overwrites without an interactive prompt
    shell_echo("unzip -o chromedriver_linux64.zip >/dev/null")

    # shell_echo only returns once unzip has exited, so a missing file means the
    # download or extraction failed; fail loudly instead of spinning forever.
    extracted = os.path.join(THIS_WALK, "chromedriver")
    if not os.path.isfile(extracted):
        raise RuntimeError("chromedriver was not extracted from chromedriver_linux64.zip")

    # Make chromedriver executable
    shell_echo("chmod +x chromedriver >/dev/null")

    # Move chromedriver to the install location, unless it is already there
    if os.path.abspath(unzip_path) != os.path.abspath(extracted):
        shell_echo(f"mv -f chromedriver {unzip_path} >/dev/null")


def init_project():
    """Delete the downloaded installer archives and clear the terminal."""
    print("---> Remove irrelevant information")
    housekeeping = (
        "rm -rf chromedriver_linux64.zip",
        "rm -rf google-chrome-stable_current_x86_64.rpm",
        "clear",
    )
    # Run the commands in order, one shell invocation each.
    for command in housekeeping:
        shell_echo(command)


def run():
    """Prepare Chrome, then chromedriver, then remove the temporary downloads."""
    # Browser first: set_chromedriver reads the installed Chrome version.
    set_google_chrome()
    set_chromedriver(unzip_path=CHROMEDRIVER_UNZIP_PATH)

    # Clean up the run cache
    init_project()


if __name__ == "__main__":
    run()
Binary file modified chromedriver.exe
Binary file not shown.
2 changes: 1 addition & 1 deletion examples/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
# Author : QIN2DIM
# Github : https://github.com/QIN2DIM
# Description:
from .scaffold import demo
from .get_started import demo
8 changes: 4 additions & 4 deletions examples/scaffold.py → examples/get_started.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from src.apis import staff_api
from src.apis import v2rss_api
from src.config import DEFAULT_POWER


def demo():
# 判断是否是初次运行
use_collector = staff_api.is_first_run()
use_collector = v2rss_api.is_first_run()

# 开启采集器
classify_dir, staff_info = staff_api.go(
classify_dir, staff_info = v2rss_api.go(
# debug:执行模式 仅影响日志输出形式 不影响行为
debug=False,

Expand Down Expand Up @@ -51,7 +51,7 @@ def demo():
use_generator=False,
)
# 链接去重
staff_api.refresh_cache(mode='de-dup')
v2rss_api.refresh_cache(mode='de-dup')
# 预览缓存数据
print(f"\n\nSTAFF INFO\n{'_' * 32}")
for element in staff_info.items():
Expand Down
2 changes: 1 addition & 1 deletion main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@

from examples import demo

if __name__ == '__main__':
if __name__ == "__main__":
# TODO 本项目部分流量需要过墙 请开启系统代理
demo()
2 changes: 1 addition & 1 deletion src/apis/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .staff_api import staff_api
from .v2rss_api import v2rss_api
17 changes: 10 additions & 7 deletions src/apis/staff_api.py → src/apis/v2rss_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@
- 本模块内嵌协程启动项,请勿从外部并发调用此模块,此举将引起重大复写灾难
- 本模块网络通信基于协程进行,在程序调试时请注释掉 monkey包
"""
__all__ = ['staff_api']
__all__ = ['v2rss_api']

from gevent import monkey

monkey.patch_all()
import os

from datetime import datetime
from src.config import logger, SERVER_DIR_DATABASE, CHROMEDRIVER_PATH
from src.staff_mining import StaffChecker, StaffCollector, IdentifyRecaptcha, StaffEntropyGenerator
from src.staff_mining.common.exceptions import *
from src.sspanel_mining import StaffChecker, StaffCollector, IdentifyRecaptcha, StaffEntropyGenerator
from src.sspanel_mining.common.exceptions import *


class _Interface(object):
Expand All @@ -29,7 +29,8 @@ def __init__(self):
self._cache_dir_staff_hosts = os.path.join(SERVER_DIR_DATABASE, "staff_hosts")
self._cache_dir_classifier = os.path.join(self._cache_dir_staff_hosts, "classifier")

self._cache_path_staff_hosts = os.path.join(self._cache_dir_staff_hosts, "staff_host.txt")
self._cache_path_staff_hosts = os.path.join(self._cache_dir_staff_hosts,
f"staff_host_{str(datetime.now()).split(' ')[0]}.txt")
self._path_staff_arch_recaptcha = os.path.join(self._cache_dir_classifier, "staff_arch_recaptcha.txt")
self._path_staff_arch_entropy = os.path.join(self._cache_dir_classifier, "staff_arch_entropy.txt")

Expand Down Expand Up @@ -247,6 +248,8 @@ def is_first_run(self) -> bool:
- False: at least one cycle has been successfully completed
"""
if not os.path.exists(self._cache_path_staff_hosts):
with open(self._cache_path_staff_hosts, 'w', encoding="utf8"):
pass
return True
else:
with open(self._cache_path_staff_hosts, 'r', encoding="utf8") as f:
Expand Down Expand Up @@ -325,10 +328,10 @@ def go(self, debug: bool = False, silence: bool = True, power: int = os.cpu_coun
return self.extractor()


staff_api = _Interface()
v2rss_api = _Interface()

if __name__ == '__main__':
staff_api.go(
v2rss_api.go(
debug=False,
silence=True,
power=32,
Expand Down
9 changes: 4 additions & 5 deletions src/config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os
from os.path import dirname, join, exists
from os.path import dirname, join, exists,abspath
from sys import platform

from loguru import logger
Expand Down Expand Up @@ -31,12 +31,12 @@
# ---------------------------------------------------
if "win" in platform:
# 定位chromedriver根目录
CHROMEDRIVER_PATH = dirname(__file__) + "/chromedriver.exe"
CHROMEDRIVER_PATH = "./chromedriver.exe"
# 定位工程根目录 SERVER_DIR_PROJECT
SERVER_DIR_PROJECT = dirname(__file__)
else:
CHROMEDRIVER_PATH = dirname(__file__) + "/chromedriver"
SERVER_DIR_PROJECT = f"/qinse/sspanel-mining"
SERVER_DIR_PROJECT = abspath(".")

# 文件数据库 目录根
SERVER_DIR_DATABASE = join(SERVER_DIR_PROJECT, "database")
Expand All @@ -59,7 +59,6 @@

# 采集器默认并发数
DEFAULT_POWER = os.cpu_count()

# 若chromedriver不在CHROMEDRIVER_PATH指定的路径下 尝试从环境变量中查找路径
if not exists(CHROMEDRIVER_PATH):
CHROMEDRIVER_PATH = None
CHROMEDRIVER_PATH = "chromedriver"
50 changes: 9 additions & 41 deletions src/database/staff_hosts/classifier/other_arch.txt
Original file line number Diff line number Diff line change
@@ -1,43 +1,11 @@
https://bilii.org/auth/register
https://quanquanvip.top/auth/register
https://yubanssr.xyz/auth/register
https://muniucloud.work/auth/register
https://spcloud.club/auth/register
https://v2plus.cc/auth/register
https://www.pokercloud.top/auth/register
https://imsun.pw/auth/register
https://oplktunm.com/auth/register
https://www.niee.cc/auth/register
https://www.wxret.xyz/auth/register
https://user.zxcloud.bid/auth/register
https://tan90.best/auth/register
https://1.akkcloud1.com/auth/register
https://www.zuoyou.today/auth/register
https://清澈.xyz/auth/register
https://yooookv2.top/auth/register
https://situcloud.ml/auth/register
https://de1.foreign-expat-tv.win/auth/register
https://www.ginfem.com/auth/register
https://萌云.cn/auth/register
https://www.qjyun.top/auth/register
https://www.lywlv2.com/auth/register
https://v2th.com/auth/register
https://youxiniang.top/auth/register
https://pucloud.co/auth/register
https://goacrossv2.club/auth/register
https://www.ytssr3.top/auth/register
https://geekcloud.network/auth/register
https://muguacloud.club/auth/register
https://52xiaohongniang.xyz/auth/register
https://www.mfv2ray.top/auth/register
https://susu.tw/auth/register
https://clyun.xyz/auth/register
https://www.ismao.xyz/auth/register
https://physicxx.com/auth/register
https://yubanssr.xyz/auth/register
https://ss.yunyunyun.date/auth/register
https://www.gftech.cc/auth/register
https://eins-klien.net/auth/register
https://kuaiyun888.com/auth/register
https://www.geekiya.com/auth/register
https://www.baihu8877.com/auth/register
https://88xxa.top/auth/register
https://www.dnsko.xyz/auth/register
https://1062b.top/auth/register
https://www.froggyvpn.top/auth/register
https://ssru6.pw/auth/register
https://www.xsty.vip/auth/register
https://www.bosjs.com/auth/register
https://ss.shangdaxue.win/auth/register
https://m.2aa.casa/auth/register
Loading

0 comments on commit 70f5c96

Please sign in to comment.