v0.1.4

加入 workflows
QIN2DIM · Oct 7, 2021 · 70f5c96 · 70f5c96
1 parent 73a91fb
commit 70f5c96
Show file tree

Hide file tree

Showing 26 changed files with 767 additions and 599 deletions.
diff --git a/.github/workflows/automated.yml b/.github/workflows/automated.yml
@@ -0,0 +1,72 @@
+name: V2RSS Mining
+
+on:
+  push:
+    branches: [ main ]
+#   Run twice a day
+#  schedule:
+#      - cron: '25 */12 * * *'
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    env:
+      TZ: "Asia/Shanghai"
+
+    steps:
+    # ============================================
+    # TODO [√] Check out the working branch and the Workflows runtime environment
+    # ============================================
+    - uses: actions/checkout@v2
+
+    # ============================================
+    # TODO [√] Set up the Python 3.6+ environment
+    # ============================================
+    - name: Set up Python 3.8
+      uses: actions/setup-python@v2
+      with:
+        # Quoted so YAML never reads the version as a float
+        python-version: "3.8"
+    # ============================================
+    # TODO [√] Install the project's third-party dependencies
+    # ============================================
+    # Install from requirements.txt
+    - name: Install dependencies
+      run: |
+          python -m pip install --upgrade pip
+          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+    # 1. Fetch the latest Chrome and the ChromeDriver version that matches it
+    # 2. Initialise the working directory
+    - name: v2rss build
+      run: |
+        python build.py
+    # ============================================
+    # TODO [√] Test the scaffold commands
+    # ============================================
+
+    - name: v2rss mining
+      run: |
+        python main.py
+#
+#    # Run one cleaning job to check that the subscription-listening module works
+#    - name: v2rss overdue/decouple
+#      run: |
+#        python main.py overdue
+#        python main.py decouple
+#
+#    # Run one collection job to check that the collector module works
+#    - name: v2rss spawn
+#      run: |
+#        python main.py spawn
+
+    # ============================================
+    # TODO [√] Update the repository data
+    # ============================================
+    - name: Setup GIT user
+      uses: fregante/setup-git-user@v1
+
+    - name: v2rss push
+      run: |
+        git add --all
+        # `git commit` exits non-zero when nothing is staged, which would fail the job,
+        # so only commit and push when the index actually differs from HEAD.
+        if git diff --cached --quiet; then
+          echo "No changes to commit"
+        else
+          git commit -m "Automated deployment @ $(date '+%Y-%m-%d %H:%M:%S') ${{ env.TZ }}"
+          git push --force origin main
+        fi
diff --git a/.gitignore b/.gitignore
@@ -128,4 +128,4 @@ dmypy.json
 # Pyre type checker
 .pyre/
 .idea/
-
+# Local ChromeDriver binary in the repository root (gitignore patterns must not start with "./")
+/chromedriver.exe
diff --git a/build.py b/build.py
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+# Time       : 2021/10/6 18:38
+# Author     : QIN2DIM
+# Github     : https://github.com/QIN2DIM
+# Description:
+import os
+import shlex
+
+import requests
+from bs4 import BeautifulSoup
+
+# Directory in which set_chromedriver() looks for the extracted chromedriver binary
+THIS_WALK = "."
+# Destination that run() passes to set_chromedriver()
+CHROMEDRIVER_UNZIP_PATH = "./chromedriver"
+
+
+def shell_echo(cmd: str, mode="default"):
+    """
+    Run a command line and return its exit status.
+
+    :param cmd: command line to execute
+    :param mode: ``"default"`` hands ``cmd`` to the shell through ``os.system``, so
+        redirections and pipes work; ``"safe"`` splits ``cmd`` with ``shlex`` and runs
+        it without a shell, so shell metacharacters are passed as plain arguments
+    :return: the ``os.system`` status in default mode, the process return code in safe mode
+    :raises ValueError: if ``mode`` is neither ``"default"`` nor ``"safe"``
+    """
+    if mode == "default":
+        return os.system(cmd)
+    if mode == "safe":
+        # Function-scope import: the file's import block does not provide subprocess.
+        import subprocess
+
+        # shlex.quote() on the whole command line would turn it into a single shell
+        # word, i.e. a program named after the entire string, which can never run.
+        # Splitting into an argument list and skipping the shell is the safe form.
+        return subprocess.call(shlex.split(cmd))
+    raise ValueError(f"unsupported mode: {mode!r}")
+
+
+def set_google_chrome():
+    """
+    Make sure Google Chrome is available, installing it when it is missing.
+
+    :return: True if Chrome is already present or was installed successfully,
+        False if the download or the installation failed
+    """
+    # Google Chrome already exists in the current environment
+    if shell_echo("google-chrome --version") == 0:
+        return True
+
+    # Function-scope import: the file's import block does not provide shutil.
+    import shutil
+
+    # Pick the package format from the package manager that is available.
+    # `apt` has no `localinstall` subcommand and cannot install an .rpm, so
+    # apt-based hosts get the .deb while yum-based hosts (e.g. CentOS 7) keep the .rpm.
+    if shutil.which("apt-get"):
+        package = "google-chrome-stable_current_amd64.deb"
+        install_cmd = f"sudo apt-get install -y ./{package} >/dev/null"
+    else:
+        package = "google-chrome-stable_current_x86_64.rpm"
+        install_cmd = f"sudo yum localinstall -y {package} >/dev/null"
+
+    try:
+        if shell_echo(f"wget https://dl.google.com/linux/direct/{package} >/dev/null") != 0:
+            return False
+        return shell_echo(install_cmd) == 0
+    finally:
+        # Do not leave the downloaded package behind in the working directory
+        if os.path.exists(package):
+            os.remove(package)
+
+
+def set_chromedriver(unzip_path=None):
+    """
+    Download the ChromeDriver release matching the installed Google Chrome and install it.
+
+    :param unzip_path: destination of the chromedriver binary;
+        ``/usr/bin/chromedriver`` when omitted
+    :raises RuntimeError: if the Chrome version cannot be read, the mirror lists no
+        matching driver, or the download / extraction fails
+    """
+
+    def _version_key(version):
+        # Compare versions numerically so that "94.0.4606.113" ranks above "94.0.4606.41"
+        return tuple(int(part) if part.isdigit() else -1 for part in version.split("."))
+
+    # Installation target of the chromedriver binary
+    unzip_path = "/usr/bin/chromedriver" if unzip_path is None else unzip_path
+
+    # Read the google-chrome release version, such as 89.0.4389.23
+    chrome_version = os.popen("google-chrome --version").read().strip().split(' ')[-1]
+    if not chrome_version:
+        raise RuntimeError("Unable to read the google-chrome version")
+
+    # Query the chromedriver mirror; the timeout keeps a stalled mirror from hanging the build
+    res = requests.get("http://npm.taobao.org/mirrors/chromedriver", timeout=30)
+    res.raise_for_status()
+    soup = BeautifulSoup(res.text, 'html.parser')
+
+    # Keep the listed releases that share Chrome's major version
+    major_prefix = chrome_version.split('.')[0] + '.'
+    options = [i.split('/')[0] for i in soup.text.split('\n') if i.startswith(major_prefix)]
+    if not options:
+        raise RuntimeError(f"No chromedriver release found for google-chrome {chrome_version}")
+    chromedriver_version = max(options, key=_version_key)
+
+    # Download chromedriver
+    if shell_echo(f"wget http://npm.taobao.org/mirrors/chromedriver/{chromedriver_version}"
+                  "/chromedriver_linux64.zip >/dev/null") != 0:
+        raise RuntimeError(f"Failed to download chromedriver {chromedriver_version}")
+
+    # Extract chromedriver; -o overwrites an existing file instead of prompting
+    if shell_echo("unzip -o chromedriver_linux64.zip >/dev/null") != 0:
+        raise RuntimeError("Failed to extract chromedriver_linux64.zip")
+
+    # The shell commands above run synchronously, so the binary must exist by now;
+    # fail loudly instead of spinning forever when it does not.
+    if not os.path.isfile(os.path.join(THIS_WALK, "chromedriver")):
+        raise RuntimeError("chromedriver was not found after extraction")
+
+    # Make chromedriver executable
+    shell_echo("chmod +x chromedriver >/dev/null")
+
+    # Move chromedriver to its destination unless it is already there
+    if os.path.abspath(unzip_path) != os.path.abspath("chromedriver"):
+        shell_echo(f"mv -f chromedriver {unzip_path} >/dev/null")
+
+
+def init_project():
+    """Delete the downloaded archives and clear the terminal."""
+    print("---> Remove irrelevant information")
+    cleanup_commands = (
+        "rm -rf chromedriver_linux64.zip",
+        "rm -rf google-chrome-stable_current_x86_64.rpm",
+        "clear",
+    )
+    for command in cleanup_commands:
+        shell_echo(command)
+
+
+def run():
+    """Run the setup steps in order: Chrome, then ChromeDriver, then cleanup."""
+    set_google_chrome()
+
+    # ChromeDriver is installed to the module-level default destination
+    driver_target = CHROMEDRIVER_UNZIP_PATH
+    set_chromedriver(driver_target)
+
+    # Remove the files left over from the setup steps
+    init_project()
+
+
+# Run the setup only when this file is executed as a script
+if __name__ == '__main__':
+    run()
diff --git a/chromedriver.exe b/chromedriver.exe
diff --git a/examples/__init__.py b/examples/__init__.py
@@ -3,4 +3,4 @@
 # Author     : QIN2DIM
 # Github     : https://github.com/QIN2DIM
 # Description:
-from .scaffold import demo
+from .get_started import demo
diff --git a/examples/scaffold.py → examples/get_started.py b/examples/scaffold.py → examples/get_started.py
@@ -1,13 +1,13 @@
-from src.apis import staff_api
+from src.apis import v2rss_api
 from src.config import DEFAULT_POWER
 
 
 def demo():
     # 判断是否是初次运行
-    use_collector = staff_api.is_first_run()
+    use_collector = v2rss_api.is_first_run()
 
     # 开启采集器
-    classify_dir, staff_info = staff_api.go(
+    classify_dir, staff_info = v2rss_api.go(
         # debug：执行模式 仅影响日志输出形式 不影响行为
         debug=False,
 
@@ -51,7 +51,7 @@ def demo():
         use_generator=False,
     )
     # 链接去重
-    staff_api.refresh_cache(mode='de-dup')
+    v2rss_api.refresh_cache(mode='de-dup')
     # 预览缓存数据
     print(f"\n\nSTAFF INFO\n{'_' * 32}")
     for element in staff_info.items():

diff --git a/main.py b/main.py
@@ -9,6 +9,6 @@
 
 from examples import demo
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     # TODO 本项目部分流量需要过墙 请开启系统代理
     demo()
diff --git a/src/apis/__init__.py b/src/apis/__init__.py
@@ -1 +1 @@
-from .staff_api import staff_api
+from .v2rss_api import v2rss_api
diff --git a/src/apis/staff_api.py → src/apis/v2rss_api.py b/src/apis/staff_api.py → src/apis/v2rss_api.py
@@ -11,16 +11,16 @@
     - 本模块内嵌协程启动项，请勿从外部并发调用此模块，此举将引起重大复写灾难
     - 本模块网络通信基于协程进行，在程序调试时请注释掉 monkey包
 """
-__all__ = ['staff_api']
+__all__ = ['v2rss_api']
 
 from gevent import monkey
 
 monkey.patch_all()
 import os
-
+from datetime import datetime
 from src.config import logger, SERVER_DIR_DATABASE, CHROMEDRIVER_PATH
-from src.staff_mining import StaffChecker, StaffCollector, IdentifyRecaptcha, StaffEntropyGenerator
-from src.staff_mining.common.exceptions import *
+from src.sspanel_mining import StaffChecker, StaffCollector, IdentifyRecaptcha, StaffEntropyGenerator
+from src.sspanel_mining.common.exceptions import *
 
 
 class _Interface(object):
@@ -29,7 +29,8 @@ def __init__(self):
         self._cache_dir_staff_hosts = os.path.join(SERVER_DIR_DATABASE, "staff_hosts")
         self._cache_dir_classifier = os.path.join(self._cache_dir_staff_hosts, "classifier")
 
-        self._cache_path_staff_hosts = os.path.join(self._cache_dir_staff_hosts, "staff_host.txt")
+        self._cache_path_staff_hosts = os.path.join(self._cache_dir_staff_hosts,
+                                                    f"staff_host_{str(datetime.now()).split(' ')[0]}.txt")
         self._path_staff_arch_recaptcha = os.path.join(self._cache_dir_classifier, "staff_arch_recaptcha.txt")
         self._path_staff_arch_entropy = os.path.join(self._cache_dir_classifier, "staff_arch_entropy.txt")
 
@@ -247,6 +248,8 @@ def is_first_run(self) -> bool:
                 - False: at least one cycle has been successfully completed
         """
         if not os.path.exists(self._cache_path_staff_hosts):
+            with open(self._cache_path_staff_hosts, 'w', encoding="utf8"):
+                pass
             return True
         else:
             with open(self._cache_path_staff_hosts, 'r', encoding="utf8") as f:
@@ -325,10 +328,10 @@ def go(self, debug: bool = False, silence: bool = True, power: int = os.cpu_coun
         return self.extractor()
 
 
-staff_api = _Interface()
+v2rss_api = _Interface()
 
 if __name__ == '__main__':
-    staff_api.go(
+    v2rss_api.go(
         debug=False,
         silence=True,
         power=32,

diff --git a/src/config.py b/src/config.py
@@ -1,5 +1,5 @@
 import os
-from os.path import dirname, join, exists
+from os.path import dirname, join, exists,abspath
 from sys import platform
 
 from loguru import logger
@@ -31,12 +31,12 @@
 # ---------------------------------------------------
 if "win" in platform:
     # 定位chromedriver根目录
-    CHROMEDRIVER_PATH = dirname(__file__) + "/chromedriver.exe"
+    CHROMEDRIVER_PATH = "./chromedriver.exe"
     # 定位工程根目录 SERVER_DIR_PROJECT
     SERVER_DIR_PROJECT = dirname(__file__)
 else:
     CHROMEDRIVER_PATH = dirname(__file__) + "/chromedriver"
-    SERVER_DIR_PROJECT = f"/qinse/sspanel-mining"
+    SERVER_DIR_PROJECT = abspath(".")
 
 # 文件数据库 目录根
 SERVER_DIR_DATABASE = join(SERVER_DIR_PROJECT, "database")
@@ -59,7 +59,6 @@
 
 # 采集器默认并发数
 DEFAULT_POWER = os.cpu_count()
-
 # 若chromedriver不在CHROMEDRIVER_PATH指定的路径下 尝试从环境变量中查找路径
 if not exists(CHROMEDRIVER_PATH):
-    CHROMEDRIVER_PATH = None
+    CHROMEDRIVER_PATH = "chromedriver"
diff --git a/src/database/staff_hosts/classifier/other_arch.txt b/src/database/staff_hosts/classifier/other_arch.txt
@@ -1,43 +1,11 @@
-https://bilii.org/auth/register
-https://quanquanvip.top/auth/register
-https://yubanssr.xyz/auth/register
-https://muniucloud.work/auth/register
-https://spcloud.club/auth/register
-https://v2plus.cc/auth/register
-https://www.pokercloud.top/auth/register
-https://imsun.pw/auth/register
-https://oplktunm.com/auth/register
-https://www.niee.cc/auth/register
-https://www.wxret.xyz/auth/register
-https://user.zxcloud.bid/auth/register
-https://tan90.best/auth/register
-https://1.akkcloud1.com/auth/register
-https://www.zuoyou.today/auth/register
-https://清澈.xyz/auth/register
+https://yooookv2.top/auth/register
+https://situcloud.ml/auth/register
 https://de1.foreign-expat-tv.win/auth/register
-https://www.ginfem.com/auth/register
-https://萌云.cn/auth/register
-https://www.qjyun.top/auth/register
-https://www.lywlv2.com/auth/register
-https://v2th.com/auth/register
-https://youxiniang.top/auth/register
-https://pucloud.co/auth/register
-https://goacrossv2.club/auth/register
-https://www.ytssr3.top/auth/register
-https://geekcloud.network/auth/register
-https://muguacloud.club/auth/register
-https://52xiaohongniang.xyz/auth/register
-https://www.mfv2ray.top/auth/register
-https://susu.tw/auth/register
-https://clyun.xyz/auth/register
-https://www.ismao.xyz/auth/register
+https://physicxx.com/auth/register
+https://yubanssr.xyz/auth/register
+https://ss.yunyunyun.date/auth/register
+https://www.gftech.cc/auth/register
+https://eins-klien.net/auth/register
 https://kuaiyun888.com/auth/register
-https://www.geekiya.com/auth/register
-https://www.baihu8877.com/auth/register
-https://88xxa.top/auth/register
-https://www.dnsko.xyz/auth/register
-https://1062b.top/auth/register
-https://www.froggyvpn.top/auth/register
-https://ssru6.pw/auth/register
-https://www.xsty.vip/auth/register
-https://www.bosjs.com/auth/register
+https://ss.shangdaxue.win/auth/register
+https://m.2aa.casa/auth/register