c++ 图论学习3

06-29 1151阅读

使用 C++ 实现一个针对企业内部文件的网络爬虫,并尽可能利用 C++ 的新特性和图的遍历算法。

c++ 图论学习3
(图片来源网络,侵删)
#include <atomic>
#include <chrono>
#include <filesystem>
#include <fstream>
#include <mutex>
#include <queue>
#include <regex>
#include <string>
#include <thread>
#include <unordered_set>
#include <vector>
namespace fs = std::filesystem;
class InternalCrawler {
private:
    std::unordered_set visited;
    std::queue to_visit;
    std::mutex mtx;
    std::atomic running{true};
    std::vector threads;
    std::regex file_pattern;
    fs::path root_dir;
    /**
     * Scans one file line by line and enqueues every referenced file that
     * has not been seen yet.
     *
     * Every match of `file_pattern` on a line (not just the first one) is
     * interpreted as a file name relative to `root_dir`. A match is queued
     * only if the resulting path exists and the name is not already in
     * `visited`.
     *
     * @param file_path Path of the file to scan. If it cannot be opened the
     *                  function returns without doing anything.
     */
    void crawl_file(const fs::path& file_path) {
        std::ifstream file(file_path);
        if (!file) {
            return;
        }

        const std::sregex_iterator no_more_matches;
        std::string line;
        while (std::getline(file, line)) {
            // Walk all matches so that several references on one line are
            // all discovered.
            for (std::sregex_iterator it(line.begin(), line.end(), file_pattern);
                 it != no_more_matches; ++it) {
                const std::string new_file = it->str();

                // Non-throwing overload: this runs on worker threads, where
                // an escaping filesystem_error would terminate the process.
                // On error exists() reports false and the match is skipped.
                std::error_code ec;
                if (!fs::exists(root_dir / new_file, ec)) {
                    continue;
                }

                std::lock_guard lock(mtx);
                // insert() reports whether the name was new, so one lookup
                // both deduplicates and marks the file as seen.
                if (visited.insert(new_file).second) {
                    to_visit.push(new_file);
                }
            }
        }
    }
    /**
     * Worker loop: repeatedly takes the next file name from `to_visit` and
     * crawls it, until `running` becomes false.
     *
     * When the queue is empty the worker backs off for 100 ms before polling
     * again. The back-off happens after `mtx` has been released, so an idle
     * worker never prevents other threads from pushing new work.
     */
    void worker() {
        while (running) {
            std::string current_file;
            bool have_work = false;
            {
                // Keep the critical section limited to the queue access.
                std::lock_guard lock(mtx);
                if (!to_visit.empty()) {
                    current_file = to_visit.front();
                    to_visit.pop();
                    have_work = true;
                }
            }

            if (!have_work) {
                // Sleep outside the lock; sleeping while holding `mtx` would
                // stall every thread that tries to enqueue or dequeue.
                std::this_thread::sleep_for(std::chrono::milliseconds(100));
                continue;
            }

            crawl_file(root_dir / current_file);
        }
    }
public:
    InternalCrawler(const fs::path& root, const std::string& pattern, int num_threads = 4)
        : root_dir(root), file_pattern(pattern) {
        for (int i = 0; i 
VPS购买请点击我

文章版权声明:除非注明,否则均为主机测评原创文章,转载或复制请以超链接形式并注明出处。

目录[+]