C++ 图论学习 3
使用 C++ 实现一个针对企业内部文件的网络爬虫,尽可能利用 C++ 的新特性和图的遍历算法。
(图片来源网络,侵删)
#include #include #include #include #include #include #include #include #include #include #include namespace fs = std::filesystem; class InternalCrawler { private: std::unordered_set visited; std::queue to_visit; std::mutex mtx; std::atomic running{true}; std::vector threads; std::regex file_pattern; fs::path root_dir; void crawl_file(const fs::path& file_path) { std::ifstream file(file_path); std::string line; while (std::getline(file, line)) { std::smatch match; if (std::regex_search(line, match, file_pattern)) { std::string new_file = match.str(); fs::path new_path = root_dir / new_file; if (fs::exists(new_path)) { std::lock_guard lock(mtx); if (visited.find(new_file) == visited.end()) { to_visit.push(new_file); visited.insert(new_file); } } } } } void worker() { while (running) { std::string current_file; { std::lock_guard lock(mtx); if (to_visit.empty()) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); continue; } current_file = to_visit.front(); to_visit.pop(); } crawl_file(root_dir / current_file); } } public: InternalCrawler(const fs::path& root, const std::string& pattern, int num_threads = 4) : root_dir(root), file_pattern(pattern) { for (int i = 0; i
文章版权声明:除非注明,否则均为主机测评原创文章,转载或复制请以超链接形式并注明出处。