ElasticSearch仿京东搜索
一:爬取京东数据
package com.esjd.Utils;

import lombok.SneakyThrows;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.net.MalformedURLException;
import java.net.URL;

/**
 * Stand-alone demo that downloads a JD search result page with jsoup and prints
 * the image, price and title text found in every {@code <li>} element.
 */
public class HtmlParseUtil {

    /**
     * Fetches the JD search page for the keyword "java", prints the inner HTML of
     * the element with id {@code J_goodsList}, then prints the
     * {@code data-lazy-img} attribute, the {@code p-price} text and the
     * {@code p-name} text of every {@code <li>} in the document.
     *
     * <p>Requires network access. Checked exceptions (malformed URL, I/O failure)
     * are rethrown unchanged by Lombok's {@code @SneakyThrows}.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        // Request target: https://search.jd.com/Search?keyword=java (needs network access).
        String url = "https://search.jd.com/Search?keyword=java";

        // Parse the page; the returned Document plays the role of the browser's Document object.
        // The second argument is the request timeout in milliseconds.
        Document document = Jsoup.parse(new URL(url), 30000);

        // Look up the element with id J_goodsList.
        Element element = document.getElementById("J_goodsList");
        if (element == null) {
            // getElementById returns null when the id is absent; fail with a readable
            // message instead of a NullPointerException on element.html().
            System.err.println("Element with id J_goodsList not found in the response from " + url);
            return;
        }
        System.out.println(element.html());

        // Walk every <li> element of the document.
        Elements elements = document.getElementsByTag("li");
        for (Element element1 : elements) {
            String img = element1.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = element1.getElementsByClass("p-price").eq(0).text();
            String title = element1.getElementsByClass("p-name").eq(0).text();
            System.out.println("______________________________________--");
            System.out.println(img);
            System.out.println(price);
            System.out.println(title);
        }
    }
}
封装成工具类
@SneakyThrows public List paresJD(String keyword){ //获取请求 https://search.jd.com/Search?keyword=java 需要联网 String urlKeywords = URLEncoder.encode(keyword, "UTF-8"); //获取请求 https://search.jd.com/Search?keyword=java //前提: 需要联网, 而且不能获取到AJAX! String url ="https://search.jd.com/Search?keyword=" + urlKeywords + "&enc=utf-8"; //解析网页 jsoup返回的 Document就是游览器 Document对象 Document document = Jsoup.parse(new URL(url),30000); //获取网页idJ_goodsList Element element = document.getElementById("J_goodsList"); //System.out.println(element.html()); //获取所有的li元素 Elements elements = document.getElementsByTag("li"); ArrayList goodsList = new ArrayList(); for (Element element1 : elements) { if (element1.attr("class").equalsIgnoreCase("gl-item")) { String img = element1.getElementsByTag("img").eq(0).attr("data-lazy-img"); String price = element1.getElementsByClass("p-price").eq(0).text(); String title = element1.getElementsByClass("p-name").eq(0).text(); Content content = new Content(); content.setTitle(title); content.setPrice(price); content.setImg(img); goodsList.add(content); } } return goodsList; }
编写pojo类
@Data @AllArgsConstructor @NoArgsConstructor public class Content { //根据业务需求自己添加属性 private String title; private String img; private String price; }
解析数据到es中
@Autowired // 不能直接使用 @Autowired 需要spring容器 private RestHighLevelClient restHighLevelClient; //解析数据放入es中 public Boolean parseContent(String keywords) throws IOException { List contents = new HtmlParseUtil().paresJD(keywords); //把查询的数据放入es中 BulkRequest bulkRequest = new BulkRequest(); bulkRequest.timeout("2m"); for (int i = 0; i对应的controller接口
@Autowired private ContentService contentService; //爬取数据到es中 @GetMapping("/pares/{keyword}") public Boolean pares(@PathVariable("keyword") String keyword) throws IOException{ return contentService.parseContent(keyword); }二:前后端分离进行搜索实现
搜索实现和搜索高亮实现
(图片来源网络,侵删)

新建前端模板进行请求接口编写
// Vue instance bound to #app: holds the search keyword and the result list.
new Vue({
    el: "#app",
    data: {
        keyword: '',
        results: []
    },
    methods: {
        /**
         * Requests page 1 (10 items) of the highlighted search results for the
         * current keyword and binds the response body to `results`.
         *
         * The plain paged search without highlighting is available through
         * "search/" + keyword + "/1/10" and is bound the same way.
         */
        searchKey() {
            const keyword = this.keyword;
            console.log(keyword);

            // A blank keyword would produce an empty path segment; skip the request.
            if (!keyword || !keyword.trim()) {
                return;
            }

            // Encode the keyword so characters such as "/", "?", "#" or "%"
            // cannot change the structure of the request path.
            const url = "/HighlightBuilder/" + encodeURIComponent(keyword) + "/1/10";

            axios.get(url)
                .then(response => {
                    console.log(response);
                    // Bind the returned data to the view.
                    this.results = response.data;
                })
                .catch(error => {
                    // Surface failed requests instead of leaving the rejection unhandled.
                    console.error(error);
                });
        }
    }
})

编写service层
//2. 获取这些数据实现搜索功能 public List searchPage(String keyword ,int pageNo,int pageSize) throws IOException { if(pageNo
文章版权声明:除非注明,否则均为主机测评原创文章,转载或复制请以超链接形式并注明出处。