ElasticSearch仿京东搜索

2024-03-05 1950阅读

一:爬取京东数据

package com.esjd.Utils;
import lombok.SneakyThrows;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Demo that downloads a JD search-result page with jsoup and prints the image
 * attribute, price text and title text found in each {@code <li>} element.
 */
public class HtmlParseUtil {
    /**
     * Fetches the JD search page for the keyword "java" and dumps the scraped
     * values to standard output. Requires network access.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        // Page to request: https://search.jd.com/Search?keyword=java (needs network access).
        String url = "https://search.jd.com/Search?keyword=java";
        // Download and parse the page with a 30 000 ms timeout; the returned
        // Document plays the role of the browser's Document object.
        Document document = Jsoup.parse(new URL(url), 30000);
        // Look up the element whose id is J_goodsList.
        Element goodsList = document.getElementById("J_goodsList");
        if (goodsList == null) {
            // getElementById yields null when no element carries that id, for
            // instance if the site served a different page than expected.
            // Report it instead of failing with a NullPointerException.
            System.err.println("Element with id J_goodsList not found at " + url);
            return;
        }
        System.out.println(goodsList.html());
        // Walk every <li> element of the document.
        Elements listItems = document.getElementsByTag("li");
        for (Element listItem : listItems) {
            // The image address is read from the "data-lazy-img" attribute of the first <img>.
            String img = listItem.getElementsByTag("img").eq(0).attr("data-lazy-img");
            String price = listItem.getElementsByClass("p-price").eq(0).text();
            String title = listItem.getElementsByClass("p-name").eq(0).text();
            System.out.println("______________________________________--");
            System.out.println(img);
            System.out.println(price);
            System.out.println(title);
        }
    }
}
封装成工具类
/**
 * Scrapes the JD search-result page for {@code keyword} and returns one
 * {@link Content} per product entry, i.e. per {@code <li>} element that
 * carries the {@code gl-item} class.
 *
 * <p>Requires network access. Only markup present in the fetched HTML is
 * parsed; data the page would load afterwards via AJAX is not available.
 *
 * @param keyword search term; URL-encoded as UTF-8 before it is placed in the query string
 * @return the scraped entries in document order; empty when no entry matches
 */
@SneakyThrows
public List<Content> paresJD(String keyword) {
    // Encode the keyword so non-ASCII or reserved characters survive in the query string.
    String urlKeywords = URLEncoder.encode(keyword, "UTF-8");
    // Request looks like https://search.jd.com/Search?keyword=java
    String url = "https://search.jd.com/Search?keyword=" + urlKeywords + "&enc=utf-8";
    // Download and parse the page with a 30 000 ms timeout.
    Document document = Jsoup.parse(new URL(url), 30000);

    List<Content> goodsList = new ArrayList<>();
    for (Element item : document.getElementsByTag("li")) {
        // hasClass matches one class among several, so entries whose class
        // attribute holds "gl-item" plus further classes are no longer skipped
        // (comparing the whole attribute string would miss them).
        if (!item.hasClass("gl-item")) {
            continue;
        }
        // The image address is read from the "data-lazy-img" attribute of the first <img>.
        String img = item.getElementsByTag("img").eq(0).attr("data-lazy-img");
        String price = item.getElementsByClass("p-price").eq(0).text();
        String title = item.getElementsByClass("p-name").eq(0).text();

        Content content = new Content();
        content.setTitle(title);
        content.setPrice(price);
        content.setImg(img);
        goodsList.add(content);
    }
    return goodsList;
}
编写pojo类
/**
 * Data holder for one scraped search-result entry: its title, image address
 * and price, all kept as strings.
 *
 * <p>Accessors and constructors come from the Lombok annotations below; the
 * all-args constructor takes the fields in declaration order.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
public class Content {
    // Add further properties here as the business requirements dictate.
    // Title text of the entry.
    private  String title;
    // Image address of the entry.
    private String img;
    // Price text of the entry.
    private String price;
}
解析数据到es中
  @Autowired
// Note: @Autowired cannot be used on its own; it needs a Spring container to perform the injection.
  private RestHighLevelClient restHighLevelClient;
  //解析数据放入es中
  public Boolean  parseContent(String keywords) throws IOException {
      List contents = new HtmlParseUtil().paresJD(keywords);
      //把查询的数据放入es中
      BulkRequest bulkRequest = new BulkRequest();
      bulkRequest.timeout("2m");
      for (int i = 0; i  
对应的controller接口
// Service the endpoint below delegates to.
@Autowired
private ContentService contentService;

/**
 * Crawls data for the given keyword into Elasticsearch by delegating to
 * {@code contentService.parseContent}.
 *
 * @param keyword search term taken from the {@code {keyword}} path segment
 * @return the value returned by {@code contentService.parseContent(keyword)}
 * @throws IOException propagated from the service call
 */
@GetMapping("/pares/{keyword}")
public Boolean pares(@PathVariable("keyword") String keyword) throws IOException {
    Boolean outcome = contentService.parseContent(keyword);
    return outcome;
}
二:前后端分离进行搜索实现

搜索实现和搜索高亮实现

ElasticSearch仿京东搜索
(图片来源网络,侵删)

新建前端页面,编写调用后端搜索接口的请求代码

// Root Vue instance for the search page mounted on #app.
new Vue({
    el: "#app",
    data: {
        // Text currently typed into the search box.
        keyword: '',
        // Payload of the most recent search response.
        results: []
    },
    methods: {
        /**
         * Requests the first page (page 1, size 10) of highlighted search
         * results for the current keyword and binds the response data to
         * `results`.
         *
         * The non-highlighted paged search uses the path
         * "search/" + keyword + "/1/10" instead.
         */
        searchKey() {
            var keyword = this.keyword;
            console.log(keyword);
            // A blank keyword would yield a path with an empty segment, so skip the request.
            if (!keyword || !keyword.trim()) {
                return;
            }
            // Encode the keyword: characters such as "/", "?" or "#" would
            // otherwise change the shape of the URL path.
            var path = "/HighlightBuilder/" + encodeURIComponent(keyword) + "/1/10";
            axios.get(path).then(response => {
                console.log(response);
                // Bind the returned data to the view.
                this.results = response.data;
            }).catch(error => {
                // Surface failed requests instead of leaving the rejection unhandled.
                console.error(error);
            });
        }
    }
})
编写service层
  //2. 获取这些数据实现搜索功能
    public List searchPage(String keyword ,int  pageNo,int pageSize) throws IOException {
        if(pageNo
VPS购买请点击我

文章版权声明:除非注明,否则均为主机测评原创文章,转载或复制请以超链接形式注明出处。

目录[+]