From 8dc8133fb93405c6fc34c9c3c6c6bbce09ebe7f0 Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期二, 24 十二月 2019 10:52:52 +0800 Subject: [PATCH] 品牌商品更新调整 订单维权修改 --- fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java | 97 ++++++++++++++++++++++++++++++++++++++++++++---- 1 files changed, 88 insertions(+), 9 deletions(-) diff --git a/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java b/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java index d3aaa1c..d04d007 100644 --- a/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java +++ b/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java @@ -1,13 +1,19 @@ package com.yeshi.fanli.util.goods.jd; import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; + +import com.yeshi.fanli.dao.goods.jd.NYouHuiGoods; /** * 浜笢鍐呬紭鎯犲晢鍝佺埇鍙� https://www.n-youhui.com @@ -16,23 +22,96 @@ * */ public class NYouHuiUtil { - public static List<String> getClasses() { - List<String> classList = new ArrayList<>(); + + private static Document getDocument(String url) { + Document doc = null; try { - Document doc = Jsoup.connect("https://www.n-youhui.com").timeout(20000) + doc = Jsoup.connect(url).timeout(20000) .userAgent( "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36") .get(); - Element classesRoot = doc.getElementsByClass("widget_categories").get(0); - Elements level0 = classesRoot.getElementsByClass("level-0"); - for (int i = 0; i < level0.size(); i++) { - String name = level0.get(i).html().replace(" ", "").replaceAll("\\(([0-9]|,)*\\)", "").trim(); - classList.add(name); - } } catch (IOException e) { e.printStackTrace(); } + return doc; + } + + public static List<String> getClasses() { + List<String> classList = new ArrayList<>(); + Document doc = getDocument("https://www.n-youhui.com"); + Element classesRoot = doc.getElementsByClass("widget_categories").get(0); + Elements level0 = classesRoot.getElementsByClass("level-0"); + for (int i = 0; i < level0.size(); i++) { + String name = level0.get(i).html().replace(" ", "").replaceAll("\\(([0-9]|,)*\\)", "").trim(); + classList.add(name); + } + return classList; } + /** + * + * @param className + * @return + */ + public static List<NYouHuiGoods> listByClassName(String className, int page) { + List<NYouHuiGoods> goodsList = new ArrayList<>(); + String url = null; + try { + url = "https://www.n-youhui.com/" + URLEncoder.encode(className, "UTF-8").toLowerCase() + "/page/" + page; + } catch (UnsupportedEncodingException e1) { + e1.printStackTrace(); + } + Document doc = getDocument(url); + Element content = doc.getElementsByClass("content").get(0); + if (content != null) { + Elements articles = content.getElementsByTag("article"); + if (articles != null) + for (int i = 0; i < articles.size(); i++) { + String link = articles.get(i).getElementsByTag("a").get(0).attr("href"); + String name = articles.get(i).getElementsByTag("a").get(0).ownText(); + String time = articles.get(i).getElementsByClass("time").get(0).ownText(); + time = time.split(" ")[time.split(" ").length - 1]; + NYouHuiGoods goods = new NYouHuiGoods(); + goods.setName(name); + goods.setSourceUrl(link); + goods.setPublishTime(time); + goodsList.add(goods); + } + } + return goodsList; + } + + public static NYouHuiGoods getGoodsDetail(String url) { + NYouHuiGoods goods = new NYouHuiGoods(); + Document doc = getDocument(url); + Element article = doc.getElementsByClass("article-content").get(0); + Elements ps = article.getElementsByTag("p"); + String text = ""; + for (int i = 0; i < ps.size(); i++) { + // 绉婚櫎鍥剧墖 + Elements imgList = ps.get(i).getElementsByTag("img"); + for (int j = 0; j < imgList.size(); j++) + imgList.get(j).remove(); + text += ps.get(i).html().replace("<br>", "\n") + "\n"; + } + + text=text.trim(); + + String regex = "(https://u\\.jd\\.com/)[0-9A-Za-z]{1,20}"; + Pattern pattern = Pattern.compile(regex); + Matcher m = pattern.matcher(text); + List<String> urlList = new ArrayList<>(); + while (m.find()) { + urlList.add(m.group()); + } + goods.setLinkList(urlList); + goods.setDesc(text.replaceAll(regex, "[閾炬帴]")); + goods.setName(doc.getElementsByClass("article-title").get(0).getElementsByTag("a").get(0).ownText()); + String time = doc.getElementsByClass("article-meta").get(0).getElementsByTag("li").get(0).ownText().trim(); + goods.setPublishTime(time.split(" ")[time.split(" ").length - 1]); + goods.setSourceUrl(url); + return goods; + } + } -- Gitblit v1.8.0