From 5e7b0ed4a154ad067cbcf4aa1a1c7cce32f9864c Mon Sep 17 00:00:00 2001
From: admin <weikou2014>
Date: 星期五, 26 四月 2024 18:02:17 +0800
Subject: [PATCH] 唯品会链接解析升级

---
 fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java |  234 +++++++++++++++++++++++++++++-----------------------------
 1 files changed, 117 insertions(+), 117 deletions(-)

diff --git a/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java b/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java
index 65cb205..eb056c9 100644
--- a/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java
+++ b/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java
@@ -1,117 +1,117 @@
-package com.yeshi.fanli.util.goods.jd;
-
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-
-import com.yeshi.fanli.entity.goods.jd.NYouHuiGoods;
-
-/**
- * 浜笢鍐呬紭鎯犲晢鍝佺埇鍙� https://www.n-youhui.com
- * 
- * @author Administrator
- *
- */
-public class NYouHuiUtil {
-
-	private static Document getDocument(String url) {
-		Document doc = null;
-		try {
-			doc = Jsoup.connect(url).timeout(20000)
-					.userAgent(
-							"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36")
-					.get();
-		} catch (IOException e) {
-			e.printStackTrace();
-		}
-		return doc;
-	}
-
-	public static List<String> getClasses() {
-		List<String> classList = new ArrayList<>();
-		Document doc = getDocument("https://www.n-youhui.com");
-		Element classesRoot = doc.getElementsByClass("widget_categories").get(0);
-		Elements level0 = classesRoot.getElementsByClass("level-0");
-		for (int i = 0; i < level0.size(); i++) {
-			String name = level0.get(i).html().replace("&nbsp;", "").replaceAll("\\(([0-9]|,)*\\)", "").trim();
-			classList.add(name.replace("/", "-"));
-		}
-
-		return classList;
-	}
-
-	/**
-	 * 
-	 * @param className
-	 * @return
-	 */
-	public static List<NYouHuiGoods> listByClassName(String className, int page) {
-		List<NYouHuiGoods> goodsList = new ArrayList<>();
-		String url = null;
-		try {
-			url = "https://www.n-youhui.com/" + URLEncoder.encode(className, "UTF-8").toLowerCase() + "/page/" + page;
-		} catch (UnsupportedEncodingException e1) {
-			e1.printStackTrace();
-		}
-		Document doc = getDocument(url);
-		Element content = doc.getElementsByClass("content").get(0);
-		if (content != null) {
-			Elements articles = content.getElementsByTag("article");
-			if (articles != null)
-				for (int i = 0; i < articles.size(); i++) {
-					String link = articles.get(i).getElementsByTag("a").get(0).attr("href");
-					String name = articles.get(i).getElementsByTag("a").get(0).ownText();
-					String time = articles.get(i).getElementsByClass("time").get(0).ownText();
-					time = time.split(" ")[time.split(" ").length - 1];
-					NYouHuiGoods goods = new NYouHuiGoods();
-					goods.setName(name);
-					goods.setSourceUrl(link);
-					goods.setPublishTime(time);
-					goodsList.add(goods);
-				}
-		}
-		return goodsList;
-	}
-
-	public static NYouHuiGoods getGoodsDetail(String url) {
-		NYouHuiGoods goods = new NYouHuiGoods();
-		Document doc = getDocument(url);
-		Element article = doc.getElementsByClass("article-content").get(0);
-		Elements ps = article.getElementsByTag("p");
-		String text = "";
-		for (int i = 0; i < ps.size(); i++) {
-			// 绉婚櫎鍥剧墖
-			Elements imgList = ps.get(i).getElementsByTag("img");
-			for (int j = 0; j < imgList.size(); j++)
-				imgList.get(j).remove();
-			text += ps.get(i).html().replace("<br>", "\n") + "\n";
-		}
-		
-		text=text.trim();
-
-		String regex = "(https://u\\.jd\\.com/)[0-9A-Za-z]{1,20}";
-		Pattern pattern = Pattern.compile(regex);
-		Matcher m = pattern.matcher(text);
-		List<String> urlList = new ArrayList<>();
-		while (m.find()) {
-			urlList.add(m.group());
-		}
-		goods.setLinkList(urlList);
-		goods.setDesc(text.replaceAll(regex, "[閾炬帴]"));
-		goods.setName(doc.getElementsByClass("article-title").get(0).getElementsByTag("a").get(0).ownText());
-		String time = doc.getElementsByClass("article-meta").get(0).getElementsByTag("li").get(0).ownText().trim();
-		goods.setPublishTime(time.split(" ")[time.split(" ").length - 1]);
-		goods.setSourceUrl(url);
-		return goods;
-	}
-
-}
+package com.yeshi.fanli.util.goods.jd;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import com.yeshi.fanli.entity.goods.jd.NYouHuiGoods;
+
+/**
+ * Crawler for JD (jd.com) internal-discount goods listed on https://www.n-youhui.com
+ * Static helpers that fetch pages with Jsoup and pull category names, goods listings and goods details out of the HTML.
+ * @author Administrator
+ *
+ */
+public class NYouHuiUtil {
+	/** Downloads and parses {@code url} (timeout 20000, desktop Chrome user agent); prints the stack trace and returns null on IOException. */
+	private static Document getDocument(String url) {
+		Document doc = null;
+		try {
+			doc = Jsoup.connect(url).timeout(20000)
+					.userAgent(
+							"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36")
+					.get();
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+		return doc;
+	}
+	/** Returns the cleaned-up HTML of every "level-0" element under the first "widget_categories" element of the home page, with "/" replaced by "-". */
+	public static List<String> getClasses() {
+		List<String> classList = new ArrayList<>();
+		Document doc = getDocument("https://www.n-youhui.com");
+		Element classesRoot = doc.getElementsByClass("widget_categories").get(0); // NOTE(review): doc is null if the fetch failed; get(0) throws when nothing matches
+		Elements level0 = classesRoot.getElementsByClass("level-0");
+		for (int i = 0; i < level0.size(); i++) { // drop "&nbsp;" and any parenthesised run of digits/commas (presumably an item count), then trim
+			String name = level0.get(i).html().replace("&nbsp;", "").replaceAll("\\(([0-9]|,)*\\)", "").trim();
+			classList.add(name.replace("/", "-"));
+		}
+
+		return classList;
+	}
+	/**
+	 * Lists the goods on one page of a category; the page address is the site root + encoded className + "/page/" + page.
+	 * @param className category name; it is URL-encoded as UTF-8 and lower-cased to form the path segment
+	 * @param page page number appended after "/page/"
+	 * @return one NYouHuiGoods (name, source URL, publish time) per article element inside the first "content" element
+	 */
+	public static List<NYouHuiGoods> listByClassName(String className, int page) {
+		List<NYouHuiGoods> goodsList = new ArrayList<>();
+		String url = null;
+		try {
+			url = "https://www.n-youhui.com/" + URLEncoder.encode(className, "UTF-8").toLowerCase() + "/page/" + page;
+		} catch (UnsupportedEncodingException e1) {
+			e1.printStackTrace();
+		}
+		Document doc = getDocument(url); // NOTE(review): doc is null if the fetch failed, so the next line would throw NullPointerException
+		Element content = doc.getElementsByClass("content").get(0); // NOTE(review): get(0) throws on an empty result instead of returning null, so the null check below looks redundant
+		if (content != null) {
+			Elements articles = content.getElementsByTag("article");
+			if (articles != null)
+				for (int i = 0; i < articles.size(); i++) {
+					String link = articles.get(i).getElementsByTag("a").get(0).attr("href");
+					String name = articles.get(i).getElementsByTag("a").get(0).ownText();
+					String time = articles.get(i).getElementsByClass("time").get(0).ownText();
+					time = time.split(" ")[time.split(" ").length - 1]; // keep only the last space-separated token
+					NYouHuiGoods goods = new NYouHuiGoods();
+					goods.setName(name);
+					goods.setSourceUrl(link);
+					goods.setPublishTime(time);
+					goodsList.add(goods);
+				}
+		}
+		return goodsList;
+	}
+	/** Scrapes one goods detail page: paragraph text of the first "article-content" element (images removed), the u.jd.com short links found in it, the title and the publish time. */
+	public static NYouHuiGoods getGoodsDetail(String url) {
+		NYouHuiGoods goods = new NYouHuiGoods();
+		Document doc = getDocument(url);
+		Element article = doc.getElementsByClass("article-content").get(0);
+		Elements ps = article.getElementsByTag("p");
+		String text = "";
+		for (int i = 0; i < ps.size(); i++) {
+			// Remove images
+			Elements imgList = ps.get(i).getElementsByTag("img");
+			for (int j = 0; j < imgList.size(); j++)
+				imgList.get(j).remove();
+			text += ps.get(i).html().replace("<br>", "\n") + "\n"; // <br> tags become newlines; each paragraph ends with a newline
+		}
+		
+		text=text.trim();
+		// Matches a u.jd.com short link: the https host prefix followed by 1 to 20 ASCII letters or digits
+		String regex = "(https://u\\.jd\\.com/)[0-9A-Za-z]{1,20}";
+		Pattern pattern = Pattern.compile(regex);
+		Matcher m = pattern.matcher(text);
+		List<String> urlList = new ArrayList<>();
+		while (m.find()) {
+			urlList.add(m.group());
+		}
+		goods.setLinkList(urlList);
+		goods.setDesc(text.replaceAll(regex, "[閾炬帴]")); // every short link is replaced by a bracketed placeholder; NOTE(review): the literal looks mis-encoded (probably the Chinese word for "link") - confirm the file encoding
+		goods.setName(doc.getElementsByClass("article-title").get(0).getElementsByTag("a").get(0).ownText());
+		String time = doc.getElementsByClass("article-meta").get(0).getElementsByTag("li").get(0).ownText().trim();
+		goods.setPublishTime(time.split(" ")[time.split(" ").length - 1]); // keep only the last space-separated token
+		goods.setSourceUrl(url);
+		return goods;
+	}
+
+}

--
Gitblit v1.8.0