From f4a0f2acc63d7785eab108419a4e16f5f688cb95 Mon Sep 17 00:00:00 2001
From: yujian <yujian@163.com>
Date: 星期六, 18 一月 2020 12:06:27 +0800
Subject: [PATCH] 用户注册信息
---
fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java | 97 ++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 88 insertions(+), 9 deletions(-)
diff --git a/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java b/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java
index d3aaa1c..65cb205 100644
--- a/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java
+++ b/fanli/src/main/java/com/yeshi/fanli/util/goods/jd/NYouHuiUtil.java
@@ -1,13 +1,19 @@
package com.yeshi.fanli.util.goods.jd;
import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
+
+import com.yeshi.fanli.entity.goods.jd.NYouHuiGoods;
/**
* 京东内优惠商品爬取 https://www.n-youhui.com
@@ -16,23 +22,96 @@
*
*/
public class NYouHuiUtil {
- public static List<String> getClasses() {
- List<String> classList = new ArrayList<>();
+
+ private static Document getDocument(String url) { // Downloads and parses the page at url via Jsoup; shared by the scraping methods of this class.
+ Document doc = null; // Stays null (and is returned as null) when the request below throws IOException.
try {
- Document doc = Jsoup.connect("https://www.n-youhui.com").timeout(20000)
+ doc = Jsoup.connect(url).timeout(20000) // Timeout value 20000 (milliseconds per the Jsoup Connection API); a desktop Chrome user agent is sent.
.userAgent(
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36")
.get();
- Element classesRoot = doc.getElementsByClass("widget_categories").get(0);
- Elements level0 = classesRoot.getElementsByClass("level-0");
- for (int i = 0; i < level0.size(); i++) {
- String name = level0.get(i).html().replace(" ", "").replaceAll("\\(([0-9]|,)*\\)", "").trim();
- classList.add(name);
- }
} catch (IOException e) {
e.printStackTrace();
}
+ return doc; // Callers must handle null: the IOException is only printed, not rethrown.
+ }
+
+    /**
+     * Reads the category names from the "widget_categories" element of the
+     * https://www.n-youhui.com home page.
+     *
+     * Each name is the HTML of a "level-0" element with spaces removed, any
+     * parenthesised run of digits/commas (e.g. "(1,234)") removed, the result
+     * trimmed, and "/" replaced by "-".
+     *
+     * @return the category names; empty (never null) when the page could not
+     *         be downloaded or contains no category widget
+     */
+    public static List<String> getClasses() {
+        List<String> classList = new ArrayList<>();
+        Document doc = getDocument("https://www.n-youhui.com");
+        // getDocument returns null when the download fails; report "no categories" instead of throwing NPE.
+        if (doc != null) {
+            Elements widgets = doc.getElementsByClass("widget_categories");
+            // Indexing an empty match with get(0) throws, so check before indexing.
+            if (!widgets.isEmpty()) {
+                for (Element option : widgets.get(0).getElementsByClass("level-0")) {
+                    // NOTE(review): the first replace target is rendered as a single space in this patch;
+                    // the original source may have used an HTML entity here - confirm against the repository.
+                    String name = option.html().replace(" ", "").replaceAll("\\(([0-9]|,)*\\)", "").trim();
+                    classList.add(name.replace("/", "-"));
+                }
+            }
+        }
+
return classList;
}
+    /**
+     * Lists the goods shown on one page of a category of https://www.n-youhui.com.
+     *
+     * The page URL is built as
+     * {@code https://www.n-youhui.com/<url-encoded, lower-cased className>/page/<page>}.
+     * Every {@code article} element inside the first "content" element yields one
+     * entry whose name and source URL come from its first link and whose publish
+     * time is the last space-separated token of its first "time" element.
+     *
+     * @param className category name used as the URL path segment (presumably a
+     *                  value returned by getClasses - confirm with callers)
+     * @param page      page number appended to the URL
+     * @return the goods found; empty (never null) when the page could not be
+     *         downloaded or has no "content" element. Articles lacking a link
+     *         or a "time" element are skipped.
+     */
+    public static List<NYouHuiGoods> listByClassName(String className, int page) {
+        List<NYouHuiGoods> goodsList = new ArrayList<>();
+        String url;
+        try {
+            // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
+            url = "https://www.n-youhui.com/"
+                    + URLEncoder.encode(className, "UTF-8").toLowerCase(java.util.Locale.ROOT) + "/page/" + page;
+        } catch (UnsupportedEncodingException e1) {
+            // Without a URL there is nothing to fetch; do not pass null on to getDocument.
+            e1.printStackTrace();
+            return goodsList;
+        }
+        Document doc = getDocument(url);
+        // getDocument returns null when the download fails.
+        if (doc == null) {
+            return goodsList;
+        }
+        Elements contents = doc.getElementsByClass("content");
+        // Indexing an empty match with get(0) throws, so check before indexing.
+        if (contents.isEmpty()) {
+            return goodsList;
+        }
+        for (Element article : contents.get(0).getElementsByTag("article")) {
+            Elements anchors = article.getElementsByTag("a");
+            Elements times = article.getElementsByClass("time");
+            // Skip malformed entries so every returned item has name, URL and time set.
+            if (anchors.isEmpty() || times.isEmpty()) {
+                continue;
+            }
+            Element anchor = anchors.get(0);
+            String time = times.get(0).ownText().trim();
+            NYouHuiGoods goods = new NYouHuiGoods();
+            goods.setName(anchor.ownText());
+            goods.setSourceUrl(anchor.attr("href"));
+            // Keep only the last space-separated token of the time text.
+            goods.setPublishTime(time.substring(time.lastIndexOf(' ') + 1));
+            goodsList.add(goods);
+        }
+        return goodsList;
+    }
+
+    /** Matches links of the form https://u.jd.com/ followed by 1 to 20 letters or digits. */
+    private static final Pattern JD_SHORT_LINK = Pattern.compile("(https://u\\.jd\\.com/)[0-9A-Za-z]{1,20}");
+
+    /**
+     * Loads a goods detail page and extracts its title, publish time,
+     * description text and the u.jd.com links contained in that text.
+     *
+     * The description is built from the {@code p} elements of the first
+     * "article-content" element: images are removed, {@code <br>} becomes a
+     * line break, and each matched link is replaced by a fixed placeholder.
+     *
+     * @param url address of the detail page; also stored as the source URL
+     * @return the parsed goods, or {@code null} when the page could not be
+     *         downloaded or lacks the "article-content", "article-title" link
+     *         or "article-meta" list item this method reads
+     */
+    public static NYouHuiGoods getGoodsDetail(String url) {
+        Document doc = getDocument(url);
+        // getDocument returns null when the download fails.
+        if (doc == null) {
+            return null;
+        }
+        Elements contents = doc.getElementsByClass("article-content");
+        Elements titles = doc.getElementsByClass("article-title");
+        Elements metas = doc.getElementsByClass("article-meta");
+        // Indexing an empty match with get(0) throws, so validate the page structure first.
+        if (contents.isEmpty() || titles.isEmpty() || metas.isEmpty()) {
+            return null;
+        }
+        Elements titleLinks = titles.get(0).getElementsByTag("a");
+        Elements metaItems = metas.get(0).getElementsByTag("li");
+        if (titleLinks.isEmpty() || metaItems.isEmpty()) {
+            return null;
+        }
+
+        StringBuilder builder = new StringBuilder();
+        for (Element paragraph : contents.get(0).getElementsByTag("p")) {
+            // Remove images
+            for (Element img : paragraph.getElementsByTag("img")) {
+                img.remove();
+            }
+            builder.append(paragraph.html().replace("<br>", "\n")).append('\n');
+        }
+        String text = builder.toString().trim();
+
+        List<String> urlList = new ArrayList<>();
+        Matcher m = JD_SHORT_LINK.matcher(text);
+        while (m.find()) {
+            urlList.add(m.group());
+        }
+
+        NYouHuiGoods goods = new NYouHuiGoods();
+        goods.setLinkList(urlList);
+        // NOTE(review): the placeholder literal below looks mis-encoded in this patch
+        // (probably a UTF-8/GBK mix-up); confirm the intended text against the repository.
+        goods.setDesc(JD_SHORT_LINK.matcher(text).replaceAll("[閾炬帴]"));
+        goods.setName(titleLinks.get(0).ownText());
+        String time = metaItems.get(0).ownText().trim();
+        // Keep only the last space-separated token of the meta text.
+        goods.setPublishTime(time.substring(time.lastIndexOf(' ') + 1));
+        goods.setSourceUrl(url);
+        return goods;
+    }
+
}
--
Gitblit v1.8.0