From e2c9fab4823b9a9a8a186951faa5a54d1f57fb1f Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期四, 17 十月 2019 14:58:29 +0800 Subject: [PATCH] 订单爬取优化 增加日志类型 --- fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 files changed, 152 insertions(+), 4 deletions(-) diff --git a/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java b/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java index 253f227..cde7b97 100644 --- a/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java +++ b/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java @@ -3,6 +3,7 @@ import java.io.UnsupportedEncodingException; import java.lang.reflect.Type; import java.math.BigDecimal; +import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.HashMap; @@ -14,11 +15,16 @@ import javax.annotation.PostConstruct; import javax.annotation.Resource; +import javax.script.Invocable; +import javax.script.ScriptEngine; +import javax.script.ScriptEngineManager; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import org.apache.commons.httpclient.HttpClient; +import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; import org.apache.commons.httpclient.methods.GetMethod; +import org.apache.commons.httpclient.params.HttpMethodParams; import org.jsoup.Connection; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; @@ -57,7 +63,7 @@ import com.yeshi.fanli.exception.taobao.TaobaoGoodsDownException; import com.yeshi.fanli.log.LogHelper; import com.yeshi.fanli.service.inter.goods.TaoBaoLinkService; -import com.yeshi.fanli.service.inter.hongbao.HongBaoManageService; +import com.yeshi.fanli.service.inter.order.config.HongBaoManageService; import com.yeshi.fanli.tag.PageEntity; import com.yeshi.fanli.util.AESUtil; import com.yeshi.fanli.util.Constant; @@ -1331,6 +1337,41 @@ return taoBaoGoodsBrief; } + public static String getGoodsIdByPhoneShareUrl(String burl) { + try { + Connection connect = Jsoup.connect(burl); + Document document = connect.get(); + org.jsoup.nodes.Element element = document.getElementsByTag("head").get(0).getElementsByTag("Script") + .get(1); + String html = element.toString(); + int index = html.indexOf("var url = "); + int endIndex = html.lastIndexOf("'"); + html = html.substring(index, endIndex); + int index2 = html.indexOf("'"); + html = html.substring(index2 + 1); + int indexId = html.indexOf("id="); + String id = ""; + if (html.contains("a.m.taobao.com")) { + indexId = html.indexOf("com/i"); + html = html.substring(indexId); + html = html.substring(5, html.indexOf(".")); + id = html; + } else { + String reg = "[?|&]{1}[i]{1}[d]{1}[=]{1}\\d+[&]?"; + Pattern p = Pattern.compile(reg); + Matcher matcher = p.matcher(html); + if (matcher.find()) { + id = matcher.group(); + } + id = id.substring(id.indexOf("=") + 1, id.lastIndexOf("&")); + } + return id; + } catch (Exception e) { + LogHelper.error("鏃犳硶瑙f瀽鍒版窐瀹濆晢鍝両D:" + burl); + return null; + } + } + /** * 鑾峰彇鍟嗗搧鐨勭敤鎴峰垎鎴愭瘮渚� * @@ -1918,10 +1959,117 @@ return true; } - public static void main(String[] args) { - String s = channelMap.get("3"); - System.out.println(s); + /** + * 鏌ヨ澶╃尗鍟嗗搧鍥剧墖銆佹爣棰� + * + * @param auctionId + * @return + */ + public static TaoBaoGoodsBrief getTmallGoodsInfo(String auctionId) { + TaoBaoGoodsBrief taoBaoGoodsBrief = null; + try { + Connection connect = Jsoup.connect(String.format(TM_PHONE_URL, auctionId)); + Document document = connect.get(); + Elements elements = document.getElementsByTag("script"); + if (elements.size() >= 0) { + for (int i = 0; i < elements.size(); i++) { + String content = elements.get(i).toString(); + if (content.contains("var _DATA_Mdskip")) { + content = content.replace("<script>", ""); + content = content.replace("</script>", ""); + if (content.contains("videoDetail")) { + content += ";function getData(){ var json={title:_DATA_Mdskip.item.title,pictUrl:_DATA_Mdskip.item.videoDetail.videoThumbnailURL}; return JSON.stringify(json);}"; + } else { + content += ";function getData(){ var json={title:_DATA_Mdskip.item.title,pictUrl:_DATA_Mdskip.item.videos[0].videoThumbnailURL}; return JSON.stringify(json);}"; + } + + ScriptEngineManager manager = new ScriptEngineManager(); + ScriptEngine engine = manager.getEngineByName("javascript"); + engine.eval(content); + + if (engine instanceof Invocable) { + Invocable in = (Invocable) engine; + JSONObject json = JSONObject.fromObject(in.invokeFunction("getData")); + Object title = json.get("title"); + Object pictUrl = json.get("pictUrl"); + taoBaoGoodsBrief = new TaoBaoGoodsBrief(); + taoBaoGoodsBrief.setTitle(title.toString()); + taoBaoGoodsBrief.setPictUrl(pictUrl.toString()); + } + + break; + } + } + } + } catch (Exception e) { + e.printStackTrace(); + } + return taoBaoGoodsBrief; + } + + /** + * 鎶栭煶杩斿埄local 鑾峰彇鐪熷疄娣樺疂閾炬帴 + * + * @param url + * https://s.click.taobao.com/t?e=m%3D2%26s% + * 3DT9DFpHQCT7Rw4vFB6t2Z2ueEDrYVVa64XoO8tOebS+dRAdhuF14FMR8 + * @return + */ + public static String getRealUrl(String url) { + if (StringUtil.isNullOrEmpty(url)) + return null; + + try { + HttpClient client = new HttpClient(new MultiThreadedHttpConnectionManager()); + client.getHttpConnectionManager().getParams().setConnectionTimeout(10000); + client.getParams().setParameter(HttpMethodParams.USER_AGENT, + "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)"); // 璁╂湇鍔″櫒璁や负鏄疘E + GetMethod get = new GetMethod(url); + get.setFollowRedirects(false); // 绂佹鑷姩閲嶅畾鍚� + int iGetResultCode = client.executeMethod(get); + String location = get.getResponseHeader("location").getValue(); // 鎵撳嵃鍦板潃 + GetMethod get2 = new GetMethod(URLDecoder.decode(location.split("tu=")[1])); + get2.addRequestHeader("Referer", location); + get2.setFollowRedirects(false); // 绂佹鑷姩閲嶅畾鍚� + int iGetResultCode2 = client.executeMethod(get2); + return get2.getResponseHeader("location").getValue(); // 鐪熷疄鍦板潃 + } catch (Exception ex) { + ex.printStackTrace(); + } + return null; + } + + /** + * 娣樺彛浠よ繃婊� + * + * @param token + * @return + */ + public static String filterTaoToken(String token) { + if (StringUtil.isNullOrEmpty(token)) + return token; + if (token.contains("锟�") || token.contains("鈧�")) { + String newToken = ""; + for (int i = 0; i < token.length(); i++) { + if (token.charAt(i) == '锟�' || token.charAt(i) == '鈧�') { + if (!newToken.contains("(")) + newToken += "("; + else + newToken += ")"; + } else + newToken += token.charAt(i) + ""; + } + return newToken; + } + return token; + } + + public static boolean isSpecialGoods(Integer materialLibType) { + if (materialLibType != null && materialLibType == 1) + return true; + else + return false; } } -- Gitblit v1.8.0