From e2c9fab4823b9a9a8a186951faa5a54d1f57fb1f Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期四, 17 十月 2019 14:58:29 +0800 Subject: [PATCH] 订单爬取优化 增加日志类型 --- fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java | 98 +++++++++++++++++++++++++++++++++++++++++++------ 1 files changed, 86 insertions(+), 12 deletions(-) diff --git a/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java b/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java index d04bf54..cde7b97 100644 --- a/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java +++ b/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java @@ -1337,6 +1337,41 @@ return taoBaoGoodsBrief; } + public static String getGoodsIdByPhoneShareUrl(String burl) { + try { + Connection connect = Jsoup.connect(burl); + Document document = connect.get(); + org.jsoup.nodes.Element element = document.getElementsByTag("head").get(0).getElementsByTag("Script") + .get(1); + String html = element.toString(); + int index = html.indexOf("var url = "); + int endIndex = html.lastIndexOf("'"); + html = html.substring(index, endIndex); + int index2 = html.indexOf("'"); + html = html.substring(index2 + 1); + int indexId = html.indexOf("id="); + String id = ""; + if (html.contains("a.m.taobao.com")) { + indexId = html.indexOf("com/i"); + html = html.substring(indexId); + html = html.substring(5, html.indexOf(".")); + id = html; + } else { + String reg = "[?|&]{1}[i]{1}[d]{1}[=]{1}\\d+[&]?"; + Pattern p = Pattern.compile(reg); + Matcher matcher = p.matcher(html); + if (matcher.find()) { + id = matcher.group(); + } + id = id.substring(id.indexOf("=") + 1, id.lastIndexOf("&")); + } + return id; + } catch (Exception e) { + LogHelper.error("鏃犳硶瑙f瀽鍒版窐瀹濆晢鍝両D:" + burl); + return null; + } + } + /** * 鑾峰彇鍟嗗搧鐨勭敤鎴峰垎鎴愭瘮渚� * @@ -1926,6 +1961,7 @@ /** * 鏌ヨ澶╃尗鍟嗗搧鍥剧墖銆佹爣棰� + * * @param auctionId * @return */ @@ -1941,12 +1977,17 @@ if (content.contains("var _DATA_Mdskip")) { content = content.replace("<script>", ""); content = content.replace("</script>", ""); - content+=";function getData(){ var json={title:_DATA_Mdskip.item.title,pictUrl:_DATA_Mdskip.item.videoDetail.videoThumbnailURL}; return JSON.stringify(json);}"; - + + if (content.contains("videoDetail")) { + content += ";function getData(){ var json={title:_DATA_Mdskip.item.title,pictUrl:_DATA_Mdskip.item.videoDetail.videoThumbnailURL}; return JSON.stringify(json);}"; + } else { + content += ";function getData(){ var json={title:_DATA_Mdskip.item.title,pictUrl:_DATA_Mdskip.item.videos[0].videoThumbnailURL}; return JSON.stringify(json);}"; + } + ScriptEngineManager manager = new ScriptEngineManager(); ScriptEngine engine = manager.getEngineByName("javascript"); engine.eval(content); - + if (engine instanceof Invocable) { Invocable in = (Invocable) engine; JSONObject json = JSONObject.fromObject(in.invokeFunction("getData")); @@ -1956,7 +1997,7 @@ taoBaoGoodsBrief.setTitle(title.toString()); taoBaoGoodsBrief.setPictUrl(pictUrl.toString()); } - + break; } } @@ -1966,22 +2007,24 @@ } return taoBaoGoodsBrief; } - - - + /** * 鎶栭煶杩斿埄local 鑾峰彇鐪熷疄娣樺疂閾炬帴 - * @param url https://s.click.taobao.com/t?e=m%3D2%26s%3DT9DFpHQCT7Rw4vFB6t2Z2ueEDrYVVa64XoO8tOebS+dRAdhuF14FMR8 + * + * @param url + * https://s.click.taobao.com/t?e=m%3D2%26s% + * 3DT9DFpHQCT7Rw4vFB6t2Z2ueEDrYVVa64XoO8tOebS+dRAdhuF14FMR8 * @return */ public static String getRealUrl(String url) { if (StringUtil.isNullOrEmpty(url)) return null; - + try { HttpClient client = new HttpClient(new MultiThreadedHttpConnectionManager()); client.getHttpConnectionManager().getParams().setConnectionTimeout(10000); - client.getParams().setParameter(HttpMethodParams.USER_AGENT,"Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)"); // 璁╂湇鍔″櫒璁や负鏄疘E + client.getParams().setParameter(HttpMethodParams.USER_AGENT, + "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0)"); // 璁╂湇鍔″櫒璁や负鏄疘E GetMethod get = new GetMethod(url); get.setFollowRedirects(false); // 绂佹鑷姩閲嶅畾鍚� int iGetResultCode = client.executeMethod(get); @@ -1993,9 +2036,40 @@ return get2.getResponseHeader("location").getValue(); // 鐪熷疄鍦板潃 } catch (Exception ex) { ex.printStackTrace(); - } + } return null; } - + + /** + * 娣樺彛浠よ繃婊� + * + * @param token + * @return + */ + public static String filterTaoToken(String token) { + if (StringUtil.isNullOrEmpty(token)) + return token; + if (token.contains("锟�") || token.contains("鈧�")) { + String newToken = ""; + for (int i = 0; i < token.length(); i++) { + if (token.charAt(i) == '锟�' || token.charAt(i) == '鈧�') { + if (!newToken.contains("(")) + newToken += "("; + else + newToken += ")"; + } else + newToken += token.charAt(i) + ""; + } + return newToken; + } + return token; + } + + public static boolean isSpecialGoods(Integer materialLibType) { + if (materialLibType != null && materialLibType == 1) + return true; + else + return false; + } } -- Gitblit v1.8.0