From 48e4c4328cfd305d5465b66db11521ff70723683 Mon Sep 17 00:00:00 2001 From: admin <weikou2014> Date: 星期二, 24 十二月 2019 16:59:00 +0800 Subject: [PATCH] 京东内优惠商品爬取,淘宝无推广商品信息获取 --- fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java | 35 +++++++++++++++-------------------- 1 files changed, 15 insertions(+), 20 deletions(-) diff --git a/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java b/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java index cf4c61d..8c5cdf9 100644 --- a/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java +++ b/fanli/src/main/java/com/yeshi/fanli/util/taobao/TaoBaoUtil.java @@ -1,6 +1,5 @@ package com.yeshi.fanli.util.taobao; -import java.io.IOException; import java.io.UnsupportedEncodingException; import java.lang.reflect.Type; import java.math.BigDecimal; @@ -23,7 +22,6 @@ import javax.xml.parsers.DocumentBuilderFactory; import org.apache.commons.httpclient.HttpClient; -import org.apache.commons.httpclient.HttpException; import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager; import org.apache.commons.httpclient.methods.GetMethod; import org.apache.commons.httpclient.params.HttpMethodParams; @@ -1856,31 +1854,28 @@ String url = ""; try { - url = String.format("https://acs.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=%s", + url = String.format( + "http://acs.m.taobao.com/h5/mtop.taobao.detail.getdetail/6.0/?data=%s&qq-pf-to=pcqq.group", URLEncoder.encode(data.toString(), "UTF-8")); } catch (UnsupportedEncodingException e) { e.printStackTrace(); } - HttpClient client = new HttpClient(); - GetMethod gm = new GetMethod(url); String result = null; try { - gm.setRequestHeader("user-agent", - "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.97 Mobile Safari/537.36"); - gm.setRequestHeader("cache-control", "max-age=0"); - gm.setRequestHeader("sec-fetch-mode", "navigate"); - gm.setRequestHeader("sec-fetch-site", "none"); - gm.setRequestHeader("sec-fetch-user", "?1"); - gm.setRequestHeader("upgrade-insecure-requests", "1"); - gm.setRequestHeader("cookie", - "t=2ef08e4a12314c12dcd98d2ec480dd69; thw=cn; hng=CN%7Czh-CN%7CCNY%7C156; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; ali_ab=113.251.19.3.1554796147005.0; UM_distinctid=16b9d8257dd388-0a96f36f947888-e343166-1fa400-16b9d8257df114; WAPFDFDTGFG=%2B4cMKKP%2B8PI%2BuhLvELc2mB6zAz5Z54E%3D; _w_app_lg=0; enc=00wU0%2BKYu9Yf1tHSawG%2Bc4aMZojqg8eB6baoMr9Z8Ir4qJ6hRhMhdWQxbM71vvxv6OCsxN50T0xA1gJGIdFm7w%3D%3D; cna=Q9eYFEM1rm8CAXH7EKUDSamu; miid=769739881160810491; lgc=%5Cu6930%5Cu89C6%5Cu79D1%5Cu6280; tracknick=%5Cu6930%5Cu89C6%5Cu79D1%5Cu6280; tg=0; mt=ci=7_1; cookie2=13ad477d84d5e6659aa7855de3617163; _tb_token_=ee895586817b0; dnk=%5Cu6930%5Cu89C6%5Cu79D1%5Cu6280; v=0; _m_h5_tk=286f649add5207dfe82e770832daf1cc_1573547306314; _m_h5_tk_enc=137c39dafc279e40edc796770d3eda8f; tk_trace=oTRxOWSBNwn9dPyorMJE%2FoPdY8zZPEr%2FCrvCMS%2BG3sTRRWrQ%2BVVTl09ME1KrXdbd3QNiuPM1YZeXJrHpvLuG%2FSvA2umbWq5jqrcw%2FDpk1Lj6yQFVptx6jw71DzuBbw%2BuZLEozdKGb%2BZr85uzSnUSWfVAqZprzoB%2Bfde2IHnHJp651xghkKpmsSBgY5muhOI7FvNfc8zH1BIvygVQSEIa1bCCgy2VJl%2FkyuTXaGHb%2FAKxL566%2FLult1lRlo8RZx9s76BhXe7PTot5MPGrayPkRiqWfVdg3C7gFWRjrjjKriN47mbQazOoIM7knBX%2B3mnnmwWbCQjKiuA6I8W9ng%3D%3D; _cc_=VFC%2FuZ9ajQ%3D%3D; unb=3327215652; uc1=lng=zh_CN&tag=8&cookie16=W5iHLLyFPlMGbLDwA%2BdvAGZqLg%3D%3D&cookie15=VT5L2FSpMGV7TQ%3D%3D&cookie14=UoTbnrCeRJd%2F2g%3D%3D&pas=0&existShop=false&cookie21=VFC%2FuZ9aj38X; uc3=vt3=F8dByuWjrX8oBIwXl2Q%3D&nk2=sbdfkkkB37A%3D&id2=UNN%2F6whEupDGWQ%3D%3D&lg2=WqG3DMC9VAQiUQ%3D%3D; csg=30373d82; cookie17=UNN%2F6whEupDGWQ%3D%3D; skt=6a0617b2d0843ef4; existShop=MTU3MzgxNDAxNw%3D%3D; uc4=id4=0%40UgQ3BPth2rlXv3PxjwesD5MgnRD3&nk4=0%40s9JQzQv3gdCkG6YVO6ulkEbNug%3D%3D; _l_g_=Ug%3D%3D; sg=%E6%8A%8021; _nk_=%5Cu6930%5Cu89C6%5Cu79D1%5Cu6280; cookie1=AnRaCZfOZrWoY8u%2BoXxB8y%2FnnwnZszWqI6Bbr5BrFtM%3D; isg=BOnpxPqJkNY3p6zvIy1AdcEC-JVDtt3of1m6aoveZVAPUglk0wbtuNdAFL5BUXUg; l=dBgUvWMcqlf1dR9EBOCanurza77OSIRYYuPzaNbMi_5IT6T6q1QOkBPUkF96VfWfTVLB47_ypV99-etkZ2ejm8IpXUJsAMc."); - client.executeMethod(gm); - result = gm.getResponseBodyAsString(); - } catch (HttpException e) { - e.printStackTrace(); - } catch (IOException e) { - e.printStackTrace(); + Connection.Response doc = Jsoup.connect(url).ignoreContentType(true) + .timeout(1000*10) + .userAgent( + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362") + .header("Accept", "text/html, application/xhtml+xml, application/xml; q=0.9, */*; q=0.8") + .header("Accept-Language", "zh-CN").header("Host", "acs.m.taobao.com") + .header("Upgrade-Insecure-Requests", "1") + .header("Cookie", + "_cc_=VFC%2FuZ9ajQ%3D%3D; isg=BLu7TvdFYk89dV4DMXO7XNRGU58lEM8SDjV-ja14l7rRDNvuNeBfYtlOJmznNycK; thw=cn; miid=1490566681358073134; x=e%3D1%26p%3D*%26s%3D0%26c%3D0%26f%3D0%26g%3D0%26t%3D0%26__ll%3D-1%26_ato%3D0; cna=ZW8rFCpNP1gCAWpXAiKqMzqi; tracknick=tb23001560; l=cBxbffilq1UJdAFCBOCanurza77OSdAYYuPzaNbMi_5NE6T1BR7Ok6G1vF96VsWdOW8B4NSiTkp9-etkZ3Znq9SpXUJ1.; tg=0; _w_app_lg=19; WAPFDFDTGFG=%2B4cMKKP%2B8PI%2BtNYpkiAuTPLkiJB1kcwi") + .execute(); + result = doc.body(); + } catch (Exception e1) { + e1.printStackTrace(); } JSONObject resultOBJ = JSONObject.fromObject(result); -- Gitblit v1.8.0