package com.ks; import org.apache.commons.io.IOUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.yeshi.utils.HttpUtil; import org.yeshi.utils.StringUtil; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; public class VisualUserTest { Logger logger = LoggerFactory.getLogger(VisualUserTest.class); @Test public void getPortrait0() { for (int p = 2; p < 1000; p++) { try { List list = new ArrayList<>(); Document doc = Jsoup.connect("https://www.woyaogexing.com/touxiang/index_" + p + ".html").timeout(10000).get(); Elements els = doc.getElementsByAttributeValue("class", "pMain").get(0).getElementsByAttributeValue("class", "txList"); for (int i = 0; i < els.size(); i++) { String img = els.get(i).getElementsByTag("img").get(0).attr("src"); if (!StringUtil.isNullOrEmpty(img)) { list.add("http:" + img); } logger.info(img); } if (list.size() > 0) { String fileName = "D:/portrait/0/" + p + ".txt"; if (!new File(fileName).getParentFile().exists()) { new File(fileName).getParentFile().mkdirs(); } IOUtils.writeLines(list, "\r\n", new FileOutputStream("D:/portrait/0/" + p + ".txt")); } } catch (IOException e) { e.printStackTrace(); } try { Thread.sleep(2000); } catch (InterruptedException e) { e.printStackTrace(); } } } @Test public void getPortrait1() { String[] classes = new String[]{ "qinglv", "nv", "nan", "dongman", "weixin" }; for (String clz : classes) { if (clz.equalsIgnoreCase(classes[0])) continue; for (int p = 2; p < 120; p++) { try { List list = new ArrayList<>(); Document doc = Jsoup.connect("http://www.crcz.com/touxiang/" + clz + "/list_" + p + ".html").timeout(10000).get(); Elements els = doc.getElementsByAttributeValue("class", "tx_list").get(0).getElementsByTag("li"); for (int i = 0; i < els.size(); i++) { String img = els.get(i).getElementsByTag("img").get(0).attr("src"); if (!StringUtil.isNullOrEmpty(img)) { list.add(img); } logger.info(img); } if (list.size() > 0) { String fileName = "D:/portrait/1/" + clz + "_" + p + ".txt"; if (!new File(fileName).getParentFile().exists()) { new File(fileName).getParentFile().mkdirs(); } IOUtils.writeLines(list, "\r\n", new FileOutputStream(fileName)); } } catch (Exception e) { e.printStackTrace(); } try { Thread.sleep(2000); } catch (InterruptedException e) { e.printStackTrace(); } } } } private String getNextPageUrl2(String url, Document doc, String containerClass, String activeClass) { Element next = doc.getElementsByClass(containerClass).get(0).getElementsByClass(activeClass).get(0).nextElementSibling(); String href = next.attr("href"); if (!StringUtil.isNullOrEmpty(href)) { return url.substring(0, url.lastIndexOf("/") + 1) + href; } return null; } @Test public void getPortrait2() { String[] classes = new String[]{ "fengjing", "katong", "keai", "weimei", "xiaoqingxin", "yijing", "gaoxiao", "dongwu", "qiche", "wupin", "xingzuo", "zhiwu", "shouhui", "jianzhu" }; for (String clz : classes) { String url = "http://www.imeitou.com/" + clz + "/"; int p = 1; while (!StringUtil.isNullOrEmpty(url)) { try { List list = new ArrayList<>(); Document doc = Jsoup.connect(url).timeout(10000).get(); Elements els = doc.getElementsByAttributeValue("class", "g-gxlist-imgbox").get(0).getElementsByTag("li"); for (int i = 0; i < els.size(); i++) { String img = els.get(i).getElementsByTag("img").get(0).attr("src"); if (!StringUtil.isNullOrEmpty(img)) { list.add(img); } logger.info(img); } if (list.size() > 0) { String fileName = "D:/portrait/2/" + clz + "_" + p + ".txt"; if (!new File(fileName).getParentFile().exists()) { new File(fileName).getParentFile().mkdirs(); } IOUtils.writeLines(list, "\r\n", new FileOutputStream(fileName)); } url = getNextPageUrl2(url, doc, "pagelist", "current"); p++; } catch (Exception e) { e.printStackTrace(); } try { Thread.sleep(2000); } catch (InterruptedException e) { e.printStackTrace(); } } } } private String getNextPageUrl3(String url, Document doc, String containerClass, String activeTag) { Element next = doc.getElementsByClass(containerClass).get(0).getElementsByTag(activeTag).get(0).nextElementSibling(); String href = next.attr("href"); if (!StringUtil.isNullOrEmpty(href)) { return url.substring(0, url.lastIndexOf("/") + 1) + href; } return null; } @Test public void getPortrait3() { String[][] classes = new String[][]{ new String[]{"dongman", "list_219_1.html"}, new String[]{"katong", "list_220_1.html"}, new String[]{"gexing", "list_225_1.html"}, new String[]{"keai", "list_226_1.html"}, new String[]{"fengjing", "list_229_1.html"}, }; for (String[] clz : classes) { String url = "http://www.jj20.com/tx/" + clz[0] + "/" + clz[1]; int p = 1; while (!StringUtil.isNullOrEmpty(url)) { try { List list = new ArrayList<>(); Document doc = Jsoup.connect(url).timeout(10000).get(); Elements els = doc.getElementsByAttributeValue("class", "g-select-img").get(1).getElementsByTag("li"); for (int i = 0; i < els.size(); i++) { String img = els.get(i).getElementsByTag("img").get(0).attr("src"); if (!StringUtil.isNullOrEmpty(img)) { list.add(img); } logger.info(img); } if (list.size() > 0) { String fileName = "D:/portrait/3/" + clz[0] + "_" + p + ".txt"; if (!new File(fileName).getParentFile().exists()) { new File(fileName).getParentFile().mkdirs(); } IOUtils.writeLines(list, "\r\n", new FileOutputStream(fileName)); } url = getNextPageUrl3(url, doc, "tsp_nav", "b"); p++; } catch (Exception e) { e.printStackTrace(); } try { Thread.sleep(2000); } catch (InterruptedException e) { e.printStackTrace(); } } } } @Test public void downloadPortrait () { try { File file = new File("D://portrait"); File[] fs = file.listFiles(); for (File f : fs) { if (!f.getName().contains("3")) { continue; } if (!f.getName().contains("download")) { String downloadDir = file.getAbsolutePath() + "/download/" + f.getName(); if (!new File(downloadDir).exists()) { new File(downloadDir).mkdirs(); } File[] cfs = f.listFiles(); for (File cf : cfs) { List list = IOUtils.readLines(new FileInputStream(cf)); for (int i = 0; i < list.size(); i++) { String fileName = cf.getName().replace(".txt", "_") + i + ".jpg"; if (!new File(downloadDir, fileName).exists()) { try { IOUtils.write(IOUtils.toByteArray(HttpUtil.getAsInputStream(list.get(i))), new FileOutputStream(new File(downloadDir, fileName))); Thread.sleep(100); } catch (Exception e) { e.printStackTrace(); } } } } } } } catch (Exception e) { e.printStackTrace(); } } }