| | |
| | | package com.yeshi.buwan.videos.hanmi; |
| | | |
| | | import com.yeshi.buwan.util.StringUtil; |
| | | import com.yeshi.buwan.videos.hanmi.entity.HanmiShow; |
| | | import com.yeshi.buwan.videos.hanmi.entity.HanmiShowEpisode; |
| | | import org.jsoup.Connection; |
| | |
| | | import org.jsoup.select.Elements; |
| | | |
| | | import java.io.IOException; |
| | | import java.io.UnsupportedEncodingException; |
| | | import java.net.URI; |
| | | import java.net.URLEncoder; |
| | | import java.util.*; |
| | | |
| | | public class HanmiApiUtil { |
| | |
| | | } |
| | | |
/**
 * Scrapes a show's detail page and copies the extracted fields onto {@code show}.
 *
 * <p>As visible here: the URL on {@code show} is checked against a fixed prefix, the page is
 * fetched with {@code getDoc(show.getUrl(), getHeaders())}, and the poster, title, score, a
 * key/value map of metadata, the description and the episode links are read from the DOM and
 * written to {@code show} through its setters.
 *
 * <p>NOTE(review): this span looks like a rendered diff in which the lines of two revisions
 * (one targeting www.hmtv.me, one targeting www.wztaichuan.com) are interleaved. Several locals
 * are declared twice and the braces do not balance, so it cannot compile as shown. Confirm
 * which revision is current before changing any logic.
 *
 * @param show the show to populate; its URL must be non-null and start with the expected prefix
 * @return the same {@code show} instance that was passed in
 * @throws Exception when the URL check fails (invalid-link message)
 */
| | | public static HanmiShow parseShowDetail(HanmiShow show) throws Exception { |
// NOTE(review): two URL guards follow (hmtv.me, then wztaichuan.com) but only one closing brace is visible.
| | | if (show.getUrl() == null || !show.getUrl().startsWith("https://www.hmtv.me/show/")) { |
| | | if (show.getUrl() == null || !show.getUrl().startsWith("https://www.wztaichuan.com/vod/detail")) { |
| | | throw new Exception("链接不合法"); |
| | | } |
| | | |
// Parsed so that episode hrefs can later be prefixed with this page's scheme and host.
| | | URI uri = URI.create(show.getUrl()); |
| | | |
| | | Document doc = getDoc(show.getUrl(), getHeaders()); |
| | | |
// NOTE(review): root is declared twice in the next three lines; titleItem is declared again further down.
| | | Element root = doc.getElementsByClass("video-content").get(0); |
| | | Element titleItem = root.getElementsByClass("article-title").get(0); |
| | | Element root = doc.getElementsByClass("stui-content__thumb").get(0).parent(); |
| | | |
// Poster URL is taken from the first img's data-original attribute.
| | | String picture = doc.getElementsByClass("stui-content__thumb").get(0).getElementsByTag("img").get(0).attr("data-original"); |
| | | |
| | | // Show information |
| | | Element videoInfo = root.getElementsByClass("stui-content__detail").get(0); |
| | | |
| | | Element titleItem = videoInfo.getElementsByClass("title").get(0); |
| | | |
| | | // Title |
| | | String title = null; |
| | | try { |
// NOTE(review): both assignments are present; if both run, the second overwrites the first.
| | | title = titleItem.getElementsByClass("item-title").get(0).ownText(); |
| | | title = titleItem.ownText(); |
| | | } catch (IndexOutOfBoundsException e) { |
// Best effort: a missing element leaves title as null.
| | | } |
| | | |
| | | String year = null; |
| | | String score = null; |
| | | try { |
| | | year = titleItem.getElementsByClass("item-year").get(0).ownText(); |
| | | score = titleItem.getElementsByClass("score").get(0).ownText(); |
| | | } catch (IndexOutOfBoundsException e) { |
// Best effort: year and/or score stay null when the element is absent.
| | | } |
| | | show.setScore(score); |
| | | |
| | | // Show information |
| | | Element videoBox = root.getElementsByClass("video_box").get(0); |
| | | |
// NOTE(review): second declaration of picture (and of videoInfo two lines below).
| | | String picture = videoBox.getElementsByClass("video_img").get(0).getElementsByTag("img").attr("src"); |
| | | |
| | | Element videoInfo = videoBox.getElementsByClass("video_info").get(0); |
| | | String videoInfoStr = videoInfo.html(); |
| | | String[] sts = videoInfoStr.split("<br>"); |
| | | Map<String, String> infos = new HashMap<>(); |
// Splits the info block's HTML on <br> and stores each "key:value" text pair, trimmed.
// NOTE(review): no closing brace for this loop is visible before the next loop starts.
| | | for (String st : sts) { |
| | | Document d = Jsoup.parse(st); |
| | | String value = d.text(); |
| | | if (value.indexOf(":") > -1) |
| | | infos.put(value.substring(0, value.indexOf(":")).trim(), value.substring(value.indexOf(":") + 1).trim()); |
// For every "data" element, each "text-muted" child's own text (trimmed) becomes a key in infos.
| | | Elements datas = videoInfo.getElementsByClass("data"); |
| | | for(int i=0;i<datas.size();i++){ |
| | | Elements data_items = datas.get(i).getElementsByClass("text-muted"); |
| | | for(int j=0; j<data_items.size(); j++){ |
| | | String key = data_items.get(j).ownText().trim(); |
| | | String value = null; |
| | | switch (key){ |
// Cast label: join the text of every <a> under the label's parent with ",".
| | | case "主演:": |
| | | List<String> actors=new ArrayList<>(); |
| | | Elements temps = data_items.get(j).parent().getElementsByTag("a"); |
| | | for(Iterator<Element> its = temps.iterator(); its.hasNext();) |
| | | { |
| | | actors.add( its.next().ownText()); |
| | | } |
| | | value = StringUtil.join(actors, ","); |
| | | break; |
// Any other label: use the next sibling element's own text, or the parent's own text when there is no sibling.
| | | default: |
| | | if(data_items.get(j).nextElementSibling()!=null) { |
| | | value = data_items.get(j).nextElementSibling().ownText(); |
| | | } else{ |
| | | value = data_items.get(j).parent().ownText(); |
| | | } |
| | | } |
| | | infos.put(key,value); |
| | | } |
| | | } |
| | | |
| | | String desc = videoInfo.getElementsByClass("desc").get(0).ownText(); |
| | | // Episode list |
// NOTE(review): eposide is declared twice on the next two lines.
| | | Element eposide = root.getElementsByClass("video_list_li").get(0); |
| | | Element eposide = doc.getElementsByClass("playlist").get(0).getElementsByClass("stui-content__playlist").get(0); |
| | | Elements eposides = eposide.getElementsByTag("a"); |
| | | List<HanmiShowEpisode> episodeList = new ArrayList<>(); |
| | | |
| | |
| | | // Movie |
// Movie branch: exactly one episode is built, from the link at position `index` (0).
| | | if (show.getType() != null && show.getType().contains("影")) { |
| | | int index = 0; |
| | | // for (int i = 0; i < eposides.size(); i++) { |
| | | // String tag = eposides.get(i).ownText(); |
| | | // if (tag.contains("HD")) { |
| | | // index = i; |
| | | // break; |
| | | // } |
| | | // } |
| | | String href = eposides.get(index).attr("href"); |
| | | HanmiShowEpisode ep = new HanmiShowEpisode(); |
| | | ep.setOrderBy(0); |
// NOTE(review): two setPlayUrl calls; the second (scheme/host taken from uri) overwrites the hard-coded host.
| | | ep.setPlayUrl("https://www.hmtv.me" + href); |
| | | ep.setPlayUrl(String.format("%s://%s%s",uri.getScheme(),uri.getHost(),href)); |
// The episode tag falls back to the scraped title when the show has no title yet.
| | | ep.setTag(show.getTitle() != null ? show.getTitle() : title); |
| | | episodeList.add(ep); |
| | | } else { |
// NOTE(review): a row appears to be missing here; i and href are used below without a visible declaration.
| | |
| | | String tag = eposides.get(i).ownText(); |
| | | HanmiShowEpisode ep = new HanmiShowEpisode(); |
| | | ep.setOrderBy(i + 1); |
| | | ep.setPlayUrl("https://www.hmtv.me" + href); |
| | | ep.setPlayUrl(String.format("%s://%s%s",uri.getScheme(),uri.getHost(),href)); |
| | | ep.setTag(tag); |
| | | episodeList.add(ep); |
| | | } |
| | | } |
| | | |
| | | // Synopsis |
// NOTE(review): second declaration of desc.
| | | String desc = root.getElementsByClass("jianjie").get(0).text(); |
| | | |
| | | |
// Only the part of the scraped title before the first space is used.
// NOTE(review): title can still be null here if the lookup above failed; that would throw NullPointerException.
| | | if (show.getTitle() == null) |
| | | show.setTitle(title.split(" ")[0]); |
| | | |
| | | show.setPicture(picture); |
// NOTE(review): the first setActors statement is cut off mid-expression; the keys with and without
// a trailing colon presumably belong to different revisions.
| | | if (infos.get("主演") != null) |
| | | show.setActors(infos.get("主演"). |
| | | |
| | | if (infos.get("主演:") != null) |
| | | show.setActors(infos.get("主演:"). |
| | | replace("/", ",")); |
| | | if (infos.get("导演") != null) |
| | | show.setDirector(infos.get("导演")); |
| | | if (infos.get("类型") != null) |
| | | show.setCategorys(infos.get("类型")); |
| | | if (infos.get("国家/地区") != null) |
| | | show.setArea(infos.get("国家/地区")); |
// Release date: keep the text before the first "(" if there is one, otherwise the whole value.
| | | if (infos.get("首播") != null) |
| | | show.setRelaseDate(infos.get("首播"). |
| | | |
| | | substring(0, infos.get("首播"). |
| | | |
| | | indexOf("(") > -1 ? infos.get("首播"). |
| | | |
| | | indexOf("(") : infos.get("首播"). |
| | | |
| | | length())); |
// Same truncation for the alternative release-date key; when both keys exist this one wins.
| | | if (infos.get("上映日期") != null) { |
| | | show.setRelaseDate(infos.get("上映日期"). |
| | | |
| | | substring(0, infos.get("上映日期"). |
| | | |
| | | indexOf("(") > -1 ? infos.get("上映日期"). |
| | | |
| | | indexOf("(") : infos.get("上映日期"). |
| | | |
| | | length())); |
| | | } |
| | | |
| | | |
// Fall back to the part of the release date before the first "-".
| | | if (year == null && show.getRelaseDate() != null) { |
| | | year = show.getRelaseDate().split("-")[0]; |
| | | } |
| | | |
// NOTE(review): year may still be null here (no item-year element and no release date); that would throw NullPointerException.
| | | show.setYear(year.replace("(", ""). |
| | | replace(")", "")); |
| | | |
| | | if (infos.get("导演:") != null) |
| | | show.setDirector(infos.get("导演:")); |
| | | if (infos.get("类型:") != null) |
| | | show.setCategorys(infos.get("类型:")); |
| | | if (infos.get("地区:") != null) |
| | | show.setArea(infos.get("地区:")); |
| | | show.setDesc(desc); |
// NOTE(review): this unconditionally replaces the year set above, and sets null when the key is absent.
| | | show.setYear(infos.get("年份:")); |
// With a year but no release date, January 1st of that year is used.
| | | if (show.getYear() != null && show.getRelaseDate() == null) { |
| | | show.setRelaseDate(show.getYear() + "-01-01"); |
| | | } |
| | | |
// NOTE(review): two setId calls; the second overwrites the first. It takes the text after "/id/"
// up to the next "/" or "." and throws ArrayIndexOutOfBoundsException when the URL has no "/id/" part.
| | | show.setId(show.getUrl(). |
| | | |
| | | replace("https://www.hmtv.me/show/", ""). |
| | | |
| | | trim()); |
| | | show.setId(show.getUrl().split("/id/")[1].split("/")[0].split("\\.")[0].trim()); |
| | | show.setEpisodeList(episodeList); |
// Writes the URL back to itself; no visible effect unless the setter does more than assign.
| | | show.setUrl(show.getUrl()); |
| | | show.setDesc(desc.trim()); |
| | | return show; |
| | | } |
| | | |
| | | |
/**
 * Scrapes a listing page into a list of partially filled {@code HanmiShow} objects.
 *
 * <p>As visible here: the page is fetched with {@code getDoc(listUrl, headers)}, a list
 * container is located in the DOM, and for each item the link, tag, title and id are read and
 * set on a new {@code HanmiShow}.
 *
 * <p>NOTE(review): this span looks like a rendered diff with the lines of two revisions
 * interleaved. {@code root} and {@code items} are declared twice, one row is elided and the
 * braces do not balance, so it cannot compile as shown.
 *
 * @param listUrl URL of the listing page; also used to turn relative item links into absolute ones
 * @return the shows found on the page
 * @throws IOException declared on the signature; presumably propagated from getDoc (TODO confirm)
 */
| | | public static List<HanmiShow> parseList(String listUrl) throws IOException { |
| | | Map<String, String> headers = new HashMap<>(); |
// NOTE(review): a row is elided here; as shown, headers is passed to getDoc empty.
| | |
| | | |
| | | List<HanmiShow> list = new ArrayList<>(); |
| | | Document doc = getDoc(listUrl, headers); |
// The listing's type label is read from the <strong> inside the first "title" element of "list-content".
| | | String type = doc.getElementsByClass("list-content").get(0).getElementsByClass("title").get(0).getElementsByTag("strong").text(); |
| | | |
// NOTE(review): root and items are each declared twice below.
| | | Element root = doc.getElementsByClass("m-movies").get(0); |
| | | Elements items = root.getElementsByClass("u-movie"); |
// Pick the first "stui-pannel_bd" panel that contains a ul.stui-vodlist and use that list as root.
| | | Elements es = doc.getElementsByClass("stui-pannel_bd"); |
| | | Element root = null; |
| | | for(int i=0;i<es.size();i++){ |
| | | if( es.get(i).select("ul.stui-vodlist").size()>0){ |
| | | root = es.get(i).select("ul.stui-vodlist").get(0); |
| | | break; |
| | | } |
| | | } |
// NOTE(review): root is still null when no panel matched; the next line would then throw NullPointerException.
| | | Elements items = root.getElementsByTag("li"); |
| | | for (int i = 0; i < items.size(); i++) { |
| | | Element item = items.get(i); |
| | | HanmiShow show = new HanmiShow(); |
| | | show.setUrl(item.getElementsByTag("a").get(0).attr("href")); |
| | | show.setTag(item.getElementsByClass("zhuangtai").get(0).text()); |
| | | String score = item.getElementsByClass("pingfen").get(0).text(); |
// NOTE(review): no closing brace for this if is visible before the next statements.
| | | if (score != null) { |
| | | score = score.replace("分", ""); |
| | | show.setScore(score); |
// Links that do not start with "http" are prefixed with the scheme and host of listUrl.
| | | String url = item.getElementsByTag("a").get(0).attr("href"); |
| | | if(!url.startsWith("http")){ |
| | | URI uri = URI.create(listUrl); |
| | | url=String.format("%s://%s%s",uri.getScheme(),uri.getHost(),url); |
| | | } |
| | | show.setTitle(item.getElementsByTag("h2").get(0).getElementsByTag("a").get(0).ownText()); |
| | | show.setType(type); |
| | | show.setId(show.getUrl(). |
| | | replace("https://www.hmtv.me/show/", ""). |
| | | trim()); |
| | | show.setUrl(url); |
| | | show.setTag(item.getElementsByClass("pic-text").get(0).text()); |
| | | show.setTitle(item.getElementsByClass("stui-vodlist__detail").get(0).getElementsByTag("a").get(0).ownText()); |
// Id is the last path segment of the URL with everything from the first "." removed.
| | | show.setId(show.getUrl().split("/")[show.getUrl().split("/").length-1].split("\\.")[0].trim()); |
| | | list.add(show); |
| | | } |
| | | |
| | | return list; |
| | | } |
| | | |
| | |
| | | * @return |
| | | */ |
// Builds the episode list for a show from one of its pages.
//
// As visible here, two strategies are present:
//  1. read the "playnav" tabs, pick the first one whose text contains "HM" and list the links
//     of the matching tab inside "playcontainer", returning null when no tab matches;
//  2. list every <a> under the first "stui-content__playlist" inside "playlist".
//
// NOTE(review): this span looks like a rendered diff with the lines of two revisions
// interleaved. doc and episodeList are declared twice and the first episode loop is never
// closed, so it cannot compile as shown.
//
// playUrl: page to fetch; its scheme and host prefix the episode hrefs in the second strategy.
// Returns the episodes in page order with orderBy starting at 1.
| | | public static List<HanmiShowEpisode> getShowEpisodesFromPlayUrl(String playUrl) throws IOException { |
| | | List<HanmiShowEpisode> episodeList = new ArrayList<>(); |
| | | Document doc = getDoc(playUrl, null); |
| | | Element els = doc.getElementById("playnav"); |
| | | Elements items = els.getElementsByTag("li"); |
// Index of the first tab whose text contains "HM"; -1 when there is none.
| | | int playIndex = -1; |
| | | for (int i = 0; i < items.size(); i++) { |
| | | String name = items.get(i).text(); |
| | | if (name.contains("HM")) { |
| | | playIndex = i; |
| | | break; |
| | | } |
| | | } |
// Callers of this path must handle a null result.
| | | if (playIndex < 0) |
| | | return null; |
| | | Element tab = doc.getElementById("playcontainer").getElementsByClass("tab").get(playIndex); |
| | | Elements es = tab.getElementsByTag("a"); |
| | | URI uri = URI.create(playUrl); |
| | | |
// NOTE(review): no closing brace for this loop is visible before doc is declared again below.
| | | for (int i = 0; i < es.size(); i++) { |
| | | HanmiShowEpisode episode = new HanmiShowEpisode(); |
// The host is hard-coded here, unlike the second loop, which derives it from playUrl.
| | | String href = "https://www.hmtv.me" + es.get(i).attr("href"); |
| | | String name = es.get(i).text(); |
| | | episode.setTag(name); |
| | | episode.setPlayUrl(href); |
| | | episode.setOrderBy(i + 1); |
| | | episodeList.add(episode); |
| | | Document doc = getDoc(playUrl, getHeaders()); |
| | | |
// root and picture are assigned but not read anywhere in this span.
| | | Element root = doc.getElementsByClass("stui-content__thumb").get(0).parent(); |
| | | |
| | | String picture = doc.getElementsByClass("stui-content__thumb").get(0).getElementsByTag("img").get(0).attr("data-original"); |
| | | |
| | | // Episode list |
| | | Element eposide = doc.getElementsByClass("playlist").get(0).getElementsByClass("stui-content__playlist").get(0); |
| | | Elements eposides = eposide.getElementsByTag("a"); |
| | | List<HanmiShowEpisode> episodeList = new ArrayList<>(); |
// One episode per link: the tag is the link's own text and the play URL is scheme://host + href.
| | | for (int i = 0; i < eposides.size(); i++) { |
| | | String href = eposides.get(i).attr("href"); |
| | | String tag = eposides.get(i).ownText(); |
| | | HanmiShowEpisode ep = new HanmiShowEpisode(); |
| | | ep.setOrderBy(i + 1); |
| | | ep.setPlayUrl(String.format("%s://%s%s",uri.getScheme(),uri.getHost(),href)); |
| | | ep.setTag(tag); |
| | | episodeList.add(ep); |
| | | } |
| | | return episodeList; |
| | | } |
| | |
| | | List<HanmiShow> list = new ArrayList<>(); |
| | | for (HanmiShow show : showList) { |
| | | try { |
| | | if (!show.getUrl().startsWith("http")) { |
| | | show.setUrl("https://www.hmtv.me" + show.getUrl()); |
| | | } |
| | | list.add(parseShowDetail(show)); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | |
| | | return list; |
| | | } |
| | | |
| | | |
| | | public static void parseDY() throws UnsupportedEncodingException { |
| | | for(int p=1;p<20;p++) { |
| | | String url = String.format("https://www.wztaichuan.com/vod/show/area/%s/id/1/page/%d.html", URLEncoder.encode("韩国","UTF-8"), p); |
| | | try { |
| | | List<HanmiShow> showList = parseList(url); |
| | | showList = parseDetailList(showList); |
| | | for(HanmiShow show:showList){ |
| | | show.setTag("评分:"+show.getScore()); |
| | | show.setCategorys("电影"); |
| | | } |
| | | System.out.println("电影:"+p +"-"+showList.size()); |
| | | } catch (Exception e) { |
| | | e.printStackTrace(); |
| | | } |
| | | } |
| | | } |
| | | |
| | | |
| | | |
| | | |
| | | public static void main(String[] args) throws Exception { |
| | | List<HanmiShowEpisode> list = getShowEpisodesFromPlayUrl("https://www.hmtv.me/vplay/MTExNS0xLTA=.html"); |
| | | System.out.println(list); |
| | | // List<HanmiShowEpisode> list = getShowEpisodesFromPlayUrl("https://www.hmtv.me/vplay/MTExNS0xLTA=.html"); |
| | | // System.out.println(list); |
| | | // List<HanmiShow> showList = HanmiApiUtil.parseDetailList(HanmiApiUtil.parseList("https://www.hanjutv.me/hanju/page/" + 50)); |
| | | // System.out.printf(showList.toString()); |
| | | |
| | | // URI uri = URI.create("https://www.hanjutv.me/s/1579"); |
| | | // System.out.println(uri.getScheme()); |
| | | // parseList("https://www.wztaichuan.com/vod/type/id/5/page/1.html"); |
| | | parseDY(); |
| | | |
| | | // |
| | | // System.out.println(uri.getHost()); |
| | | // HanmiShow show=new HanmiShow(); |
| | | // show.setUrl("https://www.wztaichuan.com/vod/detail/id/10195.html"); |
| | | // show = parseShowDetail(show); |
| | | // System.out.println(show); |
| | | } |
| | | |
| | | } |