src/main/java/com/everyday/word/dao/EnglishWordsMapper.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
src/main/java/com/everyday/word/entity/EnglishWords.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
src/main/java/com/everyday/word/exception/EnglishWordsException.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
src/main/java/com/everyday/word/service/EnglishWordsService.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
src/main/java/com/everyday/word/service/impl/EnglishWordsServiceImpl.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
src/main/java/com/everyday/word/utils/YouDaoWebApi.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
src/main/java/com/everyday/word/utils/YouDaoWebUtil.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
src/main/resources/application-dev.yml | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
src/main/resources/mapper/EnglishWordsMapper.xml | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
src/test/java/com/everyday/word/MapperTest.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 | |
src/test/java/com/everyday/word/WordsTest.java | ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史 |
src/main/java/com/everyday/word/dao/EnglishWordsMapper.java
New file @@ -0,0 +1,35 @@
package com.everyday.word.dao;

import java.lang.String;
import java.util.Date;
import java.lang.Long;
import java.util.List;

import org.apache.ibatis.annotations.Param;
import com.everyday.word.entity.EnglishWords;
import org.springframework.stereotype.Repository;
import org.yeshi.utils.mybatis.BaseMapper;

/**
 * MyBatis mapper for {@link EnglishWords} rows.
 *
 * <p>Adds a locking primary-key lookup and filtered list/count queries on top of the
 * operations inherited from {@link BaseMapper}.
 */
@Repository
public interface EnglishWordsMapper extends BaseMapper<EnglishWords> {

    /**
     * Loads one row by primary key using a {@code for update} select.
     *
     * @param id primary key of the row
     * @return the mapped row
     */
    EnglishWords selectByPrimaryKeyForUpdate(@Param("id") Long id);

    /**
     * Lists the rows matching the non-null filters of {@code query}, limited by
     * {@link DaoQuery#start} and {@link DaoQuery#count}.
     *
     * @param query filter, paging and sorting criteria
     * @return the matching rows
     */
    List<EnglishWords> list(@Param("query") DaoQuery query);

    /**
     * Counts the rows matching the non-null filters of {@code query}.
     *
     * @param query filter criteria
     * @return number of matching rows
     */
    long count(@Param("query") DaoQuery query);

    /**
     * Criteria holder for {@link #list(DaoQuery)} and {@link #count(DaoQuery)}.
     * A filter field left {@code null} is not applied.
     */
    class DaoQuery {
        /** Exact match on the primary key. */
        public Long id;
        /** Exact match on the spelling. */
        public String spelling;
        /** Exact match on the US phonetic transcription. */
        public String phoneticUS;
        /** Exact match on the UK phonetic transcription. */
        public String phoneticUK;
        /** Lower bound (inclusive) on the creation time. */
        public Date minCreateTime;
        /** Upper bound (exclusive) on the creation time. */
        public Date maxCreateTime;
        /** Lower bound (inclusive) on the update time. */
        public Date minUpdateTime;
        /** Upper bound (exclusive) on the update time. */
        public Date maxUpdateTime;
        /** Offset of the first row to return. */
        public long start;
        /** Maximum number of rows to return. */
        public int count;
        /** Entries for the ORDER BY clause; leave {@code null} for no explicit ordering. */
        public List<String> sortList;
    }
}
src/main/java/com/everyday/word/entity/EnglishWords.java
New file @@ -0,0 +1,51 @@
package com.everyday.word.entity;

import lombok.Builder;
import lombok.Data;
import org.springframework.data.annotation.Id;
import org.yeshi.utils.generater.mybatis.Column;
import org.yeshi.utils.generater.mybatis.Table;

import java.util.Date;

/**
 * An English word, stored in {@code table_english_words}.
 *
 * @author hxh
 * @title: EnglishWords
 * @description: English word
 * @date 2024/9/14 13:23
 */
@Table("table_english_words")
@Builder
@Data
public class EnglishWords {

    /** Primary key ID. */
    @Id
    @Column(name = "_id")
    private Long id;

    /** Spelling of the word. */
    @Column(name = "_spelling")
    private String spelling;

    /** American (US) phonetic transcription. */
    @Column(name = "_phonetic_us")
    private String phoneticUS;

    /** British (UK) phonetic transcription. */
    @Column(name = "_phonetic_uk")
    private String phoneticUK;

    /** Creation time of the row. */
    @Column(name = "_create_time")
    private Date createTime;

    /** Last update time of the row. */
    @Column(name = "_update_time")
    private Date updateTime;
}
src/main/java/com/everyday/word/exception/EnglishWordsException.java
New file @@ -0,0 +1,17 @@
package com.everyday.word.exception;

import org.springframework.security.core.AuthenticationException;

/**
 * Exception raised for invalid English-word operations.
 *
 * @author hxh
 * @description Word exception
 * @date 13:41 2024/9/14
 */
public class EnglishWordsException extends AuthenticationException {

    /**
     * Creates the exception with a detail message.
     *
     * @param message description of what went wrong
     */
    public EnglishWordsException(String message) {
        super(message);
    }
}
src/main/java/com/everyday/word/service/EnglishWordsService.java
New file @@ -0,0 +1,41 @@
package com.everyday.word.service;

import com.everyday.word.dao.EnglishWordsMapper;
import com.everyday.word.entity.EnglishWords;
import com.everyday.word.exception.EnglishWordsException;

import java.util.List;

/**
 * Service API for storing and querying English words.
 *
 * @author hxh
 * @title: EnglishWordsService
 * @date 2024/9/14 13:40
 */
public interface EnglishWordsService {

    /**
     * Adds a word.
     *
     * @author hxh
     * @date 13:43 2024/9/14
     * @param word the word to add
     * @throws EnglishWordsException if the word cannot be accepted
     */
    void addEnglishWords(EnglishWords word) throws EnglishWordsException;

    /**
     * Looks a word up by its spelling.
     *
     * @author hxh
     * @date 13:43 2024/9/14
     * @param spelling spelling to search for
     * @return the stored word for that spelling
     */
    EnglishWords selectBySpelling(String spelling);

    /**
     * Lists one page of the words matching {@code query}.
     *
     * @param query    filter criteria
     * @param page     page number
     * @param pageSize number of rows per page
     * @return the words on the requested page
     */
    List<EnglishWords> list(EnglishWordsMapper.DaoQuery query, int page, int pageSize);

    /**
     * Counts the words matching {@code query}.
     *
     * @param query filter criteria
     * @return number of matching words
     */
    long count(EnglishWordsMapper.DaoQuery query);
}
src/main/java/com/everyday/word/service/impl/EnglishWordsServiceImpl.java
New file @@ -0,0 +1,67 @@
package com.everyday.word.service.impl;

import com.everyday.word.dao.EnglishWordsMapper;
import com.everyday.word.entity.EnglishWords;
import com.everyday.word.exception.EnglishWordsException;
import com.everyday.word.service.EnglishWordsService;
import org.springframework.stereotype.Service;
import org.yeshi.utils.StringUtil;

import javax.annotation.Resource;
import java.util.Date;
import java.util.List;

/**
 * {@link EnglishWordsService} implementation backed by {@link EnglishWordsMapper}.
 *
 * @author hxh
 * @title: EnglishWordsServiceImpl
 * @date 2024/9/14 13:40
 */
@Service
public class EnglishWordsServiceImpl implements EnglishWordsService {

    @Resource
    private EnglishWordsMapper englishWordsMapper;

    /**
     * Inserts the word, or updates the existing row when a word with the same spelling
     * is already stored.
     *
     * <p>On update the word receives the existing row's id and a fresh update time; on
     * insert a missing creation time is set to now.
     *
     * @param word the word to store; must be non-null with a non-empty spelling
     * @throws EnglishWordsException if {@code word} is null or has no spelling
     */
    @Override
    public void addEnglishWords(EnglishWords word) throws EnglishWordsException {
        if (word == null || StringUtil.isNullOrEmpty(word.getSpelling())) {
            throw new EnglishWordsException("数据不完整");
        }
        EnglishWords oldWord = selectBySpelling(word.getSpelling());
        if (oldWord != null) {
            word.setId(oldWord.getId());
            word.setUpdateTime(new Date());
            englishWordsMapper.updateByPrimaryKeySelective(word);
            return;
        }
        if (word.getCreateTime() == null) {
            word.setCreateTime(new Date());
        }
        englishWordsMapper.insertSelective(word);
    }

    /**
     * Looks a word up by exact spelling.
     *
     * @param spelling spelling to search for
     * @return the first matching word, or {@code null} when none is stored
     */
    @Override
    public EnglishWords selectBySpelling(String spelling) {
        EnglishWordsMapper.DaoQuery daoQuery = new EnglishWordsMapper.DaoQuery();
        daoQuery.spelling = spelling;
        // Only the first hit is needed.
        daoQuery.count = 1;
        List<EnglishWords> list = englishWordsMapper.list(daoQuery);
        return list.isEmpty() ? null : list.get(0);
    }

    /**
     * Lists one page of the words matching {@code query}. The paging fields of
     * {@code query} are overwritten.
     *
     * @param query    filter criteria; {@code start} and {@code count} are set by this call
     * @param page     1-based page number
     * @param pageSize number of rows per page
     * @return the words on the requested page
     */
    @Override
    public List<EnglishWords> list(EnglishWordsMapper.DaoQuery query, int page, int pageSize) {
        // Widen to long BEFORE multiplying: the int product overflows for large pages.
        query.start = (long) (page - 1) * pageSize;
        query.count = pageSize;
        return englishWordsMapper.list(query);
    }

    /**
     * Counts the words matching {@code query}.
     *
     * @param query filter criteria
     * @return number of matching words
     */
    @Override
    public long count(EnglishWordsMapper.DaoQuery query) {
        return englishWordsMapper.count(query);
    }
}
src/main/java/com/everyday/word/utils/YouDaoWebApi.java
New file @@ -0,0 +1,101 @@
package com.everyday.word.utils;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import javax.script.Invocable;
import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

/**
 * Scrapes word data from the Youdao result web page.
 *
 * <p>The page embeds its data in a script that assigns {@code window.__NUXT__}; that
 * script is evaluated in a JavaScript engine and the object is returned as JSON text.
 *
 * @author hxh
 * @title: YouDaoWebApi
 * @description: Youdao web page API
 * @date 2024/9/14 15:48
 */
public class YouDaoWebApi {

    /** Result page URL; the placeholder receives the URL-encoded query. */
    private static final String RESULT_URL_TEMPLATE = "https://www.youdao.com/result?word=%s&lang=en";

    /** Browser user agent sent with every page request. */
    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/536.36 (KHTML, like Gecko) Chrome/128.0.0.0 Safari/536.36";

    /** Marker identifying the script element that carries the page data. */
    private static final String NUXT_MARKER = "window.__NUXT__=";

    private static final ScriptEngineManager scriptEngineManager = new ScriptEngineManager();

    /**
     * Evaluates the page's data script and serializes {@code __NUXT__} to JSON.
     *
     * @param script content of the script element that assigns {@code window.__NUXT__}
     * @return the JSON text, or {@code null} when no JavaScript engine is available,
     *         evaluation fails, or the script yields no value
     */
    private static String withDrawValidData(String script) {
        // There is no browser "window" in the engine, so point the script at a stand-in object.
        StringBuilder sb = new StringBuilder("var _window={};\n");
        sb.append(script.replace("window.", "_window."));
        sb.append("\nfunction getData(){return JSON.stringify(_window.__NUXT__);};");

        ScriptEngine engine = scriptEngineManager.getEngineByName("javascript");
        if (engine == null) {
            // getEngineByName returns null when no matching engine is installed on this JVM.
            System.err.println("No JavaScript script engine is available; cannot extract page data");
            return null;
        }
        try {
            engine.eval(sb.toString());
            Object callbackvalue = ((Invocable) engine).invokeFunction("getData");
            if (callbackvalue != null) {
                return callbackvalue.toString();
            }
        } catch (Exception e) {
            // Deliberately best-effort: a page that cannot be evaluated yields null.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Downloads a page and extracts its embedded data.
     *
     * @param url page to download
     * @return the JSON text of the page data, or {@code null} when the page has no data
     *         script or it cannot be evaluated
     * @throws IOException if the page cannot be fetched
     * @author hxh
     * @date 16:19 2024/9/14
     */
    private static String getPageData(String url) throws IOException {
        Document doc = Jsoup.connect(url).userAgent(USER_AGENT).timeout(30 * 1000).get();
        Elements els = doc.getElementsByTag("script");
        for (Element e : els) {
            String text = e.html();
            if (text.contains(NUXT_MARKER)) {
                return withDrawValidData(text);
            }
        }
        return null;
    }

    /**
     * Fetches the result page data for a raw query string.
     *
     * @param query unencoded value for the {@code word} URL parameter
     * @return the JSON text of the page data, or {@code null} when none could be extracted
     * @throws IOException if the page cannot be fetched
     */
    private static String queryResultPage(String query) throws IOException {
        return getPageData(String.format(RESULT_URL_TEMPLATE, URLEncoder.encode(query, "UTF-8")));
    }

    /**
     * Gets the information of a word.
     *
     * @param spelling spelling of the word
     * @return the JSON text of the page data, or {@code null} when none could be extracted
     * @throws IOException if the page cannot be fetched
     * @author hxh
     * @date 16:24 2024/9/14
     */
    public static String getInfoBySpelling(String spelling) throws IOException {
        return queryResultPage(spelling);
    }

    /**
     * Gets the example sentences of a word (queried with the {@code lj:} prefix).
     *
     * @param spelling spelling of the word
     * @return the JSON text of the page data, or {@code null} when none could be extracted
     * @throws IOException if the page cannot be fetched
     * @author hxh
     * @date 16:24 2024/9/14
     */
    public static String getLJBySpelling(String spelling) throws IOException {
        return queryResultPage("lj:" + spelling);
    }

    /** Manual smoke run: prints the info and example-sentence data for one word. */
    public static void main(String[] args) throws IOException {
        String value = getInfoBySpelling("administrator");
        System.out.println(value);
        value = getLJBySpelling("administrator");
        System.out.println(value);
    }
}
src/main/java/com/everyday/word/utils/YouDaoWebUtil.java
New file @@ -0,0 +1,62 @@
package com.everyday.word.utils;

import org.yeshi.utils.FileUtil;

import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

/**
 * Stores Youdao web page data as one JSON file per word and reports which words
 * already have a stored file.
 *
 * @author hxh
 * @title: YouDaoWebUtil
 * @description: Youdao web page utility
 * @date 2024/9/14 16:30
 */
public class YouDaoWebUtil {

    private final static String BASE_DIR = "D:\\项目\\单词\\词库\\有道网页资源";

    /** Sub-directory of {@link #BASE_DIR} holding the basic word information. */
    private final static String BASE_INFO_DIR = "基本信息";

    /** Sub-directory of {@link #BASE_DIR} holding the example sentences. */
    private final static String LJ_DIR = "例句";

    /** Files of at most this many bytes are not counted as stored. */
    private final static long MIN_VALID_FILE_LENGTH = 100;

    /**
     * Saves the basic information of a word.
     *
     * @param spelling spelling of the word, used as the file name
     * @param info     content to store, written as UTF-8
     * @throws IOException if the content cannot be encoded or written
     */
    public static void saveBaseInfo(String spelling, String info) throws IOException {
        saveJson(BASE_INFO_DIR, spelling, info);
    }

    /**
     * Lists the spellings that already have stored basic information.
     *
     * @return spellings with a stored file longer than 100 bytes; empty when the
     *         directory does not exist or cannot be read
     */
    public static Set<String> getBaseInfoSpellings() {
        return listSpellings(BASE_INFO_DIR);
    }

    /**
     * Saves the example sentences of a word.
     *
     * @param spelling spelling of the word, used as the file name
     * @param info     content to store, written as UTF-8
     * @throws IOException if the content cannot be encoded or written
     */
    public static void saveLJ(String spelling, String info) throws IOException {
        saveJson(LJ_DIR, spelling, info);
    }

    /**
     * Lists the spellings that already have stored example sentences.
     *
     * @return spellings with a stored file longer than 100 bytes; empty when the
     *         directory does not exist or cannot be read
     */
    public static Set<String> getLJSpellings() {
        return listSpellings(LJ_DIR);
    }

    /** Writes {@code info} as UTF-8 to {@code BASE_DIR\subDir\spelling.json}. */
    private static void saveJson(String subDir, String spelling, String info) throws IOException {
        FileUtil.saveAsFileByte(info.getBytes("UTF-8"), String.format("%s\\%s\\%s.json", BASE_DIR, subDir, spelling));
    }

    /** Collects the spellings of all sufficiently large files in {@code BASE_DIR\subDir}. */
    private static Set<String> listSpellings(String subDir) {
        Set<String> spellings = new HashSet<>();
        File[] files = new File(String.format("%s\\%s", BASE_DIR, subDir)).listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            return spellings;
        }
        for (File file : files) {
            if (file.length() > MIN_VALID_FILE_LENGTH) {
                spellings.add(file.getName().replace(".json", ""));
            }
        }
        return spellings;
    }

    /** Manual smoke run of the two listing methods. */
    public static void main(String[] args) throws IOException {
//        String spelling = "candlestick";
//        String info = YouDaoWebApi.getInfoBySpelling(spelling);
//        saveBaseInfo(spelling, info);
//        info = YouDaoWebApi.getLJBySpelling(spelling);
//        saveLJ(spelling, info);
        getBaseInfoSpellings();
        getLJSpellings();
    }
}
src/main/resources/application-dev.yml
@@ -5,9 +5,11 @@ spring: # devtools: # livereload: # enabled: true devtools: restart: enabled: true additional-exclude: "src/main/java" exclude: "static/**" main: allow-bean-definition-overriding: true http: @@ -16,13 +18,9 @@ charset: UTF-8 enabled: true datasource: # url: jdbc:mysql://gz-cdb-r13d0yi9.sql.tencentcdb.com:62929/taoke_autopay?serverTimezone=GMT%2B8 # username: root # password: Yeshi2016@ url: jdbc:mysql://rm-f8z0j143g151fp995no.mysql.rds.aliyuncs.com:3306/taoke_autopay?serverTimezone=GMT%2B8&useUnicode=true&characterEncoding=utf8 url: jdbc:mysql://gz-cdb-r13d0yi9.sql.tencentcdb.com:62929/everyday_words?serverTimezone=GMT%2B8&useUnicode=true&characterEncoding=utf8 username: root password: xcp123123@ password: Yeshi2016@ driver-class-name: com.mysql.jdbc.Driver type: com.alibaba.druid.pool.DruidDataSource @@ -49,6 +47,6 @@ config: classpath:ehcache.xml mybatis: mapper-locations: classpath:mapper/*.xml type-aliases-package: com.taoke.autopay.entity type-aliases-package: com.everyday.word.entity configuration: log-impl: org.apache.ibatis.logging.stdout.StdOutImpl src/main/resources/mapper/EnglishWordsMapper.xml
New file @@ -0,0 +1,70 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<!-- SQL statements for com.everyday.word.dao.EnglishWordsMapper (table_english_words). -->
<mapper namespace="com.everyday.word.dao.EnglishWordsMapper">
    <resultMap id="BaseResultMap" type="com.everyday.word.entity.EnglishWords">
        <id column="_id" property="id" jdbcType="BIGINT"/>
        <result column="_spelling" property="spelling" jdbcType="VARCHAR"/>
        <result column="_phonetic_us" property="phoneticUS" jdbcType="VARCHAR"/>
        <result column="_phonetic_uk" property="phoneticUK" jdbcType="VARCHAR"/>
        <result column="_create_time" property="createTime" jdbcType="TIMESTAMP"/>
        <result column="_update_time" property="updateTime" jdbcType="TIMESTAMP"/>
    </resultMap>

    <sql id="Base_Column_List">_id,_spelling,_phonetic_us,_phonetic_uk,_create_time,_update_time</sql>

    <select id="selectByPrimaryKey" resultMap="BaseResultMap" parameterType="java.lang.Long">
        select
        <include refid="Base_Column_List"/>
        from table_english_words where _id = #{id,jdbcType=BIGINT}
    </select>

    <!-- Same lookup as selectByPrimaryKey, but with a "for update" row lock. -->
    <select id="selectByPrimaryKeyForUpdate" resultMap="BaseResultMap" parameterType="java.lang.Long">
        select
        <include refid="Base_Column_List"/>
        from table_english_words where _id = #{id,jdbcType=BIGINT} for update
    </select>

    <!-- Shared filters for list/count: each null field is skipped; min bounds are inclusive, max bounds exclusive. -->
    <sql id="listWhereSQL">
        <if test="query.id!=null">AND _id = #{query.id}</if>
        <if test="query.spelling!=null">AND _spelling = #{query.spelling}</if>
        <if test="query.phoneticUS!=null">AND _phonetic_us = #{query.phoneticUS}</if>
        <if test="query.phoneticUK!=null">AND _phonetic_uk = #{query.phoneticUK}</if>
        <if test="query.minCreateTime!=null">AND _create_time >= #{query.minCreateTime}</if>
        <if test="query.maxCreateTime!=null">AND #{query.maxCreateTime} > _create_time</if>
        <if test="query.minUpdateTime!=null">AND _update_time >= #{query.minUpdateTime}</if>
        <if test="query.maxUpdateTime!=null">AND #{query.maxUpdateTime} > _update_time</if>
    </sql>

    <select id="list" resultMap="BaseResultMap">
        select
        <include refid="Base_Column_List"/>
        from table_english_words where 1=1
        <include refid="listWhereSQL"/>
        <if test="query.sortList!=null">
            <!--
              ORDER BY items must be substituted as SQL text with ${}: binding them with #{}
              sends a string parameter, so the rows would be ordered by a constant.
              WARNING: ${} is not escaped. sortList must only contain trusted,
              application-defined entries and never raw user input.
            -->
            <foreach collection="query.sortList" item="item" open=" order by " separator=",">${item}</foreach>
        </if>
        limit #{query.start},#{query.count}
    </select>

    <select id="count" resultType="java.lang.Long">
        select count(*) from table_english_words where 1=1
        <include refid="listWhereSQL"/>
    </select>

    <delete id="deleteByPrimaryKey" parameterType="java.lang.Long">delete from table_english_words where _id = #{id,jdbcType=BIGINT}</delete>

    <insert id="insert" parameterType="com.everyday.word.entity.EnglishWords" useGeneratedKeys="true" keyProperty="id">insert into table_english_words (_id,_spelling,_phonetic_us,_phonetic_uk,_create_time,_update_time) values (#{id,jdbcType=BIGINT},#{spelling,jdbcType=VARCHAR},#{phoneticUS,jdbcType=VARCHAR},#{phoneticUK,jdbcType=VARCHAR},#{createTime,jdbcType=TIMESTAMP},#{updateTime,jdbcType=TIMESTAMP})</insert>

    <!-- Inserts only the non-null properties. -->
    <insert id="insertSelective" parameterType="com.everyday.word.entity.EnglishWords" useGeneratedKeys="true" keyProperty="id">
        insert into table_english_words
        <trim prefix="(" suffix=")" suffixOverrides=",">
            <if test="id != null">_id,</if>
            <if test="spelling != null">_spelling,</if>
            <if test="phoneticUS != null">_phonetic_us,</if>
            <if test="phoneticUK != null">_phonetic_uk,</if>
            <if test="createTime != null">_create_time,</if>
            <if test="updateTime != null">_update_time,</if>
        </trim>
        values
        <trim prefix="(" suffix=")" suffixOverrides=",">
            <if test="id != null">#{id,jdbcType=BIGINT},</if>
            <if test="spelling != null">#{spelling,jdbcType=VARCHAR},</if>
            <if test="phoneticUS != null">#{phoneticUS,jdbcType=VARCHAR},</if>
            <if test="phoneticUK != null">#{phoneticUK,jdbcType=VARCHAR},</if>
            <if test="createTime != null">#{createTime,jdbcType=TIMESTAMP},</if>
            <if test="updateTime != null">#{updateTime,jdbcType=TIMESTAMP},</if>
        </trim>
    </insert>

    <update id="updateByPrimaryKey" parameterType="com.everyday.word.entity.EnglishWords">update table_english_words set _spelling = #{spelling,jdbcType=VARCHAR},_phonetic_us = #{phoneticUS,jdbcType=VARCHAR},_phonetic_uk = #{phoneticUK,jdbcType=VARCHAR},_create_time = #{createTime,jdbcType=TIMESTAMP},_update_time = #{updateTime,jdbcType=TIMESTAMP} where _id = #{id,jdbcType=BIGINT}</update>

    <!-- Updates only the non-null properties of the row with the given id. -->
    <update id="updateByPrimaryKeySelective" parameterType="com.everyday.word.entity.EnglishWords">
        update table_english_words
        <set>
            <if test="spelling != null">_spelling=#{spelling,jdbcType=VARCHAR},</if>
            <if test="phoneticUS != null">_phonetic_us=#{phoneticUS,jdbcType=VARCHAR},</if>
            <if test="phoneticUK != null">_phonetic_uk=#{phoneticUK,jdbcType=VARCHAR},</if>
            <if test="createTime != null">_create_time=#{createTime,jdbcType=TIMESTAMP},</if>
            <if test="updateTime != null">_update_time=#{updateTime,jdbcType=TIMESTAMP},</if>
        </set>
        where _id = #{id,jdbcType=BIGINT}
    </update>
</mapper>
src/test/java/com/everyday/word/MapperTest.java
New file @@ -0,0 +1,22 @@
package com.everyday.word;

import com.everyday.word.entity.EnglishWords;
import org.junit.jupiter.api.Test;
import org.yeshi.utils.generater.mybatis.MyBatisMapperUtil;

/**
 * Launcher for the MyBatis mapper generator; runs without a Spring context.
 *
 * @author hxh
 * @title: MapperTest
 * @date 2024/9/14 13:35
 */
//@SpringBootTest
public class MapperTest {

    /** Invokes the mapper generator for the {@link EnglishWords} entity. */
    @Test
    public void createMapper() {
        final Class<EnglishWords> entityType = EnglishWords.class;
        MyBatisMapperUtil.createMapper(entityType);
    }
}
src/test/java/com/everyday/word/WordsTest.java
New file @@ -0,0 +1,127 @@
package com.everyday.word;

import com.everyday.word.dao.EnglishWordsMapper;
import com.everyday.word.entity.EnglishWords;
import com.everyday.word.service.EnglishWordsService;
import com.everyday.word.utils.YouDaoWebApi;
import com.everyday.word.utils.YouDaoWebUtil;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import org.yeshi.utils.StringUtil;

import javax.annotation.Resource;
import java.io.File;
import java.io.FileInputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Manual data-loading tasks: import words from local MOMO vocabulary files and pull
 * word data from the Youdao web page.
 *
 * @author hxh
 * @title: WordsTest
 * @date 2024/9/14 13:34
 */
@SpringBootTest
public class WordsTest {

    /** Accepts spellings made of ASCII letters only; compiled once instead of per word. */
    private static final Pattern LETTERS_ONLY = Pattern.compile("^[a-zA-Z]+$");

    @Resource
    private EnglishWordsService englishWordsService;

    /**
     * Reads every file of the MOMO vocabulary directory and collects the letters-only
     * spellings found under {@code data.book.vocabulary}.
     *
     * <p>A file that cannot be read or parsed is reported and skipped.
     *
     * @return the collected spellings; empty when the directory cannot be listed
     */
    private Set<String> getFromMOMOFiles() {
        Set<String> words = new HashSet<>();
        File dir = new File("D:\\项目\\单词\\词库\\墨墨单词");
        File[] fs = dir.listFiles();
        if (fs == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            return words;
        }
        for (File f : fs) {
            StringBuilder text = new StringBuilder();
            // try-with-resources closes the scanner (and the stream) even when reading or parsing fails.
            try (Scanner scanner = new Scanner(new FileInputStream(f), "UTF-16")) {
                while (scanner.hasNextLine()) {
                    text.append(scanner.nextLine());
                }
                JSONObject root = JSONObject.fromObject(text.toString());
                JSONArray dicts = root.optJSONObject("data").optJSONObject("book").optJSONArray("vocabulary");
                for (int i = 0; i < dicts.size(); i++) {
                    String spelling = dicts.optJSONObject(i).optString("spelling");
                    if (LETTERS_ONLY.matcher(spelling).matches()) {
                        words.add(spelling);
                    }
                }
            } catch (Exception e) {
                // Deliberately best-effort: one bad file must not abort the whole import.
                e.printStackTrace();
            }
        }
        return words;
    }

    /** Stores every spelling found in the MOMO files through the word service. */
    @Test
    public void addFromMOMOFiles() {
        Set<String> sets = getFromMOMOFiles();
        for (String s : sets) {
            englishWordsService.addEnglishWords(EnglishWords.builder().spelling(s).build());
        }
    }

    /**
     * For one page of stored words, downloads and saves the Youdao basic information and
     * example sentences that are not saved yet.
     *
     * <p>Each pass stops at the first failure, and requests are spaced one to two seconds apart.
     *
     * @throws InterruptedException if the pause between requests is interrupted
     */
    @Test
    public void pullDataFromYouDaoWeb() throws InterruptedException {
        EnglishWordsMapper.DaoQuery daoQuery = new EnglishWordsMapper.DaoQuery();
        int page = 5;
        int pageSize = 100;
        List<EnglishWords> list = englishWordsService.list(daoQuery, page, pageSize);
        Set<String> oSpellings = new HashSet<>();
        for (EnglishWords e : list) {
            oSpellings.add(e.getSpelling());
        }

        // Pass 1: basic information for words without a saved file.
        Set<String> spellings = new HashSet<>(oSpellings);
        Set<String> infoSellings = YouDaoWebUtil.getBaseInfoSpellings();
        spellings.removeAll(infoSellings);
        for (String s : spellings) {
            try {
                String result = YouDaoWebApi.getInfoBySpelling(s);
                if (!StringUtil.isNullOrEmpty(result) && result.length() > 100) {
                    YouDaoWebUtil.saveBaseInfo(s, result);
                }
            } catch (Exception e) {
                e.printStackTrace();
                break;
            }
            Thread.sleep(1000 + (int) (Math.random() * 1000));
        }

        // Pass 2: example sentences for words without a saved file.
        Set<String> ljSellings = YouDaoWebUtil.getLJSpellings();
        spellings = new HashSet<>(oSpellings);
        spellings.removeAll(ljSellings);
        for (String s : spellings) {
            try {
                String result = YouDaoWebApi.getLJBySpelling(s);
                if (!StringUtil.isNullOrEmpty(result) && result.length() > 100) {
                    YouDaoWebUtil.saveLJ(s, result);
                }
            } catch (Exception e) {
                e.printStackTrace();
                break;
            }
            Thread.sleep(1000 + (int) (Math.random() * 1000));
        }
    }
}