package com.everyday.word;

import com.everyday.word.dao.EnglishWordsMapper;
import com.everyday.word.entity.EnglishWords;
import com.everyday.word.service.inter.EnglishWordsService;
import com.everyday.word.utils.YouDaoWebApi;
import com.everyday.word.utils.YouDaoWebUtil;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import org.yeshi.utils.StringUtil;

import javax.annotation.Resource;
import java.io.File;
import java.io.FileInputStream;
import java.util.HashSet;
import java.util.List;
import java.util.Scanner;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Manually-run maintenance tasks for the English word table, packaged as Spring Boot tests.
 *
 * <ul>
 *   <li>{@link #addFromMOMOFiles()} imports spellings from local JSON vocabulary files.</li>
 *   <li>{@link #pullDataFromYouDaoWeb()} downloads per-word data through {@link YouDaoWebApi}
 *       for stored words that {@link YouDaoWebUtil} does not report as already saved.</li>
 * </ul>
 *
 * <p>Both tasks depend on a developer machine (hard-coded Windows path) and on external
 * services, so they are not suitable for unattended CI runs.
 *
 * @author hxh
 * @date 2024/9/14 13:34
 */
@SpringBootTest
public class WordsTest {

    /** Directory holding the vocabulary JSON files to import. */
    private static final String MOMO_FILES_DIR = "D:\\项目\\单词\\词库\\墨墨单词";

    /** Accepts spellings made only of ASCII letters; compiled once instead of per word. */
    private static final Pattern LETTERS_ONLY = Pattern.compile("^[a-zA-Z]+$");

    /** First page (inclusive) requested from the word table by {@link #pullDataFromYouDaoWeb()}. */
    private static final int FIRST_PAGE = 200;

    /** Last page (exclusive) requested from the word table by {@link #pullDataFromYouDaoWeb()}. */
    private static final int END_PAGE = 300;

    /** Number of words requested per page. */
    private static final int PAGE_SIZE = 100;

    /**
     * Responses of this length or shorter are not saved.
     * NOTE(review): presumably such short payloads are empty/error replies - confirm.
     */
    private static final int MIN_RESULT_LENGTH = 100;

    /** Fixed part of the pause between two remote requests, in milliseconds. */
    private static final int BASE_DELAY_MS = 1000;

    /** Upper bound (exclusive) of the random extra pause between requests, in milliseconds. */
    private static final int JITTER_MS = 1000;

    @Resource
    private EnglishWordsService englishWordsService;

    /** Remote lookup of one spelling; may throw whatever the underlying API call throws. */
    @FunctionalInterface
    private interface SpellingFetcher {
        String fetch(String spelling) throws Exception;
    }

    /** Persists the raw result downloaded for one spelling. */
    @FunctionalInterface
    private interface ResultSaver {
        void save(String spelling, String result) throws Exception;
    }

    /**
     * Collects the distinct letter-only spellings found in every file of {@link #MOMO_FILES_DIR}.
     *
     * <p>Each file is read as UTF-16 text and parsed as JSON; spellings are taken from
     * {@code data.book.vocabulary[i].spelling}. A file that cannot be read or parsed is
     * reported on stderr and skipped, so one bad file does not abort the import.
     *
     * @return the spellings found; empty when the directory is missing or unreadable
     */
    private Set<String> getFromMOMOFiles() {
        Set<String> words = new HashSet<>();
        File[] files = new File(MOMO_FILES_DIR).listFiles();
        if (files == null) {
            // listFiles() returns null (not an empty array) when the path is not a readable directory.
            System.err.println("Cannot list directory: " + MOMO_FILES_DIR);
            return words;
        }
        for (File file : files) {
            try {
                collectSpellings(readAll(file), words);
            } catch (Exception e) {
                // Best effort: report the offending file and carry on with the next one.
                System.err.println("Failed to import " + file);
                e.printStackTrace();
            }
        }
        return words;
    }

    /**
     * Reads a whole UTF-16 file into one string. Lines are concatenated without separators,
     * exactly as the JSON parser is subsequently fed.
     */
    private static String readAll(File file) throws Exception {
        StringBuilder text = new StringBuilder();
        // try-with-resources closes the scanner (and its stream) even when reading fails.
        try (Scanner scanner = new Scanner(new FileInputStream(file), "UTF-16")) {
            while (scanner.hasNextLine()) {
                text.append(scanner.nextLine());
            }
        }
        return text.toString();
    }

    /** Adds every letter-only {@code spelling} under {@code data.book.vocabulary} to {@code target}. */
    private static void collectSpellings(String json, Set<String> target) {
        JSONObject root = JSONObject.fromObject(json);
        // A missing level yields null here and surfaces as an exception handled by the caller.
        JSONArray vocabulary = root.optJSONObject("data").optJSONObject("book").optJSONArray("vocabulary");
        for (int i = 0; i < vocabulary.size(); i++) {
            String spelling = vocabulary.optJSONObject(i).optString("spelling");
            if (LETTERS_ONLY.matcher(spelling).matches()) {
                target.add(spelling);
            }
        }
    }

    /** Stores every spelling returned by {@link #getFromMOMOFiles()} through the word service. */
    @Test
    public void addFromMOMOFiles() {
        for (String spelling : getFromMOMOFiles()) {
            englishWordsService.addEnglishWords(EnglishWords.builder().spelling(spelling).build());
        }
    }

    /**
     * Walks pages {@link #FIRST_PAGE} (inclusive) to {@link #END_PAGE} (exclusive) of the stored
     * words and, for each page, downloads the "info" data and then the "lj" data for every
     * spelling that {@link YouDaoWebUtil} does not already list as saved.
     *
     * @throws InterruptedException if the thread is interrupted while pausing between requests
     */
    @Test
    public void pullDataFromYouDaoWeb() throws InterruptedException {
        EnglishWordsMapper.DaoQuery daoQuery = new EnglishWordsMapper.DaoQuery();
        for (int page = FIRST_PAGE; page < END_PAGE; page++) {
            List<EnglishWords> list = englishWordsService.list(daoQuery, page, PAGE_SIZE);
            if (list == null || list.isEmpty()) {
                // Nothing to download for this page.
                continue;
            }

            Set<String> pageSpellings = new HashSet<>();
            for (EnglishWords word : list) {
                pageSpellings.add(word.getSpelling());
            }

            Set<String> missingInfo = new HashSet<>(pageSpellings);
            missingInfo.removeAll(YouDaoWebUtil.getBaseInfoSpellings());
            downloadMissing("info", missingInfo,
                    s -> YouDaoWebApi.getInfoBySpelling(s),
                    (s, r) -> YouDaoWebUtil.saveBaseInfo(s, r));

            // Queried only after the "info" pass has finished, as in the original flow.
            Set<String> missingLj = new HashSet<>(pageSpellings);
            missingLj.removeAll(YouDaoWebUtil.getLJSpellings());
            downloadMissing("lj", missingLj,
                    s -> YouDaoWebApi.getLJBySpelling(s),
                    (s, r) -> YouDaoWebUtil.saveLJ(s, r));
        }
    }

    /**
     * Fetches and saves data for each spelling, pausing 1-2 seconds after every request.
     * The first failure is printed and stops the pass for the remaining spellings.
     *
     * @param label   prefix printed before each saved spelling ({@code label:spelling})
     * @param pending spellings that still need data
     * @param fetcher remote call producing the raw result for a spelling
     * @param saver   persistence call for a spelling and its result
     * @throws InterruptedException if interrupted during the pause between requests
     */
    private static void downloadMissing(String label, Set<String> pending,
                                        SpellingFetcher fetcher, ResultSaver saver)
            throws InterruptedException {
        for (String spelling : pending) {
            try {
                String result = fetcher.fetch(spelling);
                if (!StringUtil.isNullOrEmpty(result) && result.length() > MIN_RESULT_LENGTH) {
                    System.out.println(label + ":" + spelling);
                    saver.save(spelling, result);
                }
            } catch (Exception e) {
                e.printStackTrace();
                break;
            }
            // Randomised pause so consecutive requests are not evenly spaced.
            Thread.sleep(BASE_DELAY_MS + (int) (Math.random() * JITTER_MS));
        }
    }
}