利用 HtmlUnit 下载网页上的文件详解 - 编程语言

import java.io.FileOutputStream; 
import java.io.InputStream; 
import java.util.regex.Matcher; 
import java.util.regex.Pattern; 
  
import org.apache.commons.io.IOUtils; 
  
import com.gargoylesoftware.htmlunit.Page; 
import com.gargoylesoftware.htmlunit.WebClient; 
  
public class DownloadFile { 
    public static void main(String[] args) throws Exception { 
        String baseUrl = "<a href="http://hanyu.iciba.com/hanzi/1.shtml";" target="_blank">http://hanyu.iciba.com/hanzi/1.shtml";</a> 
        String bihuaRegex = "class=/"guanggao/"[^<]*<[^<]*<param//s*name=/"movie/"//s*value=/"([^/"]*)"; 
        String aSoundRegex = "class=/"js12/">ā.*?name=/"FlashVars/"//s*value=/"f=([^/"]*)"; 
        String eSoundRegex = "class=/"js12/">ē.*?name=/"FlashVars/"//s*value=/"f=([^/"]*)"; 
        WebClient client = new WebClient(); 
        client.getOptions().setCssEnabled(false); 
        client.getOptions().setJavaScriptEnabled(false); 
        client.getOptions().setThrowExceptionOnFailingStatusCode(false); 
        client.getOptions().setThrowExceptionOnScriptError(false); 
        Page page = client.getPage(baseUrl); 
        String source = page.getWebResponse().getContentAsString(); 
        Matcher mBihuan = Regex(source, bihuaRegex); 
        Matcher mA = Regex(source, aSoundRegex); 
        Matcher mE = Regex(source, eSoundRegex); 
        while(mBihuan.find()) { 
            String url = "<a href="http://hanyu.iciba.com/" + mBihuan.group" target="_blank">http://hanyu.iciba.com/" + mBihuan.group</a>(1); 
            page = client.getPage(url); 
            saveFile(page, "d:/testDownload/bihua.swf"); 
        } 
        while(mA.find()) { 
            String url = mA.group(1); 
            page = client.getPage(url); 
            saveFile(page, "d:/testDownload/a.mp3"); 
        } 
        while(mE.find()) { 
            String url = mE.group(1); 
            page = client.getPage(url); 
            saveFile(page, "d:/testDownload/e.mp3"); 
        } 
    } 
      
    public static Matcher Regex(String source, String regex) { 
        Pattern p = Pattern.compile(regex, Pattern.DOTALL); 
        return p.matcher(source); 
    } 
      
    public static void saveFile(Page page, String file) throws Exception { 
        InputStream is = page.getWebResponse().getContentAsStream(); 
        FileOutputStream output = new FileOutputStream(file); 
        IOUtils.copy(is, output); 
        output.close(); 
    } 
}

原创文章,作者:ItWorker,如若转载,请注明出处:https://blog.ytso.com/10334.html

(0)
上一篇 2021年7月19日
下一篇 2021年7月19日

相关推荐

发表回复

登录后才能评论