Java JDK自带的HTML解析器 (HTMLEditorKit.Parser) 示例详解 - 编程语言

HtmlParseDemo.java

import java.io.*; 
import java.net.*; 
import javax.swing.text.*; 
import javax.swing.text.html.*; 
import javax.swing.text.html.parser.*; 
 
/**
 * Small command-line demo of the HTML parser that ships with the JDK:
 * {@link HTMLEditorKit.Parser} and its implementing class
 * {@link ParserDelegator} from the Swing text packages.
 *
 * <p>The single command-line argument is either a URL (anything containing
 * {@code "://"} after the first character) or a local file name. The input
 * is parsed and every parser event is reported through an
 * {@code HTMLParseLister} callback.
 */
public class HtmlParseDemo {

    /** Exit status used when the program is started without an argument. */
    private static final int EXIT_USAGE = 1;

    /**
     * Program entry point.
     *
     * @param args {@code args[0]} is the URL or file to parse; when no
     *             argument is given a usage line is printed to standard
     *             error and the JVM exits with a non-zero status
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            // The usage text must name the real class, otherwise the printed
            // command line does not work when copied.
            System.err.println("Usage: java HtmlParseDemo [url | file]");
            // A usage error is a failure; report it as such to the shell.
            System.exit(EXIT_USAGE);
        }
        String spec = args[0];

        // try-with-resources closes the reader even when parsing fails,
        // which the earlier explicit close() at the end of the block did not.
        try (Reader r = openReader(spec)) {
            System.out.println("About to parse " + spec);
            HTMLEditorKit.Parser parser = new ParserDelegator();
            // The final argument tells the parser to ignore any charset
            // directive found in the document.
            parser.parse(r, new HTMLParseLister(), true);
        } catch (Exception e) {
            // Top-level boundary of a demo program: report everything.
            System.err.println("Error: " + e);
            e.printStackTrace(System.err);
        }
    }

    /**
     * Opens a character stream for the given URL or file name.
     *
     * @param spec a URL when it contains {@code "://"} at an index greater
     *             than zero, otherwise a file name
     * @return a reader over the content; the caller is responsible for
     *         closing it
     * @throws IOException if the URL or file cannot be opened, or if the URL
     *                     content is neither an {@link InputStream} nor a
     *                     {@link Reader}
     */
    private static Reader openReader(String spec) throws IOException {
        if (spec.indexOf("://") > 0) {
            Object content = new URL(spec).getContent();
            if (content instanceof InputStream) {
                // NOTE(review): decodes with the platform default charset,
                // as the file branch below does; confirm whether the charset
                // from the response headers should be honoured instead.
                return new InputStreamReader((InputStream) content);
            }
            if (content instanceof Reader) {
                return (Reader) content;
            }
            throw new IOException("Bad URL content type.");
        }
        return new FileReader(spec);
    }
}

HTMLParseLister.java

/**
 * Parser callback that prints an indented structural outline of an HTML
 * document to standard output.
 *
 * <p>The parser invokes one {@code handle...} method per piece of the
 * document. Start tags increase the indentation by three columns for
 * everything that follows, end tags decrease it again (never below zero).
 */
class HTMLParseLister extends HTMLEditorKit.ParserCallback
{
    /** Number of spaces currently printed in front of each listing line. */
    int indentSize = 0;

    /** Moves the listing one level (three columns) to the right. */
    protected void indent() {
        indentSize = indentSize + 3;
    }

    /** Moves the listing one level to the left, stopping at column zero. */
    protected void unIndent() {
        indentSize = Math.max(0, indentSize - 3);
    }

    /** Prints the current indentation without a trailing line break. */
    protected void pIndent() {
        StringBuilder padding = new StringBuilder();
        while (padding.length() < indentSize) {
            padding.append(' ');
        }
        System.out.print(padding);
    }

    /**
     * Reports a run of text by its length.
     *
     * @param data the text characters
     * @param pos  position reported by the parser (not printed)
     */
    @Override
    public void handleText(char[] data, int pos) {
        pIndent();
        int length = data.length;
        System.out.println("Text(" + length + " chars)");
    }

    /**
     * Reports a comment by its length.
     *
     * @param data the comment characters
     * @param pos  position reported by the parser (not printed)
     */
    @Override
    public void handleComment(char[] data, int pos) {
        pIndent();
        int length = data.length;
        System.out.println("Comment(" + length + " chars)");
    }

    /**
     * Reports an opening tag with its attribute count, then indents the
     * lines that follow.
     *
     * @param t   the tag that was opened
     * @param a   the attributes of the tag
     * @param pos position reported by the parser (not printed)
     */
    @Override
    public void handleStartTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        pIndent();
        String tagName = t.toString();
        int attributeCount = a.getAttributeCount();
        System.out.println("Tag start(<" + tagName + ">, " + attributeCount + " attrs)");
        indent();
    }

    /**
     * Un-indents first, so the closing tag lines up with its opening tag,
     * then reports the closing tag.
     *
     * @param t   the tag that was closed
     * @param pos position reported by the parser (not printed)
     */
    @Override
    public void handleEndTag(HTML.Tag t, int pos) {
        unIndent();
        pIndent();
        String tagName = t.toString();
        System.out.println("Tag end(</" + tagName + ">)");
    }

    /**
     * Reports a tag that has no separate end tag, with its attribute count.
     * The indentation is left unchanged.
     *
     * @param t   the tag
     * @param a   the attributes of the tag
     * @param pos position reported by the parser (not printed)
     */
    @Override
    public void handleSimpleTag(HTML.Tag t, MutableAttributeSet a, int pos) {
        pIndent();
        String tagName = t.toString();
        int attributeCount = a.getAttributeCount();
        System.out.println("Tag(<" + tagName + ">, " + attributeCount + " attrs)");
    }

    /**
     * Reports a parse error together with its position. The line is printed
     * without indentation.
     *
     * @param errorMsg the parser's error message
     * @param pos      position at which the error was detected
     */
    @Override
    public void handleError(String errorMsg, int pos) {
        String report = "Parsing error: " + errorMsg + " at " + pos;
        System.out.println(report);
    }
}

原创文章,作者:奋斗,如若转载,请注明出处:https://blog.ytso.com/10473.html

(0)
上一篇 2021年7月19日
下一篇 2021年7月19日

相关推荐

发表回复

登录后才能评论