Python 压缩 JavaScript 文件代码详解 | 编程语言

通过正则表达式实现
''' 
a regex-based JavaScript code compression kludge 
''' 
import re 
 
class JSCompressor(object):
    '''
    Regex-based JavaScript compressor.

    String and regex literals are first swapped for numbered placeholders so
    that the comment- and whitespace-stripping regexes cannot touch them; the
    literals are substituted back in at the end.

    This is a heuristic, not a parser: it works line by line on regex matches
    and makes no attempt to understand JavaScript syntax.
    '''

    def __init__(self, compressionLevel=2, measureCompression=False):
        '''
        compressionLevel:
        0 - no compression, script returned unchanged. For debugging only -
            try if you suspect that compression compromises your script
        1 - Strip comments and empty lines, collapse runs of non-leading
            spaces/tabs into one space, keep line breaks and indentation
            (code remains readable)
        2 - Additionally strip insignificant whitespace
            (code will become quite unreadable)

        measureCompression: append a comment stating the extent of compression
        '''
        self.compressionLevel = compressionLevel
        self.measureCompression = measureCompression

    # --- regexes used in compression -------------------------------------
    # All patterns are raw strings so that the backslash escapes reach the
    # regex engine untouched.

    # String and regex literals, exempted from compression by transient
    # substitution. '.' does not match newlines here, so a literal never
    # spans more than one line.
    findLiterals = re.compile(r'''
        (\'.*?(?<=[^\\])\')             |       # single-quoted strings
        (\".*?(?<=[^\\])\")             |       # double-quoted strings
        ((?<![\*\/])\/(?![\/\*]).*?(?<![\\])\/) # JS regexes, trying hard not to be tripped up by comments
        ''', re.VERBOSE)

    # literals are temporarily replaced by numbered placeholders
    literalMarker = '@_@%d@_@'                  # temporary replacement
    backSubst = re.compile(r'@_@(\d+)@_@')      # put the literals back in

    mlc1 = re.compile(r'(\/\*.*?\*\/)')             # /* ... */ comments on a single line
    mlc = re.compile(r'(\/\*.*?\*\/)', re.DOTALL)   # real multiline comments
    slc = re.compile(r'\/\/.*')                     # single line comments

    # collapse successive non-leading spaces/tabs into one
    collapseWs = re.compile(r'(?<=\S)[ \t]+')

    squeeze = re.compile(r'''
        \s+(?=[\}\]\)\:\&\|\=\;\,\.\+])   |     # remove whitespace preceding control characters
        (?<=[\{\[\(\:\&\|\=\;\,\.\+])\s+  |     # ... or following such
        [ \t]+(?=\W)                      |     # remove spaces or tabs preceding non-word characters
        (?<=\W)[ \t]+                           # ... or following such
        ''', re.VERBOSE | re.DOTALL)

    def compress(self, script):
        '''
        Perform compression and return the compressed script.

        script: JavaScript source text.

        Returns the script unchanged at compression level 0. Otherwise
        returns the script with comments and blank lines removed and
        whitespace reduced according to the compression level; if
        measureCompression is set, a trailing "// squeezed out N%" comment
        is appended.
        '''
        if self.compressionLevel == 0:
            return script

        lengthBefore = len(script)

        # First, substitute literals by placeholders to prevent the regexes
        # below from messing with them.
        literals = []

        def insertMarker(mo):
            literals.append(mo.group())
            return self.literalMarker % (len(literals) - 1)

        script = self.findLiterals.sub(insertMarker, script)

        # Now strip comments from the literal-free carcass. Single-line
        # /* .. */ comments become a space so adjacent tokens stay apart;
        # multiline ones become a newline so the line break survives.
        script = self.slc.sub('', script)
        script = self.mlc1.sub(' ', script)
        script = self.mlc.sub('\n', script)

        # remove empty lines and trailing whitespace
        script = '\n'.join(
            line.rstrip() for line in script.splitlines() if line.strip())

        if self.compressionLevel in (1, 2):
            # Collapse runs of spaces/tabs into one. This must also happen
            # before squeezing: the squeeze regex treats a neighbouring space
            # as a non-word character, so a run of several spaces between two
            # identifiers ("var   x") would otherwise be deleted entirely.
            script = self.collapseWs.sub(' ', script)
        if self.compressionLevel == 2:
            # squeeze out any dispensable whitespace
            script = self.squeeze.sub('', script)

        # now back-substitute the string and regex literals
        def backsub(mo):
            return literals[int(mo.group(1))]

        script = self.backSubst.sub(backsub, script)

        if self.measureCompression:
            if lengthBefore:
                lengthAfter = float(len(script))
                squeezedBy = int(100 * (1 - lengthAfter / lengthBefore))
            else:
                # empty input: nothing to squeeze, and avoid dividing by zero
                squeezedBy = 0
            script += '\n// squeezed out %s%%\n' % squeezedBy

        return script
 
if __name__ == '__main__':
    # Demo: compress a small sample script at levels 1 and 2 and print the
    # result together with its length.
    script = '''

    /* this is a totally useless multiline comment, containing a silly "quoted string",
       surrounded by several superfluous line breaks
     */

    // and this is an equally important single line comment

    sth = "this string contains 'quotes', a /regex/ and a // comment yet it will survive compression";

    function wurst(){           // this is a great function
        var hans = 33;
    }

    sthelse = 'and another useless string';

    function hans(){            // another function
        var   bill   =   66;    // successive spaces will be collapsed into one;
        var bob = 77            // this line break will be preserved b/c of lacking semicolon
        var george = 88;
    }
    '''

    for level in range(1, 3):
        # Each print gets one pre-formatted string, so the output is the same
        # whether print is a statement (Python 2) or a function (Python 3).
        print('\ncompression level %d :\n--------------' % level)
        compressor = JSCompressor(compressionLevel=level, measureCompression=True)
        compressed = compressor.compress(script)
        print(compressed)
        print('length %d' % len(compressed))
原创文章，作者：ItWorker，如若转载，请注明出处：https://blog.ytso.com/8484.html
Python 压缩 JavaScript 文件代码详解 | 编程语言

相关推荐

发表回复