解析URL方案总结

2019-03-15 本文已影响9人 yun_154192

对于一名前端工程🦁来说，解析url这种操作是衡量是否合格的标准之一。今天，就来探讨几种神奇的解析方法。

（一）a标签自动解析URL

// Let the browser parse the URL by assigning it to an anchor element.
var a = document.createElement('a');
a.href = 'http://www.example.com/news.php?id=10#footer';

// A plain <div> serves as the baseline: any property it also has is not
// specific to <a>, so it is filtered out below.
var div = document.createElement('div');
for (var key in a) {
    // Skip properties shared with the <div>, and properties whose value is falsy.
    if (key in div || !a[key]) {
        continue;
    }
    console.log(`${key} = ${a[key]}`);
}

输出结果如下所示：

relList = 
href = http://www.example.com/news.php?id=10#footer
origin = http://www.example.com
protocol = http:
host = www.example.com
hostname = www.example.com
pathname = /news.php
search = ?id=10
hash = #footer

这些都是a标签特有的属性，且a标签的该属性有值，也就是说，a标签自动完成了URL解析，对于前端来说，这曾经是解析URL最廉价的方式，如下为更加健壮的通用方法：

/**
 * Parse a URL by assigning it to an <a> element and reading back the
 * components the browser exposes on that element.
 *
 * Requires a global `document` (it calls `document.createElement`).
 *
 * @param {string} url - The URL to parse.
 * @returns {{
 *   source: string, protocol: string, host: string, port: string,
 *   query: string, params: Object, file: string, hash: string,
 *   path: string, relative: string, segments: string[]
 * }} The parsed parts:
 *   - source:   the `url` argument, untouched
 *   - protocol: scheme without the first ':'
 *   - host:     the anchor's `hostname` (no port)
 *   - port:     the anchor's `port`
 *   - query:    the anchor's `search`, leading '?' included
 *   - params:   query pairs as an object; values are the raw (undecoded)
 *               text after the FIRST '=', and a key without '=' maps to
 *               `undefined`
 *   - file:     last path segment, or '' when the path ends with '/'
 *   - hash:     fragment without the first '#'
 *   - path:     pathname, forced to start with '/'
 *   - relative: everything after the host in `href` (only matched for
 *               schemes ending in "tp"/"tps", e.g. http/https), else ''
 *   - segments: pathname split on '/', ignoring one leading slash
 */
function parseURL(url) {
    var a = document.createElement('a');
    a.href = url;
    return {
        source: url,
        protocol: a.protocol.replace(':', ''),
        host: a.hostname,
        port: a.port,
        query: a.search,
        params: (function () {
            var ret = {};
            var pairs = a.search.replace(/^\?/, '').split('&');
            pairs.forEach(function (pair) {
                // Empty chunks come from an empty query or from "&&".
                if (!pair) { return; }
                // Split on the first '=' only, so values that themselves
                // contain '=' (e.g. base64 padding) are kept intact.
                var eq = pair.indexOf('=');
                if (eq === -1) {
                    ret[pair] = undefined;
                    return;
                }
                ret[pair.slice(0, eq)] = pair.slice(eq + 1);
            });
            return ret;
        })(),
        file: (a.pathname.match(/\/([^\/?#]+)$/i) || [, ''])[1],
        hash: a.hash.replace('#', ''),
        // Prefix a '/' when the reported pathname does not start with one.
        path: a.pathname.replace(/^([^\/])/, '/$1'),
        relative: (a.href.match(/tps?:\/\/[^\/]+(.+)/) || [, ''])[1],
        segments: a.pathname.replace(/^\//, '').split('/')
    };
}

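该方法借助浏览器自身的解析能力，结果比较可靠，但有几点需要注意：它依赖 DOM，只能在有 document 的环境中使用（Node.js、Web Worker 中不可用）；相对地址会按当前页面地址补全；params 中的键和值不会自动做 URL 解码，需要时请自行调用 decodeURIComponent。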

（二）JS URL API

// A deliberately nasty URL: userinfo with punctuation, doubled '?', and a
// fragment that itself contains "http://", '?' and '#'.
var url = new URL('http://www.example.com:$88;9,9@www.abc.com$/what??key=val?&&#123http://?query=2#45');

// Print every enumerable property the URL object exposes, one per line.
var key;
for (key in url) {
    console.log(`${key} = ${url[key]}`);
}

Chrome下输出结果：

href = http://www%2Eexample%2Ecom:$88%3B9,9@www.abc.com$/what??key=val?&&#123http://?query=2#45
origin = http://www.abc.com$
protocol = http:
username = www%2Eexample%2Ecom
password = $88%3B9,9
host = www.abc.com$
hostname = www.abc.com$
port = 
pathname = /what
search = ??key=val?&&
searchParams = %3Fkey=val%3F
hash = #123http://?query=2#45

如要获得hostname，有如下方法：

/**
 * Extract the hostname from an absolute URL using the URL API.
 *
 * @param {string} url - Absolute URL string.
 * @returns {string} The `hostname` of the parsed URL (no port).
 * @throws {TypeError} When `url` cannot be parsed by the URL constructor.
 */
var getHostname = function(url) {
    const { hostname } = new URL(url);
    return hostname;
};

URL API 目前已被除 IE 之外的主流浏览器以及 Node.js 支持，如需兼容旧环境，请先查阅兼容情况：https://developer.mozilla.org/zh-CN/docs/Web/API/URL/URL

（三）神级操作正则解析

/**
 * Split a URL string into its components with a single regular expression
 * (no DOM and no URL API involved).
 *
 * Every field of the result is a string; a component that is absent from
 * the input is returned as "". A falsy `url` is treated as "".
 *
 * @param {string} [url] - The URL (absolute or relative) to split.
 * @returns {{
 *   href: string, hrefNoHash: string, hrefNoSearch: string, domain: string,
 *   protocol: string, doubleSlash: string, authority: string,
 *   username: string, password: string, host: string, hostname: string,
 *   port: string, pathname: string, directory: string, filename: string,
 *   search: string, hash: string
 * }} The matched pieces. `protocol` keeps its trailing ':', `search` keeps
 *   its leading '?', and `hash` keeps its leading '#'.
 */
var parseUrl = function(url) {
    // The search group uses `[^#]*` (not `+`) so that an empty query such as
    // "http://a.com/?#top" is still consumed; otherwise the lone '?' is left
    // unmatched and the hash after it is silently dropped.
    var urlParseRE = /^\s*(((([^:\/#\?]+:)?(?:(\/\/)((?:(([^:@\/#\?]+)(?:\:([^:@\/#\?]+))?)@)?(([^:\/#\?\]\[]+|\[[^\/\]@#?]+\])(?:\:([0-9]+))?))?)?)?((\/?(?:[^\/\?#]+\/+)*)([^\?#]*)))?(\?[^#]*)?)(#.*)?/;

    var matches = urlParseRE.exec(url || "") || [];

    // Group 7 ("username:password" as a whole) is captured by the pattern
    // but intentionally not exposed, hence the jump from index 6 to 8.
    return {
        href:         matches[0] || "",
        hrefNoHash:   matches[1] || "",
        hrefNoSearch: matches[2] || "",
        domain:       matches[3] || "",
        protocol:     matches[4] || "",
        doubleSlash:  matches[5] || "",
        authority:    matches[6] || "",
        username:     matches[8] || "",
        password:     matches[9] || "",
        host:         matches[10] || "",
        hostname:     matches[11] || "",
        port:         matches[12] || "",
        pathname:     matches[13] || "",
        directory:    matches[14] || "",
        filename:     matches[15] || "",
        search:       matches[16] || "",
        hash:         matches[17] || ""
    };
};

解读：

/^                      #href
\s*
(                       #hrefNoHash
  (                     #hrefNoSearch
    (                   #domain
      ([^:\/#\?]+:)?    #protocol
      (?:
        (\/\/)          #doubleSlash
        (               #authority
          (?:
            (           #取结果时$7被跳过了，应该也用非捕获型括号(?:
              ([^:@\/#\?]+)     #username
              (?:
                \:
                ([^:@\/#\?]+)   #password
              )?
            )
            @
          )?
          (                     #host
            ([^:\/#\?\]\[]+|\[[^\/\]@#?]+\])    #hostname
            (?:
              \:
              ([0-9]+)  #port
            )?
          )
        )?
      )?
    )?
    (                   #pathname
      (\/?(?:[^\/\?#]+\/+)*)    #directory
      ([^\?#]*)         #filename
    )
  )?
  (\?[^#]+)?            #search
)
(#.*)?                  #hash
/

具体自行领会~

解析URL方案总结

（一）a标签自动解析URL

（二）JS URL API

（三）神级操作正则解析

猜你喜欢

热点阅读