解析URL方案总结
2019-03-15 本文已影响9人
yun_154192
对于一名前端工程🦁来说,解析url这种操作是衡量是否合格的标准之一。今天,就来探讨几种神奇的解析方法。
(一)a标签自动解析URL
// Create a detached anchor; assigning to href makes the browser parse the URL.
var a = document.createElement('a');
a.href = 'http://www.example.com/news.php?id=10#footer';

// A plain <div> is the baseline: any property it also has is not anchor-specific.
var div = document.createElement('div');

for (var key in a) {
  // Skip properties shared with a generic element, and properties whose value is falsy.
  if (key in div || !a[key]) {
    continue;
  }
  console.log(`${key} = ${a[key]}`);
}
输出结果如下所示:
relList =
href = http://www.example.com/news.php?id=10#footer
origin = http://www.example.com
protocol = http:
host = www.example.com
hostname = www.example.com
pathname = /news.php
search = ?id=10
hash = #footer
这些都是a标签特有的属性,且a标签的该属性有值,也就是说,a标签自动完成了URL解析,对于前端来说,这曾经是解析URL最廉价的方式,如下为更加健壮的通用方法:
/**
 * Parse a URL by assigning it to the href of a detached <a> element and
 * reading back the components the browser exposes on that element.
 *
 * Requires a global `document` (i.e. a browser-like DOM environment).
 *
 * @param {string} url - The URL to parse.
 * @returns {{
 *   source: string,
 *   protocol: string,
 *   host: string,
 *   port: string,
 *   query: string,
 *   params: Object<string, (string|undefined)>,
 *   file: string,
 *   hash: string,
 *   path: string,
 *   relative: string,
 *   segments: string[]
 * }} The parsed pieces: `protocol` without its trailing ':', `host` taken from
 *   the anchor's hostname, `query` including the leading '?', `hash` without
 *   the leading '#', and `params` as raw (not percent-decoded) key/value pairs.
 */
function parseURL(url) {
  const a = document.createElement('a');
  a.href = url;

  // Build the query-parameter map. Each pair is split on the FIRST '=' only,
  // so values that themselves contain '=' (e.g. base64 padding) stay intact.
  const params = {};
  const pairs = a.search.replace(/^\?/, '').split('&');
  for (const pair of pairs) {
    if (!pair) { continue; } // empty segment, e.g. from "&&" or an empty query
    const eq = pair.indexOf('=');
    if (eq === -1) {
      // Key without '=': keep the key present with an undefined value.
      params[pair] = undefined;
    } else {
      params[pair.slice(0, eq)] = pair.slice(eq + 1);
    }
  }

  return {
    source: url,
    protocol: a.protocol.replace(':', ''),
    host: a.hostname,
    port: a.port,
    query: a.search,
    params: params,
    // Last path segment, or '' when the path ends with '/'.
    file: (a.pathname.match(/\/([^\/?#]+)$/i) || ['', ''])[1],
    hash: a.hash.replace('#', ''),
    // Prepend '/' when the reported pathname does not start with one.
    path: a.pathname.replace(/^([^\/])/, '/$1'),
    // Everything after the authority; "tps?" matches the tail of http/https/ftp.
    relative: (a.href.match(/tps?:\/\/[^\/]+(.+)/) || ['', ''])[1],
    segments: a.pathname.replace(/^\//, '').split('/')
  };
}
该方法在浏览器环境下较为可靠;但它依赖 DOM(document.createElement),无法在 Node.js 等没有 DOM 的环境中直接使用。
(二)JS URL API
// A deliberately nasty URL: userinfo containing '$', ';' and ',', a '$' in the
// host, a query that starts with "??" and ends with "&&", and a fragment that
// itself contains "http://", '?' and a second '#'.
// The fragment begins at the FIRST '#', i.e. right after "&&".
var url = new URL('http://www.example.com:$88;9,9@www.abc.com$/what??key=val?&&#123http://?query=2#45');
// for...in also walks inherited enumerable properties, which is where URL exposes its fields.
for (var key in url) {
  console.log(`${key} = ${url[key]}`);
}
Chrome下输出结果:
"href = http://www%2Eexample%2Ecom:$88%3B9,9@www.abc.com$/what??key=val?&&#123http://?query=2#45"
"origin = http://www.abc.com$"
"protocol = http:"
username = www%2Eexample%2Ecom
password = $88%3B9,9
host = www.abc.com$
hostname = www.abc.com$
port =
pathname = /what
search = ??key=val?&&
searchParams = %3Fkey=val%3F
hash = #123http://?query=2#45
如要获得hostname,有如下方法:
/**
 * Return the hostname component of an absolute URL.
 *
 * @param {string} url - Absolute URL string; the URL constructor throws if it cannot be parsed.
 * @returns {string} The hostname as reported by the URL API.
 */
var getHostname = function(url) {
  var parsed = new URL(url);
  return parsed.hostname;
};
该方法目前只有部分浏览器支持,兼容情况请查阅:https://developer.mozilla.org/zh-CN/docs/Web/API/URL/URL
(三)神级操作正则解析
/**
 * Split a URL string into its components using a single regular expression.
 *
 * @param {string} [url] - URL to parse; a falsy value is treated as "".
 * @returns {Object<string, string>} An object with the keys href, hrefNoHash,
 *   hrefNoSearch, domain, protocol, doubleSlash, authority, username, password,
 *   host, hostname, port, pathname, directory, filename, search and hash.
 *   Any component that did not match is "".
 */
var parseUrl = function(url) {
  var urlParseRE = /^\s*(((([^:\/#\?]+:)?(?:(\/\/)((?:(([^:@\/#\?]+)(?:\:([^:@\/#\?]+))?)@)?(([^:\/#\?\]\[]+|\[[^\/\]@#?]+\])(?:\:([0-9]+))?))?)?)?((\/?(?:[^\/\?#]+\/+)*)([^\?#]*)))?(\?[^#]+)?)(#.*)?/;

  // Result key for each capture-group index. Group 7 only wraps
  // "username[:password]" and is intentionally not exposed.
  var groupNames = [
    "href",
    "hrefNoHash",
    "hrefNoSearch",
    "domain",
    "protocol",
    "doubleSlash",
    "authority",
    null,
    "username",
    "password",
    "host",
    "hostname",
    "port",
    "pathname",
    "directory",
    "filename",
    "search",
    "hash"
  ];

  var captured = urlParseRE.exec(url || "") || [];
  var parts = {};
  groupNames.forEach(function(name, index) {
    if (name === null) {
      return;
    }
    // Unmatched groups are undefined; normalise them to "".
    parts[name] = captured[index] || "";
  });
  return parts;
};
解读:
/^ #href
\s*
( #hrefNoHash
( #hrefNoSearch
( #domain
([^:\/#\?]+:)? #protocol
(?:
(\/\/) #doubleSlash
( #authority
(?:
( #取结果时$7被跳过了,应该也用非捕获型括号(?:
([^:@\/#\?]+) #username
(?:
\:
([^:@\/#\?]+) #password
)?
)
@
)?
( #host
([^:\/#\?\]\[]+|\[[^\/\]@#?]+\]) #hostname
(?:
\:
([0-9]+) #port
)?
)
)?
)?
)?
( #pathname
(\/?(?:[^\/\?#]+\/+)*) #directory
([^\?#]*) #filename
)
)?
(\?[^#]+)? #search
)
(#.*)? #hash
/
具体自行领会~