golang 解析html时把url转绝对路径

2020-02-25  本文已影响0人  _老七

golang 解析html时把url转绝对路径

在 Golang 中解析 HTML 时，常遇到 img、a 等标签的资源地址为相对路径（如 `../`、`./`）的情况，这时就需要一个方法把这些地址转换为绝对路径。

// absUrl resolves currUrl against baseUrl and returns the resulting URL.
//
// currUrl is the reference found in a document (for example the value of an
// href or src attribute); baseUrl is the URL of the document that contains it.
//
// A currUrl that already carries a scheme is returned unchanged. An empty
// string is returned when either argument cannot be parsed.
//
// Resolution is delegated to (*url.URL).ResolveReference, so "." and ".."
// segments are collapsed, ".." segments that would climb above the root are
// discarded instead of causing a panic, the reference's query string and
// fragment are preserved, and protocol-relative references ("//host/path")
// keep their own host while inheriting the base scheme.
func absUrl(currUrl, baseUrl string) string {
	ref, err := url.Parse(currUrl)
	if err != nil {
		return ""
	}
	if ref.Scheme != "" {
		// Already absolute: hand it back verbatim rather than re-serialising it.
		return currUrl
	}

	base, err := url.Parse(baseUrl)
	if err != nil {
		return ""
	}
	return base.ResolveReference(ref).String()
}

测试

// Test checks absUrl against a fixed page URL for parent-relative,
// current-directory-relative, root-relative and already-absolute references.
// Each result is compared with its expected value so a regression fails the
// test instead of only being printed.
func Test(t *testing.T) {
	const base = "https://xxx.com/articles/2876/1.html"

	cases := []struct {
		ref  string // reference as it would appear in the document
		want string // expected absolute URL
	}{
		{"../../2.html", "https://xxx.com/2.html"},
		{"./../2.html", "https://xxx.com/articles/2.html"},
		{"/2.html", "https://xxx.com/2.html"},
		{"./2.html", "https://xxx.com/articles/2876/2.html"},
		{"https://xxx.com/2.html", "https://xxx.com/2.html"},
	}

	for _, c := range cases {
		if got := absUrl(c.ref, base); got != c.want {
			t.Errorf("absUrl(%q, %q) = %q, want %q", c.ref, base, got, c.want)
		}
	}
}
上一篇下一篇

猜你喜欢

热点阅读