golang 解析html时把url转绝对路径
在 Golang 中解析 HTML 时，经常会遇到 img、a 等标签的资源地址是相对路径（如 ../、./）的情况，这时就需要一个方法把这些地址转换为绝对路径。
// absUrl resolves currUrl against baseUrl and returns the resulting URL as a
// string.
//
// currUrl is the reference to resolve (for example the href or src value of
// an a or img tag). baseUrl is the URL of the document the reference was
// found in and is expected to be an absolute URL.
//
// If currUrl already carries a scheme it is returned unchanged. If either
// argument fails to parse, the empty string is returned.
//
// Resolution is delegated to (*url.URL).ResolveReference, which follows
// RFC 3986 section 5.2. Compared with splitting the path by hand this means:
// ".." segments that would climb above the root are dropped instead of
// causing an out-of-range panic, the query and fragment of currUrl are kept,
// scheme-relative references ("//host/path") keep their own host, and path
// handling does not depend on the operating system's path separator.
func absUrl(currUrl, baseUrl string) string {
	ref, err := url.Parse(currUrl)
	if err != nil {
		return ""
	}
	// Already absolute: hand it back verbatim rather than re-serialising it.
	if ref.Scheme != "" {
		return currUrl
	}
	base, err := url.Parse(baseUrl)
	if err != nil {
		return ""
	}
	return base.ResolveReference(ref).String()
}
测试
// Test checks absUrl against a fixed page URL for the common kinds of
// reference: parent-relative, current-directory-relative, root-relative and
// already absolute. Each result is compared with its expected value so that a
// regression fails the test instead of only changing printed output.
func Test(t *testing.T) {
	const base = "https://xxx.com/articles/2876/1.html"
	cases := []struct {
		ref  string
		want string
	}{
		{"../../2.html", "https://xxx.com/2.html"},
		{"./../2.html", "https://xxx.com/articles/2.html"},
		{"/2.html", "https://xxx.com/2.html"},
		{"./2.html", "https://xxx.com/articles/2876/2.html"},
		{"https://xxx.com/2.html", "https://xxx.com/2.html"},
	}
	for _, c := range cases {
		if got := absUrl(c.ref, base); got != c.want {
			t.Errorf("absUrl(%q, %q) = %q, want %q", c.ref, base, got, c.want)
		}
	}
}
有疑问加站长微信联系(非本文作者)