LinkURL




Public Type Link
URL As String
Tag As String
End Type

Global LinkBuffer() As Link

Public Sub ExtractLinks(str As String, baseurl As String)
Erase LinkBuffer()
ReDim LinkBuffer(0)
thea = InStr(1, LCase(str), "<a ")
bizace = InStr(1, LCase(str), "<base ") + 6

If bizace <> 0 Then
endofbase = InStr(bizace, str, ">") + 1
If endofbase = 0 Then Goto nobase
thebasetag = Mid(str, bizace, endofbase - bizace)
basehrefloc = InStr(1, LCase(thebasetag), "href=")
If basehrefloc = 0 Then Goto nobase
basehrefend = InStr(href, thewholelink, " ")
If basehrefend = 0 Then basehrefend = InStr(basehrefloc, _
thebasetag, ">")
If basehrefend = 0 Then Goto nobase
baseurl = Mid(thebasetag, basehrefloc + 5, basehrefend - basehrefloc)
If Right$(baseurl, 1) = ">" Then baseurl = Mid(baseurl, _
1, Len(baseurl) - 1)
If Left$(baseurl, 1) = """" Then baseurl = Mid(baseurl, 2)
If Right$(baseurl, 1) = """" Then baseurl = Mid(baseurl, _
1, Len(baseurl) - 1)
End If

nobase:

Do While thea <> 0
endoflink = InStr(thea, LCase(str), "</a>") + 3
If endoflink = 0 Then Goto Invalid
thewholelink = Mid(str, thea, endoflink)
endofh = InStr(1, thewholelink, ">")
firsthalf = Mid(thewholelink, 1, endofh)

href = InStr(1, firsthalf, "href=") + 5
If href = 0 Then Goto Invalid
endofurl = InStr(href, firsthalf, " ")
If endofurl = 0 Then endofurl = InStr(href, firsthalf, ">")
If endofurl = 0 Then Goto nobase
theurl = Mid(firsthalf, href, endofurl - href)
tagend = InStr(1, LCase(thewholelink), "</a>") - 1
thetag = Mid(thewholelink, Len(firsthalf) + 1, tagend - Len(firsthalf))
If Left$(theurl, 1) = """" Then theurl = Mid(theurl, 2)
If Right$(theurl, 1) = """" Then theurl = _
Mid(theurl, 1, Len(theurl) - 1)

If Mid(theurl, 4, 3) <> "://" Then
If Left$(theurl, 1) <> "/" Then theurl = "/" & theurl
If Right$(baseurl, 1) = "/" Then baseurl = Left$(baseurl, _
Len(baseurl) - 1)
theurl = baseurl & theurl
End If

ReDim Preserve LinkBuffer(UBound(LinkBuffer) + 1)
LinkBuffer(UBound(LinkBuffer)).Owner = CurrentURL
LinkBuffer(UBound(LinkBuffer)).Tag = thetag
Invalid:

DoEvents
If endoflink = 0 Then endoflink = thea + 1
thea = InStr(endoflink, LCase(str), "<a ")
Loop
End Sub
Inputs:
Call like this:
ExtractLinks str As String, baseurl As String
Str is the String To extract links from,
baseurl is the url to use as the base url If no
<base href=...> tag is found.

'

Returns:
No returns, but the extracted information is stored
in a dynamic array (LinkBuffer) witch is defined as
the user defined type Link.
To display your links:

Make 2 list boxes, list1 and list2
In a button put:

For x=1 To ubound(LinkBuffer)
doevents
list1.additem linkbuffer(x).TAG
list2.additem linkbuffer(x).URL
Next x

Assumes:
This code could be easily modifed to work in a
VBScript enviroment by removing the UDT and just
returning an array. All variables must be made
into variants because VBscript doesn't have
data types. If you find any bugs or errors, or
use this in your own project, please drop me an
e-mail telling me. It makes you feel all warm
and gooey inside when you know you helped someone
=)elanhasson@hotmail.com










( linkurl.html )- by Paolo Puglisi - Modifica del 17/12/2023