Public Type Link
URL As String Tag As String End Type Global LinkBuffer() As Link Public Sub ExtractLinks(str As String, baseurl As String) Erase LinkBuffer() ReDim LinkBuffer(0) thea = InStr(1, LCase(str), "<a ") bizace = InStr(1, LCase(str), "<base ") + 6 If bizace <> 0 Then endofbase = InStr(bizace, str, ">") + 1 If endofbase = 0 Then Goto nobase thebasetag = Mid(str, bizace, endofbase - bizace) basehrefloc = InStr(1, LCase(thebasetag), "href=") If basehrefloc = 0 Then Goto nobase basehrefend = InStr(href, thewholelink, " ") If basehrefend = 0 Then basehrefend = InStr(basehrefloc, _ thebasetag, ">") If basehrefend = 0 Then Goto nobase baseurl = Mid(thebasetag, basehrefloc + 5, basehrefend - basehrefloc) If Right$(baseurl, 1) = ">" Then baseurl = Mid(baseurl, _ 1, Len(baseurl) - 1) If Left$(baseurl, 1) = """" Then baseurl = Mid(baseurl, 2) If Right$(baseurl, 1) = """" Then baseurl = Mid(baseurl, _ 1, Len(baseurl) - 1) End If nobase: Do While thea <> 0 endoflink = InStr(thea, LCase(str), "</a>") + 3 If endoflink = 0 Then Goto Invalid thewholelink = Mid(str, thea, endoflink) endofh = InStr(1, thewholelink, ">") firsthalf = Mid(thewholelink, 1, endofh) href = InStr(1, firsthalf, "href=") + 5 If href = 0 Then Goto Invalid endofurl = InStr(href, firsthalf, " ") If endofurl = 0 Then endofurl = InStr(href, firsthalf, ">") If endofurl = 0 Then Goto nobase theurl = Mid(firsthalf, href, endofurl - href) tagend = InStr(1, LCase(thewholelink), "</a>") - 1 thetag = Mid(thewholelink, Len(firsthalf) + 1, tagend - Len(firsthalf)) If Left$(theurl, 1) = """" Then theurl = Mid(theurl, 2) If Right$(theurl, 1) = """" Then theurl = _ Mid(theurl, 1, Len(theurl) - 1) If Mid(theurl, 4, 3) <> "://" Then If Left$(theurl, 1) <> "/" Then theurl = "/" & theurl If Right$(baseurl, 1) = "/" Then baseurl = Left$(baseurl, _ Len(baseurl) - 1) theurl = baseurl & theurl End If ReDim Preserve LinkBuffer(UBound(LinkBuffer) + 1) LinkBuffer(UBound(LinkBuffer)).Owner = CurrentURL LinkBuffer(UBound(LinkBuffer)).Tag = thetag Invalid: DoEvents If endoflink = 0 Then endoflink = thea + 1 thea = InStr(endoflink, LCase(str), "<a ") Loop End Sub Inputs: Call like this: ExtractLinks str As String, baseurl As String Str is the String To extract links from, baseurl is the url to use as the base url If no <base href=...> tag is found. ' Returns: No returns, but the extracted information is stored in a dynamic array (LinkBuffer) witch is defined as the user defined type Link. To display your links: Make 2 list boxes, list1 and list2 In a button put: For x=1 To ubound(LinkBuffer) doevents list1.additem linkbuffer(x).TAG list2.additem linkbuffer(x).URL Next x Assumes: This code could be easily modifed to work in a VBScript enviroment by removing the UDT and just returning an array. All variables must be made into variants because VBscript doesn't have data types. If you find any bugs or errors, or use this in your own project, please drop me an e-mail telling me. It makes you feel all warm and gooey inside when you know you helped someone =)elanhasson@hotmail.com |