Private Sub cmdGenDomains_Click() Rem ********************************************************************** Rem * Generates Domains and Emails from Company Name in Input File 1 Rem * produces Output File 1 with new columns for Domain / Email Rem * Ignores words or text found in Input File 2 Rem * ALSO Pings the domain to see if it exists and is online Rem ********************************************************************** Dim InFile1 As String, InFile2 As String, OutFile1 As String Dim InLine1 As String, InLine2 As String, OutLine As String Dim BeginCo As Long, EndCo As Long, BeginDomain As Long, EndDomain As Long Dim Line As String, Column As Long Dim FoundCo As String, GenDomain As String, GenEmail As String Dim CompanyCol As Long, DomainCol As Long, EmailCol As String Dim SourceStatus As String Dim DomainExcludeWords(100) As String, NumExcludeWords As Integer Dim EmailPrefixWords(100) As String, NumPrefixWords As Integer Dim I As Integer Dim DomainFinished As Boolean Rem * Read Input File #1 InFile1 = lblInputFile1 InFile2 = lblInputFile2 OutFile1 = lblOutputFile1 CompanyCol = Val(txtCompany1) DomainCol = Val(txtURL1) EmailCol = txtEmail1 Rem *** Make sure input files and output file names not blank If InFile1 = "" Or InFile2 = "" Or OutFile1 = "" Or InFile1 = InFile2 Or InFile1 = OutFile1 Or InFile2 = OutFile1 Or DomainCol = 0 Or CompanyCol = 0 Or EmailCol = "" Then MsgBox ("Sorry, file names missing for Input Files or Output File 1 or file names conflict or Company/Email Column not specified!") Rem *** Begin reading input files and producing output file Else Rem *** Determine size of input file to set progress bar NumLines = 0 Open InFile1 For Input As #1 Do While Not EOF(1) Line Input #1, InLine1 NumLines = NumLines + 1 Loop Close #1 ProgressBar1.Max = NumLines NumLines = 0 Rem ******************************************************** Rem * OPEN Input and Output Files Rem ******************************************************** Open OutFile1 For Output As #3 Open InFile1 For Input As #1 Line Input #1, InLine1 NumLines = NumLines + 1 Rem * Write First Line adding SOURCE status field InLine1 = InLine1 & ",SOURCE" Print #3, InLine1 Rem * Get DomainExcludeWords and EmailPrefixWords NumExcludeWords = 0 NumPrefixWords = 0 DomainFinished = False Open InFile2 For Input As #2 Do While Not EOF(2) Line Input #2, InLine2 If InLine2 <> "EMAIL" And InLine2 <> "DOMAIN" Then If DomainFinished Then NumPrefixWords = NumPrefixWords + 1 EmailPrefixWords(NumPrefixWords) = InLine2 End If If Not DomainFinished Then NumExcludeWords = NumExcludeWords + 1 DomainExcludeWords(NumExcludeWords) = InLine2 End If End If If InLine2 = "EMAIL" Then DomainFinished = True Loop Close #2 Do While Not EOF(1) Line Input #1, InLine1 NumLines = NumLines + 1 ProgressBar1.Value = NumLines Rem ****************************************** Rem * EXTRACT Company Name from Input File 1 Rem ****************************************** OutLine = InLine1 Line = InLine1 Column = CompanyCol Rem * Extract Company string BeginCo = 0 NumComma = Column - 1 Rem MsgBox ("Line=" & Line & " Column=" & Column & " BeginCo=" & BeginCo & " NumComma=" & NumComma) Rem * Get first comma for begin of Company name Do While NumComma > 0 BeginCo = BeginCo + 1 Rem MsgBox ("BeginCo=" & BeginCo & " and calling Mid") If Mid(Line, BeginCo, 1) = "," Then NumComma = NumComma - 1 Loop Rem * Begin Company would be first char AFTER comma BeginCo = BeginCo + 1 EndCo = BeginCo Rem * Get next comma for end of Company name Do While Mid(Line, EndCo, 1) <> "," EndCo = EndCo + 1 Rem MsgBox ("EndCo=" & EndCo & " and calling Mid") Loop Rem * End Company name would be first char BEFORE comma EndCo = EndCo - 1 Rem MsgBox ("BeginCo=" & BeginCo & " EndCo=" & EndCo) Rem MsgBox ("calling LCase(Trim(Mid(" & Line & ", " & BeginCo & ", " & EndCo - BeginCo + 1 & ")") Rem MsgBox ("After 2nd MsgBox before call URL") Rem * Company name is chars begin to end plus 1 for length FoundCo = LCase(Trim(Mid(Line, BeginCo, EndCo - BeginCo + 1))) Rem MsgBox ("After call FoundCo=" & FoundCo) Rem ******************************************************** Rem * Check If Domain Already Exists Rem * If so, put "I" (included) at Last Field "SOURCE" Rem * If not, put "C" (created) for new Rem ******************************************************** Column = DomainCol Rem * Extract Domain string BeginDomain = 0 NumComma = Column - 1 Rem MsgBox ("Line=" & Line & " Column=" & Column & " BeginDomain=" & BeginDomain & " NumComma=" & NumComma) Rem * Get first comma for begin of Domain name Do While NumComma > 0 BeginDomain = BeginDomain + 1 Rem MsgBox ("BeginDomain=" & BeginDomain & " and calling Mid") If Mid(Line, BeginDomain, 1) = "," Then NumComma = NumComma - 1 Loop Rem * Begin Domain would be first char AFTER comma BeginDomain = BeginDomain + 1 Rem * Determine SOURCE status (I = Domain already Included, C = Domain Created from Company name) If Mid(Line, BeginDomain, 1) = "," Then Rem * Set status to C = Created SourceStatus = "C" GenDomain = FoundCo Rem * remove spaces GenDomain = Replace(GenDomain, " ", "") For I = 1 To NumExcludeWords Rem * Replace excluded word with blank GenDomain = Replace(GenDomain, DomainExcludeWords(I), "") Next I GenDomain = GenDomain & ".com" Line = Left(Line, BeginDomain - 1) & GenDomain & Right(Line, Len(Line) - BeginDomain + 1) Else Rem * Set status to I = Included, find End of domain SourceStatus = "I" EndDomain = BeginDomain + 1 Do While Mid(Line, EndDomain, 1) <> "," EndDomain = EndDomain + 1 Loop EndDomain = EndDomain - 1 GenDomain = LCase(Trim(Mid(Line, BeginDomain, EndDomain - BeginDomain + 1))) GenDomain = Replace(GenDomain, "http://", "") GenDomain = Replace(GenDomain, "www.", "") End If Line = Line & "," & SourceStatus Rem ************************************************ Rem * Generate Emails, Add Email to Field Rem ************************************************ For I = 1 To NumPrefixWords Rem * generate prefix appearing before GenDomain for Emails e.g. xxxxx@GenDomain.com OutLine = Line GenEmail = EmailPrefixWords(I) & "@" & GenDomain OutLine = Replace(OutLine, EmailCol, GenEmail) Rem * Write the new Email line out Print #3, OutLine Next I Loop Close #1 Rem Close #2 -- not needed Close #3 MsgBox ("Reached EOF of Input File 1.") lblInputFile1 = "" lblInputFile2 = "" lblOutputFile1 = "" End If End Sub Private Sub cmdMatchDomains_Click() Rem **************************************************************************** Rem * Match Domains from Input File 2 (MATCH) to Input File 1 (MASTER) Rem * Insert Emails from Input File 2 into 1 at "Email" column field Rem * Additional Domain Matches generate separate identical Lines (rows/records) Rem * To Match ignore http:// and www. and get Domain Only (something.com) Rem * Save in Output File 3 Rem **************************************************************************** Rem * INPUT Lines and Files 1 and 2, OUTPUT File and Line Dim InFile1 As String, InFile2 As String, OutFile1 As String Dim InLine1 As String, InLine2 As String, OutLine3 As String Rem * OUTPUT File 2 for Domain List Dim OutFile2 As String Rem * Domains to Compare 1=MASTER, 2=MATCH Dim Domain1 As String, Domain2 As String, Temp As String Dim URL1Col As Long, Email1Col As String Rem * Total Lines and Domains Match Dim NumLines1 As Long, NumLines2 As Long, NumCount As Long Dim NumDomainMatch As Long, NumDomainNotMatch As Long Rem * Temp Column, Position for domain/email Dim DomainMatch As Boolean Dim Col As Long, Pos As Long Dim FoundEmail As String Rem * Temp ints and flags for URL and Domain strings ( NumComma = Column - 1 ) Dim BeginURL As Long, EndURL As Long, NumComma As Long Dim URL As String, Line As String, Column As Long Dim Done As Boolean Dim BeginDomain As Long, EndDomain As Long, C As Long Dim P As Long, P1 As Long, P2 As Long, P3 As Long, T As String Dim FoundDomain As String InFile1 = lblInputFile1 InFile2 = lblInputFile2 OutFile1 = lblOutputFile1 OutFile2 = lblOutputFile2 URL1Col = Val(txtURL1) Email1Col = txtEmail1 Rem *** Make sure input files and output file names not blank If InFile1 = "" Or InFile2 = "" Or OutFile1 = "" Or OutFile2 = "" Or InFile1 = InFile2 Or InFile1 = OutFile1 Or InFile2 = OutFile1 Or URL1Col = 0 Or Email1Col = "" Then MsgBox ("Sorry, file names missing for Input Files or Output File 1 or file names conflict or URL/Email Column not specified!") Rem *** Begin reading input files and producing output file Else Rem *** Determine size of input files to set progress bar NumLines1 = 0 NumLines2 = 0 NumDomainMatch = 0 NumDomainNotMatch = 0 Rem ******************************************************** Rem * OPEN Output File 2 for Domain List Rem ******************************************************** Open OutFile2 For Output As #4 Open InFile1 For Input As #1 Do While Not EOF(1) Line Input #1, InLine1 NumLines1 = NumLines1 + 1 Rem ****************************************** Rem * EXTRACT URL/Domain from Input File 1 Rem * SAVE in Output File 2 Rem ****************************************** Line = InLine1 Column = URL1Col Rem * Extract full URL string BeginURL = 0 NumComma = Column - 1 Rem MsgBox ("Line=" & Line & " Column=" & Column & " BeginURL=" & BeginURL & " NumComma=" & NumComma) Rem * Get first comma for begin of URL Do While NumComma > 0 BeginURL = BeginURL + 1 Rem MsgBox ("BeginURL=" & BeginURL & " and calling Mid") If Mid(Line, BeginURL, 1) = "," Then NumComma = NumComma - 1 Loop Rem * Begin URL would be first char AFTER comma BeginURL = BeginURL + 1 EndURL = BeginURL Done = False Rem * Get next comma for end of URL Do While Not Done EndURL = EndURL + 1 Rem MsgBox ("EndURL=" & EndURL & " and calling Mid") If Mid(Line, EndURL, 1) = "," Then Done = True Loop Rem * End URL would be first char BEFORE comma EndURL = EndURL - 1 Rem MsgBox ("BeginURL=" & BeginURL & " EndURL=" & EndURL) Rem MsgBox ("calling LCase(Trim(Mid(" & Line & ", " & BeginURL & ", " & EndURL - BeginURL + 1 & ")") Rem MsgBox ("After 2nd MsgBox before call URL") Rem * URL is chars begin to end plus 1 for length URL = LCase(Trim(Mid(Line, BeginURL, EndURL - BeginURL + 1))) Rem MsgBox ("After call URL=" & URL) Rem * Extract domain name from URL, assume domain is .com P = InStr(URL, ".com") EndDomain = 0 If P > 0 Then EndDomain = P + 3 C = P - 1 Else P1 = InStr(URL, ".net") P2 = InStr(URL, ".org") P3 = InStr(URL, ".biz") P4 = InStr(URL, ".info") Rem * if .net .org .biz If P1 > 0 Then EndDomain = P1 + 3 C = P1 - 1 End If If P2 > 0 Then EndDomain = P2 + 3 C = P2 - 1 End If If P3 > 0 Then EndDomain = P3 + 3 C = P3 - 1 End If Rem * if .info If P4 > 0 Then EndDomain = P4 + 4 C = P4 - 1 End If End If Done = False If EndDomain = 0 Then Done = True Rem * We have End of Domain now find Begin of Domain Do While Not Done Rem MsgBox ("calling Mid for C=" & C) T = Mid(URL, C, 1) If T = "/" Or T = "." Then Done = True Else C = C - 1 If C < 1 Then Done = True End If Loop Rem * Begin of Domain is char AFTER / or . BeginDomain = C + 1 If EndDomain > 0 Then FoundDomain = Mid(URL, BeginDomain, EndDomain - BeginDomain + 1) Domain1 = FoundDomain Else Domain1 = "nothing1" End If Rem MsgBox ("FoundDomain1=" & FoundDomain) Rem *************************************************************** Rem * PRINT/APPEND Domain name to Output File 2 for later checking Rem *************************************************************** Print #4, Domain1 Loop Close #1 Close #4 lblNumLines1 = "Total Number of Lines = " & NumLines1 Rem *************** REMOVE MsgBox ("All Domains Found! Check Output File!") Open InFile2 For Input As #2 Do While Not EOF(2) Line Input #2, InLine2 NumLines2 = NumLines2 + 1 Loop Close #2 lblNumLines2 = "Total Number of Lines = " & NumLines2 ProgressBar1.Max = NumLines2 Open InFile1 For Input As #1 Open InFile2 For Input As #2 Open OutFile1 For Output As #3 Open OutFile2 For Input As #4 Rem *** save first line header fields Line Input #1, InLine1 Print #3, InLine1 Rem *** ignore first domain (assumes header) Line Input #4, Domain1 NumCount = 0 DomainMatch = False Rem *** Read Input #2 Until EOF Do While Not EOF(2) Line Input #2, InLine2 NumCount = NumCount + 1 ProgressBar1.Value = NumCount Rem * Extract domain from Input #2, assumes fields are EMAIL, URL Line = InLine2 Column = 2 Rem * Extract full URL string BeginURL = 0 NumComma = 1 Rem * Get first comma for begin of URL Do While NumComma > 0 BeginURL = BeginURL + 1 Rem MsgBox ("Call to Mid( Line=" & Line & " BeginURL=" & BeginURL & ", 1)") If Mid(Line, BeginURL, 1) = "," Then NumComma = NumComma - 1 End If If (BeginURL >= Len(Line)) Then NumComma = 0 End If Loop Rem * Begin URL would be first char AFTER comma BeginURL = BeginURL + 1 Rem * End URL would be last char EndURL = Len(Line) Rem * URL is chars begin to end plus 1 for length Rem MsgBox ("Call to Trim(Mid( Line=" & Line & " BeginURL=" & BeginURL & ", EndURL - BeginURL + 1 = " & EndURL - BeginURL + 1) URL = LCase(Trim(Mid(Line, BeginURL, EndURL - BeginURL + 1))) Rem * Extract domain name from URL, assume domain is .com Rem MsgBox ("InStr URL=" & URL & " -- checking .com") P = InStr(URL, ".com") EndDomain = 0 If P > 0 Then EndDomain = P + 3 C = P - 1 Else Rem MsgBox (" -- checking .net .org .biz .info") P1 = InStr(URL, ".net") P2 = InStr(URL, ".org") P3 = InStr(URL, ".biz") P4 = InStr(URL, ".info") Rem * if .net .org .biz If P1 > 0 Then EndDomain = P1 + 3 C = P1 - 1 End If If P2 > 0 Then EndDomain = P2 + 3 C = P2 - 1 End If If P3 > 0 Then EndDomain = P3 + 3 C = P3 - 1 End If Rem * if .info If P4 > 0 Then EndDomain = P4 + 4 C = P4 - 1 End If End If Rem MsgBox ("EndDomain=" & EndDomain) Done = False If EndDomain = 0 Then Done = True Rem * We have End of Domain now find Begin of Domain Do While Not Done T = Mid(URL, C, 1) Rem MsgBox ("Checking char " & T & " in " & URL) If T = "/" Or T = "." Then Done = True Else C = C - 1 If C < 1 Then Done = True End If Loop Rem * Begin of Domain is char AFTER / or . BeginDomain = C + 1 If EndDomain > 0 Then FoundDomain = Mid(URL, BeginDomain, EndDomain - BeginDomain + 1) Domain2 = FoundDomain Else Domain2 = "nothing2" End If Rem MsgBox ("FoundDomain2=" & FoundDomain) Rem *** Compare Domain with Input #1 Until EOF or Match Do While Not EOF(1) And Not DomainMatch Line Input #1, InLine1 Line Input #4, Domain1 Rem ********************************************** Rem * COMPARE DOMAINS Rem ********************************************** If Domain1 = Domain2 Then DomainMatch = True Rem If Domain1 = Domain2 Then Rem MsgBox ("They Match!") Rem Else Rem MsgBox ("They DO NOT Match!") Rem End If Loop Close #1 Close #4 Rem MatchDomainEmail.Refresh Rem *** check if we matched domain, if so get Email from Input #2 If DomainMatch Then Pos = InStr(InLine2, ",") FoundEmail = Trim(Left(InLine2, Pos - 1)) Rem MsgBox ("FoundEmail2=" & FoundEmail & " ready to insert!") Rem *** EMAIL INSERT -- C is temp read char, Col is Comma Column Rem C = 0 Rem Col = Email1Col - 1 Rem Do While Col > 0 Rem C = C + 1 Rem MsgBox ("Inside Loop -- Col = " & Col & " C = " & C) Rem If Mid(InLine1, C, 1) = "," Then Rem Col = Col - 1 Rem End If Rem If (C >= Len(InLine1)) Then Rem Col = 0 Rem End If Rem Loop Rem OutLine3 = Left(InLine1, C) & FoundEmail & Right(InLine1, Len(InLine1) - C) OutLine3 = InLine1 OutLine3 = Replace(OutLine3, Email1Col, FoundEmail) Print #3, OutLine3 Rem MsgBox ("OutLine3 written to file, please check it") NumDomainMatch = NumDomainMatch + 1 DomainMatch = False Else NumDomainNotMatch = NumDomainNotMatch + 1 End If Rem MsgBox ("Match=" & NumDomainMatch & " NotMatch=" & NumDomainNotMatch) Rem * Re-open Master Input and read/ignore first header line Open InFile1 For Input As #1 Line Input #1, InLine1 Rem * Re-open Domain List and read/ignore first line Open OutFile2 For Input As #4 Line Input #4, Domain1 Loop Close #1 Close #2 Close #3 Close #4 MsgBox ("Reached EOF of Input File 2 -- " & NumDomainMatch & " Total Domains Matched and " & NumDomainNotMatch & " Not Matched.") lblInputFile1 = "" lblInputFile2 = "" lblOutputFile1 = "" lblOutputFile2 = "" End If End Sub Private Sub cmdShuffleDomains_Click() Rem ******************************************************************** Rem * Shuffles (scrambles) rows (records) of Input File 1 Rem * Purpose is to separate duplicate domains as optimally possible Rem ******************************************************************** End Sub Private Sub cmdInputFile1_Click() Rem *** Select Input File 1 Dim NumLines1 As Long CommonDialog1.Filter = "All Files (*.*)" CommonDialog1.FilterIndex = 1 CommonDialog1.ShowOpen InputFile1 = CommonDialog1.FileName lblInputFile1 = InputFile1 NumLines1 = 0 Open InputFile1 For Input As #1 Do While Not EOF(1) Line Input #1, InLine1 NumLines1 = NumLines1 + 1 Loop Close #1 lblNumLines1 = "Total Number of Lines = " & NumLines1 End Sub Private Sub cmdInputFile2_Click() Rem *** Select Input File 2 Dim NumLines2 As Long CommonDialog2.Filter = "All Files (*.*)" CommonDialog2.FilterIndex = 1 CommonDialog2.ShowOpen InputFile2 = CommonDialog2.FileName lblInputFile2 = InputFile2 NumLines2 = 0 Open InputFile2 For Input As #2 Do While Not EOF(2) Line Input #2, InLine2 NumLines2 = NumLines2 + 1 Loop Close #2 lblNumLines2 = "Total Number of Lines = " & NumLines2 End Sub Private Sub cmdOutputFile1_Click() Rem *** Select Output File 1 CommonDialog1.Filter = "All Files (*.*)" CommonDialog1.FilterIndex = 1 CommonDialog1.ShowOpen OutputFile1 = CommonDialog1.FileName lblOutputFile1 = OutputFile1 End Sub Private Sub cmdOutputFile2_Click() Rem *** Select Output File 2 CommonDialog2.Filter = "All Files (*.*)" CommonDialog2.FilterIndex = 1 CommonDialog2.ShowOpen OutputFile2 = CommonDialog2.FileName lblOutputFile2 = OutputFile2 End Sub Private Sub cmdExit_Click() Unload Me End Sub