#COMPILE EXE
#DIM ALL

'Characters ToPut$ copies through unescaped. A space is listed here but ToPut$
'converts spaces to "+" before this list is consulted.
$PUT_OK = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM1234567890.- "
'Value sent in the User-Agent header of every request made by httpGrab$
$USERAGENT = "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"
'Capacity of the search-term list loaded from clsearch.txt
%MAXTERMS = 100
'Capacity of the per-feed result arrays (titles, links, dates, descriptions)
%MAXITEMS = 250
'Capacity of the accumulated lists (previously saved entries and entries found this run)
%MAXITEMSFULL = 15000
%DELAY = 500              'pause before each search, in ms; raise it if CL starts blocking (0 = no pause)
'Capacity of the city list loaded from clcities.txt
%MAXCITIES = 1500
%SORTMODE = 2             '1=sort new items by item number and keep each search session as its own list, 2=merge with old items, sort by date and tag new items with " <b>NEW</b>"

DECLARE FUNCTION GetCraigsRSS(Query$, City$, Section$, Item$(), Link$(), itemDate$(), Description$()) AS LONG    'Harvest a Craigslist RSS feed; returns item count, 0 for no matches, -1 on error
DECLARE FUNCTION ToPut$(PutIn$) AS STRING      'URL-encode a string for use in a query string
DECLARE FUNCTION httpGrab$(site$, file$)       'Fetch a file via HTTP GET; returns the raw response
DECLARE FUNCTION Between$(inString$, starter$, finisher$)     'Return the text between two markers
DECLARE FUNCTION NumToMonth$(mNo AS LONG)      'Month number (1-12) to English month name


FUNCTION PBMAIN () AS LONG
   'Program entry point.
   '  (1) Loads the city list (clcities.txt) and search terms (clsearch.txt);
   '      blank lines and lines starting with "#" are skipped.
   '  (2) Loads the entries saved by a previous run from harvestcl.html, if present.
   '  (3) Runs every search term against every city and collects entries that
   '      have not been seen before (neither in this run nor in the old file).
   '  (4) Sorts the results according to %SORTMODE.
   '  (5) Rewrites harvestcl.html and opens it via the shell.
   'Pressing ESC while searching aborts the search; whatever was found so far
   'is still written out.
   LOCAL numItems AS LONG, ctr AS LONG, numTerms AS LONG, numCities AS LONG, cityCtr AS LONG, termCtr AS LONG
   LOCAL theItem$(), theLink$(), completeSize AS LONG, searchTerms$(), CityList$(), x$, ctr2 AS LONG
   LOCAL CompleteData$(), FoundToday$(), numFoundToday AS LONG, foundIt AS LONG, numRepeat AS LONG
   LOCAL TodayTag() AS LONG, tries AS LONG, thisTerm$, thisSect$, theDate$(), theDescription$()
   LOCAL newEntry$, oldEntry$, completeCap AS LONG, foundCap AS LONG
   DIM theItem$(1 TO %MAXITEMS), theLink$(1 TO %MAXITEMS), searchTerms$(1 TO %MAXTERMS), thisLink$
   DIM CompleteData$(1 TO %MAXITEMSFULL), CityList$(1 TO %MAXCITIES), FoundToday$(1 TO %MAXITEMSFULL)
   DIM TodayTag(1 TO %MAXITEMSFULL), theDate$(1 TO %MAXITEMS), theDescription$(1 TO %MAXITEMS)

   'Current capacities of the growable lists (they are enlarged on demand below)
   completeCap = %MAXITEMSFULL
   foundCap = %MAXITEMSFULL

   STDOUT "HarvestCL"
   STDOUT "Craigslist multi-city/term search tool by Shannon Larratt"
   STDOUT "zentastic.com"
   STDOUT

   '(1) Get search terms and city list
   x$ = DIR$("clcities.txt")
   IF x$ = "" THEN
      STDOUT "Error: Could not load clcities.txt"
      EXIT FUNCTION
   END IF
   OPEN "clcities.txt" FOR INPUT AS #1
   DO UNTIL EOF(1)
      LINE INPUT #1, x$
      x$ = TRIM$(x$)
      IF LEN(x$) > 0 AND LEFT$(x$, 1) <> "#" THEN
         'Never write past the end of CityList$()
         IF numCities >= %MAXCITIES THEN
            STDOUT "Warning: clcities.txt has more than " & FORMAT$(%MAXCITIES) & " cities; ignoring the rest"
            EXIT DO
         END IF
         INCR numCities
         CityList$(numCities) = x$
      END IF
   LOOP
   CLOSE #1
   STDOUT "Loaded " & FORMAT$(numCities, "0,") & " cities to search."
   x$ = DIR$("clsearch.txt")
   IF x$ = "" THEN
      STDOUT "Error: Could not load clsearch.txt"
      EXIT FUNCTION
   END IF
   OPEN "clsearch.txt" FOR INPUT AS #1
   DO UNTIL EOF(1)
      LINE INPUT #1, x$
      x$ = TRIM$(x$)
      IF LEN(x$) > 0 AND LEFT$(x$, 1) <> "#" THEN
         'Never write past the end of searchTerms$()
         IF numTerms >= %MAXTERMS THEN
            STDOUT "Warning: clsearch.txt has more than " & FORMAT$(%MAXTERMS) & " terms; ignoring the rest"
            EXIT DO
         END IF
         INCR numTerms
         searchTerms$(numTerms) = x$
      END IF
   LOOP
   CLOSE #1
   STDOUT "Loaded " & FORMAT$(numTerms, "0,") & " search terms."

   '(2) Load old list
   '    Mode 1 keeps every non-blank line of the old file verbatim.
   '    Mode 2 keeps only the contents of <li>...</li> lines, with any
   '    trailing " <b>NEW</b>" tag from the previous run removed.
   x$ = DIR$("harvestcl.html")
   IF x$ <> "" THEN
      OPEN "harvestcl.html" FOR INPUT AS #1
      DO UNTIL EOF(1)
         LINE INPUT #1, x$
         x$ = TRIM$(x$)
         IF x$ <> "" THEN
            'Grow the list rather than overrun it or drop saved entries
            IF completeSize >= completeCap THEN
               completeCap = completeCap + %MAXITEMSFULL
               REDIM PRESERVE CompleteData$(1 TO completeCap)
            END IF
            IF %SORTMODE = 1 THEN
               INCR completeSize
               CompleteData$(completeSize) = x$
            ELSEIF %SORTMODE = 2 THEN
               IF INSTR(x$, "<li>") > 0 THEN
                  INCR completeSize
                  CompleteData$(completeSize) = Between(x$, "<li>", "</li>")
                  IF RIGHT$(CompleteData$(completeSize), 11) = " <b>NEW</b>" THEN
                     CompleteData$(completeSize) = LEFT$(CompleteData$(completeSize), LEN(CompleteData$(completeSize)) - 11)
                  END IF
               END IF
            END IF
         END IF
      LOOP
      CLOSE #1
      STDOUT "Loaded old file with " & FORMAT$(completeSize, "0,") & " lines."
   END IF

   '(3) Run complete search (and discard duplicates)
   STDOUT "Running search..."
   FOR cityCtr = 1 TO numCities
      STDOUT "Searching city: " & CityList$(cityCtr)
      FOR termCtr = 1 TO numTerms
         IF %DELAY > 0 THEN
            SLEEP %DELAY
         END IF
         'Each search line is "section, query"
         thisTerm$ = TRIM$(PARSE$(SearchTerms$(termCtr), ",", 2))
         thisSect$ = TRIM$(PARSE$(SearchTerms$(termCtr), ",", 1))
         STDOUT "   " & thisTerm$ & " (" & thisSect$ & "): ";
         tries = 0
         tryagain:
         IF INKEY$ = CHR$(27) THEN
            STDOUT "Aborted"
            EXIT, EXIT
         END IF
         numItems = GetCraigsRSS(thisTerm$, CityList$(cityCtr), thisSect$, theItem$(), theLink$(), theDate$(), theDescription$())
         IF numItems = -1 THEN
            'Fetch failed: retry up to four more times, one second apart
            INCR tries
            IF tries < 5 THEN
               SLEEP 1000
               STDOUT ". ";
               GOTO tryagain
            ELSE
               STDOUT "Failed"
            END IF
         ELSEIF numItems = 0 THEN
            STDOUT "0"
         ELSE
            numRepeat = 0
            FOR ctr = 1 TO numItems
               thisLink$ = theDate$(ctr) & " <a href=""" & theLink$(ctr) & """>" & theItem$(ctr) & "</a>"
               'newEntry$ is the exact form stored in FoundToday$(); oldEntry$ is
               'the exact form an already-saved copy has in CompleteData$().
               'Comparing against these (instead of the bare link) is what makes
               'duplicate detection actually match.
               IF %SORTMODE = 1 THEN
                  newEntry$ = "<li>" & thisLink$ & "</li>"
                  oldEntry$ = newEntry$
               ELSE
                  newEntry$ = thisLink$ & " <b>NEW</b>"
                  oldEntry$ = thisLink$
               END IF
               foundIt = 0
               FOR ctr2 = 1 TO numFoundToday
                  IF FoundToday$(ctr2) = newEntry$ THEN
                     foundIt = 1
                     EXIT FOR
                  END IF
               NEXT
               IF foundIt = 0 THEN
                  FOR ctr2 = 1 TO completeSize
                     IF CompleteData$(ctr2) = oldEntry$ THEN
                        foundIt = 1
                        EXIT FOR
                     END IF
                  NEXT
               END IF
               IF foundIt = 1 THEN
                  INCR numRepeat
               ELSE
                  'Grow the new-item list (and its sort-key array) if it is full
                  IF numFoundToday >= foundCap THEN
                     foundCap = foundCap + %MAXITEMSFULL
                     REDIM PRESERVE FoundToday$(1 TO foundCap)
                     REDIM PRESERVE TodayTag(1 TO foundCap)
                  END IF
                  INCR numFoundToday
                  FoundToday$(numFoundToday) = newEntry$
               END IF
            NEXT
            STDOUT FORMAT$(numItems) & " (" & FORMAT$(numItems - numRepeat) & " new)"
         END IF
      NEXT
   NEXT
   STDOUT "Found " & FORMAT$(numFoundToday, "0,") & " new items"

   IF numFoundToday = 0 THEN
      STDOUT "No changes... exiting"
      EXIT FUNCTION
   END IF

   IF %SORTMODE = 1 THEN
      '(4) Sort new list by item number (the last path component before ".html")
      FOR ctr = 1 TO numFoundToday
         x$ = LEFT$(FoundToday$(ctr), INSTR(FoundToday$(ctr), ".html") - 1)
         TodayTag(ctr) = VAL(PARSE$(x$, "/", PARSECOUNT(x$, "/")))
      NEXT
      'Sort exactly the filled elements (ctr is numFoundToday + 1 after the loop)
      ARRAY SORT TodayTag() FOR numFoundToday, TAGARRAY FoundToday$(), DESCEND
   ELSEIF %SORTMODE = 2 THEN
      '(4) Add new list into old list and sort by date (prefix)
      IF completeSize + numFoundToday > completeCap THEN
         completeCap = completeSize + numFoundToday
         REDIM PRESERVE CompleteData$(1 TO completeCap)
      END IF
      FOR ctr = 1 TO numFoundToday
         INCR completeSize
         CompleteData$(completeSize) = FoundToday$(ctr)
      NEXT
      ARRAY SORT CompleteData$() FOR completeSize, DESCEND
   END IF

   '(5) Output new file and execute it
   OPEN "harvestcl.html" FOR OUTPUT AS #1
   PRINT #1, "<h3>HarvestCL build for " & DATE$ & " at " & TIME$ & "</h3>"
   IF %SORTMODE = 1 THEN
      'Sort-by-item number and split-by-session mode:
      'this session's items first, then the old file's lines unchanged
      PRINT #1, "<ul>"
      FOR ctr = 1 TO numFoundToday
         PRINT #1, FoundToday$(ctr)
      NEXT
      PRINT #1, "</ul>"
      FOR ctr = 1 TO completeSize
         PRINT #1, CompleteData$(ctr)
      NEXT
   ELSEIF %SORTMODE = 2 THEN
      'Sort by date, split by date: entries start with a 10-character date
      '(year in columns 1-4, month in 6-7, day in 9-10); a new heading and
      'list are started whenever that prefix changes
      PRINT #1, "<h3>" & NumToMonth$(VAL(MID$(CompleteData$(1), 6, 2))) & " " & FORMAT$(VAL(MID$(CompleteData$(1), 9, 2))) & ", " & LEFT$(CompleteData$(1), 4) & "</h3>"
      PRINT #1, "<ul>"
      FOR ctr = 1 TO completeSize
         IF ctr <> 1 THEN
            IF LEFT$(CompleteData$(ctr), 10) <> LEFT$(CompleteData$(ctr - 1), 10) THEN
               PRINT #1, "</ul>"
               PRINT #1, "<h3>" & NumToMonth$(VAL(MID$(CompleteData$(ctr), 6, 2))) & " " & FORMAT$(VAL(MID$(CompleteData$(ctr), 9, 2))) & ", " & LEFT$(CompleteData$(ctr), 4) & "</h3>"
               PRINT #1, "<ul>"
            END IF
         END IF
         PRINT #1, "<li>" & CompleteData$(ctr) & "</li>"
      NEXT
      PRINT #1, "</ul>"
   END IF
   'The file must be closed (flushed) before another program is asked to open it
   CLOSE #1

   STDOUT "New file outputted [harvestcl.html]"
   STDOUT "Done!"
   SHELL "cmd /C harvestcl.html"


END FUNCTION


FUNCTION ToPut$(PutIn$) AS STRING      'convert string to PUT-able string
   'Encodes PutIn$ one character at a time:
   '  - a space becomes "+"
   '  - a character listed in $PUT_OK is copied unchanged
   '  - anything else becomes "%" followed by its two-digit hex character code
   'An empty result is replaced by "+" so the caller never gets a blank string.
   LOCAL encoded$, ch$, piece$
   LOCAL idx AS LONG, total AS LONG

   total = LEN(PutIn$)
   FOR idx = 1 TO total
      ch$ = MID$(PutIn$, idx, 1)
      IF ch$ = " " THEN
         piece$ = "+"
      ELSEIF INSTR($PUT_OK, ch$) > 0 THEN
         piece$ = ch$                              'legal as-is
      ELSE
         piece$ = "%" & HEX$(ASC(ch$), 2)
      END IF
      encoded$ = encoded$ & piece$
   NEXT idx

   IF LEN(encoded$) = 0 THEN encoded$ = "+"        'so blank lines aren't outputted
   FUNCTION = encoded$
END FUNCTION


FUNCTION GetCraigsRSS(Query$, City$, Section$, Item$(), Link$(), itemDate$(), Description$()) AS LONG    'Harvest a Craigslist RSS feed
   'Query$ is what to search for -- ie. '"kit car"' or 'wheelchair'
   'City$ is the city (URL reference) -- ie. 'toronto' or 'flint'
   'Section$ is the section of the site to search -- ie. 'sss' for all, 'car' for cars, 'clt' for collectibles
   'Item$(), Link$(), itemDate$(), Description$() return the results, filled from index 1
   '   (itemDate$ holds the first 10 characters of each item's <dc:date>)
   'Function returns the number of results stored, 0 when the feed has no items,
   '   or -1 when nothing usable came back (empty response, or a response with no
   '   "</channel>" marker, i.e. not a feed)
   'The result count is capped at the smallest upper bound of the four arrays so
   '   a long feed can never write past the end of the caller's arrays.

   LOCAL theSite$, theURI$, rssFeed$, thisItem$
   LOCAL numItems AS LONG, ctr AS LONG, headerEnd AS LONG, itemEnd AS LONG, capacity AS LONG

   theSite$ = City$ & ".craigslist.org"
   theURI$ = "/search/" & Section$ & "?query=" & toPut$(Query$) & "&format=rss"

   rssFeed$ = httpGrab$(theSite$, theURI$)

   IF LEN(rssFeed$) = 0 THEN
      FUNCTION = -1     'error, probably http error
      EXIT FUNCTION
   END IF

   headerEnd = INSTR(rssFeed$, "</channel>")
   IF headerEnd = 0 THEN
      FUNCTION = -1     'got a response, but it is not a feed (error page, redirect page...)
      EXIT FUNCTION
   END IF
   rssFeed$ = MID$(rssFeed$, headerEnd)    'chop off header, don't need it

   numItems = TALLY(rssFeed$, "<title>")
   capacity = MIN&(UBOUND(Item$), UBOUND(Link$), UBOUND(itemDate$), UBOUND(Description$))
   IF numItems > capacity THEN numItems = capacity

   IF numItems > 0 THEN
      FOR ctr = 1 TO numItems
         'Isolate the current item so a tag missing from it cannot be picked
         'up from the item that follows
         itemEnd = INSTR(rssFeed$, "</item>")
         IF itemEnd = 0 THEN
            thisItem$ = rssFeed$
            rssFeed$ = ""
         ELSE
            thisItem$ = LEFT$(rssFeed$, itemEnd + 6)
            rssFeed$ = MID$(rssFeed$, itemEnd + 7)    'chop off what we've done
         END IF

         Item$(ctr) = Between$(thisItem$, "<title>", "</title>")
         IF LEFT$(Item$(ctr), 9) = "<![CDATA[" THEN
            Item$(ctr) = Between$(Item$(ctr), "<![CDATA[", "]]>")
         END IF
         itemDate$(ctr) = LEFT$(Between$(thisItem$, "<dc:date>", "</dc:date>"), 10)
         Description$(ctr) = Between$(thisItem$, "<description>", "</description>")
         IF LEFT$(Description$(ctr), 9) = "<![CDATA[" THEN
            Description$(ctr) = Between$(Description$(ctr), "<![CDATA[", "]]>")
         END IF
         Link$(ctr) = Between$(thisItem$, "<link>", "</link>")
      NEXT
      FUNCTION = numItems
   ELSE
      FUNCTION = 0      'no matches
   END IF
END FUNCTION

FUNCTION Between$(inString$, starter$, finisher$)     'find the text between two markers
   'Returns the part of inString$ that follows the first occurrence of starter$
   'and precedes the next occurrence of finisher$ after it.
   '  - starter$ not found: returns ""
   '  - finisher$ not found after starter$: returns everything after starter$
   LOCAL bodyPos AS LONG, stopPos AS LONG

   bodyPos = INSTR(inString$, starter$)
   IF bodyPos = 0 THEN
      FUNCTION = ""
      EXIT FUNCTION
   END IF

   'Step over the opening marker itself
   bodyPos = bodyPos + LEN(starter$)
   stopPos = INSTR(bodyPos, inString$, finisher$)

   IF stopPos > 0 THEN
      FUNCTION = MID$(inString$, bodyPos, stopPos - bodyPos)
   ELSE
      FUNCTION = MID$(inString$, bodyPos)
   END IF
END FUNCTION

FUNCTION httpGrab$(site$, file$)       'Simply grabs a file via HTTP GET
   'Opens a TCP connection to site$ on port 80, sends an HTTP/1.0 GET for file$
   'and returns everything the server sends back (status line and headers included).
   'A 301/302 status is followed through its "Location: http://..." header, at
   'most 5 hops deep so a redirect loop cannot recurse forever.
   'Returns "" when the connection cannot be opened, when a redirect has no
   'usable http:// target, or when the redirect limit is exceeded.
   LOCAL buffer$, res$, statusLine$
   LOCAL newSite$, newFile$, newURI$
   LOCAL ff AS LONG, slashPos AS LONG
   STATIC redirectDepth AS LONG      'how many redirects deep the current call is

   ff = FREEFILE
   ERRCLEAR                          'so the check below only sees TCP OPEN's own result
   TCP OPEN PORT 80 AT site$ AS #ff TIMEOUT 5500
   IF ERR THEN
      ERRCLEAR
      FUNCTION = ""
      EXIT FUNCTION
   END IF

   TCP PRINT #ff, "GET " & file$ & " HTTP/1.0"
   TCP PRINT #ff, "Host: " & site$
   TCP PRINT #ff, "User-Agent: " & $USERAGENT
   TCP PRINT #ff, "Accept-Language: en-us"      'Without this craigslist returns a failed document
   TCP PRINT #ff, ""
   res$ = ""
   DO
      buffer$ = ""
      TCP RECV #ff, 4096, buffer$
      res$ = res$ & buffer$
   LOOP WHILE ISTRUE LEN(buffer$) AND ISFALSE ERR
   TCP CLOSE #ff

   'Look for the redirect code in the status line only, not anywhere in the body
   statusLine$ = EXTRACT$(res$, $CRLF)
   SELECT CASE MID$(statusLine$, INSTR(statusLine$, " ") + 1, 3)
      CASE "301", "302"              '302s will occur on all non-craigslist.org urls (often twice)
         newURI$ = Between$(res$, "Location: http://", $CRLF)
         IF LEN(newURI$) = 0 OR redirectDepth >= 5 THEN
            FUNCTION = ""
            EXIT FUNCTION
         END IF
         slashPos = INSTR(newURI$, "/")
         IF slashPos = 0 THEN
            'Host only, no path: request the root document
            newSite$ = newURI$
            newFile$ = "/"
         ELSE
            newSite$ = LEFT$(newURI$, slashPos - 1)
            newFile$ = MID$(newURI$, slashPos)
         END IF
         INCR redirectDepth
         res$ = httpGrab$(newSite$, newFile$)
         DECR redirectDepth
   END SELECT

   FUNCTION = res$
END FUNCTION

FUNCTION NumToMonth$(mNo AS LONG)      'Month to Text Version
   'Maps a month number to its English name: 1 -> "January" ... 12 -> "December".
   'Any other value yields "Error".
   IF mNo < 1 OR mNo > 12 THEN
      FUNCTION = "Error"
   ELSE
      'Pick the mNo-th name out of a comma-separated table
      FUNCTION = PARSE$("January,February,March,April,May,June,July,August,September,October,November,December", ",", mNo)
   END IF
END FUNCTION
