# -*- coding: utf-8 -*-
# -------------------------------------------------------------------------------
# Name:         sfp_onionsearchengine
# Purpose:      Searches the Tor search engine onionsearchengine.com for content
#               related to the domain in question.
#
# Author:      Steve Micallef <steve@binarypool.com>
#
# Created:     27/10/2018
# Copyright:   (c) Steve Micallef 2018
# Licence:     MIT
# -------------------------------------------------------------------------------

import re
import urllib.error
import urllib.parse
import urllib.request

from spiderfoot import SpiderFootEvent, SpiderFootPlugin


class sfp_onionsearchengine(SpiderFootPlugin):
    """Search onionsearchengine.com for .onion pages mentioning the target.

    Watches DOMAIN_NAME, HUMAN_NAME and EMAILADDR events, queries the search
    engine for the quoted event data and emits DARKNET_MENTION_URL (and,
    when 'fetchlinks' is enabled, DARKNET_MENTION_CONTENT) events.
    """

    meta = {
        'name': "Onionsearchengine.com",
        'summary': "Search Tor onionsearchengine.com for mentions of the target domain.",
        'flags': ["tor"],
        'useCases': ["Footprint", "Investigate"],
        'categories': ["Search Engines"],
        'dataSource': {
            'website': "https://as.onionsearchengine.com",
            'model': "FREE_NOAUTH_UNLIMITED",
            'references': [
                "https://helpdesk.onionsearchengine.com/?v=knowledgebase",
                "https://onionsearchengine.com/add_url.php"
            ],
            'favIcon': "https://as.onionsearchengine.com/images/onionsearchengine.jpg",
            'logo': "https://as.onionsearchengine.com/images/onionsearchengine.jpg",
            'description': "No cookies, no javascript, no trace. We protect your privacy.\n"
            "Onion search engine is search engine with ability to find content on tor network / deepweb / darkweb.",
        }
    }

    # Default options
    opts = {
        'timeout': 10,
        'max_pages': 20,
        'fetchlinks': True,
        'blacklist': ['.*://relate.*'],
        'fullnames': True
    }

    # Option descriptions
    optdescs = {
        'timeout': "Query timeout, in seconds.",
        'max_pages': "Maximum number of pages of results to fetch.",
        'fetchlinks': "Fetch the darknet pages (via TOR, if enabled) to verify they mention your target.",
        'blacklist': "Exclude results from sites matching these patterns.",
        'fullnames': "Search for human names?"
    }

    # Tracks search terms and links already processed (set up in setup()).
    results = None

    # Number of characters of context kept on each side of a mention when
    # building the DARKNET_MENTION_CONTENT excerpt.
    _EXCERPT_CONTEXT = 120

    def setup(self, sfc, userOpts=None):
        """Store the SpiderFoot handle and apply user-supplied options.

        Args:
            sfc: SpiderFoot object used for fetching URLs and URL helpers.
            userOpts: optional dict of option overrides; None means no
                overrides.
        """
        self.sf = sfc
        self.results = self.tempStorage()

        # A None default replaces the former shared mutable default (dict()).
        if userOpts:
            for opt in userOpts:
                self.opts[opt] = userOpts[opt]

    # What events is this module interested in for input
    def watchedEvents(self):
        """Return the event types this module consumes."""
        return ["DOMAIN_NAME", "HUMAN_NAME", "EMAILADDR"]

    # What events this module produces
    # This is to support the end user in selecting modules based on events
    # produced.
    def producedEvents(self):
        """Return the event types this module may emit."""
        return ["DARKNET_MENTION_URL", "DARKNET_MENTION_CONTENT"]

    def _isBlacklisted(self, link):
        """Return True if link matches any pattern in the 'blacklist' option."""
        return any(
            re.match(pattern, link, re.IGNORECASE)
            for pattern in self.opts['blacklist']
        )

    def _mentionExcerpt(self, content, needle):
        """Return the text surrounding the first occurrence of needle.

        Args:
            content: page content to search.
            needle: string to locate within content.

        Returns:
            Up to _EXCERPT_CONTEXT characters either side of the match
            (match included), or None when needle does not occur.
        """
        matchIndex = content.find(needle)
        if matchIndex < 0:
            return None

        # Clamp at zero: a negative slice start would count from the END of
        # the string and produce an empty or unrelated excerpt whenever the
        # match sits within the first _EXCERPT_CONTEXT characters.
        startIndex = max(0, matchIndex - self._EXCERPT_CONTEXT)
        endIndex = matchIndex + len(needle) + self._EXCERPT_CONTEXT
        return content[startIndex:endIndex]

    def handleEvent(self, event):
        """Search for the event data and report .onion pages mentioning it.

        Args:
            event: the SpiderFootEvent to process; its data is used as the
                quoted search term.
        """
        eventName = event.eventType
        eventData = event.data

        if not self.opts['fullnames'] and eventName == 'HUMAN_NAME':
            return

        if eventData in self.results:
            self.debug("Already did a search for " + eventData + ", skipping.")
            return

        self.results[eventData] = True

        keepGoing = True
        page = 1
        while keepGoing and page <= int(self.opts['max_pages']):
            # Check if we've been asked to stop
            if self.checkForStop():
                return

            params = {
                'search': '"' + eventData.encode('raw_unicode_escape').decode("ascii", errors='replace') + '"',
                'submit': 'Search',
                'page': str(page)
            }

            # Sites hosted on the domain
            data = self.sf.fetchUrl(
                'https://onionsearchengine.com/search.php?' + urllib.parse.urlencode(params),
                useragent=self.opts['_useragent'],
                timeout=self.opts['timeout']
            )

            if data is None or not data.get('content'):
                self.info("No results returned from onionsearchengine.com.")
                return

            # Advance before any 'continue' below so a retry moves on to the
            # next page and the loop stays bounded by max_pages.
            page += 1

            if "url.php?u=" not in data['content']:
                # Work around some kind of bug in the site
                if "you didn't submit a keyword" in data['content']:
                    continue
                return

            # No "forward" link means this is the last page of results.
            if "forward >" not in data['content']:
                keepGoing = False

            links = re.findall(r"url\.php\?u=(.[^\"\']+)[\"\']",
                               data['content'], re.IGNORECASE | re.DOTALL)

            for link in links:
                if self.checkForStop():
                    return

                if link in self.results:
                    continue

                self.results[link] = True

                if self._isBlacklisted(link):
                    self.debug("Skipping " + link + " as it matches blacklist")
                    continue

                self.debug("Found a darknet mention: " + link)

                if not self.sf.urlFQDN(link).endswith(".onion"):
                    continue

                if not self.opts['fetchlinks']:
                    # Unverified mode: report the link without fetching it.
                    evt = SpiderFootEvent("DARKNET_MENTION_URL", link, self.__name__, event)
                    self.notifyListeners(evt)
                    continue

                res = self.sf.fetchUrl(
                    link,
                    timeout=self.opts['_fetchtimeout'],
                    useragent=self.opts['_useragent'],
                    verify=False
                )

                # Guard against a failed fetch as well as an empty body, in
                # line with the check applied to the search results above.
                if not res or res.get('content') is None:
                    self.debug("Ignoring " + link + " as no data returned")
                    continue

                excerpt = self._mentionExcerpt(res['content'], eventData)
                if excerpt is None:
                    self.debug("Ignoring " + link + " as no mention of " + eventData)
                    continue

                evt = SpiderFootEvent("DARKNET_MENTION_URL", link, self.__name__, event)
                self.notifyListeners(evt)

                # The content event is parented to the URL event it came from.
                evt = SpiderFootEvent("DARKNET_MENTION_CONTENT", "..." + excerpt + "...", self.__name__, evt)
                self.notifyListeners(evt)

# End of sfp_onionsearchengine class