# -*- coding: utf-8 -*-
# -------------------------------------------------------------------------------
# Name:         sfp_flickr
# Purpose:      Search Flickr API for domains, URLs and emails related to the
#               specified domain.
#
# Author:
#
# Created:      2018-10-08
# Copyright:    (c) bcoles 2018
# Licence:      MIT
# -------------------------------------------------------------------------------

import json
import re
import time
import urllib.error
import urllib.parse
import urllib.request

from spiderfoot import SpiderFootEvent, SpiderFootHelpers, SpiderFootPlugin


class sfp_flickr(SpiderFootPlugin):
    """Search Flickr photo metadata for e-mail addresses, URLs and hosts related to the target."""

    meta = {
        'name': "Flickr",
        'summary': "Search Flickr for domains, URLs and emails related to the specified domain.",
        'flags': [],
        'useCases': ["Footprint", "Investigate", "Passive"],
        'categories': ["Social Media"],
        'dataSource': {
            'website': "https://www.flickr.com/",
            'model': "FREE_NOAUTH_UNLIMITED",
            'references': [
                "https://www.flickr.com/services/api/",
                "https://www.flickr.com/services/developer/api/",
                "https://code.flickr.net/"
            ],
            'favIcon': "https://combo.staticflickr.com/pw/favicon.ico",
            'logo': "https://combo.staticflickr.com/pw/favicon.ico",
            'description': "Flickr is almost certainly the best online photo management and sharing application in the world.\n "
            "On Flickr, members upload photos, share them securely, supplement their photos with "
            "metadata like license information, geo-location, people, tags, etc., "
            "and interact with their family, friends, contacts or anyone in the community. "
            "Practically all the features on Flickr's various platforms -- web, mobile and desktop -- "
            "are accompanied by a longstanding API program. "
            "Since 2005, developers have collaborated on top of Flickr's APIs to build fun, creative, "
            "and gorgeous experiences around photos that extend beyond Flickr.",
        }
    }

    # Default options
    opts = {
        'pause': 1,
        'per_page': 100,
        'maxpages': 20,
        'dns_resolve': True,
    }

    # Option descriptions
    optdescs = {
        'pause': "Number of seconds to pause between fetches.",
        'per_page': "Maximum number of results per page.",
        'maxpages': "Maximum number of pages of results to fetch.",
        'dns_resolve': "DNS resolve each identified domain.",
    }

    # Values (input event data, e-mail addresses, URLs) that were already handled
    results = None

    def setup(self, sfc, userOpts=None):
        """Store the SpiderFoot handle, reset the seen-values storage and apply user options.

        Args:
            sfc: SpiderFoot object, kept as ``self.sf``.
            userOpts (dict): option overrides copied into ``self.opts``.
        """
        self.sf = sfc
        self.results = self.tempStorage()

        # A None default avoids sharing one mutable dict object between calls.
        for opt, value in (userOpts or {}).items():
            self.opts[opt] = value

    # What events is this module interested in for input
    def watchedEvents(self):
        """Return the event types this module consumes."""
        return ["DOMAIN_NAME"]

    # What events this module produces
    def producedEvents(self):
        """Return every event type this module may emit."""
        return [
            "EMAILADDR",
            "EMAILADDR_GENERIC",
            "INTERNET_NAME",
            # Emitted by _emitHosts() when DNS resolution of a host fails.
            "INTERNET_NAME_UNRESOLVED",
            "DOMAIN_NAME",
            "LINKED_URL_INTERNAL",
        ]

    # Retrieve API key
    def retrieveApiKey(self):
        """Fetch the Flickr home page and extract the site API key embedded in it.

        Returns:
            str: the first key found, or None when the page could not be
            fetched or contains no key.
        """
        res = self.sf.fetchUrl(
            "https://www.flickr.com/",
            timeout=self.opts['_fetchtimeout'],
            useragent=self.opts['_useragent']
        )

        if not res or res.get('content') is None:
            return None

        # Dots are escaped so that only the literal property path matches.
        keys = re.findall(
            r'YUI_config\.flickr\.api\.site_key = "([a-zA-Z0-9]+)"',
            str(res['content'])
        )

        if not keys:
            return None

        return keys[0]

    # Query the REST API
    def query(self, qry, api_key, page=1, per_page=200):
        """Run a ``flickr.photos.search`` request and return the decoded JSON object.

        Args:
            qry (str): free-text search term.
            api_key (str): Flickr API key.
            page (int): page number to request.
            per_page (int): number of results per page to request.

        Returns:
            dict: decoded JSON response, or None when no content was received
            or the content is not a JSON object.
        """
        params = {
            "sort": "relevance",
            "parse_tags": "1",
            "content_type": "7",
            "extras": "description,owner_name,path_alias,realname",
            "hermes": "1",
            "hermesClient": "1",
            "reqId": "",
            "nojsoncallback": "1",
            "viewerNSID": "",
            "method": "flickr.photos.search",
            "csrf": "",
            "lang": "en-US",
            "per_page": str(per_page),
            "page": str(page),
            "text": qry.encode('raw_unicode_escape').decode("ascii", errors='replace'),
            "api_key": api_key,
            "format": "json"
        }

        res = self.sf.fetchUrl(
            "https://api.flickr.com/services/rest?" + urllib.parse.urlencode(params),
            useragent=self.opts['_useragent'],
            timeout=self.opts['_fetchtimeout']
        )

        # Pause after every request, successful or not, to throttle fetches.
        time.sleep(self.opts['pause'])

        if not res or res.get('content') is None:
            self.debug("Error processing JSON response: no content received")
            return None

        try:
            data = json.loads(res['content'])
        except (TypeError, ValueError) as e:
            # json.JSONDecodeError is a subclass of ValueError.
            self.debug(f"Error processing JSON response: {e}")
            return None

        # Callers use dict methods on the result; reject any other JSON value.
        if not isinstance(data, dict):
            self.debug("Error processing JSON response: unexpected data type")
            return None

        return data

    def _emitEmails(self, text, event):
        """Emit an e-mail event for every new, target-related address found in *text*.

        Args:
            text (str): text to scan for e-mail addresses.
            event: source event used as the parent of emitted events.
        """
        for email in SpiderFootHelpers.extractEmailsFromText(text):
            if email in self.results:
                continue

            mail_domain = email.lower().split('@')[1]
            if not self.getTarget().matches(mail_domain, includeChildren=True, includeParents=True):
                self.debug(f"Skipped unrelated address: {email}")
                continue

            self.info("Found e-mail address: " + email)

            if email.split("@")[0] in self.opts['_genericusers'].split(","):
                evttype = "EMAILADDR_GENERIC"
            else:
                evttype = "EMAILADDR"

            evt = SpiderFootEvent(evttype, email, self.__name__, event)
            self.notifyListeners(evt)
            self.results[email] = True

    def _emitLinks(self, text, event, hosts):
        """Emit LINKED_URL_INTERNAL for every new, target-related URL found in *text*.

        Args:
            text (str): text to scan for URLs.
            event: source event used as the parent of emitted events.
            hosts (set): collection that receives the host of each emitted URL.
        """
        for link in SpiderFootHelpers.extractUrlsFromText(text):
            if link in self.results:
                continue

            host = self.sf.urlFQDN(link)
            if not self.getTarget().matches(host, includeChildren=True, includeParents=True):
                self.debug(f"Skipped unrelated URL: {link}")
                continue

            hosts.add(host)

            self.debug(f"Found a URL: {link}")
            evt = SpiderFootEvent('LINKED_URL_INTERNAL', link, self.__name__, event)
            self.notifyListeners(evt)
            self.results[link] = True

    def _emitHosts(self, hosts, event):
        """Emit host name (and, where applicable, domain name) events for *hosts*.

        Args:
            hosts (set): host names collected from the search results.
            event: source event used as the parent of emitted events.
        """
        for host in hosts:
            if self.checkForStop():
                return

            if self.errorState:
                return

            if self.opts['dns_resolve'] and not self.sf.resolveHost(host) and not self.sf.resolveHost6(host):
                self.debug(f"Host {host} could not be resolved")
                evt = SpiderFootEvent("INTERNET_NAME_UNRESOLVED", host, self.__name__, event)
                self.notifyListeners(evt)
                continue

            evt = SpiderFootEvent("INTERNET_NAME", host, self.__name__, event)
            self.notifyListeners(evt)

            if self.sf.isDomain(host, self.opts["_internettlds"]):
                evt = SpiderFootEvent("DOMAIN_NAME", host, self.__name__, event)
                self.notifyListeners(evt)

    # Handle events sent to this module
    def handleEvent(self, event):
        """Search Flickr for the event data and emit whatever related data is found.

        Args:
            event: incoming event; its ``data`` is used as the search term.
        """
        eventName = event.eventType
        srcModuleName = event.module
        eventData = event.data

        if eventData in self.results:
            self.debug(f"Skipping {eventData}, already checked")
            return

        self.results[eventData] = True

        self.debug(f"Received event, {eventName}, from {srcModuleName}")

        # This module emits DOMAIN_NAME itself; do not search on its own output.
        if srcModuleName == 'sfp_flickr':
            self.debug(f"Ignoring {eventData}, from self.")
            return

        # Retrieve API key
        api_key = self.retrieveApiKey()

        if not api_key:
            self.error("Failed to obtain API key")
            return

        self.debug(f"Retrieved API key: {api_key}")

        # Query API for event data
        hosts = set()
        page = 1
        pages = self.opts['maxpages']
        per_page = self.opts['per_page']

        while page <= pages:
            if self.checkForStop():
                return

            if self.errorState:
                return

            data = self.query(eventData, api_key, page=page, per_page=per_page)

            # From here on, failures end pagination with `break` rather than
            # `return`, so hosts gathered from earlier pages are still reported.
            if data is None:
                break

            # Check the response is ok
            if data.get('stat') != "ok":
                self.debug("Error retrieving search results.")
                break

            photos = data.get('photos')

            if not photos:
                self.debug("No search results.")
                break

            # Calculate number of pages to retrieve
            try:
                result_pages = int(photos.get('pages', 0))
                if 'max_allowed_pages' in photos:
                    allowed_pages = int(photos.get('max_allowed_pages', 0))
                else:
                    allowed_pages = None
            except (TypeError, ValueError):
                self.debug("Error retrieving search results.")
                break

            if result_pages < pages:
                pages = result_pages

            if allowed_pages is not None and pages > allowed_pages:
                pages = allowed_pages

            self.info(f"Parsing page {page} of {pages}")

            # Extract data
            for photo in photos.get('photo', list()):
                photo_text = str(photo)
                self._emitEmails(photo_text, event)
                self._emitLinks(photo_text, event, hosts)

            page += 1

        self._emitHosts(hosts, event)

# End of sfp_flickr class