1. Scraping business directories
Google Maps is the largest business directory. Extract business names, phone numbers, websites, and ratings for any category and location:
import requests
import json
import time
import re

# SnapRender API key -- replace the placeholder with your live key.
API_KEY = "sr_live_YOUR_KEY"
def scrape_business_directory(category, location, page=1):
    """Scrape businesses from Google Maps via the SnapRender extract API.

    Args:
        category: Business category to search, e.g. "digital marketing agency".
        location: Location string, e.g. "Austin TX".
        page: Result page number (reserved -- not yet sent in the request).

    Returns:
        dict: The "data" object from the API response, mapping each selector
        name (names, ratings, review_counts, ...) to a list of extracted values.
    """
    # Build a Maps search URL like .../maps/search/plumbers+in+Austin+TX
    query = f"{category}+in+{location}".replace(" ", "+")
    url = f"https://www.google.com/maps/search/{query}"
    resp = requests.post(
        "https://api.snaprender.dev/v1/extract",
        headers={
            "x-api-key": API_KEY,
            "Content-Type": "application/json",
        },
        json={
            "url": url,
            "selectors": {
                "names": "[role='feed'] .fontHeadlineSmall",
                "ratings": "[role='feed'] .MW4etd",
                "review_counts": "[role='feed'] .UY7F9",
                "addresses": "[role='feed'] .W4Efsd:nth-child(2)",
                "phones": "[role='feed'] .W4Efsd:nth-child(3)",
                "websites": "[role='feed'] a[data-value='Website']::attr(href)",
                "categories": "[role='feed'] .W4Efsd:first-child",
            },
            # Python bool, not JSON `true` -- a bare `true` is a NameError here.
            "use_flaresolverr": True,
        },
        timeout=60,  # guard against a hung render
    )
    resp.raise_for_status()  # surface HTTP errors instead of a confusing KeyError
    return resp.json()["data"]
# Scrape digital marketing agencies in Austin.
all_leads = []
data = scrape_business_directory("digital marketing agency", "Austin TX")
names = data.get("names", [])


def _col(values, i):
    """Return values[i], or "" when a selector matched fewer elements than names."""
    return values[i] if i < len(values) else ""


for i, business_name in enumerate(names):
    all_leads.append({
        "name": business_name,
        "phone": _col(data.get("phones", []), i),
        "address": _col(data.get("addresses", []), i),
        "website": _col(data.get("websites", []), i),
        "rating": _col(data.get("ratings", []), i),
        "reviews": _col(data.get("review_counts", []), i),
    })
print(f"Found {len(all_leads)} businesses")

2. Contact information extraction
Once you have company websites, scrape their contact and about pages for email addresses, phone numbers, and team members:
def scrape_contact_info(website_url):
    """Scrape contact information from a company website.

    Tries the common contact/about page paths in order and keeps the first
    non-empty value found for each field.

    Args:
        website_url: Base URL of the company site (no trailing slash expected).

    Returns:
        dict with keys:
            emails: cleaned addresses (mailto: stripped, "example" domains dropped).
            phones: phone numbers with the tel: prefix stripped.
            social: social profile links.
            team: list of (name, title) tuples zipped from the team selectors.
    """
    # Try contact pages first, then about pages.
    contact_urls = [
        f"{website_url}/contact",
        f"{website_url}/contact-us",
        f"{website_url}/about",
        f"{website_url}/about-us",
    ]
    contact_data = {}
    for url in contact_urls:
        try:
            resp = requests.post(
                "https://api.snaprender.dev/v1/extract",
                headers={
                    "x-api-key": API_KEY,
                    "Content-Type": "application/json",
                },
                json={
                    "url": url,
                    "selectors": {
                        "emails": "a[href^='mailto:']::attr(href)",
                        "phones": "a[href^='tel:']::attr(href)",
                        # NOTE(review): ::attr(href) syntactically binds only to the
                        # last alternative -- confirm the API applies it to all three.
                        "social_links": "a[href*='linkedin.com'], a[href*='twitter.com'], a[href*='facebook.com']::attr(href)",
                        "team_names": ".team-member h3, .team h3",
                        "team_titles": ".team-member p, .team p",
                        "address": "address, .address",
                    },
                    # Python bool, not JSON `true` -- a bare `true` is a NameError.
                    "use_flaresolverr": True,
                },
                timeout=60,  # guard against a hung render
            )
            data = resp.json()["data"]
            # Merge: keep the first non-empty value seen for each field.
            for key, value in data.items():
                if value and not contact_data.get(key):
                    contact_data[key] = value
            time.sleep(1)  # be polite between page fetches
        except Exception:
            # Best-effort: a missing page or bad response just moves on.
            continue

    # Clean email addresses: strip mailto: and drop placeholder domains.
    emails = contact_data.get("emails", [])
    clean_emails = [
        e.replace("mailto:", "")
        for e in emails
        if "@" in e and "example" not in e
    ]
    return {
        "emails": clean_emails,
        "phones": [p.replace("tel:", "") for p in contact_data.get("phones", [])],
        "social": contact_data.get("social_links", []),
        "team": list(zip(
            contact_data.get("team_names", []),
            contact_data.get("team_titles", []),
        )),
    }
# Enrich the first 10 leads with contact info scraped from their own sites.
for lead in all_leads[:10]:
    if lead["website"]:
        contact = scrape_contact_info(lead["website"])
        lead["emails"] = contact["emails"]
        lead["team"] = contact["team"]
        print(f"{lead['name']}: {len(contact['emails'])} emails found")
        time.sleep(2)  # rate-limit between sites

3. Industry directory mining
B2B directories like Clutch provide pre-qualified leads with budgets, employee counts, and verified reviews:
def scrape_clutch_directory(category, page=0):
    """Scrape Clutch.co company directory for B2B leads.

    Args:
        category: Clutch directory slug, e.g. "web-developers".
        page: Zero-based result page number.

    Returns:
        dict: The "data" object from the API response, mapping each selector
        name (names, taglines, ratings, ...) to a list of extracted values.
    """
    url = (
        f"https://clutch.co/{category}"
        f"?page={page}"
    )
    resp = requests.post(
        "https://api.snaprender.dev/v1/extract",
        headers={
            "x-api-key": API_KEY,
            "Content-Type": "application/json",
        },
        json={
            "url": url,
            "selectors": {
                "names": ".provider-info .company_info a",
                "taglines": ".provider-info .tagline",
                "ratings": ".provider-info .rating",
                "review_counts": ".provider-info .reviews-count",
                "locations": ".provider-info .locality",
                "min_budgets": ".provider-info .custom_popover span",
                "employees": ".provider-info .company_info li:nth-child(2)",
                "links": ".provider-info .company_info a::attr(href)",
            },
            # Python bool, not JSON `true` -- a bare `true` is a NameError.
            "use_flaresolverr": True,
        },
        timeout=60,  # guard against a hung render
    )
    resp.raise_for_status()  # surface HTTP errors instead of a confusing KeyError
    return resp.json()["data"]
# Scrape web development agencies from Clutch, five pages.


def _cell(values, i):
    """Return values[i], or "" when a selector matched fewer elements than names."""
    return values[i] if i < len(values) else ""


agencies = []
for page in range(5):
    data = scrape_clutch_directory("web-developers", page)
    names = data.get("names", [])
    for i, agency_name in enumerate(names):
        agencies.append({
            "name": agency_name,
            "rating": _cell(data.get("ratings", []), i),
            "reviews": _cell(data.get("review_counts", []), i),
            "location": _cell(data.get("locations", []), i),
            "min_budget": _cell(data.get("min_budgets", []), i),
            "employees": _cell(data.get("employees", []), i),
        })
    print(f"Page {page + 1}: {len(names)} agencies")
    time.sleep(3)  # be polite between pages
print(f"Total: {len(agencies)} agencies scraped")

Legal note
Always include an unsubscribe link in outreach emails. Under CAN-SPAM and GDPR, recipients must be able to opt out. Scraping public business data is different from sending unsolicited emails -- know the laws in your jurisdiction.
4. Lead scoring and enrichment
Score leads based on data completeness, engagement signals, and fit with your ideal customer profile:
import pandas as pd
def enrich_and_score_leads(leads):
    """Score and prioritize scraped leads.

    Args:
        leads: list of dicts with at least "reviews" and "rating" keys (and
            optionally "emails"), as produced by the directory scrapers above.

    Returns:
        pandas.DataFrame of all leads sorted by descending "score". Leads
        scoring >= 5 are also exported to qualified_leads.csv as a side effect.
    """
    df = pd.DataFrame(leads)

    # Parse review counts like "(1,234)" into floats.
    # expand=False makes str.extract return a Series; the original piped a
    # one-column DataFrame into the column assignment and its
    # .replace(",", "", regex=True) call, which is fragile -- Series.str.replace
    # strips the thousands separators reliably.
    df["review_num"] = (
        df["reviews"]
        .str.extract(r"([\d,]+)", expand=False)
        .str.replace(",", "", regex=False)
        .astype(float)
    )
    df["rating_num"] = pd.to_numeric(df["rating"], errors="coerce")

    # A lead "has email" only when its emails field is a non-empty list.
    # Guarded so leads scraped without contact enrichment don't KeyError.
    if "emails" in df.columns:
        has_email = df["emails"].apply(
            lambda x: len(x) > 0 if isinstance(x, list) else False
        )
    else:
        has_email = pd.Series(False, index=df.index)

    # Lead scoring (customize weights for your ICP).
    df["score"] = 0
    df.loc[df["review_num"] >= 20, "score"] += 2
    df.loc[df["review_num"] >= 50, "score"] += 2
    df.loc[df["rating_num"] >= 4.5, "score"] += 2
    df.loc[has_email, "score"] += 3

    # Sort best leads first.
    df = df.sort_values("score", ascending=False)
    print("=== Lead Scoring Results ===")
    print(f"Total leads: {len(df)}")
    print(f"High quality (7+): {len(df[df['score'] >= 7])}")
    print(f"With emails: {int(has_email.sum())}")

    # Export top leads.
    top_leads = df[df["score"] >= 5]
    top_leads.to_csv("qualified_leads.csv", index=False)
    print(f"\nExported {len(top_leads)} qualified leads to CSV")
    return df
scored = enrich_and_score_leads(all_leads)

Build your lead gen pipeline
SnapRender handles JavaScript rendering, bot detection, and structured data extraction from any directory or company website. Build lead lists with a single API.
Get Your API Key — Free

Frequently asked questions
Scraping publicly available business contact information (company websites, directories, social profiles) is generally permissible for B2B outreach. However, scraping personal data may violate GDPR (EU), CCPA (California), or CAN-SPAM. Always comply with applicable privacy laws and respect opt-out requests.
LinkedIn company pages and profiles, industry directories (Clutch, G2, Capterra), Google Maps business listings, Crunchbase for startup data, industry-specific directories, and company websites with public team/contact pages. Each source provides different data points.
Scrape contact pages, team pages, and footer sections for direct emails. For pattern-based discovery, identify the company email format (first.last@company.com) from one known email, then construct others from scraped employee names. Always verify emails before sending.
For most B2B use cases, 100-500 leads per day is sufficient for a sales team. Quality matters more than quantity. Focus on scraping detailed, enriched leads (company size, industry, tech stack, funding) rather than bulk email lists. Higher quality leads convert 5-10x better.
LinkedIn aggressively prohibits scraping and has sued companies for it (hiQ Labs case). While public profiles are technically accessible, LinkedIn detects and blocks automated access. Use LinkedIn's own Sales Navigator API for compliant lead generation, or scrape other public business directories instead.