1. Scraping search results
Redfin search pages list properties with prices, bed/bath counts, and square footage. Since Redfin is a React SPA, you need JavaScript rendering to access the data:
import requests
import json

# SnapRender API key — replace with your own key.
API_KEY = "sr_live_YOUR_KEY"
def scrape_redfin_search(city, state, page=1):
    """Scrape Redfin search results for a city.

    Args:
        city: Redfin city path segment, e.g. "30818/Austin".
        state: Two-letter state code, e.g. "TX".
        page: 1-based results page number.

    Returns:
        dict of selector name -> list of extracted strings (the API's
        "data" payload).

    Raises:
        requests.HTTPError: if the extraction API returns an error status.
    """
    url = f"https://www.redfin.com/city/{city}/{state}/filter/page={page}"
    resp = requests.post(
        "https://api.snaprender.dev/v1/extract",
        headers={
            "x-api-key": API_KEY,
            "Content-Type": "application/json",
        },
        json={
            "url": url,
            "selectors": {
                "addresses": ".HomeCardContainer address",
                "prices": ".HomeCardContainer .homecardV2Price",
                "beds": ".HomeCardContainer .HomeStatsV2 .stats span:nth-child(1)",
                "baths": ".HomeCardContainer .HomeStatsV2 .stats span:nth-child(2)",
                "sqft": ".HomeCardContainer .HomeStatsV2 .stats span:nth-child(3)",
                "links": ".HomeCardContainer a.link-and-anchor::attr(href)",
                "statuses": ".HomeCardContainer .labelRow span",
            },
            # Python bool, not the JSON literal `true`; requests'
            # json= serialization emits it as JSON true.
            "use_flaresolverr": True,
        },
        # Rendering can be slow; bound the wait instead of hanging forever.
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()["data"]
# Scrape Austin TX listings across 3 pages
all_listings = []
#E8A0BF">for page #E8A0BF">in range(1, 4):
data = scrape_redfin_search(#A8D4A0">"30818/Austin", #A8D4A0">"TX", page)
addresses = data.get(#A8D4A0">"addresses", [])
#E8A0BF">for i #E8A0BF">in range(len(addresses)):
all_listings.append({
#A8D4A0">"address": addresses[i],
#A8D4A0">"price": data[#A8D4A0">"prices"][i] #E8A0BF">if i < len(data.get(#A8D4A0">"prices", [])) #E8A0BF">else #A8D4A0">"",
#A8D4A0">"beds": data[#A8D4A0">"beds"][i] #E8A0BF">if i < len(data.get(#A8D4A0">"beds", [])) #E8A0BF">else #A8D4A0">"",
#A8D4A0">"baths": data[#A8D4A0">"baths"][i] #E8A0BF">if i < len(data.get(#A8D4A0">"baths", [])) #E8A0BF">else #A8D4A0">"",
#A8D4A0">"sqft": data[#A8D4A0">"sqft"][i] #E8A0BF">if i < len(data.get(#A8D4A0">"sqft", [])) #E8A0BF">else #A8D4A0">"",
})
#E8A0BF">print(f#A8D4A0">"Page {page}: {len(addresses)} listings")
#E8A0BF">print(f#A8D4A0">"Total: {len(all_listings)} listings scraped")2. Property detail extraction
Individual property pages contain rich data including descriptions, HOA fees, school ratings, walk scores, and the Redfin Estimate:
def scrape_redfin_property(property_url):
    """Scrape detailed property data from Redfin.

    Args:
        property_url: Site-relative property path, e.g.
            "/TX/Austin/123-Example-St-78701/home/12345678".

    Returns:
        dict of selector name -> extracted value(s) (the API's "data"
        payload).

    Raises:
        requests.HTTPError: if the extraction API returns an error status.
    """
    resp = requests.post(
        "https://api.snaprender.dev/v1/extract",
        headers={
            "x-api-key": API_KEY,
            "Content-Type": "application/json",
        },
        json={
            "url": f"https://www.redfin.com{property_url}",
            "selectors": {
                "price": ".statsValue .value",
                "address": ".street-address",
                "beds": ".bed-pill .statsValue",
                "baths": ".bath-pill .statsValue",
                "sqft": ".sqft-pill .statsValue",
                "year_built": ".amenity-group span:contains('Built') + span",
                "lot_size": ".amenity-group span:contains('Lot') + span",
                "hoa": ".amenity-group span:contains('HOA') + span",
                "description": "#marketing-remarks-scroll",
                "days_on_market": ".daysStat .value",
                "redfin_estimate": ".avm .statsValue",
                "price_per_sqft": ".price-per-sqft .value",
                "schools": ".school-name",
                "walk_score": ".walkscore .score",
            },
            # Python bool, not the JSON literal `true`.
            "use_flaresolverr": True,
        },
        timeout=60,
    )
    resp.raise_for_status()
    return resp.json()["data"]
property = scrape_redfin_property(
#A8D4A0">"/TX/Austin/123-Example-St-78701/home/12345678"
)
#E8A0BF">print(json.dumps(property, indent=2))3. Market analysis
Analyze scraped listing data to understand market conditions, price distributions, and price-per-square-foot trends:
import pandas as pd
from datetime import datetime
def analyze_market(listings):
    """Analyze scraped Redfin market data.

    Args:
        listings: list of dicts with display strings under "price" and
            "sqft" (e.g. "$550,000", "2,145 sq ft"); missing fields may
            be "" (as produced by the scraping loops above).

    Returns:
        pandas.DataFrame with added price_num, sqft_num, and
        price_per_sqft columns. Also prints a summary and writes a
        dated CSV snapshot.
    """
    df = pd.DataFrame(listings)
    # Strip "$" and thousands separators, then coerce: blank/malformed
    # fields become NaN instead of astype(float) raising ValueError.
    df["price_num"] = pd.to_numeric(
        df["price"]
        .str.replace("$", "", regex=False)
        .str.replace(",", "", regex=False),
        errors="coerce",
    )
    # Pull the numeric part out of strings like "2,145 sq ft".
    # expand=False keeps the result a Series (the default returns a
    # one-column DataFrame for a single capture group).
    df["sqft_num"] = pd.to_numeric(
        df["sqft"]
        .str.replace(",", "", regex=False)
        .str.extract(r"([\d.]+)", expand=False),
        errors="coerce",
    )
    df["price_per_sqft"] = df["price_num"] / df["sqft_num"]
    print("=== Market Summary ===")
    print(f"Listings: {len(df)}")
    print(f"Median price: ${df['price_num'].median():,.0f}")
    print(f"Avg price: ${df['price_num'].mean():,.0f}")
    print(f"Price range: ${df['price_num'].min():,.0f} - ${df['price_num'].max():,.0f}")
    print(f"Median $/sqft: ${df['price_per_sqft'].median():.0f}")
    # Bucket listings into price tiers for a quick distribution view.
    tiers = pd.cut(
        df["price_num"],
        bins=[0, 300000, 500000, 750000, 1000000, float("inf")],
        labels=["<$300K", "$300-500K", "$500-750K", "$750K-1M", "$1M+"],
    )
    print("\n=== Price Distribution ===")
    print(tiers.value_counts().sort_index())
    # Dated filename so repeated runs accumulate a snapshot time series.
    df.to_csv(f"redfin_market_{datetime.now():%Y%m%d}.csv", index=False)
    return df
analyze_market(all_listings)4. Sold comps scraping
Recently sold homes are essential for comparable sales analysis. Scrape sold listings with sale prices and dates:
def scrape_sold_homes(city, state, months_back=3):
    """Scrape recently sold homes for comps analysis.

    Args:
        city: Redfin city path segment, e.g. "30818/Austin".
        state: Two-letter state code, e.g. "TX".
        months_back: Redfin "sold-within" window in months.

    Returns:
        list of dicts with address, sold_price, sold_date, beds, baths,
        and sqft (missing fields are "").

    Raises:
        requests.HTTPError: if the extraction API returns an error status.
    """
    url = (
        f"https://www.redfin.com/city/{city}/{state}"
        f"/filter/sold-within-{months_back}-months"
    )
    resp = requests.post(
        "https://api.snaprender.dev/v1/extract",
        headers={
            "x-api-key": API_KEY,
            "Content-Type": "application/json",
        },
        json={
            "url": url,
            "selectors": {
                "addresses": ".HomeCardContainer address",
                "sold_prices": ".HomeCardContainer .homecardV2Price",
                "sold_dates": ".HomeCardContainer .HomeRecentlySold span",
                "beds": ".HomeCardContainer .stats span:nth-child(1)",
                "baths": ".HomeCardContainer .stats span:nth-child(2)",
                "sqft": ".HomeCardContainer .stats span:nth-child(3)",
            },
            # Python bool, not the JSON literal `true`.
            "use_flaresolverr": True,
        },
        timeout=60,
    )
    resp.raise_for_status()
    data = resp.json()["data"]
    addresses = data.get("addresses", [])
    sold_homes = []
    # Selector lists may differ in length; guard every index.
    for i in range(len(addresses)):
        sold_homes.append({
            "address": addresses[i],
            "sold_price": data["sold_prices"][i] if i < len(data.get("sold_prices", [])) else "",
            "sold_date": data["sold_dates"][i] if i < len(data.get("sold_dates", [])) else "",
            "beds": data["beds"][i] if i < len(data.get("beds", [])) else "",
            "baths": data["baths"][i] if i < len(data.get("baths", [])) else "",
            "sqft": data["sqft"][i] if i < len(data.get("sqft", [])) else "",
        })
    return sold_homes
# Get sold comps #E8A0BF">in Austin
sold = scrape_sold_homes(#A8D4A0">"30818/Austin", #A8D4A0">"TX", months_back=6)
#E8A0BF">print(f#A8D4A0">"Found {len(sold)} recently sold homes")
df = pd.DataFrame(sold)
df.to_csv(#A8D4A0">"austin_sold_comps.csv", index=#E8A0BF">False)Pro tip
Cross-reference sold comps with active listings to identify overpriced or underpriced properties. A home listed 15%+ above recent comps in the same neighborhood is likely overpriced.
Scrape Redfin without getting blocked
SnapRender handles JavaScript rendering, bot detection bypass, and structured data extraction. Get real estate data from Redfin with a single API call.
Get Your API Key — Free

Frequently asked questions
Redfin's Terms of Service prohibit automated scraping. However, publicly displayed listing data (prices, addresses, photos) can be accessed for personal research, market analysis, or academic purposes. Do not republish listings or use scraped data to build a competing real estate platform.
Redfin is a React single-page application that loads listing data via internal APIs after initial page load. The HTML source contains no property data. You need a headless browser or rendering API like SnapRender to execute JavaScript and extract the fully rendered DOM.
Yes. Redfin shows recently sold homes with final sale prices, days on market, and price-per-square-foot. Filter search results by "Sold" status and scrape the results. Historical sold data is valuable for comps analysis and market trend tracking.
For active markets, daily scraping catches new listings and price changes. For market trend analysis, weekly is sufficient. Always use polite delays (2-3 seconds between requests) and avoid scraping during peak hours to minimize server load.
Listing price, address, beds/baths, square footage, lot size, year built, HOA fees, days on market, price history, agent info, school ratings, walk score, and property photos. Redfin also provides estimated values (Redfin Estimate) for most properties.