Import recently funded companies from Crunchbase into HubSpot using code
Prerequisites
- Python 3.9+ or Node.js 18+
- Crunchbase API key (requires Enterprise or API/Data License plan)
- HubSpot private app token with crm.objects.companies.read and crm.objects.companies.write scopes
- Custom HubSpot company properties for funding data (funding_stage, funding_amount, funding_date, investors)
The search endpoints used here require a Crunchbase Enterprise or API/Data License plan (~$49+/mo). The Pro plan only gives you Zapier integration and CSV exports. Verify your plan supports direct API access before building this.
Step 1: Set up the project
# Test your Crunchbase API key
curl -s -X POST "https://api.crunchbase.com/api/v4/searches/funding_rounds" \
-H "X-cb-user-key: $CRUNCHBASE_API_KEY" \
-H "Content-Type: application/json" \
-d '{"field_ids": ["identifier"], "limit": 1}' | head -c 300
# Test your HubSpot token
curl -s "https://api.hubapi.com/crm/v3/objects/companies?limit=1" \
-H "Authorization: Bearer $HUBSPOT_TOKEN" | head -c 200
Step 2: Search Crunchbase for recent funding rounds
import requests
import os
from datetime import datetime, timedelta
from urllib.parse import urlparse
CRUNCHBASE_API_KEY = os.environ["CRUNCHBASE_API_KEY"]
HUBSPOT_TOKEN = os.environ["HUBSPOT_TOKEN"]
CB_HEADERS = {
"X-cb-user-key": CRUNCHBASE_API_KEY,
"Content-Type": "application/json",
}
HS_HEADERS = {
"Authorization": f"Bearer {HUBSPOT_TOKEN}",
"Content-Type": "application/json",
}
def search_funding_rounds(days_back=30, stages=None, page_size=100, timeout=30):
    """Search Crunchbase for funding rounds announced in the last N days.

    Args:
        days_back: Number of days before now at which the ``announced_on``
            filter starts.
        stages: ``investment_type`` values to include. Defaults to
            Series A through Series D.
        page_size: Number of results requested per page; defaults to 100.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled connection cannot hang the run indefinitely.

    Returns:
        A list with the ``entities`` from every page of search results.

    Raises:
        requests.HTTPError: If Crunchbase responds with an error status.
    """
    if stages is None:
        stages = ["series_a", "series_b", "series_c", "series_d"]
    since_date = (datetime.now() - timedelta(days=days_back)).strftime("%Y-%m-%d")

    # Everything except the pagination cursor is identical for each page,
    # so build the request body once.
    body = {
        "field_ids": [
            "identifier", "announced_on", "money_raised",
            "funded_organization_identifier", "investment_type",
            "investor_identifiers",
        ],
        "query": [
            {
                "type": "predicate",
                "field_id": "announced_on",
                "operator_id": "gte",
                "values": [since_date],
            },
            {
                "type": "predicate",
                "field_id": "investment_type",
                "operator_id": "includes",
                "values": stages,
            },
        ],
        "limit": page_size,
    }

    all_entities = []
    while True:
        resp = requests.post(
            "https://api.crunchbase.com/api/v4/searches/funding_rounds",
            headers=CB_HEADERS,
            json=body,
            timeout=timeout,
        )
        resp.raise_for_status()
        entities = resp.json().get("entities") or []
        all_entities.extend(entities)

        # A short (or empty) page means there is nothing further to fetch.
        if not entities or len(entities) < page_size:
            break
        # The next page starts after the last entity of this one.
        body["after_id"] = entities[-1]["uuid"]
    return all_entities
rounds = search_funding_rounds()
print(f"Found {len(rounds)} funding rounds in the last 30 days")
The Crunchbase search API returns a maximum of 100 results per page. Use the after_id parameter with the UUID of the last entity to fetch the next page. Keep paginating until you get fewer than 100 results.
Step 3: Fetch organization details for each round
import time
def get_organization(org_uuid, timeout=30):
    """Fetch organization details from Crunchbase.

    Args:
        org_uuid: UUID of the organization to look up.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot hang the run indefinitely.

    Returns:
        The ``properties`` object of the response, or an empty dict when the
        response has none.

    Raises:
        requests.HTTPError: If Crunchbase responds with an error status.
    """
    resp = requests.get(
        f"https://api.crunchbase.com/api/v4/entities/organizations/{org_uuid}",
        headers=CB_HEADERS,
        params={
            "field_ids": "short_description,categories,location_identifiers,"
                         "num_employees_enum,website_url,linkedin,founded_on",
        },
        timeout=timeout,
    )
    resp.raise_for_status()
    # `or {}` also covers an explicit null, which .get's default would not.
    return resp.json().get("properties") or {}
def extract_domain(url):
    """Extract a clean, lowercase domain from a URL or bare host string.

    Args:
        url: A full URL (``https://www.example.com/about``) or a bare host
            with optional path (``example.com/about``). May be None or empty.

    Returns:
        The host name without a leading ``www.`` and without any port,
        credentials or path, or None when no host can be determined.
    """
    if not url:
        return None
    candidate = url.strip()
    # Test for "://" rather than a leading "http": a bare host such as
    # "httpbin.org/get" starts with "http" but carries no scheme.
    if "://" not in candidate:
        candidate = f"https://{candidate}"
    try:
        # .hostname is lowercased and excludes port and user info.
        host = urlparse(candidate).hostname
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 literal).
        return None
    if not host:
        return None
    # Only drop a *leading* "www."; str.replace would also mangle hosts that
    # merely contain that substring (e.g. "awww.example.com").
    return host.removeprefix("www.")


# Step 4: Filter for ICP matches and deduplicate
# Crunchbase ``num_employees_enum`` values that count as an ICP match.
TARGET_EMPLOYEE_RANGES = [
    "c_0051_0100",
    "c_0101_0250",
    "c_0251_0500",
    "c_0501_1000",
]


def matches_icp(org):
    """Return True when the organization fits the ideal customer profile.

    An organization matches when it has a non-empty ``website_url`` and its
    ``num_employees_enum`` is one of ``TARGET_EMPLOYEE_RANGES``.
    """
    has_website = bool(org.get("website_url"))
    headcount_band = org.get("num_employees_enum", "")
    return has_website and headcount_band in TARGET_EMPLOYEE_RANGES
def company_exists_in_hubspot(domain, timeout=30):
    """Check whether a company with this domain already exists in HubSpot.

    Args:
        domain: Domain to match exactly against the company ``domain`` property.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot hang the run indefinitely.

    Returns:
        True if the search returns at least one company, otherwise False.

    Raises:
        requests.HTTPError: If HubSpot responds with an error status.
    """
    resp = requests.post(
        "https://api.hubapi.com/crm/v3/objects/companies/search",
        headers=HS_HEADERS,
        json={
            "filterGroups": [{"filters": [{
                "propertyName": "domain",
                "operator": "EQ",
                "value": domain,
            }]}],
            # Only existence matters, so one hit is enough.
            "limit": 1,
        },
        timeout=timeout,
    )
    resp.raise_for_status()
    return bool(resp.json().get("results"))


# Step 5: Create companies in HubSpot with funding data
def create_hubspot_company(org, funding_round, timeout=30):
    """Create a company in HubSpot with Crunchbase funding data.

    Args:
        org: Organization properties as returned by ``get_organization``.
        funding_round: A funding-round entity whose ``properties`` hold
            ``investment_type``, ``money_raised``, ``announced_on`` and
            ``investor_identifiers``.
        timeout: Seconds to wait on the HTTP request before giving up, so a
            stalled connection cannot hang the run indefinitely.

    Returns:
        The ``id`` field of HubSpot's JSON response for the new company.

    Raises:
        requests.HTTPError: If HubSpot responds with an error status.
    """
    domain = extract_domain(org.get("website_url"))

    # Use `or` fallbacks throughout: .get's default does not apply when the
    # key is present with an explicit null value.
    props = funding_round.get("properties") or {}
    investors = props.get("investor_identifiers") or []
    # Skip blank names so the joined string has no stray ", , " segments.
    investor_names = ", ".join(
        name for name in (inv.get("value", "") for inv in investors) if name
    )
    amount = (props.get("money_raised") or {}).get("value")
    identifier = org.get("identifier") or {}
    first_category = (org.get("categories") or [{}])[0]

    resp = requests.post(
        "https://api.hubapi.com/crm/v3/objects/companies",
        headers=HS_HEADERS,
        json={
            "properties": {
                "domain": domain,
                "name": identifier.get("value", ""),
                "description": org.get("short_description") or "",
                # NOTE(review): HubSpot's built-in `industry` property appears
                # to be a dropdown with fixed options; a free-form Crunchbase
                # category name may be rejected. Confirm, or map the value /
                # write it to a custom text property instead.
                "industry": first_category.get("value", ""),
                "funding_stage": props.get("investment_type") or "",
                "funding_amount": str(amount) if amount else "",
                "funding_date": props.get("announced_on") or "",
                "investors": investor_names,
            }
        },
        timeout=timeout,
    )
    resp.raise_for_status()
    return resp.json()["id"]
# --- Main execution ---
# For every funding round: look up the funded organization, keep ICP matches,
# skip domains already handled in this run or already present in HubSpot, and
# create the rest as HubSpot companies.
seen_domains = set()
created = 0
skipped = 0
icp_miss = 0

for rnd in rounds:
    # `or {}` guards against explicit nulls, which .get's default would let
    # through and which would then crash on the next .get call.
    round_props = rnd.get("properties") or {}
    org_ref = round_props.get("funded_organization_identifier") or {}
    org_uuid = org_ref.get("uuid")
    if not org_uuid:
        continue

    org = get_organization(org_uuid)
    time.sleep(0.2)  # Respect Crunchbase rate limits

    if not matches_icp(org):
        icp_miss += 1
        continue

    domain = extract_domain(org.get("website_url"))
    if not domain or domain in seen_domains:
        continue
    seen_domains.add(domain)

    # Resolve the display name once so both log lines treat a missing
    # identifier the same way.
    org_name = (org.get("identifier") or {}).get("value", "Unknown")

    if company_exists_in_hubspot(domain):
        print(f" EXISTS: {org_name} ({domain})")
        skipped += 1
        continue

    # Handle a null money_raised the same way create_hubspot_company does.
    money = round_props.get("money_raised") or {}
    amount_str = f"${money.get('value', 0):,.0f}" if money.get("value") else "undisclosed"
    stage = round_props.get("investment_type", "unknown")

    company_id = create_hubspot_company(org, rnd)
    print(f" CREATED: {org_name} — {stage} — {amount_str}")
    created += 1
    time.sleep(0.2)

print(f"\nDone. Created: {created}, Already existed: {skipped}, ICP misses: {icp_miss}")

# The funding_stage, funding_amount, funding_date, and investors properties
# must exist in HubSpot before running this script. Create them in HubSpot
# Settings → Properties → Company Properties. Use types: single-line text for
# stage/investors, number for amount, date for date.
Step 6: Schedule with cron or GitHub Actions
# .github/workflows/crunchbase-import.yml
name: Import Funded Companies
on:
schedule:
- cron: '0 14 * * 1' # Weekly on Monday at 14:00 UTC (9 AM EST / 10 AM EDT; GitHub cron schedules run in UTC)
workflow_dispatch: {}
jobs:
run:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: '3.12'
- run: pip install requests
- run: python import_funded_companies.py
env:
CRUNCHBASE_API_KEY: ${{ secrets.CRUNCHBASE_API_KEY }}
HUBSPOT_TOKEN: ${{ secrets.HUBSPOT_TOKEN }}
Rate limits
| API | Limit | Impact |
|---|---|---|
| Crunchbase search | 200 req/min | Comfortable for weekly runs |
| Crunchbase entity | 200 req/min | Add 200ms delay between org lookups |
| HubSpot general | 150 req/10 sec | No concern at this volume |
| HubSpot Search | 5 req/sec | Add 200ms delay between dedup checks |
Cost
- Hosting: Free on GitHub Actions (2,000 min/month free tier)
- Crunchbase API: Enterprise plan required. Contact Crunchbase sales for pricing (starts ~$49/mo for basic API access).
- HubSpot API: Free with any plan that supports private apps
Need help implementing this?
We build and optimize automation systems for mid-market businesses. Let's discuss the right approach for your team.