Danny O'Brien's TIL

View the Project on GitHub

Automating Uber business receipt downloads

I needed to download a bunch of Uber receipts for my business expenses, and while Uber provides a way to manually download receipt PDFs one at a time through their website, there’s no batch download option. I asked Claude to help me write a Python script that would automate this process.

The script uses Playwright to control an existing browser session (to avoid having to handle login credentials), navigates through Uber’s trip history, and downloads each receipt as a PDF. The trickiest parts were handling the modal dialogs that appear when requesting receipts, and extracting the correct cost from the trip details page for the filename.

After some back-and-forth testing with Claude using its access to a test browser, we got it working reliably. The script now downloads receipts with filenames in a consistent format (YYYY-MM-DD-26.44USD-tripid.pdf), making them easy to organize and process for accounting.

I used the claude.ai desktop app with various MCP-servers and its artifact feature to work on this. Artifacts now use line-by-line editing to make changes, so the Claude Web interface and desktop client doesn’t necessarily re-output your code with every edit. This is about as big as I can get a Claude Sonnet 3.7-written program to get before it starts running out of context or getting confused.

Here’s the latest, maintained code for the script

To use it, you need the Python jack-of-all-trades, uv, installed.

Run your browser with remote debugging enabled (e.g., chromium --remote-debugging-port=9222), then run the script with your desired options:

# Download all available receipts
./uber-receipt-downloader.py --all

# Download receipts from a specific date range
./uber-receipt-downloader.py --start-date 2024-01-01 --end-date 2024-03-31

# Download receipts from the last 30 days
./uber-receipt-downloader.py --days 30

As the script mentions, Stack Overflow has instructions for getting Chrome running with remote debugging working on MacOS and Windows.

#!/usr/bin/env -S uv run
# /// script
# requires-python = ">=3.12"
# dependencies = [
#   "playwright",
# ]
# ///
"""
Uber Receipt Downloader

This script connects to an existing Chrome-compatible browser instance and downloads
Uber receipts as PDFs from the Uber riders website.

Launch chrome with: chrome --remote-debugging-port=9222

Some instructions on how to do that on [MacOS and Windows](https://stackoverflow.com/questions/51563287/how-to-make-chrome-always-launch-with-remote-debugging-port-flag).
"""

import asyncio
import os
import re
import time
import argparse
from datetime import datetime, timedelta
from typing import List, Optional, Dict, Any
from playwright.async_api import async_playwright

# Configuration
CDP_URL = "http://localhost:9222"  # CDP URL for Brave browser
DOWNLOAD_DIR = os.path.expanduser("~/Downloads/uber_receipts")  # Directory to save receipts
# Set to None to fetch all trips automatically
TRIP_IDS = None  

class UberReceiptDownloader:
    def __init__(self, cdp_url: str, download_dir: str):
        self.cdp_url = cdp_url
        self.download_dir = download_dir
        self.browser = None
        self.context = None
        self.page = None
        self.playwright = None

    async def connect_to_browser(self):
        """Connect to existing Brave browser instance over CDP"""
        try:
            self.playwright = await async_playwright().start()
            
            print(f"Connecting to Brave browser at {self.cdp_url}")
            self.browser = await self.playwright.chromium.connect_over_cdp(self.cdp_url)
            
            # Get the default browser context
            self.context = self.browser.contexts[0] if self.browser.contexts else await self.browser.new_context(
                ignore_https_errors=True,  # Ignore SSL certificate errors
                accept_downloads=True,     # Enable file downloads
            )
            
            print("Successfully connected to Brave browser")
        except Exception as e:
            print(f"Error connecting to Brave browser at {self.cdp_url}: {e}")
            print("Make sure Brave is running with the --remote-debugging-port=9222 flag")
            raise

    async def extract_cost(self, page):
        """Extract the cost value from the trip page"""
        try:
            # Approach 1: Find all trip detail divs and look for the one with a $ symbol
            cost_divs = await page.query_selector_all('div[data-baseweb="block"][class*="css-iMyxrY"]')
            
            for div in cost_divs:
                text = await div.inner_text()
                if '$' in text:
                    # Use regex to extract just the number part
                    import re
                    price_match = re.search(r'\$(\d+\.\d+)', text)
                    if price_match and price_match.group(1):
                        return price_match.group(1)
            
            # Approach 2: Try to find the Tag icon and get its parent div text
            tag_icon = await page.query_selector('svg[title="Tag"]')
            if tag_icon:
                # Get the parent element that contains the price
                parent_element = await page.query_selector('svg[title="Tag"] + div')
                if parent_element:
                    price_element = await parent_element.query_selector('p')
                    if price_element:
                        price_text = await price_element.inner_text()
                        # Extract the number part
                        import re
                        price_match = re.search(r'\$(\d+\.\d+)', price_text)
                        if price_match and price_match.group(1):
                            return price_match.group(1)
            
            # If we get here, we couldn't find the cost
            print("Warning: Could not extract cost from page")
            return "unknown"
        except Exception as e:
            print(f"Error extracting cost: {e}")
            return "unknown"

    async def fetch_trip_ids(self, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None) -> List[str]:
        """Fetch trip IDs from the Uber trips page within a date range"""
        if not self.browser or not self.context:
            raise ValueError("Browser not connected. Call connect_to_browser() first.")
        
        if not self.page:
            self.page = await self.context.new_page()
        
        try:
            # Navigate to trips page
            trips_url = "https://riders.uber.com/trips"
            print(f"Navigating to {trips_url}")
            await self.page.goto(trips_url, wait_until='networkidle', timeout=30000)
            
            # Wait for the page to load with trip elements
            print("Waiting for trip elements to load...")
            await self.page.wait_for_selector('div[href^="https://riders.uber.com/trips/"]', state='visible', timeout=10000)
            
            # Extract trip IDs and dates using JavaScript evaluation
            trips_with_dates = await self.page.evaluate('''
                () => {
                    // Look for divs with href attributes containing trip IDs
                    const tripElements = document.querySelectorAll('div[href^="https://riders.uber.com/trips/"]');
                    const trips = [];
                    
                    for (const element of tripElements) {
                        const href = element.getAttribute('href');
                        let tripId = null;
                        if (href) {
                            const match = href.match(/\\/trips\\/([^\\/?]+)/);
                            if (match && match[1]) {
                                tripId = match[1];
                            }
                        }
                        
                        // Try to find the date text
                        let dateText = null;
                        const dateElement = element.querySelector('div[data-baseweb="block"] div');
                        if (dateElement) {
                            dateText = dateElement.innerText;
                        }
                        
                        if (tripId) {
                            trips.push({ id: tripId, dateText: dateText });
                        }
                    }
                    
                    return trips;
                }
            ''')
            
            print(f"Found {len(trips_with_dates)} trip entries on the first page")
            
            # Check if we need to load more trips (click "More" button if it exists)
            try:
                more_button_selector = 'button:has-text("More")'
                
                while await self.page.is_visible(more_button_selector, timeout=5000):
                    print("Found 'More' button, clicking to load additional trips...")
                    await self.page.click(more_button_selector)
                    
                    # Wait for loading to complete
                    await asyncio.sleep(2)
                    
                    # Extract additional trip IDs
                    new_trips_with_dates = await self.page.evaluate('''
                        () => {
                            // Same extraction logic as before
                            const tripElements = document.querySelectorAll('div[href^="https://riders.uber.com/trips/"]');
                            const trips = [];
                            
                            for (const element of tripElements) {
                                const href = element.getAttribute('href');
                                let tripId = null;
                                if (href) {
                                    const match = href.match(/\\/trips\\/([^\\/?]+)/);
                                    if (match && match[1]) {
                                        tripId = match[1];
                                    }
                                }
                                
                                // Try to find the date text
                                let dateText = null;
                                const dateElement = element.querySelector('div[data-baseweb="block"] div');
                                if (dateElement) {
                                    dateText = dateElement.innerText;
                                }
                                
                                if (tripId) {
                                    trips.push({ id: tripId, dateText: dateText });
                                }
                            }
                            
                            return trips;
                        }
                    ''')
                    
                    if len(new_trips_with_dates) > len(trips_with_dates):
                        print(f"Loaded more trips, now found {len(new_trips_with_dates)} total")
                        trips_with_dates = new_trips_with_dates
                    else:
                        print("No new trips found after clicking 'More', stopping")
                        break
            except Exception as e:
                print(f"Note: Could not find or click 'More' button: {e}")
            
            # Filter by date if requested
            trip_ids = []
            
            if start_date or end_date:
                print(f"Filtering trips between {start_date.strftime('%Y-%m-%d') if start_date else 'earliest'} and {end_date.strftime('%Y-%m-%d') if end_date else 'latest'}")
                
                # Process each trip to check date
                for trip in trips_with_dates:
                    trip_id = trip['id']
                    date_text = trip['dateText']
                    
                    # Skip if no date text available
                    if not date_text:
                        trip_ids.append(trip_id)
                        continue
                    
                    # Try to extract date from date_text
                    try:
                        # Common patterns in Uber's interface:
                        # "Mar 6 • 2:25 PM"
                        # "March 6 • 2:25 PM"
                        import re
                        # Look for month names
                        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", 
                                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
                                  "January", "February", "March", "April", "May", "June",
                                  "July", "August", "September", "October", "November", "December"]
                        
                        month_pattern = "|".join(months)
                        match = re.search(rf'({month_pattern}) (\d+)', date_text)
                        
                        if match:
                            month, day = match.groups()
                            # Use current year as Uber typically only shows month and day
                            # This is a limitation - for trips more than a year old
                            year = datetime.now().year
                            
                            # Try to parse with both full and abbreviated month names
                            trip_date = None
                            for fmt in ["%B %d %Y", "%b %d %Y"]:
                                try:
                                    trip_date = datetime.strptime(f"{month} {day} {year}", fmt)
                                    break
                                except ValueError:
                                    continue
                            
                            # Check if date is in range
                            if trip_date:
                                if start_date and trip_date < start_date:
                                    continue
                                if end_date and trip_date > end_date:
                                    continue
                        
                        # Include the trip if we couldn't parse the date or it's in range
                        trip_ids.append(trip_id)
                        
                    except Exception as e:
                        # If date parsing fails, include the trip to be safe
                        print(f"Error parsing date for trip {trip_id}: {e}")
                        trip_ids.append(trip_id)
            else:
                # If no date filtering, include all trip IDs
                trip_ids = [trip['id'] for trip in trips_with_dates]
            
            # Remove duplicates while preserving order
            seen = set()
            unique_trip_ids = [x for x in trip_ids if not (x in seen or seen.add(x))]
            
            print(f"Selected {len(unique_trip_ids)} trips after date filtering")
            return unique_trip_ids
            
        except Exception as e:
            print(f"Error fetching trip IDs: {e}")
            return []

    async def download_receipt(self, trip_id: str) -> Optional[str]:
        """Download a receipt for a specific trip ID"""
        if not self.browser or not self.context:
            raise ValueError("Browser not connected. Call connect_to_browser() first.")
        
        try:
            if not self.page:
                self.page = await self.context.new_page()
            
            # Navigate to the trip page
            trip_url = f"https://riders.uber.com/trips/{trip_id}"
            print(f"Navigating to {trip_url}")
            await self.page.goto(trip_url, wait_until='networkidle', timeout=30000)
            
            # Click "View Receipt" button - use known selector from our testing
            view_receipt_selectors = [
                'button[data-tracking-name="view-receipt-link"]',  # This worked in our testing
                ':text("View Receipt")',
                'button:has-text("View Receipt")',
                '[data-test="view-receipt-button"]'
            ]
            
            receipt_clicked = False
            for selector in view_receipt_selectors:
                try:
                    # Check if the selector exists and is visible
                    visible = await self.page.is_visible(selector, timeout=2000)
                    if visible:
                        print(f"Found 'View Receipt' button with selector: {selector}, clicking...")
                        await self.page.click(selector)
                        receipt_clicked = True
                        break
                except Exception:
                    continue
                    
            if not receipt_clicked:
                print("Could not find 'View Receipt' button with any of the tried selectors")
                # One last attempt - try to find it by looking at all buttons
                try:
                    all_buttons = await self.page.query_selector_all('button')
                    for button in all_buttons:
                        text = await button.inner_text()
                        if "receipt" in text.lower() or "view" in text.lower():
                            print(f"Found button with text: {text}, clicking...")
                            await button.click()
                            receipt_clicked = True
                            break
                except Exception as e:
                    print(f"Error in last attempt to find view receipt button: {e}")
                    
            if not receipt_clicked:
                raise Exception("Could not find or click 'View Receipt' button")
            
            # Wait for the popup dialog to appear
            print("Waiting for receipt popup dialog...")
            await asyncio.sleep(1)  # Brief pause to ensure modal is fully loaded
            
            # Click "Download PDF" button within the popup
            download_pdf_selectors = [
                ':text("Download PDF")',  # This worked in our testing
                'text="Download PDF"',
                'button:has-text("Download PDF")',
                '[data-test="download-pdf-button"]'
            ]
            
            pdf_button_found = False
            for selector in download_pdf_selectors:
                try:
                    # Check if the selector exists and is visible
                    visible = await self.page.is_visible(selector, timeout=2000)
                    if visible:
                        print(f"Found 'Download PDF' button with selector: {selector}")
                        pdf_button_found = True
                        
                        # Set up download event handler
                        download_path = None
                        async with self.page.expect_download() as download_info:
                            # Now click the button inside the expect_download context manager
                            await self.page.click(selector)
                            print("Waiting for download to start...")
                            
                            # Wait for the download to start with a timeout
                            try:
                                download = await asyncio.wait_for(download_info.value, 10.0)
                                
                                # Extract the cost from the page before clicking Download
                                cost = await self.extract_cost(self.page)
                                
                                # Get date from the page if possible
                                try:
                                    date_element = await self.page.query_selector('div[data-baseweb="block"] div[data-baseweb="typo-labellarge"]')
                                    date_text = await date_element.inner_text() if date_element else None
                                    
                                    if date_text:
                                        # Extract date from various formats that might appear
                                        import re
                                        
                                        # Try to find patterns like:
                                        # "2:28 PM, Thursday March 6 2025"
                                        # "Mar 6 • 2:25 PM"
                                        
                                        # Look for month names
                                        months = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", 
                                                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
                                                  "January", "February", "March", "April", "May", "June",
                                                  "July", "August", "September", "October", "November", "December"]
                                        
                                        month_pattern = "|".join(months)
                                        date_patterns = [
                                            # Pattern like "March 6 2025"
                                            rf'({month_pattern}) (\d+) (\d4)',
                                            # Pattern like "Mar 6 • 2:25 PM" (year will be current year)
                                            rf'({month_pattern}) (\d+)',
                                        ]
                                        
                                        date_formatted = None
                                        for pattern in date_patterns:
                                            date_match = re.search(pattern, date_text)
                                            if date_match:
                                                try:
                                                    if len(date_match.groups()) == 3:  # Full date with year
                                                        month, day, year = date_match.groups()
                                                        date_str = f"{month} {day} {year}"
                                                        # Try to parse with both full and abbreviated month names
                                                        for fmt in ["%B %d %Y", "%b %d %Y"]:
                                                            try:
                                                                date_obj = datetime.strptime(date_str, fmt)
                                                                date_formatted = date_obj.strftime("%Y-%m-%d")
                                                                break
                                                            except ValueError:
                                                                continue
                                                    else:  # Date without year
                                                        month, day = date_match.groups()
                                                        # Use current year
                                                        year = datetime.now().year
                                                        date_str = f"{month} {day} {year}"
                                                        # Try to parse with both full and abbreviated month names
                                                        for fmt in ["%B %d %Y", "%b %d %Y"]:
                                                            try:
                                                                date_obj = datetime.strptime(date_str, fmt)
                                                                date_formatted = date_obj.strftime("%Y-%m-%d")
                                                                break
                                                            except ValueError:
                                                                continue
                                                except Exception as e:
                                                    print(f"Error parsing date: {e}")
                                                
                                                if date_formatted:
                                                    break
                                        
                                        if not date_formatted:
                                            # If no date could be parsed, use current date
                                            date_formatted = datetime.now().strftime("%Y-%m-%d")
                                    else:
                                        date_formatted = datetime.now().strftime("%Y-%m-%d")
                                except Exception:
                                    # If date extraction fails, use current date
                                    date_formatted = datetime.now().strftime("%Y-%m-%d")
                                
                                # Generate a filename with date, cost and trip ID
                                filename = f"{date_formatted}-{cost}USD-{trip_id}.pdf"
                                
                                download_path = os.path.join(self.download_dir, filename)
                                
                                # Create directory if it doesn't exist
                                os.makedirs(self.download_dir, exist_ok=True)
                                
                                # Save the file
                                await download.save_as(download_path)
                                print(f"Downloaded receipt to: {download_path}")
                                
                            except asyncio.TimeoutError:
                                print("Download didn't start within the timeout period")
                        
                        # Wait for a moment to ensure download completes or dialog closes
                        await asyncio.sleep(2)
                        
                        # Close the popup by clicking outside or on the X button
                        try:
                            # Try specific close button first
                            close_button_selectors = [
                                'button[aria-label="Close"]',
                                '.ReactModalPortal button',
                                ':text("×")'
                            ]
                            
                            for close_selector in close_button_selectors:
                                if await self.page.is_visible(close_selector, timeout=1000):
                                    await self.page.click(close_selector)
                                    print("Closed receipt popup dialog")
                                    break
                            else:
                                # If no close button found, try pressing Escape
                                await self.page.keyboard.press("Escape")
                                print("Closed receipt popup dialog with Escape key")
                        except Exception as e:
                            print(f"Error closing popup: {e} - continuing anyway")
                        
                        return download_path
                except Exception:
                    continue
                    
            if not pdf_button_found:
                # Try to find any download-related element with text
                try:
                    # Extract all text from the page
                    all_text = await self.page.evaluate('''
                        () => {
                            return document.body.innerText;
                        }
                    ''')
                    
                    if "download pdf" in all_text.lower():
                        # Try a more generic selector
                        print("Found 'Download PDF' text, trying generic selector")
                        selector = ':text("Download PDF")'
                        pdf_button_found = True
                    elif "download" in all_text.lower() and "pdf" in all_text.lower():
                        # Try clicking anywhere with download or PDF text
                        print("Found 'Download' and 'PDF' text separately, trying broader selector")
                        selector = ':text-matches("(?i)download|pdf")'
                        pdf_button_found = True
                except Exception as e:
                    print(f"Error searching for download text: {e}")
                    
            if not pdf_button_found:
                raise Exception("Could not find 'Download PDF' button")
            
        except Exception as e:
            print(f"Error downloading receipt for trip {trip_id}: {e}")
            return None

    async def download_multiple_receipts(self, trip_ids: Optional[List[str]] = None, start_date: Optional[datetime] = None, end_date: Optional[datetime] = None):
        """Download receipts for multiple trip IDs"""
        # Connect to browser if not already connected
        if not self.browser:
            await self.connect_to_browser()
        
        # If no trip IDs provided, fetch them
        if not trip_ids:
            print("No trip IDs provided, fetching from Uber...")
            trip_ids = await self.fetch_trip_ids(start_date, end_date)
            
            if not trip_ids:
                print("No trips found.")
                return []
        
        print(f"Starting download of {len(trip_ids)} receipts...")
        
        results = []
        for i, trip_id in enumerate(trip_ids):
            print(f"Downloading receipt {i+1}/{len(trip_ids)} - Trip ID: {trip_id}")
            result = await self.download_receipt(trip_id)
            results.append((trip_id, result))
            # Small delay between downloads to avoid rate limiting
            await asyncio.sleep(2)
        
        return results

    async def close(self):
        """Close browser connection and clean up resources"""
        try:
            if self.page:
                await self.page.close()
            if self.browser:
                await self.browser.close()
            if self.playwright:
                await self.playwright.stop()
        except Exception as e:
            print(f"Error during cleanup: {e}")

async def main():
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Download Uber receipts as PDFs')
    parser.add_argument('--trip-id', action='append', help='Specific trip ID(s) to download (can be used multiple times)')
    parser.add_argument('--days', type=int, default=90, help='Number of days back to fetch trips (default: 90)')
    parser.add_argument('--start-date', type=str, help='Start date for trip range (format: YYYY-MM-DD)')
    parser.add_argument('--end-date', type=str, help='End date for trip range (format: YYYY-MM-DD, defaults to today)')
    parser.add_argument('--output-dir', type=str, default=DOWNLOAD_DIR, help=f'Directory to save receipts (default: {DOWNLOAD_DIR})')
    parser.add_argument('--cdp-url', type=str, default=CDP_URL, help=f'Chrome DevTools Protocol URL (default: {CDP_URL})')
    parser.add_argument('--all', action='store_true', help='Download all available trips')
    parser.add_argument('--test', action='store_true', help='Test cost extraction on a specific trip ID')
    parser.add_argument('--test-trip-id', type=str, default="1003c9ae-bd1c-48eb-b751-e260c336f7fa", help='Trip ID to use for testing (default: a specific trip ID)')
    
    args = parser.parse_args()
    
    # Use arguments or defaults
    cdp_url = args.cdp_url
    download_dir = args.output_dir
    
    # Initialize downloader
    downloader = UberReceiptDownloader(cdp_url, download_dir)
    
    try:
        await downloader.connect_to_browser()
        
        # Special test mode
        if args.test:
            test_trip_id = args.test_trip_id
            print(f"Testing cost extraction on trip ID: {test_trip_id}")
            
            # Navigate to the trip page
            if not downloader.page:
                downloader.page = await downloader.context.new_page()
                
            trip_url = f"https://riders.uber.com/trips/{test_trip_id}"
            print(f"Navigating to {trip_url}")
            await downloader.page.goto(trip_url, wait_until='networkidle', timeout=30000)
            
            # Extract the cost
            cost = await downloader.extract_cost(downloader.page)
            print(f"Extracted cost: {cost}")
            
            return
        
        # Process date arguments for normal operation
        start_date = None
        end_date = datetime.now()
        
        if args.start_date:
            try:
                start_date = datetime.strptime(args.start_date, "%Y-%m-%d")
            except ValueError:
                print(f"Error: Invalid start date format. Please use YYYY-MM-DD. Using default.")
                start_date = datetime.now() - timedelta(days=args.days)
        else:
            start_date = datetime.now() - timedelta(days=args.days)
        
        if args.end_date:
            try:
                end_date = datetime.strptime(args.end_date, "%Y-%m-%d")
            except ValueError:
                print(f"Error: Invalid end date format. Please use YYYY-MM-DD. Using today.")
        
        # Determine which trip IDs to use
        trip_ids = args.trip_id if args.trip_id else TRIP_IDS
        
        if trip_ids:
            print(f"Using {len(trip_ids)} provided trip ID(s)")
            results = await downloader.download_multiple_receipts(trip_ids)
        elif args.all or TRIP_IDS is None:
            # Fetch trip IDs automatically
            print(f"Fetching trip IDs from {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
            
            results = await downloader.download_multiple_receipts(
                trip_ids=None, 
                start_date=start_date,
                end_date=end_date
            )
        else:
            print("No trip IDs provided and --all not specified. Please provide trip IDs or use --all.")
            return
        
        # Print results
        successful = [trip_id for trip_id, path in results if path]
        failed = [trip_id for trip_id, path in results if not path]
        
        print("\nDownload Results:")
        print(f"Successfully downloaded: {len(successful)}/{len(results)}")
        
        if failed:
            print(f"Failed downloads: {len(failed)}")
            for trip_id in failed:
                print(f"  - {trip_id}")
        
        if successful:
            print(f"\nReceipts saved to: {download_dir}")
            
    except Exception as e:
        print(f"Error: {e}")
    finally:
        await downloader.close()

if __name__ == "__main__":
    asyncio.run(main())