Tinyletter was shut down unbeknownst to me earlier in 2024. I used TinyLetter to chronicle my nearly-a-year at the geographic South Pole. Now that I’ve left social media and am no longer reading the news, I’d like to keep in touch with folks more via email so I thought to look up my old subscribers list. I can’t though; it’s gone. So I used Anthropic’s claude.ai to help me search my email and automatically generate a list, at least from the active participants in my newsletter (those that replied). I then went through and extracted all the old newsletters, for safekeeping.
Luckily, since I had a footer including a defunct postal box unique to the newsletter, creating a search on that address uniquely returned these emails. And equally luckily, I ran a hugely successful postcard campaign where I sent hundreds of postcards from the South Pole to those who replied with their addresses. With a few iterations to the script, I had an Excel file exported with 278 unique subscribers.
Now, anyone could reproduce this; you’d probably need a small bit of fluency with programming, or at least a lot of grit to get through an error or two. I’ll share the full process in case it’s useful, and also to marvel at the power of AI to help with a task like this quickly, making minutes’ work of something that otherwise would have amounted to a significant project.
I’m so delighted to get in contact with a bunch of old friends and acquaintances! Thanks Claude =).
My prompts were:
a. I’d like to take all senders of emails matching a search string in google and add them to a spreadsheet
b. thanks that worked. I’d like to deduplicate the results, and retrieve all the results not just the top 100
And the instructions and code returned were:
- Set up Google API credentials:
- Go to the Google Cloud Console
- Create a new project
- Enable the Gmail API
- Create OAuth 2.0 credentials
- Download the credentials and save as ‘credentials.json’ in the same directory as the script
- Install required packages:
pip install google-auth-oauthlib google-api-python-client pandas openpyxl
- Run the script and enter your search query when prompted.
The code was:
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.discovery import build
import pickle
import os.path
import pandas as pd
def get_gmail_service():
    """Build an authenticated Gmail API client.

    Loads cached OAuth credentials from ``token.pickle`` when present,
    refreshes them if expired, and otherwise runs the local-server OAuth
    flow using ``credentials.json``. Any new or refreshed credentials are
    cached back to ``token.pickle`` for subsequent runs.

    Returns:
        googleapiclient.discovery.Resource: a Gmail v1 service client.
    """
    SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
    creds = None
    # NOTE(security): pickle.load executes arbitrary code from the file;
    # only load a token.pickle that this script itself wrote.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
            except Exception:
                # Refresh fails when the token has been revoked or the
                # grant expired; fall back to a fresh interactive flow
                # instead of crashing.
                creds = None
        if not creds or not creds.valid:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Cache the (new or refreshed) credentials for the next run.
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    return build('gmail', 'v1', credentials=creds)
def get_email_senders(search_query):
    """Return the From header of every Gmail message matching a query.

    Pages through the ``messages.list`` endpoint until ``nextPageToken``
    is exhausted, then fetches each message's From header with a
    metadata-only ``get`` (avoids downloading full message bodies).

    Args:
        search_query: Gmail search string (same syntax as the web UI).

    Returns:
        list[str]: one raw From header value per matching message;
        ``'Unknown Sender'`` when a message carries no From header.
    """
    service = get_gmail_service()
    messages = []
    next_page_token = None
    # Page through results; the API caps each page at 500 message ids.
    while True:
        results = service.users().messages().list(
            userId='me',
            q=search_query,
            pageToken=next_page_token,
            maxResults=500,
        ).execute()
        # A query with zero hits returns no 'messages' key at all.
        messages.extend(results.get('messages', []))
        next_page_token = results.get('nextPageToken')
        if not next_page_token:
            break
    print(f'Found {len(messages)} matching messages')
    senders = []
    for message in messages:
        msg = service.users().messages().get(
            userId='me',
            id=message['id'],
            format='metadata',
            metadataHeaders=['From'],
        ).execute()
        headers = msg.get('payload', {}).get('headers', [])
        # Header names are case-insensitive per RFC 5322, so compare
        # lowercased rather than relying on the exact casing 'From'.
        sender = next(
            (h['value'] for h in headers if h['name'].lower() == 'from'),
            'Unknown Sender'
        )
        senders.append(sender)
    return senders
def save_to_spreadsheet(senders, output_file='email_senders.xlsx'):
    """Parse raw From headers and save the unique senders to Excel.

    Args:
        senders: raw From header strings, e.g. ``'Jane Doe <jane@x.com>'``.
        output_file: path of the ``.xlsx`` file to write.

    Side effects:
        Writes ``output_file`` and prints total/unique sender counts.
    """
    # Stdlib RFC 5322 address parser: handles quoted display names,
    # comments, and bare addresses far better than splitting on '<'.
    from email.utils import parseaddr

    df = pd.DataFrame(senders, columns=['Sender'])
    df[['Name', 'Email']] = df['Sender'].apply(
        lambda raw: pd.Series(parseaddr(raw))
    )
    # Email addresses are case-insensitive in practice; normalize so
    # 'A@x.com' and 'a@x.com' collapse into a single subscriber.
    df['Email'] = df['Email'].str.strip().str.lower()
    df_unique = df.drop_duplicates(subset=['Email']).sort_values('Email')
    df_unique.to_excel(output_file, index=False)
    print(f'Found {len(df)} total senders')
    print(f'Saved {len(df_unique)} unique senders to {output_file}')
def main():
    """Prompt for a Gmail search query, then export unique senders."""
    query = input('Enter Gmail search query: ')
    save_to_spreadsheet(get_email_senders(query))


if __name__ == '__main__':
    main()
Code to retrieve newsletter content, alongside the date and newsletter title.
Later, I used similar prompts to generate code to retrieve the content of the newsletters. It took more iterations of prompting but here was the code:
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.discovery import build
import pickle
import os.path
import pandas as pd
def get_gmail_service():
    """Authenticate against the Gmail API and return a v1 service client."""
    SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
    creds = None
    # Reuse cached credentials from a previous run when available.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server(port=0)
        # Persist credentials so the next run can skip the browser flow.
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
    return build('gmail', 'v1', credentials=creds)
def get_email_senders(search_query):
    """Collect the From header of every message matching search_query."""
    service = get_gmail_service()
    all_messages = []
    page_token = None
    # Gmail returns at most 500 ids per page; keep looping until the
    # response carries no nextPageToken.
    while True:
        page = service.users().messages().list(
            userId='me',
            q=search_query,
            pageToken=page_token,
            maxResults=500,
        ).execute()
        if 'messages' in page:
            all_messages.extend(page['messages'])
        page_token = page.get('nextPageToken')
        if not page_token:
            break
    print(f'Found {len(all_messages)} matching messages')
    senders = []
    for item in all_messages:
        detail = service.users().messages().get(
            userId='me',
            id=item['id'],
            format='metadata',
            metadataHeaders=['From'],
        ).execute()
        # Take the first From header; fall back to a placeholder.
        sender = 'Unknown Sender'
        for header in detail['payload']['headers']:
            if header['name'] == 'From':
                sender = header['value']
                break
        senders.append(sender)
    return senders
def save_to_spreadsheet(senders, output_file='email_senders.xlsx'):
    """Write the unique senders (deduplicated by email) to an Excel file."""
    df = pd.DataFrame(senders, columns=['Sender'])
    names = []
    emails = []
    # Split 'Display Name <addr@host>' into its two parts; a bare
    # address gets an empty display name.
    for raw in df['Sender']:
        parts = raw.split('<')
        if len(parts) > 1:
            names.append(parts[0].strip(' "\''))
            emails.append(parts[1].strip('>'))
        else:
            names.append('')
            emails.append(raw)
    df['Name'] = names
    df['Email'] = emails
    # One row per distinct address, sorted for readability.
    df_unique = df.drop_duplicates(subset=['Email']).sort_values('Email')
    df_unique.to_excel(output_file, index=False)
    print(f'Found {len(df)} total senders')
    print(f'Saved {len(df_unique)} unique senders to {output_file}')
def main():
    """Export all senders who mailed the newsletter's unique postal footer."""
    # The interactive prompt was replaced by the fixed footer address:
    # search_query = input('Enter Gmail search query: ')
    search_query = "8605 Santa Monica Blvd #22500 Los Angeles, CA 90069 USA"
    save_to_spreadsheet(get_email_senders(search_query))


if __name__ == '__main__':
    main()