Claude Integration
How to connect a remote cloud-os instance to your code and configure Anthropic's Claude.
Basic Setup
Main.py
import asyncio
import os
import json
import base64
from io import BytesIO
from PIL import Image
from IPython.display import display
from typing import Any
from datetime import datetime
# Import relevant ToyStack libraries
from toystack import ToyStackClient
from toystack.tools import BashTool, ComputerTool, EditTool, ToolResult
# Initialize ToyStack Client
# The API key below is a placeholder string; substitute your own key.
toystack_client = ToyStackClient(api_key="your_toystack_api_key")
# Start an instance of type "medium"; the returned handle is passed to every tool below.
instance = toystack_client.start_instance(instance_type="medium")
# Define Anthropic Client if still using Claude (Optional)
# The API key below is a placeholder string as well.
# NOTE(review): anthropic_client is not referenced by any other code shown on this page.
from anthropic import Anthropic
anthropic_client = Anthropic(api_key="your_claude_api_key")
# System prompt adapted for ToyStack
# The date is assembled from ``datetime.day`` rather than the ``%-d`` strftime
# directive: ``%-d`` is a platform extension that is not accepted everywhere
# (for example on Windows), whereas this form yields the same unpadded day
# number on every platform.
_today = datetime.today()
SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
* You are utilizing an Ubuntu virtual machine managed via ToyStack with internet access.
* You can install Ubuntu applications using the bash tool. Use curl instead of wget.
* Firefox (firefox-esr) is pre-installed. To open it, click the Firefox icon.
* GUI applications can be started using the bash tool by setting export DISPLAY=:1 and using a subshell, e.g., "(DISPLAY=:1 xterm &)". GUI apps will appear in the desktop environment but may take some time. Use screenshots to confirm their state.
* For commands generating extensive text outputs, redirect the output to a tmp file and utilize grep or other tools to parse it.
* When browsing a page, either zoom out to view the entire content or scroll through it thoroughly before deciding that something isn't present.
* Chain multiple function calls where possible to optimize execution.
* The current date is {_today.strftime('%A, %B')} {_today.day}, {_today.year}.
</SYSTEM_CAPABILITY>
<IMPORTANT>
* When using Firefox, if a startup wizard appears, IGNORE IT. Do not click "skip this step." Instead, navigate to the address bar, enter the URL or search term directly.
* If you encounter a PDF and need to extract text, determine its URL, use curl to download it, install pdftotext, and convert it to a text file for easier reading.
</IMPORTANT>"""
# Example usage of ToyStack tools
# Each tool is constructed with the instance handle started above.
bash_tool = BashTool(instance=instance)
# NOTE(review): computer_tool and edit_tool are created here but not used by the
# rest of this snippet; the sampling loop further down builds its own tools.
computer_tool = ComputerTool(instance=instance)
edit_tool = EditTool(instance=instance)
# Example function to execute a bash command using ToyStack's BashTool
async def run_bash_command(command: str):
    """Run ``command`` through the module-level ``bash_tool`` and print what it returns."""
    output = await bash_tool.run(command)
    print("Command Output:", output)
# Example usage
async def main():
    """Entry point of the basic example: run a single demo shell command."""
    demo_command = "echo 'Hello from ToyStack!'"
    await run_bash_command(demo_command)
    # Add more commands or tool usage as needed
# Run the script
# NOTE: asyncio.run() raises RuntimeError when an event loop is already running
# in the current thread (as in Jupyter/IPython); there, use ``await main()``.
asyncio.run(main())
Define Collection Tool
Main.py
from typing import Any, cast
# Import ToyStack tools
from toystack.tools import BashTool, ComputerTool, EditTool, ToolResult
class ToolCollection:
    """A collection of ToyStack-defined tools.

    Tools are looked up by the ``"name"`` entry of the dict returned by each
    tool's ``to_params()``; a later tool with the same name replaces an
    earlier one in the lookup table.
    """

    def __init__(self, *tools):
        """Store the tools and index them by their declared name."""
        self.tools = tools
        self.tool_map = {tool.to_params()["name"]: tool for tool in tools}

    def to_params(self) -> list:
        """Convert tools to their parameter representations."""
        return [tool.to_params() for tool in self.tools]

    async def run(self, *, name: str, tool_input: dict[str, Any]) -> "ToolResult | None":
        """Run a tool by name with the provided input.

        Args:
            name: Name of the tool, as reported by its ``to_params()``.
            tool_input: Keyword arguments forwarded to the tool call.

        Returns:
            Whatever the awaited tool call returns, or ``None`` when the name
            is unknown or the tool raised. Failures are printed, not raised.
        """
        tool = self.tool_map.get(name)
        # Compare against None explicitly: a registered tool object that
        # happens to be falsy must still be runnable.
        if tool is None:
            print(f"Error running tool {name}: unknown tool")
            return None
        try:
            return await tool(**tool_input)
        except Exception as e:
            # Best-effort by design: report the failure and let the caller
            # decide what to do with a missing result.
            print(f"Error running tool {name}: {e}")
            return None
def _make_api_tool_result(result: ToolResult, tool_use_id: str) -> dict:
"""Convert a ToolResult into the API's expected format."""
tool_result_content: list[dict | str] = [] # Changed this line
is_error = False
if result.error:
is_error = True
tool_result_content = result.error
else:
if result.output:
tool_result_content.append({
"type": "text",
"text": result.output,
})
if result.base64_image:
tool_result_content.append({
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": result.base64_image,
},
})
return {
"type": "tool_result",
"content": tool_result_content,
"tool_use_id": tool_use_id,
"is_error": is_error,
}
def _response_to_params(response):
"""Convert a response to the API's parameter format."""
res = []
for block in response.content:
if block["type"] == "text":
res.append({"type": "text", "text": block["text"]})
else:
res.append(block)
return res
def _maybe_filter_to_n_most_recent_images(
messages: list[dict],
images_to_keep: int,
min_removal_threshold: int,
):
"""Filter messages to retain only the most recent images."""
if images_to_keep is None:
return messages
tool_result_blocks = cast(
list[dict],
[
item
for message in messages
for item in (
message["content"] if isinstance(message["content"], list) else []
)
if isinstance(item, dict) and item.get("type") == "tool_result"
],
)
total_images = sum(
1
for tool_result in tool_result_blocks
for content in tool_result.get("content", [])
if isinstance(content, dict) and content.get("type") == "image"
)
images_to_remove = total_images - images_to_keep
images_to_remove -= images_to_remove % min_removal_threshold
for tool_result in tool_result_blocks:
if isinstance(tool_result.get("content"), list):
new_content = []
for content in tool_result.get("content", []):
if isinstance(content, dict) and content.get("type") == "image":
if images_to_remove > 0:
images_to_remove -= 1
continue
new_content.append(content)
tool_result["content"] = new_content
Sampling Loop
Main.py
from PIL import Image
from io import BytesIO
import base64
from typing import Any, cast
from toystack.tools import BashTool, ComputerTool, EditTool, ToolResult
from toystack import ToyStackClient
# Function to display a base64 image
def display_base64_image(base64_string, max_size=(800, 800)):
    """Decode a base64-encoded image and show it with ``display``.

    The picture is shrunk in place (aspect ratio preserved) when either of
    its dimensions exceeds the matching entry of ``max_size``.
    """
    picture = Image.open(BytesIO(base64.b64decode(base64_string)))
    width, height = picture.size
    fits = width <= max_size[0] and height <= max_size[1]
    if not fits:
        picture.thumbnail(max_size, Image.Resampling.LANCZOS)
    display(picture)
# Initialize ToyStack Client
# NOTE(review): this repeats the client/instance creation from the Basic Setup
# snippet. If the snippets are run together, start_instance() is called again
# and both names are rebound; confirm that is intended.
toystack_client = ToyStackClient(api_key="your_toystack_api_key")
instance = toystack_client.start_instance(instance_type="medium")
async def sampling_loop(command: str):
    """
    Run the sampling loop for a single command until completion.

    Each pass trims old screenshots from the history, obtains a response,
    executes every ``tool_use`` block it contains, and feeds the tool results
    back as a user message. The loop ends on the first response that contains
    no ``tool_use`` block.

    Args:
        command: Task description, sent as the first user message.
    """
    messages: list[dict] = []
    tool_collection = ToolCollection(
        ComputerTool(instance),
        BashTool(instance),
        EditTool(instance),
    )
    # Add initial command to messages
    messages.append({
        "role": "user",
        "content": [{"type": "text", "text": command}],
    })
    while True:
        _maybe_filter_to_n_most_recent_images(messages, 2, 2)
        # Simulate getting ToyStack's response (if there is an API interaction similar to anthropic_client)
        # This placeholder holds only a text block, so the loop ends after one pass.
        response = {
            # Replace with actual ToyStack response call if available
            "content": [
                {"type": "text", "text": "Simulated response from ToyStack"}
            ]
        }
        # Convert response to params
        response_params = _response_to_params(response)
        # Process response content and handle tools before adding to messages
        tool_result_content = []
        for content_block in response_params:
            if content_block["type"] == "text":
                print(f"\nAssistant: {content_block['text']}")
            elif content_block["type"] == "tool_use":
                print(f"\nTool Use: {content_block['name']}")
                print(f"Input: {content_block['input']}")
                # Execute the tool
                result = await tool_collection.run(
                    name=content_block["name"],
                    tool_input=cast(dict[str, Any], content_block["input"])
                )
                print(f"Result: {result}")
                # A bash call that yields nothing is answered with a
                # screenshot of the current screen instead.
                if content_block['name'] == 'bash' and not result:
                    result = await tool_collection.run(
                        name="computer",
                        tool_input={"action": "screenshot"}
                    )
                    print("Updated result: ", result)
                if result:
                    print("Converting tool result: ", result)
                    tool_result = _make_api_tool_result(result, content_block["id"])
                    print(f"Tool Result: {tool_result}")
                    if result.output:
                        print(f"\nTool Output: {result.output}")
                    if result.error:
                        print(f"\nTool Error: {result.error}")
                    if result.base64_image:
                        print("\nTool generated an image (base64 data available)")
                        display_base64_image(result.base64_image)
                else:
                    # Every tool_use block needs a matching tool_result.
                    # Without this, a failed tool would be dropped and the
                    # loop could end with the request unanswered.
                    tool_result = {
                        "type": "tool_result",
                        "content": f"Tool {content_block['name']} returned no result.",
                        "tool_use_id": content_block["id"],
                        "is_error": True,
                    }
                    print(f"Tool Result: {tool_result}")
                tool_result_content.append(tool_result)
        print("\n---")
        # Add assistant's response to messages
        messages.append({
            "role": "assistant",
            "content": response_params,
        })
        # If tools were used, add their results to messages
        if tool_result_content:
            messages.append({
                "role": "user",
                "content": tool_result_content
            })
        else:
            # No tools used, task is complete
            break
Execute
# Example commands using ToyStack
# Exactly one assignment to ``command`` should be active; the commented-out
# lines are alternative tasks that can be swapped in.
# command = "Open a spreadsheet and enter dummy data into it. Do this using the bash tool (launch LibreOffice Calc from bash using the DISPLAY=:1 setting)."
# command = "Open Firefox using the bash tool (launch from bash using the DISPLAY=:1 setting) and search Y Combinator."
# command = "The Y Combinator search results page is currently open in the browser. Take a look, go into the Y Combinator website, and start an application."
# command = "Firefox is currently open in the browser. Do some research on Chamath Palihapitiya."
# command = "Create a new file called temp.py on the desktop and write some code in it (FastAPI server code). Create the file using mouse right-click, open it in text editor using right-click, and write there."
command = "A text editor is currently open on the screen. Write some code in it (FastAPI server code)."
# Importing necessary modules
from toystack.tools import BashTool, ComputerTool, EditTool
from toystack import ToyStackClient
# Initialize ToyStack Client
# NOTE(review): instance_type is "cloud-os" here, while the earlier snippets on
# this page pass "medium"; confirm which value is correct.
toystack_client = ToyStackClient(api_key="your_toystack_api_key")
instance = toystack_client.start_instance(instance_type="cloud-os")
async def sampling_loop(command: str):
    """
    Run the sampling loop for a single command until completion using ToyStack.

    Each pass trims old screenshots from the history, obtains a response,
    executes every ``tool_use`` block it contains, and feeds the tool results
    back as a user message. The loop ends on the first response that contains
    no ``tool_use`` block.

    Args:
        command: Task description, sent as the first user message.
    """
    messages: list[dict] = []
    tool_collection = ToolCollection(
        ComputerTool(instance),
        BashTool(instance),
        EditTool(instance),
    )
    # Add initial command to messages
    messages.append({
        "role": "user",
        "content": [{"type": "text", "text": command}],
    })
    while True:
        _maybe_filter_to_n_most_recent_images(messages, 2, 2)
        # Simulate getting ToyStack's response (if there is an API interaction similar to anthropic_client)
        # This placeholder holds only a text block, so the loop ends after one pass.
        response = {
            # Replace with actual ToyStack response call if available
            "content": [
                {"type": "text", "text": "Simulated response from ToyStack"}
            ]
        }
        # Convert response to params
        response_params = _response_to_params(response)
        # Process response content and handle tools before adding to messages
        tool_result_content = []
        for content_block in response_params:
            if content_block["type"] == "text":
                print(f"\nAssistant: {content_block['text']}")
            elif content_block["type"] == "tool_use":
                print(f"\nTool Use: {content_block['name']}")
                print(f"Input: {content_block['input']}")
                # Execute the tool
                result = await tool_collection.run(
                    name=content_block["name"],
                    tool_input=cast(dict[str, Any], content_block["input"])
                )
                print(f"Result: {result}")
                # A bash call that yields nothing is answered with a
                # screenshot of the current screen instead.
                if content_block['name'] == 'bash' and not result:
                    result = await tool_collection.run(
                        name="computer",
                        tool_input={"action": "screenshot"}
                    )
                    print("Updated result: ", result)
                if result:
                    print("Converting tool result: ", result)
                    tool_result = _make_api_tool_result(result, content_block["id"])
                    print(f"Tool Result: {tool_result}")
                    if result.output:
                        print(f"\nTool Output: {result.output}")
                    if result.error:
                        print(f"\nTool Error: {result.error}")
                    if result.base64_image:
                        print("\nTool generated an image (base64 data available)")
                        display_base64_image(result.base64_image)
                else:
                    # Every tool_use block needs a matching tool_result.
                    # Without this, a failed tool would be dropped and the
                    # loop could end with the request unanswered.
                    tool_result = {
                        "type": "tool_result",
                        "content": f"Tool {content_block['name']} returned no result.",
                        "tool_use_id": content_block["id"],
                        "is_error": True,
                    }
                    print(f"Tool Result: {tool_result}")
                tool_result_content.append(tool_result)
        print("\n---")
        # Add assistant's response to messages
        messages.append({
            "role": "assistant",
            "content": response_params,
        })
        # If tools were used, add their results to messages
        if tool_result_content:
            messages.append({
                "role": "user",
                "content": tool_result_content
            })
        else:
            # No tools used, task is complete
            break
# Run the sampling loop for this command
# NOTE: top-level ``await`` is only accepted by interactive shells such as
# IPython/Jupyter; in a plain Python script use
# ``asyncio.run(sampling_loop(command))`` instead.
await sampling_loop(command)
Last updated