Speckle and ChatGPT - chat with your 3d model

Hello everyone,

I’m writing to share a Python script I’m testing to create a page that integrates all my Speckle streams and, at the same time, allows me to interact with the branches through an OpenAI ChatGPT chat to query branch data.

You can find the Python code below, but to make it work, you’ll need some libraries and an OpenAI account with spendable tokens for the chat.

The necessary libraries are as follows:

  • streamlit
  • specklepy
  • pandas
  • pandasai

The problem I’m currently encountering is that, although the commit data seems to be correctly sent to OpenAI and the chat indicates that the data is interpretable, during the conversation, the chat says that the data is actually unavailable.

Can anyone help me correct this part so that, when the button is clicked to send the commit data to OpenAI, the data is actually used by the chat?

Thanks

import streamlit as st
from specklepy.api.client import SpeckleClient
from specklepy.api import operations
from specklepy.transports.server import ServerTransport
from specklepy.objects.base import Base
from specklepy.serialization.base_object_serializer import BaseObjectSerializer
import json
import io
import openai

# Set OpenAI API key and organization
# NOTE(review): credentials are hard-coded placeholders — load them from
# environment variables or st.secrets before sharing/deploying this script.
openai.organization = "XXXXXXXX"
openai.api_key = "XXXXXX"

# Dark theme styling
# Injects raw CSS into the Streamlit page; unsafe_allow_html=True is required
# for the <style> tag to take effect.
st.markdown(
    """
    <style>
    body {
        background-color: #1e1e1e;
        color: #d3d3f3;
    }
    .stApp {
        background-color: #1e1e1e;
    }
    .stTextInput, .stButton, .stSelectbox, .stDownloadButton, .stMarkdown, .stInfo {
        color: #d3d3f3 !important;
        background-color: #333333 !important;
    }
    .stAlert p {
        color: #d3d3f3 !important;
    }
    h1, h2, h3, h4, h5, h6, p, div {
        color: #d3d3f3;
    }
    </style>
    """,
    unsafe_allow_html=True
)

# Function to retrieve all streams associated with the account
def get_all_streams(token):
    client = SpeckleClient(host="https://app.speckle.systems")
    client.authenticate_with_token(token)
    streams = client.stream.list()
    return [{"name": stream.name, "id": stream.id} for stream in streams]

# Function to retrieve all branches of a stream
def get_all_branches(client, stream_id):
    branches = client.branch.list(stream_id)
    return [{"name": branch.name, "id": branch.id} for branch in branches]

# Recursive serialization function using BaseObjectSerializer
def serialize_data(data):
    serializer = BaseObjectSerializer()
    
    if isinstance(data, Base):
        _, obj_dict = serializer.traverse_base(data)
        return obj_dict
    elif isinstance(data, list):
        return [serialize_data(item) for item in data]  # Recursively serialize list items
    elif hasattr(data, 'items'):
        return {k: serialize_data(v) for k, v in data.items()}
    elif hasattr(data, '__iter__') and not isinstance(data, str):  # Handle iterable Collections
        return [serialize_data(item) for item in data]
    else:
        return data  # Return the data if it’s already JSON serializable

# Function to get commit data as JSON, ensuring all objects are serialized
def get_commit_data_as_json(client, stream_id, branch_name):
    try:
        transport = ServerTransport(client=client, stream_id=stream_id)
        branch = client.branch.get(stream_id, branch_name)
        
        if branch.commits.totalCount == 0:
            st.warning(f"The branch '{branch_name}' has no commits.")
            return None
        
        commit_id = branch.commits.items[0].referencedObject
        commit_data = operations.receive(commit_id, transport)
        
        # Serialize commit data to JSON-compatible format
        commit_data_serialized = serialize_data(commit_data)
        
        return commit_data_serialized
    except Exception as e:
        st.error(f"Error loading commit data for branch '{branch_name}': {e}")
        return None

# Function to reduce JSON size by extracting only key data points
def reduce_json_data(data, max_keys=50):
    if isinstance(data, dict):
        return {k: reduce_json_data(v, max_keys) for i, (k, v) in enumerate(data.items()) if i < max_keys}
    elif isinstance(data, list):
        return [reduce_json_data(v, max_keys) for v in data[:max_keys]]
    else:
        return data

# Function to stream OpenAI Chat Completion response
def stream_openai_response(messages):
    response_container = st.empty()
    full_response = ""
    stream = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages,
        stream=True,
    )

    for chunk in stream:
        if chunk.choices[0].delta.get("content") is not None:
            content = chunk.choices[0].delta["content"]
            full_response += content
            response_container.markdown(full_response)
    return full_response

# Chat history and initial JSON context
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
if "initial_context" not in st.session_state:
    st.session_state["initial_context"] = None

# Speckle token and client initialization
speckle_token = "XXXXXXXXXXX"  # TODO: read from st.secrets or an env var instead of hard-coding
try:
    client = SpeckleClient(host="https://app.speckle.systems")
    client.authenticate_with_token(speckle_token)
    streams = get_all_streams(speckle_token)

    # Sidebar selection for streams and branches
    st.sidebar.title("Streams and Branches")
    selected_stream = st.sidebar.selectbox("Select a Stream", streams, format_func=lambda x: x['name'])
    st.session_state["selected_stream"] = selected_stream
    branches = get_all_branches(client, selected_stream['id']) if selected_stream else None

    if branches:
        selected_branch = st.sidebar.selectbox("Select a Branch", branches, format_func=lambda x: x['name'])
        st.session_state["selected_branch"] = selected_branch

    # BUG FIX: "selected_branch" is only stored when the stream has branches,
    # so indexing it directly raised KeyError for branch-less streams; .get()
    # returns None instead and the block is simply skipped.
    if st.session_state["selected_stream"] and st.session_state.get("selected_branch"):
        st.write(f"**Stream Name**: {st.session_state['selected_stream']['name']}")
        st.write(f"**Stream ID**: {st.session_state['selected_stream']['id']}")
        st.write(f"**Branch**: {st.session_state['selected_branch']['name']}")

        # 3D Viewer iframe
        viewer_url = f"https://app.speckle.systems/embed?stream={st.session_state['selected_stream']['id']}&branch={st.session_state['selected_branch']['name']}&c=%7B%7D"
        st.markdown(f'<iframe title="Speckle 3D Viewer" src="{viewer_url}" width="100%" height="500" frameborder="0"></iframe>', unsafe_allow_html=True)

        # Fetch commit data
        commit_data = get_commit_data_as_json(client, st.session_state['selected_stream']['id'], st.session_state['selected_branch']['name'])
        if commit_data:
            # Prepare JSON for download
            reduced_data = reduce_json_data(commit_data)
            commit_data_json = json.dumps(reduced_data, indent=4)
            # The io.StringIO round-trip previously used here was a no-op;
            # encoding the string directly is equivalent.
            json_bytes = commit_data_json.encode()
            json_filename = f"{st.session_state['selected_stream']['name']}_{st.session_state['selected_branch']['name']}_data.json"

            # Download button outside form
            st.download_button(
                label="Download Commit JSON",
                data=json_bytes,
                file_name=json_filename,
                mime="application/json"
            )

            # Button to send commit data context to OpenAI in the background
            if st.button("Send Data to Chat"):
                st.session_state["initial_context"] = commit_data_json
                st.session_state["chat_history"].append({
                    "message": "Here is the full commit data for your reference.",
                    "is_user": False
                })
                st.write("Data sent to chat context. You can now ask questions about it.")

            # OpenAI Chat for Q&A
            st.subheader("Ask OpenAI about this data")

            with st.form(key="chat_form", clear_on_submit=True):
                user_input = st.text_input("Ask a question about the commit data")
                submit_button = st.form_submit_button("Send")

            if submit_button and user_input:
                # Prepare OpenAI messages with full chat history and initial context
                def prepare_openai_messages(chat_history, initial_context, user_input):
                    """Build the message list: system prompt, JSON context,
                    prior turns, then the new question."""
                    messages = [{"role": "system", "content": "You are an assistant that answers questions based on JSON data."}]
                    # BUG FIX (the "data unavailable" problem): the context was
                    # previously injected only when len(chat_history) == 1, but
                    # pressing "Send Data to Chat" already appends a notice
                    # entry, so that condition never held and the model never
                    # received the JSON. Send the context whenever it is set.
                    if initial_context is not None:
                        messages.append({"role": "user", "content": f"Here is the data context: {initial_context}"})
                    for entry in chat_history:
                        role = "user" if entry["is_user"] else "assistant"
                        messages.append({"role": role, "content": entry["message"]})
                    messages.append({"role": "user", "content": user_input})
                    return messages

                # Build the request BEFORE recording the question; appending it
                # to the history first made it appear twice in the request
                # (once via the history loop, once via the explicit append).
                messages = prepare_openai_messages(st.session_state["chat_history"], st.session_state["initial_context"], user_input)
                st.session_state["chat_history"].append({"message": user_input, "is_user": True})
                response = stream_openai_response(messages)
                st.session_state["chat_history"].append({"message": response, "is_user": False})

            # Display chat history
            for chat in st.session_state["chat_history"]:
                if chat["is_user"]:
                    st.write(f"**User**: {chat['message']}")
                else:
                    st.write(f"**Assistant**: {chat['message']}")

except Exception as e:
    st.error(f"Error retrieving streams or branches: {e}")

3 Likes

Hey @oscarazzus,

This looks really cool!

If your first question is “can you describe the data?”

What will be the answer?

If the data exist in the first question and then it’s getting lost in the second, that means that the memory (“chat_history”) is not properly set for OpenAI.

Another thing to have in mind is, that the commit data in JSON format is very very big. I see that you have some reduce JSON function, but just in case you should check how many tokens it generates. Using the GPT-3-turbo you might easily hit the token limit when you append the history.

1 Like

As @Nikos says you will quickly expend tokens with large data sets and rather than stripping to the first 50 props you may want to be specific of the data types you don’t want to include.

Some of this will depend on the source of data and what sort of questions you wish to handle.

The largest set of resolved data deserialized by specklepy will be geometries (Mesh, Lines etc.) mostly stored in the displayValue prop, but could also be baselines, profiles and so on. If you get rid of any geometry objects like this, you could save a huge amount of data.

Hi @Nikos thanks for your message.
One question, for example, can be to count all the elements of a specific category, or to count all the elements with a specific value in a specific parameter.
Ideally, it would be possible to interact with the viewer, for example highlighting elements in the viewer as a complementary output to the chatbot text.

Concerning the JSON, I’m not able to send the serialization of the branch directly — how can I better send the information to OpenAI, directly using an API query or a GraphQL query?

For @jonathon: 1 million words costs 1.25 USD — it’s true this can get very expensive, so I’m looking into using a local LLM, like Ollama! Do you have any other ideas for open-source local or cloud LLMs?

:star_struck: this version works
The only limitation right now is the size of the data commit; with more than 20 MB the page crashes.
Do you have an idea on how to reduce the size or how to prefilter the data?
Enjoy the script, by the way.

import streamlit as st
from specklepy.api.client import SpeckleClient
from specklepy.api import operations
from specklepy.transports.server import ServerTransport
from specklepy.objects.base import Base
from specklepy.serialization.base_object_serializer import BaseObjectSerializer
import json
import io
import openai

# Set OpenAI API key and organization
# NOTE(review): credentials are hard-coded placeholders — load them from
# environment variables or st.secrets before sharing/deploying this script.
openai.organization = "XXXX"
openai.api_key = "XXXX"

# Dark theme styling
# Injects raw CSS into the Streamlit page; unsafe_allow_html=True is required
# for the <style> tag to take effect.
st.markdown(
    """
    <style>
    body { background-color: #1e1e1e; color: #d3d3f3; }
    .stApp { background-color: #1e1e1e; }
    .stTextInput, .stButton, .stSelectbox, .stMarkdown, .stInfo {
        color: #d3d3f3 !important;
        background-color: #333333 !important;
    }
    h1, h2, h3, h4, h5, h6, p, div { color: #d3d3f3; }
    </style>
    """,
    unsafe_allow_html=True
)

# Function to retrieve all streams associated with the account
def get_all_streams(token):
    client = SpeckleClient(host="https://app.speckle.systems")
    client.authenticate_with_token(token)
    streams = client.stream.list()
    return [{"name": stream.name, "id": stream.id} for stream in streams]

# Function to retrieve all branches of a stream
def get_all_branches(client, stream_id):
    branches = client.branch.list(stream_id)
    return [{"name": branch.name, "id": branch.id} for branch in branches]

# Recursive serialization function using BaseObjectSerializer
def serialize_data(data):
    serializer = BaseObjectSerializer()
    if isinstance(data, Base):
        _, obj_dict = serializer.traverse_base(data)
        return obj_dict
    elif isinstance(data, list):
        return [serialize_data(item) for item in data]
    elif hasattr(data, 'items'):
        return {k: serialize_data(v) for k, v in data.items()}
    elif hasattr(data, '__iter__') and not isinstance(data, str):
        return [serialize_data(item) for item in data]
    return data

# Function to get commit data as JSON, ensuring all objects are serialized
def get_commit_data_as_json(client, stream_id, branch_name):
    try:
        transport = ServerTransport(client=client, stream_id=stream_id)
        branch = client.branch.get(stream_id, branch_name)
        
        if branch.commits.totalCount == 0:
            st.warning(f"No commits found in branch '{branch_name}'.")
            return None
        
        commit_id = branch.commits.items[0].referencedObject
        commit_data = operations.receive(commit_id, transport)
        return serialize_data(commit_data)
    except Exception as e:
        st.error(f"Error loading commit data: {e}")
        return None

# Main app logic
speckle_token = "xxxxxx"
client = SpeckleClient(host="https://app.speckle.systems")
client.authenticate_with_token(speckle_token)
streams = get_all_streams(speckle_token)

# Sidebar for stream and branch selection
st.sidebar.title("Streams and Branches")
selected_stream = st.sidebar.selectbox("Select a Stream", streams, format_func=lambda x: x['name'])
branches = get_all_branches(client, selected_stream['id']) if selected_stream else None

# Branch selection and data fetching
if branches:
    selected_branch = st.sidebar.selectbox("Select a Branch", branches, format_func=lambda x: x['name'])
    
    # Display stream and branch details
    st.write(f"**Stream Name**: {selected_stream['name']}")
    st.write(f"**Stream ID**: {selected_stream['id']}")
    st.write(f"**Branch**: {selected_branch['name']}")

    # Embed Speckle 3D Viewer
    viewer_url = f"https://app.speckle.systems/embed?stream={selected_stream['id']}&branch={selected_branch['name']}&c=%7B%7D"
    st.markdown(f'<iframe title="Speckle 3D Viewer" src="{viewer_url}" width="100%" height="500" frameborder="0"></iframe>', unsafe_allow_html=True)

    # Retrieve commit data from Speckle
    commit_data = get_commit_data_as_json(client, selected_stream['id'], selected_branch['name'])
    if commit_data:
        commit_data_json = json.dumps(commit_data, indent=4)
        
        # Display commit data as collapsible JSON
        with st.expander("View Commit Data JSON"):
            st.json(commit_data)

        # Question submission form
        st.subheader("Ask GPT-4 about this data")
        user_question = st.text_input("Your question about the commit data:")

        # Submit question and get response
        if st.button("Submit"):
            # Prepare OpenAI API messages
            messages = [
                {"role": "system", "content": "You are a helpful assistant with JSON data expertise."},
                {"role": "user", "content": f"Here is the JSON data: {commit_data_json}"},
                {"role": "user", "content": user_question}
            ]
            
            # Get response from OpenAI
            try:
                response = openai.ChatCompletion.create(model="gpt-4-turbo", messages=messages)
                st.write("**GPT-4 Response:**")
                st.write(response.choices[0].message["content"])
            except Exception as e:
                st.error(f"Error with OpenAI API: {e}")

And BOOM :smiley:

you can add all the filters you need to remove parts of your json!! It works really fast now!!

import streamlit as st
from specklepy.api.client import SpeckleClient
from specklepy.api import operations
from specklepy.transports.server import ServerTransport
from specklepy.objects.base import Base
from specklepy.serialization.base_object_serializer import BaseObjectSerializer
import json
import openai

# Set OpenAI API key and organization (ensure secure handling in production)
# NOTE(review): credentials are hard-coded placeholders — load them from
# environment variables or st.secrets before sharing/deploying this script.
openai.organization = "XXXXXX"
openai.api_key = "XXXXXX"

# Dark theme styling
# Injects raw CSS into the Streamlit page; unsafe_allow_html=True is required
# for the <style> tag to take effect.
st.markdown("""
    <style>
    body { background-color: #1e1e1e; color: #d3d3f3; }
    .stApp { background-color: #1e1e1e; }
    .stTextInput, .stButton, .stSelectbox, .stMarkdown, .stInfo {
        color: #d3d3f3 !important;
        background-color: #333333 !important;
    }
    h1, h2, h3, h4, h5, h6, p, div { color: #d3d3f3; }
    </style>
    """, unsafe_allow_html=True
)

# Function to get all streams associated with the account
def get_all_streams(token):
    client = SpeckleClient(host="https://app.speckle.systems/")
    client.authenticate_with_token(token)
    streams = client.stream.list()
    return [{"name": stream.name, "id": stream.id} for stream in streams]

# Function to get all branches of a stream
def get_all_branches(client, stream_id):
    branches = client.branch.list(stream_id)
    return [{"name": branch.name, "id": branch.id} for branch in branches]

# Recursive serialization function to explore all commit data
def serialize_data(data):
    serializer = BaseObjectSerializer()
    if isinstance(data, Base):
        _, obj_dict = serializer.traverse_base(data)
        return obj_dict
    elif isinstance(data, list):
        return [serialize_data(item) for item in data]
    elif hasattr(data, 'items'):
        return {k: serialize_data(v) for k, v in data.items()}
    elif hasattr(data, '__iter__') and not isinstance(data, str):
        return [serialize_data(item) for item in data]
    return data

# Function to filter JSON data to include only relevant fields and exclude specific fields (globally and nested)
def filter_relevant_data(data, keys_of_interest, keys_to_exclude):
    if isinstance(data, dict):
        return {
            k: filter_relevant_data(v, keys_of_interest, keys_to_exclude)
            for k, v in data.items()
            if (k in keys_of_interest or isinstance(v, (dict, list))) and k not in keys_to_exclude
        }
    elif isinstance(data, list):
        return [filter_relevant_data(item, keys_of_interest, keys_to_exclude) for item in data]
    return data

# Function to get and serialize commit data, including only relevant fields and excluding specified keys at all levels
def get_commit_data_as_json(client, stream_id, branch_name):
    try:
        transport = ServerTransport(client=client, stream_id=stream_id)
        branch = client.branch.get(stream_id, branch_name)
        
        if branch.commits.totalCount == 0:
            st.warning(f"No commits found in branch '{branch_name}'.")
            return None
        
        commit_id = branch.commits.items[0].referencedObject
        commit_data = operations.receive(commit_id, transport)
        
        # Serialize commit data
        serialized_data = serialize_data(commit_data)
        
        # Define keys to include globally and keys to exclude at any level
        keys_of_interest = ["id", "name", "type", "location", "volume", "material", "level", "status"]
        keys_to_exclude = [
            "basePoint", "displayValue", "parameters", "outline", "voids",
            "ConnectedFrom", "ConnectedTo", "Decomposes", "FillsVoids", 
            "HasAssignments", "HasAssociations", "HasControlElements", 
            "HasCoverings", "HasOpenings", "HasPorts", "HasProjections", 
            "HasStructuralMember", "IsConnectionRealization", "IsDecomposedBy", 
            "ProvidesBoundaries", "Quantities", "ReferencedBy", "ReferencedInStructures"
        ]
        
        # Filter data to include only relevant fields and exclude unwanted ones globally and nested
        filtered_commit_data = filter_relevant_data(serialized_data, keys_of_interest, keys_to_exclude)
        
        return filtered_commit_data
    except Exception as e:
        st.error(f"Error loading commit data: {e}")
        return None

# Main app logic
speckle_token = "XXXXXX"  # Replace with a secure method in production
client = SpeckleClient(host="https://app.speckle.systems/")
client.authenticate_with_token(speckle_token)
streams = get_all_streams(speckle_token)

# Sidebar for selecting stream and branch
st.sidebar.title("Select Stream and Branch")
selected_stream = st.sidebar.selectbox("Select a Stream", streams, format_func=lambda x: x['name'])
branches = get_all_branches(client, selected_stream['id']) if selected_stream else None

# Branch selection and data retrieval
if branches:
    selected_branch = st.sidebar.selectbox("Select a Branch", branches, format_func=lambda x: x['name'])
    
    # Display stream and branch details
    st.write(f"**Stream Name**: {selected_stream['name']}")
    st.write(f"**Stream ID**: {selected_stream['id']}")
    st.write(f"**Branch**: {selected_branch['name']}")

    # Embed Speckle 3D viewer for the selected branch
    # BUG FIX: the embed URL previously contained a double slash
    # ("...speckle.systems//embed") left over from the trailing-slash host.
    viewer_url = f"https://app.speckle.systems/embed?stream={selected_stream['id']}&branch={selected_branch['name']}&c=%7B%7D"
    st.markdown(f'<iframe title="Speckle 3D Viewer" src="{viewer_url}" width="100%" height="500" frameborder="0"></iframe>', unsafe_allow_html=True)

    # Retrieve and filter commit data from Speckle
    commit_data = get_commit_data_as_json(client, selected_stream['id'], selected_branch['name'])
    if commit_data:
        commit_data_json = json.dumps(commit_data, indent=4)
        
        # Display commit data as expandable JSON
        with st.expander("View Commit Data JSON"):
            st.json(commit_data)

        # User question input for GPT-4 based Q&A on JSON data
        st.subheader("Ask GPT-4 About the Commit Data")
        user_question = st.text_input("Enter your question about the commit data:")

        # Send question to OpenAI and display the answer
        if st.button("Ask"):
            # The filtered JSON is resent with every question; the filtering in
            # get_commit_data_as_json is what keeps token usage manageable.
            messages = [
                {"role": "system", "content": "You are a JSON data expert."},
                {"role": "user", "content": f"Here is the filtered JSON data: {commit_data_json}"},
                {"role": "user", "content": user_question}
            ]
            
            try:
                # Legacy pre-1.0 openai interface; pin openai<1.0 or migrate.
                response = openai.ChatCompletion.create(model="gpt-4-turbo", messages=messages)
                st.write("**GPT-4 Response:**")
                st.write(response.choices[0].message["content"])
            except Exception as e:
                st.error(f"OpenAI API error: {e}")
2 Likes

Excellent work @oscarazzus

1 Like

I’m starting to test llama on pandasai :smiley:

1 Like