From 7d0570c27ec1ae8d91481d939f046431d84679d7 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 15 Jan 2026 23:38:21 +0000 Subject: [PATCH 1/4] Initial plan From d8549c0c8c139531ee5bf266609f7e5352384c5f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 15 Jan 2026 23:49:15 +0000 Subject: [PATCH 2/4] Add refresh data feature with backend endpoint and UI components Co-authored-by: Chenglong-MS <93549116+Chenglong-MS@users.noreply.github.com> --- py-src/data_formulator/tables_routes.py | 91 +++++ src/app/dfSlice.tsx | 20 ++ src/views/DataThread.tsx | 445 ++++++++++++++++++++++-- yarn.lock | 8 +- 4 files changed, 526 insertions(+), 38 deletions(-) diff --git a/py-src/data_formulator/tables_routes.py b/py-src/data_formulator/tables_routes.py index 74f31f2..3e3776a 100644 --- a/py-src/data_formulator/tables_routes.py +++ b/py-src/data_formulator/tables_routes.py @@ -841,4 +841,95 @@ def data_loader_ingest_data_from_query(): return jsonify({ "status": "error", "message": safe_msg + }), status_code + + +@tables_bp.route('/refresh-derived-data', methods=['POST']) +def refresh_derived_data(): + """Refresh derived data by re-executing Python code on updated base table""" + try: + from data_formulator.py_sandbox import run_transform_in_sandbox2020 + + data = request.get_json() + + # Get updated base table data and transformation info + updated_table = data.get('updated_table') # {name, rows, columns} + derived_tables = data.get('derived_tables', []) # [{id, code, source_tables: [names]}] + + if not updated_table: + return jsonify({"status": "error", "message": "No updated table provided"}), 400 + + if not derived_tables: + return jsonify({"status": "error", "message": "No derived tables to refresh"}), 400 + + # Validate updated table has same columns as before + updated_table_name = updated_table['name'] + updated_columns = set(updated_table['columns']) + 
+ results = [] + + # Process each derived table + for derived_info in derived_tables: + try: + code = derived_info['code'] + source_table_names = derived_info['source_tables'] + derived_table_id = derived_info['id'] + + # Prepare input dataframes + df_list = [] + + for source_name in source_table_names: + if source_name == updated_table_name: + # Use the updated data + df = pd.DataFrame(updated_table['rows']) + else: + # Fetch from database + with db_manager.connection(session['session_id']) as db: + result = db.execute(f"SELECT * FROM {source_name}").fetchdf() + df = result + + df_list.append(df) + + # Execute the transformation code in subprocess for safety + exec_result = run_transform_in_sandbox2020(code, df_list, exec_python_in_subprocess=True) + + if exec_result['status'] == 'ok': + output_df = exec_result['content'] + + # Convert to records format for JSON serialization + rows = json.loads(output_df.to_json(orient='records', date_format='iso')) + columns = list(output_df.columns) + + results.append({ + 'id': derived_table_id, + 'status': 'success', + 'rows': rows, + 'columns': columns + }) + else: + results.append({ + 'id': derived_table_id, + 'status': 'error', + 'message': exec_result['content'] + }) + + except Exception as e: + logger.error(f"Error refreshing derived table {derived_info.get('id')}: {str(e)}") + results.append({ + 'id': derived_info.get('id'), + 'status': 'error', + 'message': str(e) + }) + + return jsonify({ + "status": "success", + "results": results + }) + + except Exception as e: + logger.error(f"Error refreshing derived data: {str(e)}") + safe_msg, status_code = sanitize_db_error_message(e) + return jsonify({ + "status": "error", + "message": safe_msg }), status_code \ No newline at end of file diff --git a/src/app/dfSlice.tsx b/src/app/dfSlice.tsx index 393d134..16a9cb4 100644 --- a/src/app/dfSlice.tsx +++ b/src/app/dfSlice.tsx @@ -475,6 +475,26 @@ export const dataFormulatorSlice = createSlice({ let attachedMetadata = 
action.payload.attachedMetadata; state.tables = state.tables.map(t => t.id == tableId ? {...t, attachedMetadata} : t); }, + updateTableRows: (state, action: PayloadAction<{tableId: string, rows: any[]}>) => { + let tableId = action.payload.tableId; + let rows = action.payload.rows; + state.tables = state.tables.map(t => { + if (t.id == tableId) { + // Update rows while preserving other table properties + return {...t, rows}; + } + return t; + }); + + // Update concept shelf items for this table if columns changed + let table = state.tables.find(t => t.id == tableId); + if (table) { + // Remove old field items for this table + state.conceptShelfItems = state.conceptShelfItems.filter(f => f.tableRef != tableId); + // Add new field items + state.conceptShelfItems = [...state.conceptShelfItems, ...getDataFieldItems(table)]; + } + }, extendTableWithNewFields: (state, action: PayloadAction<{tableId: string, columnName: string, values: any[], previousName: string | undefined, parentIDs: string[]}>) => { // extend the existing extTable with new columns from the new table let newValues = action.payload.values; diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx index d873edb..5be62bb 100644 --- a/src/views/DataThread.tsx +++ b/src/views/DataThread.tsx @@ -22,7 +22,10 @@ import { Popper, Paper, ClickAwayListener, - Badge + Badge, + Menu, + MenuItem, + ListItemText } from '@mui/material'; import { VegaLite } from 'react-vega' @@ -46,6 +49,8 @@ import CloseIcon from '@mui/icons-material/Close'; import HelpOutlineIcon from '@mui/icons-material/HelpOutline'; import CheckCircleOutlineIcon from '@mui/icons-material/CheckCircleOutline'; import CancelOutlinedIcon from '@mui/icons-material/CancelOutlined'; +import RefreshIcon from '@mui/icons-material/Refresh'; +import MoreVertIcon from '@mui/icons-material/MoreVert'; import _ from 'lodash'; import { getChartTemplate } from '../components/ChartTemplates'; @@ -196,6 +201,190 @@ const MetadataPopup = memo<{ ); }); +// 
Refresh Data Dialog Component +const RefreshDataDialog = memo<{ + open: boolean; + anchorEl: HTMLElement | null; + onClose: () => void; + onRefresh: (file: File | null, rawData: string) => void; + tableName: string; + tableColumns: string[]; +}>(({ open, anchorEl, onClose, onRefresh, tableName, tableColumns }) => { + const [uploadMode, setUploadMode] = useState<'file' | 'raw'>('file'); + const [selectedFile, setSelectedFile] = useState(null); + const [rawData, setRawData] = useState(''); + const [error, setError] = useState(''); + + useEffect(() => { + if (!open) { + setSelectedFile(null); + setRawData(''); + setError(''); + setUploadMode('file'); + } + }, [open]); + + const validateData = (data: any[]) => { + if (!Array.isArray(data) || data.length === 0) { + return 'Data must be a non-empty array of objects'; + } + + const newColumns = Object.keys(data[0]); + const expectedColumns = new Set(tableColumns); + const actualColumns = new Set(newColumns); + + // Check if all expected columns are present + for (const col of expectedColumns) { + if (!actualColumns.has(col)) { + return `Missing required column: ${col}`; + } + } + + return null; + }; + + const handleFileChange = (e: React.ChangeEvent) => { + const file = e.target.files?.[0]; + if (file) { + setSelectedFile(file); + setError(''); + } + }; + + const handleRefresh = async () => { + try { + if (uploadMode === 'file' && selectedFile) { + // Validate file extension + const ext = selectedFile.name.split('.').pop()?.toLowerCase(); + if (!['csv', 'xlsx', 'xls', 'json'].includes(ext || '')) { + setError('Unsupported file format. 
Please use CSV, XLSX, or JSON.'); + return; + } + + onRefresh(selectedFile, ''); + onClose(); + } else if (uploadMode === 'raw' && rawData) { + // Validate JSON format + try { + const parsed = JSON.parse(rawData); + const validationError = validateData(parsed); + if (validationError) { + setError(validationError); + return; + } + onRefresh(null, rawData); + onClose(); + } catch (e) { + setError('Invalid JSON format'); + } + } else { + setError('Please provide data to refresh'); + } + } catch (e) { + setError('Error processing data'); + } + }; + + return ( + + + + + Refresh data for {tableName} + + + Upload new data with the same column names: {tableColumns.join(', ')} + + + + + + + + {uploadMode === 'file' ? ( + + + {selectedFile && ( + + Selected: {selectedFile.name} + + )} + + ) : ( + { + setRawData(e.target.value); + setError(''); + }} + sx={{ my: 1, '& .MuiInputBase-input': { fontSize: 12 } }} + /> + )} + + {error && ( + + {error} + + )} + + + + + + + + + ); +}); + // Agent Status Box Component const AgentStatusBox = memo<{ tableId: string; @@ -490,6 +679,16 @@ let SingleThreadGroupView: FC<{ const [selectedTableForMetadata, setSelectedTableForMetadata] = useState(null); const [metadataAnchorEl, setMetadataAnchorEl] = useState(null); + // Refresh data popup state + const [refreshDataPopupOpen, setRefreshDataPopupOpen] = useState(false); + const [selectedTableForRefresh, setSelectedTableForRefresh] = useState(null); + const [refreshDataAnchorEl, setRefreshDataAnchorEl] = useState(null); + + // Menu state for actions + const [menuAnchorEl, setMenuAnchorEl] = useState(null); + const [menuOpen, setMenuOpen] = useState(false); + const [menuTableId, setMenuTableId] = useState(null); + let handleUpdateTableDisplayId = (tableId: string, displayId: string) => { dispatch(dfActions.updateTableDisplayId({ @@ -519,6 +718,121 @@ let SingleThreadGroupView: FC<{ } }; + const handleOpenRefreshDataPopup = (table: DictTable, anchorEl: HTMLElement) => { + 
setSelectedTableForRefresh(table); + setRefreshDataAnchorEl(anchorEl); + setRefreshDataPopupOpen(true); + }; + + const handleCloseRefreshDataPopup = () => { + setRefreshDataPopupOpen(false); + setSelectedTableForRefresh(null); + setRefreshDataAnchorEl(null); + }; + + const handleRefreshData = async (file: File | null, rawData: string) => { + if (!selectedTableForRefresh) return; + + try { + const formData = new FormData(); + formData.append('table_name', selectedTableForRefresh.id); + + if (file) { + formData.append('file', file); + } else if (rawData) { + formData.append('raw_data', rawData); + } + + // First, replace the table data in the database + const replaceResponse = await fetch('/api/tables/create-table', { + method: 'POST', + body: formData + }); + + const replaceResult = await replaceResponse.json(); + + if (replaceResult.status !== 'success') { + throw new Error(replaceResult.message || 'Failed to replace table data'); + } + + // Get the updated table data from server + const tableResponse = await fetch(`/api/tables/get-table?table_name=${selectedTableForRefresh.id}`); + const tableResult = await tableResponse.json(); + + if (tableResult.status !== 'success') { + throw new Error('Failed to fetch updated table data'); + } + + // Update the base table in Redux + dispatch(dfActions.updateTableRows({ + tableId: selectedTableForRefresh.id, + rows: tableResult.rows + })); + + // Find all derived tables that depend on this table + const derivedTables = tables.filter(t => + t.derive?.source?.includes(selectedTableForRefresh.id) + ); + + if (derivedTables.length > 0) { + // Call the refresh-derived-data endpoint + const refreshPayload = { + updated_table: { + name: selectedTableForRefresh.id, + rows: tableResult.rows, + columns: tableResult.columns + }, + derived_tables: derivedTables.map(dt => ({ + id: dt.id, + code: dt.derive?.code || '', + source_tables: dt.derive?.source || [] + })) + }; + + const refreshResponse = await 
fetch('/api/tables/refresh-derived-data', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify(refreshPayload) + }); + + const refreshResult = await refreshResponse.json(); + + if (refreshResult.status === 'success') { + // Update Redux state with refreshed derived tables + refreshResult.results.forEach((result: any) => { + if (result.status === 'success') { + dispatch(dfActions.updateTableRows({ + tableId: result.id, + rows: result.rows + })); + } else { + console.error(`Failed to refresh table ${result.id}:`, result.message); + } + }); + } + } + + } catch (error) { + console.error('Error refreshing data:', error); + alert(`Error refreshing data: ${error instanceof Error ? error.message : 'Unknown error'}`); + } + }; + + const handleMenuOpen = (event: React.MouseEvent, tableId: string) => { + event.stopPropagation(); + setMenuAnchorEl(event.currentTarget); + setMenuTableId(tableId); + setMenuOpen(true); + }; + + const handleMenuClose = () => { + setMenuOpen(false); + setMenuAnchorEl(null); + setMenuTableId(null); + }; + let buildTriggerCard = (trigger: Trigger) => { let selectedClassName = trigger.chart?.id == focusedChartId ? 
'selected-card' : ''; @@ -673,40 +987,46 @@ let SingleThreadGroupView: FC<{ - {table?.derive == undefined && - { - event.stopPropagation(); - handleOpenMetadataPopup(table!, event.currentTarget); - }} - > - - - } + {table?.derive == undefined && ( + + handleMenuOpen(event, tableId)} + > + + + + )} - {tableDeleteEnabled && - { - event.stopPropagation(); - dispatch(dfActions.deleteTable(tableId)); - }} - > - - - } + {table?.derive !== undefined && tableDeleteEnabled && ( + + { + event.stopPropagation(); + dispatch(dfActions.deleteTable(tableId)); + }} + > + + + + )} + + {/* Menu for original table actions */} + + { + event.stopPropagation(); + handleOpenMetadataPopup(table!, menuAnchorEl!); + }}> + + + + + + { + event.stopPropagation(); + handleOpenRefreshDataPopup(table!, menuAnchorEl!); + }}> + + + + + + {tableDeleteEnabled && ( + { + event.stopPropagation(); + dispatch(dfActions.deleteTable(tableId)); + }}> + + + + + + )} + @@ -888,6 +1257,14 @@ let SingleThreadGroupView: FC<{ initialValue={selectedTableForMetadata?.attachedMetadata || ''} tableName={selectedTableForMetadata?.displayId || selectedTableForMetadata?.id || ''} /> + } diff --git a/yarn.lock b/yarn.lock index f1d9bfb..0baef35 100644 --- a/yarn.lock +++ b/yarn.lock @@ -4801,10 +4801,10 @@ uuid@^8.3.0: resolved "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz" integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg== -validator@^13.15.22: - version "13.15.22" - resolved "https://registry.yarnpkg.com/validator/-/validator-13.15.22.tgz#5f847cf4a799107e5716fc87e5cf2a337a71eb14" - integrity sha512-uT/YQjiyLJP7HSrv/dPZqK9L28xf8hsNca01HSz1dfmI0DgMfjopp1rO/z13NeGF1tVystF0Ejx3y4rUKPw+bQ== +validator@^13.15.20: + version "13.15.26" + resolved "https://registry.yarnpkg.com/validator/-/validator-13.15.26.tgz#36c3deeab30e97806a658728a155c66fcaa5b944" + integrity sha512-spH26xU080ydGggxRyR1Yhcbgx+j3y5jbNXk/8L+iRvdIEQ4uTRH2Sgf2dokud6Q4oAtsbNvJ1Ft+9xmm6IZcA== 
vega-canvas@^2.0.0: version "2.0.0" From d45d762bad353f6cb86d38a4d7887cdad4bb6d86 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 15 Jan 2026 23:54:28 +0000 Subject: [PATCH 3/4] Address code review feedback: improve validation and fix efficiency issues Co-authored-by: Chenglong-MS <93549116+Chenglong-MS@users.noreply.github.com> --- py-src/data_formulator.egg-info/PKG-INFO | 305 ++++++++++++++++++ py-src/data_formulator.egg-info/SOURCES.txt | 178 ++++++++++ .../dependency_links.txt | 1 + .../data_formulator.egg-info/entry_points.txt | 2 + py-src/data_formulator.egg-info/requires.txt | 25 ++ py-src/data_formulator.egg-info/top_level.txt | 1 + py-src/data_formulator/tables_routes.py | 22 +- src/views/DataThread.tsx | 32 +- 8 files changed, 551 insertions(+), 15 deletions(-) create mode 100644 py-src/data_formulator.egg-info/PKG-INFO create mode 100644 py-src/data_formulator.egg-info/SOURCES.txt create mode 100644 py-src/data_formulator.egg-info/dependency_links.txt create mode 100644 py-src/data_formulator.egg-info/entry_points.txt create mode 100644 py-src/data_formulator.egg-info/requires.txt create mode 100644 py-src/data_formulator.egg-info/top_level.txt diff --git a/py-src/data_formulator.egg-info/PKG-INFO b/py-src/data_formulator.egg-info/PKG-INFO new file mode 100644 index 0000000..119ac3e --- /dev/null +++ b/py-src/data_formulator.egg-info/PKG-INFO @@ -0,0 +1,305 @@ +Metadata-Version: 2.4 +Name: data_formulator +Version: 0.5.1 +Summary: Data Formulator is research protoype data visualization tool powered by AI. 
+Author-email: Chenglong Wang , Dan Marshall +License-Expression: MIT +Project-URL: Homepage, https://github.com/microsoft/data-formulator +Project-URL: Repository, https://github.com/microsoft/data-formulator.git +Project-URL: Bug Tracker, https://github.com/microsoft/data-formulator/issues +Keywords: data visualization,LLM,AI +Classifier: Development Status :: 4 - Beta +Classifier: Programming Language :: Python +Requires-Python: >=3.9 +Description-Content-Type: text/markdown +License-File: LICENSE +Requires-Dist: jupyter +Requires-Dist: pandas +Requires-Dist: flask +Requires-Dist: flask-cors +Requires-Dist: openai +Requires-Dist: python-dotenv +Requires-Dist: vega_datasets +Requires-Dist: litellm +Requires-Dist: duckdb +Requires-Dist: numpy +Requires-Dist: vl-convert-python +Requires-Dist: backoff +Requires-Dist: beautifulsoup4 +Requires-Dist: scikit-learn +Requires-Dist: azure-identity +Requires-Dist: azure-kusto-data +Requires-Dist: azure-keyvault-secrets +Requires-Dist: azure-storage-blob +Requires-Dist: google-cloud-bigquery +Requires-Dist: google-auth +Requires-Dist: db-dtypes +Requires-Dist: boto3 +Requires-Dist: pymysql +Requires-Dist: pyodbc +Requires-Dist: pymongo +Dynamic: license-file + +

+ Data Formulator icon  + Data Formulator: AI-powered Data Visualization +

+ + +

+ 🪄 Explore data with visualizations, powered by AI agents. +

+ +

+ Try Online Demo +   + Install Locally +

+ +

+ arXiv  + License: MIT  + YouTube  + build  + Discord +

+ + + + + + + + + +## News 🔥🔥🔥 +[12-08-2025] **Data Formulator 0.5.1** — Connect more, visualize more, move faster +- 🔌 **Community data loaders**: Google BigQuery, MySQL, Postgres, MongoDB +- 📊 **New chart types**: US Map & Pie Chart (more to be added soon) +- ✏️ **Editable reports**: Refine generated reports with [Chartifact](https://github.com/microsoft/chartifact) in markdown style. [demo](https://github.com/microsoft/data-formulator/pull/200#issue-3635408217) +- ⚡ **Snappier UI**: Noticeably faster interactions across the board + +[11-07-2025] Data Formulator 0.5: Vibe with your data, in control + +- 📊 **Load (almost) any data**: load structured data, extract data from screenshots, from messy text blocks, or connect to databases. +- 🤖 **Explore data with AI agents**: + - In agent mode, provide a high-level goal and ask agents to explore data for you. + - To stay in control, directly interact with agents: ask for recommendations or specify chart designs with UI + NL inputs, and AI agents will formulate data to realize your design. + - Use data threads to control branching exploration paths: backtrack, branch, or follow up. +- ✅ **Verify AI generated results**: interact with charts and inspect data, formulas, explanations, and code. +- 📝 **Create reports to share insights**: choose charts you want to share, and ask agents to create reports grounded in data formulated throughout exploration. 
+ +## Previous Updates + +Here are milestones that lead to the current design: +- **v0.2.2** ([Demo](https://github.com/microsoft/data-formulator/pull/176)): Goal-driven exploration with agent recommendations and performance improvements +- **v0.2.1.3/4** ([Readme](https://github.com/microsoft/data-formulator/tree/main/py-src/data_formulator/data_loader) | [Demo](https://github.com/microsoft/data-formulator/pull/155)): External data loaders (MySQL, PostgreSQL, MSSQL, Azure Data Explorer, S3, Azure Blob) +- **v0.2** ([Demos](https://github.com/microsoft/data-formulator/releases/tag/0.2)): Large data support with DuckDB integration +- **v0.1.7** ([Demos](https://github.com/microsoft/data-formulator/releases/tag/0.1.7)): Dataset anchoring for cleaner workflows +- **v0.1.6** ([Demo](https://github.com/microsoft/data-formulator/releases/tag/0.1.6)): Multi-table support with automatic joins +- **Model Support**: OpenAI, Azure, Ollama, Anthropic via [LiteLLM](https://github.com/BerriAI/litellm) ([feedback](https://github.com/microsoft/data-formulator/issues/49)) +- **Python Package**: Easy local installation ([try it](#get-started)) +- **Visualization Challenges**: Test your skills ([challenges](https://github.com/microsoft/data-formulator/issues/53)) +- **Data Extraction**: Parse data from images and text ([demo](https://github.com/microsoft/data-formulator/pull/31#issuecomment-2403652717)) +- **Initial Release**: [Blog](https://www.microsoft.com/en-us/research/blog/data-formulator-exploring-how-ai-can-help-analysts-create-rich-data-visualizations/) | [Video](https://youtu.be/3ndlwt0Wi3c) + +
+View detailed update history + +- [07-10-2025] Data Formulator 0.2.2: Start with an analysis goal + - Some key frontend performance updates. + - You can start your exploration with a goal, or, tab and see if the agent can recommend some good exploration ideas for you. [Demo](https://github.com/microsoft/data-formulator/pull/176) + +- [05-13-2025] Data Formulator 0.2.1.3/4: External Data Loader + - We introduced an external data loader class to make importing data easier. [Readme](https://github.com/microsoft/data-formulator/tree/main/py-src/data_formulator/data_loader) and [Demo](https://github.com/microsoft/data-formulator/pull/155) + - Current data loaders: MySQL, Azure Data Explorer (Kusto), Azure Blob and Amazon S3 (json, parquet, csv). + - [07-01-2025] Updated with: Postgresql, mssql. + - Call for action [link](https://github.com/microsoft/data-formulator/issues/156): + - Users: let us know which data source you'd like to load data from. + - Developers: let's build more data loaders. + +- [04-23-2025] Data Formulator 0.2: working with *large* data 📦📦📦 + - Explore large data by: + 1. Upload a large data file to the local database (powered by [DuckDB](https://github.com/duckdb/duckdb)). + 2. Use drag-and-drop to specify charts, and Data Formulator dynamically fetches data from the database to create visualizations (with ⚡️⚡️⚡️ speeds). + 3. Work with AI agents: they generate SQL queries to transform the data to create rich visualizations! + 4. Anchor the result / follow up / create a new branch / join tables; let's dive deeper. + - Check out the demos at [[https://github.com/microsoft/data-formulator/releases/tag/0.2]](https://github.com/microsoft/data-formulator/releases/tag/0.2) + - Improved overall system performance, and enjoy the updated derive concept functionality. + +- [03-20-2025] Data Formulator 0.1.7: Anchoring ⚓︎ + - Anchor an intermediate dataset, so that follow-up data analyses are built on top of the anchored data, not the original one. 
+ - Clean a dataset and work with only the cleaned data; create a subset from the original data or join multiple datasets, and then go from there. AI agents will be less likely to get confused and will work faster. ⚡️⚡️ + - Check out the demos at [[https://github.com/microsoft/data-formulator/releases/tag/0.1.7]](https://github.com/microsoft/data-formulator/releases/tag/0.1.7) + - Don't forget to update Data Formulator to test it out! + +- [02-20-2025] Data Formulator 0.1.6 released! + - Now supports working with multiple datasets at once! Tell Data Formulator which data tables you would like to use in the encoding shelf, and it will figure out how to join the tables to create a visualization to answer your question. 🪄 + - Check out the demo at [[https://github.com/microsoft/data-formulator/releases/tag/0.1.6]](https://github.com/microsoft/data-formulator/releases/tag/0.1.6). + - Update your Data Formulator to the latest version to play with the new features. + +- [02-12-2025] More models supported now! + - Now supports OpenAI, Azure, Ollama, and Anthropic models (and more powered by [LiteLLM](https://github.com/BerriAI/litellm)); + - Models with strong code generation and instruction-following capabilities are recommended (gpt-4o, claude-3-5-sonnet etc.); + - You can store API keys in `api-keys.env` to avoid typing them every time (see template `api-keys.env.template`). + - Let us know which models you have good/bad experiences with, and what models you would like to see supported! [[comment here]](https://github.com/microsoft/data-formulator/issues/49) + +- [11-07-2024] Minor fun update: data visualization challenges! + - We added a few visualization challenges with the sample datasets. Can you complete them all? [[try them out!]](https://github.com/microsoft/data-formulator/issues/53#issue-2641841252) + - Comment in the issue when you do, or share your results/questions with others! 
[[comment here]](https://github.com/microsoft/data-formulator/issues/53) + +- [10-11-2024] Data Formulator python package released! + - You can now install Data Formulator using Python and run it locally, easily. [[check it out]](#get-started). + - Our Codespaces configuration is also updated for fast start up ⚡️. [[try it now!]](https://codespaces.new/microsoft/data-formulator?quickstart=1) + - New experimental feature: load an image or a messy text, and ask AI to parse and clean it for you(!). [[demo]](https://github.com/microsoft/data-formulator/pull/31#issuecomment-2403652717) + +- [10-01-2024] Initial release of Data Formulator, check out our [[blog]](https://www.microsoft.com/en-us/research/blog/data-formulator-exploring-how-ai-can-help-analysts-create-rich-data-visualizations/) and [[video]](https://youtu.be/3ndlwt0Wi3c)! + +
+ +## Overview + +**Data Formulator** is a Microsoft Research prototype for data exploration with visualizations powered by AI agents. + +Data Formulator enables analysts to iteratively explore and visualize data. Starting with data in any format (screenshot, text, csv, or database), users can work with AI agents through a novel blended interface that combines *user interface interactions (UI)* and *natural language (NL) inputs* to communicate their intents, control branching exploration directions, and create reports to share their insights. + +## Get Started + +Play with Data Formulator with one of the following options: + +- **Option 1: Install via Python PIP** + + Use Python PIP for an easy setup experience, running locally (recommended: install it in a virtual environment). + + ```bash + # install data_formulator + pip install data_formulator + + # Run data formulator with this command + python -m data_formulator + ``` + + Data Formulator will be automatically opened in the browser at [http://localhost:5000](http://localhost:5000). + + *You can specify the port number (e.g., 8080) with `python -m data_formulator --port 8080` if the default port is occupied.* + +- **Option 2: Codespaces (5 minutes)** + + You can also run Data Formulator in Codespaces; we have everything pre-configured. For more details, see [CODESPACES.md](CODESPACES.md). + + [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/microsoft/data-formulator?quickstart=1) + +- **Option 3: Working in developer mode** + + You can build Data Formulator locally if you prefer full control over your development environment and want to develop your own version on top. For detailed instructions, refer to [DEVELOPMENT.md](DEVELOPMENT.md). 
+ + +## Using Data Formulator + +### Load Data + +Besides uploading csv, tsv or xlsx files that contain structured data, you can ask Data Formulator to extract data from screenshots, text blocks or websites, or load data from databases using connectors. Then you are ready to explore. + +image + +### Explore Data + +There are four levels to explore data, depending on whether you want more vibe or more control: + +- Level 1 (most control): Create charts with UI via drag-and-drop, if all fields to be visualized are already in the data. +- Level 2: Specify chart designs with UI + NL inputs. Describe how new fields should be visualized in your chart, and AI will automatically transform data to realize the design. +- Level 3: Get recommendations: Ask AI agents to recommend charts directly from NL descriptions, or even directly ask for exploration ideas. +- Level 4 (most vibe): In agent mode, provide a high-level goal and let AI agents automatically plan and explore data in multiple turns. Exploration threads will be created automatically. + +https://github.com/user-attachments/assets/164aff58-9f93-4792-b8ed-9944578fbb72 + +- Level 5: In practice, leverage all of them to keep up with both vibe and control! + +### Create Reports + +Use the report builder to compose a report of the style you like, based on selected charts. Then share the reports with others! + + + +## Developers' Guide + +Follow the [developers' instructions](DEVELOPMENT.md) to build your new data analysis tools on top of Data Formulator. + +Help wanted: + +* Add more database connectors (https://github.com/microsoft/data-formulator/issues/156) +* Scale up the messy data extractor: more document types and larger files. +* Add more chart templates (e.g., maps). +* Other ideas? 
+ +## Research Papers +* [Data Formulator 2: Iteratively Creating Rich Visualizations with AI](https://arxiv.org/abs/2408.16119) + +``` +@article{wang2024dataformulator2iteratively, + title={Data Formulator 2: Iteratively Creating Rich Visualizations with AI}, + author={Chenglong Wang and Bongshin Lee and Steven Drucker and Dan Marshall and Jianfeng Gao}, + year={2024}, + booktitle={ArXiv preprint arXiv:2408.16119}, +} +``` + +* [Data Formulator: AI-powered Concept-driven Visualization Authoring](https://arxiv.org/abs/2309.10094) + +``` +@article{wang2023data, + title={Data Formulator: AI-powered Concept-driven Visualization Authoring}, + author={Wang, Chenglong and Thompson, John and Lee, Bongshin}, + journal={IEEE Transactions on Visualization and Computer Graphics}, + year={2023}, + publisher={IEEE} +} +``` + + +## Contributing + +This project welcomes contributions and suggestions. Most contributions require you to +agree to a Contributor License Agreement (CLA) declaring that you have the right to, +and actually do, grant us the rights to use your contribution. For details, visit +https://cla.microsoft.com. + +When you submit a pull request, a CLA-bot will automatically determine whether you need +to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the +instructions provided by the bot. You will only need to do this once across all repositories using our CLA. + +This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). +For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) +or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. + +## Trademarks + +This project may contain trademarks or logos for projects, products, or services. 
Authorized use of Microsoft +trademarks or logos is subject to and must follow +[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). +Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. +Any use of third-party trademarks or logos are subject to those third-party's policies. diff --git a/py-src/data_formulator.egg-info/SOURCES.txt b/py-src/data_formulator.egg-info/SOURCES.txt new file mode 100644 index 0000000..bd2eaf4 --- /dev/null +++ b/py-src/data_formulator.egg-info/SOURCES.txt @@ -0,0 +1,178 @@ +LICENSE +MANIFEST.in +README.md +pyproject.toml +py-src/data_formulator/__init__.py +py-src/data_formulator/__main__.py +py-src/data_formulator/agent_routes.py +py-src/data_formulator/app.py +py-src/data_formulator/db_manager.py +py-src/data_formulator/example_datasets_config.py +py-src/data_formulator/py_sandbox.py +py-src/data_formulator/tables_routes.py +py-src/data_formulator.egg-info/PKG-INFO +py-src/data_formulator.egg-info/SOURCES.txt +py-src/data_formulator.egg-info/dependency_links.txt +py-src/data_formulator.egg-info/entry_points.txt +py-src/data_formulator.egg-info/requires.txt +py-src/data_formulator.egg-info/top_level.txt +py-src/data_formulator/agents/__init__.py +py-src/data_formulator/agents/agent_code_explanation.py +py-src/data_formulator/agents/agent_concept_derive.py +py-src/data_formulator/agents/agent_data_clean.py +py-src/data_formulator/agents/agent_data_clean_stream.py +py-src/data_formulator/agents/agent_data_load.py +py-src/data_formulator/agents/agent_exploration.py +py-src/data_formulator/agents/agent_interactive_explore.py +py-src/data_formulator/agents/agent_py_concept_derive.py +py-src/data_formulator/agents/agent_py_data_rec.py +py-src/data_formulator/agents/agent_py_data_transform.py +py-src/data_formulator/agents/agent_query_completion.py 
+py-src/data_formulator/agents/agent_report_gen.py +py-src/data_formulator/agents/agent_sort_data.py +py-src/data_formulator/agents/agent_sql_data_rec.py +py-src/data_formulator/agents/agent_sql_data_transform.py +py-src/data_formulator/agents/agent_utils.py +py-src/data_formulator/agents/client_utils.py +py-src/data_formulator/agents/web_utils.py +py-src/data_formulator/data_loader/__init__.py +py-src/data_formulator/data_loader/azure_blob_data_loader.py +py-src/data_formulator/data_loader/bigquery_data_loader.py +py-src/data_formulator/data_loader/external_data_loader.py +py-src/data_formulator/data_loader/kusto_data_loader.py +py-src/data_formulator/data_loader/mongodb_data_loader.py +py-src/data_formulator/data_loader/mssql_data_loader.py +py-src/data_formulator/data_loader/mysql_data_loader.py +py-src/data_formulator/data_loader/postgresql_data_loader.py +py-src/data_formulator/data_loader/s3_data_loader.py +py-src/data_formulator/dist/DataFormulator.js +py-src/data_formulator/dist/data-formulator-screenshot-v0.5.png +py-src/data_formulator/dist/data-formulator-screenshot-v0.5.webp +py-src/data_formulator/dist/data-formulator-screenshot.png +py-src/data_formulator/dist/df_gas_prices.json +py-src/data_formulator/dist/df_global_energy.json +py-src/data_formulator/dist/df_movies.json +py-src/data_formulator/dist/df_unemployment.json +py-src/data_formulator/dist/favicon.ico +py-src/data_formulator/dist/feature-agent-mode.mp4 +py-src/data_formulator/dist/feature-extract-data.mp4 +py-src/data_formulator/dist/feature-generate-report.mp4 +py-src/data_formulator/dist/feature-interactive-control.mp4 +py-src/data_formulator/dist/gas_prices-thumbnail.webp +py-src/data_formulator/dist/global_energy-thumbnail.webp +py-src/data_formulator/dist/index.html +py-src/data_formulator/dist/manifest.json +py-src/data_formulator/dist/movies-thumbnail.webp +py-src/data_formulator/dist/pip-logo.svg +py-src/data_formulator/dist/robots.txt 
+py-src/data_formulator/dist/screenshot-claude-performance.webp +py-src/data_formulator/dist/screenshot-movies-report.webp +py-src/data_formulator/dist/screenshot-renewable-energy.webp +py-src/data_formulator/dist/screenshot-unemployment.webp +py-src/data_formulator/dist/unemployment-thumbnail.webp +py-src/data_formulator/dist/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 +py-src/data_formulator/dist/assets/KaTeX_AMS-Regular-DMm9YOAa.woff +py-src/data_formulator/dist/assets/KaTeX_AMS-Regular-DRggAlZN.ttf +py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf +py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff +py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 +py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff +py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 +py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf +py-src/data_formulator/dist/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf +py-src/data_formulator/dist/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff +py-src/data_formulator/dist/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 +py-src/data_formulator/dist/assets/KaTeX_Fraktur-Regular-CB_wures.ttf +py-src/data_formulator/dist/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 +py-src/data_formulator/dist/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff +py-src/data_formulator/dist/assets/KaTeX_Main-Bold-Cx986IdX.woff2 +py-src/data_formulator/dist/assets/KaTeX_Main-Bold-Jm3AIy58.woff +py-src/data_formulator/dist/assets/KaTeX_Main-Bold-waoOVXN0.ttf +py-src/data_formulator/dist/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 +py-src/data_formulator/dist/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf +py-src/data_formulator/dist/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff +py-src/data_formulator/dist/assets/KaTeX_Main-Italic-3WenGoN9.ttf +py-src/data_formulator/dist/assets/KaTeX_Main-Italic-BMLOBm91.woff +py-src/data_formulator/dist/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 
+py-src/data_formulator/dist/assets/KaTeX_Main-Regular-B22Nviop.woff2 +py-src/data_formulator/dist/assets/KaTeX_Main-Regular-Dr94JaBh.woff +py-src/data_formulator/dist/assets/KaTeX_Main-Regular-ypZvNtVU.ttf +py-src/data_formulator/dist/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf +py-src/data_formulator/dist/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 +py-src/data_formulator/dist/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff +py-src/data_formulator/dist/assets/KaTeX_Math-Italic-DA0__PXp.woff +py-src/data_formulator/dist/assets/KaTeX_Math-Italic-flOr_0UB.ttf +py-src/data_formulator/dist/assets/KaTeX_Math-Italic-t53AETM-.woff2 +py-src/data_formulator/dist/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf +py-src/data_formulator/dist/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 +py-src/data_formulator/dist/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff +py-src/data_formulator/dist/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 +py-src/data_formulator/dist/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff +py-src/data_formulator/dist/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf +py-src/data_formulator/dist/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf +py-src/data_formulator/dist/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff +py-src/data_formulator/dist/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 +py-src/data_formulator/dist/assets/KaTeX_Script-Regular-C5JkGWo-.ttf +py-src/data_formulator/dist/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 +py-src/data_formulator/dist/assets/KaTeX_Script-Regular-D5yQViql.woff +py-src/data_formulator/dist/assets/KaTeX_Size1-Regular-C195tn64.woff +py-src/data_formulator/dist/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf +py-src/data_formulator/dist/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 +py-src/data_formulator/dist/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf +py-src/data_formulator/dist/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 +py-src/data_formulator/dist/assets/KaTeX_Size2-Regular-oD1tc_U0.woff +py-src/data_formulator/dist/assets/KaTeX_Size3-Regular-CTq5MqoE.woff 
+py-src/data_formulator/dist/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf +py-src/data_formulator/dist/assets/KaTeX_Size4-Regular-BF-4gkZK.woff +py-src/data_formulator/dist/assets/KaTeX_Size4-Regular-DWFBv043.ttf +py-src/data_formulator/dist/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 +py-src/data_formulator/dist/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff +py-src/data_formulator/dist/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 +py-src/data_formulator/dist/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf +py-src/data_formulator/dist/assets/chart-icon-dotted-line-min-iiUsajLi.png +py-src/data_formulator/dist/assets/chart-icon-pie-min-OPN8AStu.png +py-src/data_formulator/dist/assets/chart-icon-us-map-min-CBKwYuWG.png +py-src/data_formulator/dist/assets/df-logo-7DDRLOPN.png +py-src/data_formulator/dist/assets/example-image-table-Dt9uSzww.png +py-src/data_formulator/dist/assets/index-Bxecun_h.css +py-src/data_formulator/dist/assets/roboto-all-300-normal-lRRuIfal.woff +py-src/data_formulator/dist/assets/roboto-all-400-normal-BZJ9QssU.woff +py-src/data_formulator/dist/assets/roboto-all-500-normal-B0NPRryQ.woff +py-src/data_formulator/dist/assets/roboto-all-700-normal-BfaNsj0k.woff +py-src/data_formulator/dist/assets/roboto-cyrillic-300-normal-D6mjswgs.woff2 +py-src/data_formulator/dist/assets/roboto-cyrillic-400-normal-DVDTZtmW.woff2 +py-src/data_formulator/dist/assets/roboto-cyrillic-500-normal-DAkZhMOh.woff2 +py-src/data_formulator/dist/assets/roboto-cyrillic-700-normal-B5ZBKWCH.woff2 +py-src/data_formulator/dist/assets/roboto-cyrillic-ext-300-normal-TzZWIuiO.woff2 +py-src/data_formulator/dist/assets/roboto-cyrillic-ext-400-normal-DORK9bGA.woff2 +py-src/data_formulator/dist/assets/roboto-cyrillic-ext-500-normal-G9W8hgzQ.woff2 +py-src/data_formulator/dist/assets/roboto-cyrillic-ext-700-normal-CsrCEJIc.woff2 +py-src/data_formulator/dist/assets/roboto-greek-300-normal-ndiuWqED.woff2 +py-src/data_formulator/dist/assets/roboto-greek-400-normal-BRWHCUYo.woff2 
+py-src/data_formulator/dist/assets/roboto-greek-500-normal-CpESfwfG.woff2 +py-src/data_formulator/dist/assets/roboto-greek-700-normal-Cc2Tq8FV.woff2 +py-src/data_formulator/dist/assets/roboto-latin-300-normal-ThHrQhYb.woff2 +py-src/data_formulator/dist/assets/roboto-latin-400-normal-mTIRXP6Y.woff2 +py-src/data_formulator/dist/assets/roboto-latin-500-normal-Dxdx3aXO.woff2 +py-src/data_formulator/dist/assets/roboto-latin-700-normal-CeM5gOv8.woff2 +py-src/data_formulator/dist/assets/roboto-latin-ext-300-normal-DEsNdRC-.woff2 +py-src/data_formulator/dist/assets/roboto-latin-ext-400-normal-4bLplyDh.woff2 +py-src/data_formulator/dist/assets/roboto-latin-ext-500-normal-BWKy6SgX.woff2 +py-src/data_formulator/dist/assets/roboto-latin-ext-700-normal-BYGCo3Go.woff2 +py-src/data_formulator/dist/assets/roboto-vietnamese-300-normal-CnPrVvBs.woff2 +py-src/data_formulator/dist/assets/roboto-vietnamese-400-normal-kCRe3VZk.woff2 +py-src/data_formulator/dist/assets/roboto-vietnamese-500-normal-CcijQRVW.woff2 +py-src/data_formulator/dist/assets/roboto-vietnamese-700-normal-SekShQfT.woff2 +py-src/data_formulator/dist/assets/vendor-d3-Cp0k42Wh.js +py-src/data_formulator/dist/assets/vendor-editor-DRupLUfx.js +py-src/data_formulator/dist/assets/vendor-markdown-Bo_kcIeG.js +py-src/data_formulator/dist/assets/vendor-misc-DMdy3E0c.js +py-src/data_formulator/dist/assets/vendor-mui-DGjarBfn.js +py-src/data_formulator/dist/assets/vendor-react-Z9Lqh7fp.js +py-src/data_formulator/dist/assets/vendor-utils-C67Rz6YQ.js +py-src/data_formulator/dist/assets/vendor-vega-DlwJlvTN.js +py-src/data_formulator/security/__init__.py +py-src/data_formulator/security/query_validator.py +py-src/data_formulator/workflows/__init__.py +py-src/data_formulator/workflows/create_vl_plots.py +py-src/data_formulator/workflows/exploration_flow.py \ No newline at end of file diff --git a/py-src/data_formulator.egg-info/dependency_links.txt b/py-src/data_formulator.egg-info/dependency_links.txt new file mode 100644 index 
0000000..8b13789 --- /dev/null +++ b/py-src/data_formulator.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/py-src/data_formulator.egg-info/entry_points.txt b/py-src/data_formulator.egg-info/entry_points.txt new file mode 100644 index 0000000..485deb5 --- /dev/null +++ b/py-src/data_formulator.egg-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +data_formulator = data_formulator:run_app diff --git a/py-src/data_formulator.egg-info/requires.txt b/py-src/data_formulator.egg-info/requires.txt new file mode 100644 index 0000000..5311365 --- /dev/null +++ b/py-src/data_formulator.egg-info/requires.txt @@ -0,0 +1,25 @@ +jupyter +pandas +flask +flask-cors +openai +python-dotenv +vega_datasets +litellm +duckdb +numpy +vl-convert-python +backoff +beautifulsoup4 +scikit-learn +azure-identity +azure-kusto-data +azure-keyvault-secrets +azure-storage-blob +google-cloud-bigquery +google-auth +db-dtypes +boto3 +pymysql +pyodbc +pymongo diff --git a/py-src/data_formulator.egg-info/top_level.txt b/py-src/data_formulator.egg-info/top_level.txt new file mode 100644 index 0000000..9d9ca16 --- /dev/null +++ b/py-src/data_formulator.egg-info/top_level.txt @@ -0,0 +1 @@ +data_formulator diff --git a/py-src/data_formulator/tables_routes.py b/py-src/data_formulator/tables_routes.py index 3e3776a..0ffaad3 100644 --- a/py-src/data_formulator/tables_routes.py +++ b/py-src/data_formulator/tables_routes.py @@ -862,10 +862,26 @@ def refresh_derived_data(): if not derived_tables: return jsonify({"status": "error", "message": "No derived tables to refresh"}), 400 - # Validate updated table has same columns as before + # Validate updated table has expected structure updated_table_name = updated_table['name'] updated_columns = set(updated_table['columns']) + # Verify columns match by checking against database schema + with db_manager.connection(session['session_id']) as db: + try: + existing_columns = [col[0] for col in db.execute(f"DESCRIBE {updated_table_name}").fetchall()] + 
existing_columns_set = set(existing_columns) + + # Validate that all existing columns are present in updated data + if not existing_columns_set.issubset(updated_columns): + missing = existing_columns_set - updated_columns + return jsonify({ + "status": "error", + "message": f"Updated data is missing required columns: {', '.join(missing)}" + }), 400 + except Exception as e: + logger.warning(f"Could not validate columns for {updated_table_name}: {str(e)}") + results = [] # Process each derived table @@ -896,8 +912,8 @@ def refresh_derived_data(): if exec_result['status'] == 'ok': output_df = exec_result['content'] - # Convert to records format for JSON serialization - rows = json.loads(output_df.to_json(orient='records', date_format='iso')) + # Convert to records format efficiently + rows = output_df.to_dict(orient='records') columns = list(output_df.columns) results.append({ diff --git a/src/views/DataThread.tsx b/src/views/DataThread.tsx index 5be62bb..d199972 100644 --- a/src/views/DataThread.tsx +++ b/src/views/DataThread.tsx @@ -229,14 +229,21 @@ const RefreshDataDialog = memo<{ return 'Data must be a non-empty array of objects'; } - const newColumns = Object.keys(data[0]); const expectedColumns = new Set(tableColumns); - const actualColumns = new Set(newColumns); - // Check if all expected columns are present - for (const col of expectedColumns) { - if (!actualColumns.has(col)) { - return `Missing required column: ${col}`; + // Validate all objects have the same columns + for (const row of data) { + if (typeof row !== 'object' || row === null) { + return 'All data elements must be objects'; + } + + const rowColumns = new Set(Object.keys(row)); + + // Check if all expected columns are present + for (const col of expectedColumns) { + if (!rowColumns.has(col)) { + return `Missing required column: ${col}`; + } } } @@ -346,9 +353,7 @@ const RefreshDataDialog = memo<{ placeholder='[{"col1": "value1", "col2": "value2"}, ...]' fullWidth multiline - slotProps={{ - 
inputLabel: {shrink: true}, - }} + InputLabelProps={{ shrink: true }} minRows={4} maxRows={10} variant="outlined" @@ -770,9 +775,12 @@ let SingleThreadGroupView: FC<{ })); // Find all derived tables that depend on this table - const derivedTables = tables.filter(t => - t.derive?.source?.includes(selectedTableForRefresh.id) - ); + const derivedTables = tables.filter(t => { + if (!t.derive?.source) return false; + // Check if source is an array or string and handle accordingly + const sources = Array.isArray(t.derive.source) ? t.derive.source : [t.derive.source]; + return sources.includes(selectedTableForRefresh.id); + }); if (derivedTables.length > 0) { // Call the refresh-derived-data endpoint From e06101440d6a154b0c177d13901943a9a7a52818 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 15 Jan 2026 23:58:27 +0000 Subject: [PATCH 4/4] Update .gitignore to exclude egg-info directory Co-authored-by: Chenglong-MS <93549116+Chenglong-MS@users.noreply.github.com> --- .gitignore | 1 + py-src/data_formulator.egg-info/PKG-INFO | 305 ------------------ py-src/data_formulator.egg-info/SOURCES.txt | 178 ---------- .../dependency_links.txt | 1 - .../data_formulator.egg-info/entry_points.txt | 2 - py-src/data_formulator.egg-info/requires.txt | 25 -- py-src/data_formulator.egg-info/top_level.txt | 1 - 7 files changed, 1 insertion(+), 512 deletions(-) delete mode 100644 py-src/data_formulator.egg-info/PKG-INFO delete mode 100644 py-src/data_formulator.egg-info/SOURCES.txt delete mode 100644 py-src/data_formulator.egg-info/dependency_links.txt delete mode 100644 py-src/data_formulator.egg-info/entry_points.txt delete mode 100644 py-src/data_formulator.egg-info/requires.txt delete mode 100644 py-src/data_formulator.egg-info/top_level.txt diff --git a/.gitignore b/.gitignore index df0ef7c..8ca846c 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ .DS_Store build/ dist/ +*.egg-info/ ## Ignore Visual Studio temporary 
files, build results, and ## files generated by popular Visual Studio add-ons. diff --git a/py-src/data_formulator.egg-info/PKG-INFO b/py-src/data_formulator.egg-info/PKG-INFO deleted file mode 100644 index 119ac3e..0000000 --- a/py-src/data_formulator.egg-info/PKG-INFO +++ /dev/null @@ -1,305 +0,0 @@ -Metadata-Version: 2.4 -Name: data_formulator -Version: 0.5.1 -Summary: Data Formulator is research protoype data visualization tool powered by AI. -Author-email: Chenglong Wang , Dan Marshall -License-Expression: MIT -Project-URL: Homepage, https://github.com/microsoft/data-formulator -Project-URL: Repository, https://github.com/microsoft/data-formulator.git -Project-URL: Bug Tracker, https://github.com/microsoft/data-formulator/issues -Keywords: data visualization,LLM,AI -Classifier: Development Status :: 4 - Beta -Classifier: Programming Language :: Python -Requires-Python: >=3.9 -Description-Content-Type: text/markdown -License-File: LICENSE -Requires-Dist: jupyter -Requires-Dist: pandas -Requires-Dist: flask -Requires-Dist: flask-cors -Requires-Dist: openai -Requires-Dist: python-dotenv -Requires-Dist: vega_datasets -Requires-Dist: litellm -Requires-Dist: duckdb -Requires-Dist: numpy -Requires-Dist: vl-convert-python -Requires-Dist: backoff -Requires-Dist: beautifulsoup4 -Requires-Dist: scikit-learn -Requires-Dist: azure-identity -Requires-Dist: azure-kusto-data -Requires-Dist: azure-keyvault-secrets -Requires-Dist: azure-storage-blob -Requires-Dist: google-cloud-bigquery -Requires-Dist: google-auth -Requires-Dist: db-dtypes -Requires-Dist: boto3 -Requires-Dist: pymysql -Requires-Dist: pyodbc -Requires-Dist: pymongo -Dynamic: license-file - -

- Data Formulator icon  - Data Formulator: AI-powered Data Visualization -

- - -

- 🪄 Explore data with visualizations, powered by AI agents. -

- -

- Try Online Demo -   - Install Locally -

- -

- arXiv  - License: MIT  - YouTube  - build  - Discord -

- - - - - - - - - -## News 🔥🔥🔥 -[12-08-2025] **Data Formulator 0.5.1** — Connect more, visualize more, move faster -- 🔌 **Community data loaders**: Google BigQuery, MySQL, Postgres, MongoDB -- 📊 **New chart types**: US Map & Pie Chart (more to be added soon) -- ✏️ **Editable reports**: Refine generated reports with [Chartifact](https://github.com/microsoft/chartifact) in markdown style. [demo](https://github.com/microsoft/data-formulator/pull/200#issue-3635408217) -- ⚡ **Snappier UI**: Noticeably faster interactions across the board - -[11-07-2025] Data Formulator 0.5: Vibe with your data, in control - -- 📊 **Load (almost) any data**: load structured data, extract data from screenshots, from messy text blocks, or connect to databases. -- 🤖 **Explore data with AI agents**: - - In agent mode, provide a high-level goal and ask agents to explore data for you. - - To stay in control, directly interact with agents: ask for recommendations or specify chart designs with UI + NL inputs, and AI agents will formulate data to realize your design. - - Use data threads to control branching exploration paths: backtrack, branch, or follow up. -- ✅ **Verify AI generated results**: interact with charts and inspect data, formulas, explanations, and code. -- 📝 **Create reports to share insights**: choose charts you want to share, and ask agents to create reports grounded in data formulated throughout exploration. 
- -## Previous Updates - -Here are milestones that lead to the current design: -- **v0.2.2** ([Demo](https://github.com/microsoft/data-formulator/pull/176)): Goal-driven exploration with agent recommendations and performance improvements -- **v0.2.1.3/4** ([Readme](https://github.com/microsoft/data-formulator/tree/main/py-src/data_formulator/data_loader) | [Demo](https://github.com/microsoft/data-formulator/pull/155)): External data loaders (MySQL, PostgreSQL, MSSQL, Azure Data Explorer, S3, Azure Blob) -- **v0.2** ([Demos](https://github.com/microsoft/data-formulator/releases/tag/0.2)): Large data support with DuckDB integration -- **v0.1.7** ([Demos](https://github.com/microsoft/data-formulator/releases/tag/0.1.7)): Dataset anchoring for cleaner workflows -- **v0.1.6** ([Demo](https://github.com/microsoft/data-formulator/releases/tag/0.1.6)): Multi-table support with automatic joins -- **Model Support**: OpenAI, Azure, Ollama, Anthropic via [LiteLLM](https://github.com/BerriAI/litellm) ([feedback](https://github.com/microsoft/data-formulator/issues/49)) -- **Python Package**: Easy local installation ([try it](#get-started)) -- **Visualization Challenges**: Test your skills ([challenges](https://github.com/microsoft/data-formulator/issues/53)) -- **Data Extraction**: Parse data from images and text ([demo](https://github.com/microsoft/data-formulator/pull/31#issuecomment-2403652717)) -- **Initial Release**: [Blog](https://www.microsoft.com/en-us/research/blog/data-formulator-exploring-how-ai-can-help-analysts-create-rich-data-visualizations/) | [Video](https://youtu.be/3ndlwt0Wi3c) - -
-View detailed update history - -- [07-10-2025] Data Formulator 0.2.2: Start with an analysis goal - - Some key frontend performance updates. - - You can start your exploration with a goal, or, tab and see if the agent can recommend some good exploration ideas for you. [Demo](https://github.com/microsoft/data-formulator/pull/176) - -- [05-13-2025] Data Formulator 0.2.1.3/4: External Data Loader - - We introduced external data loader class to make import data easier. [Readme](https://github.com/microsoft/data-formulator/tree/main/py-src/data_formulator/data_loader) and [Demo](https://github.com/microsoft/data-formulator/pull/155) - - Current data loaders: MySQL, Azure Data Explorer (Kusto), Azure Blob and Amazon S3 (json, parquet, csv). - - [07-01-2025] Updated with: Postgresql, mssql. - - Call for action [link](https://github.com/microsoft/data-formulator/issues/156): - - Users: let us know which data source you'd like to load data from. - - Developers: let's build more data loaders. - -- [04-23-2025] Data Formulator 0.2: working with *large* data 📦📦📦 - - Explore large data by: - 1. Upload large data file to the local database (powered by [DuckDB](https://github.com/duckdb/duckdb)). - 2. Use drag-and-drop to specify charts, and Data Formulator dynamically fetches data from the database to create visualizations (with ⚡️⚡️⚡️ speeds). - 3. Work with AI agents: they generate SQL queries to transform the data to create rich visualizations! - 4. Anchor the result / follow up / create a new branch / join tables; let's dive deeper. - - Checkout the demos at [[https://github.com/microsoft/data-formulator/releases/tag/0.2]](https://github.com/microsoft/data-formulator/releases/tag/0.2) - - Improved overall system performance, and enjoy the updated derive concept functionality. - -- [03-20-2025] Data Formulator 0.1.7: Anchoring ⚓︎ - - Anchor an intermediate dataset, so that followup data analysis are built on top of the anchored data, not the original one. 
- - Clean a data and work with only the cleaned data; create a subset from the original data or join multiple data, and then go from there. AI agents will be less likely to get confused and work faster. ⚡️⚡️ - - Check out the demos at [[https://github.com/microsoft/data-formulator/releases/tag/0.1.7]](https://github.com/microsoft/data-formulator/releases/tag/0.1.7) - - Don't forget to update Data Formulator to test it out! - -- [02-20-2025] Data Formulator 0.1.6 released! - - Now supports working with multiple datasets at once! Tell Data Formulator which data tables you would like to use in the encoding shelf, and it will figure out how to join the tables to create a visualization to answer your question. 🪄 - - Checkout the demo at [[https://github.com/microsoft/data-formulator/releases/tag/0.1.6]](https://github.com/microsoft/data-formulator/releases/tag/0.1.6). - - Update your Data Formulator to the latest version to play with the new features. - -- [02-12-2025] More models supported now! - - Now supports OpenAI, Azure, Ollama, and Anthropic models (and more powered by [LiteLLM](https://github.com/BerriAI/litellm)); - - Models with strong code generation and instruction following capabilities are recommended (gpt-4o, claude-3-5-sonnet etc.); - - You can store API keys in `api-keys.env` to avoid typing them every time (see template `api-keys.env.template`). - - Let us know which models you have good/bad experiences with, and what models you would like to see supported! [[comment here]](https://github.com/microsoft/data-formulator/issues/49) - -- [11-07-2024] Minor fun update: data visualization challenges! - - We added a few visualization challenges with the sample datasets. Can you complete them all? [[try them out!]](https://github.com/microsoft/data-formulator/issues/53#issue-2641841252) - - Comment in the issue when you did, or share your results/questions with others! 
[[comment here]](https://github.com/microsoft/data-formulator/issues/53) - -- [10-11-2024] Data Formulator python package released! - - You can now install Data Formulator using Python and run it locally, easily. [[check it out]](#get-started). - - Our Codespaces configuration is also updated for fast start up ⚡️. [[try it now!]](https://codespaces.new/microsoft/data-formulator?quickstart=1) - - New experimental feature: load an image or a messy text, and ask AI to parse and clean it for you(!). [[demo]](https://github.com/microsoft/data-formulator/pull/31#issuecomment-2403652717) - -- [10-01-2024] Initial release of Data Formulator, check out our [[blog]](https://www.microsoft.com/en-us/research/blog/data-formulator-exploring-how-ai-can-help-analysts-create-rich-data-visualizations/) and [[video]](https://youtu.be/3ndlwt0Wi3c)! - -
- -## Overview - -**Data Formulator** is a Microsoft Research prototype for data exploration with visualizations powered by AI agents. - -Data Formulator enables analysts to iteratively explore and visualize data. Started with data in any format (screenshot, text, csv, or database), users can work with AI agents with a novel blended interface that combines *user interface interactions (UI)* and *natural language (NL) inputs* to communicate their intents, control branching exploration directions, and create reports to share their insights. - -## Get Started - -Play with Data Formulator with one of the following options: - -- **Option 1: Install via Python PIP** - - Use Python PIP for an easy setup experience, running locally (recommend: install it in a virtual environment). - - ```bash - # install data_formulator - pip install data_formulator - - # Run data formulator with this command - python -m data_formulator - ``` - - Data Formulator will be automatically opened in the browser at [http://localhost:5000](http://localhost:5000). - - *you can specify the port number (e.g., 8080) by `python -m data_formulator --port 8080` if the default port is occupied.* - -- **Option 2: Codespaces (5 minutes)** - - You can also run Data Formulator in Codespaces; we have everything pre-configured. For more details, see [CODESPACES.md](CODESPACES.md). - - [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/microsoft/data-formulator?quickstart=1) - -- **Option 3: Working in the developer mode** - - You can build Data Formulator locally if you prefer full control over your development environment and develop your own version on top. For detailed instructions, refer to [DEVELOPMENT.md](DEVELOPMENT.md). 
- - -## Using Data Formulator - -### Load Data - -Besides uploading csv, tsv or xlsx files that contain structured data, you can ask Data Formulator to extract data from screenshots, text blocks or websites, or load data from databases use connectors. Then you are ready to explore. - -image - -### Explore Data - -There are four levels to explore data based depending on whether you want more vibe or more control: - -- Level 1 (most control): Create charts with UI via drag-and-drop, if all fields to be visualized are already in the data. -- Level 2: Specify chart designs with natural language + NL. Describe how new fields should be visualized in your chart, AI will automatically transform data to realize the design. -- Level 3: Get recommendations: Ask AI agents to recommend charts directly from NL descriptions, or even directly ask for exploration ideas. -- Level 4 (most vibe): In agent mode, provide a high-level goal and let AI agents automatically plan and explore data in multiple turns. Exploration threads will be created automatically. - -https://github.com/user-attachments/assets/164aff58-9f93-4792-b8ed-9944578fbb72 - -- Level 5: In practice, leverage all of them to keep up with both vibe and control! - -### Create Reports - -Use the report builder to compose a report of the style you like, based on selected charts. Then share the reports to others! - - - -## Developers' Guide - -Follow the [developers' instructions](DEVELOPMENT.md) to build your new data analysis tools on top of Data Formulator. - -Help wanted: - -* Add more database connectors (https://github.com/microsoft/data-formulator/issues/156) -* Scaling up messy data extractor: more document types and larger files. -* Adding more chart templates (e.g., maps). -* other ideas? 
- -## Research Papers -* [Data Formulator 2: Iteratively Creating Rich Visualizations with AI](https://arxiv.org/abs/2408.16119) - -``` -@article{wang2024dataformulator2iteratively, - title={Data Formulator 2: Iteratively Creating Rich Visualizations with AI}, - author={Chenglong Wang and Bongshin Lee and Steven Drucker and Dan Marshall and Jianfeng Gao}, - year={2024}, - booktitle={ArXiv preprint arXiv:2408.16119}, -} -``` - -* [Data Formulator: AI-powered Concept-driven Visualization Authoring](https://arxiv.org/abs/2309.10094) - -``` -@article{wang2023data, - title={Data Formulator: AI-powered Concept-driven Visualization Authoring}, - author={Wang, Chenglong and Thompson, John and Lee, Bongshin}, - journal={IEEE Transactions on Visualization and Computer Graphics}, - year={2023}, - publisher={IEEE} -} -``` - - -## Contributing - -This project welcomes contributions and suggestions. Most contributions require you to -agree to a Contributor License Agreement (CLA) declaring that you have the right to, -and actually do, grant us the rights to use your contribution. For details, visit -https://cla.microsoft.com. - -When you submit a pull request, a CLA-bot will automatically determine whether you need -to provide a CLA and decorate the PR appropriately (e.g., label, comment). Simply follow the -instructions provided by the bot. You will only need to do this once across all repositories using our CLA. - -This project has adopted the [Microsoft Open Source Code of Conduct](https://opensource.microsoft.com/codeofconduct/). -For more information see the [Code of Conduct FAQ](https://opensource.microsoft.com/codeofconduct/faq/) -or contact [opencode@microsoft.com](mailto:opencode@microsoft.com) with any additional questions or comments. - -## Trademarks - -This project may contain trademarks or logos for projects, products, or services. 
Authorized use of Microsoft -trademarks or logos is subject to and must follow -[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). -Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. -Any use of third-party trademarks or logos are subject to those third-party's policies. diff --git a/py-src/data_formulator.egg-info/SOURCES.txt b/py-src/data_formulator.egg-info/SOURCES.txt deleted file mode 100644 index bd2eaf4..0000000 --- a/py-src/data_formulator.egg-info/SOURCES.txt +++ /dev/null @@ -1,178 +0,0 @@ -LICENSE -MANIFEST.in -README.md -pyproject.toml -py-src/data_formulator/__init__.py -py-src/data_formulator/__main__.py -py-src/data_formulator/agent_routes.py -py-src/data_formulator/app.py -py-src/data_formulator/db_manager.py -py-src/data_formulator/example_datasets_config.py -py-src/data_formulator/py_sandbox.py -py-src/data_formulator/tables_routes.py -py-src/data_formulator.egg-info/PKG-INFO -py-src/data_formulator.egg-info/SOURCES.txt -py-src/data_formulator.egg-info/dependency_links.txt -py-src/data_formulator.egg-info/entry_points.txt -py-src/data_formulator.egg-info/requires.txt -py-src/data_formulator.egg-info/top_level.txt -py-src/data_formulator/agents/__init__.py -py-src/data_formulator/agents/agent_code_explanation.py -py-src/data_formulator/agents/agent_concept_derive.py -py-src/data_formulator/agents/agent_data_clean.py -py-src/data_formulator/agents/agent_data_clean_stream.py -py-src/data_formulator/agents/agent_data_load.py -py-src/data_formulator/agents/agent_exploration.py -py-src/data_formulator/agents/agent_interactive_explore.py -py-src/data_formulator/agents/agent_py_concept_derive.py -py-src/data_formulator/agents/agent_py_data_rec.py -py-src/data_formulator/agents/agent_py_data_transform.py -py-src/data_formulator/agents/agent_query_completion.py 
-py-src/data_formulator/agents/agent_report_gen.py -py-src/data_formulator/agents/agent_sort_data.py -py-src/data_formulator/agents/agent_sql_data_rec.py -py-src/data_formulator/agents/agent_sql_data_transform.py -py-src/data_formulator/agents/agent_utils.py -py-src/data_formulator/agents/client_utils.py -py-src/data_formulator/agents/web_utils.py -py-src/data_formulator/data_loader/__init__.py -py-src/data_formulator/data_loader/azure_blob_data_loader.py -py-src/data_formulator/data_loader/bigquery_data_loader.py -py-src/data_formulator/data_loader/external_data_loader.py -py-src/data_formulator/data_loader/kusto_data_loader.py -py-src/data_formulator/data_loader/mongodb_data_loader.py -py-src/data_formulator/data_loader/mssql_data_loader.py -py-src/data_formulator/data_loader/mysql_data_loader.py -py-src/data_formulator/data_loader/postgresql_data_loader.py -py-src/data_formulator/data_loader/s3_data_loader.py -py-src/data_formulator/dist/DataFormulator.js -py-src/data_formulator/dist/data-formulator-screenshot-v0.5.png -py-src/data_formulator/dist/data-formulator-screenshot-v0.5.webp -py-src/data_formulator/dist/data-formulator-screenshot.png -py-src/data_formulator/dist/df_gas_prices.json -py-src/data_formulator/dist/df_global_energy.json -py-src/data_formulator/dist/df_movies.json -py-src/data_formulator/dist/df_unemployment.json -py-src/data_formulator/dist/favicon.ico -py-src/data_formulator/dist/feature-agent-mode.mp4 -py-src/data_formulator/dist/feature-extract-data.mp4 -py-src/data_formulator/dist/feature-generate-report.mp4 -py-src/data_formulator/dist/feature-interactive-control.mp4 -py-src/data_formulator/dist/gas_prices-thumbnail.webp -py-src/data_formulator/dist/global_energy-thumbnail.webp -py-src/data_formulator/dist/index.html -py-src/data_formulator/dist/manifest.json -py-src/data_formulator/dist/movies-thumbnail.webp -py-src/data_formulator/dist/pip-logo.svg -py-src/data_formulator/dist/robots.txt 
-py-src/data_formulator/dist/screenshot-claude-performance.webp -py-src/data_formulator/dist/screenshot-movies-report.webp -py-src/data_formulator/dist/screenshot-renewable-energy.webp -py-src/data_formulator/dist/screenshot-unemployment.webp -py-src/data_formulator/dist/unemployment-thumbnail.webp -py-src/data_formulator/dist/assets/KaTeX_AMS-Regular-BQhdFMY1.woff2 -py-src/data_formulator/dist/assets/KaTeX_AMS-Regular-DMm9YOAa.woff -py-src/data_formulator/dist/assets/KaTeX_AMS-Regular-DRggAlZN.ttf -py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Bold-ATXxdsX0.ttf -py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Bold-BEiXGLvX.woff -py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Bold-Dq_IR9rO.woff2 -py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Regular-CTRA-rTL.woff -py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Regular-Di6jR-x-.woff2 -py-src/data_formulator/dist/assets/KaTeX_Caligraphic-Regular-wX97UBjC.ttf -py-src/data_formulator/dist/assets/KaTeX_Fraktur-Bold-BdnERNNW.ttf -py-src/data_formulator/dist/assets/KaTeX_Fraktur-Bold-BsDP51OF.woff -py-src/data_formulator/dist/assets/KaTeX_Fraktur-Bold-CL6g_b3V.woff2 -py-src/data_formulator/dist/assets/KaTeX_Fraktur-Regular-CB_wures.ttf -py-src/data_formulator/dist/assets/KaTeX_Fraktur-Regular-CTYiF6lA.woff2 -py-src/data_formulator/dist/assets/KaTeX_Fraktur-Regular-Dxdc4cR9.woff -py-src/data_formulator/dist/assets/KaTeX_Main-Bold-Cx986IdX.woff2 -py-src/data_formulator/dist/assets/KaTeX_Main-Bold-Jm3AIy58.woff -py-src/data_formulator/dist/assets/KaTeX_Main-Bold-waoOVXN0.ttf -py-src/data_formulator/dist/assets/KaTeX_Main-BoldItalic-DxDJ3AOS.woff2 -py-src/data_formulator/dist/assets/KaTeX_Main-BoldItalic-DzxPMmG6.ttf -py-src/data_formulator/dist/assets/KaTeX_Main-BoldItalic-SpSLRI95.woff -py-src/data_formulator/dist/assets/KaTeX_Main-Italic-3WenGoN9.ttf -py-src/data_formulator/dist/assets/KaTeX_Main-Italic-BMLOBm91.woff -py-src/data_formulator/dist/assets/KaTeX_Main-Italic-NWA7e6Wa.woff2 
-py-src/data_formulator/dist/assets/KaTeX_Main-Regular-B22Nviop.woff2 -py-src/data_formulator/dist/assets/KaTeX_Main-Regular-Dr94JaBh.woff -py-src/data_formulator/dist/assets/KaTeX_Main-Regular-ypZvNtVU.ttf -py-src/data_formulator/dist/assets/KaTeX_Math-BoldItalic-B3XSjfu4.ttf -py-src/data_formulator/dist/assets/KaTeX_Math-BoldItalic-CZnvNsCZ.woff2 -py-src/data_formulator/dist/assets/KaTeX_Math-BoldItalic-iY-2wyZ7.woff -py-src/data_formulator/dist/assets/KaTeX_Math-Italic-DA0__PXp.woff -py-src/data_formulator/dist/assets/KaTeX_Math-Italic-flOr_0UB.ttf -py-src/data_formulator/dist/assets/KaTeX_Math-Italic-t53AETM-.woff2 -py-src/data_formulator/dist/assets/KaTeX_SansSerif-Bold-CFMepnvq.ttf -py-src/data_formulator/dist/assets/KaTeX_SansSerif-Bold-D1sUS0GD.woff2 -py-src/data_formulator/dist/assets/KaTeX_SansSerif-Bold-DbIhKOiC.woff -py-src/data_formulator/dist/assets/KaTeX_SansSerif-Italic-C3H0VqGB.woff2 -py-src/data_formulator/dist/assets/KaTeX_SansSerif-Italic-DN2j7dab.woff -py-src/data_formulator/dist/assets/KaTeX_SansSerif-Italic-YYjJ1zSn.ttf -py-src/data_formulator/dist/assets/KaTeX_SansSerif-Regular-BNo7hRIc.ttf -py-src/data_formulator/dist/assets/KaTeX_SansSerif-Regular-CS6fqUqJ.woff -py-src/data_formulator/dist/assets/KaTeX_SansSerif-Regular-DDBCnlJ7.woff2 -py-src/data_formulator/dist/assets/KaTeX_Script-Regular-C5JkGWo-.ttf -py-src/data_formulator/dist/assets/KaTeX_Script-Regular-D3wIWfF6.woff2 -py-src/data_formulator/dist/assets/KaTeX_Script-Regular-D5yQViql.woff -py-src/data_formulator/dist/assets/KaTeX_Size1-Regular-C195tn64.woff -py-src/data_formulator/dist/assets/KaTeX_Size1-Regular-Dbsnue_I.ttf -py-src/data_formulator/dist/assets/KaTeX_Size1-Regular-mCD8mA8B.woff2 -py-src/data_formulator/dist/assets/KaTeX_Size2-Regular-B7gKUWhC.ttf -py-src/data_formulator/dist/assets/KaTeX_Size2-Regular-Dy4dx90m.woff2 -py-src/data_formulator/dist/assets/KaTeX_Size2-Regular-oD1tc_U0.woff -py-src/data_formulator/dist/assets/KaTeX_Size3-Regular-CTq5MqoE.woff 
-py-src/data_formulator/dist/assets/KaTeX_Size3-Regular-DgpXs0kz.ttf -py-src/data_formulator/dist/assets/KaTeX_Size4-Regular-BF-4gkZK.woff -py-src/data_formulator/dist/assets/KaTeX_Size4-Regular-DWFBv043.ttf -py-src/data_formulator/dist/assets/KaTeX_Size4-Regular-Dl5lxZxV.woff2 -py-src/data_formulator/dist/assets/KaTeX_Typewriter-Regular-C0xS9mPB.woff -py-src/data_formulator/dist/assets/KaTeX_Typewriter-Regular-CO6r4hn1.woff2 -py-src/data_formulator/dist/assets/KaTeX_Typewriter-Regular-D3Ib7_Hf.ttf -py-src/data_formulator/dist/assets/chart-icon-dotted-line-min-iiUsajLi.png -py-src/data_formulator/dist/assets/chart-icon-pie-min-OPN8AStu.png -py-src/data_formulator/dist/assets/chart-icon-us-map-min-CBKwYuWG.png -py-src/data_formulator/dist/assets/df-logo-7DDRLOPN.png -py-src/data_formulator/dist/assets/example-image-table-Dt9uSzww.png -py-src/data_formulator/dist/assets/index-Bxecun_h.css -py-src/data_formulator/dist/assets/roboto-all-300-normal-lRRuIfal.woff -py-src/data_formulator/dist/assets/roboto-all-400-normal-BZJ9QssU.woff -py-src/data_formulator/dist/assets/roboto-all-500-normal-B0NPRryQ.woff -py-src/data_formulator/dist/assets/roboto-all-700-normal-BfaNsj0k.woff -py-src/data_formulator/dist/assets/roboto-cyrillic-300-normal-D6mjswgs.woff2 -py-src/data_formulator/dist/assets/roboto-cyrillic-400-normal-DVDTZtmW.woff2 -py-src/data_formulator/dist/assets/roboto-cyrillic-500-normal-DAkZhMOh.woff2 -py-src/data_formulator/dist/assets/roboto-cyrillic-700-normal-B5ZBKWCH.woff2 -py-src/data_formulator/dist/assets/roboto-cyrillic-ext-300-normal-TzZWIuiO.woff2 -py-src/data_formulator/dist/assets/roboto-cyrillic-ext-400-normal-DORK9bGA.woff2 -py-src/data_formulator/dist/assets/roboto-cyrillic-ext-500-normal-G9W8hgzQ.woff2 -py-src/data_formulator/dist/assets/roboto-cyrillic-ext-700-normal-CsrCEJIc.woff2 -py-src/data_formulator/dist/assets/roboto-greek-300-normal-ndiuWqED.woff2 -py-src/data_formulator/dist/assets/roboto-greek-400-normal-BRWHCUYo.woff2 
-py-src/data_formulator/dist/assets/roboto-greek-500-normal-CpESfwfG.woff2 -py-src/data_formulator/dist/assets/roboto-greek-700-normal-Cc2Tq8FV.woff2 -py-src/data_formulator/dist/assets/roboto-latin-300-normal-ThHrQhYb.woff2 -py-src/data_formulator/dist/assets/roboto-latin-400-normal-mTIRXP6Y.woff2 -py-src/data_formulator/dist/assets/roboto-latin-500-normal-Dxdx3aXO.woff2 -py-src/data_formulator/dist/assets/roboto-latin-700-normal-CeM5gOv8.woff2 -py-src/data_formulator/dist/assets/roboto-latin-ext-300-normal-DEsNdRC-.woff2 -py-src/data_formulator/dist/assets/roboto-latin-ext-400-normal-4bLplyDh.woff2 -py-src/data_formulator/dist/assets/roboto-latin-ext-500-normal-BWKy6SgX.woff2 -py-src/data_formulator/dist/assets/roboto-latin-ext-700-normal-BYGCo3Go.woff2 -py-src/data_formulator/dist/assets/roboto-vietnamese-300-normal-CnPrVvBs.woff2 -py-src/data_formulator/dist/assets/roboto-vietnamese-400-normal-kCRe3VZk.woff2 -py-src/data_formulator/dist/assets/roboto-vietnamese-500-normal-CcijQRVW.woff2 -py-src/data_formulator/dist/assets/roboto-vietnamese-700-normal-SekShQfT.woff2 -py-src/data_formulator/dist/assets/vendor-d3-Cp0k42Wh.js -py-src/data_formulator/dist/assets/vendor-editor-DRupLUfx.js -py-src/data_formulator/dist/assets/vendor-markdown-Bo_kcIeG.js -py-src/data_formulator/dist/assets/vendor-misc-DMdy3E0c.js -py-src/data_formulator/dist/assets/vendor-mui-DGjarBfn.js -py-src/data_formulator/dist/assets/vendor-react-Z9Lqh7fp.js -py-src/data_formulator/dist/assets/vendor-utils-C67Rz6YQ.js -py-src/data_formulator/dist/assets/vendor-vega-DlwJlvTN.js -py-src/data_formulator/security/__init__.py -py-src/data_formulator/security/query_validator.py -py-src/data_formulator/workflows/__init__.py -py-src/data_formulator/workflows/create_vl_plots.py -py-src/data_formulator/workflows/exploration_flow.py \ No newline at end of file diff --git a/py-src/data_formulator.egg-info/dependency_links.txt b/py-src/data_formulator.egg-info/dependency_links.txt deleted file mode 100644 
index 8b13789..0000000 --- a/py-src/data_formulator.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/py-src/data_formulator.egg-info/entry_points.txt b/py-src/data_formulator.egg-info/entry_points.txt deleted file mode 100644 index 485deb5..0000000 --- a/py-src/data_formulator.egg-info/entry_points.txt +++ /dev/null @@ -1,2 +0,0 @@ -[console_scripts] -data_formulator = data_formulator:run_app diff --git a/py-src/data_formulator.egg-info/requires.txt b/py-src/data_formulator.egg-info/requires.txt deleted file mode 100644 index 5311365..0000000 --- a/py-src/data_formulator.egg-info/requires.txt +++ /dev/null @@ -1,25 +0,0 @@ -jupyter -pandas -flask -flask-cors -openai -python-dotenv -vega_datasets -litellm -duckdb -numpy -vl-convert-python -backoff -beautifulsoup4 -scikit-learn -azure-identity -azure-kusto-data -azure-keyvault-secrets -azure-storage-blob -google-cloud-bigquery -google-auth -db-dtypes -boto3 -pymysql -pyodbc -pymongo diff --git a/py-src/data_formulator.egg-info/top_level.txt b/py-src/data_formulator.egg-info/top_level.txt deleted file mode 100644 index 9d9ca16..0000000 --- a/py-src/data_formulator.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -data_formulator