Spaces:
Runtime error
Runtime error
Commit
·
d197237
1
Parent(s):
0e660e8
remove test
Browse files- test_notebook.ipynb +0 -509
test_notebook.ipynb
DELETED
|
@@ -1,509 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [
|
| 3 |
-
{
|
| 4 |
-
"cell_type": "code",
|
| 5 |
-
"execution_count": 4,
|
| 6 |
-
"metadata": {},
|
| 7 |
-
"outputs": [],
|
| 8 |
-
"source": [
|
| 9 |
-
"import os\n",
|
| 10 |
-
"import pandas as pd\n",
|
| 11 |
-
"import gradio as gr\n",
|
| 12 |
-
"from pydantic import BaseModel, Field\n",
|
| 13 |
-
"\n",
|
| 14 |
-
"import langchain\n",
|
| 15 |
-
"from langchain.output_parsers import PydanticOutputParser\n",
|
| 16 |
-
"from langchain.prompts import ChatPromptTemplate\n",
|
| 17 |
-
"from langchain.prompts import ChatPromptTemplate\n",
|
| 18 |
-
"from langchain.tools import PythonAstREPLTool\n",
|
| 19 |
-
"from langchain.chat_models import ChatOpenAI\n",
|
| 20 |
-
"from langchain.schema.output_parser import StrOutputParser"
|
| 21 |
-
]
|
| 22 |
-
},
|
| 23 |
-
{
|
| 24 |
-
"cell_type": "code",
|
| 25 |
-
"execution_count": 8,
|
| 26 |
-
"metadata": {},
|
| 27 |
-
"outputs": [],
|
| 28 |
-
"source": [
|
| 29 |
-
"langchain.debug = False\n",
|
| 30 |
-
"# Throwaway key with strict usage limit\n",
|
| 31 |
-
"os.environ[\"OPENAI_API_KEY\"] = \"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\"\n",
|
| 32 |
-
"pd.set_option('display.max_columns', 20)\n",
|
| 33 |
-
"pd.set_option('display.max_rows', 20)"
|
| 34 |
-
]
|
| 35 |
-
},
|
| 36 |
-
{
|
| 37 |
-
"cell_type": "code",
|
| 38 |
-
"execution_count": 9,
|
| 39 |
-
"metadata": {},
|
| 40 |
-
"outputs": [],
|
| 41 |
-
"source": [
|
| 42 |
-
"data_dir_path = os.path.join(os.getcwd(), 'data')\n",
|
| 43 |
-
"NUM_ROWS_TO_RETURN = 5\n",
|
| 44 |
-
"\n",
|
| 45 |
-
"table_1_df = pd.read_csv(os.path.join(data_dir_path, 'legal_entries_a.csv'))\n",
|
| 46 |
-
"table_2_df = pd.read_csv(os.path.join(data_dir_path, 'legal_entries_b.csv'))\n",
|
| 47 |
-
"template_df = pd.read_csv(os.path.join(data_dir_path, 'legal_template.csv'))"
|
| 48 |
-
]
|
| 49 |
-
},
|
| 50 |
-
{
|
| 51 |
-
"cell_type": "code",
|
| 52 |
-
"execution_count": 10,
|
| 53 |
-
"metadata": {},
|
| 54 |
-
"outputs": [],
|
| 55 |
-
"source": [
|
| 56 |
-
"transform_model = ChatOpenAI(\n",
|
| 57 |
-
" model_name='gpt-4',\n",
|
| 58 |
-
" temperature=0,\n",
|
| 59 |
-
")\n",
|
| 60 |
-
"\n",
|
| 61 |
-
"natural_language_model = ChatOpenAI(\n",
|
| 62 |
-
" model_name='gpt-4',\n",
|
| 63 |
-
" temperature=0.1,\n",
|
| 64 |
-
")"
|
| 65 |
-
]
|
| 66 |
-
},
|
| 67 |
-
{
|
| 68 |
-
"cell_type": "code",
|
| 69 |
-
"execution_count": 11,
|
| 70 |
-
"metadata": {},
|
| 71 |
-
"outputs": [],
|
| 72 |
-
"source": [
|
| 73 |
-
"# TODO: add validation to models, coupled with retry mechanism in chain\n",
|
| 74 |
-
"class TableMappingEntry(BaseModel):\n",
|
| 75 |
-
" '''A single row in a table mapping. Describes how a single column in a source table maps to a single column in a target table, including any necessary transformations, and their explanations.'''\n",
|
| 76 |
-
" source_column_name: str = Field(..., description=\"Name of the column in the source table.\")\n",
|
| 77 |
-
" target_column_name: str = Field(..., description=\"Name of the column in the target table, to which the source column maps.\")\n",
|
| 78 |
-
" value_transformations: str = Field(..., description=\"Transformations needed to make the source values match the target values. If unnecessary, write 'NO_TRANSFORM'.\")\n",
|
| 79 |
-
" explanation: str = Field(..., description=\"One-sentence explanation of this row (source-target mapping/transformation). Include any information that might be relevant to a software engineer building an ETL pipeline with this document.\")\n",
|
| 80 |
-
"\n",
|
| 81 |
-
"class TableMapping(BaseModel):\n",
|
| 82 |
-
" '''A list of table mappings that collectively describe how a source table should be transformed to match the schema of a target table.'''\n",
|
| 83 |
-
" table_mappings: list[TableMappingEntry] = Field(..., description=\"A list of table mappings.\")\n",
|
| 84 |
-
" \n",
|
| 85 |
-
"analyst_prompt_str = '''\n",
|
| 86 |
-
"You are a Data Scientist, who specializes in generating schema mappings for use by Software Engineers in ETL pipelines.\n",
|
| 87 |
-
"\n",
|
| 88 |
-
"Head of `source_csv`:\n",
|
| 89 |
-
"\n",
|
| 90 |
-
"{source_1_csv_str}\n",
|
| 91 |
-
"\n",
|
| 92 |
-
"Head of `target_csv`:\n",
|
| 93 |
-
"\n",
|
| 94 |
-
"{target_csv_str}\n",
|
| 95 |
-
"\n",
|
| 96 |
-
"Your job is to generate a thorough, precise summary of how `source_csv` should be transformed to adhere exactly to the `target_csv` schema.\n",
|
| 97 |
-
"\n",
|
| 98 |
-
"For each column in the `source_csv`, you must communicate which column in the `target_csv` it maps to, and how the values in the `source_csv` column should be transformed to match those in the `target_csv`.\n",
|
| 99 |
-
"You can assume the rows are aligned: that is, the first row in `source_csv` corresponds to the first row in `target_csv`, and so on.\n",
|
| 100 |
-
"\n",
|
| 101 |
-
"Remember:\n",
|
| 102 |
-
"1. Which column in `target_csv` it maps to. You should consider the semantic meaning of the columns, not just the character similarity. \n",
|
| 103 |
-
"\n",
|
| 104 |
-
"Example mappings:\n",
|
| 105 |
-
"- 'MunICipality' in `source_csv` should map to 'City' in `target_csv`.\n",
|
| 106 |
-
"- 'fullname' in `source_csv` should map to both 'FirstName' and 'LastName' in `target_csv`. You must explain this transformation, as well, including the target sequencing of first and last name.\n",
|
| 107 |
-
"\n",
|
| 108 |
-
"Example transformations:\n",
|
| 109 |
-
"- If date in `source_csv` is `2020-01-01` and date in `target_csv` is `01/01/2020`, explain exactly how this should be transformed and the reasoning behind it.\n",
|
| 110 |
-
"- If city in `source_csv` is `New York` and city in `target_csv` is `NEW YORK` or `NYC`, explain exactly how this should be transformed and the reasoning behind it.\n",
|
| 111 |
-
"\n",
|
| 112 |
-
"Lastly, point out any other oddities, such as duplicate columns, erroneous columns, etc.\n",
|
| 113 |
-
"\n",
|
| 114 |
-
"{format_instructions}\n",
|
| 115 |
-
"\n",
|
| 116 |
-
"Remember:\n",
|
| 117 |
-
"- Be concise: you are speaking to engineers, not customers.\n",
|
| 118 |
-
"- Be precise: all of these values are case sensitive. Consider casing for city names, exact prefixes for identifiers, ordering of people's names, etc.\n",
|
| 119 |
-
"- DO NOT include commas, quotes, or any other characters that might interfere with JSON serialization or CSV generation\n",
|
| 120 |
-
"\n",
|
| 121 |
-
"Your response:\n",
|
| 122 |
-
"'''\n",
|
| 123 |
-
"\n",
|
| 124 |
-
"def get_data_str_from_df_for_prompt(df, use_head=True, num_rows_to_return=NUM_ROWS_TO_RETURN):\n",
|
| 125 |
-
" data = df.head(num_rows_to_return) if use_head else df.tail(num_rows_to_return)\n",
|
| 126 |
-
" return f'<df>\\n{data.to_markdown()}\\n</df>'\n",
|
| 127 |
-
"\n",
|
| 128 |
-
"table_mapping_parser = PydanticOutputParser(pydantic_object=TableMapping)\n",
|
| 129 |
-
"analyst_prompt = ChatPromptTemplate.from_template(\n",
|
| 130 |
-
" template=analyst_prompt_str, \n",
|
| 131 |
-
" partial_variables={'format_instructions': table_mapping_parser.get_format_instructions()},\n",
|
| 132 |
-
")\n",
|
| 133 |
-
"\n",
|
| 134 |
-
"mapping_chain = analyst_prompt | transform_model | table_mapping_parser\n",
|
| 135 |
-
"table_mapping: TableMapping = mapping_chain.invoke({\"source_1_csv_str\": get_data_str_from_df_for_prompt(table_1_df), \"target_csv_str\": get_data_str_from_df_for_prompt(template_df)})"
|
| 136 |
-
]
|
| 137 |
-
},
|
| 138 |
-
{
|
| 139 |
-
"cell_type": "code",
|
| 140 |
-
"execution_count": 12,
|
| 141 |
-
"metadata": {},
|
| 142 |
-
"outputs": [],
|
| 143 |
-
"source": [
|
| 144 |
-
"# spec writer\n",
|
| 145 |
-
"spec_writer_prompt_str = '''\n",
|
| 146 |
-
"You are an expert product manager and technical writer for a software company, who generates clean, concise, precise specification documents for your employees.\n",
|
| 147 |
-
"Your job is to write a plaintext spec for a python script for a software engineer to develop a component within an ETL pipeline.\n",
|
| 148 |
-
"\n",
|
| 149 |
-
"This document must include 100% of the information your employee needs to write a successful script to transform source_df to target_df.\n",
|
| 150 |
-
"However, DO NOT include the original table_mapping. Your job is to translate everything into natural language.\n",
|
| 151 |
-
"\n",
|
| 152 |
-
"Here is a stringified pydantic object that describes the mapping and the transformation steps:\n",
|
| 153 |
-
"\n",
|
| 154 |
-
"{table_mapping}\n",
|
| 155 |
-
"\n",
|
| 156 |
-
"You must translate this into clean, concise, and complete instructions for your employee.\n",
|
| 157 |
-
"\n",
|
| 158 |
-
"This document should be formatted like a technical document in plaintext. Do not include code or data.\n",
|
| 159 |
-
"\n",
|
| 160 |
-
"This document must include:\n",
|
| 161 |
-
"- Overview\n",
|
| 162 |
-
"- Input (source_df), Output (target_df)\n",
|
| 163 |
-
"- Exact column mapping\n",
|
| 164 |
-
"- Exact transformation steps for each column\n",
|
| 165 |
-
"- Precise instructions for what this script should do\n",
|
| 166 |
-
"- Script input: Pandas Dataframe named `source_df`.\n",
|
| 167 |
-
"- Script output: Pandas Dataframe named `target_df`.\n",
|
| 168 |
-
"- Do not modify the source_df. Create a new dataframe named target_df.\n",
|
| 169 |
-
"- This script should never include the source data. It should only include the transformations required to create the target_df.\n",
|
| 170 |
-
"- Return the target_df.\n",
|
| 171 |
-
"\n",
|
| 172 |
-
"You will never see this employee. They cannot contact you. You will never see their code. You must include 100% of the information they need to write a successful script.\n",
|
| 173 |
-
"Remember:\n",
|
| 174 |
-
"- Clean: No extra information, no formatting aside from plaintext\n",
|
| 175 |
-
"- Concise: Your employees benefit from brevity\n",
|
| 176 |
-
"- Precise: your words must be unambiguous, exact, and fully represent a perfect translation of the table_mapping object.\n",
|
| 177 |
-
"\n",
|
| 178 |
-
"Your response:\n",
|
| 179 |
-
"'''\n",
|
| 180 |
-
"spec_writer_prompt = ChatPromptTemplate.from_template(spec_writer_prompt_str)\n",
|
| 181 |
-
"\n",
|
| 182 |
-
"spec_writer_chain = spec_writer_prompt | natural_language_model | StrOutputParser()\n",
|
| 183 |
-
"spec_str = spec_writer_chain.invoke({\"table_mapping\": str(table_mapping)})"
|
| 184 |
-
]
|
| 185 |
-
},
|
| 186 |
-
{
|
| 187 |
-
"cell_type": "code",
|
| 188 |
-
"execution_count": 19,
|
| 189 |
-
"metadata": {},
|
| 190 |
-
"outputs": [],
|
| 191 |
-
"source": [
|
| 192 |
-
"engineer_prompt_str = '''\n",
|
| 193 |
-
"You are a Senior Software Engineer, who specializes in writing Python code for ETL pipelines.\n",
|
| 194 |
-
"Your Product Manager has written a spec for a new transformation script. You must follow this document exactly, write python code that implements the spec, validate that code, and then return it.\n",
|
| 195 |
-
"Your output should only be python code in Markdown format, eg:\n",
|
| 196 |
-
" ```python\n",
|
| 197 |
-
" ....\n",
|
| 198 |
-
" ```\n",
|
| 199 |
-
"Do not return any additional text / explanation. This code will be executed by a robot without human intervention.\n",
|
| 200 |
-
"\n",
|
| 201 |
-
"Here is the technical specification for your code:\n",
|
| 202 |
-
"\n",
|
| 203 |
-
"{spec_str}\n",
|
| 204 |
-
"\n",
|
| 205 |
-
"Remember: return only clean python code in markdown format. The python interpreter running this code will already have `source_df` as a local variable.\n",
|
| 206 |
-
"\n",
|
| 207 |
-
"You must return `target_df` at the end.\n",
|
| 208 |
-
"'''\n",
|
| 209 |
-
"engineer_prompt = ChatPromptTemplate.from_template(engineer_prompt_str)\n",
|
| 210 |
-
"\n",
|
| 211 |
-
"# engineer_chain = engineer_prompt | transform_model | StrOutputParser() | PythonAstREPLTool(locals={'source_df': table_1_df}).run\n",
|
| 212 |
-
"# table_1_df_transformed = engineer_chain.invoke({\"spec_str\": spec_str})\n",
|
| 213 |
-
"engineer_chain = engineer_prompt | transform_model | StrOutputParser()\n",
|
| 214 |
-
"transform_code = engineer_chain.invoke({\"spec_str\": spec_str})"
|
| 215 |
-
]
|
| 216 |
-
},
|
| 217 |
-
{
|
| 218 |
-
"cell_type": "code",
|
| 219 |
-
"execution_count": 17,
|
| 220 |
-
"metadata": {},
|
| 221 |
-
"outputs": [
|
| 222 |
-
{
|
| 223 |
-
"name": "stdout",
|
| 224 |
-
"output_type": "stream",
|
| 225 |
-
"text": [
|
| 226 |
-
"Running on local URL: http://127.0.0.1:7874\n",
|
| 227 |
-
"\n",
|
| 228 |
-
"To create a public link, set `share=True` in `launch()`.\n"
|
| 229 |
-
]
|
| 230 |
-
},
|
| 231 |
-
{
|
| 232 |
-
"data": {
|
| 233 |
-
"text/html": [
|
| 234 |
-
"<div><iframe src=\"http://127.0.0.1:7874/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
| 235 |
-
],
|
| 236 |
-
"text/plain": [
|
| 237 |
-
"<IPython.core.display.HTML object>"
|
| 238 |
-
]
|
| 239 |
-
},
|
| 240 |
-
"metadata": {},
|
| 241 |
-
"output_type": "display_data"
|
| 242 |
-
},
|
| 243 |
-
{
|
| 244 |
-
"data": {
|
| 245 |
-
"text/plain": []
|
| 246 |
-
},
|
| 247 |
-
"execution_count": 17,
|
| 248 |
-
"metadata": {},
|
| 249 |
-
"output_type": "execute_result"
|
| 250 |
-
}
|
| 251 |
-
],
|
| 252 |
-
"source": [
|
| 253 |
-
"def show_mapping(file):\n",
|
| 254 |
-
" # TODO: add code\n",
|
| 255 |
-
" return pd.DataFrame(table_mapping.dict()['table_mappings'])\n",
|
| 256 |
-
"demo = gr.Interface(fn=show_mapping, inputs=[\"file\"], outputs='dataframe')\n",
|
| 257 |
-
"demo.launch()"
|
| 258 |
-
]
|
| 259 |
-
},
|
| 260 |
-
{
|
| 261 |
-
"cell_type": "code",
|
| 262 |
-
"execution_count": 34,
|
| 263 |
-
"metadata": {},
|
| 264 |
-
"outputs": [
|
| 265 |
-
{
|
| 266 |
-
"name": "stdout",
|
| 267 |
-
"output_type": "stream",
|
| 268 |
-
"text": [
|
| 269 |
-
"Running on local URL: http://127.0.0.1:7885\n",
|
| 270 |
-
"\n",
|
| 271 |
-
"Thanks for being a Gradio user! If you have questions or feedback, please join our Discord server and chat with us: https://discord.gg/feTf9x3ZSB\n",
|
| 272 |
-
"\n",
|
| 273 |
-
"To create a public link, set `share=True` in `launch()`.\n"
|
| 274 |
-
]
|
| 275 |
-
},
|
| 276 |
-
{
|
| 277 |
-
"data": {
|
| 278 |
-
"text/html": [
|
| 279 |
-
"<div><iframe src=\"http://127.0.0.1:7885/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
| 280 |
-
],
|
| 281 |
-
"text/plain": [
|
| 282 |
-
"<IPython.core.display.HTML object>"
|
| 283 |
-
]
|
| 284 |
-
},
|
| 285 |
-
"metadata": {},
|
| 286 |
-
"output_type": "display_data"
|
| 287 |
-
},
|
| 288 |
-
{
|
| 289 |
-
"data": {
|
| 290 |
-
"text/plain": []
|
| 291 |
-
},
|
| 292 |
-
"execution_count": 34,
|
| 293 |
-
"metadata": {},
|
| 294 |
-
"output_type": "execute_result"
|
| 295 |
-
}
|
| 296 |
-
],
|
| 297 |
-
"source": [
|
| 298 |
-
"def _sanitize_python_output(text: str):\n",
|
| 299 |
-
" _, after = text.split(\"```python\")\n",
|
| 300 |
-
" return after.split(\"```\")[0]\n",
|
| 301 |
-
"\n",
|
| 302 |
-
"def show_code(button):\n",
|
| 303 |
-
" # TODO: add code\n",
|
| 304 |
-
" return _sanitize_python_output(transform_code)\n",
|
| 305 |
-
"check_mapping_text = 'How does that mapping look? \\n\\nFeel free to update it: your changes will be incorporated! \\n\\nWhen you are ready, click the Submit button below, and the mapping code will be generated for your approval.'\n",
|
| 306 |
-
"demo = gr.Interface(fn=show_code, inputs=[gr.Textbox(value=check_mapping_text, interactive=False)], outputs=[gr.Code(language=\"python\")])\n",
|
| 307 |
-
"demo.launch()"
|
| 308 |
-
]
|
| 309 |
-
},
|
| 310 |
-
{
|
| 311 |
-
"cell_type": "code",
|
| 312 |
-
"execution_count": 41,
|
| 313 |
-
"metadata": {},
|
| 314 |
-
"outputs": [
|
| 315 |
-
{
|
| 316 |
-
"name": "stderr",
|
| 317 |
-
"output_type": "stream",
|
| 318 |
-
"text": [
|
| 319 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/4236222443.py:4: GradioDeprecationWarning: `layout` parameter is deprecated, and it has no effect\n",
|
| 320 |
-
" demo = gr.Interface(\n"
|
| 321 |
-
]
|
| 322 |
-
},
|
| 323 |
-
{
|
| 324 |
-
"name": "stdout",
|
| 325 |
-
"output_type": "stream",
|
| 326 |
-
"text": [
|
| 327 |
-
"Running on local URL: http://127.0.0.1:7892\n",
|
| 328 |
-
"\n",
|
| 329 |
-
"To create a public link, set `share=True` in `launch()`.\n"
|
| 330 |
-
]
|
| 331 |
-
},
|
| 332 |
-
{
|
| 333 |
-
"data": {
|
| 334 |
-
"text/html": [
|
| 335 |
-
"<div><iframe src=\"http://127.0.0.1:7892/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
| 336 |
-
],
|
| 337 |
-
"text/plain": [
|
| 338 |
-
"<IPython.core.display.HTML object>"
|
| 339 |
-
]
|
| 340 |
-
},
|
| 341 |
-
"metadata": {},
|
| 342 |
-
"output_type": "display_data"
|
| 343 |
-
},
|
| 344 |
-
{
|
| 345 |
-
"data": {
|
| 346 |
-
"text/plain": []
|
| 347 |
-
},
|
| 348 |
-
"execution_count": 41,
|
| 349 |
-
"metadata": {},
|
| 350 |
-
"output_type": "execute_result"
|
| 351 |
-
}
|
| 352 |
-
],
|
| 353 |
-
"source": [
|
| 354 |
-
"def get_transformed_table(button):\n",
|
| 355 |
-
" return template_df, PythonAstREPLTool(locals={'source_df': table_1_df}).run(transform_code)\n",
|
| 356 |
-
"check_mapping_text = 'How does that code look? \\n\\nWhen you are ready, click the Submit button and the source file will be transformed.'\n",
|
| 357 |
-
"demo = gr.Interface(\n",
|
| 358 |
-
" fn=get_transformed_table,\n",
|
| 359 |
-
" inputs=[gr.Textbox(value=check_mapping_text, interactive=False)],\n",
|
| 360 |
-
" outputs=[gr.Dataframe(label='Template Table (target)'), gr.Dataframe(label='Table 1 (transformed)')],\n",
|
| 361 |
-
" layout=\"column\",\n",
|
| 362 |
-
" examples=[[1]],\n",
|
| 363 |
-
")\n",
|
| 364 |
-
"demo.launch()"
|
| 365 |
-
]
|
| 366 |
-
},
|
| 367 |
-
{
|
| 368 |
-
"cell_type": "code",
|
| 369 |
-
"execution_count": 89,
|
| 370 |
-
"metadata": {},
|
| 371 |
-
"outputs": [
|
| 372 |
-
{
|
| 373 |
-
"name": "stderr",
|
| 374 |
-
"output_type": "stream",
|
| 375 |
-
"text": [
|
| 376 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:18: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n",
|
| 377 |
-
" gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
|
| 378 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:18: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect\n",
|
| 379 |
-
" gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
|
| 380 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:18: GradioDeprecationWarning: `keep_filename` parameter is deprecated, and it has no effect\n",
|
| 381 |
-
" gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
|
| 382 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:19: GradioDeprecationWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n",
|
| 383 |
-
" gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
|
| 384 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:19: GradioDeprecationWarning: `optional` parameter is deprecated, and it has no effect\n",
|
| 385 |
-
" gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
|
| 386 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:19: GradioDeprecationWarning: `keep_filename` parameter is deprecated, and it has no effect\n",
|
| 387 |
-
" gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
|
| 388 |
-
"/Users/andybryant/Desktop/projects/zero-mapper/venv/lib/python3.9/site-packages/gradio/utils.py:841: UserWarning: Expected 1 arguments for function <function generate_code at 0x12cb559d0>, received 0.\n",
|
| 389 |
-
" warnings.warn(\n",
|
| 390 |
-
"/Users/andybryant/Desktop/projects/zero-mapper/venv/lib/python3.9/site-packages/gradio/utils.py:845: UserWarning: Expected at least 1 arguments for function <function generate_code at 0x12cb559d0>, received 0.\n",
|
| 391 |
-
" warnings.warn(\n",
|
| 392 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:39: GradioUnusedKwargWarning: You have unused kwarg parameters in Button, please remove them: {'trigger': 'transform_source'}\n",
|
| 393 |
-
" gr.Button(value=\"Transform Source\", variant=\"primary\", trigger=\"transform_source\")\n",
|
| 394 |
-
"/var/folders/lx/3ksh07r96gn2v7b8mb__3mpc0000gn/T/ipykernel_94012/2180252060.py:40: GradioUnusedKwargWarning: You have unused kwarg parameters in Button, please remove them: {'trigger': 'save_code'}\n",
|
| 395 |
-
" gr.Button(value=\"Save Code\", variant=\"secondary\", trigger=\"save_code\")\n"
|
| 396 |
-
]
|
| 397 |
-
},
|
| 398 |
-
{
|
| 399 |
-
"name": "stdout",
|
| 400 |
-
"output_type": "stream",
|
| 401 |
-
"text": [
|
| 402 |
-
"Running on local URL: http://127.0.0.1:7934\n",
|
| 403 |
-
"\n",
|
| 404 |
-
"To create a public link, set `share=True` in `launch()`.\n"
|
| 405 |
-
]
|
| 406 |
-
},
|
| 407 |
-
{
|
| 408 |
-
"data": {
|
| 409 |
-
"text/html": [
|
| 410 |
-
"<div><iframe src=\"http://127.0.0.1:7934/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
|
| 411 |
-
],
|
| 412 |
-
"text/plain": [
|
| 413 |
-
"<IPython.core.display.HTML object>"
|
| 414 |
-
]
|
| 415 |
-
},
|
| 416 |
-
"metadata": {},
|
| 417 |
-
"output_type": "display_data"
|
| 418 |
-
},
|
| 419 |
-
{
|
| 420 |
-
"data": {
|
| 421 |
-
"text/plain": []
|
| 422 |
-
},
|
| 423 |
-
"execution_count": 89,
|
| 424 |
-
"metadata": {},
|
| 425 |
-
"output_type": "execute_result"
|
| 426 |
-
}
|
| 427 |
-
],
|
| 428 |
-
"source": [
|
| 429 |
-
"def _sanitize_python_output(text: str):\n",
|
| 430 |
-
" _, after = text.split(\"```python\")\n",
|
| 431 |
-
" return after.split(\"```\")[0]\n",
|
| 432 |
-
"\n",
|
| 433 |
-
"def do_stuff(val):\n",
|
| 434 |
-
" print(val)\n",
|
| 435 |
-
"\n",
|
| 436 |
-
"def generate_code(val):\n",
|
| 437 |
-
" return '# check this out'\n",
|
| 438 |
-
"\n",
|
| 439 |
-
"def save_csv_file(df, filename):\n",
|
| 440 |
-
" df.to_csv(os.path.join(data_dir_path, 'output', filename) + '.csv')\n",
|
| 441 |
-
"\n",
|
| 442 |
-
"with gr.Blocks() as demo:\n",
|
| 443 |
-
" with gr.Column():\n",
|
| 444 |
-
" gr.Markdown(\"## To begin, upload a Template CSV and a Source CSV file.\")\n",
|
| 445 |
-
" with gr.Row():\n",
|
| 446 |
-
" gr.inputs.File(label=\"Template\", type=\"file\", file_count='single')\n",
|
| 447 |
-
" gr.inputs.File(label=\"Source\", type=\"file\", file_count='single')\n",
|
| 448 |
-
"\n",
|
| 449 |
-
" with gr.Column():\n",
|
| 450 |
-
" gr.Markdown(\"## Mapping from Source to Template\")\n",
|
| 451 |
-
" with gr.Row():\n",
|
| 452 |
-
" table_mapping_df = pd.DataFrame(table_mapping.dict()['table_mappings'])\n",
|
| 453 |
-
" gr.DataFrame(value=table_mapping_df)\n",
|
| 454 |
-
" save_mapping_btn = gr.Button(value=\"Save Mapping\", variant=\"secondary\")\n",
|
| 455 |
-
" save_mapping_btn.click(fn=lambda : save_csv_file(table_mapping_df, 'table_mapping'))\n",
|
| 456 |
-
"\n",
|
| 457 |
-
" with gr.Row():\n",
|
| 458 |
-
" test = gr.Markdown()\n",
|
| 459 |
-
" generate_code_btn = gr.Button(value=\"Generate Code from Mapping\", variant=\"primary\")\n",
|
| 460 |
-
" generate_code_btn.click(fn=generate_code, outputs=test)\n",
|
| 461 |
-
"\n",
|
| 462 |
-
" with gr.Column():\n",
|
| 463 |
-
" gr.Markdown(\"## Here is the code that will be used to transform the source file into the template schema:\")\n",
|
| 464 |
-
" gr.Code(language=\"python\", value=_sanitize_python_output(transform_code))\n",
|
| 465 |
-
"\n",
|
| 466 |
-
" with gr.Row():\n",
|
| 467 |
-
" gr.Button(value=\"Transform Source\", variant=\"primary\", trigger=\"transform_source\")\n",
|
| 468 |
-
" gr.Button(value=\"Save Code\", variant=\"secondary\", trigger=\"save_code\")\n",
|
| 469 |
-
" \n",
|
| 470 |
-
" with gr.Row():\n",
|
| 471 |
-
" with gr.Column():\n",
|
| 472 |
-
" gr.Dataframe(label='Target (template)', type='pandas', value=template_df)\n",
|
| 473 |
-
" with gr.Column():\n",
|
| 474 |
-
" gr.Dataframe(label='Source (transformed)', type='pandas', value=PythonAstREPLTool(locals={'source_df': table_1_df}).run(transform_code))\n",
|
| 475 |
-
"\n",
|
| 476 |
-
"demo.launch()"
|
| 477 |
-
]
|
| 478 |
-
},
|
| 479 |
-
{
|
| 480 |
-
"cell_type": "code",
|
| 481 |
-
"execution_count": null,
|
| 482 |
-
"metadata": {},
|
| 483 |
-
"outputs": [],
|
| 484 |
-
"source": []
|
| 485 |
-
}
|
| 486 |
-
],
|
| 487 |
-
"metadata": {
|
| 488 |
-
"kernelspec": {
|
| 489 |
-
"display_name": "venv",
|
| 490 |
-
"language": "python",
|
| 491 |
-
"name": "python3"
|
| 492 |
-
},
|
| 493 |
-
"language_info": {
|
| 494 |
-
"codemirror_mode": {
|
| 495 |
-
"name": "ipython",
|
| 496 |
-
"version": 3
|
| 497 |
-
},
|
| 498 |
-
"file_extension": ".py",
|
| 499 |
-
"mimetype": "text/x-python",
|
| 500 |
-
"name": "python",
|
| 501 |
-
"nbconvert_exporter": "python",
|
| 502 |
-
"pygments_lexer": "ipython3",
|
| 503 |
-
"version": "3.9.6"
|
| 504 |
-
},
|
| 505 |
-
"orig_nbformat": 4
|
| 506 |
-
},
|
| 507 |
-
"nbformat": 4,
|
| 508 |
-
"nbformat_minor": 2
|
| 509 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|