import gradio as gr import camelot import pandas as pd # Global variables to store PDF data pdf1_data = None pdf2_data = None dropdown_options = [] table_data = [] def process_pdf(file_path): """Extract tables from PDF using Camelot""" try: tables = camelot.read_pdf(file_path) return [table.df for table in tables] except Exception as e: return f"Error: {str(e)}" def update_dropdown(pdf1, pdf2): """Process both PDFs and update dropdown options""" global pdf1_data, pdf2_data, dropdown_options # Process first PDF if pdf1: result = process_pdf(pdf1.name) if isinstance(result, list): pdf1_data = result else: return [], f"Error processing first PDF: {result}", [] # Process second PDF if pdf2: result = process_pdf(pdf2.name) if isinstance(result, list): pdf2_data = result else: return [], f"Error processing second PDF: {result}", [] # Generate dropdown options when both PDFs are processed if pdf1_data and pdf2_data: # Create options combining tables from both PDFs dropdown_options = [] for i, table in enumerate(pdf1_data): dropdown_options.append(f"PDF1-Table{i+1}") for i, table in enumerate(pdf2_data): dropdown_options.append(f"PDF2-Table{i+1}") return dropdown_options, "Both PDFs processed successfully", [] return [], "Upload both PDFs to see options", [] def update_table(selected_option): """Update table based on selected option""" global pdf1_data, pdf2_data if not selected_option: return [] # Determine which PDF and table to use if selected_option.startswith("PDF1"): idx = int(selected_option.split("Table")[1]) - 1 table_df = pdf1_data[idx] else: idx = int(selected_option.split("Table")[1]) - 1 table_df = pdf2_data[idx] # Convert to list of lists for Gradio headers = [f"Col {i+1}" for i in range(len(table_df.columns))] rows = [headers] + table_df.values.tolist() return rows with gr.Blocks() as app: gr.Markdown("## PDF Table Comparison Tool") with gr.Row(): # Left column for first PDF with gr.Column(): pdf1_input = gr.File(label="Upload First PDF", file_types=[".pdf"]) pdf1_preview = gr.File(label="First PDF Preview") # Right column for second PDF with gr.Column(): pdf2_input = gr.File(label="Upload Second PDF", file_types=[".pdf"]) pdf2_preview = gr.File(label="Second PDF Preview") # Error/warning textbox error_output = gr.Textbox(label="Status", interactive=False) # Dropdown for selecting tables dropdown = gr.Dropdown(label="Select Table", choices=[]) # Output table output_table = gr.Dataframe(label="Table Content", headers=["Column 1", "Column 2", "Column 3"]) # Event handling pdf1_input.change( fn=lambda x: x, inputs=pdf1_input, outputs=pdf1_preview ) pdf2_input.change( fn=lambda x: x, inputs=pdf2_input, outputs=pdf2_preview ) # Update dropdown when both files are uploaded inputs = [pdf1_input, pdf2_input] pdf1_input.change(update_dropdown, inputs=inputs, outputs=[dropdown, error_output, output_table]) pdf2_input.change(update_dropdown, inputs=inputs, outputs=[dropdown, error_output, output_table]) # Update table when dropdown selection changes dropdown.change(update_table, inputs=dropdown, outputs=output_table) if __name__ == "__main__": app.launch()