| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116 |
- import gradio as gr
- import camelot
- import pandas as pd
# Module-level state shared by the Gradio callbacks in this file.
# Each *_data slot holds the list of tables extracted from the matching
# upload, or None while nothing has been stored for that slot.
pdf1_data, pdf2_data = None, None
# Labels currently offered by the table-selection dropdown.
dropdown_options = list()
# Spare row buffer; none of the visible callbacks read or write it.
table_data = list()
def process_pdf(file_path, pages="1"):
    """Extract tables from a PDF using Camelot.

    Args:
        file_path: Path of the PDF file to parse.
        pages: Page selection forwarded to ``camelot.read_pdf`` (for example
            ``"1"``, ``"1,3-5"`` or ``"all"``). The default ``"1"`` is the
            value Camelot itself uses when no selection is given, so callers
            that pass only ``file_path`` behave exactly as before.

    Returns:
        On success, a list with one ``DataFrame`` (``table.df``) per table
        Camelot detected. If anything raises, a string of the form
        ``"Error: <message>"`` instead; callers tell the two apart with
        ``isinstance(result, list)``.
    """
    try:
        tables = camelot.read_pdf(file_path, pages=pages)
        return [table.df for table in tables]
    except Exception as e:
        # Deliberate best-effort boundary: the UI displays the message
        # rather than letting the callback crash.
        return f"Error: {e}"
def update_dropdown(pdf1, pdf2):
    """Re-extract tables from both uploads and refresh the table selector.

    Args:
        pdf1: Value of the first file input; falsy when nothing is uploaded.
        pdf2: Value of the second file input; falsy when nothing is uploaded.

    Returns:
        A 3-tuple matching the ``(dropdown, status textbox, table)`` outputs:
        a ``gr.update`` carrying the new dropdown choices, a status message,
        and an empty list that clears the table.
    """
    # NOTE(review): these module globals are process-wide, so concurrent
    # sessions would overwrite each other's tables -- consider gr.State.
    global pdf1_data, pdf2_data, dropdown_options

    # Start from a clean slate so a cleared or failed upload cannot leave
    # tables from an earlier file selectable.
    pdf1_data = pdf2_data = None
    dropdown_options = []
    # Returning a plain list to a Dropdown output sets its *value*; the
    # choices have to be changed through gr.update(choices=...).
    no_choices = gr.update(choices=[], value=None)

    extracted = []
    for upload, ordinal in ((pdf1, "first"), (pdf2, "second")):
        if not upload:
            extracted.append(None)
            continue
        # Accept either an object exposing ``.name`` (what this code
        # originally assumed) or a plain path string.
        result = process_pdf(getattr(upload, "name", upload))
        if not isinstance(result, list):
            return no_choices, f"Error processing {ordinal} PDF: {result}", []
        extracted.append(result)
    pdf1_data, pdf2_data = extracted

    if pdf1_data is None or pdf2_data is None:
        return no_choices, "Upload both PDFs to see options", []

    # One label per table, numbered from 1 within each PDF.
    dropdown_options = [f"PDF1-Table{i}" for i in range(1, len(pdf1_data) + 1)]
    dropdown_options += [f"PDF2-Table{i}" for i in range(1, len(pdf2_data) + 1)]
    if not dropdown_options:
        # Both files parsed, but neither yielded a table; say so instead of
        # asking the user to upload files they already uploaded.
        return no_choices, "No tables found in the uploaded PDFs", []

    return (
        gr.update(choices=dropdown_options, value=None),
        "Both PDFs processed successfully",
        [],
    )
def update_table(selected_option):
    """Return the rows of the table named by a dropdown label.

    Args:
        selected_option: Label of the form ``"PDF1-Table<k>"`` or
            ``"PDF2-Table<k>"`` with 1-based ``k``, or a falsy value when
            nothing is selected.

    Returns:
        A list of rows (each a list). The first row is a generated header
        ``["Col 1", "Col 2", ...]`` and the remaining rows are the table's
        values. An empty list is returned when nothing is selected or when
        the label does not match a currently stored table.
    """
    if not selected_option:
        return []

    # Labels starting with "PDF1" read the first cache; everything else
    # falls through to the second one.
    tables = pdf1_data if selected_option.startswith("PDF1") else pdf2_data

    try:
        index = int(selected_option.split("Table")[1]) - 1
    except (IndexError, ValueError):
        # Label without a "Table<number>" suffix.
        return []

    # A stale selection can outlive the data it referred to; the explicit
    # lower bound also stops "Table0" from wrapping around to the last table.
    if tables is None or not 0 <= index < len(tables):
        return []

    table_df = tables[index]
    headers = [f"Col {i}" for i in range(1, len(table_df.columns) + 1)]
    return [headers] + table_df.values.tolist()
# ---------------------------------------------------------------------------
# UI layout and event wiring
# ---------------------------------------------------------------------------
with gr.Blocks() as app:
    gr.Markdown("## PDF Table Comparison Tool")

    # Two side-by-side columns, one per PDF: an upload slot plus a second
    # File component that simply mirrors whatever was uploaded.
    with gr.Row():
        with gr.Column():
            pdf1_input = gr.File(label="Upload First PDF", file_types=[".pdf"])
            pdf1_preview = gr.File(label="First PDF Preview")

        with gr.Column():
            pdf2_input = gr.File(label="Upload Second PDF", file_types=[".pdf"])
            pdf2_preview = gr.File(label="Second PDF Preview")

    # Read-only status line fed by update_dropdown.
    error_output = gr.Textbox(label="Status", interactive=False)

    # Table selector; starts with no choices.
    dropdown = gr.Dropdown(label="Select Table", choices=[])

    # Grid that displays the rows produced by update_table.
    output_table = gr.Dataframe(
        label="Table Content",
        headers=["Column 1", "Column 2", "Column 3"],
    )

    # Mirror each upload into its preview slot.
    pdf1_input.change(fn=lambda x: x, inputs=pdf1_input, outputs=pdf1_preview)
    pdf2_input.change(fn=lambda x: x, inputs=pdf2_input, outputs=pdf2_preview)

    # A change to either upload re-runs update_dropdown with both files.
    inputs = [pdf1_input, pdf2_input]
    pdf1_input.change(
        update_dropdown,
        inputs=inputs,
        outputs=[dropdown, error_output, output_table],
    )
    pdf2_input.change(
        update_dropdown,
        inputs=inputs,
        outputs=[dropdown, error_output, output_table],
    )

    # Picking a table in the selector renders it in the grid.
    dropdown.change(update_table, inputs=dropdown, outputs=output_table)


if __name__ == "__main__":
    app.launch()
|