# GradioPDFComparer.original from qwen.py (3.6 KB)

import camelot
import gradio as gr
import pandas as pd
  4. # Global variables to store PDF data
  5. pdf1_data = None
  6. pdf2_data = None
  7. dropdown_options = []
  8. table_data = []
  9. def process_pdf(file_path):
  10. """Extract tables from PDF using Camelot"""
  11. try:
  12. tables = camelot.read_pdf(file_path)
  13. return [table.df for table in tables]
  14. except Exception as e:
  15. return f"Error: {str(e)}"
  16. def update_dropdown(pdf1, pdf2):
  17. """Process both PDFs and update dropdown options"""
  18. global pdf1_data, pdf2_data, dropdown_options
  19. # Process first PDF
  20. if pdf1:
  21. result = process_pdf(pdf1.name)
  22. if isinstance(result, list):
  23. pdf1_data = result
  24. else:
  25. return [], f"Error processing first PDF: {result}", []
  26. # Process second PDF
  27. if pdf2:
  28. result = process_pdf(pdf2.name)
  29. if isinstance(result, list):
  30. pdf2_data = result
  31. else:
  32. return [], f"Error processing second PDF: {result}", []
  33. # Generate dropdown options when both PDFs are processed
  34. if pdf1_data and pdf2_data:
  35. # Create options combining tables from both PDFs
  36. dropdown_options = []
  37. for i, table in enumerate(pdf1_data):
  38. dropdown_options.append(f"PDF1-Table{i+1}")
  39. for i, table in enumerate(pdf2_data):
  40. dropdown_options.append(f"PDF2-Table{i+1}")
  41. return dropdown_options, "Both PDFs processed successfully", []
  42. return [], "Upload both PDFs to see options", []
  43. def update_table(selected_option):
  44. """Update table based on selected option"""
  45. global pdf1_data, pdf2_data
  46. if not selected_option:
  47. return []
  48. # Determine which PDF and table to use
  49. if selected_option.startswith("PDF1"):
  50. idx = int(selected_option.split("Table")[1]) - 1
  51. table_df = pdf1_data[idx]
  52. else:
  53. idx = int(selected_option.split("Table")[1]) - 1
  54. table_df = pdf2_data[idx]
  55. # Convert to list of lists for Gradio
  56. headers = [f"Col {i+1}" for i in range(len(table_df.columns))]
  57. rows = [headers] + table_df.values.tolist()
  58. return rows
  59. with gr.Blocks() as app:
  60. gr.Markdown("## PDF Table Comparison Tool")
  61. with gr.Row():
  62. # Left column for first PDF
  63. with gr.Column():
  64. pdf1_input = gr.File(label="Upload First PDF", file_types=[".pdf"])
  65. pdf1_preview = gr.File(label="First PDF Preview")
  66. # Right column for second PDF
  67. with gr.Column():
  68. pdf2_input = gr.File(label="Upload Second PDF", file_types=[".pdf"])
  69. pdf2_preview = gr.File(label="Second PDF Preview")
  70. # Error/warning textbox
  71. error_output = gr.Textbox(label="Status", interactive=False)
  72. # Dropdown for selecting tables
  73. dropdown = gr.Dropdown(label="Select Table", choices=[])
  74. # Output table
  75. output_table = gr.Dataframe(label="Table Content", headers=["Column 1", "Column 2", "Column 3"])
  76. # Event handling
  77. pdf1_input.change(
  78. fn=lambda x: x,
  79. inputs=pdf1_input,
  80. outputs=pdf1_preview
  81. )
  82. pdf2_input.change(
  83. fn=lambda x: x,
  84. inputs=pdf2_input,
  85. outputs=pdf2_preview
  86. )
  87. # Update dropdown when both files are uploaded
  88. inputs = [pdf1_input, pdf2_input]
  89. pdf1_input.change(update_dropdown, inputs=inputs, outputs=[dropdown, error_output, output_table])
  90. pdf2_input.change(update_dropdown, inputs=inputs, outputs=[dropdown, error_output, output_table])
  91. # Update table when dropdown selection changes
  92. dropdown.change(update_table, inputs=dropdown, outputs=output_table)
  93. if __name__ == "__main__":
  94. app.launch()