import os
import json
import shutil
from pathlib import Path

# Session directory used when the caller does not supply one.
DEFAULT_BASE_DIR = Path(
    "/home/jackjack/test/ocr_macro/workspace/shared_sessions/b66123d5"
)


def copy_files_from_groundtruth(
    base_dir: "str | os.PathLike[str]" = DEFAULT_BASE_DIR,
) -> None:
    """Copy the documents referenced by groundtruth JSON files.

    Every ``*.json`` file directly inside ``<base_dir>/groundtruth`` is
    parsed and its ``"filename"`` value is looked up inside
    ``<base_dir>/docs``.  Matching regular files are copied to
    ``<base_dir>/aa``.  Progress, warnings and a final summary are printed
    to standard output.

    Args:
        base_dir: Directory that contains the ``groundtruth`` and ``docs``
            sub-directories.  Defaults to ``DEFAULT_BASE_DIR``.

    Returns:
        None.  Problems with individual files are reported on standard
        output and do not abort the run.
    """
    base_dir = Path(base_dir)
    groundtruth_dir = base_dir / "groundtruth"
    docs_dir = base_dir / "docs"
    destination_dir = base_dir / "aa"

    # Validate the inputs first so that a wrong base directory produces a
    # readable message rather than an uncaught error from mkdir(), and so
    # that no stray destination directory is left behind.
    if not groundtruth_dir.is_dir():
        print(f"Error: Groundtruth directory not found at {groundtruth_dir}")
        return

    if not docs_dir.is_dir():
        print(f"Error: Docs directory not found at {docs_dir}")
        return

    # Ensure destination directory exists
    destination_dir.mkdir(exist_ok=True)
    print(f"Destination directory created or already exists: {destination_dir}")

    # Sorted so that processing (and therefore log) order is deterministic.
    json_files = sorted(groundtruth_dir.glob("*.json"))
    if not json_files:
        print(f"No JSON files found in {groundtruth_dir}")
        return

    print(f"Found {len(json_files)} JSON files to process...")

    copied_count = 0
    not_found_count = 0

    for json_file in json_files:
        try:
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)

            # The top-level JSON value may legally be a list, number, etc.;
            # only an object can carry a "filename" key.
            filename_to_find = (
                data.get("filename") if isinstance(data, dict) else None
            )
            if not isinstance(filename_to_find, str) or not filename_to_find:
                print(
                    f"Warning: 'filename' key not found in {json_file.name}. "
                    "Skipping."
                )
                continue

            source_file_path = docs_dir / filename_to_find
            destination_file_path = destination_dir / filename_to_find

            # is_file() rather than exists(): a directory with a matching
            # name cannot be copied by shutil.copy().
            if source_file_path.is_file():
                print(
                    f"Copying '{source_file_path}' to "
                    f"'{destination_file_path}'..."
                )
                # The filename may contain sub-directories ("a/b.pdf").
                destination_file_path.parent.mkdir(parents=True, exist_ok=True)
                shutil.copy(source_file_path, destination_file_path)
                copied_count += 1
            else:
                print(
                    "Warning: File not found in docs directory: "
                    f"{filename_to_find}"
                )
                not_found_count += 1

        except json.JSONDecodeError:
            print(f"Error: Could not decode JSON from {json_file.name}. Skipping.")
        except Exception as e:
            # Best-effort batch job: report the failure and keep going.
            print(
                "An unexpected error occurred while processing "
                f"{json_file.name}: {e}"
            )

    print("\n--- Operation Summary ---")
    print(f"Files copied successfully: {copied_count}")
    print(f"Files not found: {not_found_count}")
    print("-------------------------")


if __name__ == "__main__":
    copy_files_from_groundtruth()