71 lines
2.4 KiB
Python
71 lines
2.4 KiB
Python
import os
|
|
import json
|
|
import shutil
|
|
from pathlib import Path
|
|
|
|
# Session directory used when the caller does not pass ``base_dir``.
_DEFAULT_BASE_DIR = Path(
    "/home/jackjack/test/ocr_macro/workspace/shared_sessions/b66123d5"
)


def copy_files_from_groundtruth(base_dir: "str | os.PathLike | None" = None) -> None:
    """Copy the docs files referenced by groundtruth JSON records.

    Every ``*.json`` file directly inside ``<base_dir>/groundtruth`` is
    parsed. Its top-level ``"filename"`` value names a file under
    ``<base_dir>/docs``; that file is copied to the same relative location
    under ``<base_dir>/aa``. Progress, warnings and a final summary are
    printed to stdout.

    Records are skipped (with a message) when the JSON cannot be read or
    decoded, when it has no usable ``"filename"`` string, or when the name is
    absolute or contains ``..`` and would therefore leave the docs directory.

    Args:
        base_dir: Directory that contains the ``groundtruth`` and ``docs``
            sub-directories. When ``None`` the original hard-coded session
            directory is used.

    Returns:
        None. Missing input directories are reported on stdout rather than
        raised.
    """
    root = _DEFAULT_BASE_DIR if base_dir is None else Path(base_dir)
    groundtruth_dir = root / "groundtruth"
    docs_dir = root / "docs"
    destination_dir = root / "aa"

    # Validate the inputs before touching the filesystem: creating the
    # destination first would raise FileNotFoundError for a missing base
    # directory and never reach these friendlier messages.
    if not groundtruth_dir.is_dir():
        print(f"Error: Groundtruth directory not found at {groundtruth_dir}")
        return

    if not docs_dir.is_dir():
        print(f"Error: Docs directory not found at {docs_dir}")
        return

    # Ensure destination directory exists
    destination_dir.mkdir(exist_ok=True)
    print(f"Destination directory created or already exists: {destination_dir}")

    # Sorted so the processing order (and the log) is reproducible.
    json_files = sorted(groundtruth_dir.glob("*.json"))
    if not json_files:
        print(f"No JSON files found in {groundtruth_dir}")
        return

    print(f"Found {len(json_files)} JSON files to process...")

    copied_count = 0
    not_found_count = 0
    skipped_count = 0

    for json_file in json_files:
        try:
            with open(json_file, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f"Error: Could not decode JSON from {json_file.name}. Skipping.")
            skipped_count += 1
            continue
        except OSError as e:
            print(f"An unexpected error occurred while processing {json_file.name}: {e}")
            skipped_count += 1
            continue

        # The top-level JSON value may legally be a list, string, number...
        filename_to_find = data.get("filename") if isinstance(data, dict) else None
        if not filename_to_find:
            print(f"Warning: 'filename' key not found in {json_file.name}. Skipping.")
            skipped_count += 1
            continue

        if not isinstance(filename_to_find, str):
            print(f"Warning: 'filename' in {json_file.name} is not a string. Skipping.")
            skipped_count += 1
            continue

        # An absolute name would replace docs_dir entirely when joined, and
        # ".." would climb out of it; neither refers to a file "in docs".
        relative_path = Path(filename_to_find)
        if relative_path.is_absolute() or ".." in relative_path.parts:
            print(
                f"Warning: 'filename' in {json_file.name} points outside the docs "
                f"directory: {filename_to_find}. Skipping."
            )
            skipped_count += 1
            continue

        source_file_path = docs_dir / relative_path
        destination_file_path = destination_dir / relative_path

        # is_file() rather than exists(): a directory cannot be copied by
        # shutil.copy and should be reported as "not found".
        if not source_file_path.is_file():
            print(f"Warning: File not found in docs directory: {filename_to_find}")
            not_found_count += 1
            continue

        print(f"Copying '{source_file_path}' to '{destination_file_path}'...")
        try:
            # Names such as "sub/page.png" need their parent folder created.
            destination_file_path.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(source_file_path, destination_file_path)
        except OSError as e:
            print(f"An unexpected error occurred while processing {json_file.name}: {e}")
            skipped_count += 1
            continue
        copied_count += 1

    print("\n--- Operation Summary ---")
    print(f"Files copied successfully: {copied_count}")
    print(f"Files not found: {not_found_count}")
    print(f"Records skipped or failed: {skipped_count}")
    print("-------------------------")


if __name__ == "__main__":
    # Script entry point: run the copy job only when this file is executed
    # directly, not when it is imported as a module.
    copy_files_from_groundtruth()