# convert_image

import boto3
import PyPDF2
from io import BytesIO
from pdf2image import convert_from_bytes

# Module-level handles for the 's3' service, created once at import time:
# a boto3 client and a boto3 resource.
s3_client = boto3.client(service_name='s3')
s3 = boto3.resource(service_name='s3')


def convert_pdf_to_images(bucket_name, object_key, start_page, end_page):
    """Render a page range of a PDF stored in S3 to local PNG files.

    The PDF is downloaded into memory, the requested pages are copied into a
    temporary in-memory PDF, and that PDF is rasterised with pdf2image.

    Args:
        bucket_name: Name of the S3 bucket holding the PDF.
        object_key: Key of the PDF object. It is also used verbatim as the
            prefix of the output file names, so any directory components in
            the key must already exist locally.
        start_page: Zero-based index of the first page to convert.
        end_page: Zero-based index of the last page to convert (inclusive).

    Returns:
        None. One file named ``{object_key}_page_{n}.png`` is written per
        converted page, where ``n`` is the one-based page number.
    """
    # Use the dedicated module-level client: the global name ``s3`` is
    # rebound to a client further down this file, so ``s3.Object`` cannot be
    # relied on by the time this function is called.
    response = s3_client.get_object(Bucket=bucket_name, Key=object_key)
    file_data = response['Body'].read()

    # PdfReader/PdfWriter/.pages/add_page are the current PyPDF2 names; the
    # older PdfFileReader/PdfFileWriter/getPage/addPage were removed in
    # PyPDF2 3.0.
    pdf_reader = PyPDF2.PdfReader(BytesIO(file_data))
    pdf_writer = PyPDF2.PdfWriter()
    for page_num in range(start_page, end_page + 1):
        pdf_writer.add_page(pdf_reader.pages[page_num])

    # Serialise only the selected pages and hand the raw bytes to pdf2image.
    with BytesIO() as temp_pdf:
        pdf_writer.write(temp_pdf)
        images = convert_from_bytes(temp_pdf.getvalue())

    # File names carry the one-based page number of the source document.
    for offset, image in enumerate(images):
        image.save(f'{object_key}_page_{start_page + offset + 1}.png', 'PNG')


def merge_bounding_boxes(bboxes):
    """
    Merge multiple bounding boxes into one.

    Args:
    bboxes (iterable of tuples): Bounding boxes in the format (x, y, w, h).
        Any iterable is accepted, including one-shot generators.

    Returns:
    A single bounding box in the format (x, y, w, h) that encloses all given
    bounding boxes, or None when no boxes are given.
    """
    # Materialise once so a generator is not exhausted after the first pass.
    bboxes = list(bboxes)
    if not bboxes:
        return None

    x_min = min(x for x, _, _, _ in bboxes)
    y_min = min(y for _, y, _, _ in bboxes)
    # The far edges are origin + extent; the merged size is measured from
    # the merged origin.
    x_max = max(x + w for x, _, w, _ in bboxes)
    y_max = max(y + h for _, y, _, h in bboxes)

    return (x_min, y_min, x_max - x_min, y_max - y_min)

# Demo: merge five sample (x, y, w, h) boxes and print the enclosing box.
bboxes = [
    (30, 40, 10, 10),
    (31, 41, 15, 15),
    (50, 60, 20, 20),
    (52, 62, 25, 25),
    (80, 90, 30, 30),
]
merged_bbox = merge_bounding_boxes(bboxes)
print(merged_bbox)


import pandas as pd

# Sample records: each entry pairs a field key with a flat list of four
# coordinate values.
data = [
    {'key': 'insured name', 'coordinates': [123.4, 443.4, 355.7, 354.6]},
    {'key': 'insured name', 'coordinates': [121.4, 487.4, 397.5, 343.6]},
    {'key': 'insured name value', 'coordinates': [142.4, 447.4, 396.5, 367.6]},
]

# Create DataFrame from the data
df = pd.DataFrame(data)

# Keep only the rows whose key is exactly 'insured name'
filtered_rows = df[df['key'] == 'insured name']

# Collect the coordinates of the matching rows as a list of lists
combined_coordinates = filtered_rows['coordinates'].tolist()

print(combined_coordinates)

# Store every row's coordinates as an immutable tuple. Each cell is a flat
# list of numbers, so the whole list is converted at once; calling tuple()
# on the individual floats would raise TypeError.
df['coordinates'] = df['coordinates'].apply(tuple)


from PIL import Image, ImageDraw

def merge_bounding_boxes(boxes):
    """Return the smallest corner-format box that encloses every input box.

    Args:
        boxes: Sequence of boxes whose first four items are
            (left, top, right, bottom).

    Returns:
        A (left, top, right, bottom) tuple, or None when *boxes* is empty
        or otherwise falsy.
    """
    if not boxes:
        return None

    # Gather each edge separately, then take the outermost value per edge.
    x_starts = [rect[0] for rect in boxes]
    y_starts = [rect[1] for rect in boxes]
    x_ends = [rect[2] for rect in boxes]
    y_ends = [rect[3] for rect in boxes]

    return min(x_starts), min(y_starts), max(x_ends), max(y_ends)

# Boxes given as (left, top, right, bottom) corners; they are collapsed into
# the single rectangle that is drawn below.
bounding_boxes = [(50, 50, 200, 200), (100, 100, 250, 250)]
merged_bounding_box = merge_bounding_boxes(bounding_boxes)

# Load the source picture and outline the merged region in red.
image_path = 'path/to/your/image.jpg'
image = Image.open(image_path)
draw = ImageDraw.Draw(image)
draw.rectangle(merged_bounding_box, outline='red', width=3)

# Write the annotated picture to disk (image.show() would preview it instead).
image.save('path/to/save/your/output_image.jpg')


import boto3
import pandas as pd
from io import BytesIO
import PyPDF2

# Bucket that the PDFs listed below are read from
BUCKET_NAME = 'your_bucket_name'

# S3 client; no explicit credentials are passed to boto3 here.
# NOTE: this rebinds the module-level name `s3`.
s3 = boto3.client(service_name='s3')

# One-column DataFrame listing the paths of the PDFs to read
data = {
    'file_paths': [
        '/path/to/first_file.pdf',
        '/path/to/second_file.pdf',
    ],
}
df = pd.DataFrame(data)

# Function to read a PDF file from S3 bucket
def read_pdf_from_s3(file_path):
    """Download a PDF from ``BUCKET_NAME`` and return its extracted text.

    Args:
        file_path: Key of the PDF object inside the bucket.

    Returns:
        The text of all pages concatenated in page order, with no separator
        inserted between pages.
    """
    obj = s3.get_object(Bucket=BUCKET_NAME, Key=file_path)
    pdf_file = BytesIO(obj['Body'].read())

    # PdfReader and .pages belong to the same PyPDF2 API generation as
    # extract_text(); the older PdfFileReader/numPages/getPage names were
    # removed in PyPDF2 3.0.
    pdf_reader = PyPDF2.PdfReader(pdf_file)

    # join() avoids re-copying a growing string on every page; "or ''" is a
    # defensive guard in case a page yields no text.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

# Fetch each listed PDF in turn and print a header line followed by its text
for file_path in df['file_paths']:
    pdf_content = read_pdf_from_s3(file_path)
    print(f"Content of {file_path}:", pdf_content, sep="\n")