File System Ingestion

File System Ingestion

Ingest documents from local and network file systems with monitoring and change detection.

File System Ingestion

Monitor file systems for new and updated documents.

import os
from pathlib import Path

def scan_directory(root_path):
    """Recursively yield metadata for every supported file under *root_path*.

    Args:
        root_path: Directory to walk; anything accepted by ``pathlib.Path``.

    Yields:
        dict: One mapping per regular file accepted by
        ``is_supported_format``, with the keys ``'path'`` (string form of
        the path), ``'size'`` (``st_size``), ``'modified'`` (``st_mtime``)
        and ``'extension'`` (``path.suffix``, case left unchanged).
    """
    for path in Path(root_path).rglob('*'):
        if not (path.is_file() and is_supported_format(path)):
            continue
        # Stat once: a single syscall, and size/mtime are guaranteed to
        # describe the same snapshot of the file even if it is being
        # rewritten while the scan runs.
        info = path.stat()
        yield {
            'path': str(path),
            'size': info.st_size,
            'modified': info.st_mtime,
            'extension': path.suffix,
        }

def is_supported_format(path):
    """Return True when *path* has one of the ingestible file extensions.

    The comparison is case-insensitive: the suffix is lower-cased before
    the lookup. *path* must expose a ``suffix`` attribute, as
    ``pathlib.Path`` objects do.
    """
    extension = path.suffix.lower()
    return extension in {'.docx', '.md', '.pdf', '.pptx', '.txt'}

File System Watcher

from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler

class IngestionHandler(FileSystemEventHandler):
    """Route watchdog file-system events to the ingestion functions.

    Created and modified events for directories are ignored, since only
    file paths are meant to reach ``ingest_file`` / ``update_file``.
    """

    def on_created(self, event):
        """Ingest a newly created file; directory creations are skipped."""
        if event.is_directory:
            return
        ingest_file(event.src_path)

    def on_modified(self, event):
        """Re-process a changed file; directory modifications are skipped."""
        # watchdog also reports a directory as modified when its contents
        # change, so without this guard directory paths would be passed on.
        if event.is_directory:
            return
        update_file(event.src_path)

    def on_deleted(self, event):
        """Drop the deleted path from the index."""
        # Forwarded unfiltered: the path no longer exists, so deciding
        # whether it was ever indexed is left to remove_from_index.
        remove_from_index(event.src_path)

Next: Cloud storage ingestion.

Subscribe to our newsletter

Get the latest posts delivered right to your inbox.

Subscribe on LinkedIn