
File System Ingestion
Ingest documents from local and network file systems with monitoring and change detection.
File System Ingestion
Monitor file systems for new and updated documents.
import os
from pathlib import Path
def scan_directory(root_path):
    """Recursively yield metadata for every supported file under ``root_path``.

    Args:
        root_path: Directory to walk; anything ``pathlib.Path`` accepts.

    Yields:
        dict: One record per regular file accepted by
        ``is_supported_format``, with the keys ``'path'`` (the path as a
        string), ``'size'`` (``st_size``), ``'modified'`` (``st_mtime``) and
        ``'extension'`` (the suffix exactly as it appears on disk).
    """
    for path in Path(root_path).rglob('*'):
        if not (path.is_file() and is_supported_format(path)):
            continue
        # A single stat() call keeps size and mtime consistent with each
        # other when the file is being written concurrently, and avoids a
        # redundant system call per file.
        info = path.stat()
        yield {
            'path': str(path),
            'size': info.st_size,
            'modified': info.st_mtime,
            'extension': path.suffix,
        }
def is_supported_format(path):
    """Report whether ``path`` carries one of the ingestible extensions.

    The comparison is case-insensitive. ``path`` must expose a ``suffix``
    attribute, as ``pathlib.Path`` objects do.
    """
    extension = path.suffix.lower()
    return extension in {'.pdf', '.txt', '.md', '.docx', '.pptx'}
File System Watcher
from watchdog.observers import Observer
from watchdog.events import FileSystemEventHandler
class IngestionHandler(FileSystemEventHandler):
    """Forward watchdog file events to the ingestion helpers.

    Events are forwarded only for non-directory paths whose extension passes
    ``is_supported_format``, so the watcher acts on the same kind of files
    that ``scan_directory`` reports. Directory events are ignored.
    """

    @staticmethod
    def _is_document(event, path):
        """Return True if ``path`` from a non-directory event is supported."""
        if event.is_directory:
            return False
        # fsdecode accepts both str and bytes paths before building a Path.
        return is_supported_format(Path(os.fsdecode(path)))

    def on_created(self, event):
        """Call ``ingest_file`` for a newly created supported file."""
        if self._is_document(event, event.src_path):
            ingest_file(event.src_path)

    def on_modified(self, event):
        """Call ``update_file`` for a modified supported file."""
        if self._is_document(event, event.src_path):
            update_file(event.src_path)

    def on_deleted(self, event):
        """Call ``remove_from_index`` for a deleted supported file."""
        if self._is_document(event, event.src_path):
            remove_from_index(event.src_path)

    def on_moved(self, event):
        """Handle a rename as removal of the old path plus ingestion of the new.

        Without this override the base-class no-op would leave the old path
        in the index and never ingest the file under its new path.
        """
        if self._is_document(event, event.src_path):
            remove_from_index(event.src_path)
        if self._is_document(event, event.dest_path):
            ingest_file(event.dest_path)
Next: Cloud storage ingestion.