mirror of
https://github.com/ArchiveBox/ArchiveBox.git
synced 2026-06-22 03:20:45 -04:00
6ce2555dfd
Renames (no functional change, just consistency with the rest of the codebase): - cli/cli_utils.py → cli/cli_util.py - core/host_utils.py → core/host_util.py - core/tag_utils.py → core/tag_util.py - crawls/schedule_utils.py → crawls/schedule_util.py - machine/env_utils.py → machine/env_util.py Functional fixes: - archivebox add --index-only now materializes Snapshot rows synchronously via crawl.create_snapshots_from_urls() instead of just queueing the Crawl and leaving the index empty. The previous behavior broke every test that expected --index-only to populate the index, since the runner is never started in index-only mode. - config/collection.py: add _coerce_from_str_dict as the inverse of _coerce_to_str_dict so JSON-encoded INI values are decoded back to native dict/list types when mirrored into Machine.config (a JSONField). Without this, downstream consumers like MachineEvent / abx-dl get raw JSON strings where they expect dicts. Plus matching admin / middleware / model touch-ups, the registration password_change_form template, and assorted small cleanups the user worked through while validating the deploy path. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
45 lines
1.3 KiB
Python
45 lines
1.3 KiB
Python
"""
|
|
Shared CLI utilities for ArchiveBox commands.
|
|
|
|
This module contains common utilities used across multiple CLI commands,
|
|
extracted to avoid code duplication.
|
|
"""
|
|
|
|
__package__ = "archivebox.cli"
|
|
|
|
|
|
def apply_filters(queryset, filter_kwargs: dict, limit: int | None = None):
|
|
"""
|
|
Apply Django-style filters from CLI kwargs to a QuerySet.
|
|
|
|
Supports: --status=queued, --url__icontains=example, --id__in=uuid1,uuid2
|
|
|
|
Args:
|
|
queryset: Django QuerySet to filter
|
|
filter_kwargs: Dict of filter key-value pairs from CLI
|
|
limit: Optional limit on results
|
|
|
|
Returns:
|
|
Filtered QuerySet
|
|
|
|
Example:
|
|
queryset = Snapshot.objects.all()
|
|
filter_kwargs = {'status': 'queued', 'url__icontains': 'example.com'}
|
|
filtered = apply_filters(queryset, filter_kwargs, limit=10)
|
|
"""
|
|
filters = {}
|
|
for key, value in filter_kwargs.items():
|
|
if value is None or key in ("limit", "offset"):
|
|
continue
|
|
# Handle CSV lists for __in filters
|
|
if key.endswith("__in") and isinstance(value, str):
|
|
value = [v.strip() for v in value.split(",")]
|
|
filters[key] = value
|
|
|
|
if filters:
|
|
queryset = queryset.filter(**filters)
|
|
if limit:
|
|
queryset = queryset[:limit]
|
|
|
|
return queryset
|