Classifier rewrite (#213)

The classifier has been re-implemented and now uses a DSL allowing for full customisation. Several bugs have also been fixed.

- Closes https://github.com/bitmagnet-io/bitmagnet/issues/182
- Closes https://github.com/bitmagnet-io/bitmagnet/issues/70
- Closes https://github.com/bitmagnet-io/bitmagnet/issues/68
- Hopefully fixes https://github.com/bitmagnet-io/bitmagnet/issues/126
This commit is contained in:
mgdigital
2024-04-21 16:24:10 +01:00
committed by GitHub
parent 7902b93bd7
commit c16f76130c
163 changed files with 7879 additions and 2310 deletions
+6
View File
@@ -113,6 +113,12 @@ jobs:
uses: actions/setup-node@v3
with:
node-version: 20.x
- name: Setup protoc
uses: arduino/setup-protoc@v3
with:
version: "23.4"
- name: Install protoc-gen-go
run: go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.33.0
- name: Install web app, apply database migrations, generate code and build web app
run: |
(cd webui && npm ci); \
+6
View File
@@ -4,9 +4,15 @@ mockname: "{{.InterfaceName}}"
outpkg: "{{.PackageName}}_mocks"
filename: "{{.InterfaceName}}.go"
packages:
github.com/bitmagnet-io/bitmagnet/internal/classifier:
interfaces:
LocalSearch:
github.com/bitmagnet-io/bitmagnet/internal/protocol/dht/ktable:
interfaces:
Table:
github.com/bitmagnet-io/bitmagnet/internal/protocol/dht/responder:
interfaces:
Limiter:
github.com/bitmagnet-io/bitmagnet/internal/tmdb:
interfaces:
Client:
+1
View File
@@ -1,2 +1,3 @@
bitmagnet.io/schemas/**/*.*
webui/dist/**/*.*
webui/src/app/graphql/generated/**/*.*
+13
View File
@@ -8,7 +8,9 @@ tasks:
- go run ./internal/gql/enums/gen/genenums.go
- go run ./internal/torznab/gencategories/gencategories.go
- go run github.com/99designs/gqlgen generate --config ./internal/gql/gqlgen.yml
- protoc --go_out=. ./internal/protobuf/bitmagnet.proto
- go run github.com/vektra/mockery/v2
- go run . classifier schema --format json > ./bitmagnet.io/schemas/classifier-0.1.json
lint:
cmds:
@@ -82,3 +84,14 @@ tasks:
- goose -s create {{.NAME}} sql
vars:
NAME: migration
install-protoc:
cmds:
- |
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v{{.VERSION}}/protoc-{{.VERSION}}-{{.PLATFORM}}.zip
sudo unzip -o protoc-{{.VERSION}}-{{.PLATFORM}}.zip -d /usr/local bin/protoc
sudo unzip -o protoc-{{.VERSION}}-{{.PLATFORM}}.zip -d /usr/local 'include/*'
rm -f protoc-{{.VERSION}}-{{.PLATFORM}}.zip
vars:
VERSION: 23.4
PLATFORM: osx-x86_64
+1
View File
@@ -3,6 +3,7 @@ source 'https://rubygems.org'
gem "just-the-docs", "~> 0.6"
gem "jekyll", "~> 4.3"
gem "jekyll-redirect-from", "~> 0.16"
gem "jekyll-target-blank", "~> 2.0"
gem "kramdown", "~> 2.3"
gem "kramdown-parser-gfm", "~> 1.1"
gem "webrick", "~> 1.8"
+7
View File
@@ -39,6 +39,9 @@ GEM
sass-embedded (~> 1.54)
jekyll-seo-tag (2.8.0)
jekyll (>= 3.8, < 5.0)
jekyll-target-blank (2.0.2)
jekyll (>= 3.0, < 5.0)
nokogiri (~> 1.10)
jekyll-watch (2.2.1)
listen (~> 3.0)
just-the-docs (0.6.2)
@@ -55,9 +58,12 @@ GEM
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
mercenary (0.4.0)
nokogiri (1.16.4-arm64-darwin)
racc (~> 1.4)
pathutil (0.16.2)
forwardable-extended (~> 2.6)
public_suffix (5.0.3)
racc (1.7.3)
rake (13.0.6)
rb-fsevent (0.11.2)
rb-inotify (0.10.1)
@@ -78,6 +84,7 @@ PLATFORMS
DEPENDENCIES
jekyll (~> 4.3)
jekyll-redirect-from (~> 0.16)
jekyll-target-blank (~> 2.0)
just-the-docs (~> 0.6)
kramdown (~> 2.3)
kramdown-parser-gfm (~> 1.1)
+1
View File
@@ -29,3 +29,4 @@ nav_external_links:
favicon_ico: "/assets/images/favicon.png"
plugins:
- jekyll-redirect-from
- jekyll-target-blank
+11
View File
@@ -0,0 +1,11 @@
require 'fileutils'

# Jekyll generator plugin that copies every JSON schema file from the site
# source's `schemas/` directory verbatim into the built site, so each schema
# is served at /schemas/<basename>.json (e.g. the classifier JSON schema).
module Schemas
  class Generator < Jekyll::Generator
    # site - the Jekyll::Site being built; reads from site.source, writes
    #        into site.dest. Returns nothing; side effect is file copies.
    def generate(site)
      dest_dir = File.join(site.dest, 'schemas')
      # Ensure the target directory exists: on a clean build site.dest may
      # not yet contain a `schemas` subdirectory, and File.open(..., 'w')
      # would raise Errno::ENOENT without it.
      FileUtils.mkdir_p(dest_dir)
      Dir.glob(File.join(site.source, 'schemas', '*.json')).each do |json_file|
        # Copy each schema byte-for-byte; no templating/transformation needed.
        FileUtils.cp(json_file, File.join(dest_dir, File.basename(json_file)))
      end
      # NOTE(review): generators run before Jekyll's site cleaner/writer;
      # presumably these paths survive because they're outside the managed
      # static-file set — confirm files persist in the final _site output.
    end
  end
end
+3 -3
View File
@@ -8,7 +8,7 @@ nav_order: 7
Community members have developed the following resources, tools and packages; these are not maintained under the **bitmagnet** project:
- [@davispuh](https://github.com/davispuh){:target="\_blank"} has published an Arch package, `bitmagnet-git`, [in the AUR repository](https://aur.archlinux.org/packages/bitmagnet-git){:target="\_blank"}.
- [@DyonR](https://github.com/DyonR){:target="\_blank"} has developed [magnetico2bitmagnet](https://github.com/DyonR/magnetico2bitmagnet){:target="\_blank"}, a collection of scripts for importing into **bitmagnet** from Magnetico and other sources.
- [@DyonR](https://github.com/DyonR){:target="\_blank"} has written [a **bitmagnet** on Unraid guide](https://github.com/DyonR/bitmagnet-unraid){:target="\_blank"}.
- [@davispuh](https://github.com/davispuh) has published an Arch package, `bitmagnet-git`, [in the AUR repository](https://aur.archlinux.org/packages/bitmagnet-git).
- [@DyonR](https://github.com/DyonR) has developed [magnetico2bitmagnet](https://github.com/DyonR/magnetico2bitmagnet), a collection of scripts for importing into **bitmagnet** from Magnetico and other sources.
- [@DyonR](https://github.com/DyonR) has written [a **bitmagnet** on Unraid guide](https://github.com/DyonR/bitmagnet-unraid).
- Your link could be here!
+3 -3
View File
@@ -12,7 +12,7 @@ No. **bitmagnet** does not download, store or distribute any content _at all_. I
## Should I use a VPN with **bitmagnet**?
It is recommended to use a VPN: **bitmagnet** may download **metadata about** illegal and copyrighted content. It is possible that rudimentary law enforcement and anti-piracy tracking tools would incorrectly flag this activity, although we've never heard about anyone getting into trouble for using this or similar metadata crawlers. Setting up a VPN is simple and cheap, and it's better to be safe than sorry. We are not affiliated with any VPN providers, but if you're unsure which provider to choose, we can recommend [Mullvad](https://mullvad.net/){:target="\_blank"}.
It is recommended to use a VPN: **bitmagnet** may download **metadata about** illegal and copyrighted content. It is possible that rudimentary law enforcement and anti-piracy tracking tools would incorrectly flag this activity, although we've never heard about anyone getting into trouble for using this or similar metadata crawlers. Setting up a VPN is simple and cheap, and it's better to be safe than sorry. We are not affiliated with any VPN providers, but if you're unsure which provider to choose, we can recommend [Mullvad](https://mullvad.net/).
## Is **bitmagnet** intended to be used as a public service?
@@ -47,7 +47,7 @@ Visit the metrics endpoint at `/metrics` and check the metric `bitmagnet_dht_cra
## How are the seeders/leechers numbers determined for torrents crawled from the DHT?
The DHT crawler uses a [BEP33 scrape request](https://www.bittorrent.org/beps/bep_0033.html){:target="\_blank"} to provide a very rough estimate of the current seeders/leechers.
The DHT crawler uses a [BEP33 scrape request](https://www.bittorrent.org/beps/bep_0033.html) to provide a very rough estimate of the current seeders/leechers.
## How do I know if a torrent crawled by **bitmagnet** is being actively seeded, and that I'll be able to download it?
@@ -59,7 +59,7 @@ No. The DHT crawler works by sampling random info hashes from the network, and w
## I'm seeing a lot of torrents in the "Unknown" category, that are clearly of a particular content type - what's wrong?
**bitmagnet** is in early development, and improving the classifier will be an ongoing effort. When new versions are released, you can follow the [reclassify tutorial](/tutorials/reprocess-reclassify.html) to reclassify torrents.
**bitmagnet** is in early development, and improving the classifier will be an ongoing effort. When new versions are released, you can follow the [reclassify tutorial](/tutorials/reprocess-reclassify.html) to reclassify torrents. If you'd like to [improve or customize the classifier](/tutorials/classifier.html), this is also possible.
## Can I run multiple **bitmagnet** instances pointing to the same database?
+4 -4
View File
@@ -14,7 +14,7 @@ nav_order: -1
> Important
>
> This software is currently in alpha. It is ready to preview some interesting and unique features, but there will likely be bugs, as well as API and database schema changes before the (currently theoretical) 1.0 release. If you'd like to support this project and help it gain momentum, **[please give it a star on GitHub](https://github.com/bitmagnet-io/bitmagnet){:target="\_blank"}**.
> This software is currently in alpha. It is ready to preview some interesting and unique features, but there will likely be bugs, as well as API and database schema changes before the (currently theoretical) 1.0 release. If you'd like to support this project and help it gain momentum, **[please give it a star on GitHub](https://github.com/bitmagnet-io/bitmagnet)**.
>
> [If you're interested in getting involved and you're a backend GoLang or frontend TypeScript/Angular developer, or you're knowledgeable about BitTorrent protocols then **I'd like to hear from you**](/internals-development.html) - let's get this thing over the line!
@@ -51,7 +51,7 @@ This means that **bitmagnet** is not reliant on any external trackers or torrent
- [ ] A more complete web UI
- [ ] Saved searches for content of particular interest, enabling custom feeds in addition to the following feature
- [ ] Smart deletion: there's a lot of crap out there; crawling DHT can quickly use lots of database disk space, and search becomes slower with millions of indexed torrents of which 90% are of no interest. A smart deletion feature would use saved searches to identify content that you're _not_ interested in, including low quality content (such as low resolution movies). It would automatically delete associated metadata and add the info hash to a bloom filter, preventing the torrent from being re-indexed in future.
- [ ] Bi-directional integration with the [Prowlarr indexer proxy](https://prowlarr.com/){:target="\_blank"}: Currently **bitmagnet** can be added as an indexer in Prowlarr; bi-directional integration would allow **bitmagnet** to crawl content from any indexer configured in Prowlarr, unlocking many new sources of content
- [ ] Bi-directional integration with the [Prowlarr indexer proxy](https://prowlarr.com/): Currently **bitmagnet** can be added as an indexer in Prowlarr; bi-directional integration would allow **bitmagnet** to crawl content from any indexer configured in Prowlarr, unlocking many new sources of content
- [ ] More documentation and more tests!
### Pipe dream features
@@ -61,5 +61,5 @@ This is where things start to get a bit nebulous. For now all focus is on delive
- [ ] In-place seeding: identify files on your computer that are part of an indexed torrent, and allow them to be seeded in place after having moved, renamed or deleted parts of the torrent
- [ ] Integration with popular BitTorrent clients
- [ ] Federation of some sort: allow friends to connect instances and pool the indexing effort, perhaps involving crowd sourcing manual content curation to supplement the automated classifiers
- [ ] Something that looks like a decentralized private tracker; by this I probably mean something that's based partly on personal trust and manually weeding out any bad actors; I'd be wary of creating something that looks a bit like [Tribler](https://github.com/Tribler/tribler){:target="\_blank"}, which while an interesting project seems to have demonstrated that implementing trust, reputation and privacy at the protocol level carries too much overhead to be a compelling alternative to plain old BitTorrent, for all its imperfections
- [ ] Support for the [BitTorrent v2 protocol](https://blog.libtorrent.org/2020/09/bittorrent-v2/){:target="\_blank"}: It remains to be seen if wider adoption will ever make this a valuable feature
- [ ] Something that looks like a decentralized private tracker; by this I probably mean something that's based partly on personal trust and manually weeding out any bad actors; I'd be wary of creating something that looks a bit like [Tribler](https://github.com/Tribler/tribler), which while an interesting project seems to have demonstrated that implementing trust, reputation and privacy at the protocol level carries too much overhead to be a compelling alternative to plain old BitTorrent, for all its imperfections
- [ ] Support for the [BitTorrent v2 protocol](https://blog.libtorrent.org/2020/09/bittorrent-v2/): It remains to be seen if wider adoption will ever make this a valuable feature
+1 -1
View File
@@ -8,4 +8,4 @@ has_children: true
# Internals & Development
{: .highlight }
Are you an experienced developer with knowledge of GoLang, Postgres, TypeScript/Angular and/or BitTorrent protocols? I'm currently a lone developer with a full time job and many other commitments, and have been working on this in spare moments for the past few months. This project is too big for one person! If you're interested in contributing please [review the open issues](https://github.com/bitmagnet-io/bitmagnet/issues){:target="\_blank"} and feel free to open a PR!
Are you an experienced developer with knowledge of GoLang, Postgres, TypeScript/Angular and/or BitTorrent protocols? I'm currently a lone developer with a full time job and many other commitments, and have been working on this in spare moments for the past few months. This project is too big for one person! If you're interested in contributing please [review the open issues](https://github.com/bitmagnet-io/bitmagnet/issues) and feel free to open a PR!
@@ -7,15 +7,15 @@ nav_order: 2
# Architecture & Lifecycle of the DHT Crawler
The DHT and BitTorrent protocols are (rather impenetrably) documented at [bittorrent.org](http://bittorrent.org/beps/bep_0000.html){:target="\_blank"}. Relevant resources include:
The DHT and BitTorrent protocols are (rather impenetrably) documented at [bittorrent.org](http://bittorrent.org/beps/bep_0000.html). Relevant resources include:
- [BEP 5: DHT Protocol](http://bittorrent.org/beps/bep_0005.html){:target="\_blank"}
- [BEP 51: Infohash Indexing](https://www.bittorrent.org/beps/bep_0051.html){:target="\_blank"}
- [BEP 33: DHT Scrapes](https://www.bittorrent.org/beps/bep_0033.html){:target="\_blank"}
- [BEP 10: Extension Protocol](https://www.bittorrent.org/beps/bep_0010.html){:target="\_blank"}
- [The Kademlia paper](https://pdos.csail.mit.edu/~petar/papers/maymounkov-kademlia-lncs.pdf){:target="\_blank"}
- [BEP 5: DHT Protocol](http://bittorrent.org/beps/bep_0005.html)
- [BEP 51: Infohash Indexing](https://www.bittorrent.org/beps/bep_0051.html)
- [BEP 33: DHT Scrapes](https://www.bittorrent.org/beps/bep_0033.html)
- [BEP 10: Extension Protocol](https://www.bittorrent.org/beps/bep_0010.html)
- [The Kademlia paper](https://pdos.csail.mit.edu/~petar/papers/maymounkov-kademlia-lncs.pdf)
The rest of what I've figured out about how to implement a DHT crawler was cobbled together from [the now archived **magnetico** project](https://github.com/boramalper/magnetico){:target="\_blank"} and [anacrolix's BitTorrent libraries](https://github.com/anacrolix){:target="\_blank"}.
The rest of what I've figured out about how to implement a DHT crawler was cobbled together from [the now archived **magnetico** project](https://github.com/boramalper/magnetico) and [anacrolix's BitTorrent libraries](https://github.com/anacrolix).
The following diagram illustrates roughly how the crawler has been implemented within **bitmagnet**. It's debatable if this will help stop anyone's brain from melting, including my own.
@@ -9,22 +9,22 @@ nav_order: 3
## Grafana stack & Prometheus integration
**bitmagnet** can integrate with the [Grafana stack](https://grafana.com/){:target="\_blank"} and [Prometheus](https://prometheus.io/){:target="\_blank"} for monitoring and building observability dashboards for the DHT crawler and other components. See the "Optional observability services" section of the [example docker compose configuration](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml){:target="\_blank"} and [example Grafana / Prometheus configuration files and a provisioned Grafana dashboard](https://github.com/bitmagnet-io/bitmagnet/tree/main/observability){:target="\_blank"}.
**bitmagnet** can integrate with the [Grafana stack](https://grafana.com/) and [Prometheus](https://prometheus.io/) for monitoring and building observability dashboards for the DHT crawler and other components. See the "Optional observability services" section of the [example docker compose configuration](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml) and [example Grafana / Prometheus configuration files and a provisioned Grafana dashboard](https://github.com/bitmagnet-io/bitmagnet/tree/main/observability).
![Grafana dashboard](/assets/images/grafana-1.png)
The example integration includes:
- [Grafana](https://grafana.com/oss/grafana/){:target="\_blank"} - A dashboarding and visualization tool
- [Grafana Agent](https://grafana.com/oss/agent/){:target="\_blank"} - Collects metrics and logs, and forwards them to storage backends
- [Prometheus](https://prometheus.io/){:target="\_blank"} - A time series database for metrics
- [Loki](https://grafana.com/oss/loki/){:target="\_blank"} - A log aggregation system
- [Pyroscope](https://pyroscope.io/){:target="\_blank"} - A continuous profiling tool
- [Postgres exporter](https://github.com/prometheus-community/postgres_exporter){:target="\_blank"} - Exposes Postgres metrics to Prometheus
- [Grafana](https://grafana.com/oss/grafana/) - A dashboarding and visualization tool
- [Grafana Agent](https://grafana.com/oss/agent/) - Collects metrics and logs, and forwards them to storage backends
- [Prometheus](https://prometheus.io/) - A time series database for metrics
- [Loki](https://grafana.com/oss/loki/) - A log aggregation system
- [Pyroscope](https://pyroscope.io/) - A continuous profiling tool
- [Postgres exporter](https://github.com/prometheus-community/postgres_exporter) - Exposes Postgres metrics to Prometheus
# Profiling with pprof
**bitmagnet** exposes [Go pprof](https://golang.org/pkg/net/http/pprof/){:target="\_blank"} profiling endpoints at `/debug/pprof/*`, for example:
**bitmagnet** exposes [Go pprof](https://golang.org/pkg/net/http/pprof/) profiling endpoints at `/debug/pprof/*`, for example:
```sh
go tool pprof http://localhost:3333/debug/pprof/heap
@@ -15,21 +15,21 @@ Postgres is the primary data store, and powers the search engine and message que
Some key libraries used include:
- [anacrolix/torrent](https://github.com/anacrolix/torrent){:target="\_blank"} not heavily used right now, but contains many useful BitTorrent utilities and could drive future features such as in-place seeding
- [fx](https://uber-go.github.io/fx/){:target="\_blank"} for dependency injection and management of the application lifecycle
- [gin](https://gin-gonic.com/){:target="\_blank"} for the HTTP server
- [goose](https://pressly.github.io/goose/){:target="\_blank"} for database migrations
- [gorm](https://gorm.io/){:target="\_blank"} for database access
- [gqlgen](https://gqlgen.com/){:target="\_blank"} for the GraphQL server implementation
- [rex](https://github.com/hedhyw/rex){:target="\_blank"} a regular expression library that makes some of the monstrous classification regexes more manageable
- [urfave/cli](https://cli.urfave.org/){:target="\_blank"} for the command line interface
- [zap](https://github.com/uber-go/zap){:target="\_blank"} for logging
- [anacrolix/torrent](https://github.com/anacrolix/torrent) not heavily used right now, but contains many useful BitTorrent utilities and could drive future features such as in-place seeding
- [fx](https://uber-go.github.io/fx/) for dependency injection and management of the application lifecycle
- [gin](https://gin-gonic.com/) for the HTTP server
- [goose](https://pressly.github.io/goose/) for database migrations
- [gorm](https://gorm.io/) for database access
- [gqlgen](https://gqlgen.com/) for the GraphQL server implementation
- [rex](https://github.com/hedhyw/rex) a regular expression library that makes some of the monstrous classification regexes more manageable
- [urfave/cli](https://cli.urfave.org/) for the command line interface
- [zap](https://github.com/uber-go/zap) for logging
## TypeScript/Angular Web UI
Using [Angular Material components](https://material.angular.io/){:target="\_blank"}. The web UI is embedded in the GoLang binary and served by the Gin web framework, and hence the build artifacts are committed into the repository.
Using [Angular Material components](https://material.angular.io/). The web UI is embedded in the GoLang binary and served by the Gin web framework, and hence the build artifacts are committed into the repository.
## Other tooling
- The repository includes a [Taskfile](https://taskfile.dev/){:target="\_blank"} containing several useful development scripts
- The repository includes a [Taskfile](https://taskfile.dev/) containing several useful development scripts
- GitHub actions are used for CI, building the Docker image and for building this website
+331
View File
@@ -0,0 +1,331 @@
{
"$id": "https://bitmagnet.io/schemas/classifier-0.1.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
"definitions": {
"action": {
"oneOf": [
{
"$ref": "#/definitions/action_single"
},
{
"$ref": "#/definitions/action_multi"
}
]
},
"action__add_tag": {
"additionalProperties": false,
"description": "Add one or more tags to the current torrent",
"properties": {
"add_tag": {
"items": {
"type": "string"
},
"type": "array"
}
},
"required": [
"add_tag"
],
"type": "object"
},
"action__attach_local_content_by_id": {
"const": "attach_local_content_by_id",
"description": "Use the torrent hint to attach locally stored content by ID"
},
"action__attach_local_content_by_search": {
"const": "attach_local_content_by_search",
"description": "Attempt to attach local content with a search on the torrent name"
},
"action__attach_tmdb_content_by_id": {
"const": "attach_tmdb_content_by_id",
"description": "Use the torrent hint to attach content from the TMDB API by ID"
},
"action__attach_tmdb_content_by_search": {
"const": "attach_tmdb_content_by_search",
"description": "Attempt to attach content from the TMDB API with a search on the torrent name"
},
"action__delete": {
"const": "delete",
"description": "Delete the current torrent"
},
"action__find_match": {
"additionalProperties": false,
"description": "Iterate through a series of actions to find the first that does not return an unmatched error",
"properties": {
"find_match": {
"items": {
"$ref": "#/definitions/action_single"
},
"type": "array"
}
},
"required": [
"find_match"
],
"type": "object"
},
"action__if_else": {
"additionalProperties": false,
"description": "Execute an action based on a condition",
"properties": {
"if_else": {
"additionalProperties": false,
"properties": {
"condition": {
"$ref": "#/definitions/condition"
},
"else_action": {
"$ref": "#/definitions/action"
},
"if_action": {
"$ref": "#/definitions/action"
}
},
"required": [
"condition"
],
"type": "object"
}
},
"required": [
"if_else"
],
"type": "object"
},
"action__parse_date": {
"const": "parse_date",
"description": "Try to parse a date from the name of the current torrent"
},
"action__parse_video_content": {
"const": "parse_video_content",
"description": "Parse video-related attributes from the name of the current torrent"
},
"action__run_workflow": {
"additionalProperties": false,
"description": "Run a different workflow within the current workflow",
"properties": {
"run_workflow": {
"items": {
"minLength": 1,
"type": "string"
},
"type": "array"
}
},
"required": [
"run_workflow"
],
"type": "object"
},
"action__set_content_type": {
"additionalProperties": false,
"description": "Set the content type of the current torrent",
"properties": {
"set_content_type": {
"enum": [
"movie",
"tv_show",
"music",
"ebook",
"comic",
"audiobook",
"game",
"software",
"xxx",
"unknown"
],
"type": "string"
}
},
"required": [
"set_content_type"
],
"type": "object"
},
"action__unmatched": {
"const": "unmatched",
    "description": "Return an unmatched error for the current torrent"
},
"action_multi": {
"items": {
"$ref": "#/definitions/action_single"
},
"type": "array"
},
"action_single": {
"oneOf": [
{
"$ref": "#/definitions/action__add_tag"
},
{
"$ref": "#/definitions/action__attach_local_content_by_id"
},
{
"$ref": "#/definitions/action__attach_local_content_by_search"
},
{
"$ref": "#/definitions/action__attach_tmdb_content_by_id"
},
{
"$ref": "#/definitions/action__attach_tmdb_content_by_search"
},
{
"$ref": "#/definitions/action__delete"
},
{
"$ref": "#/definitions/action__find_match"
},
{
"$ref": "#/definitions/action__if_else"
},
{
"$ref": "#/definitions/action__unmatched"
},
{
"$ref": "#/definitions/action__parse_date"
},
{
"$ref": "#/definitions/action__parse_video_content"
},
{
"$ref": "#/definitions/action__run_workflow"
},
{
"$ref": "#/definitions/action__set_content_type"
}
]
},
"condition": {
"oneOf": [
{
"$ref": "#/definitions/condition__and"
},
{
"$ref": "#/definitions/condition__not"
},
{
"$ref": "#/definitions/condition__or"
},
{
"$ref": "#/definitions/condition__expression"
}
]
},
"condition__and": {
"additionalProperties": false,
"properties": {
"and": {
"description": "A condition that is satisfied if all conditions in a list are satisfied",
"items": {
"$ref": "#/definitions/condition"
},
"type": "array"
}
},
"required": [
"and"
],
"type": "object"
},
"condition__expression": {
"oneOf": [
{
"additionalProperties": false,
"properties": {
"expression": {
"description": "A CEL expression describing a condition",
"minLength": 1,
"type": "string"
}
},
"required": [
"expression"
],
"type": "object"
},
{
"description": "A CEL expression describing a condition",
"minLength": 1,
"type": "string"
}
]
},
"condition__not": {
"additionalProperties": false,
"description": "A condition that negates the provided condition",
"properties": {
"not": {
"$ref": "#/definitions/condition"
}
},
"required": [
"not"
],
"type": "object"
},
"condition__or": {
"additionalProperties": false,
"properties": {
"or": {
"description": "A condition that is satisfied if any of the conditions in a list are satisfied",
"items": {
"$ref": "#/definitions/condition"
},
"type": "array"
}
},
"required": [
"or"
],
"type": "object"
}
},
"properties": {
"$schema": {
"const": "https://bitmagnet.io/schemas/classifier-0.1.json"
},
"extensions": {
"additionalProperties": {
"items": {
"type": "string"
},
"type": "array"
},
"type": "object"
},
"flag_definitions": {
"additionalProperties": {
"enum": [
"bool",
"string",
"int",
"string_list",
"content_type_list"
],
"type": "string"
},
"type": "object"
},
"flags": {
"additionalProperties": true,
"type": "object"
},
"keywords": {
"additionalProperties": {
"items": {
"type": "string"
},
"type": "array"
},
"type": "object"
},
"workflows": {
"additionalProperties": {
"$ref": "#/definitions/action"
},
"type": "object"
}
},
"type": "object"
}
+8 -4
View File
@@ -11,6 +11,7 @@ nav_order: 2
- `postgres.host`, `postgres.name` `postgres.user` `postgres.password` (default: `localhost`, `bitmagnet`, `postgres`, _empty_): Set these values to configure connection to your Postgres database.
- `tmdb.api_key`: This is quite an important one, please [see below](#obtaining-a-tmdb-api-key) for more details.
- `tmdb.enabled` (default: `true`): Specify `false` to disable the TMDB API integration.
- `dht_crawler.save_files_threshold` (default: `100`): Some torrents contain many thousands of files, which impacts performance and uses a lot of database disk space. This parameter sets a maximum limit for the number of files saved by the crawler with each torrent.
- `dht_crawler.save_pieces` (default: `false`): If true, the DHT crawler will save the pieces bytes from the torrent metadata. The pieces take up quite a lot of space, and aren't currently very useful, but they may be used by future features.
- `log.level` (default: `info`): If you're developing or just curious then you may want to set this to `debug`; note that `debug` output will be very verbose.
@@ -78,15 +79,18 @@ Environment variables can be used to configure simple scalar types (strings, num
## VPN configuration
It's recommended that you run **bitmagnet** behind a VPN. If you're using Docker then [gluetun](https://github.com/qdm12/gluetun-wiki){:target="\_blank"} is a good solution for this, although the networking settings can be tricky. The [example docker-compose file](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml){:target="\_blank"} demonstrates this.
It's recommended that you run **bitmagnet** behind a VPN. If you're using Docker then [gluetun](https://github.com/qdm12/gluetun-wiki) is a good solution for this, although the networking settings can be tricky. The [example docker-compose file](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml) demonstrates this.
## Obtaining a TMDB API Key
{: .highlight }
**bitmagnet** uses [the TMDB API](https://developer.themoviedb.org/docs){:target="\_blank"} to fetch metadata for movies and TV shows. By default you'll be sharing an API key with other users. If you're using this app and its content classifier heavily then you'll need to get a personal TMDB API key. Until you do this you'll see a warning message in the logs on startup, and you'll be limited to 1 TMDB API request per second. This is just about enough for running the DHT crawler, but if you're importing and classifying a lot of content this will be a major bottleneck. If many people are using this app with the default API key then that could add up to many requests per second, so please get your own API key if you are using this app more than casually!
**bitmagnet** uses [the TMDB API](https://developer.themoviedb.org/docs) to fetch metadata for movies and TV shows. By default you'll be sharing an API key with other users. If you're using this app and its content classifier heavily then you'll need to get a personal TMDB API key. Until you do this you'll see a warning message in the logs on startup, and you'll be limited to 1 TMDB API request per second. This is just about enough for running the DHT crawler, but if you're importing and classifying a lot of content this will be a major bottleneck. If many people are using this app with the default API key then that could add up to many requests per second, so please get your own API key if you are using this app more than casually!
Obtaining an API key is free and relatively easy, but you'll have to register for a TMDB account, provide them with some personal information such as contact details, a website URL (such as your GitHub account or social media profile) and a short description of your use case (**tip:** this app provides _"A content classifier that identifies movies and TV shows based on filenames"_). Once you've filled in the request form, approval should be instant.
[Synology have provided a full tutorial on obtaining a TMDB API key](https://kb.synology.com/en-au/DSM/tutorial/How_to_apply_for_a_personal_API_key_to_get_video_info){:target="\_blank"}.
[Synology have provided a full tutorial on obtaining a TMDB API key](https://kb.synology.com/en-au/DSM/tutorial/How_to_apply_for_a_personal_API_key_to_get_video_info).
Once you've obtained your API key you'll need to configure the `tmdb.api_key` value. Your rate limit will then default to 20 requests per second, which is well within [TMDB's stated fair usage limit](https://developer.themoviedb.org/docs/rate-limiting){:target="\_blank"}.
Once you've obtained your API key you'll need to configure the `tmdb.api_key` value. Your rate limit will then default to 20 requests per second, which is well within [TMDB's stated fair usage limit](https://developer.themoviedb.org/docs/rate-limiting).
{: .highlight }
The TMDB API integration can be disabled altogether by setting `tmdb.enabled` to `false`.
+2 -2
View File
@@ -9,7 +9,7 @@ nav_order: 1
## Docker
The quickest way to get up-and-running with **bitmagnet** is with [Docker Compose](https://docs.docker.com/compose/){:target="\_blank"}. The following `docker-compose.yml` is a minimal example. For a more full-featured example including VPN routing and observability services see the [docker compose configuration in the GitHub repository](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml){:target="\_blank"}.
The quickest way to get up-and-running with **bitmagnet** is with [Docker Compose](https://docs.docker.com/compose/). The following `docker-compose.yml` is a minimal example. For a more full-featured example including VPN routing and observability services see the [docker compose configuration in the GitHub repository](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml).
```yml
services:
@@ -77,7 +77,7 @@ You can also install **bitmagnet** natively with `go install github.com/bitmagne
The **bitmagnet** CLI is the entrypoint into the application. Take note of the command needed to run the CLI, depending on your installation method.
- If you are using the docker-compose example above, you can run the CLI (while the stack is started) with `docker exec -it bitmagnet /bitmagnet`.
- If you are using the docker-compose example above, you can run the CLI (while the stack is started) with `docker exec -it bitmagnet bitmagnet`.
- If you installed bitmagnet with `go install`, you can run the CLI with `bitmagnet`.
When referring to CLI commands in the rest of the documentation, for simplicity we will use `bitmagnet`; please substitute this for the correct command. For example, to show the CLI help, run:
@@ -23,7 +23,7 @@ This tutorial will show you how to backup, restore and merge **bitmagnet** datab
The following command will take a backup of the critical **bitmagnet** data and save it to a file named `export.sql`. (note this is not a full backup of the database which would include creation of tables, indexes etc.). By exporting with the `--data-only` flag the resulting file can be imported into a new or existing installation, after **bitmagnet** has run its migrations to set up the database and tables.
Please refer to [the `pg_dump` documentation](https://www.postgresql.org/docs/current/app-pgdump.html){:target="\_blank"} and ensure to specify the correct values (e.g. `host`, `username` and `password`) for the source database.
Please refer to [the `pg_dump` documentation](https://www.postgresql.org/docs/current/app-pgdump.html) and ensure to specify the correct values (e.g. `host`, `username` and `password`) for the source database.
```sh
pg_dump \
@@ -53,7 +53,7 @@ First, ensure you have a target **bitmagnet** instance up and running, _of the s
The following command will import the backup file into the target database, merging the data with any existing data.
Please refer to [the `psql` documentation](https://www.postgresql.org/docs/current/app-psql.html){:target="\_blank"} and ensure to specify the correct values (e.g. `host`, `username` and `password`) for the target database.
Please refer to [the `psql` documentation](https://www.postgresql.org/docs/current/app-psql.html) and ensure to specify the correct values (e.g. `host`, `username` and `password`) for the target database.
```sh
psql bitmagnet < backup.sql
+380
View File
@@ -0,0 +1,380 @@
---
title: Classifier
parent: Tutorials
layout: default
nav_order: 3
---
# Classifier
{: .note-title }
> tl;dr:
>
> The classifier can be configured and customized to do things like:
>
> - automatically delete torrents you don't want in your index
> - add custom tags to torrents you're interested in
> - customize the keywords and file extensions used for determining a torrent's content type
> - specify completely custom logic to classify and perform other actions on torrents
>
> Skip to [practical use cases and examples](#practical-use-cases-and-examples)
## Background
After a torrent is crawled or imported, some further processing must be done to gather metadata, have a guess at the torrent's contents and finally index it in the database, allowing it to be searched and displayed in the UI/API.
**bitmagnet**'s classifier is powered by a [Domain Specific Language](https://en.wikipedia.org/wiki/Domain-specific_language). The aim of this is to provide a high level of customisability, along with transparency into the classification process which will hopefully aid collaboration on improvements to the core classifier logic.
The classifier is declared in YAML format. The application includes a [core classifier](https://github.com/bitmagnet-io/bitmagnet/blob/main/internal/classifier/classifier.core.yaml) that can be configured, extended or completely replaced with a custom classifier. This page documents the required format.
## Source precedence
**bitmagnet** will attempt to load classifier source code from all the following locations. Any discovered classifier source will be merged with other sources in the following order of precedence:
- [the core classifier](https://github.com/bitmagnet-io/bitmagnet/blob/main/internal/classifier/classifier.core.yaml)
- `classifier.yml` in the [XDG-compliant](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) config location for the current user (for example on MacOS this is `~/Library/Application Support/bitmagnet/classifier.yml`)
- `classifier.yml` in the current working directory
- [Classifier configuration](#configuration)
Note that multiple sources will be merged, not replaced. For example, keywords added to the classifier configuration will be merged with the core keywords.
The merged classifier source can be viewed with the CLI command `bitmagnet classifier show`.
{% include callout_cli.md %}
## Schema
A [JSON schema for the classifier](https://bitmagnet.io/schemas/classifier-0.1.json) is available; some editors and IDEs will be able to validate the structure of your classifier document by specifying the `$schema` attribute:
```yaml
$schema: https://bitmagnet.io/schemas/classifier-0.1.json
```
The classifier schema can also be viewed by running the cli command `bitmagnet classifier schema`.
{% include callout_cli.md %}
The classifier declaration comprises the following components:
## Workflows
A workflow is a list of [actions](#actions) that will be executed on all torrents when they are classified. When no custom configuration is provided, the `default` workflow will be run. To use a different workflow instead, specify the `classifier.workflow` configuration option with the name of your custom workflow.
## Actions
An action is a piece of [workflow](#workflows) to be executed. All actions either return an updated classification result or an error.
For example, the following action will set the content type of the current torrent to `audiobook`:
```yaml
set_content_type: audiobook
```
The following action will return an `unmatched` error:
```yaml
unmatched
```
And the following action will delete the current torrent being classified (returning a `delete` error):
```yaml
delete
```
These actions aren't much use on their own - we'd want to check some conditions are satisfied before setting a content type or deleting a torrent, and for this we'd use the `if_else` action. For example, the following action will set the content type to `audiobook` if the torrent name contains audiobook-related keywords, and will otherwise return an `unmatched` error:
```yaml
if_else:
condition: "torrent.baseName.matches(keywords.audiobook)"
if_action:
set_content_type: audiobook
else_action: unmatched
```
The following action will delete a torrent if its name matches the list of `banned` keywords:
```yaml
if_else:
condition: "torrent.baseName.matches(keywords.banned)"
if_action: delete
```
Actions may return the following types of error:
- An `unmatched` error indicates that the current action did not match for the current torrent
- A `delete` error indicates that the torrent should be deleted
- An unhandled error may occur, for example if the TMDB API was unreachable
Whenever an error is returned, the current classification will be terminated.
Note that a workflow should never return an `unmatched` error. We expect to iterate through a series of checks corresponding to each content type. If the current torrent does not match the content type being checked, we'll proceed to the next check until we find a match; if no match can be found, the content type will be `unknown`. To facilitate this, we can use the `find_match` action.
The `find_match` action is a bit like a try/catch block in some programming languages; it will try to match a particular content type, and if an `unmatched` error is returned, it will catch the `unmatched` error and proceed to the next check. For example, the following action will attempt to classify a torrent as an `audiobook`, and then as an `ebook`. If both checks fail, the content type will be `unknown`:
```yaml
find_match:
# match audiobooks:
- if_else:
condition: "torrent.baseName.matches(keywords.audiobook)"
if_action:
set_content_type: audiobook
else_action: unmatched
# match ebooks:
- if_else:
condition: "torrent.files.map(f, f.extension in extensions.ebook ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: ebook
else_action: unmatched
```
For a full list of available actions, please refer to [the JSON schema](https://bitmagnet.io/schemas/classifier-0.1.json).
## Conditions
Conditions are used in conjunction with the `if_else` [action](#actions), in order to execute an action if a particular condition is satisfied.
The conditions in the examples above use [CEL (Common Expression Language) expressions](https://cel.dev/).
### The CEL environment
CEL is already a [well-documented](https://github.com/google/cel-spec/blob/master/doc/intro.md) language, so this page won't go into detail about the CEL syntax. In the context of the **bitmagnet** classifier, the CEL environment exposes a number of variables:
- `torrent`: The current torrent being classified (protobuf type: `bitmagnet.Torrent`)
- `result`: The current classification result (protobuf type: `bitmagnet.Classification`)
- `keywords`: A map of strings to regular expressions, representing named lists of [keywords](#keywords)
- `extensions`: A map of strings to string lists, representing named lists of [extensions](#extensions)
- `contentType`: A map of strings to enum values representing content types (e.g. `contentType.movie`, `contentType.music`)
- `fileType`: A map of strings to enum values representing file types (e.g. `fileType.video`, `fileType.audio`)
- `flags`: A map of strings to the configured values of [flags](#flags)
- `kb`, `mb`, `gb`: Variables defined for convenience, equal to the number of bytes in a kilobyte, megabyte and gigabyte respectively
For more details on the protocol buffer types, please refer to [the protobuf schema](https://github.com/bitmagnet-io/bitmagnet/blob/main/internal/protobuf/bitmagnet.proto).
### Boolean logic (`or`, `and` & `not`)
In addition to CEL expressions, conditions may be declared using the boolean logic operators `or`, `and` and `not`. For example, the following condition evaluates to true if either the torrent consists mostly of file extensions very commonly used for music (e.g. `flac`), OR if the torrent both has a name that includes music-related keywords, and consists mostly of audio files:
```yaml
or:
- "torrent.files.map(f, f.extension in extensions.music ? f.size : - f.size).sum() > 0"
- and:
- "torrent.baseName.matches(keywords.music)"
- "torrent.files.map(f, f.fileType == fileType.audio ? f.size : - f.size).sum() > 0"
```
Note that we could also have specified the above condition using just one CEL expression, but breaking up complex conditions like this is more readable.
## Keywords
The classifier includes lists of keywords associated with different types of torrents. These aim to provide a simpler alternative to regular expressions, and the classifier will compile all keyword lists to regular expressions that can be used within CEL expressions. In order for a keyword to match, it must appear as an isolated token in the test string - that is, it must be either at the beginning or preceded by a non-word character, and either at the end or followed by a non-word character.
Reserved characters in the syntax are:
- parentheses `(` and `)` enclose a group
- `|` is an OR operator
- `*` is a wildcard operator
- `?` makes the previous character or group optional
- `+` specifies one or more of the previous character
- `#` specifies any number
- ` ` specifies any non-word or non-number character
For example, to define some music- and audiobook-related keywords:
```yaml
keywords:
music: # define music-related keywords
- music # all letters are case-insensitive, and must be defined in lowercase unless escaped
- discography
- album
- \V.?\A # escaped letters are case-sensitive; matches "VA", "V.A" and "V.A.", but not "va"
- various artists # matches "various artists" and "Various.Artists"
audiobook: # define audiobook-related keywords
- (audio)?books?
- (un)?abridged
- narrated
- novels?
- (auto)?biograph(y|ies) # matches "biography", "autobiographies" etc.
```
{: .note }
> If you'd rather use plain old regular expressions, the CEL syntax supports that too, for example `torrent.baseName.matches("^myregex$")`.
## Extensions
The classifier includes lists of file extensions associated with different types of content. For example, to identify torrents of type `comic` by their file extensions, the extensions are first declared:
```yaml
extensions:
comic:
- cb7
- cba
- cbr
- cbt
- cbz
```
The extensions can now be used as part of a condition within an `if_else` action:
```yaml
if_else:
condition: "torrent.files.map(f, f.extension in extensions.comic ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: comic
else_action: unmatched
```
## Flags
Flags can be used to configure workflows. In order to use a flag in a workflow, it must first be defined. For example, the core classifier defines the following flags that are used in the `default` workflow:
```yaml
flag_definitions:
tmdb_enabled: bool
delete_content_types: content_type_list
delete_xxx: bool
```
These flags can be referenced within CEL expressions, for example to delete adult content if the `delete_xxx` flag is set to `true`:
```yaml
if_else:
condition: "flags.delete_xxx && result.contentType == contentType.xxx"
if_action: delete
```
## Configuration
The classifier can be customized by providing a `classifier.yml` file in a supported location [as described above](#source-precedence). If you only want to make some minor modifications, it may be convenient to specify these [using the main application configuration](/setup/configuration.html) instead, by providing values in either `config.yml` or as environment variables. The application configuration exposes some but not all properties of the classifier.
For example, in your `config.yml` you could specify:
```yaml
classifier:
# specify a custom workflow to be used:
workflow: custom
# add to the core list of music keywords:
keywords:
music:
- my-custom-music-keyword
# add a file extension to the list of audiobook-related extensions:
extensions:
audiobook:
- abc
# auto-delete all comics
flags:
delete_content_types:
- comics
```
Or as environment variables you could specify:
```sh
TMDB_ENABLED=false \ # disable the TMDB API integration
CLASSIFIER_WORKFLOW=custom \ # specify a custom workflow to be used
CLASSIFIER_DELETE_XXX=true \ # auto-delete all adult content
bitmagnet worker run --all
```
## Validation
The classifier source is compiled on initial load, and all structural and syntax errors should be caught at compile time. If there are errors in your classifier source, **bitmagnet** should exit with an error message indicating the location of the problem.
## Testing on individual torrents
You can test the classifier on an individual torrent or torrents using the `bitmagnet process` CLI command:
```sh
bitmagnet process --infoHash=aaaaaaaaaaaaaaaaaaaa --infoHash=bbbbbbbbbbbbbbbbbbbb
```
{% include callout_cli.md %}
## Reclassify all torrents
Read how to [reclassify all torrents](/tutorials/reprocess-reclassify.html).
## Practical use cases and examples
### Auto-delete specific content types
The default workflow provides a flag that allows for automatically deleting specific content types. For example, to delete all `comic`, `software` and `xxx` torrents:
```yaml
flags:
delete_content_types:
- comic
- software
- xxx
```
Auto-deleting adult content has been one of the most requested features. For convenience, this is exposed as the configuration option `classifier.delete_xxx`, and can be specified with the environment variable `CLASSIFIER_DELETE_XXX=true`.
### Auto-delete torrents containing specific keywords
Any torrents containing keywords in the `banned` list will be automatically deleted. This is primarily used for deleting <abbr title="Child Sexual Abuse Material">CSAM</abbr> content, but the list can be extended to auto-delete any other keywords:
```yaml
keywords:
banned:
- my-hated-keyword
```
### Disable the TMDB API integration
The `tmdb_enabled` flag can be used to disable the TMDB API integration:
```yaml
flags:
tmdb_enabled: false
```
For convenience, this is also exposed as the configuration option `tmdb.enabled`, and can be specified with the environment variable `TMDB_ENABLED=false`.
### Extend the default workflow with custom logic
Custom workflows can be added in the `workflows` section of the classifier document. It is possible to extend the default workflow by using the `run_workflow` action within your custom workflow, for example:
```yaml
workflows:
custom:
- <my custom action to be executed before the default workflow>
- run_workflow: default
- <my custom action to be executed after the default workflow>
```
A concrete example of this is adding tags to torrents based on custom criteria.
### Use tags to create custom torrent categories
Is there a category of torrent you're interested in that isn't captured by one of the core content types? Torrent tags are intended to capture custom categories and content types.
Let's imagine you'd like to surface torrents containing interesting documents. The interesting documents have specific file extensions, and their filenames contain specific keywords. Let's create a custom action to tag torrents containing interesting documents:
```yaml
# define file extensions for the documents we're interested in:
extensions:
interesting_documents:
- doc
- docx
- pdf
# define keywords that must be present in the filenames of the interesting documents:
keywords:
interesting_documents:
- interesting
- fascinating
# extend the default workflow with a custom workflow to tag torrents containing interesting documents:
workflows:
custom:
# first run the default workflow:
- run_workflow: default
# then add the tag to any torrents containing interesting documents:
- if_else:
condition: "torrent.files.filter(f, f.extension in extensions.interesting_documents && f.basePath.matches(keywords.interesting_documents)).size() > 0"
if_action:
add_tag: interesting-documents
```
To specify that the custom workflow should be used, remember to specify the `classifier.workflow` configuration option, e.g. `CLASSIFIER_WORKFLOW=custom bitmagnet worker run --all`.
+6 -6
View File
@@ -2,7 +2,7 @@
title: Import
parent: Tutorials
layout: default
nav_order: 4
nav_order: 5
redirect_from:
- /tutorials/importing.html
---
@@ -13,7 +13,7 @@ redirect_from:
> Important
>
> Before continuing with this tutorial, please [obtain and configure a personal TMDB API key]({% link setup/configuration.md %}#obtaining-a-tmdb-api-key).
> Before continuing with this tutorial, please [obtain and configure a personal TMDB API key, or disable the TMDB API integration]({% link setup/configuration.md %}#obtaining-a-tmdb-api-key).
**bitmagnet** includes an import endpoint at `/import`; this can be used for importing Torrent files from any source.
@@ -29,10 +29,10 @@ For the purposes of this tutorial we'll use the RARBG SQLite backup, but you can
> Pre-requisites
>
> - [x] You have [obtained and configured a personal TMDB API key]({% link setup/configuration.md %}#obtaining-a-tmdb-api-key)
> - [x] You have [obtained and configured a personal TMDB API key, or disabled the TMDB API integration]({% link setup/configuration.md %}#obtaining-a-tmdb-api-key)
> - [x] You have obtained a copy of the RARBG SQLite backup (I can't assist you in getting a copy of this, but it's generally available)
> - [x] You have [installed the SQLite3 CLI](https://www.tutorialspoint.com/sqlite/sqlite_installation.htm){:target="\_blank"}
> - [x] You have [installed jq](https://jqlang.github.io/jq/download/){:target="\_blank"}
> - [x] You have [installed the SQLite3 CLI](https://www.tutorialspoint.com/sqlite/sqlite_installation.htm)
> - [x] You have [installed jq](https://jqlang.github.io/jq/download/)
Let's start by writing a SQLite query in a file named `rarbg-import.sql`. This will extract the data we need and get it looking a bit more like the format that **bitmagnet** expects. The following is a starting point, please adapt it to your requirements:
@@ -111,7 +111,7 @@ sqlite3 -json -batch /path/to/your/rarbg_db.sqlite "$(cat rarbg-import.sql)" \
So what's happening here?
- First we are executing the SQL query we made above against the backup database; we tell SQLite to output the result as JSON. To test this bit in isolation you might try running just `sqlite3 -json -batch /path/to/your/rarbg_db.sqlite "$(cat rarbg-import.sql)"` (while testing you'll probably want to `limit` your results to say 10 or 100)
- Next we need to make some tweaks to the JSON structure, so we'll pipe the result into [jq](https://jqlang.github.io/jq/){:target="\_blank"}. You can add the line beginning `| jq` to the previous part to test what we have so far. Here we will:
- Next we need to make some tweaks to the JSON structure, so we'll pipe the result into [jq](https://jqlang.github.io/jq/). You can add the line beginning `| jq` to the previous part to test what we have so far. Here we will:
- Add a `source` field with value `rarbg`: each torrent stored in **bitmagnet** is associated with one or more sources, this association allows filtering by source within the search facility, and can carry some source-specific information such as an import ID, and numbers of seeders and leechers (more docs needed here!)
- Add the `contentSource` and `contentId` fields which **bitmagnet** expects, containing the IMDB ID, if it exists; these are not a required field, but if you know the external IMDB or TMDB ID of your content then it will give the classifier an easier job
- Delete the `imdb` field which won't be recognised by **bitmagnet**
@@ -2,9 +2,7 @@
title: Reprocess & Reclassify
parent: Tutorials
layout: default
nav_order: 3
redirect_from:
- /tutorials/importing.html
nav_order: 4
---
# Reprocess & Reclassify Torrents
@@ -28,6 +26,5 @@ To reprocess all torrents in your index, simply run `bitmagnet reprocess`. If yo
- `classifyMode`: This controls how already matched torrents are handled. A torrent is "matched" if it's associated with a specific piece of content from one of the API integrations (currently only TMDB). Making a lot of API calls can take a long time, so if items are already matched you might want to just do the other processing steps without re-matching them. The available modes are:
- `default`: Only attempt to match previously unmatched torrents
- `rematch`: Ignore any pre-existing match and always classify from scratch
- `skip`: Skip classification for previously unmatched torrents, unless they have a hint\* attached to them.
\*hints tell the classifier to use the hinted information instead of any classification results, which can save a lot of work for the classifier and help fix errors. Currently, the only way to add hints is by using [the `/import` endpoint](/tutorials/import.html).
@@ -2,12 +2,12 @@
title: Servarr Integration
parent: Tutorials
layout: default
nav_order: 5
nav_order: 6
---
# Servarr Integration
**bitmagnet**'s HTTP server exposes an endpoint at `/torznab`, allowing it to integrate with any application that supports [the Torznab specification](https://torznab.github.io/spec-1.3-draft/index.html){:target="\_blank"}, most notably apps in [the Servarr stack](https://wiki.servarr.com/){:target="\_blank"} (Prowlarr, Sonarr, Radarr etc.).
**bitmagnet**'s HTTP server exposes an endpoint at `/torznab`, allowing it to integrate with any application that supports [the Torznab specification](https://torznab.github.io/spec-1.3-draft/index.html), most notably apps in [the Servarr stack](https://wiki.servarr.com/) (Prowlarr, Sonarr, Radarr etc.).
## Adding **bitmagnet** as an indexer in Prowlarr
@@ -19,4 +19,4 @@ The required settings are fairly basic. Assuming you've adapted from the [exampl
![Prowlarr configure bitmagnet](/assets/images/prowlarr-2.png)
[Depending on your Prowlarr configuration](https://wiki.servarr.com/prowlarr/settings#applications){:target="\_blank"}, the **bitmagnet** indexer should now be synced to your other \*arr applications. Alternatively, you can add **bitmagnet** as an indexer directly in those applications, following the same steps as above.
[Depending on your Prowlarr configuration](https://wiki.servarr.com/prowlarr/settings#applications), the **bitmagnet** indexer should now be synced to your other \*arr applications. Alternatively, you can add **bitmagnet** as an indexer directly in those applications, following the same steps as above.
+11 -2
View File
@@ -16,6 +16,7 @@ require (
github.com/gin-gonic/gin v1.9.1
github.com/go-playground/validator/v10 v10.17.0
github.com/go-resty/resty/v2 v2.11.0
github.com/google/cel-go v0.20.0
github.com/grafana/pyroscope-go/godeltaprof v0.1.7
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/hedhyw/rex v0.6.0
@@ -37,12 +38,14 @@ require (
github.com/urfave/cli/v2 v2.27.1
github.com/vektah/gqlparser/v2 v2.5.11
github.com/vektra/mockery/v2 v2.40.1
github.com/xeipuuv/gojsonschema v1.2.0
go.uber.org/fx v1.20.1
go.uber.org/zap v1.26.0
golang.org/x/sync v0.6.0
golang.org/x/sys v0.16.0
golang.org/x/text v0.14.0
golang.org/x/time v0.5.0
google.golang.org/protobuf v1.33.0
gopkg.in/yaml.v3 v3.0.1
gorm.io/driver/postgres v1.5.4
gorm.io/gen v0.3.25
@@ -63,6 +66,7 @@ require (
github.com/anacrolix/multiless v0.3.1-0.20221221005021-2d12701f83f7 // indirect
github.com/anacrolix/stm v0.5.0 // indirect
github.com/anacrolix/sync v0.5.1 // indirect
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
github.com/benbjohnson/immutable v0.4.3 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bits-and-blooms/bitset v1.13.0 // indirect
@@ -84,6 +88,7 @@ require (
github.com/go-sql-driver/mysql v1.7.1 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/golang/mock v1.6.0 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/websocket v1.5.1 // indirect
@@ -121,7 +126,7 @@ require (
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.46.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/rogpeppe/go-internal v1.11.0 // indirect
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
github.com/rs/zerolog v1.31.0 // indirect
@@ -137,10 +142,13 @@ require (
github.com/spf13/cobra v1.8.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/spf13/viper v1.18.2 // indirect
github.com/stoewer/go-strcase v1.2.0 // indirect
github.com/stretchr/objx v0.5.0 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e // indirect
go.opentelemetry.io/otel v1.22.0 // indirect
go.opentelemetry.io/otel/trace v1.22.0 // indirect
@@ -156,7 +164,8 @@ require (
golang.org/x/term v0.16.0 // indirect
golang.org/x/tools v0.17.0 // indirect
golang.org/x/tools/cmd/cover v0.1.0-deprecated // indirect
google.golang.org/protobuf v1.32.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231211222908-989df2bf70f3 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gorm.io/datatypes v1.2.0 // indirect
+15 -6
View File
@@ -92,6 +92,8 @@ github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sx
github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
@@ -239,12 +241,14 @@ github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:x
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/cel-go v0.20.0 h1:h4n6DOCppEMpWERzllyNkntl7JrDyxoE543KWS6BLpc=
github.com/google/cel-go v0.20.0/go.mod h1:kWcIzTsPX0zmQ+H3TirHstLLf9ep5QTsZBN9u4dOYLg=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
@@ -452,8 +456,9 @@ github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqn
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
@@ -507,6 +512,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/viper v1.18.2 h1:LUXCnvUvSM6FXAsj6nnfc8Q2tp1dIgUfY9Kc8GsSOiQ=
github.com/spf13/viper v1.18.2/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk=
github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU=
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
@@ -547,6 +554,7 @@ github.com/vertica/vertica-sql-go v1.3.3 h1:fL+FKEAEy5ONmsvya2WH5T8bhkvY27y/Ik3R
github.com/vertica/vertica-sql-go v1.3.3/go.mod h1:jnn2GFuv+O2Jcjktb7zyc4Utlbu9YVqpHH/lx63+1M4=
github.com/willf/bitset v1.1.9/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
@@ -716,7 +724,8 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17 h1:wpZ8pe2x1Q3f2KyT5f8oP/fa9rHAKgFPr/HZdNuS+PQ=
google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17 h1:JpwMPBpFN3uKhdaekDpiNlImDdkUAyiJ6ez/uxGaUSo=
google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:0xJLfVdJqpAPl8tDg1ujOCGzx6LFLttXT5NhllGOXY4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20231211222908-989df2bf70f3 h1:kzJAXnzZoFbe5bhZd4zjUuHos/I31yH4thfMb/13oVY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20231211222908-989df2bf70f3/go.mod h1:eJVxU6o+4G1PSczBr85xmyvSNYAKvAYgkub40YGomFM=
google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
@@ -729,8 +738,8 @@ google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I=
google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+5 -3
View File
@@ -1,8 +1,9 @@
package appfx
import (
"github.com/bitmagnet-io/bitmagnet/internal/app/cmd/classifiercmd"
"github.com/bitmagnet-io/bitmagnet/internal/app/cmd/processcmd"
"github.com/bitmagnet-io/bitmagnet/internal/app/cmd/reprocesscmd"
"github.com/bitmagnet-io/bitmagnet/internal/app/cmd/torrentcmd"
"github.com/bitmagnet-io/bitmagnet/internal/blocking/blockingfx"
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/app/boilerplateappfx"
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/httpserver/httpserverfx"
@@ -29,7 +30,6 @@ func New() fx.Option {
"app",
blockingfx.New(),
boilerplateappfx.New(),
classifierfx.New(),
dhtcrawlerfx.New(),
dhtfx.New(),
databasefx.New(),
@@ -43,10 +43,12 @@ func New() fx.Option {
tmdbfx.New(),
torznabfx.New(),
versionfx.New(),
classifierfx.New(),
// cli commands:
fx.Provide(
classifiercmd.New,
reprocesscmd.New,
torrentcmd.New,
processcmd.New,
),
fx.Provide(webui.New),
fx.Decorate(migrations.NewDecorator),
+81
View File
@@ -0,0 +1,81 @@
package classifiercmd
import (
"encoding/json"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/urfave/cli/v2"
"go.uber.org/fx"
"gopkg.in/yaml.v3"
"io"
)
// Params collects the fx-injected dependencies for the classifier CLI command.
type Params struct {
	fx.In
	// WorkflowSource lazily resolves the classifier workflow source document.
	WorkflowSource lazy.Lazy[classifier.Source]
}

// Result provides the constructed CLI command to the "commands" fx group.
type Result struct {
	fx.Out
	Command *cli.Command `group:"commands"`
}

// formatFlag selects the output encoding shared by both subcommands; yaml is the default.
var formatFlag = cli.StringFlag{
	Name:  "format",
	Usage: "Output format (json or yaml)",
	Value: "yaml",
}
// New constructs the "classifier" CLI command, with subcommands to display
// the active classifier workflow source and the JSON schema used to validate
// classifier documents.
func New(p Params) (Result, error) {
	showCommand := &cli.Command{
		Name:  "show",
		Usage: "Show the classifier workflow source",
		Flags: []cli.Flag{&formatFlag},
		Action: func(ctx *cli.Context) error {
			src, err := p.WorkflowSource.Get()
			if err != nil {
				return err
			}
			return write(ctx.App.Writer, src, ctx.String("format"))
		},
	}
	schemaCommand := &cli.Command{
		Name:  "schema",
		Usage: "Show the classifier JSON schema",
		Flags: []cli.Flag{&formatFlag},
		Action: func(ctx *cli.Context) error {
			return write(ctx.App.Writer, classifier.DefaultJsonSchema(), ctx.String("format"))
		},
	}
	return Result{Command: &cli.Command{
		Name:        "classifier",
		Subcommands: []*cli.Command{showCommand, schemaCommand},
	}}, nil
}
// write serializes src in the requested format ("json" or "yaml") and writes
// it to writer. JSON output is indented and newline-terminated; any other
// format is rejected with an error.
func write(writer io.Writer, src any, format string) error {
	var (
		data []byte
		err  error
	)
	switch format {
	case "json":
		data, err = json.MarshalIndent(src, "", "  ")
		// Terminate JSON output with a newline (yaml.Marshal already does this).
		data = append(data, '\n')
	case "yaml":
		data, err = yaml.Marshal(src)
	default:
		err = fmt.Errorf("unsupported format: %s", format)
	}
	if err != nil {
		return err
	}
	_, err = writer.Write(data)
	return err
}
+54
View File
@@ -0,0 +1,54 @@
package processcmd
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/processor"
"github.com/bitmagnet-io/bitmagnet/internal/protocol"
"github.com/urfave/cli/v2"
"go.uber.org/fx"
"go.uber.org/zap"
)
// Params collects the fx-injected dependencies for the process CLI command.
type Params struct {
	fx.In
	// Processor lazily resolves the torrent processing/classification pipeline.
	Processor lazy.Lazy[processor.Processor]
	Logger    *zap.SugaredLogger
}

// Result provides the constructed CLI command to the "commands" fx group.
type Result struct {
	fx.Out
	Command *cli.Command `group:"commands"`
}
// New constructs the "process" CLI command, which (re)classifies torrents.
// The --infoHash flag may be repeated to restrict processing to specific
// torrents; with no flags, all torrents are processed.
func New(p Params) (Result, error) {
	return Result{Command: &cli.Command{
		Name: "process",
		Flags: []cli.Flag{
			&cli.StringSliceFlag{
				Name: "infoHash",
			},
		},
		Action: func(ctx *cli.Context) error {
			pr, err := p.Processor.Get()
			if err != nil {
				return err
			}
			// Parse each --infoHash value; an empty slice means "process everything".
			var infoHashes []protocol.ID
			for _, infoHash := range ctx.StringSlice("infoHash") {
				id, parseErr := protocol.ParseID(infoHash)
				if parseErr != nil {
					return parseErr
				}
				infoHashes = append(infoHashes, id)
			}
			// Note: the dead `if err != nil` check that followed the loop has
			// been removed; err was already proven nil above and the loop's
			// error variable was scoped to the loop body.
			return pr.Process(ctx.Context, processor.MessageParams{
				ClassifyMode: processor.ClassifyModeRematch,
				InfoHashes:   infoHashes,
			})
		},
	}}, nil
}
+1 -4
View File
@@ -45,8 +45,7 @@ func New(p Params) (Result, error) {
Name: "classifyMode",
Value: "default",
Usage: "default (only attempt to match previously unmatched torrents);\n" +
"rematch (ignore any pre-existing match and always classify from scratch);\n" +
"skip (skip classification for previously unmatched torrents that don't have any hint)",
"rematch (ignore any pre-existing match and always classify from scratch)",
},
},
Action: func(ctx *cli.Context) error {
@@ -56,8 +55,6 @@ func New(p Params) (Result, error) {
classifyMode = processor.ClassifyModeDefault
case "rematch":
classifyMode = processor.ClassifyModeRematch
case "skip":
classifyMode = processor.ClassifyModeSkipUnmatched
default:
return cli.Exit("invalid classifyMode", 1)
}
-88
View File
@@ -1,88 +0,0 @@
package torrentcmd
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/processor"
"github.com/bitmagnet-io/bitmagnet/internal/protocol"
"github.com/bitmagnet-io/bitmagnet/internal/protocol/metainfo/metainforequester"
"github.com/urfave/cli/v2"
"go.uber.org/fx"
"go.uber.org/zap"
"net/netip"
)
type Params struct {
fx.In
MetaInfoRequester metainforequester.Requester
Processor lazy.Lazy[processor.Processor]
Logger *zap.SugaredLogger
}
type Result struct {
fx.Out
Command *cli.Command `group:"commands"`
}
func New(p Params) (Result, error) {
return Result{Command: &cli.Command{
Name: "torrent",
Subcommands: []*cli.Command{
{
Name: "process",
Flags: []cli.Flag{
&cli.StringSliceFlag{
Name: "infoHash",
},
},
Action: func(ctx *cli.Context) error {
pr, err := p.Processor.Get()
if err != nil {
return err
}
var infoHashes []protocol.ID
for _, infoHash := range ctx.StringSlice("infoHash") {
id, err := protocol.ParseID(infoHash)
if err != nil {
return err
}
infoHashes = append(infoHashes, id)
}
if err != nil {
return err
}
return pr.Process(ctx.Context, processor.MessageParams{
ClassifyMode: processor.ClassifyModeRematch,
InfoHashes: infoHashes,
})
},
},
{
Name: "requestMetaInfo",
Flags: []cli.Flag{
&cli.StringFlag{
Name: "infoHash",
},
&cli.StringFlag{
Name: "address",
},
},
Action: func(ctx *cli.Context) error {
infoHash, err := protocol.ParseID(ctx.String("infoHash"))
if err != nil {
return err
}
addr, err := netip.ParseAddrPort(ctx.String("address"))
if err != nil {
return err
}
info, err := p.MetaInfoRequester.Request(ctx.Context, protocol.ID(infoHash), addr)
if err != nil {
return err
}
p.Logger.Infow("got infoBytes", "info", info)
return nil
},
},
},
}}, nil
}
@@ -117,7 +117,7 @@ func CustomRecoveryWithZap(logger ZapLogger, stack bool, recovery gin.RecoveryFu
return func(c *gin.Context) {
defer func() {
if err := recover(); err != nil {
// Check for a broken connection, as it is not really a
// check for a broken connection, as it is not really a
// condition that warrants a panic stack trace.
var brokenPipe bool
if ne, ok := err.(*net.OpError); ok {
+72
View File
@@ -0,0 +1,72 @@
package classifier
import (
"errors"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
)
// actions returns a feature that registers the given action definitions with
// the compiler's feature set.
func actions(defs ...actionDefinition) feature {
	return func(c *features) {
		c.actions = append(c.actions, defs...)
	}
}

// actionCompiler compiles a raw source payload into an executable action.
type actionCompiler interface {
	compileAction(ctx compilerContext) (action, error)
}

// actionDefinition is a named, schema-documented action type that the
// compiler can attempt to match against a source payload.
type actionDefinition interface {
	HasJsonSchema
	name() string
	actionCompiler
}
// compileAction compiles a raw source payload — either a single action or a
// list of actions — into one composite action that runs each compiled action
// in sequence, threading the accumulated classification result through.
// For each raw action, every registered action definition is tried in turn;
// the first that compiles without error wins. A fatal compiler error aborts
// immediately; otherwise a "no action matched" error is recorded per
// unmatched payload and all such errors are joined and returned.
func (c compilerContext) compileAction(ctx compilerContext) (action, error) {
	var rawActions []any
	isArray := false
	if s, ok := ctx.source.([]any); ok {
		rawActions = s
		isArray = true
	} else {
		// A single action payload is treated as a one-element list.
		rawActions = []any{ctx.source}
	}
	var actions []action
	var errs []error
outer:
	for i, rawAction := range rawActions {
		actionCtx := ctx
		if isArray {
			// Extend the source path with the list index for error reporting.
			actionCtx = ctx.child(numericPathPart(i), rawAction)
		}
		for _, def := range c.actions {
			a, err := def.compileAction(actionCtx.child(def.name(), rawAction))
			if err == nil {
				actions = append(actions, a)
				continue outer
			}
			if asFatalCompilerError(err) != nil {
				return action{}, err
			}
		}
		errs = append(errs, fmt.Errorf("no action matched: %v", ctx.source))
	}
	if len(errs) > 0 {
		return action{}, errors.Join(errs...)
	}
	return action{func(ctx executionContext) (classification.Result, error) {
		for _, a := range actions {
			result, err := a.run(ctx)
			if err != nil {
				return classification.Result{}, err
			}
			ctx = ctx.withResult(result)
		}
		return ctx.result, nil
	}}, nil
	// Fix: the original returned errors.Join(errs...) here, which is always
	// nil at this point (the len(errs) > 0 case returned above) — replaced
	// with an explicit nil.
}
// action is a compiled classifier action: run executes it against the current
// execution context and returns the (possibly updated) classification result.
type action struct {
	run func(executionContext) (classification.Result, error)
}
+61
View File
@@ -0,0 +1,61 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
const addTagName = "add_tag"

// addTagAction adds one or more tags to the classification result.
type addTagAction struct{}

func (addTagAction) name() string {
	return addTagName
}

// tagPayloadSpec decodes a single tag string, validating it as a legal tag name.
var tagPayloadSpec = payloadTransformer[string, string]{
	spec: payloadGeneric[string]{
		jsonSchema: JsonSchema{
			"type": "string",
		},
	},
	transform: func(str string, _ compilerContext) (string, error) {
		if err := model.ValidateTagName(str); err != nil {
			return "", err
		}
		return str, nil
	},
}

// addTagPayloadSpec decodes the add_tag payload: a list of validated tag names.
var addTagPayloadSpec = payloadSingleKeyValue[[]string]{
	key: addTagName,
	valueSpec: payloadMustSucceed[[]string]{
		payloadList[string]{
			itemSpec: tagPayloadSpec,
		},
	},
	description: "Add one or more tags to the current torrent",
}
// compileAction decodes the tag list at compile time and returns an action
// that adds each tag to the result's tag set.
func (addTagAction) compileAction(ctx compilerContext) (action, error) {
	tags, unmarshalErr := addTagPayloadSpec.Unmarshal(ctx)
	if unmarshalErr != nil {
		return action{}, ctx.error(unmarshalErr)
	}
	return action{
		run: func(ec executionContext) (classification.Result, error) {
			result := ec.result
			// Lazily initialise the tag set on first use.
			if result.Tags == nil {
				result.Tags = make(map[string]struct{})
			}
			for _, t := range tags {
				result.Tags[t] = struct{}{}
			}
			return result, nil
		},
	}, nil
}
// JsonSchema returns the JSON schema fragment documenting the add_tag action.
func (addTagAction) JsonSchema() JsonSchema {
	return addTagPayloadSpec.JsonSchema()
}
@@ -0,0 +1,47 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
const attachLocalContentByIdName = "attach_local_content_by_id"

// attachLocalContentByIdAction attaches locally stored content to the result
// using the torrent's hint (content type/source/ID).
type attachLocalContentByIdAction struct{}

func (attachLocalContentByIdAction) name() string {
	return attachLocalContentByIdName
}

var attachLocalContentByIdPayloadSpec = payloadLiteral[string]{
	literal:     attachLocalContentByIdName,
	description: "Use the torrent hint to attach locally stored content by ID",
}

// compileAction validates the payload and returns an action that looks up
// content by the hint's (type, source, ID) triple via the local search service.
func (a attachLocalContentByIdAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := attachLocalContentByIdPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			// Without a usable hint there is nothing to look up.
			if ctx.torrent.Hint.IsNil() || !ctx.torrent.Hint.ContentSource.Valid {
				return cl, classification.ErrUnmatched
			}
			content, err := ctx.search.ContentById(ctx, model.ContentRef{
				Type:   ctx.torrent.Hint.ContentType,
				Source: ctx.torrent.Hint.ContentSource.String,
				ID:     ctx.torrent.Hint.ContentID.String,
			})
			if err != nil {
				return cl, err
			}
			cl.AttachContent(&content)
			return cl, nil
		},
	}, nil
}

func (a attachLocalContentByIdAction) JsonSchema() JsonSchema {
	return attachLocalContentByIdPayloadSpec.JsonSchema()
}
@@ -0,0 +1,42 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
)
const attachLocalContentBySearchName = "attach_local_content_by_search"

// attachLocalContentBySearchAction attaches locally stored content found by
// searching on the classified base title.
type attachLocalContentBySearchAction struct{}

func (attachLocalContentBySearchAction) name() string {
	return attachLocalContentBySearchName
}

var attachLocalContentBySearchPayloadSpec = payloadLiteral[string]{
	literal:     attachLocalContentBySearchName,
	description: "Attempt to attach local content with a search on the torrent name",
}

// compileAction validates the payload and returns an action that searches the
// local database by content type, base title and year.
func (a attachLocalContentBySearchAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := attachLocalContentBySearchPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			// A search needs both a content type and a base title on the result.
			if !cl.ContentType.Valid || !cl.BaseTitle.Valid {
				return cl, classification.ErrUnmatched
			}
			content, err := ctx.search.ContentBySearch(ctx.Context, cl.ContentType.ContentType, cl.BaseTitle.String, cl.Date.Year)
			if err != nil {
				return cl, err
			}
			cl.AttachContent(&content)
			return cl, nil
		},
	}, nil
}

func (attachLocalContentBySearchAction) JsonSchema() JsonSchema {
	return attachLocalContentBySearchPayloadSpec.JsonSchema()
}
@@ -0,0 +1,78 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"strconv"
)
const attachTmdbContentByIdName = "attach_tmdb_content_by_id"

// attachTmdbContentByIdAction attaches content fetched from the TMDB API,
// resolving the ID from the torrent's hint.
type attachTmdbContentByIdAction struct{}

func (attachTmdbContentByIdAction) name() string {
	return attachTmdbContentByIdName
}

var attachTmdbContentByIdPayloadSpec = payloadLiteral[string]{
	literal:     attachTmdbContentByIdName,
	description: "Use the torrent hint to attach content from the TMDB API by ID",
}

// compileAction validates the payload and returns an action that resolves the
// hint's content reference to a TMDB ID (directly for TMDB-sourced hints,
// otherwise via an external-ID lookup) and fetches the movie or TV show.
func (a attachTmdbContentByIdAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := attachTmdbContentByIdPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			var ref model.ContentRef
			if maybeRef := ctx.torrent.Hint.ContentRef(); !maybeRef.Valid {
				return cl, classification.ErrUnmatched
			} else {
				ref = maybeRef.Val
			}
			// A content type already established on the result overrides the hint's.
			if cl.ContentType.Valid {
				ref.Type = cl.ContentType.ContentType
			}
			var tmdbId int64
			switch ref.Source {
			case model.SourceTmdb:
				// The hint already carries a TMDB ID; a malformed one is unmatched.
				if id, err := strconv.Atoi(ref.ID); err != nil {
					return cl, classification.ErrUnmatched
				} else {
					tmdbId = int64(id)
				}
			default:
				// Resolve an external-source ID to a TMDB ID via the API.
				if id, err := ctx.tmdb_getTmdbIdByExternalId(ref); err != nil {
					return cl, err
				} else {
					tmdbId = id
				}
			}
			var content *model.Content
			switch ref.Type {
			case model.ContentTypeMovie, model.ContentTypeXxx:
				// NOTE(review): "Tmbd" in the helper names looks like a typo for
				// "Tmdb" but matches the project-defined method — do not change here.
				if c, err := ctx.tmdb_getMovieByTmbdId(tmdbId); err != nil {
					return cl, err
				} else {
					content = &c
				}
			case model.ContentTypeTvShow:
				if c, err := ctx.tmdb_getTvShowByTmbdId(tmdbId); err != nil {
					return cl, err
				} else {
					content = &c
				}
			default:
				// Only movies, xxx and TV shows can be fetched by TMDB ID.
				return cl, classification.ErrUnmatched
			}
			cl.AttachContent(content)
			return cl, nil
		},
	}, nil
}

func (attachTmdbContentByIdAction) JsonSchema() JsonSchema {
	return attachTmdbContentByIdPayloadSpec.JsonSchema()
}
@@ -0,0 +1,57 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
const attachTmdbContentBySearchName = "attach_tmdb_content_by_search"

// attachTmdbContentBySearchAction attaches content by searching the TMDB API
// with the classified base title (and year, when known).
type attachTmdbContentBySearchAction struct{}

func (attachTmdbContentBySearchAction) name() string {
	return attachTmdbContentBySearchName
}

var attachTmdbContentBySearchPayloadSpec = payloadLiteral[string]{
	literal:     attachTmdbContentBySearchName,
	description: "Attempt to attach content from the TMDB API with a search on the torrent name",
}

// compileAction validates the payload and returns an action that searches
// TMDB: a TV-show result searches the TV endpoint; anything else searches
// movies, unless episodes were parsed (which rules a movie out).
func (a attachTmdbContentBySearchAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := attachTmdbContentBySearchPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			if !cl.BaseTitle.Valid {
				return cl, classification.ErrUnmatched
			}
			var content *model.Content
			switch cl.ContentType.ContentType {
			case model.ContentTypeTvShow:
				if result, searchErr := ctx.tmdb_searchTvShow(cl.BaseTitle.String, cl.Date.Year); searchErr != nil {
					return cl, searchErr
				} else {
					content = &result
				}
			default:
				// A torrent with parsed episodes cannot be a movie.
				if len(cl.Episodes) > 0 {
					return cl, classification.ErrUnmatched
				}
				if result, searchErr := ctx.tmdb_searchMovie(cl.BaseTitle.String, cl.Date.Year); searchErr != nil {
					return cl, searchErr
				} else {
					content = &result
				}
			}
			cl.AttachContent(content)
			return cl, nil
		},
	}, nil
}

func (attachTmdbContentBySearchAction) JsonSchema() JsonSchema {
	return attachTmdbContentBySearchPayloadSpec.JsonSchema()
}
+32
View File
@@ -0,0 +1,32 @@
package classifier
import "github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
const deleteName = "delete"

// deleteAction flags the current torrent for deletion.
type deleteAction struct{}

func (deleteAction) name() string {
	return deleteName
}

var deletePayloadSpec = payloadLiteral[string]{
	literal:     deleteName,
	description: "Delete the current torrent",
}

// compileAction validates the payload and returns an action that signals
// deletion via ErrDeleteTorrent, annotated with the source path for diagnostics.
func (deleteAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := deletePayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	// Capture the compile-time path so runtime errors can point at the source.
	path := ctx.path
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			return ctx.result, classification.RuntimeError{Cause: classification.ErrDeleteTorrent, Path: path}
		},
	}, nil
}

func (deleteAction) JsonSchema() JsonSchema {
	return deletePayloadSpec.JsonSchema()
}
+63
View File
@@ -0,0 +1,63 @@
package classifier
import (
"errors"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
)
const findMatchName = "find_match"

// findMatchAction tries a list of actions in order, committing to the first
// one that does not fail with an unmatched error.
type findMatchAction struct{}

func (findMatchAction) name() string {
	return findMatchName
}

var findMatchActionPayloadSpec = payloadSingleKeyValue[[]any]{
	key: findMatchName,
	valueSpec: payloadMustSucceed[[]any]{payloadList[any]{itemSpec: payloadGeneric[any]{
		jsonSchema: map[string]any{
			"$ref": "#/definitions/action_single",
		},
	}}},
	description: "Iterate through a series of actions to find the first that does not return an unmatched error",
}

// compileAction compiles each child action, returning an action that runs
// them in sequence until one matches.
func (findMatchAction) compileAction(ctx compilerContext) (action, error) {
	payload, err := findMatchActionPayloadSpec.Unmarshal(ctx)
	if err != nil {
		return action{}, ctx.error(err)
	}
	actions := make([]action, len(payload))
	for i, actionPayload := range payload {
		a, err := ctx.compileAction(ctx.child(numericPathPart(i), actionPayload))
		if err != nil {
			return action{}, err
		}
		actions[i] = a
	}
	// Capture the compile-time path so runtime errors can point at the source.
	path := ctx.path
	return action{
		func(ctx executionContext) (classification.Result, error) {
			for _, action := range actions {
				result, err := action.run(ctx)
				if err != nil {
					// An unmatched error means "try the next action"; any other
					// error aborts with the source path attached.
					if errors.Is(err, classification.ErrUnmatched) {
						continue
					}
					return classification.Result{}, classification.RuntimeError{
						Cause: err,
						Path:  path,
					}
				} else {
					return result, nil
				}
			}
			// No action matched: leave the result unchanged.
			return ctx.result, nil
		},
	}, nil
}

func (findMatchAction) JsonSchema() JsonSchema {
	return findMatchActionPayloadSpec.JsonSchema()
}
+86
View File
@@ -0,0 +1,86 @@
package classifier
import "github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
// ifElseAction executes one of two optional actions depending on a condition.
type ifElseAction struct{}

const ifElseName = "if_else"

func (ifElseAction) name() string {
	return ifElseName
}

// ifElsePayload holds the raw (uncompiled) condition and branch actions;
// either branch may be nil, in which case that branch is a no-op.
type ifElsePayload struct {
	Condition  any
	IfAction   any
	ElseAction any
}

var ifElsePayloadSpec = payloadSingleKeyValue[ifElsePayload]{
	key: ifElseName,
	valueSpec: payloadMustSucceed[ifElsePayload]{payloadStruct[ifElsePayload]{
		jsonSchema: map[string]any{
			"type": "object",
			"properties": map[string]any{
				"condition": map[string]any{
					"$ref": "#/definitions/condition",
				},
				"if_action": map[string]any{
					"$ref": "#/definitions/action",
				},
				"else_action": map[string]any{
					"$ref": "#/definitions/action",
				},
			},
			"required":             []string{"condition"},
			"additionalProperties": false,
		},
	}},
	description: "Execute an action based on a condition",
}

// compileAction compiles the condition and both optional branches, returning
// an action that evaluates the condition and runs the matching branch (or
// leaves the result unchanged when that branch is absent).
func (ifElseAction) compileAction(ctx compilerContext) (action, error) {
	p, decodeErr := ifElsePayloadSpec.Unmarshal(ctx)
	if decodeErr != nil {
		return action{}, ctx.error(decodeErr)
	}
	cond, cErr := ctx.compileCondition(ctx.child("condition", p.Condition))
	if cErr != nil {
		return action{}, ctx.error(cErr)
	}
	var ifAction, elseAction action
	if p.IfAction != nil {
		pIfAction, ifErr := ctx.compileAction(ctx.child("if_action", p.IfAction))
		if ifErr != nil {
			return action{}, ctx.error(ifErr)
		}
		ifAction = pIfAction
	}
	if p.ElseAction != nil {
		pElseAction, err := ctx.compileAction(ctx.child("else_action", p.ElseAction))
		if err != nil {
			return action{}, ctx.error(err)
		}
		elseAction = pElseAction
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			if result, err := cond.check(ctx); err != nil {
				return classification.Result{}, err
			} else if result {
				// A nil run means the branch was omitted: fall through unchanged.
				if ifAction.run != nil {
					return ifAction.run(ctx)
				}
			} else {
				if elseAction.run != nil {
					return elseAction.run(ctx)
				}
			}
			return ctx.result, nil
		},
	}, nil
}

func (ifElseAction) JsonSchema() JsonSchema {
	return ifElsePayloadSpec.JsonSchema()
}
+40
View File
@@ -0,0 +1,40 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/parsers"
)
const parseDateName = "parse_date"

// parseDateAction extracts a release date from the torrent name.
type parseDateAction struct{}

func (parseDateAction) name() string {
	return parseDateName
}

var parseDatePayloadSpec = payloadLiteral[string]{
	literal:     parseDateName,
	description: "Try to parse a date from the name of the current torrent",
}

// compileAction validates the payload and returns an action that parses a
// date from the torrent name, failing as unmatched when none is found.
func (parseDateAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := parseDatePayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			parsed := parsers.ParseDate(ctx.torrent.Name)
			if parsed.IsNil() {
				return ctx.result, classification.ErrUnmatched
			}
			cl := ctx.result
			cl.Date = parsed
			return cl, nil
		},
	}, nil
}

func (parseDateAction) JsonSchema() JsonSchema {
	return parseDatePayloadSpec.JsonSchema()
}
@@ -0,0 +1,40 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/parsers"
)
const parseVideoContentName = "parse_video_content"

// parseVideoContentAction parses video-related attributes from the torrent
// and merges them into the classification result.
type parseVideoContentAction struct{}

func (parseVideoContentAction) name() string {
	return parseVideoContentName
}

var parseVideoContentPayloadSpec = payloadLiteral[string]{
	literal:     parseVideoContentName,
	description: "Parse video-related attributes from the name of the current torrent",
}

// compileAction validates the payload and returns an action that runs the
// video parser and merges its output into the current result.
func (parseVideoContentAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := parseVideoContentPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			parsed, err := parsers.ParseVideoContent(ctx.torrent, ctx.result)
			cl := ctx.result
			if err != nil {
				return cl, err
			}
			cl.Merge(parsed)
			return cl, nil
		},
	}, nil
}

func (parseVideoContentAction) JsonSchema() JsonSchema {
	return parseVideoContentPayloadSpec.JsonSchema()
}
@@ -0,0 +1,58 @@
package classifier
import (
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
)
const runWorkflowName = "run_workflow"

// runWorkflowAction invokes one or more named workflows from within the
// current workflow.
type runWorkflowAction struct{}

func (runWorkflowAction) name() string {
	return runWorkflowName
}

var runWorkflowPayloadSpec = payloadSingleKeyValue[[]string]{
	key: runWorkflowName,
	valueSpec: payloadMustSucceed[[]string]{
		payloadList[string]{
			itemSpec: payloadGeneric[string]{
				jsonSchema: map[string]interface{}{
					"type":      "string",
					"minLength": 1,
				},
			},
		},
	},
	description: "Run a different workflow within the current workflow",
}

// compileAction verifies the referenced workflow names exist at compile time
// (a missing name is a fatal error) and returns an action that runs each
// named workflow in turn, threading the result through.
func (runWorkflowAction) compileAction(ctx compilerContext) (action, error) {
	names, err := runWorkflowPayloadSpec.Unmarshal(ctx)
	if err != nil {
		return action{}, ctx.error(err)
	}
	for _, name := range names {
		if _, ok := ctx.workflowNames[name]; !ok {
			return action{}, ctx.fatal(fmt.Errorf("workflow %s not found", name))
		}
	}
	return action{
		func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			for _, name := range names {
				if nextCl, err := ctx.workflows[name].run(ctx.withResult(cl)); err != nil {
					return cl, err
				} else {
					cl = nextCl
				}
			}
			return cl, nil
		},
	}, nil
}

func (runWorkflowAction) JsonSchema() JsonSchema {
	return runWorkflowPayloadSpec.JsonSchema()
}
@@ -0,0 +1,40 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
const setContentTypeName = "set_content_type"

// setContentTypeAction overwrites the content type on the result.
type setContentTypeAction struct{}

func (setContentTypeAction) name() string {
	return setContentTypeName
}

var setContentTypePayloadSpec = payloadSingleKeyValue[model.NullContentType]{
	key: setContentTypeName,
	valueSpec: payloadMustSucceed[model.NullContentType]{
		payload: contentTypePayloadSpec,
	},
	description: "Set the content type of the current torrent",
}

// compileAction decodes the target content type at compile time and returns
// an action that sets it on the result.
func (setContentTypeAction) compileAction(ctx compilerContext) (action, error) {
	contentType, err := setContentTypePayloadSpec.Unmarshal(ctx)
	if err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			cl.ContentType = contentType
			return cl, nil
		},
	}, nil
}

func (setContentTypeAction) JsonSchema() JsonSchema {
	return setContentTypePayloadSpec.JsonSchema()
}
+32
View File
@@ -0,0 +1,32 @@
package classifier
import "github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
const unmatchedName = "unmatched"

// unmatchedAction always fails classification with an unmatched error.
type unmatchedAction struct{}

func (unmatchedAction) name() string {
	return unmatchedName
}
// unmatchedPayloadSpec matches the bare "unmatched" literal in a workflow source.
var unmatchedPayloadSpec = payloadLiteral[string]{
	literal: unmatchedName,
	// Grammar fix: "a unmatched" -> "an unmatched" (this text is surfaced in
	// the generated JSON schema).
	description: "Return an unmatched error for the current torrent",
}
// compileAction validates the payload and returns an action that always
// returns ErrUnmatched, annotated with the source path for diagnostics.
func (unmatchedAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := unmatchedPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	// Capture the compile-time path so runtime errors can point at the source.
	path := ctx.path
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			return ctx.result, classification.RuntimeError{Cause: classification.ErrUnmatched, Path: path}
		},
	}, nil
}

// JsonSchema returns the JSON schema fragment documenting the unmatched action.
func (unmatchedAction) JsonSchema() JsonSchema {
	return unmatchedPayloadSpec.JsonSchema()
}
+112
View File
@@ -0,0 +1,112 @@
package classifier
import (
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/keywords"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/protobuf"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/ext"
)
// celEnvOption builds the CEL environment used to compile workflow condition
// expressions — exposing the torrent/result proto types plus constants
// derived from the source document (flags, keywords, extensions, file and
// content type enums, and byte-size units) — and stores it on the compiler
// context. Returns an error if any flag value or keyword regex is invalid.
func celEnvOption(src Source, ctx *compilerContext) error {
	options := []cel.EnvOption{
		cel.StdLib(),
		Lists(),
		cel.EagerlyValidateDeclarations(true),
		cel.ExtendedValidations(),
		ext.Strings(ext.StringsValidateFormatCalls(true)),
		cel.Types(&protobuf.Torrent{}, &protobuf.Classification{}),
		cel.Variable("torrent", cel.ObjectType("bitmagnet.Torrent")),
		cel.Variable("result", cel.ObjectType("bitmagnet.Classification")),
	}
	// `flags` is masquerading as a map of strings to regexes, but it's actually individual string constants defined with a dot in the name,
	// along with a placeholder map of strings to nulls. This achieves correct compile-time checking with acceptable error messages.
	for name, tp := range src.FlagDefinitions {
		rawVal := src.Flags[name]
		val, err := tp.celVal(rawVal)
		if err != nil {
			return err
		}
		options = append(
			options,
			cel.Constant("flags."+name, tp.celType(), val),
		)
	}
	options = append(
		options,
		cel.Constant("flags", cel.MapType(cel.StringType, cel.NullType), types.NullValue),
	)
	// `keywords`, `extensions` etc use a similar trick.
	for group, kws := range src.Keywords {
		// Each keyword group is pre-compiled into a single regex string constant.
		r, err := keywords.NewRegexFromKeywords(kws...)
		if err != nil {
			return err
		}
		options = append(
			options,
			cel.Constant("keywords."+group, cel.StringType, types.String(r.String())),
		)
	}
	options = append(
		options,
		cel.Constant("keywords", cel.MapType(cel.StringType, cel.NullType), types.NullValue),
	)
	for group, extensions := range src.Extensions {
		options = append(
			options,
			cel.Constant("extensions."+group, cel.ListType(cel.StringType), types.NewStringList(types.DefaultTypeAdapter, extensions)),
		)
	}
	options = append(
		options,
		cel.Constant("extensions", cel.MapType(cel.StringType, cel.NullType), types.NullValue),
	)
	// Expose fileType.* constants mirroring the proto file-type enum.
	options = append(
		options,
		cel.Constant("fileType.unknown", cel.IntType, types.Int(protobuf.Torrent_File_unknown)),
	)
	for _, ft := range model.FileTypeValues() {
		options = append(
			options,
			cel.Constant(fmt.Sprintf("fileType.%s", ft.String()), cel.IntType, types.Int(protobuf.NewFileType(model.NullFileType{Valid: true, FileType: ft}))),
		)
	}
	options = append(
		options,
		cel.Constant("fileType", cel.MapType(cel.StringType, cel.NullType), types.NullValue),
	)
	// Expose contentType.* constants mirroring the proto content-type enum.
	options = append(
		options,
		cel.Constant("contentType.unknown", cel.IntType, types.Int(protobuf.Classification_unknown)),
	)
	for _, ct := range model.ContentTypeValues() {
		options = append(
			options,
			cel.Constant(fmt.Sprintf("contentType.%s", ct.String()), cel.IntType, types.Int(protobuf.NewContentType(model.NullContentType{Valid: true, ContentType: ct}))),
		)
	}
	options = append(
		options,
		cel.Constant("contentType", cel.MapType(cel.StringType, cel.NullType), types.NullValue),
	)
	// Byte-size convenience constants (decimal units) for size comparisons.
	options = append(
		options,
		cel.Constant("kb", cel.IntType, types.Int(1_000)),
	)
	options = append(
		options,
		cel.Constant("mb", cel.IntType, types.Int(1_000_000)),
	)
	options = append(
		options,
		cel.Constant("gb", cel.IntType, types.Int(1_000_000_000)),
	)
	env, err := cel.NewCustomEnv(options...)
	if err != nil {
		return err
	}
	ctx.celEnv = env
	return nil
}
+315
View File
@@ -0,0 +1,315 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package classifier
import (
"fmt"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/common/types/ref"
"github.com/google/cel-go/common/types/traits"
"github.com/google/cel-go/interpreter/functions"
)
// Lists provides a CEL function library extension of list utility functions.
//
// isSorted
//
// Returns true if the provided list of comparable elements is sorted, else returns false.
//
// <list<T>>.isSorted() <bool>, T must be a comparable type
//
// Examples:
//
// [1, 2, 3].isSorted() // return true
// ['a', 'b', 'b', 'c'].isSorted() // return true
// [2.0, 1.0].isSorted() // return false
// [1].isSorted() // return true
// [].isSorted() // return true
//
// sum
//
// Returns the sum of the elements of the provided list. Supports CEL number (int, uint, double) and duration types.
//
// <list<T>>.sum() <T>, T must be a numeric type or a duration
//
// Examples:
//
// [1, 3].sum() // returns 4
// [1.0, 3.0].sum() // returns 4.0
// ['1m', '1s'].sum() // returns '1m1s'
// emptyIntList.sum() // returns 0
// emptyDoubleList.sum() // returns 0.0
// [].sum() // returns 0
//
// min / max
//
// Returns the minimum/maximum valued element of the provided list. Supports all comparable types.
// If the list is empty, an error is returned.
//
// <list<T>>.min() <T>, T must be a comparable type
// <list<T>>.max() <T>, T must be a comparable type
//
// Examples:
//
// [1, 3].min() // returns 1
// [1, 3].max() // returns 3
// [].min() // error
// [1].min() // returns 1
// ([0] + emptyList).min() // returns 0
//
// indexOf / lastIndexOf
//
// Returns either the first or last positional index of the provided element in the list.
// If the element is not found, -1 is returned. Supports all equatable types.
//
// <list<T>>.indexOf(<T>) <int>, T must be an equatable type
// <list<T>>.lastIndexOf(<T>) <int>, T must be an equatable type
//
// Examples:
//
// [1, 2, 2, 3].indexOf(2) // returns 1
// ['a', 'b', 'b', 'c'].lastIndexOf('b') // returns 2
// [1.0].indexOf(1.1) // returns -1
// [].indexOf('string') // returns -1
// Lists returns a CEL environment option that installs this list-helper
// function library (isSorted, sum, min, max, indexOf, lastIndexOf).
func Lists() cel.EnvOption {
	return cel.Lib(listsLib)
}

// listsLib is the shared, stateless singleton instance of the library.
var listsLib = &lists{}

// lists implements cel.Library; it carries no state.
type lists struct{}

// LibraryName identifies this library to the CEL environment.
func (*lists) LibraryName() string {
	return "k8s.lists"
}
// paramA is the generic CEL type parameter used by the indexOf/lastIndexOf overloads.
var paramA = cel.TypeParamType("A")

// CEL typeParams can be used to constraint to a specific trait (e.g. traits.ComparableType) if the 1st operand is the type to constrain.
// But the functions we need to constrain are <list<paramType>>, not just <paramType>.
// Make sure the order of overload set is deterministic

// namedCELType pairs a CEL type with a stable name used to build
// deterministic overload identifiers.
type namedCELType struct {
	typeName string
	celType  *cel.Type
}

// summableTypes are the element types supported by sum().
var summableTypes = []namedCELType{
	{typeName: "int", celType: cel.IntType},
	{typeName: "uint", celType: cel.UintType},
	{typeName: "double", celType: cel.DoubleType},
	{typeName: "duration", celType: cel.DurationType},
}

// zeroValuesOfSummableTypes supplies the additive identity for each summable
// type, used to seed the accumulator so summing an empty list yields zero.
var zeroValuesOfSummableTypes = map[string]ref.Val{
	"int":      types.Int(0),
	"uint":     types.Uint(0),
	"double":   types.Double(0.0),
	"duration": types.Duration{Duration: 0},
}

// comparableTypes are the element types supported by isSorted/min/max.
var comparableTypes = []namedCELType{
	{typeName: "int", celType: cel.IntType},
	{typeName: "uint", celType: cel.UintType},
	{typeName: "double", celType: cel.DoubleType},
	{typeName: "bool", celType: cel.BoolType},
	{typeName: "duration", celType: cel.DurationType},
	{typeName: "timestamp", celType: cel.TimestampType},
	{typeName: "string", celType: cel.StringType},
	{typeName: "bytes", celType: cel.BytesType},
}
// WARNING: All library additions or modifications must follow
// https://github.com/kubernetes/enhancements/tree/master/keps/sig-api-machinery/2876-crd-validation-expression-language#function-library-updates

// listsLibraryDecls maps each exposed function name to its member overloads.
// One overload is generated per supported element type (so overload IDs are
// stable and deterministic); indexOf/lastIndexOf instead use a single
// overload generic over the element type paramA.
var listsLibraryDecls = map[string][]cel.FunctionOpt{
	"isSorted": templatedOverloads(comparableTypes, func(name string, paramType *cel.Type) cel.FunctionOpt {
		return cel.MemberOverload(fmt.Sprintf("list_%s_is_sorted_bool", name),
			[]*cel.Type{cel.ListType(paramType)}, cel.BoolType, cel.UnaryBinding(isSorted))
	}),
	"sum": templatedOverloads(summableTypes, func(name string, paramType *cel.Type) cel.FunctionOpt {
		return cel.MemberOverload(fmt.Sprintf("list_%s_sum_%s", name, name),
			[]*cel.Type{cel.ListType(paramType)}, paramType, cel.UnaryBinding(func(list ref.Val) ref.Val {
				// Seed the accumulator with the zero value of this element type.
				return sum(
					func() ref.Val {
						return zeroValuesOfSummableTypes[name]
					})(list)
			}))
	}),
	"max": templatedOverloads(comparableTypes, func(name string, paramType *cel.Type) cel.FunctionOpt {
		return cel.MemberOverload(fmt.Sprintf("list_%s_max_%s", name, name),
			[]*cel.Type{cel.ListType(paramType)}, paramType, cel.UnaryBinding(max()))
	}),
	"min": templatedOverloads(comparableTypes, func(name string, paramType *cel.Type) cel.FunctionOpt {
		return cel.MemberOverload(fmt.Sprintf("list_%s_min_%s", name, name),
			[]*cel.Type{cel.ListType(paramType)}, paramType, cel.UnaryBinding(min()))
	}),
	"indexOf": {
		cel.MemberOverload("list_a_index_of_int", []*cel.Type{cel.ListType(paramA), paramA}, cel.IntType,
			cel.BinaryBinding(indexOf)),
	},
	"lastIndexOf": {
		cel.MemberOverload("list_a_last_index_of_int", []*cel.Type{cel.ListType(paramA), paramA}, cel.IntType,
			cel.BinaryBinding(lastIndexOf)),
	},
}
// CompileOptions declares every function in this library to the CEL compiler.
func (*lists) CompileOptions() []cel.EnvOption {
	opts := make([]cel.EnvOption, 0, len(listsLibraryDecls))
	for fnName, fnOverloads := range listsLibraryDecls {
		opts = append(opts, cel.Function(fnName, fnOverloads...))
	}
	return opts
}

// ProgramOptions contributes no program-time options for this library.
func (*lists) ProgramOptions() []cel.ProgramOption {
	return []cel.ProgramOption{}
}
// isSorted reports whether the iterable's elements are in non-decreasing
// order. Non-iterable inputs and non-comparable elements yield a
// no-such-overload error.
func isSorted(val ref.Val) ref.Val {
	iterable, ok := val.(traits.Iterable)
	if !ok {
		return types.MaybeNoSuchOverloadErr(val)
	}
	var last traits.Comparer
	it := iterable.Iterator()
	for it.HasNext() == types.True {
		cur := it.Next()
		curCmp, ok := cur.(traits.Comparer)
		if !ok {
			return types.MaybeNoSuchOverloadErr(cur)
		}
		// A previous element strictly greater than the current one means unsorted.
		if last != nil && last.Compare(cur) == types.IntOne {
			return types.False
		}
		last = curCmp
	}
	return types.True
}
// sum returns a unary op that folds the elements of an iterable with Add,
// starting from the value produced by init (the type's additive identity, so
// an empty list sums to zero).
//
// Fix: the original kept a dead `acc != nil` / `else` branch — acc is
// assigned from init() and type-checked before the loop, so it can never be
// nil; the unreachable branch has been removed with no behavior change.
func sum(init func() ref.Val) functions.UnaryOp {
	return func(val ref.Val) ref.Val {
		i := init()
		acc, ok := i.(traits.Adder)
		if !ok {
			// Should never happen since all passed in init values are valid
			return types.MaybeNoSuchOverloadErr(i)
		}
		iterable, ok := val.(traits.Iterable)
		if !ok {
			return types.MaybeNoSuchOverloadErr(val)
		}
		for it := iterable.Iterator(); it.HasNext() == types.True; {
			next := it.Next()
			if _, ok := next.(traits.Adder); !ok {
				// Should never happen for type checked CEL programs
				return types.MaybeNoSuchOverloadErr(next)
			}
			s := acc.Add(next)
			sAdder, ok := s.(traits.Adder)
			if !ok {
				// Should never happen for type checked CEL programs
				return types.MaybeNoSuchOverloadErr(s)
			}
			acc = sAdder
		}
		return acc.(ref.Val)
	}
}
// min returns an op selecting the smallest element: the current best is
// replaced when it compares greater (IntOne) than the candidate.
func min() functions.UnaryOp {
	return cmp("min", types.IntOne)
}

// max returns an op selecting the largest element: the current best is
// replaced when it compares smaller (IntNegOne) than the candidate.
func max() functions.UnaryOp {
	return cmp("max", types.IntNegOne)
}
// cmp builds a unary op that scans an iterable and keeps the element
// preferred by opPreferCmpResult (IntOne selects minima, IntNegOne maxima).
// An empty list produces an error naming opName.
func cmp(opName string, opPreferCmpResult ref.Val) functions.UnaryOp {
	return func(val ref.Val) ref.Val {
		iterable, ok := val.(traits.Iterable)
		if !ok {
			return types.MaybeNoSuchOverloadErr(val)
		}
		var best traits.Comparer
		for it := iterable.Iterator(); it.HasNext() == types.True; {
			candidate := it.Next()
			candidateCmp, ok := candidate.(traits.Comparer)
			if !ok {
				// Should never happen for type checked CEL programs
				return types.MaybeNoSuchOverloadErr(candidate)
			}
			switch {
			case best == nil:
				best = candidateCmp
			case best.Compare(candidate) == opPreferCmpResult:
				best = candidateCmp
			}
		}
		if best == nil {
			return types.NewErr("%s called on empty list", opName)
		}
		return best.(ref.Val)
	}
}
// indexOf returns the first position of item in list, or -1 if absent.
func indexOf(list ref.Val, item ref.Val) ref.Val {
	lister, ok := list.(traits.Lister)
	if !ok {
		return types.MaybeNoSuchOverloadErr(list)
	}
	size := lister.Size().(types.Int)
	for idx := types.Int(0); idx < size; idx++ {
		if lister.Get(idx).Equal(item) == types.True {
			return idx
		}
	}
	return types.Int(-1)
}
// lastIndexOf returns the last position of item in list, or -1 if absent.
func lastIndexOf(list ref.Val, item ref.Val) ref.Val {
	lister, ok := list.(traits.Lister)
	if !ok {
		return types.MaybeNoSuchOverloadErr(list)
	}
	size := lister.Size().(types.Int)
	for idx := size - 1; idx >= 0; idx-- {
		if lister.Get(idx).Equal(item) == types.True {
			return idx
		}
	}
	return types.Int(-1)
}
// templatedOverloads returns overloads for each of the provided types. The template function is called with each type
// name (map key) and type to construct the overloads.
//
// Fix: replaced the manual index counter (`i := 0; ... i++`) with the
// idiomatic indexed range loop; behavior is unchanged.
func templatedOverloads(types []namedCELType, template func(name string, t *cel.Type) cel.FunctionOpt) []cel.FunctionOpt {
	overloads := make([]cel.FunctionOpt, len(types))
	for i, t := range types {
		overloads[i] = template(t.typeName, t.celType)
	}
	return overloads
}
-61
View File
@@ -1,61 +0,0 @@
package classifier
import "github.com/bitmagnet-io/bitmagnet/internal/model"
// ContentAttributes holds the language/episode/video attributes inferred for
// a torrent's content.
type ContentAttributes struct {
	Languages model.Languages
	// LanguageMulti indicates a multi-language release.
	LanguageMulti   bool
	Episodes        model.Episodes
	VideoResolution model.NullVideoResolution
	VideoSource     model.NullVideoSource
	VideoCodec      model.NullVideoCodec
	Video3d         model.NullVideo3d
	VideoModifier   model.NullVideoModifier
	ReleaseGroup    model.NullString
}

// Classification couples an optional matched content record with the
// inferred attributes and content type.
type Classification struct {
	ContentType model.NullContentType
	Content     *model.Content
	ContentAttributes
}
// ApplyHint overlays the hint's explicitly-set fields onto the attributes;
// hinted values take precedence over previously inferred ones, while unset
// hint fields leave the existing values untouched.
func (a *ContentAttributes) ApplyHint(h model.TorrentHint) {
	if len(h.Episodes) > 0 {
		a.Episodes = h.Episodes
	}
	if len(h.Languages) > 0 {
		a.Languages = h.Languages
	}
	if h.VideoResolution.Valid {
		a.VideoResolution = h.VideoResolution
	}
	if h.VideoSource.Valid {
		a.VideoSource = h.VideoSource
	}
	if h.VideoCodec.Valid {
		a.VideoCodec = h.VideoCodec
	}
	if h.Video3d.Valid {
		a.Video3d = h.Video3d
	}
	if h.VideoModifier.Valid {
		a.VideoModifier = h.VideoModifier
	}
	if h.ReleaseGroup.Valid {
		a.ReleaseGroup = h.ReleaseGroup
	}
}
// InferVideoAttributes populates video-related attributes by parsing the
// input string (typically a torrent or release name) with the model package's
// inference helpers.
func (a *ContentAttributes) InferVideoAttributes(input string) {
	a.VideoResolution = model.InferVideoResolution(input)
	a.VideoSource = model.InferVideoSource(input)
	a.VideoCodec, a.ReleaseGroup = model.InferVideoCodecAndReleaseGroup(input)
	a.Video3d = model.InferVideo3d(input)
	a.VideoModifier = model.InferVideoModifier(input)
}

// ApplyHint applies the hint's content type and then overlays its attribute
// overrides.
func (c *Classification) ApplyHint(h model.TorrentHint) {
	c.ContentType = h.NullContentType()
	c.ContentAttributes.ApplyHint(h)
}
@@ -0,0 +1,48 @@
package classification
import (
"fmt"
"strings"
)
// Error is a classification error that additionally carries a stable,
// machine-readable key identifying the failure kind.
type Error interface {
	error
	Key() string
}
// WorkflowError is a keyed error raised by classifier workflow actions.
// The key is a stable machine-readable identifier; message, when set,
// overrides the default human-readable rendering.
type WorkflowError struct {
	key     string
	message string
}

// Error returns the explicit message when set, otherwise a generic message
// derived from the key.
func (e WorkflowError) Error() string {
	if e.message != "" {
		return e.message
	}
	// Fixed message: previously read "workflow unmarshalError: %s", an
	// apparent leftover from an earlier identifier rename.
	return fmt.Sprintf("workflow error: %s", e.key)
}

// Key returns the stable machine-readable key identifying this error.
func (e WorkflowError) Key() string {
	return e.key
}
// ErrUnmatched signals that no workflow rule matched the torrent.
var ErrUnmatched = WorkflowError{
	key: "unmatched",
}

// ErrDeleteTorrent signals that the workflow decided the torrent should be deleted.
var ErrDeleteTorrent = WorkflowError{
	key: "delete_torrent",
}
// RuntimeError wraps an error that occurred while executing a workflow,
// recording the workflow path at which it happened.
type RuntimeError struct {
	Path  []string
	Cause error
}

// Error reports the dotted workflow path alongside the underlying cause.
func (e RuntimeError) Error() string {
	// Fixed message casing: "at Path" echoed the Go field name; use plain
	// lowercase "path" in the user-facing message.
	return fmt.Sprintf("runtime error at path %s: %s", strings.Join(e.Path, "."), e.Cause)
}

// Unwrap exposes the underlying cause for errors.Is / errors.As.
func (e RuntimeError) Unwrap() error {
	return e.Cause
}
@@ -0,0 +1,114 @@
package classification
import "github.com/bitmagnet-io/bitmagnet/internal/model"
// Result is the outcome of running a classification workflow on a torrent.
type Result struct {
	ContentAttributes
	// Content is the matched content record, if any was attached.
	Content *model.Content
	// Tags is a set of free-form tags applied by the workflow.
	Tags map[string]struct{}
}

// ApplyHint applies the hint's content type and then overlays its attribute
// overrides; hinted values take precedence over inferred ones.
func (r *Result) ApplyHint(h model.TorrentHint) {
	r.ContentType = h.NullContentType()
	r.ContentAttributes.ApplyHint(h)
}
// AttachContent attaches a matched content record to the result, adopting its
// content type. The content's original language is added to the result's
// language set only when no languages were detected yet, or when the release
// is flagged multi-language.
func (r *Result) AttachContent(content *model.Content) {
	r.Content = content
	r.ContentAttributes.ContentType = model.NewNullContentType(content.Type)
	if !content.OriginalLanguage.Valid {
		return
	}
	if len(r.Languages) > 0 && !r.LanguageMulti {
		return
	}
	if r.Languages == nil {
		r.Languages = make(model.Languages)
	}
	r.Languages[content.OriginalLanguage.Language] = struct{}{}
}
// ContentAttributes holds the attributes inferred for a torrent's content:
// content type, parsed base title and date, languages, episodes and
// video-related release attributes.
type ContentAttributes struct {
	ContentType model.NullContentType
	// BaseTitle is the title parsed from the torrent name, used for content search.
	BaseTitle model.NullString
	Date      model.Date
	Languages model.Languages
	// LanguageMulti indicates a multi-language release.
	LanguageMulti   bool
	Episodes        model.Episodes
	VideoResolution model.NullVideoResolution
	VideoSource     model.NullVideoSource
	VideoCodec      model.NullVideoCodec
	Video3d         model.NullVideo3d
	VideoModifier   model.NullVideoModifier
	ReleaseGroup    model.NullString
}
// Merge fills in each attribute from other only when it is not already set on
// a — existing values always win. The one exception is LanguageMulti, which
// is OR-combined.
func (a *ContentAttributes) Merge(other ContentAttributes) {
	if !a.ContentType.Valid {
		a.ContentType = other.ContentType
	}
	if !a.BaseTitle.Valid {
		a.BaseTitle = other.BaseTitle
	}
	if a.Date.IsNil() {
		a.Date = other.Date
	}
	if len(a.Languages) == 0 {
		a.Languages = other.Languages
	}
	a.LanguageMulti = a.LanguageMulti || other.LanguageMulti
	if len(a.Episodes) == 0 {
		a.Episodes = other.Episodes
	}
	if !a.VideoResolution.Valid {
		a.VideoResolution = other.VideoResolution
	}
	if !a.VideoSource.Valid {
		a.VideoSource = other.VideoSource
	}
	if !a.VideoCodec.Valid {
		a.VideoCodec = other.VideoCodec
	}
	if !a.Video3d.Valid {
		a.Video3d = other.Video3d
	}
	if !a.VideoModifier.Valid {
		a.VideoModifier = other.VideoModifier
	}
	if !a.ReleaseGroup.Valid {
		a.ReleaseGroup = other.ReleaseGroup
	}
}
// ApplyHint overlays the hint's explicitly-set fields onto the attributes;
// hinted values take precedence, while unset hint fields leave the existing
// values untouched.
func (a *ContentAttributes) ApplyHint(h model.TorrentHint) {
	if len(h.Episodes) > 0 {
		a.Episodes = h.Episodes
	}
	if len(h.Languages) > 0 {
		a.Languages = h.Languages
	}
	if h.VideoResolution.Valid {
		a.VideoResolution = h.VideoResolution
	}
	if h.VideoSource.Valid {
		a.VideoSource = h.VideoSource
	}
	if h.VideoCodec.Valid {
		a.VideoCodec = h.VideoCodec
	}
	if h.Video3d.Valid {
		a.Video3d = h.Video3d
	}
	if h.VideoModifier.Valid {
		a.VideoModifier = h.VideoModifier
	}
	if h.ReleaseGroup.Valid {
		a.ReleaseGroup = h.ReleaseGroup
	}
}
// InferVideoAttributes populates video-related attributes by parsing the
// input string (typically a torrent or release name) with the model package's
// inference helpers.
func (a *ContentAttributes) InferVideoAttributes(input string) {
	a.VideoResolution = model.InferVideoResolution(input)
	a.VideoSource = model.InferVideoSource(input)
	a.VideoCodec, a.ReleaseGroup = model.InferVideoCodecAndReleaseGroup(input)
	a.Video3d = model.InferVideo3d(input)
	a.VideoModifier = model.InferVideoModifier(input)
}
+228
View File
@@ -0,0 +1,228 @@
$schema: "https://bitmagnet.io/schemas/classifier-0.1.json"
workflows:
default:
# delete torrents containing banned keywords:
- if_else:
condition: "([torrent.baseName] + torrent.files.map(f, f.basePath)).join(' ').matches(keywords.banned)"
if_action: delete
# try to identify content type for torrents without a hinted content type:
- if_else:
condition: "result.contentType == contentType.unknown"
if_action:
find_match:
# match audiobooks:
- if_else:
condition:
and:
- "torrent.files.map(f, f.fileType == fileType.audio ? f.size : - f.size).sum() > 50*mb"
- or:
- "torrent.baseName.matches(keywords.audiobook)"
- "torrent.files.filter(f, f.extension in extensions.audiobook).size() > 0"
if_action:
set_content_type: audiobook
else_action: unmatched
# match comics:
- if_else:
condition: "torrent.files.map(f, f.extension in extensions.comic ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: comic
else_action: unmatched
# match ebooks:
- if_else:
condition: "torrent.files.map(f, f.extension in extensions.ebook ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: ebook
else_action: unmatched
# match music:
- if_else:
condition:
or:
- "torrent.files.map(f, f.extension in extensions.music ? f.size : - f.size).sum() > 0"
- and:
- "torrent.baseName.matches(keywords.music)"
- "torrent.files.map(f, f.fileType == fileType.audio ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: music
else_action: unmatched
# match software:
- if_else:
condition: "torrent.files.map(f, f.fileType == fileType.software ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: software
else_action: unmatched
# match xxx:
- if_else:
condition: "([torrent.baseName] + torrent.files.map(f, f.basePath)).join(' ').matches(keywords.xxx)"
if_action:
set_content_type: xxx
else_action: unmatched
# if the name contains a full date, attach it to the result as it's a good indicator of content type:
- find_match:
- parse_date
# if we know a content ID that isn't already attached, try to attach it, either from the local `content` table or an API integration:
- if_else:
condition:
and:
- "torrent.hasHintedContentId && !result.hasAttachedContent"
# we might as well save work by restricting this to content types that have an API integration:
- "result.contentType in [contentType.movie, contentType.tv_show, contentType.xxx]"
if_action:
find_match:
- attach_local_content_by_id
- if_else:
condition: "flags.tmdb_enabled"
if_action: attach_tmdb_content_by_id
else_action: unmatched
# parse video-related attributes for video torrents (including the base title, needed for the next step):
- if_else:
condition:
or:
- "result.contentType in [contentType.movie, contentType.tv_show]"
- "torrent.files.map(f, f.fileType == fileType.video ? f.size : - f.size).sum() > 100*mb"
if_action:
find_match:
# parse video-related attributes from the torrent name;
# if the content type wasn't previously specified and the name format doesn't suggest a movie or TV show, a "no match" will be returned:
- parse_video_content
# if content isn't already attached, and a base title has been parsed, then search for the content, either from the local `content` table or an API integration:
- if_else:
condition: "!result.hasAttachedContent && result.hasBaseTitle"
if_action:
find_match:
- attach_local_content_by_search
- if_else:
condition: "flags.tmdb_enabled"
if_action: attach_tmdb_content_by_search
else_action: unmatched
# delete specific content types based on the configured flags:
- if_else:
condition:
or:
- "result.contentType in flags.delete_content_types"
- "flags.delete_xxx && result.contentType == contentType.xxx"
if_action: delete
extensions:
audiobook:
- m4b
comic:
- cb7
- cba
- cbr
- cbt
- cbz
ebook:
- azw
- azw3
- azw4
- azw8
- chm
- doc
- docx
- djvu
- epub
- lit
- mobi
- odt
- pdf
- rtf
music:
- ape
- dsf
- flac
software:
- apk
- app
- bat
- bin
- deb
- dll
- dmg
- exe
- iso
- jar
- lua
- msi
- package
- pkg
- rpm
- sh
keywords:
audiobook:
- audiobooks?
- books?
- (un)?abridged
- narrated
- novels?
- "*biograph*"
music:
- discography
- music
- album
- \V.?\A.?
- various artists
- compilation
- ep
- lp
- single
- vinyl
- classical
- disco
- folk
- hits
- house
- indie
- jazz
- metal
- pop
- reggae
- rock
- trance
xxx:
- anal
- ass
- blowjobs?
- boob*
- cocks?
- cum*
- dicks?
- erotic*
- "*fuck*"
- "*gloryhole*"
- hardcore
- kink*
- milf*
- nubile*
- onlyfans
- orgasm*
- orgy
- "*porn*"
- pov
- pussy
- seduc*
- sex*
- slut*
- tits?
- threesome
- "*wank*"
- "*xxx*"
banned:
- pa?edo(fil*|phil*)?
- preteen
- pthc
- ptsc
- lsbar
- lsm
- underage
- hebefilia
- opva
- child porn*
- (#|10|11|12|13|14|15|16|17) ?y ?o
flag_definitions:
tmdb_enabled: bool
delete_content_types: content_type_list
delete_xxx: bool
flags:
tmdb_enabled: true
delete_content_types: []
delete_xxx: false
+145 -24
View File
@@ -3,39 +3,160 @@ package classifier
import (
"context"
"errors"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"go.uber.org/zap"
"github.com/bitmagnet-io/bitmagnet/internal/protobuf"
"github.com/google/cel-go/cel"
"strings"
)
// ErrNoMatch is the sentinel error returned when no matcher or rule applies.
var (
	ErrNoMatch = errors.New("no match")
)
type Classifier interface {
Classify(ctx context.Context, torrent model.Torrent) (Classification, error)
type Compiler interface {
Compile(source Source) (Runner, error)
}
type SubClassifier interface {
Classifier
Key() string
Priority() int
type Runner interface {
Run(ctx context.Context, workflow string, t model.Torrent) (classification.Result, error)
}
type classifier struct {
subClassifiers []SubClassifier
logger *zap.SugaredLogger
// compiler builds Runners from classifier Sources. options register compile
// features on the context; dependencies are threaded through to the runner.
type compiler struct {
	options      []compilerOption
	dependencies dependencies
}
func (c classifier) Classify(ctx context.Context, t model.Torrent) (Classification, error) {
for _, sc := range c.subClassifiers {
tc, err := sc.Classify(ctx, t)
if err == nil {
return tc, nil
}
if !errors.Is(err, ErrNoMatch) {
c.logger.Errorw("error classifying content", "classifier", sc.Key(), "torrent", t, "error", err)
return Classification{}, err
// compilerContext carries per-compilation state: the registered features, the
// CEL environment, the raw source node currently being compiled, the path to
// that node (for error reporting), and the declared workflow names.
type compilerContext struct {
	features
	celEnv        *cel.Env
	source        any
	path          []string
	workflowNames map[string]struct{}
}

// compilerOption is a compile-time hook applied to the context before the
// workflows themselves are compiled.
type compilerOption func(Source, *compilerContext) error
// executionContext carries everything a single classification run needs: the
// ambient context, shared dependencies, the compiled workflows, the torrent
// being classified (plus its protobuf projection for CEL), and the
// in-progress result (likewise mirrored as a protobuf for CEL expressions).
type executionContext struct {
	context.Context
	dependencies
	workflows map[string]action
	torrent   model.Torrent
	torrentPb *protobuf.Torrent
	result    classification.Result
	resultPb  *protobuf.Classification
}

// withResult returns a copy of the execution context with the result replaced
// and its protobuf mirror regenerated, keeping the two views in sync.
func (c executionContext) withResult(result classification.Result) executionContext {
	c.result = result
	c.resultPb = protobuf.NewClassification(result)
	return c
}
// child returns a copy of the context focused on the given source node, with
// pathPart appended to a freshly-allocated path slice so that sibling
// contexts never share a backing array.
func (c compilerContext) child(pathPart string, source any) compilerContext {
	c.source = source
	branch := make([]string, 0, len(c.path)+1)
	branch = append(branch, c.path...)
	c.path = append(branch, pathPart)
	return c
}
// error wraps cause in a compilerError annotated with the current path,
// unless cause is already a compilerError (avoids double-wrapping).
func (c compilerContext) error(cause error) error {
	if asCompilerError(cause) != nil {
		return cause
	}
	return compilerError{c.path, cause}
}

// fatal escalates cause to a fatalCompilerError, which aborts compilation;
// an existing compilerError's path is preserved when present.
func (c compilerContext) fatal(cause error) error {
	if asFatalCompilerError(cause) != nil {
		return cause
	}
	cErr := asCompilerError(cause)
	if cErr != nil {
		return fatalCompilerError{compilerError: *cErr}
	}
	return fatalCompilerError{compilerError{c.path, cause}}
}
// Compile decodes and validates a classifier Source, applies the registered
// compiler options (features, CEL environment, ...), compiles every workflow,
// and returns a Runner. All failures at this stage are escalated to fatal
// compiler errors.
func (c compiler) Compile(source Source) (Runner, error) {
	ctx := &compilerContext{
		source:        source,
		workflowNames: source.workflowNames(),
	}
	// Re-decode the source through the decoder to normalise/validate it.
	source, sourceErr := decode[Source](*ctx)
	if sourceErr != nil {
		return nil, ctx.fatal(sourceErr)
	}
	for _, opt := range c.options {
		if err := opt(source, ctx); err != nil {
			return nil, ctx.fatal(err)
		}
	}
	return Classification{}, ErrNoMatch
	// NOTE(review): the line above looks like a stray remnant of the removed
	// classifier.Classify implementation (its return types don't match this
	// signature and it makes the code below unreachable) — likely a
	// diff/merge artifact; confirm against the repository and remove.
	workflowsCtx := ctx.child("workflows", source.Workflows)
	workflows := make(map[string]action)
	for name, src := range source.Workflows {
		a, err := ctx.compileAction(workflowsCtx.child(name, src))
		if err != nil {
			return nil, ctx.fatal(err)
		}
		workflows[name] = a
	}
	return runner{
		dependencies: c.dependencies,
		workflows:    workflows,
	}, nil
}
// decodeTo decodes the context's raw source node into target, wrapping
// decoder-construction failures with the current compiler path.
func decodeTo[T any](ctx compilerContext, target *T) error {
	d, newErr := newDecoder(target)
	if newErr != nil {
		return ctx.error(newErr)
	}
	return d.Decode(ctx.source)
}

// decode decodes the context's raw source node into a fresh value of type T.
func decode[T any](ctx compilerContext) (T, error) {
	var out T
	if err := decodeTo(ctx, &out); err != nil {
		return out, err
	}
	return out, nil
}
// compilerError annotates a compilation failure with the source path at which
// it occurred.
type compilerError struct {
	path  []string
	cause error
}

// Error reports the dotted source path and the underlying cause.
func (e compilerError) Error() string {
	return fmt.Sprintf("compiler error at path '%s': %s", strings.Join(e.path, "."), e.cause)
}

// Unwrap exposes the cause for errors.Is / errors.As.
func (e compilerError) Unwrap() error {
	return e.cause
}
// asCompilerError returns the compilerError found in err's chain, or nil when
// there is none.
func asCompilerError(err error) *compilerError {
	var cErr compilerError
	if errors.As(err, &cErr) {
		return &cErr
	}
	return nil
}
// fatalCompilerError marks a compiler error that must abort compilation
// entirely rather than allow fallback to another definition.
type fatalCompilerError struct {
	compilerError
}

// Unwrap exposes the wrapped compilerError (and, transitively, its cause).
func (e fatalCompilerError) Unwrap() error {
	return e.compilerError
}
// asFatalCompilerError returns the fatalCompilerError found in err's chain,
// or nil when there is none.
func asFatalCompilerError(err error) *fatalCompilerError {
	var fErr fatalCompilerError
	if errors.As(err, &fErr) {
		return &fErr
	}
	return nil
}
// numericPathPart renders a list index as a bracketed path segment, e.g. "[3]".
func numericPathPart(idx int) string {
	return fmt.Sprintf("[%d]", idx)
}
+244
View File
@@ -0,0 +1,244 @@
package classifier
import (
"context"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
classifier_mocks "github.com/bitmagnet-io/bitmagnet/internal/classifier/mocks"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
tmdb_mocks "github.com/bitmagnet-io/bitmagnet/internal/tmdb/mocks"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"testing"
)
// TestClassifier compiles the core YAML classifier source and runs the
// "default" workflow against a table of example torrents, with the local
// content search and the TMDB client mocked out.
func TestClassifier(t *testing.T) {
	// Argument matcher accepting any context.Context passed to the mocks.
	matchContext := mock.MatchedBy(func(ctx any) bool {
		_, ok := ctx.(context.Context)
		return ok
	})
	testCases := []struct {
		torrent      model.Torrent
		prepareMocks func(mocks testClassifierMocks)
		expected     classification.Result
		expectedErr  error
	}{
		{
			// Neither local search nor TMDB match: attributes are still parsed
			// from the name, but no content record is attached.
			torrent: model.Torrent{
				Name:        "The Regular Movie (2000).mkv",
				FilesStatus: model.FilesStatusSingle,
				Extension:   model.NewNullString("mkv"),
				Size:        1000000000,
			},
			prepareMocks: func(mocks testClassifierMocks) {
				mocks.search.On(
					"ContentBySearch",
					matchContext,
					model.ContentTypeMovie,
					"The Regular Movie",
					model.Year(2000),
				).
					Return(model.Content{}, classification.ErrUnmatched)
				mocks.tmdbClient.On(
					"SearchMovie",
					matchContext,
					tmdb.SearchMovieRequest{
						Query:        "The Regular Movie",
						Year:         2000,
						IncludeAdult: true,
					},
				).
					Return(tmdb.SearchMovieResponse{}, nil)
			},
			expected: classification.Result{
				ContentAttributes: classification.ContentAttributes{
					ContentType: model.NewNullContentType(model.ContentTypeMovie),
					BaseTitle:   model.NewNullString("The Regular Movie"),
					Date: model.Date{
						Year: 2000,
					},
				},
			},
		},
		{
			// Matched by the local content search; TMDB is never consulted.
			torrent: model.Torrent{
				Name:        "The Regular Local Movie (2000).mkv",
				FilesStatus: model.FilesStatusSingle,
				Extension:   model.NewNullString("mkv"),
				Size:        1000000000,
			},
			prepareMocks: func(mocks testClassifierMocks) {
				mocks.search.On(
					"ContentBySearch",
					matchContext,
					model.ContentTypeMovie,
					"The Regular Local Movie",
					model.Year(2000),
				).
					Return(model.Content{
						Type:        model.ContentTypeMovie,
						Source:      "local",
						ID:          "123",
						Title:       "The Regular Local Movie",
						ReleaseYear: 2000,
					}, nil)
			},
			expected: classification.Result{
				ContentAttributes: classification.ContentAttributes{
					ContentType: model.NewNullContentType(model.ContentTypeMovie),
					BaseTitle:   model.NewNullString("The Regular Local Movie"),
					Date: model.Date{
						Year: 2000,
					},
				},
				Content: &model.Content{
					Type:        model.ContentTypeMovie,
					Source:      "local",
					ID:          "123",
					Title:       "The Regular Local Movie",
					ReleaseYear: 2000,
				},
			},
		},
		{
			// Unmatched locally, matched via TMDB search + details lookup.
			torrent: model.Torrent{
				Name:        "The Regular TMDB Movie (2000).mkv",
				FilesStatus: model.FilesStatusSingle,
				Extension:   model.NewNullString("mkv"),
				Size:        1000000000,
			},
			prepareMocks: func(mocks testClassifierMocks) {
				mocks.search.On(
					"ContentBySearch",
					matchContext,
					model.ContentTypeMovie,
					"The Regular TMDB Movie",
					model.Year(2000),
				).
					Return(model.Content{}, classification.ErrUnmatched)
				mocks.tmdbClient.On(
					"SearchMovie",
					matchContext,
					tmdb.SearchMovieRequest{
						Query:        "The Regular TMDB Movie",
						Year:         2000,
						IncludeAdult: true,
					},
				).
					Return(tmdb.SearchMovieResponse{
						Results: []tmdb.SearchMovieResult{
							{
								ID:          123,
								Title:       "The Regular TMDB Movie",
								ReleaseDate: "2000-01-01",
							},
						},
					}, nil)
				mocks.tmdbClient.On(
					"MovieDetails",
					matchContext,
					tmdb.MovieDetailsRequest{
						ID: 123,
					},
				).
					Return(tmdb.MovieDetailsResponse{
						ID:            123,
						Title:         "The Regular TMDB Movie",
						OriginalTitle: "The Regular TMDB Movie Original",
						ReleaseDate:   "2000-01-01",
					}, nil)
			},
			expected: classification.Result{
				ContentAttributes: classification.ContentAttributes{
					ContentType: model.NewNullContentType(model.ContentTypeMovie),
					BaseTitle:   model.NewNullString("The Regular TMDB Movie"),
					Date: model.Date{
						Year: 2000,
					},
				},
				Content: &model.Content{
					Type:   model.ContentTypeMovie,
					Source: "tmdb",
					ID:     "123",
					Title:  "The Regular TMDB Movie",
					ReleaseDate: model.Date{
						Year:  2000,
						Month: 1,
						Day:   1,
					},
					ReleaseYear:   2000,
					Adult:         model.NewNullBool(false),
					OriginalTitle: model.NewNullString("The Regular TMDB Movie Original"),
					Popularity:    model.NewNullFloat32(0),
					VoteAverage:   model.NewNullFloat32(0),
					VoteCount:     model.NewNullUint(0),
				},
			},
		},
		{
			// Classified as xxx by keyword match; no external lookups expected.
			torrent: model.Torrent{
				Name:        "The XXX Movie 1080p.mkv",
				FilesStatus: model.FilesStatusSingle,
				Extension:   model.NewNullString("mkv"),
				Size:        1000000000,
			},
			expected: classification.Result{
				ContentAttributes: classification.ContentAttributes{
					ContentType:     model.NewNullContentType(model.ContentTypeXxx),
					VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
				},
			},
		},
	}
	for _, tc := range testCases {
		t.Run(fmt.Sprintf("torrent: %s", tc.torrent.Name), func(t *testing.T) {
			mocks := newTestClassifierMocks(t)
			// Compile the real core classifier source so the test exercises the
			// shipped workflow definition end to end.
			source, sourceErr := yamlSourceProvider{rawSourceProvider: coreSourceProvider{}}.source()
			if sourceErr != nil {
				t.Fatal(sourceErr)
				return
			}
			workflow, compileErr := mocks.compiler.Compile(source)
			if compileErr != nil {
				t.Fatal(compileErr)
				return
			}
			if tc.prepareMocks != nil {
				tc.prepareMocks(mocks)
			}
			result, runErr := workflow.Run(context.Background(), "default", tc.torrent)
			if runErr != nil {
				assert.Equal(t, tc.expectedErr, runErr)
				t.Log(runErr)
			} else {
				assert.Equal(t, tc.expected, result)
			}
		})
	}
}
// testClassifierMocks bundles a compiler wired with mocked dependencies plus
// handles to those mocks for setting expectations.
type testClassifierMocks struct {
	compiler   Compiler
	search     *classifier_mocks.LocalSearch
	tmdbClient *tmdb_mocks.Client
}

// newTestClassifierMocks builds a compiler with the default features and CEL
// environment, backed by mockery-generated LocalSearch and TMDB clients.
func newTestClassifierMocks(t *testing.T) testClassifierMocks {
	search := classifier_mocks.NewLocalSearch(t)
	tmdbClient := tmdb_mocks.NewClient(t)
	return testClassifierMocks{
		compiler: compiler{
			options: []compilerOption{
				compilerFeatures(defaultFeatures),
				celEnvOption,
			},
			dependencies: dependencies{
				search:     search,
				tmdbClient: tmdbClient,
			},
		},
		search:     search,
		tmdbClient: tmdbClient,
	}
}
+3 -7
View File
@@ -1,21 +1,17 @@
package classifierfx
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/config/configfx"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/extension"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/keywords"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video/videofx"
"go.uber.org/fx"
)
func New() fx.Option {
return fx.Module(
"classifier",
"workflow",
configfx.NewConfigModule[classifier.Config]("classifier", classifier.NewDefaultConfig()),
fx.Provide(
classifier.New,
extension.New,
keywords.New,
),
videofx.New(),
)
}
+41
View File
@@ -0,0 +1,41 @@
package classifier
import (
"errors"
)
// conditions returns a feature that registers the given condition definitions
// with the compiler's feature set.
func conditions(defs ...conditionDefinition) feature {
	return func(c *features) {
		c.conditions = append(c.conditions, defs...)
	}
}

// conditionCompiler compiles a raw source node into an executable condition.
type conditionCompiler interface {
	compileCondition(ctx compilerContext) (condition, error)
}

// conditionDefinition is a named, JSON-schema-documented condition compiler.
type conditionDefinition interface {
	HasJsonSchema
	name() string
	conditionCompiler
}
// compileCondition tries each registered condition definition against the
// source node at ctx and returns the first one that compiles. A fatal
// compiler error aborts immediately; otherwise all per-definition failures
// are joined into a single error.
func (c compilerContext) compileCondition(ctx compilerContext) (condition, error) {
	var errs []error
	for _, def := range c.conditions {
		c, err := def.compileCondition(ctx.child(def.name(), ctx.source))
		if err == nil {
			return c, nil
		}
		if asFatalCompilerError(err) != nil {
			return condition{}, err
		}
		errs = append(errs, err)
	}
	errs = append(errs, errors.New("no condition matched"))
	return condition{}, errors.Join(errs...)
}

// condition is a compiled predicate evaluated against an execution context.
type condition struct {
	check func(executionContext) (bool, error)
}
+52
View File
@@ -0,0 +1,52 @@
package classifier
// andName is the source key identifying the "and" condition.
const andName = "and"

// andCondition compiles a conjunction of child conditions.
type andCondition struct{}

// name returns the source key for this condition definition.
func (andCondition) name() string {
	return andName
}

// andConditionPayloadSpec expects `and: [<condition>, ...]`.
var andConditionPayloadSpec = payloadSingleKeyValue[[]any]{
	key: andName,
	valueSpec: payloadMustSucceed[[]any]{payloadList[any]{
		itemSpec: payloadGeneric[any]{
			jsonSchema: map[string]any{
				"$ref": "#/definitions/condition",
			},
		},
		description: "A condition that is satisfied if all conditions in a list are satisfied",
	}},
}
// compileCondition compiles each child condition and returns a condition that
// short-circuits to false on the first unsatisfied (or failing) child.
func (andCondition) compileCondition(ctx compilerContext) (condition, error) {
	rawConds, err := andConditionPayloadSpec.Unmarshal(ctx)
	if err != nil {
		return condition{}, ctx.error(err)
	}
	compiled := make([]condition, 0, len(rawConds))
	for i, rawCond := range rawConds {
		cond, err := ctx.compileCondition(ctx.child(numericPathPart(i), rawCond))
		if err != nil {
			return condition{}, ctx.fatal(err)
		}
		compiled = append(compiled, cond)
	}
	return condition{
		check: func(ec executionContext) (bool, error) {
			for _, cond := range compiled {
				ok, err := cond.check(ec)
				if err != nil {
					return false, err
				}
				if !ok {
					return false, nil
				}
			}
			return true, nil
		},
	}, nil
}
// JsonSchema documents the accepted payload shape for the "and" condition.
func (andCondition) JsonSchema() JsonSchema {
	return andConditionPayloadSpec.JsonSchema()
}
@@ -0,0 +1,79 @@
package classifier
import (
"errors"
"fmt"
"github.com/google/cel-go/cel"
"reflect"
)
// expressionName is the source key identifying the CEL expression condition.
const expressionName = "expression"

// expressionCondition compiles a CEL expression into a boolean condition.
type expressionCondition struct{}

// celProgramPayload compiles a source string into a CEL program, rejecting
// expressions whose static output type is not bool.
var celProgramPayload = payloadTransformer[string, cel.Program]{
	spec: payloadGeneric[string]{
		jsonSchema: JsonSchema{
			"type":        "string",
			"minLength":   1,
			"description": "A CEL expression describing a condition",
		},
	},
	transform: func(s string, ctx compilerContext) (cel.Program, error) {
		ast, issues := ctx.celEnv.Compile(s)
		if issues != nil && issues.Err() != nil {
			return nil, ctx.error(fmt.Errorf("type-check error: %w", issues.Err()))
		}
		// The expression must statically evaluate to a boolean.
		if !reflect.DeepEqual(ast.OutputType(), cel.BoolType) {
			return nil, ctx.error(fmt.Errorf("got %v, wanted %v output type", ast.OutputType(), cel.BoolType))
		}
		prg, prgErr := ctx.celEnv.Program(ast,
			cel.EvalOptions(cel.OptOptimize),
		)
		if prgErr != nil {
			return nil, ctx.error(fmt.Errorf("program construction error: %w", prgErr))
		}
		return prg, nil
	},
}

// expressionConditionPayload accepts either `expression: "<cel>"` or a bare
// CEL expression string.
var expressionConditionPayload = payloadUnion[cel.Program]{
	oneOf: []TypedPayload[cel.Program]{
		payloadSingleKeyValue[cel.Program]{
			key:       expressionName,
			valueSpec: payloadMustSucceed[cel.Program]{celProgramPayload},
		},
		payloadMustSucceed[cel.Program]{celProgramPayload},
	},
}
func (c expressionCondition) name() string {
	return expressionName
}

// compileCondition compiles the expression payload into a condition that
// evaluates the CEL program with "torrent" and "result" bound to the
// execution context's protobuf representations.
func (c expressionCondition) compileCondition(ctx compilerContext) (condition, error) {
	prg, err := expressionConditionPayload.Unmarshal(ctx)
	if err != nil {
		return condition{}, ctx.error(err)
	}
	return condition{
		check: func(ctx executionContext) (bool, error) {
			result, _, err := prg.Eval(map[string]any{
				"torrent": ctx.torrentPb,
				"result":  ctx.resultPb,
			})
			if err != nil {
				return false, err
			}
			bl, ok := result.Value().(bool)
			if !ok {
				// Should be unreachable: the output type is checked when the
				// program is compiled. Report something clearer than "not bool"
				// if it ever happens.
				return false, errors.New("expression did not evaluate to a boolean")
			}
			return bl, nil
		},
	}, nil
}

// JsonSchema exposes the JSON schema fragment for the "expression" condition.
func (c expressionCondition) JsonSchema() JsonSchema {
	return expressionConditionPayload.JsonSchema()
}
+40
View File
@@ -0,0 +1,40 @@
package classifier
const notName = "not"

// notCondition negates a wrapped condition.
type notCondition struct{}

func (notCondition) name() string {
	return notName
}

// notConditionPayloadSpec accepts a single "not" key whose value is any
// condition, validated against the shared condition schema definition.
var notConditionPayloadSpec = payloadSingleKeyValue[any]{
	key: notName,
	valueSpec: payloadMustSucceed[any]{payloadGeneric[any]{
		jsonSchema: map[string]any{
			"$ref": "#/definitions/condition",
		},
	}},
	description: "A condition that negates the provided condition",
}
// compileCondition compiles the wrapped condition and returns its negation.
func (notCondition) compileCondition(ctx compilerContext) (condition, error) {
	p, decodeErr := notConditionPayloadSpec.Unmarshal(ctx)
	if decodeErr != nil {
		return condition{}, ctx.error(decodeErr)
	}
	cond, cErr := ctx.compileCondition(ctx.child("not", p))
	if cErr != nil {
		return condition{}, ctx.error(cErr)
	}
	return condition{
		check: func(ctx executionContext) (bool, error) {
			result, err := cond.check(ctx)
			if err != nil {
				// Don't negate a meaningless result: the original returned
				// !result (i.e. true) alongside a non-nil error; return the
				// zero value instead, per Go convention.
				return false, err
			}
			return !result, nil
		},
	}, nil
}

// JsonSchema exposes the JSON schema fragment for the "not" condition.
func (notCondition) JsonSchema() JsonSchema {
	return notConditionPayloadSpec.JsonSchema()
}
+50
View File
@@ -0,0 +1,50 @@
package classifier
const orName = "or"

// orCondition is satisfied when any sub-condition in a list is satisfied.
type orCondition struct{}

func (orCondition) name() string {
	return orName
}

// orConditionSpec accepts a single "or" key whose value is a list of
// conditions, each validated against the shared condition schema definition.
var orConditionSpec = payloadSingleKeyValue[[]any]{
	key: orName,
	valueSpec: payloadMustSucceed[[]any]{payloadList[any]{
		itemSpec: payloadGeneric[any]{
			jsonSchema: map[string]any{
				"$ref": "#/definitions/condition",
			},
		},
		description: "A condition that is satisfied if any of the conditions in a list are satisfied",
	}},
}
// compileCondition compiles the "or" payload into a condition that
// short-circuits: the check returns true at the first satisfied
// sub-condition (or stops at the first error), and false only if none match.
func (orCondition) compileCondition(ctx compilerContext) (condition, error) {
	rawConds, err := orConditionSpec.Unmarshal(ctx)
	if err != nil {
		// Wrap with path context — the original returned the error bare,
		// inconsistent with the "and" and "not" conditions.
		return condition{}, ctx.error(err)
	}
	conds := make([]condition, len(rawConds))
	for i, rawCond := range rawConds {
		cond, err := ctx.compileCondition(ctx.child(numericPathPart(i), rawCond))
		if err != nil {
			// Payload shape matched, so a bad sub-condition is a hard failure
			// (mirrors andCondition.compileCondition).
			return condition{}, ctx.fatal(err)
		}
		conds[i] = cond
	}
	return condition{
		check: func(ctx executionContext) (bool, error) {
			for _, c := range conds {
				if result, err := c.check(ctx); err != nil {
					return false, err
				} else if result {
					return true, nil
				}
			}
			return false, nil
		},
	}, nil
}

// JsonSchema exposes the JSON schema fragment for the "or" condition.
func (orCondition) JsonSchema() JsonSchema {
	return orConditionSpec.JsonSchema()
}
+15
View File
@@ -0,0 +1,15 @@
package classifier
// Config holds user-facing classifier configuration.
type Config struct {
	// Workflow is the name of the workflow to run; it must exist in the
	// compiled classifier Source.
	Workflow string
	// Keywords maps keyword-group names to lists of keywords.
	Keywords map[string][]string
	// Extensions maps extension-group names to lists of file extensions.
	Extensions map[string][]string
	// Flags holds values for flags declared by the classifier source.
	Flags map[string]any
	// DeleteXxx presumably enables deletion of adult content — confirm
	// against the workflow that reads this flag.
	DeleteXxx bool
}

// NewDefaultConfig returns a Config that runs the "default" workflow.
func NewDefaultConfig() Config {
	return Config{
		Workflow: "default",
	}
}
+17
View File
@@ -0,0 +1,17 @@
package classifier
import (
"github.com/iancoleman/strcase"
"github.com/mitchellh/mapstructure"
)
func newDecoder[T any](target *T) (*mapstructure.Decoder, error) {
return mapstructure.NewDecoder(&mapstructure.DecoderConfig{
Result: target,
MatchName: func(mapKey, fieldName string) bool {
return mapKey == strcase.ToSnake(fieldName)
},
ErrorUnused: true,
TagName: "json",
})
}
+10
View File
@@ -0,0 +1,10 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
)
// dependencies holds the external services a compiled classifier needs
// at execution time: local database search and the TMDB API client.
type dependencies struct {
	search     LocalSearch
	tmdbClient tmdb.Client
}
@@ -1,63 +0,0 @@
package extension
import (
"context"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
type extensionClassifier struct {
}
func (c extensionClassifier) Key() string {
return "extension"
}
func (c extensionClassifier) Priority() int {
return 10
}
func (c extensionClassifier) Classify(_ context.Context, t model.Torrent) (classifier.Classification, error) {
if !t.Hint.IsNil() || t.FilesStatus == model.FilesStatusNoInfo || t.FilesStatus == model.FilesStatusOverThreshold {
return classifier.Classification{}, classifier.ErrNoMatch
}
if t.FilesStatus == model.FilesStatusSingle {
if t.Extension.Valid {
ct := model.ContentTypeFromExtension(t.Extension.String)
if ct.Valid {
return classifier.Classification{
ContentType: ct,
}, nil
}
}
return classifier.Classification{}, classifier.ErrNoMatch
}
var unknownSize uint64
sizeMap := make(map[model.ContentType]uint64)
for _, f := range t.Files {
if f.Size == 0 {
unknownSize++
continue
}
ct := model.ContentTypeFromExtension(f.Extension.String)
if ct.Valid {
sizeMap[ct.ContentType] += f.Size
} else {
unknownSize += f.Size
}
}
var maxSize uint64
var maxType model.ContentType
for k, v := range sizeMap {
if v > maxSize {
maxSize = v
maxType = k
}
}
if maxSize > 0 && maxSize > unknownSize {
return classifier.Classification{
ContentType: model.NewNullContentType(maxType),
}, nil
}
return classifier.Classification{}, classifier.ErrNoMatch
}
-20
View File
@@ -1,20 +0,0 @@
package extension
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"go.uber.org/fx"
)
type Result struct {
fx.Out
Classifier lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
}
func New() Result {
return Result{
Classifier: lazy.New(func() (classifier.SubClassifier, error) {
return extensionClassifier{}, nil
}),
}
}
+52 -19
View File
@@ -1,39 +1,72 @@
package classifier
import (
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
"go.uber.org/fx"
"go.uber.org/zap"
"sort"
)
type Params struct {
fx.In
SubClassifiers []lazy.Lazy[SubClassifier] `group:"content_classifiers"`
Logger *zap.SugaredLogger
Config Config
TmdbConfig tmdb.Config
Search lazy.Lazy[search.Search]
TmdbClient lazy.Lazy[tmdb.Client]
}
type Result struct {
fx.Out
Classifier lazy.Lazy[Classifier]
Compiler lazy.Lazy[Compiler]
Source lazy.Lazy[Source]
Runner lazy.Lazy[Runner]
}
func New(p Params) Result {
func New(params Params) Result {
lc := lazy.New(func() (Compiler, error) {
s, err := params.Search.Get()
if err != nil {
return nil, err
}
tmdbClient, err := params.TmdbClient.Get()
if err != nil {
return nil, err
}
return compiler{
options: []compilerOption{
compilerFeatures(defaultFeatures),
celEnvOption,
},
dependencies: dependencies{
search: localSearch{s},
tmdbClient: tmdbClient,
},
}, nil
})
lsrc := lazy.New[Source](func() (Source, error) {
src, err := newSourceProvider(params.Config, params.TmdbConfig).source()
if err != nil {
return Source{}, err
}
if _, ok := src.Workflows[params.Config.Workflow]; !ok {
return Source{}, fmt.Errorf("default workflow '%s' not found", params.Config.Workflow)
}
return src, nil
})
return Result{
Classifier: lazy.New(func() (Classifier, error) {
subClassifiers := make([]SubClassifier, 0, len(p.SubClassifiers)+1)
for _, subResolver := range p.SubClassifiers {
r, err := subResolver.Get()
if err != nil {
return nil, err
}
subClassifiers = append(subClassifiers, r)
Compiler: lc,
Source: lsrc,
Runner: lazy.New(func() (Runner, error) {
src, err := lsrc.Get()
if err != nil {
return nil, err
}
subClassifiers = append(subClassifiers, FallbackClassifier{})
sort.Slice(subClassifiers, func(i, j int) bool {
return subClassifiers[i].Priority() < subClassifiers[j].Priority()
})
return classifier{subClassifiers, p.Logger}, nil
c, err := lc.Get()
if err != nil {
return nil, err
}
return c.Compile(src)
}),
}
}
-27
View File
@@ -1,27 +0,0 @@
package classifier
import (
"context"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"math"
)
type FallbackClassifier struct{}
func (c FallbackClassifier) Key() string {
return "fallback"
}
func (c FallbackClassifier) Priority() int {
return math.MaxInt
}
func (c FallbackClassifier) Classify(_ context.Context, t model.Torrent) (Classification, error) {
cl := Classification{}
cl.ApplyHint(t.Hint)
hasVideo := t.HasFileType(model.FileTypeVideo)
if hasVideo.Valid && hasVideo.Bool {
cl.InferVideoAttributes(t.Name)
}
return cl, nil
}
+47
View File
@@ -0,0 +1,47 @@
package classifier
// features is the registry of condition and action definitions available
// to the classifier DSL compiler.
type features struct {
	conditions []conditionDefinition
	actions    []actionDefinition
}

// feature mutates a features registry; applied via newFeatures.
type feature func(*features)

// newFeatures builds a registry by applying the given options to an
// empty features value.
func newFeatures(fs ...feature) features {
	result := features{}
	for _, f := range fs {
		f(&result)
	}
	return result
}

// compilerFeatures returns a compiler option that installs the given
// feature registry on the compiler context.
func compilerFeatures(features features) compilerOption {
	return func(_ Source, c *compilerContext) error {
		c.features = features
		return nil
	}
}

// defaultFeatures is the built-in set of conditions and actions shipped
// with the classifier.
var defaultFeatures = newFeatures(
	conditions(
		andCondition{},
		notCondition{},
		orCondition{},
		expressionCondition{},
	),
	actions(
		addTagAction{},
		attachLocalContentByIdAction{},
		attachLocalContentBySearchAction{},
		attachTmdbContentByIdAction{},
		attachTmdbContentBySearchAction{},
		deleteAction{},
		findMatchAction{},
		ifElseAction{},
		unmatchedAction{},
		parseDateAction{},
		parseVideoContentAction{},
		runWorkflowAction{},
		setContentTypeAction{},
	),
)
+117
View File
@@ -0,0 +1,117 @@
package classifier
import (
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/protobuf"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/common/types/ref"
)
// flagDefinitions maps flag names to their declared types.
type flagDefinitions map[string]FlagType

// merge combines two sets of flag definitions, with an error if the same
// flag is declared with two different types. Definitions unique to either
// side are kept.
func (d flagDefinitions) merge(other flagDefinitions) (flagDefinitions, error) {
	// Pre-size for the upper bound; removes the unidiomatic else-after-return
	// of the original.
	result := make(flagDefinitions, len(d)+len(other))
	for k, v := range d {
		if tp, ok := other[k]; ok && tp != v {
			return nil, fmt.Errorf("conflicting flag definition %s", k)
		}
		result[k] = v
	}
	for k, v := range other {
		if _, ok := result[k]; !ok {
			result[k] = v
		}
	}
	return result, nil
}
// flags maps flag names to their configured values.
type flags map[string]any

// merge returns a new flag set containing every key from f and other;
// on conflicting keys the value from other wins.
func (f flags) merge(other flags) flags {
	result := make(flags, len(f)+len(other))
	for k, v := range f {
		result[k] = v
	}
	// Overwriting with other's entries implements "other takes precedence"
	// without the original's per-key membership branching.
	for k, v := range other {
		result[k] = v
	}
	return result
}
// celType maps a FlagType to its corresponding CEL type, or nil for an
// unrecognised flag type.
func (t FlagType) celType() *cel.Type {
	switch t {
	case FlagTypeBool:
		return cel.BoolType
	case FlagTypeString:
		return cel.StringType
	case FlagTypeInt:
		return cel.IntType
	case FlagTypeStringList:
		return cel.ListType(cel.StringType)
	case FlagTypeContentTypeList:
		// Content types are exposed to CEL as their protobuf enum values,
		// hence a list of ints (see celVal).
		return cel.ListType(cel.IntType)
	default:
		return nil
	}
}
// celVal converts a raw configured flag value into a CEL value of the
// flag's declared type. It returns ErrInvalidFlagType for an unknown
// flag type, and a conversion error when the dynamic type of rawVal
// doesn't match the declaration.
func (t FlagType) celVal(rawVal any) (ref.Val, error) {
	switch t {
	case FlagTypeBool:
		if nativeVal, ok := rawVal.(bool); ok {
			return types.Bool(nativeVal), nil
		}
	case FlagTypeString:
		if nativeVal, ok := rawVal.(string); ok {
			return types.String(nativeVal), nil
		}
	case FlagTypeInt:
		// NOTE(review): only a native int is accepted; numbers decoded from
		// JSON arrive as float64 and would be rejected here — confirm flag
		// values are always sourced from YAML/native config.
		if nativeVal, ok := rawVal.(int); ok {
			return types.Int(nativeVal), nil
		}
	case FlagTypeStringList:
		if sliceVal, ok := rawVal.([]any); ok {
			nativeVal := make([]string, len(sliceVal))
			for i, v := range sliceVal {
				if strVal, ok := v.(string); ok {
					nativeVal[i] = strVal
				} else {
					return nil, fmt.Errorf("could not convert type %T to string", v)
				}
			}
			return types.NewStringList(types.DefaultTypeAdapter, nativeVal), nil
		}
	case FlagTypeContentTypeList:
		if sliceVal, ok := rawVal.([]any); ok {
			celVal := make([]protobuf.Classification_ContentType, len(sliceVal))
			for i, v := range sliceVal {
				if strVal, ok := v.(string); ok {
					var ct model.NullContentType
					// "unknown" maps to the null content type rather than
					// being parsed.
					if strVal != "unknown" {
						if parsed, parseErr := model.ParseContentType(strVal); parseErr != nil {
							return nil, fmt.Errorf("could not parse content type %s: %w", strVal, parseErr)
						} else {
							ct = model.NewNullContentType(parsed)
						}
					}
					celVal[i] = protobuf.NewContentType(ct)
				} else {
					return nil, fmt.Errorf("could not convert type %T to content type", v)
				}
			}
			return types.NewDynamicList(types.DefaultTypeAdapter, celVal), nil
		}
	default:
		return nil, ErrInvalidFlagType
	}
	// A known flag type whose value had the wrong dynamic type falls
	// through to this generic conversion error.
	return nil, fmt.Errorf("could not convert type %T to %s", rawVal, t)
}
+7
View File
@@ -0,0 +1,7 @@
package classifier
//go:generate go run github.com/abice/go-enum --marshal --names --nocase --nocomments --sql --sqlnullstr --values -f flag_type.go
// FlagType represents the type of a flag
// ENUM(bool, string, int, string_list, content_type_list)
type FlagType string
+191
View File
@@ -0,0 +1,191 @@
// Code generated by go-enum DO NOT EDIT.
// Version:
// Revision:
// Build Date:
// Built By:
package classifier
import (
"database/sql/driver"
"encoding/json"
"errors"
"fmt"
"strings"
)
const (
FlagTypeBool FlagType = "bool"
FlagTypeString FlagType = "string"
FlagTypeInt FlagType = "int"
FlagTypeStringList FlagType = "string_list"
FlagTypeContentTypeList FlagType = "content_type_list"
)
var ErrInvalidFlagType = fmt.Errorf("not a valid FlagType, try [%s]", strings.Join(_FlagTypeNames, ", "))
var _FlagTypeNames = []string{
string(FlagTypeBool),
string(FlagTypeString),
string(FlagTypeInt),
string(FlagTypeStringList),
string(FlagTypeContentTypeList),
}
// FlagTypeNames returns a list of possible string values of FlagType.
func FlagTypeNames() []string {
tmp := make([]string, len(_FlagTypeNames))
copy(tmp, _FlagTypeNames)
return tmp
}
// FlagTypeValues returns a list of the values for FlagType
func FlagTypeValues() []FlagType {
return []FlagType{
FlagTypeBool,
FlagTypeString,
FlagTypeInt,
FlagTypeStringList,
FlagTypeContentTypeList,
}
}
// String implements the Stringer interface.
func (x FlagType) String() string {
return string(x)
}
// IsValid provides a quick way to determine if the typed value is
// part of the allowed enumerated values
func (x FlagType) IsValid() bool {
_, err := ParseFlagType(string(x))
return err == nil
}
var _FlagTypeValue = map[string]FlagType{
"bool": FlagTypeBool,
"string": FlagTypeString,
"int": FlagTypeInt,
"string_list": FlagTypeStringList,
"content_type_list": FlagTypeContentTypeList,
}
// ParseFlagType attempts to convert a string to a FlagType.
func ParseFlagType(name string) (FlagType, error) {
if x, ok := _FlagTypeValue[name]; ok {
return x, nil
}
// Case insensitive parse, do a separate lookup to prevent unnecessary cost of lowercasing a string if we don't need to.
if x, ok := _FlagTypeValue[strings.ToLower(name)]; ok {
return x, nil
}
return FlagType(""), fmt.Errorf("%s is %w", name, ErrInvalidFlagType)
}
// MarshalText implements the text marshaller method.
func (x FlagType) MarshalText() ([]byte, error) {
return []byte(string(x)), nil
}
// UnmarshalText implements the text unmarshaller method.
func (x *FlagType) UnmarshalText(text []byte) error {
tmp, err := ParseFlagType(string(text))
if err != nil {
return err
}
*x = tmp
return nil
}
var errFlagTypeNilPtr = errors.New("value pointer is nil") // one per type for package clashes
// Scan implements the Scanner interface.
func (x *FlagType) Scan(value interface{}) (err error) {
if value == nil {
*x = FlagType("")
return
}
// A wider range of scannable types.
// driver.Value values at the top of the list for expediency
switch v := value.(type) {
case string:
*x, err = ParseFlagType(v)
case []byte:
*x, err = ParseFlagType(string(v))
case FlagType:
*x = v
case *FlagType:
if v == nil {
return errFlagTypeNilPtr
}
*x = *v
case *string:
if v == nil {
return errFlagTypeNilPtr
}
*x, err = ParseFlagType(*v)
default:
return errors.New("invalid type for FlagType")
}
return
}
// Value implements the driver Valuer interface.
func (x FlagType) Value() (driver.Value, error) {
return x.String(), nil
}
type NullFlagType struct {
FlagType FlagType
Valid bool
Set bool
}
func NewNullFlagType(val interface{}) (x NullFlagType) {
err := x.Scan(val) // yes, we ignore this error, it will just be an invalid value.
_ = err // make any errcheck linters happy
return
}
// Scan implements the Scanner interface.
func (x *NullFlagType) Scan(value interface{}) (err error) {
if value == nil {
x.FlagType, x.Valid = FlagType(""), false
return
}
err = x.FlagType.Scan(value)
x.Valid = (err == nil)
return
}
// Value implements the driver Valuer interface.
func (x NullFlagType) Value() (driver.Value, error) {
if !x.Valid {
return nil, nil
}
return x.FlagType.String(), nil
}
// MarshalJSON correctly serializes a NullFlagType to JSON.
func (n NullFlagType) MarshalJSON() ([]byte, error) {
const nullStr = "null"
if n.Valid {
return json.Marshal(n.FlagType)
}
return []byte(nullStr), nil
}
// UnmarshalJSON correctly deserializes a NullFlagType from JSON.
func (n *NullFlagType) UnmarshalJSON(b []byte) error {
n.Set = true
var x interface{}
err := json.Unmarshal(b, &x)
if err != nil {
return err
}
err = n.Scan(x)
return err
}
@@ -0,0 +1,166 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://json-schema.org/draft-07/schema#",
"title": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"allOf": [
{ "$ref": "#/definitions/nonNegativeInteger" },
{ "default": 0 }
]
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
},
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"$comment": {
"type": "string"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": true,
"readOnly": {
"type": "boolean",
"default": false
},
"writeOnly": {
"type": "boolean",
"default": false
},
"examples": {
"type": "array",
"items": true
},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/definitions/nonNegativeInteger" },
"minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": { "$ref": "#" },
"items": {
"anyOf": [{ "$ref": "#" }, { "$ref": "#/definitions/schemaArray" }],
"default": true
},
"maxItems": { "$ref": "#/definitions/nonNegativeInteger" },
"minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"contains": { "$ref": "#" },
"maxProperties": { "$ref": "#/definitions/nonNegativeInteger" },
"minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": { "$ref": "#" },
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [{ "$ref": "#" }, { "$ref": "#/definitions/stringArray" }]
}
},
"propertyNames": { "$ref": "#" },
"const": true,
"enum": {
"type": "array",
"items": true,
"minItems": 1,
"uniqueItems": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"format": { "type": "string" },
"contentMediaType": { "type": "string" },
"contentEncoding": { "type": "string" },
"if": { "$ref": "#" },
"then": { "$ref": "#" },
"else": { "$ref": "#" },
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" }
},
"default": true
}
+115
View File
@@ -0,0 +1,115 @@
package classifier
import (
"encoding/json"
)
// JsonSchema is a JSON schema document (or fragment) represented as a
// generic map.
type JsonSchema map[string]any

// MarshalJSON renders the schema as indented JSON. Converting the
// receiver to a plain map first strips this method from the value's
// method set, preventing infinite recursion.
func (s JsonSchema) MarshalJSON() ([]byte, error) {
	plain := map[string]any(s)
	return json.MarshalIndent(plain, "", "  ")
}

// schemaId is the canonical published URL of the classifier schema.
const schemaId = "https://bitmagnet.io/schemas/classifier-0.1.json"
// JsonSchema builds the full draft-07 JSON schema for a classifier
// source document: top-level properties (workflows, flag definitions,
// flags, keywords, extensions) plus one schema definition per registered
// condition and action.
func (f features) JsonSchema() JsonSchema {
	return map[string]any{
		"$schema": "http://json-schema.org/draft-07/schema#",
		"$id":     schemaId,
		"type":    "object",
		"properties": map[string]any{
			"$schema": map[string]any{
				"const": schemaId,
			},
			"workflows": map[string]any{
				"type": "object",
				"additionalProperties": map[string]any{
					"$ref": "#/definitions/action",
				},
			},
			"flag_definitions": map[string]any{
				"type": "object",
				"additionalProperties": map[string]any{
					"type": "string",
					"enum": FlagTypeValues(),
				},
			},
			"flags": map[string]any{
				"type":                 "object",
				"additionalProperties": true,
			},
			"keywords": map[string]any{
				"type": "object",
				"additionalProperties": map[string]any{
					"type": "array",
					"items": map[string]any{
						"type": "string",
					},
				},
			},
			"extensions": map[string]any{
				"type": "object",
				"additionalProperties": map[string]any{
					"type": "array",
					"items": map[string]any{
						"type": "string",
					},
				},
			},
		},
		"additionalProperties": false,
		// definitions are built in an immediately-invoked function so the
		// per-feature action__/condition__ entries can be added in a loop.
		"definitions": func() map[string]any {
			defs := map[string]any{
				"action": map[string]any{
					"oneOf": []map[string]any{
						{
							"$ref": "#/definitions/action_single",
						},
						{
							"$ref": "#/definitions/action_multi",
						},
					},
				},
				"action_multi": map[string]any{
					"type": "array",
					"items": map[string]any{
						"$ref": "#/definitions/action_single",
					},
				},
				"action_single": map[string]any{
					"oneOf": func() []map[string]any {
						var result []map[string]any
						for _, def := range f.actions {
							result = append(result, map[string]any{
								"$ref": "#/definitions/action__" + def.name(),
							})
						}
						return result
					}(),
				},
				"condition": map[string]any{
					"oneOf": func() []map[string]any {
						var result []map[string]any
						for _, def := range f.conditions {
							result = append(result, map[string]any{
								"$ref": "#/definitions/condition__" + def.name(),
							})
						}
						return result
					}(),
				},
			}
			// Each registered feature contributes its own schema fragment,
			// referenced above by naming convention.
			for _, def := range f.actions {
				defs["action__"+def.name()] = def.JsonSchema()
			}
			for _, def := range f.conditions {
				defs["condition__"+def.name()] = def.JsonSchema()
			}
			return defs
		}(),
	}
}

// DefaultJsonSchema returns the schema for the built-in feature set.
func DefaultJsonSchema() JsonSchema {
	return defaultFeatures.JsonSchema()
}
+38
View File
@@ -0,0 +1,38 @@
package classifier
import (
_ "embed"
"encoding/json"
"github.com/stretchr/testify/assert"
"github.com/xeipuuv/gojsonschema"
"testing"
)
//go:embed json-schema.draft-07.json
var metaSchemaJson []byte

// TestJsonSchema checks that the generated classifier schema is itself a
// valid JSON schema (validated against the embedded draft-07 meta-schema)
// and that the core classifier source validates against that schema.
func TestJsonSchema(t *testing.T) {
	schemaJson, err := DefaultJsonSchema().MarshalJSON()
	assert.NoError(t, err)
	schemaLoader := gojsonschema.NewBytesLoader(schemaJson)
	metaSchemaLoader := gojsonschema.NewBytesLoader(metaSchemaJson)
	// validate the schema against the meta schema
	metaResult, err := gojsonschema.Validate(metaSchemaLoader, schemaLoader)
	assert.NoError(t, err)
	assert.True(t, metaResult.Valid())
	coreClassifier, err := yamlSourceProvider{rawSourceProvider: coreSourceProvider{}}.source()
	assert.NoError(t, err)
	coreClassifierJson, err := json.Marshal(coreClassifier)
	assert.NoError(t, err)
	documentLoader := gojsonschema.NewBytesLoader(coreClassifierJson)
	// validate the classifier against the schema
	result, err := gojsonschema.Validate(schemaLoader, documentLoader)
	assert.NoError(t, err)
	assert.True(t, result.Valid())
}
@@ -1,46 +0,0 @@
package keywords
import (
"context"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"regexp"
)
type keywordsClassifier struct {
contentType model.ContentType
priority int
regex *regexp.Regexp
requiredFileTypes []model.FileType
}
func (c keywordsClassifier) Key() string {
return "keywords_" + c.contentType.String()
}
func (c keywordsClassifier) Priority() int {
return c.priority
}
func (c keywordsClassifier) Classify(_ context.Context, t model.Torrent) (classifier.Classification, error) {
if !t.Hint.IsNil() || !c.regex.MatchString(t.Name) {
return classifier.Classification{}, classifier.ErrNoMatch
}
if len(c.requiredFileTypes) > 0 {
hasRequiredFileTypes := t.HasFileType(c.requiredFileTypes...)
if hasRequiredFileTypes.Valid && !hasRequiredFileTypes.Bool {
return classifier.Classification{}, classifier.ErrNoMatch
}
}
cl := classifier.Classification{
ContentType: model.NullContentType{
Valid: true,
ContentType: c.contentType,
},
}
hasVideo := t.HasFileType(model.FileTypeVideo)
if hasVideo.Valid && hasVideo.Bool {
cl.InferVideoAttributes(t.Name)
}
return cl, nil
}
-53
View File
@@ -1,53 +0,0 @@
package keywords
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/regex"
"go.uber.org/fx"
)
type Result struct {
fx.Out
Music lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
Audiobook lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
Ebook lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
Xxx lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
}
func New() Result {
return Result{
Audiobook: lazy.New(func() (classifier.SubClassifier, error) {
return keywordsClassifier{
contentType: model.ContentTypeAudiobook,
regex: regex.NewRegexFromNames(audiobookWords...),
priority: 20,
requiredFileTypes: []model.FileType{model.FileTypeAudio},
}, nil
}),
Music: lazy.New(func() (classifier.SubClassifier, error) {
return keywordsClassifier{
contentType: model.ContentTypeMusic,
regex: regex.NewRegexFromNames(musicWords...),
priority: 21,
requiredFileTypes: []model.FileType{model.FileTypeAudio},
}, nil
}),
Ebook: lazy.New(func() (classifier.SubClassifier, error) {
return keywordsClassifier{
contentType: model.ContentTypeAudiobook,
regex: regex.NewRegexFromNames(ebookWords...),
priority: 22,
requiredFileTypes: []model.FileType{model.FileTypeDocument},
}, nil
}),
Xxx: lazy.New(func() (classifier.SubClassifier, error) {
return keywordsClassifier{
contentType: model.ContentTypeXxx,
regex: regex.NewRegexFromNames(xxxWords...),
priority: 23,
}, nil
}),
}
}
-53
View File
@@ -1,53 +0,0 @@
package keywords
var musicWords = []string{
"discography",
"music",
"album",
"va",
"various",
"compilation",
"ep",
"lp",
"single",
"vinyl",
"classical",
"disco",
"folk",
"hits",
"house",
"indie",
"jazz",
"metal",
"pop",
"jazz",
"reggae",
"rock",
"trance",
}
var audiobookWords = []string{
"audiobook",
"audiobooks",
"book",
"books",
"abridged",
"unabridged",
"narrated",
}
var ebookWords = []string{
"book",
"books",
"ebook",
"ebooks",
"abridged",
"unabridged",
}
var xxxWords = []string{
"xxx",
"porn",
"porno",
"sex",
}
+153
View File
@@ -0,0 +1,153 @@
// Code generated by mockery v2.40.1. DO NOT EDIT.
package classifier_mocks
import (
context "context"
model "github.com/bitmagnet-io/bitmagnet/internal/model"
mock "github.com/stretchr/testify/mock"
)
// LocalSearch is an autogenerated mock type for the LocalSearch type
type LocalSearch struct {
mock.Mock
}
type LocalSearch_Expecter struct {
mock *mock.Mock
}
func (_m *LocalSearch) EXPECT() *LocalSearch_Expecter {
return &LocalSearch_Expecter{mock: &_m.Mock}
}
// ContentById provides a mock function with given fields: _a0, _a1
func (_m *LocalSearch) ContentById(_a0 context.Context, _a1 model.ContentRef) (model.Content, error) {
ret := _m.Called(_a0, _a1)
if len(ret) == 0 {
panic("no return value specified for ContentById")
}
var r0 model.Content
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, model.ContentRef) (model.Content, error)); ok {
return rf(_a0, _a1)
}
if rf, ok := ret.Get(0).(func(context.Context, model.ContentRef) model.Content); ok {
r0 = rf(_a0, _a1)
} else {
r0 = ret.Get(0).(model.Content)
}
if rf, ok := ret.Get(1).(func(context.Context, model.ContentRef) error); ok {
r1 = rf(_a0, _a1)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LocalSearch_ContentById_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ContentById'
type LocalSearch_ContentById_Call struct {
*mock.Call
}
// ContentById is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 model.ContentRef
func (_e *LocalSearch_Expecter) ContentById(_a0 interface{}, _a1 interface{}) *LocalSearch_ContentById_Call {
return &LocalSearch_ContentById_Call{Call: _e.mock.On("ContentById", _a0, _a1)}
}
func (_c *LocalSearch_ContentById_Call) Run(run func(_a0 context.Context, _a1 model.ContentRef)) *LocalSearch_ContentById_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(model.ContentRef))
})
return _c
}
func (_c *LocalSearch_ContentById_Call) Return(_a0 model.Content, _a1 error) *LocalSearch_ContentById_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *LocalSearch_ContentById_Call) RunAndReturn(run func(context.Context, model.ContentRef) (model.Content, error)) *LocalSearch_ContentById_Call {
_c.Call.Return(run)
return _c
}
// ContentBySearch provides a mock function with given fields: _a0, _a1, _a2, _a3
func (_m *LocalSearch) ContentBySearch(_a0 context.Context, _a1 model.ContentType, _a2 string, _a3 model.Year) (model.Content, error) {
ret := _m.Called(_a0, _a1, _a2, _a3)
if len(ret) == 0 {
panic("no return value specified for ContentBySearch")
}
var r0 model.Content
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, model.ContentType, string, model.Year) (model.Content, error)); ok {
return rf(_a0, _a1, _a2, _a3)
}
if rf, ok := ret.Get(0).(func(context.Context, model.ContentType, string, model.Year) model.Content); ok {
r0 = rf(_a0, _a1, _a2, _a3)
} else {
r0 = ret.Get(0).(model.Content)
}
if rf, ok := ret.Get(1).(func(context.Context, model.ContentType, string, model.Year) error); ok {
r1 = rf(_a0, _a1, _a2, _a3)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LocalSearch_ContentBySearch_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ContentBySearch'
type LocalSearch_ContentBySearch_Call struct {
*mock.Call
}
// ContentBySearch is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 model.ContentType
// - _a2 string
// - _a3 model.Year
func (_e *LocalSearch_Expecter) ContentBySearch(_a0 interface{}, _a1 interface{}, _a2 interface{}, _a3 interface{}) *LocalSearch_ContentBySearch_Call {
return &LocalSearch_ContentBySearch_Call{Call: _e.mock.On("ContentBySearch", _a0, _a1, _a2, _a3)}
}
func (_c *LocalSearch_ContentBySearch_Call) Run(run func(_a0 context.Context, _a1 model.ContentType, _a2 string, _a3 model.Year)) *LocalSearch_ContentBySearch_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(model.ContentType), args[2].(string), args[3].(model.Year))
})
return _c
}
func (_c *LocalSearch_ContentBySearch_Call) Return(_a0 model.Content, _a1 error) *LocalSearch_ContentBySearch_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *LocalSearch_ContentBySearch_Call) RunAndReturn(run func(context.Context, model.ContentType, string, model.Year) (model.Content, error)) *LocalSearch_ContentBySearch_Call {
_c.Call.Return(run)
return _c
}
// NewLocalSearch creates a new instance of LocalSearch. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
// The first argument is typically a *testing.T value.
func NewLocalSearch(t interface {
mock.TestingT
Cleanup(func())
}) *LocalSearch {
mock := &LocalSearch{}
mock.Mock.Test(t)
t.Cleanup(func() { mock.AssertExpectations(t) })
return mock
}
+200
View File
@@ -0,0 +1,200 @@
package parsers
import (
"github.com/bitmagnet-io/bitmagnet/internal/lexer"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"regexp"
"strconv"
"strings"
"time"
)
// dateLexer scans a string for an embedded date.
type dateLexer struct {
	lexer.Lexer
}

// ParseDate extracts the first recognisable date from str, returning the
// zero Date when none is found.
func ParseDate(str string) model.Date {
	l := dateLexer{Lexer: lexer.NewLexer(str)}
	return l.lexDate()
}
var strMonths = map[string]time.Month{
"jan": time.January, "feb": time.February, "mar": time.March,
"apr": time.April, "may": time.May, "jun": time.June,
"jul": time.July, "aug": time.August, "sep": time.September,
"oct": time.October, "nov": time.November, "dec": time.December,
"january": time.January, "february": time.February, "march": time.March,
"april": time.April, "june": time.June,
"july": time.July, "august": time.August, "september": time.September,
"october": time.October, "november": time.November, "december": time.December,
}
var separators = map[string]struct{}{
".": {}, "-": {}, "/": {}, " ": {},
}
const minParts = 5
// lexDate scans the token stream for the first run of the form
// <part><sep><part><sep><part> where both separators are the same accepted
// separator character, the run begins at the start of the input or after a
// non-word break, and is followed by end of input or a non-word token (so
// digits embedded inside longer words never match). The first run that
// yields a valid calendar date is returned; otherwise the zero model.Date.
func (l *dateLexer) lexDate() model.Date {
	parts := l.lexDateParts()
	// isStartOrWordBreak gates matching so a date candidate cannot begin in
	// the middle of a word (e.g. the "19.0.2" inside "v19.0.2" must not match).
	isStartOrWordBreak := true
	for i := 0; i < len(parts)-minParts+1; i++ {
		part1 := parts[i]
		if !isStartOrWordBreak {
			// Skip forward until a non-word token re-enables matching.
			if part1.format == datePartNonWordChars {
				isStartOrWordBreak = true
			}
			continue
		}
		if !part1.IsNil() {
			i++
			sep := parts[i]
			if sep.format == datePartNonWordChars {
				if _, ok := separators[sep.literal]; ok {
					i++
					part2 := parts[i]
					if !part2.IsNil() {
						i++
						sep2 := parts[i]
						if sep2.literal != sep.literal {
							// Mixed separators (e.g. "1-2.3") do not form a date.
							isStartOrWordBreak = sep2.format == datePartNonWordChars
							continue
						}
						i++
						part3 := parts[i]
						if !part3.IsNil() && (i == len(parts)-1 || parts[i+1].format == datePartNonWordChars) {
							if date := findFirstValidDate(part1.Date, part2.Date, part3.Date); !date.IsNil() {
								return date
							} else {
								// Shaped like a date but not a valid one; the
								// following tokens are mid-word now.
								isStartOrWordBreak = false
								continue
							}
						} else {
							isStartOrWordBreak = part3.format == datePartNonWordChars
							continue
						}
					} else {
						isStartOrWordBreak = part2.format == datePartNonWordChars
						continue
					}
				} else {
					// A non-word run that is not an accepted separator still
					// counts as a word break for the next candidate.
					isStartOrWordBreak = true
					continue
				}
			} else {
				isStartOrWordBreak = false
				continue
			}
		} else {
			isStartOrWordBreak = part1.format == datePartNonWordChars
		}
	}
	return model.Date{}
}
// findFirstValidDate tries the three supported component orderings —
// year-month-day, day-month-year, month-day-year — and returns the first
// combination whose components are all present and form a valid calendar
// date, or the zero model.Date when none do.
func findFirstValidDate(part1, part2, part3 model.Date) model.Date {
	orderings := [3]model.Date{
		{Year: part1.Year, Month: part2.Month, Day: part3.Day}, // Y-M-D
		{Year: part3.Year, Month: part2.Month, Day: part1.Day}, // D-M-Y
		{Year: part3.Year, Month: part1.Month, Day: part2.Day}, // M-D-Y
	}
	for _, candidate := range orderings {
		if candidate.Year == 0 || candidate.Month == 0 || candidate.Day == 0 {
			continue
		}
		if candidate.IsValid() {
			return candidate
		}
	}
	return model.Date{}
}
// datePartFormat classifies a lexed token by its shape.
type datePartFormat int

const (
	datePart1Digit       datePartFormat = 1 + iota // single digit: possible day or month
	datePart2Digits                                // two digits: 2000-based year, plus day/month when in range
	datePart4Digits                                // four digits: year
	datePartStrMonth                               // month name or abbreviation
	datePartWordChars                              // other word characters (not a date component)
	datePartNonWordChars                           // separator / punctuation run
)

// datePart is one lexed token: the (possibly ambiguous) date components it
// could represent via the embedded Date — e.g. "5" is both day 5 and May —
// plus its shape and literal text.
type datePart struct {
	model.Date
	format  datePartFormat
	literal string
}
// lexDateParts tokenises the entire remaining input into date parts.
func (l *dateLexer) lexDateParts() []datePart {
	var parts []datePart
	for !l.IsEof() {
		parts = append(parts, l.lexDatePart())
	}
	return parts
}
var regex1Digit = regexp.MustCompile(`^\d$`)
var regex2Digits = regexp.MustCompile(`^\d{2}$`)
var regex4Digits = regexp.MustCompile(`^\d{4}$`)

// lexDatePart reads the next token and classifies it as a potential date
// component. Numeric tokens are deliberately ambiguous: a 1-digit token may
// be a day or a month, a 2-digit token is a 2000-based year (so "99" means
// 2099, not 1999) and also a day/month when in range, and a 4-digit token is
// a year. findFirstValidDate later picks the interpretation that forms a
// valid date.
func (l *dateLexer) lexDatePart() datePart {
	str := l.ReadWhile(lexer.IsWordChar)
	if str == "" {
		// Not at a word character: consume the whole non-word run as one token.
		str = l.ReadWhile(lexer.IsNonWordChar)
		return datePart{
			format:  datePartNonWordChars,
			literal: str,
		}
	}
	if m, ok := strMonths[strings.ToLower(str)]; ok {
		return datePart{
			Date:    model.Date{Month: m},
			format:  datePartStrMonth,
			literal: str,
		}
	}
	if regex1Digit.MatchString(str) {
		i, _ := strconv.Atoi(str)
		return datePart{
			Date:    model.Date{Day: uint8(i), Month: time.Month(i)},
			format:  datePart1Digit,
			literal: str,
		}
	}
	if regex2Digits.MatchString(str) {
		i, _ := strconv.Atoi(str)
		date := model.Date{Year: model.Year(2000 + i)}
		if i >= 1 && i <= 12 {
			date.Month = time.Month(i)
		}
		if i >= 1 && i <= 31 {
			date.Day = uint8(i)
		}
		return datePart{
			Date:    date,
			format:  datePart2Digits,
			literal: str,
		}
	}
	if regex4Digits.MatchString(str) {
		i, _ := strconv.Atoi(str)
		return datePart{
			Date:    model.Date{Year: model.Year(i)},
			format:  datePart4Digits,
			literal: str,
		}
	}
	return datePart{
		format:  datePartWordChars,
		literal: str,
	}
}
+33
View File
@@ -0,0 +1,33 @@
package parsers
import (
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/stretchr/testify/assert"
"testing"
)
// TestParseDate exercises ParseDate against release-name style inputs.
// A case whose expected Date is the zero value asserts that no date is
// falsely detected in the input.
func TestParseDate(t *testing.T) {
	tests := []struct {
		input    string
		expected model.Date
	}{
		{"2020-01-01", model.Date{Year: 2020, Month: 1, Day: 1}},
		{"01-01-2020", model.Date{Year: 2020, Month: 1, Day: 1}},
		{"01-Jan-2020", model.Date{Year: 2020, Month: 1, Day: 1}},
		{"Jan-01-2020", model.Date{Year: 2020, Month: 1, Day: 1}},
		{"MP3-daily-2019-July-16-Disco", model.Date{Year: 2019, Month: 7, Day: 16}},
		{"XXX Video (2022-09-21) 1080p.mp4", model.Date{Year: 2022, Month: 9, Day: 21}},
		{"Exxtra.23.02.01.Bla.Bla.Bla.XXX.1080p.HEVC.x265.PRT[XvX]", model.Date{Year: 2023, Month: 2, Day: 1}},
		{"The Movie (13.10.2017)_1080p.mp4", model.Date{Year: 2017, Month: 10, Day: 13}},
		{"Movie.23.05.15..The.Best.Of.XXX.1080p.MP4-WRB[rarbg]", model.Date{Year: 2023, Month: 5, Day: 15}},
		{"2021.09.11_Serie_C_2021.22_R.03_Xxx_FC_vs_Xxx_FC_[football.net]_720p.50_RUS.mkv", model.Date{Year: 2021, Month: 9, Day: 11}},
		// "Month day, year" dates are not yet supported:
		//{"Bla Bla June 27, 2015", model.Date{Year: 2015, Month: 6, Day: 27}},
		{input: "Software.Pro.X2.Suite.v19.0.2.23117-R2R"},
	}
	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			result := ParseDate(test.input)
			assert.Equal(t, test.expected, result)
		})
	}
}
@@ -1,7 +1,8 @@
package video
package parsers
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/keywords"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/regex"
"github.com/hedhyw/rex/pkg/dialect"
@@ -59,7 +60,7 @@ var titleEpisodesRegex = rex.New(
),
).MustCompile()
var multiRegex = regex.NewRegexFromNames("multi", "dual")
var multiRegex = keywords.MustNewRegexFromKeywords("multi", "dual")
var separatorToken = rex.Chars.Runes(" ._")
@@ -115,7 +116,7 @@ func parseTitleYear(input string) (string, model.Year, string, error) {
return title, model.Year(yearMatch), input[len(match[0]):], nil
}
}
return "", 0, "", classifier.ErrNoMatch
return "", 0, "", classification.ErrUnmatched
}
func parseTitle(input string) (string, string, error) {
@@ -125,7 +126,7 @@ func parseTitle(input string) (string, string, error) {
return title, input[len(match[0]):], nil
}
}
return "", "", classifier.ErrNoMatch
return "", "", classification.ErrUnmatched
}
func parseTitleYearEpisodes(input string) (string, model.Year, model.Episodes, string, error) {
@@ -141,7 +142,7 @@ func parseTitleYearEpisodes(input string) (string, model.Year, model.Episodes, s
episodes := model.EpisodesMatchToEpisodes(match[2:])
return title, year, episodes, input[len(match[0]):], nil
}
return "", 0, nil, "", classifier.ErrNoMatch
return "", 0, nil, "", classification.ErrUnmatched
}
func ParseTitleYearEpisodes(contentType model.NullContentType, input string) (string, model.Year, model.Episodes, string, error) {
@@ -156,30 +157,40 @@ func ParseTitleYearEpisodes(contentType model.NullContentType, input string) (st
if title, rest, err := parseTitle(input); err == nil {
return title, 0, nil, rest, nil
}
return "", 0, nil, "", classifier.ErrNoMatch
return "", 0, nil, "", classification.ErrUnmatched
}
func ParseContent(hintCt model.NullContentType, input string) (model.ContentType, string, model.Year, classifier.ContentAttributes, error) {
title, year, episodes, rest, err := ParseTitleYearEpisodes(hintCt, input)
func ParseVideoContent(torrent model.Torrent, result classification.Result) (classification.ContentAttributes, error) {
title, year, episodes, rest, err := ParseTitleYearEpisodes(result.ContentType, torrent.Name)
if err != nil {
return "", "", 0, classifier.ContentAttributes{}, err
if !result.ContentType.Valid {
return classification.ContentAttributes{}, err
}
rest = torrent.Name
}
var ct model.ContentType
if hintCt.Valid {
ct = hintCt.ContentType
} else if len(episodes) > 0 {
ct = model.ContentTypeTvShow
} else {
ct = model.ContentTypeMovie
ct := model.NullContentType{}
if result.ContentType.Valid {
ct = model.NullContentType{Valid: true, ContentType: result.ContentType.ContentType}
} else if len(episodes) > 0 || result.Date.IsValid() {
ct = model.NullContentType{Valid: true, ContentType: model.ContentTypeTvShow}
} else if !year.IsNil() {
ct = model.NullContentType{Valid: true, ContentType: model.ContentTypeMovie}
}
if ct != model.ContentTypeTvShow {
if ct.ContentType != model.ContentTypeTvShow {
episodes = nil
if year.IsNil() {
title = ""
rest = torrent.Name
}
}
attrs := classifier.ContentAttributes{
attrs := classification.ContentAttributes{
ContentType: ct,
BaseTitle: model.NullString{Valid: title != "", String: title},
Date: model.Date{Year: year},
Episodes: episodes,
Languages: model.InferLanguages(rest),
LanguageMulti: multiRegex.MatchString(rest),
}
attrs.InferVideoAttributes(rest)
return ct, title, year, attrs, nil
return attrs, nil
}
+245
View File
@@ -0,0 +1,245 @@
package classifier
import (
"errors"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
// HasJsonSchema is implemented by every payload spec so a full JSON schema
// for the classifier DSL can be generated from the spec tree.
type HasJsonSchema interface {
	JsonSchema() JsonSchema
}

// TypedPayload is a payload spec that can unmarshal the compiler context's
// current source value into a T.
type TypedPayload[T any] interface {
	HasJsonSchema
	Unmarshal(ctx compilerContext) (T, error)
}

// PayloadTransformerFunc converts an unmarshalled From value into a To
// value, with access to the compiler context for error annotation.
type PayloadTransformerFunc[From any, To any] func(From, compilerContext) (To, error)

// payloadTransformer decorates a spec of From with a conversion to To while
// exposing the inner spec's JSON schema unchanged.
type payloadTransformer[From any, To any] struct {
	spec      TypedPayload[From]
	transform PayloadTransformerFunc[From, To]
}

func (s payloadTransformer[From, To]) JsonSchema() JsonSchema {
	return s.spec.JsonSchema()
}

// Unmarshal unmarshals with the inner spec, then applies the transform.
func (s payloadTransformer[From, To]) Unmarshal(ctx compilerContext) (to To, _ error) {
	from, err := s.spec.Unmarshal(ctx)
	if err != nil {
		return to, err
	}
	return s.transform(from, ctx)
}
// payloadUnion tries each member spec in order and yields the first that
// unmarshals successfully; its schema is the JSON-schema oneOf of all
// member schemas.
type payloadUnion[T any] struct {
	oneOf []TypedPayload[T]
}

func (s payloadUnion[T]) JsonSchema() JsonSchema {
	memberSchemas := make([]any, len(s.oneOf))
	for i, member := range s.oneOf {
		memberSchemas[i] = member.JsonSchema()
	}
	return map[string]any{
		"oneOf": memberSchemas,
	}
}

// Unmarshal returns the first member spec's successful result; when every
// member fails, all member errors are joined into one.
func (s payloadUnion[T]) Unmarshal(ctx compilerContext) (to T, _ error) {
	errs := make([]error, 0, len(s.oneOf)+1)
	for _, member := range s.oneOf {
		result, err := member.Unmarshal(ctx)
		if err == nil {
			return result, nil
		}
		errs = append(errs, err)
	}
	errs = append(errs, errors.New("no definition matched"))
	return to, errors.Join(errs...)
}
// payloadGeneric accepts any raw source value that is already of type T
// (checked with a direct type assertion), described by a hand-supplied
// jsonSchema.
type payloadGeneric[T any] struct {
	jsonSchema map[string]any
}

func (s payloadGeneric[T]) JsonSchema() JsonSchema {
	return s.jsonSchema
}

// Unmarshal asserts the raw source to T, failing with a path-annotated error
// that names both the expected and actual dynamic type (the previous "not ok"
// message gave no diagnostic information at all).
func (s payloadGeneric[T]) Unmarshal(ctx compilerContext) (to T, err error) {
	to, ok := ctx.source.(T)
	if !ok {
		err = ctx.error(fmt.Errorf("expected value of type %T, got %T", to, ctx.source))
	}
	return to, err
}
// payloadStruct decodes the raw source into struct type T via the package's
// decode helper, described by a hand-supplied jsonSchema.
type payloadStruct[T any] struct {
	jsonSchema map[string]any
}

func (s payloadStruct[T]) JsonSchema() JsonSchema {
	return s.jsonSchema
}

func (s payloadStruct[T]) Unmarshal(ctx compilerContext) (to T, err error) {
	return decode[T](ctx)
}
// payloadLiteral matches exactly one constant value, rendered as a JSON
// schema "const".
type payloadLiteral[T comparable] struct {
	literal     T
	description string
}

func (s payloadLiteral[T]) JsonSchema() JsonSchema {
	schema := map[string]any{
		"const": s.literal,
	}
	if s.description != "" {
		schema["description"] = s.description
	}
	return schema
}

// Unmarshal decodes the source as T and requires it to equal the literal.
func (s payloadLiteral[T]) Unmarshal(ctx compilerContext) (to T, _ error) {
	typedPayload, err := decode[T](ctx)
	if err != nil {
		return to, err
	}
	if typedPayload != s.literal {
		// Annotate with the compiler path and name the expected value, for
		// consistency with the other payload specs (was a bare
		// errors.New("value mismatch") with no context).
		return to, ctx.error(fmt.Errorf("expected literal value '%v'", s.literal))
	}
	return typedPayload, nil
}
// payloadList decodes a list of T. A nil source yields a nil list, and a
// non-list source is coerced to a single-element list, so a scalar may be
// written where a list is expected.
// NOTE(review): the schema declares only "type": "array", while Unmarshal
// also accepts a scalar — schema validation is stricter than the decoder;
// confirm this is intended.
type payloadList[T any] struct {
	itemSpec    TypedPayload[T]
	description string
}

func (s payloadList[T]) JsonSchema() JsonSchema {
	schema := map[string]any{
		"type":  "array",
		"items": s.itemSpec.JsonSchema(),
	}
	if s.description != "" {
		schema["description"] = s.description
	}
	return schema
}

func (s payloadList[T]) Unmarshal(ctx compilerContext) (to []T, _ error) {
	if ctx.source == nil {
		return nil, nil
	}
	rawList, ok := ctx.source.([]any)
	if !ok {
		// coerce a scalar source to a single-element list
		rawList = []any{ctx.source}
	}
	to = make([]T, len(rawList))
	for i, rawItem := range rawList {
		// each item is decoded under a numeric path segment for error reporting
		item, err := s.itemSpec.Unmarshal(ctx.child(numericPathPart(i), rawItem))
		if err != nil {
			return to, err
		}
		to[i] = item
	}
	return to, nil
}
// payloadSingleKeyValue decodes an object holding exactly one key (s.key),
// whose value is decoded with valueSpec — the "{keyword: payload}" shape
// used throughout the classifier DSL.
type payloadSingleKeyValue[T any] struct {
	key         string
	valueSpec   TypedPayload[T]
	description string
}

func (s payloadSingleKeyValue[T]) JsonSchema() JsonSchema {
	schema := map[string]any{
		"type": "object",
		"properties": map[string]any{
			s.key: s.valueSpec.JsonSchema(),
		},
		"required":             []string{s.key},
		"additionalProperties": false,
	}
	if s.description != "" {
		schema["description"] = s.description
	}
	return schema
}

// Unmarshal requires the source to be a map with s.key as its only entry,
// then decodes that entry's value under a child path.
func (s payloadSingleKeyValue[T]) Unmarshal(ctx compilerContext) (to T, _ error) {
	rawMap, err := decode[map[string]any](ctx)
	if err != nil {
		return to, err
	}
	if len(rawMap) != 1 {
		return to, ctx.error(errors.New("expected a single key"))
	}
	rawValue, ok := rawMap[s.key]
	if !ok {
		return to, ctx.error(fmt.Errorf("missing expected key: '%s' %+v", s.key, rawMap))
	}
	value, err := s.valueSpec.Unmarshal(ctx.child(s.key, rawValue))
	if err != nil {
		return to, err
	}
	return value, nil
}
// payloadEnum accepts one of a fixed set of string-kinded values, rendered
// as a JSON schema string enum.
type payloadEnum[T string] struct {
	values []T
}

func (s payloadEnum[T]) JsonSchema() JsonSchema {
	return map[string]any{
		"type": "string",
		"enum": s.values,
	}
}

// Unmarshal decodes the source as T and rejects any value outside the enum.
func (s payloadEnum[T]) Unmarshal(ctx compilerContext) (to T, _ error) {
	decoded, err := decode[T](ctx)
	if err != nil {
		return to, ctx.error(err)
	}
	for _, allowed := range s.values {
		if decoded == allowed {
			return decoded, nil
		}
	}
	return to, ctx.error(fmt.Errorf("value not in enum: '%s'", decoded))
}
// payloadMustSucceed wraps another payload spec and escalates any unmarshal
// failure to a fatal compiler error, aborting alternative attempts.
type payloadMustSucceed[T any] struct {
	payload TypedPayload[T]
}

func (p payloadMustSucceed[T]) JsonSchema() JsonSchema {
	return p.payload.JsonSchema()
}

func (p payloadMustSucceed[T]) Unmarshal(ctx compilerContext) (t T, _ error) {
	result, err := p.payload.Unmarshal(ctx)
	if err == nil {
		return result, nil
	}
	return t, ctx.fatal(err)
}
// contentTypePayloadSpec decodes a content-type name into a NullContentType;
// the extra "unknown" name maps to the invalid (null) content type.
var contentTypePayloadSpec = payloadTransformer[string, model.NullContentType]{
	spec: payloadEnum[string]{append(model.ContentTypeNames(), "unknown")},
	transform: func(str string, _ compilerContext) (model.NullContentType, error) {
		if str == "unknown" {
			return model.NullContentType{}, nil
		}
		contentType, err := model.ParseContentType(str)
		if err != nil {
			return model.NullContentType{}, err
		}
		return model.NullContentType{ContentType: contentType, Valid: true}, nil
	},
}
+49
View File
@@ -0,0 +1,49 @@
package classifier
import (
"context"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/protobuf"
)
// runner executes named classifier workflows against torrents.
type runner struct {
	dependencies
	workflows map[string]action
}

// Run executes the named workflow against torrent t. The starting result is
// seeded from the torrent's hint (when present), and a matching
// already-loaded content record is attached up front so the workflow need
// not resolve it again.
func (r runner) Run(ctx context.Context, workflow string, t model.Torrent) (classification.Result, error) {
	w, ok := r.workflows[workflow]
	if !ok {
		return classification.Result{}, fmt.Errorf("workflow not found: %s", workflow)
	}
	cl := classification.Result{}
	if !t.Hint.IsNil() {
		cl.ApplyHint(t.Hint)
	}
	// if possible, attach the existing content to the result to save some work:
	if !t.Hint.IsNil() && t.Hint.ContentSource.Valid {
		for _, tc := range t.Contents {
			if tc.ContentType.Valid &&
				tc.ContentType.ContentType == t.Hint.ContentType &&
				tc.ContentSource.Valid &&
				tc.ContentSource.String == t.Hint.ContentSource.String &&
				tc.ContentID.String == t.Hint.ContentID.String &&
				tc.Content.Source == tc.ContentSource.String {
				// copy before taking the address so the attached content does
				// not alias the loop variable's storage
				content := tc.Content
				cl.AttachContent(&content)
				break
			}
		}
	}
	exCtx := executionContext{
		Context:      ctx,
		dependencies: r.dependencies,
		workflows:    r.workflows,
		torrent:      t,
		torrentPb:    protobuf.NewTorrent(t),
		result:       cl,
	}
	return w.run(exCtx)
}
+90
View File
@@ -0,0 +1,90 @@
package classifier
import (
"context"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/database/query"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
// LocalSearch resolves classifier matches against content already stored in
// the local database, by external id or by title search.
type LocalSearch interface {
	ContentById(context.Context, model.ContentRef) (model.Content, error)
	ContentBySearch(context.Context, model.ContentType, string, model.Year) (model.Content, error)
}

// localSearch implements LocalSearch on top of the database search layer.
type localSearch struct {
	search.Search
}

// ContentById looks up locally-stored content by an external reference.
// A "tmdb" reference is matched against the content's canonical identifier;
// any other source is matched against its alternative identifiers.
// Returns classification.ErrUnmatched when nothing is found.
func (l localSearch) ContentById(ctx context.Context, ref model.ContentRef) (model.Content, error) {
	options := []query.Option{
		query.Where(
			search.ContentTypeCriteria(ref.Type),
		),
		search.ContentDefaultPreload(),
		search.ContentDefaultHydrate(),
		query.Limit(1),
	}
	if ref.Source == "tmdb" {
		// tmdb ids are stored as the canonical identifier; other sources
		// (e.g. imdb) live in the alternative-identifier set
		options = append(options, query.Where(
			search.ContentCanonicalIdentifierCriteria(model.ContentRef{
				Source: ref.Source,
				ID:     ref.ID,
			}),
		))
	} else {
		options = append(options, query.Where(
			search.ContentAlternativeIdentifierCriteria(model.ContentRef{
				Source: ref.Source,
				ID:     ref.ID,
			}),
		))
	}
	result, err := l.Search.Content(ctx, options...)
	if err != nil {
		return model.Content{}, err
	}
	if len(result.Items) == 0 {
		return model.Content{}, classification.ErrUnmatched
	}
	return result.Items[0].Content, nil
}
// ContentBySearch full-text searches locally-stored content of the given
// type for baseTitle (optionally constrained to the release year), then
// accepts the first of the top 5 ranked hits whose title or original title
// passes the Levenshtein closeness check against baseTitle.
// Returns classification.ErrUnmatched when no candidate is close enough.
func (l localSearch) ContentBySearch(ctx context.Context, ct model.ContentType, baseTitle string, year model.Year) (model.Content, error) {
	options := []query.Option{
		query.Where(search.ContentTypeCriteria(ct)),
		// quote the title so it is searched as a phrase
		query.QueryString(fmt.Sprintf("\"%s\"", baseTitle)),
		query.OrderByQueryStringRank(),
		query.Limit(5),
		search.ContentDefaultPreload(),
		search.ContentDefaultHydrate(),
	}
	if !year.IsNil() {
		options = append(options, query.Where(search.ContentReleaseDateCriteria(model.NewDateRangeFromYear(year))))
	}
	result, searchErr := l.Search.Content(
		ctx,
		options...,
	)
	if searchErr != nil {
		return model.Content{}, searchErr
	}
	var content *model.Content
	for _, item := range result.Items {
		candidates := []string{item.Title}
		if item.OriginalTitle.Valid {
			candidates = append(candidates, item.OriginalTitle.String)
		}
		if levenshteinCheck(baseTitle, candidates, levenshteinThreshold) {
			// copy the item so the returned content does not alias the loop variable
			c := item.Content
			content = &c
			break
		}
	}
	if content == nil {
		return model.Content{}, classification.ErrUnmatched
	}
	return *content, nil
}
+83
View File
@@ -0,0 +1,83 @@
package classifier
// Source is the raw, mergeable representation of a classifier definition as
// loaded from YAML or application config, before compilation.
type Source struct {
	Schema          string          `json:"$schema,omitempty" yaml:"$schema,omitempty"`
	Workflows       workflowSources `json:"workflows"`
	FlagDefinitions flagDefinitions `json:"flag_definitions"`
	Flags           flags           `json:"flags"`
	Keywords        keywordGroups   `json:"keywords"`
	Extensions      extensionGroups `json:"extensions"`
}

// merge combines this source with other, with other taking precedence per
// each member's merge rules; only the flag-definition merge can fail.
// NOTE(review): the Schema field is not carried into the merged result —
// presumably intentional since $schema is editor metadata; confirm.
func (s Source) merge(other Source) (Source, error) {
	flagDefs, err := s.FlagDefinitions.merge(other.FlagDefinitions)
	if err != nil {
		return Source{}, err
	}
	return Source{
		FlagDefinitions: flagDefs,
		Flags:           s.Flags.merge(other.Flags),
		Keywords:        s.Keywords.merge(other.Keywords),
		Extensions:      s.Extensions.merge(other.Extensions),
		Workflows:       s.Workflows.merge(other.Workflows),
	}, nil
}

// workflowNames returns the set of defined workflow names.
func (s Source) workflowNames() map[string]struct{} {
	result := make(map[string]struct{})
	for k := range s.Workflows {
		result[k] = struct{}{}
	}
	return result
}
// keywordGroups maps a keyword group name to its list of keywords.
type keywordGroups map[string][]string

// merge returns a new map combining g and other: groups present in both
// sides get other's keywords appended after g's; groups present on only one
// side are taken as-is. A fresh slice is allocated for combined groups so
// the result never aliases g's backing arrays — the previous
// append(v, other[k]...) could write into spare capacity of g's slice,
// silently corrupting the result of an earlier merge of the same receiver.
func (g keywordGroups) merge(other keywordGroups) keywordGroups {
	result := make(keywordGroups, len(g)+len(other))
	for k, v := range g {
		if o, ok := other[k]; ok {
			combined := make([]string, 0, len(v)+len(o))
			combined = append(combined, v...)
			combined = append(combined, o...)
			result[k] = combined
		} else {
			result[k] = v
		}
	}
	for k, v := range other {
		if _, ok := result[k]; !ok {
			result[k] = v
		}
	}
	return result
}
// extensionGroups maps a file-extension group name to its list of extensions.
type extensionGroups map[string][]string

// merge returns a new map combining g and other: groups present in both
// sides get other's extensions appended after g's; groups present on only
// one side are taken as-is. A fresh slice is allocated for combined groups
// so the result never aliases g's backing arrays — the previous
// append(v, other[k]...) could write into spare capacity of g's slice,
// silently corrupting the result of an earlier merge of the same receiver.
func (g extensionGroups) merge(other extensionGroups) extensionGroups {
	result := make(extensionGroups, len(g)+len(other))
	for k, v := range g {
		if o, ok := other[k]; ok {
			combined := make([]string, 0, len(v)+len(o))
			combined = append(combined, v...)
			combined = append(combined, o...)
			result[k] = combined
		} else {
			result[k] = v
		}
	}
	for k, v := range other {
		if _, ok := result[k]; !ok {
			result[k] = v
		}
	}
	return result
}
// workflowSources maps a workflow name to its raw (undecoded) definition.
type workflowSources map[string]any

// merge returns a new map holding every workflow from s and other, with
// other's definition winning when both define the same name.
func (s workflowSources) merge(other workflowSources) workflowSources {
	merged := make(workflowSources, len(s)+len(other))
	for name, def := range s {
		merged[name] = def
	}
	for name, def := range other {
		merged[name] = def
	}
	return merged
}
+8
View File
@@ -0,0 +1,8 @@
package classifier
import (
_ "embed"
)
// classifierCoreYaml is the built-in core classifier definition, embedded
// from classifier.core.yaml at build time; it is always loaded as the first
// (lowest-precedence) source layer.
//
//go:embed classifier.core.yaml
var classifierCoreYaml []byte
+128
View File
@@ -0,0 +1,128 @@
package classifier
import (
"github.com/adrg/xdg"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
"gopkg.in/yaml.v3"
"os"
)
// newSourceProvider assembles the classifier source chain, lowest precedence
// first: the embedded core definition, the user's XDG config file,
// ./classifier.yml in the working directory, and finally overrides derived
// from application config; each layer is merged over the previous ones.
func newSourceProvider(config Config, tmdbConfig tmdb.Config) sourceProvider {
	return mergeSourceProvider{
		providers: []sourceProvider{
			yamlSourceProvider{rawSourceProvider: coreSourceProvider{}},
			yamlSourceProvider{rawSourceProvider: xdgSourceProvider{}},
			yamlSourceProvider{rawSourceProvider: cwdSourceProvider{}},
			configSourceProvider{
				config:      config,
				tmdbEnabled: tmdbConfig.Enabled,
			},
		},
	}
}

// sourceProvider yields one (possibly partial) classifier Source.
type sourceProvider interface {
	source() (Source, error)
}
// mergeSourceProvider folds the sources of several providers together in
// order, later providers taking precedence via Source.merge.
type mergeSourceProvider struct {
	providers []sourceProvider
}

// source merges each provider's Source into an accumulator, returning the
// first provider or merge error encountered.
func (m mergeSourceProvider) source() (Source, error) {
	accumulated := Source{}
	for _, provider := range m.providers {
		next, err := provider.source()
		if err != nil {
			return accumulated, err
		}
		combined, mergeErr := accumulated.merge(next)
		if mergeErr != nil {
			return accumulated, mergeErr
		}
		accumulated = combined
	}
	return accumulated, nil
}
// rawSourceProvider yields the raw YAML bytes of a classifier source.
type rawSourceProvider interface {
	source() ([]byte, error)
}

// yamlSourceProvider parses a raw provider's YAML into a Source.
type yamlSourceProvider struct {
	rawSourceProvider
}

// source reads the raw YAML, unmarshals it into a generic map, then decodes
// that map into a Source via the package's custom decoder.
func (y yamlSourceProvider) source() (Source, error) {
	raw, err := y.rawSourceProvider.source()
	if err != nil {
		return Source{}, err
	}
	rawWorkflow := make(map[string]interface{})
	parseErr := yaml.Unmarshal(raw, &rawWorkflow)
	if parseErr != nil {
		return Source{}, parseErr
	}
	src := Source{}
	decoder, decoderErr := newDecoder(&src)
	if decoderErr != nil {
		return Source{}, decoderErr
	}
	if decodeErr := decoder.Decode(rawWorkflow); decodeErr != nil {
		return Source{}, decodeErr
	}
	return src, nil
}

// coreSourceProvider serves the embedded built-in classifier definition.
type coreSourceProvider struct{}

func (c coreSourceProvider) source() ([]byte, error) {
	return classifierCoreYaml, nil
}

// xdgSourceProvider reads the user's classifier config from the XDG config
// directory (bitmagnet/classifier.yml). A missing or unresolvable file is
// not an error: an empty YAML document ("{}") is returned so merging can
// proceed; any other read error is propagated.
type xdgSourceProvider struct{}

func (_ xdgSourceProvider) source() ([]byte, error) {
	if path, pathErr := xdg.ConfigFile("bitmagnet/classifier.yml"); pathErr == nil {
		if bytes, readErr := os.ReadFile(path); readErr == nil {
			return bytes, nil
		} else if !os.IsNotExist(readErr) {
			return nil, readErr
		}
	}
	return []byte{'{', '}'}, nil
}
// cwdSourceProvider reads ./classifier.yml from the working directory.
// A missing file is not an error: an empty YAML document ("{}") is returned
// so merging can proceed; any other read error is propagated.
type cwdSourceProvider struct{}

func (cwdSourceProvider) source() ([]byte, error) {
	raw, err := os.ReadFile("./classifier.yml")
	if err == nil {
		return raw, nil
	}
	if !os.IsNotExist(err) {
		return nil, err
	}
	return []byte("{}"), nil
}
// configSourceProvider derives a partial Source from application config:
// user-supplied keyword and extension groups plus flag overrides. The
// delete_xxx flag is forced on when configured, and tmdb_enabled is forced
// off when the TMDB integration is disabled.
type configSourceProvider struct {
	config      Config
	tmdbEnabled bool
}

func (c configSourceProvider) source() (Source, error) {
	fs := make(flags)
	for k, v := range c.config.Flags {
		fs[k] = v
	}
	if c.config.DeleteXxx {
		fs["delete_xxx"] = true
	}
	if !c.tmdbEnabled {
		fs["tmdb_enabled"] = false
	}
	return Source{
		Keywords:   c.config.Keywords,
		Extensions: c.config.Extensions,
		Flags:      fs,
	}, nil
}
+105
View File
@@ -0,0 +1,105 @@
package classifier
import (
"errors"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
)
// tmdb_searchMovie searches TMDB for a movie by title (optionally
// constrained to a release year), including adult titles, and resolves the
// first result whose title or original title passes the Levenshtein
// closeness check into the full content model.
// Returns classification.ErrUnmatched when no result is close enough.
func (c executionContext) tmdb_searchMovie(title string, year model.Year) (model.Content, error) {
	req := tmdb.SearchMovieRequest{
		Query:        title,
		IncludeAdult: true,
	}
	if !year.IsNil() {
		req.Year = year
	}
	searchResult, searchErr := c.tmdbClient.SearchMovie(c.Context, req)
	if searchErr != nil {
		return model.Content{}, searchErr
	}
	for _, item := range searchResult.Results {
		if levenshteinCheck(title, []string{item.Title, item.OriginalTitle}, levenshteinThreshold) {
			return c.tmdb_getMovieByTmbdId(item.ID)
		}
	}
	return model.Content{}, classification.ErrUnmatched
}

// tmdb_searchTvShow is the TV counterpart of tmdb_searchMovie: it matches
// on name/original name and constrains by first-air-date year when given.
func (c executionContext) tmdb_searchTvShow(title string, year model.Year) (model.Content, error) {
	req := tmdb.SearchTvRequest{
		Query:        title,
		IncludeAdult: true,
	}
	if !year.IsNil() {
		req.FirstAirDateYear = year
	}
	searchResult, searchErr := c.tmdbClient.SearchTv(c.Context, req)
	if searchErr != nil {
		return model.Content{}, searchErr
	}
	for _, item := range searchResult.Results {
		if levenshteinCheck(title, []string{item.Name, item.OriginalName}, levenshteinThreshold) {
			return c.tmdb_getTvShowByTmbdId(item.ID)
		}
	}
	return model.Content{}, classification.ErrUnmatched
}
// tmdb_getMovieByTmbdId fetches full movie details for a TMDB id and
// converts them to the internal content model. tmdb.ErrNotFound is
// translated to classification.ErrUnmatched so callers treat it as "no
// match" rather than a hard failure.
// NOTE(review): "Tmbd" is a typo for "Tmdb"; kept to avoid breaking callers
// elsewhere — rename across the package in a follow-up.
func (c executionContext) tmdb_getMovieByTmbdId(id int64) (movie model.Content, err error) {
	d, getDetailsErr := c.tmdbClient.MovieDetails(c.Context, tmdb.MovieDetailsRequest{
		ID: id,
	})
	if getDetailsErr != nil {
		if errors.Is(getDetailsErr, tmdb.ErrNotFound) {
			getDetailsErr = classification.ErrUnmatched
		}
		err = getDetailsErr
		return
	}
	return tmdb.MovieDetailsToMovieModel(d)
}
// tmdb_getTvShowByTmbdId fetches full TV-show details (including external
// ids) for a TMDB id and converts them to the internal content model.
// tmdb.ErrNotFound is translated to classification.ErrUnmatched so callers
// treat it as "no match" rather than a hard failure.
// The named result was renamed from the copy-pasted "movie" to "show";
// named results are not part of the caller-visible interface.
// NOTE(review): "Tmbd" is a typo for "Tmdb"; kept to avoid breaking callers
// elsewhere — rename across the package in a follow-up.
func (c executionContext) tmdb_getTvShowByTmbdId(id int64) (show model.Content, err error) {
	d, getDetailsErr := c.tmdbClient.TvDetails(c.Context, tmdb.TvDetailsRequest{
		SeriesID:         id,
		AppendToResponse: []string{"external_ids"},
	})
	if getDetailsErr != nil {
		if errors.Is(getDetailsErr, tmdb.ErrNotFound) {
			getDetailsErr = classification.ErrUnmatched
		}
		err = getDetailsErr
		return
	}
	return tmdb.TvShowDetailsToTvShowModel(d)
}
// tmdb_getTmdbIdByExternalId resolves an external content reference (e.g.
// an IMDb id) to a TMDB id using TMDB's find-by-external-id endpoint.
// Movie and XXX content resolve through the movie results, TV shows through
// the TV results; any other content type — or an empty result set — yields
// classification.ErrUnmatched.
func (c executionContext) tmdb_getTmdbIdByExternalId(ref model.ContentRef) (int64, error) {
	externalSource, externalId, externalSourceErr := tmdb.ExternalSource(ref)
	if externalSourceErr != nil {
		return 0, externalSourceErr
	}
	byIdResult, byIdErr := c.tmdbClient.FindByID(c.Context, tmdb.FindByIDRequest{
		ExternalSource: externalSource,
		ExternalID:     externalId,
	})
	if byIdErr != nil {
		return 0, byIdErr
	}
	switch ref.Type {
	case model.ContentTypeMovie, model.ContentTypeXxx:
		if len(byIdResult.MovieResults) == 0 {
			return 0, classification.ErrUnmatched
		}
		return byIdResult.MovieResults[0].ID, nil
	case model.ContentTypeTvShow:
		if len(byIdResult.TvResults) == 0 {
			return 0, classification.ErrUnmatched
		}
		return byIdResult.TvResults[0].ID, nil
	default:
		return 0, classification.ErrUnmatched
	}
}
@@ -1,10 +1,12 @@
package tmdb
package classifier
import (
"github.com/agnivade/levenshtein"
"github.com/bitmagnet-io/bitmagnet/internal/regex"
)
const levenshteinThreshold = 5
func levenshteinCheck(target string, candidates []string, threshold uint) bool {
normTarget := regex.NormalizeString(target)
triedCandidates := make(map[string]struct{}, len(candidates))
-94
View File
@@ -1,94 +0,0 @@
package video
import (
"context"
"errors"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video/tmdb"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
type videoClassifier struct {
tmdbClient tmdb.Client
}
func (c videoClassifier) Key() string {
return "video"
}
func (c videoClassifier) Priority() int {
return 1
}
func (c videoClassifier) Classify(ctx context.Context, t model.Torrent) (classifier.Classification, error) {
if hasVideo := t.HasFileType(model.FileTypeVideo); hasVideo.Valid && !hasVideo.Bool {
return classifier.Classification{}, classifier.ErrNoMatch
}
if !t.Hint.IsNil() && !t.Hint.ContentType.IsVideo() {
return classifier.Classification{}, classifier.ErrNoMatch
}
ct, title, year, attrs, err := ParseContent(t.Hint.NullContentType(), t.Name)
if err != nil {
return classifier.Classification{}, err
}
ref := t.Hint.ContentRef()
if t.Hint.Title.Valid {
title = t.Hint.Title.String
}
cl := classifier.Classification{
ContentAttributes: attrs,
}
if content, err := c.resolveContent(ctx, ct, ref, title, year); err == nil {
cl.Content = &content
} else if !errors.Is(err, classifier.ErrNoMatch) {
return classifier.Classification{}, err
}
cl.ApplyHint(t.Hint)
if cl.Content != nil {
cl.ContentType = model.NewNullContentType(cl.Content.Type)
if cl.Content.OriginalLanguage.Valid {
if len(cl.Languages) == 0 || cl.LanguageMulti {
if cl.Languages == nil {
cl.Languages = make(model.Languages)
}
cl.Languages[cl.Content.OriginalLanguage.Language] = struct{}{}
}
}
}
if !cl.ContentType.Valid {
return classifier.Classification{}, classifier.ErrNoMatch
}
return cl, nil
}
func (c videoClassifier) resolveContent(
ctx context.Context,
ct model.ContentType,
ref model.Maybe[model.ContentRef],
title string,
year model.Year,
) (model.Content, error) {
if ct == model.ContentTypeMovie || ct == model.ContentTypeXxx {
if ref.Valid {
return c.tmdbClient.GetMovieByExternalId(ctx, ref.Val.Source, ref.Val.ID)
}
return c.tmdbClient.SearchMovie(ctx, tmdb.SearchMovieParams{
Title: title,
Year: year,
IncludeAdult: true,
LevenshteinThreshold: 5,
})
}
if ct == model.ContentTypeTvShow {
if ref.Valid {
return c.tmdbClient.GetTvShowByExternalId(ctx, ref.Val.Source, ref.Val.ID)
}
return c.tmdbClient.SearchTvShow(ctx, tmdb.SearchTvShowParams{
Name: title,
Year: year,
IncludeAdult: true,
LevenshteinThreshold: 5,
})
}
return model.Content{}, classifier.ErrNoMatch
}
-32
View File
@@ -1,32 +0,0 @@
package video
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video/tmdb"
"go.uber.org/fx"
)
type Params struct {
fx.In
TmdbClient lazy.Lazy[tmdb.Client]
}
type Result struct {
fx.Out
Classifier lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
}
func New(p Params) Result {
return Result{
Classifier: lazy.New(func() (classifier.SubClassifier, error) {
tmdbClient, err := p.TmdbClient.Get()
if err != nil {
return nil, err
}
return videoClassifier{
tmdbClient: tmdbClient,
}, nil
}),
}
}
-330
View File
@@ -1,330 +0,0 @@
package video
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/stretchr/testify/assert"
"testing"
)
func TestParse(t *testing.T) {
type output struct {
contentType model.ContentType
title string
releaseYear model.Year
attrs classifier.ContentAttributes
}
type parseTest struct {
contentType model.NullContentType
inputString string
expectedOutput output
}
var parseTests = []parseTest{
{
inputString: "Mission.Impossible",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "Mission Impossible",
},
},
{
inputString: "Mission.Impossible.2023",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "Mission Impossible",
releaseYear: 2023,
},
},
{
inputString: "Mission.Impossible.2023.1080p.BluRay.x264-SPARKS",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "Mission Impossible",
releaseYear: 2023,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceBluRay),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecX264),
ReleaseGroup: model.NullString{
String: "SPARKS",
Valid: true,
},
},
},
},
{
inputString: "Die.Hard.(With.A.Vengeance!).And.A.Suffix.2023.1080p.BluRay.x264-SPARKS",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "Die Hard (With A Vengeance!) And A Suffix",
releaseYear: 2023,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceBluRay),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecX264),
ReleaseGroup: model.NullString{
String: "SPARKS",
Valid: true,
},
},
},
},
{
inputString: "The.Movie.from.U.N.C.L.E.2015.1080p.BluRay.x264-SPARKS",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "The Movie from U.N.C.L.E.",
releaseYear: 2015,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceBluRay),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecX264),
ReleaseGroup: model.NullString{
String: "SPARKS",
Valid: true,
},
},
},
},
{
inputString: "1776.1979.EXTENDED.HD.BluRay.X264-AMIABLE",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "1776",
releaseYear: 1979,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoSource: model.NewNullVideoSource(model.VideoSourceBluRay),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecX264),
ReleaseGroup: model.NullString{
String: "AMIABLE",
Valid: true,
},
},
},
},
{
inputString: "MY MOVIE (2016) [R][Action, Horror][720p.WEB-DL.AVC.8Bit.6ch.AC3].mkv",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "MY MOVIE",
releaseYear: 2016,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBDL),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
},
},
},
{
inputString: "R.I.P.D.2013.720p.BluRay.x264-SPARKS",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "R.I.P.D.",
releaseYear: 2013,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoSource: model.NewNullVideoSource(model.VideoSourceBluRay),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecX264),
ReleaseGroup: model.NullString{
String: "SPARKS",
Valid: true,
},
},
},
},
{
inputString: "This Is A Movie (1999) [IMDB #] <Genre, Genre, Genre> {ACTORS} !DIRECTOR +MORE_SILLY_STUFF_NO_ONE_NEEDS ?",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "This Is A Movie",
releaseYear: 1999,
},
},
{
inputString: "We Are the Movie!.2013.720p.H264.mkv",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "We Are the Movie!",
releaseYear: 2013,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
},
},
},
{
inputString: "[ example.com ] We Are the Movie!.2013.720p.H264.mkv",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "We Are the Movie!",
releaseYear: 2013,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
},
},
},
{
inputString: "【 example.com 】We Are the Movie!.2013.720p.H264.mkv",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "We Are the Movie!",
releaseYear: 2013,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
},
},
},
{
inputString: "Маша и Медведь в кино-12 месяцев.2022.WEBRip.1080p_от New-Team.mkv",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "Маша и Медведь в кино-12 месяцев",
releaseYear: 2022,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
//ReleaseGroup: "New-Team",
},
},
},
{
inputString: "The.Series.name.S04E08.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddEpisode(4, 8),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.S03-5.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddSeason(3).AddSeason(4).AddSeason(5),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.S03,4,5,6.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddSeason(3).AddSeason(4).AddSeason(5).AddSeason(6),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.S04E03-5.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddEpisode(4, 3).AddEpisode(4, 4).AddEpisode(4, 5),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.S04E03,4,5.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddEpisode(4, 3).AddEpisode(4, 4).AddEpisode(4, 5),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.1x02.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddEpisode(1, 2),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.1x01-03.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddEpisode(1, 1).AddEpisode(1, 2).AddEpisode(1, 3),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
}
for _, test := range parseTests {
t.Run(test.inputString, func(t *testing.T) {
ct, title, year, attrs, err := ParseContent(
test.contentType,
test.inputString,
)
assert.NoError(t, err)
assert.Equal(t, test.expectedOutput.contentType, ct)
assert.Equal(t, test.expectedOutput.title, title)
assert.Equal(t, test.expectedOutput.releaseYear, year)
assert.Equal(t, test.expectedOutput.attrs, attrs)
})
}
}
-23
View File
@@ -1,23 +0,0 @@
package tmdb
import (
"errors"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
)
// Client combines movie and TV-show lookup behaviour behind a single
// interface, so callers can resolve either kind of content through one value.
type Client interface {
MovieClient
TvShowClient
}
// client is the concrete Client implementation, backed by the TMDB HTTP
// client (c) for remote lookups and the local search service (s) so that
// already-imported content is resolved without a network round trip.
type client struct {
c tmdb.Client
s search.Search
}
// SourceTmdb identifies TMDB as the canonical content source.
const SourceTmdb = "tmdb"
// ErrUnknownSource is returned when an external identifier names a source
// this package cannot translate (see getExternalSource).
var (
ErrUnknownSource = errors.New("unknown source")
)
-40
View File
@@ -1,40 +0,0 @@
package tmdb
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
"go.uber.org/fx"
"go.uber.org/zap"
)
// Params declares the fx-injected dependencies required to build a Client.
type Params struct {
fx.In
// Search lazily resolves the local database search service.
Search lazy.Lazy[search.Search]
// TmdbClient lazily resolves the underlying TMDB HTTP client.
TmdbClient lazy.Lazy[tmdb.Client]
Logger *zap.SugaredLogger
}
// Result is the fx output struct exposing the lazily-constructed Client.
type Result struct {
fx.Out
Client lazy.Lazy[Client]
}
// New wires a lazily-initialised Client from its fx dependencies.
// Construction of the underlying search service and TMDB client is deferred
// until the Client is first requested, matching the lazy.Lazy contract.
func New(p Params) Result {
	build := func() (Client, error) {
		searchSvc, searchErr := p.Search.Get()
		if searchErr != nil {
			return nil, searchErr
		}
		tmdbClient, clientErr := p.TmdbClient.Get()
		if clientErr != nil {
			return nil, clientErr
		}
		return &client{c: tmdbClient, s: searchSvc}, nil
	}
	return Result{Client: lazy.New(build)}
}
-263
View File
@@ -1,263 +0,0 @@
package tmdb
import (
"context"
"errors"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/database/query"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
"strconv"
)
// MovieClient resolves movie content either by fuzzy title search or by an
// external identifier (tmdb/imdb/tvdb).
type MovieClient interface {
SearchMovie(ctx context.Context, p SearchMovieParams) (model.Content, error)
GetMovieByExternalId(ctx context.Context, source, id string) (model.Content, error)
}
// SearchMovieParams are the inputs to SearchMovie.
type SearchMovieParams struct {
Title string
// Year restricts matches to a release year; a nil year disables the filter.
Year model.Year
IncludeAdult bool
// LevenshteinThreshold is the maximum edit distance accepted when comparing
// the requested title against candidate titles.
LevenshteinThreshold uint
}
// SearchMovie resolves a movie by title/year, preferring content already in
// the local database and falling back to a TMDB search only when the local
// lookup reports no match.
func (c *client) SearchMovie(ctx context.Context, p SearchMovieParams) (model.Content, error) {
	local, err := c.searchMovieLocal(ctx, p)
	if err == nil {
		return local, nil
	}
	if !errors.Is(err, classifier.ErrNoMatch) {
		return model.Content{}, err
	}
	return c.searchMovieTmdb(ctx, p)
}
// searchMovieLocal tries to resolve the movie against content already stored
// in the local database, returning classifier.ErrNoMatch when no candidate
// passes the Levenshtein title check.
func (c *client) searchMovieLocal(ctx context.Context, p SearchMovieParams) (model.Content, error) {
	opts := []query.Option{
		query.Where(search.ContentTypeCriteria(model.ContentTypeMovie, model.ContentTypeXxx)),
		// Quote the title so it is treated as a phrase by the query parser.
		query.QueryString(fmt.Sprintf("\"%s\"", p.Title)),
		query.OrderByQueryStringRank(),
		query.Limit(5),
		search.ContentDefaultPreload(),
		search.ContentDefaultHydrate(),
	}
	if !p.Year.IsNil() {
		opts = append(opts, query.Where(search.ContentReleaseDateCriteria(model.NewDateRangeFromYear(p.Year))))
	}
	result, err := c.s.Content(ctx, opts...)
	if err != nil {
		return model.Content{}, err
	}
	for _, item := range result.Items {
		candidates := []string{item.Title}
		if item.OriginalTitle.Valid {
			candidates = append(candidates, item.OriginalTitle.String)
		}
		if levenshteinCheck(p.Title, candidates, p.LevenshteinThreshold) {
			return item.Content, nil
		}
	}
	return model.Content{}, classifier.ErrNoMatch
}
// searchMovieTmdb queries the TMDB API for the movie and, on a sufficiently
// close title match, resolves the full content record via its TMDB id.
// Returns classifier.ErrNoMatch when nothing passes the Levenshtein check.
func (c *client) searchMovieTmdb(ctx context.Context, p SearchMovieParams) (model.Content, error) {
	res, err := c.c.SearchMovie(ctx, tmdb.SearchMovieRequest{
		Query:        p.Title,
		IncludeAdult: p.IncludeAdult,
		Year:         p.Year,
	})
	if err != nil {
		return model.Content{}, err
	}
	for _, candidate := range res.Results {
		if !levenshteinCheck(p.Title, []string{candidate.Title, candidate.OriginalTitle}, p.LevenshteinThreshold) {
			continue
		}
		return c.GetMovieByExternalId(ctx, SourceTmdb, strconv.Itoa(int(candidate.ID)))
	}
	return model.Content{}, classifier.ErrNoMatch
}
// GetMovieByExternalId resolves a movie by an external identifier. It first
// checks the local database (canonical identifiers for TMDB ids, alternative
// identifiers otherwise), then falls back to the TMDB API: directly by TMDB
// id, or via TMDB's find-by-external-id endpoint for other sources.
func (c *client) GetMovieByExternalId(ctx context.Context, source, id string) (model.Content, error) {
options := []query.Option{
query.Where(
search.ContentTypeCriteria(model.ContentTypeMovie, model.ContentTypeXxx),
),
search.ContentDefaultPreload(),
search.ContentDefaultHydrate(),
query.Limit(1),
}
// Local lookup: a TMDB id is the canonical identifier; anything else is
// stored as an alternative identifier.
if source == SourceTmdb {
canonicalResult, canonicalErr := c.s.Content(ctx,
append(options, query.Where(
search.ContentCanonicalIdentifierCriteria(model.ContentRef{
Source: source,
ID: id,
}),
))...,
)
if canonicalErr != nil {
return model.Content{}, canonicalErr
}
if len(canonicalResult.Items) > 0 {
return canonicalResult.Items[0].Content, nil
}
} else {
alternativeResult, alternativeErr := c.s.Content(ctx,
append(options, query.Where(
search.ContentAlternativeIdentifierCriteria(model.ContentRef{
Source: source,
ID: id,
}),
))...,
)
if alternativeErr != nil {
return model.Content{}, alternativeErr
}
if len(alternativeResult.Items) > 0 {
return alternativeResult.Items[0].Content, nil
}
}
// Remote fallback: a TMDB id can be fetched directly once parsed.
if source == SourceTmdb {
intId, idErr := strconv.Atoi(id)
if idErr != nil {
return model.Content{}, idErr
}
return c.getMovieByTmbdId(ctx, intId)
}
// Other sources go through TMDB's find-by-external-id endpoint, which maps
// e.g. an IMDB id onto a TMDB record.
externalSource, externalId, externalSourceErr := getExternalSource(source, id)
if externalSourceErr != nil {
return model.Content{}, externalSourceErr
}
byIdResult, byIdErr := c.c.FindByID(ctx, tmdb.FindByIDRequest{
ExternalSource: externalSource,
ExternalID: externalId,
})
if byIdErr != nil {
return model.Content{}, byIdErr
}
if len(byIdResult.MovieResults) == 0 {
return model.Content{}, classifier.ErrNoMatch
}
return c.getMovieByTmbdId(ctx, int(byIdResult.MovieResults[0].ID))
}
// SourceImdb identifies IMDB as an alternative identifier source.
const SourceImdb = "imdb"
// SourceTvdb identifies TheTVDB as an alternative identifier source.
const SourceTvdb = "tvdb"
// getExternalSource translates a source name used by this package into the
// external-source key expected by TMDB's find-by-id endpoint, returning
// ErrUnknownSource for sources it cannot translate.
func getExternalSource(source string, id string) (string, string, error) {
	switch source {
	case SourceImdb:
		return "imdb_id", id, nil
	case SourceTvdb:
		return "tvdb_id", id, nil
	default:
		return "", "", ErrUnknownSource
	}
}
// getMovieByTmbdId fetches full movie details from TMDB by numeric id and
// maps them onto the internal content model. A TMDB not-found error is
// normalised to classifier.ErrNoMatch.
// NOTE(review): "Tmbd" looks like a typo for "Tmdb"; renaming would touch
// every call site, so it is only flagged here.
func (c *client) getMovieByTmbdId(ctx context.Context, id int) (movie model.Content, err error) {
d, getDetailsErr := c.c.MovieDetails(ctx, tmdb.MovieDetailsRequest{
ID: int64(id),
})
if getDetailsErr != nil {
// A missing TMDB record is "no match", not a hard failure.
if errors.Is(getDetailsErr, tmdb.ErrNotFound) {
getDetailsErr = classifier.ErrNoMatch
}
err = getDetailsErr
return
}
return MovieDetailsToMovieModel(d)
}
// MovieDetailsToMovieModel maps a TMDB movie-details response onto the
// internal content model, collecting franchise/genre collections and
// imdb-id/poster/backdrop attributes along the way. It fails only when the
// release date cannot be parsed.
func MovieDetailsToMovieModel(details tmdb.MovieDetailsResponse) (model.Content, error) {
	releaseDate := model.Date{}
	if details.ReleaseDate != "" {
		parsedDate, err := model.NewDateFromIsoString(details.ReleaseDate)
		if err != nil {
			return model.Content{}, err
		}
		releaseDate = parsedDate
	}
	var collections []model.ContentCollection
	// A non-zero collection id means the movie belongs to a franchise.
	if details.BelongsToCollection.ID != 0 {
		collections = append(collections, model.ContentCollection{
			Type:   "franchise",
			Source: SourceTmdb,
			ID:     strconv.Itoa(int(details.BelongsToCollection.ID)),
			Name:   details.BelongsToCollection.Name,
		})
	}
	for _, genre := range details.Genres {
		collections = append(collections, model.ContentCollection{
			Type:   "genre",
			Source: SourceTmdb,
			ID:     strconv.Itoa(int(genre.ID)),
			Name:   genre.Name,
		})
	}
	var attributes []model.ContentAttribute
	if details.IMDbID != "" {
		attributes = append(attributes, model.ContentAttribute{
			// Use the declared source constants (previously the literals
			// "imdb"/"tmdb") for consistency with the rest of the package;
			// the stored values are unchanged.
			Source: SourceImdb,
			Key:    "id",
			Value:  details.IMDbID,
		})
	}
	if details.PosterPath != "" {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceTmdb,
			Key:    "poster_path",
			Value:  details.PosterPath,
		})
	}
	if details.BackdropPath != "" {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceTmdb,
			Key:    "backdrop_path",
			Value:  details.BackdropPath,
		})
	}
	// Adult titles are classified as XXX rather than movie content.
	contentType := model.ContentTypeMovie
	if details.Adult {
		contentType = model.ContentTypeXxx
	}
	return model.Content{
		Type:             contentType,
		Source:           SourceTmdb,
		ID:               strconv.Itoa(int(details.ID)),
		Title:            details.Title,
		ReleaseDate:      releaseDate,
		ReleaseYear:      releaseDate.Year,
		Adult:            model.NewNullBool(details.Adult),
		OriginalLanguage: model.ParseLanguage(details.OriginalLanguage),
		OriginalTitle:    model.NewNullString(details.OriginalTitle),
		Overview: model.NullString{
			String: details.Overview,
			Valid:  details.Overview != "",
		},
		Runtime: model.NullUint16{
			Uint16: uint16(details.Runtime),
			Valid:  details.Runtime > 0,
		},
		Popularity:  model.NewNullFloat32(details.Popularity),
		VoteAverage: model.NewNullFloat32(details.VoteAverage),
		VoteCount:   model.NewNullUint(uint(details.VoteCount)),
		Collections: collections,
		Attributes:  attributes,
	}, nil
}
-242
View File
@@ -1,242 +0,0 @@
package tmdb
import (
"context"
"errors"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/database/query"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
"strconv"
)
// TvShowClient resolves TV-show content either by fuzzy name search or by an
// external identifier (tmdb/imdb/tvdb).
type TvShowClient interface {
SearchTvShow(ctx context.Context, p SearchTvShowParams) (model.Content, error)
GetTvShowByExternalId(ctx context.Context, source, id string) (model.Content, error)
}
// SearchTvShowParams are the inputs to SearchTvShow.
type SearchTvShowParams struct {
Name string
// Year restricts matches to a first-air year; a nil year disables the filter.
Year model.Year
IncludeAdult bool
// LevenshteinThreshold is the maximum edit distance accepted when comparing
// the requested name against candidate names.
LevenshteinThreshold uint
}
// SearchTvShow resolves a TV show by name/year, preferring content already
// in the local database and falling back to a TMDB search only when the
// local lookup reports no match.
func (c *client) SearchTvShow(ctx context.Context, p SearchTvShowParams) (model.Content, error) {
	local, err := c.searchTvShowLocal(ctx, p)
	if err == nil {
		return local, nil
	}
	if !errors.Is(err, classifier.ErrNoMatch) {
		return model.Content{}, err
	}
	return c.searchTvShowTmdb(ctx, p)
}
// searchTvShowLocal tries to resolve the TV show against content already
// stored in the local database, returning classifier.ErrNoMatch when no
// candidate passes the Levenshtein name check.
func (c *client) searchTvShowLocal(ctx context.Context, p SearchTvShowParams) (model.Content, error) {
	opts := []query.Option{
		query.Where(search.ContentTypeCriteria(model.ContentTypeTvShow)),
		// Quote the name so it is treated as a phrase by the query parser.
		query.QueryString(fmt.Sprintf("\"%s\"", p.Name)),
		query.OrderByQueryStringRank(),
		query.Limit(5),
		search.ContentDefaultPreload(),
		search.ContentDefaultHydrate(),
	}
	if !p.Year.IsNil() {
		opts = append(opts, query.Where(search.ContentReleaseDateCriteria(model.NewDateRangeFromYear(p.Year))))
	}
	result, err := c.s.Content(ctx, opts...)
	if err != nil {
		return model.Content{}, err
	}
	for _, item := range result.Items {
		candidates := []string{item.Title}
		if item.OriginalTitle.Valid {
			candidates = append(candidates, item.OriginalTitle.String)
		}
		if levenshteinCheck(p.Name, candidates, p.LevenshteinThreshold) {
			return item.Content, nil
		}
	}
	return model.Content{}, classifier.ErrNoMatch
}
// searchTvShowTmdb queries the TMDB API for the TV show and, on a
// sufficiently close name match, resolves the full content record via its
// TMDB id. Returns classifier.ErrNoMatch when nothing passes the check.
//
// The previous implementation also built a urlOptions map here
// (first_air_date_year / include_adult) that was never passed anywhere; the
// year and adult filters travel in SearchTvRequest, so that dead code is
// removed.
func (c *client) searchTvShowTmdb(ctx context.Context, p SearchTvShowParams) (tvShow model.Content, err error) {
	searchResult, searchErr := c.c.SearchTv(ctx, tmdb.SearchTvRequest{
		Query:        p.Name,
		Year:         p.Year,
		IncludeAdult: p.IncludeAdult,
	})
	if searchErr != nil {
		err = searchErr
		return
	}
	for _, item := range searchResult.Results {
		if levenshteinCheck(p.Name, []string{item.Name, item.OriginalName}, p.LevenshteinThreshold) {
			return c.GetTvShowByExternalId(ctx, SourceTmdb, strconv.Itoa(int(item.ID)))
		}
	}
	err = classifier.ErrNoMatch
	return
}
// GetTvShowByExternalId resolves a TV show by an external identifier. It
// first checks the local database (canonical identifiers for TMDB ids,
// alternative identifiers otherwise), then falls back to the TMDB API:
// directly by TMDB id, or via TMDB's find-by-external-id endpoint.
func (c *client) GetTvShowByExternalId(ctx context.Context, source, id string) (tvShow model.Content, err error) {
options := []query.Option{
search.ContentDefaultPreload(),
search.ContentDefaultHydrate(),
query.Limit(1),
}
// Local lookup: a TMDB id is the canonical identifier; anything else is
// stored as an alternative identifier.
if source == SourceTmdb {
canonicalResult, canonicalErr := c.s.Content(ctx,
append(options, query.Where(search.ContentCanonicalIdentifierCriteria(model.ContentRef{
Type: model.ContentTypeTvShow,
Source: source,
ID: id,
})))...,
)
if canonicalErr != nil {
return model.Content{}, canonicalErr
}
if len(canonicalResult.Items) > 0 {
return canonicalResult.Items[0].Content, nil
}
} else {
alternativeResult, alternativeErr := c.s.Content(ctx,
append(options, query.Where(search.ContentAlternativeIdentifierCriteria(model.ContentRef{
Type: model.ContentTypeTvShow,
Source: source,
ID: id,
})))...,
)
if alternativeErr != nil {
return model.Content{}, alternativeErr
}
if len(alternativeResult.Items) > 0 {
return alternativeResult.Items[0].Content, nil
}
}
// Remote fallback: a TMDB id can be fetched directly once parsed.
if source == SourceTmdb {
intId, idErr := strconv.Atoi(id)
if idErr != nil {
err = idErr
return
}
return c.getTvShowByTmdbId(ctx, intId)
}
// Other sources go through TMDB's find-by-external-id endpoint.
externalSource, externalId, externalSourceErr := getExternalSource(source, id)
if externalSourceErr != nil {
err = externalSourceErr
return
}
byIdResult, byIdErr := c.c.FindByID(ctx, tmdb.FindByIDRequest{
ExternalSource: externalSource,
ExternalID: externalId,
})
if byIdErr != nil {
err = byIdErr
return
}
if len(byIdResult.TvResults) == 0 {
err = classifier.ErrNoMatch
return
}
return c.getTvShowByTmdbId(ctx, int(byIdResult.TvResults[0].ID))
}
// getTvShowByTmdbId fetches full series details (including external ids)
// from TMDB by numeric id and maps them onto the internal content model.
// A TMDB not-found error is normalised to classifier.ErrNoMatch.
func (c *client) getTvShowByTmdbId(ctx context.Context, id int) (tvShow model.Content, err error) {
d, getDetailsErr := c.c.TvDetails(ctx, tmdb.TvDetailsRequest{
SeriesID: int64(id),
AppendToResponse: []string{"external_ids"},
})
if getDetailsErr != nil {
// A missing TMDB record is "no match", not a hard failure.
if errors.Is(getDetailsErr, tmdb.ErrNotFound) {
getDetailsErr = classifier.ErrNoMatch
}
err = getDetailsErr
return
}
return TvShowDetailsToTvShowModel(d)
}
// TvShowDetailsToTvShowModel maps a TMDB TV-series details response onto the
// internal content model, collecting genre collections and
// imdb-id/tvdb-id/poster/backdrop attributes along the way. It fails only
// when the first-air date cannot be parsed.
//
// Fixes from the previous version: the named return was misleadingly called
// "movie" (copy-paste from the movie mapper), and the attribute sources used
// string literals instead of the declared Source* constants; stored values
// are unchanged.
func TvShowDetailsToTvShowModel(details tmdb.TvDetailsResponse) (model.Content, error) {
	firstAirDate := model.Date{}
	if details.FirstAirDate != "" {
		parsedDate, err := model.NewDateFromIsoString(details.FirstAirDate)
		if err != nil {
			return model.Content{}, err
		}
		firstAirDate = parsedDate
	}
	var collections []model.ContentCollection
	for _, genre := range details.Genres {
		collections = append(collections, model.ContentCollection{
			Type:   "genre",
			Source: SourceTmdb,
			ID:     strconv.Itoa(int(genre.ID)),
			Name:   genre.Name,
		})
	}
	var attributes []model.ContentAttribute
	if details.ExternalIDs.IMDbID != "" {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceImdb,
			Key:    "id",
			Value:  details.ExternalIDs.IMDbID,
		})
	}
	if details.ExternalIDs.TVDBID != 0 {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceTvdb,
			Key:    "id",
			Value:  strconv.Itoa(int(details.ExternalIDs.TVDBID)),
		})
	}
	if details.PosterPath != "" {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceTmdb,
			Key:    "poster_path",
			Value:  details.PosterPath,
		})
	}
	if details.BackdropPath != "" {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceTmdb,
			Key:    "backdrop_path",
			Value:  details.BackdropPath,
		})
	}
	return model.Content{
		Type:             model.ContentTypeTvShow,
		Source:           SourceTmdb,
		ID:               strconv.Itoa(int(details.ID)),
		Title:            details.Name,
		ReleaseDate:      firstAirDate,
		ReleaseYear:      firstAirDate.Year,
		OriginalLanguage: model.ParseLanguage(details.OriginalLanguage),
		OriginalTitle:    model.NewNullString(details.OriginalName),
		Overview: model.NullString{
			String: details.Overview,
			Valid:  details.Overview != "",
		},
		Popularity:  model.NewNullFloat32(details.Popularity),
		VoteAverage: model.NewNullFloat32(details.VoteAverage),
		VoteCount:   model.NewNullUint(uint(details.VoteCount)),
		Collections: collections,
		Attributes:  attributes,
	}, nil
}
@@ -1,17 +0,0 @@
package videofx
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video/tmdb"
"go.uber.org/fx"
)
// New assembles the fx module wiring the TMDB-backed video classifier
// providers (tmdb client wrapper and video resolver).
func New() fx.Option {
return fx.Module(
// NOTE(review): the module is named "movie" although it wires generic
// video/tmdb providers — presumably historical; confirm before renaming.
"movie",
fx.Provide(
tmdb.New,
video.New,
),
)
}
+13 -1
View File
@@ -43,6 +43,9 @@ func newTorrentContent(db *gorm.DB, opts ...gen.DOOption) torrentContent {
_torrentContent.CreatedAt = field.NewTime(tableName, "created_at")
_torrentContent.UpdatedAt = field.NewTime(tableName, "updated_at")
_torrentContent.Tsv = field.NewField(tableName, "tsv")
_torrentContent.Seeders = field.NewField(tableName, "seeders")
_torrentContent.Leechers = field.NewField(tableName, "leechers")
_torrentContent.PublishedAt = field.NewTime(tableName, "published_at")
_torrentContent.Torrent = torrentContentBelongsToTorrent{
db: db.Session(&gorm.Session{}),
@@ -149,6 +152,9 @@ type torrentContent struct {
CreatedAt field.Time
UpdatedAt field.Time
Tsv field.Field
Seeders field.Field
Leechers field.Field
PublishedAt field.Time
Torrent torrentContentBelongsToTorrent
Content torrentContentBelongsToContent
@@ -184,6 +190,9 @@ func (t *torrentContent) updateTableName(table string) *torrentContent {
t.CreatedAt = field.NewTime(table, "created_at")
t.UpdatedAt = field.NewTime(table, "updated_at")
t.Tsv = field.NewField(table, "tsv")
t.Seeders = field.NewField(table, "seeders")
t.Leechers = field.NewField(table, "leechers")
t.PublishedAt = field.NewTime(table, "published_at")
t.fillFieldMap()
@@ -200,7 +209,7 @@ func (t *torrentContent) GetFieldByName(fieldName string) (field.OrderExpr, bool
}
func (t *torrentContent) fillFieldMap() {
t.fieldMap = make(map[string]field.Expr, 18)
t.fieldMap = make(map[string]field.Expr, 21)
t.fieldMap["id"] = t.ID
t.fieldMap["info_hash"] = t.InfoHash
t.fieldMap["content_type"] = t.ContentType
@@ -217,6 +226,9 @@ func (t *torrentContent) fillFieldMap() {
t.fieldMap["created_at"] = t.CreatedAt
t.fieldMap["updated_at"] = t.UpdatedAt
t.fieldMap["tsv"] = t.Tsv
t.fieldMap["seeders"] = t.Seeders
t.fieldMap["leechers"] = t.Leechers
t.fieldMap["published_at"] = t.PublishedAt
}
@@ -30,11 +30,9 @@ func newTorrentsTorrentSource(db *gorm.DB, opts ...gen.DOOption) torrentsTorrent
_torrentsTorrentSource.Source = field.NewString(tableName, "source")
_torrentsTorrentSource.InfoHash = field.NewField(tableName, "info_hash")
_torrentsTorrentSource.ImportID = field.NewField(tableName, "import_id")
_torrentsTorrentSource.Bfsd = field.NewBytes(tableName, "bfsd")
_torrentsTorrentSource.Bfpe = field.NewBytes(tableName, "bfpe")
_torrentsTorrentSource.Seeders = field.NewField(tableName, "seeders")
_torrentsTorrentSource.Leechers = field.NewField(tableName, "leechers")
_torrentsTorrentSource.PublishedAt = field.NewTime(tableName, "published_at")
_torrentsTorrentSource.PublishedAt = field.NewField(tableName, "published_at")
_torrentsTorrentSource.CreatedAt = field.NewTime(tableName, "created_at")
_torrentsTorrentSource.UpdatedAt = field.NewTime(tableName, "updated_at")
_torrentsTorrentSource.TorrentSource = torrentsTorrentSourceHasOneTorrentSource{
@@ -55,11 +53,9 @@ type torrentsTorrentSource struct {
Source field.String
InfoHash field.Field
ImportID field.Field
Bfsd field.Bytes
Bfpe field.Bytes
Seeders field.Field
Leechers field.Field
PublishedAt field.Time
PublishedAt field.Field
CreatedAt field.Time
UpdatedAt field.Time
TorrentSource torrentsTorrentSourceHasOneTorrentSource
@@ -82,11 +78,9 @@ func (t *torrentsTorrentSource) updateTableName(table string) *torrentsTorrentSo
t.Source = field.NewString(table, "source")
t.InfoHash = field.NewField(table, "info_hash")
t.ImportID = field.NewField(table, "import_id")
t.Bfsd = field.NewBytes(table, "bfsd")
t.Bfpe = field.NewBytes(table, "bfpe")
t.Seeders = field.NewField(table, "seeders")
t.Leechers = field.NewField(table, "leechers")
t.PublishedAt = field.NewTime(table, "published_at")
t.PublishedAt = field.NewField(table, "published_at")
t.CreatedAt = field.NewTime(table, "created_at")
t.UpdatedAt = field.NewTime(table, "updated_at")
@@ -105,12 +99,10 @@ func (t *torrentsTorrentSource) GetFieldByName(fieldName string) (field.OrderExp
}
func (t *torrentsTorrentSource) fillFieldMap() {
t.fieldMap = make(map[string]field.Expr, 11)
t.fieldMap = make(map[string]field.Expr, 9)
t.fieldMap["source"] = t.Source
t.fieldMap["info_hash"] = t.InfoHash
t.fieldMap["import_id"] = t.ImportID
t.fieldMap["bfsd"] = t.Bfsd
t.fieldMap["bfpe"] = t.Bfpe
t.fieldMap["seeders"] = t.Seeders
t.fieldMap["leechers"] = t.Leechers
t.fieldMap["published_at"] = t.PublishedAt
+9 -99
View File
@@ -1,122 +1,32 @@
package fts
import (
"bufio"
"errors"
"io"
"strconv"
"strings"
"unicode"
"github.com/bitmagnet-io/bitmagnet/internal/lexer"
)
func newLexer(str string) lexer {
return lexer{
reader: bufio.NewReader(strings.NewReader(str)),
}
func newLexer(str string) ftsLexer {
return ftsLexer{lexer.NewLexer(str)}
}
type lexer struct {
pos int
reader *bufio.Reader
type ftsLexer struct {
lexer.Lexer
}
// read consumes and returns the next rune, advancing pos. The boolean is
// false at end of input. Any error other than io.EOF panics — the underlying
// reader wraps an in-memory string, so other failures indicate a bug.
func (l *lexer) read() (rune, bool) {
r, _, err := l.reader.ReadRune()
if err != nil {
if errors.Is(err, io.EOF) {
return 0, false
}
panic(err)
}
l.pos++
return r, true
}
// backup un-reads the most recently read rune and rewinds pos by one.
// bufio.Reader guarantees only a single rune of pushback, so calling backup
// twice without an intervening read panics.
func (l *lexer) backup() {
if err := l.reader.UnreadRune(); err != nil {
panic(err)
}
l.pos--
}
// isEof reports whether the input is exhausted without consuming anything:
// it reads one rune and, on success, immediately pushes it back.
func (l *lexer) isEof() bool {
_, ok := l.read()
if !ok {
return true
}
l.backup()
return false
}
// readIf consumes and returns the next rune only when fn accepts it;
// otherwise the rune is pushed back and the boolean is false.
func (l *lexer) readIf(fn func(rune) bool) (rune, bool) {
r, ok := l.read()
if !ok {
return 0, false
}
if !fn(r) {
l.backup()
return 0, false
}
return r, true
}
// readWhile consumes runes for as long as fn accepts them and returns the
// accumulated string (possibly empty when the first rune is rejected).
func (l *lexer) readWhile(fn func(rune) bool) string {
var str string
for {
r, ok := l.readIf(fn)
if !ok {
break
}
str = str + string(r)
}
return str
}
// readInt consumes a run of ASCII digits and returns its integer value; the
// boolean is false when no digit was present. Atoi cannot fail on a
// non-empty all-digit string short of overflow, hence the panic.
func (l *lexer) readInt() (int, bool) {
str := l.readWhile(isInt)
if str == "" {
return 0, false
}
n, err := strconv.Atoi(str)
if err != nil {
panic(err)
}
return n, true
}
// readChar consumes the next rune only when it equals r1, reporting whether
// it did; on mismatch the rune is left unconsumed.
func (l *lexer) readChar(r1 rune) bool {
_, ok := l.readIf(isChar(r1))
return ok
}
func (l *lexer) readQuotedString(quoteChar rune) (string, error) {
if !l.readChar(quoteChar) {
func (l *ftsLexer) readQuotedString(quoteChar rune) (string, error) {
if !l.ReadChar(quoteChar) {
return "", errors.New("missing opening quote")
}
var str string
for {
ch, ok := l.read()
ch, ok := l.Read()
if !ok {
return str, errors.New("unexpected EOF")
}
if ch == quoteChar && !l.readChar(quoteChar) {
if ch == quoteChar && !l.ReadChar(quoteChar) {
break
}
str = str + string(ch)
}
return str, nil
}
// isInt reports whether r is an ASCII decimal digit ('0'..'9'); non-ASCII
// Unicode digits deliberately do not count.
func isInt(r rune) bool {
	return !(r < '0' || r > '9')
}
// isChar returns a predicate that matches exactly the rune r1.
func isChar(r1 rune) func(rune) bool {
	return func(r2 rune) bool {
		return r2 == r1
	}
}
// IsWordChar reports whether r belongs to a word token: any Unicode letter
// or digit.
func IsWordChar(r rune) bool {
	switch {
	case unicode.IsLetter(r), unicode.IsDigit(r):
		return true
	default:
		return false
	}
}
+5 -4
View File
@@ -1,6 +1,7 @@
package fts
import (
"github.com/bitmagnet-io/bitmagnet/internal/lexer"
"github.com/mozillazg/go-unidecode/table"
"strings"
"unicode"
@@ -20,7 +21,7 @@ func Tokenize(str string) [][]string {
}
type tokenizerLexer struct {
lexer
ftsLexer
}
func TokenizeFlat(str string) []string {
@@ -44,11 +45,11 @@ func (l *tokenizerLexer) readPhrase() []string {
lexeme = lexeme + str
}
for {
if l.isEof() {
if l.IsEof() {
breakWord()
return phrase
}
if ch, ok := l.readIf(IsWordChar); ok {
if ch, ok := l.ReadIf(lexer.IsWordChar); ok {
ch = unicode.ToLower(ch)
if ch < unicode.MaxASCII {
appendStr(string(ch))
@@ -81,6 +82,6 @@ func (l *tokenizerLexer) readPhrase() []string {
if len(phrase) > 0 {
return phrase
}
l.read()
l.Read()
}
}

Some files were not shown because too many files have changed in this diff Show More