Classifier rewrite (#213)

The classifier has been re-implemented and now uses a DSL allowing for full customisation. Several bugs have also been fixed.

- Closes https://github.com/bitmagnet-io/bitmagnet/issues/182
- Closes https://github.com/bitmagnet-io/bitmagnet/issues/70
- Closes https://github.com/bitmagnet-io/bitmagnet/issues/68
- Hopefully fixes https://github.com/bitmagnet-io/bitmagnet/issues/126
This commit is contained in:
mgdigital
2024-04-21 16:24:10 +01:00
committed by GitHub
parent 7902b93bd7
commit c16f76130c
163 changed files with 7879 additions and 2310 deletions
+6
View File
@@ -113,6 +113,12 @@ jobs:
uses: actions/setup-node@v3
with:
node-version: 20.x
- name: Setup protoc
uses: arduino/setup-protoc@v3
with:
version: "23.4"
- name: Install protoc-gen-go
run: go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.33.0
- name: Install web app, apply database migrations, generate code and build web app
run: |
(cd webui && npm ci); \
+6
View File
@@ -4,9 +4,15 @@ mockname: "{{.InterfaceName}}"
outpkg: "{{.PackageName}}_mocks"
filename: "{{.InterfaceName}}.go"
packages:
github.com/bitmagnet-io/bitmagnet/internal/classifier:
interfaces:
LocalSearch:
github.com/bitmagnet-io/bitmagnet/internal/protocol/dht/ktable:
interfaces:
Table:
github.com/bitmagnet-io/bitmagnet/internal/protocol/dht/responder:
interfaces:
Limiter:
github.com/bitmagnet-io/bitmagnet/internal/tmdb:
interfaces:
Client:
+1
View File
@@ -1,2 +1,3 @@
bitmagnet.io/schemas/**/*.*
webui/dist/**/*.*
webui/src/app/graphql/generated/**/*.*
+13
View File
@@ -8,7 +8,9 @@ tasks:
- go run ./internal/gql/enums/gen/genenums.go
- go run ./internal/torznab/gencategories/gencategories.go
- go run github.com/99designs/gqlgen generate --config ./internal/gql/gqlgen.yml
- protoc --go_out=. ./internal/protobuf/bitmagnet.proto
- go run github.com/vektra/mockery/v2
- go run . classifier schema --format json > ./bitmagnet.io/schemas/classifier-0.1.json
lint:
cmds:
@@ -82,3 +84,14 @@ tasks:
- goose -s create {{.NAME}} sql
vars:
NAME: migration
install-protoc:
cmds:
- |
curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v{{.VERSION}}/protoc-{{.VERSION}}-{{.PLATFORM}}.zip
sudo unzip -o protoc-{{.VERSION}}-{{.PLATFORM}}.zip -d /usr/local bin/protoc
sudo unzip -o protoc-{{.VERSION}}-{{.PLATFORM}}.zip -d /usr/local 'include/*'
rm -f protoc-{{.VERSION}}-{{.PLATFORM}}.zip
vars:
VERSION: 23.4
PLATFORM: osx-x86_64
+1
View File
@@ -3,6 +3,7 @@ source 'https://rubygems.org'
gem "just-the-docs", "~> 0.6"
gem "jekyll", "~> 4.3"
gem "jekyll-redirect-from", "~> 0.16"
gem "jekyll-target-blank", "~> 2.0"
gem "kramdown", "~> 2.3"
gem "kramdown-parser-gfm", "~> 1.1"
gem "webrick", "~> 1.8"
+7
View File
@@ -39,6 +39,9 @@ GEM
sass-embedded (~> 1.54)
jekyll-seo-tag (2.8.0)
jekyll (>= 3.8, < 5.0)
jekyll-target-blank (2.0.2)
jekyll (>= 3.0, < 5.0)
nokogiri (~> 1.10)
jekyll-watch (2.2.1)
listen (~> 3.0)
just-the-docs (0.6.2)
@@ -55,9 +58,12 @@ GEM
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
mercenary (0.4.0)
nokogiri (1.16.4-arm64-darwin)
racc (~> 1.4)
pathutil (0.16.2)
forwardable-extended (~> 2.6)
public_suffix (5.0.3)
racc (1.7.3)
rake (13.0.6)
rb-fsevent (0.11.2)
rb-inotify (0.10.1)
@@ -78,6 +84,7 @@ PLATFORMS
DEPENDENCIES
jekyll (~> 4.3)
jekyll-redirect-from (~> 0.16)
jekyll-target-blank (~> 2.0)
just-the-docs (~> 0.6)
kramdown (~> 2.3)
kramdown-parser-gfm (~> 1.1)
+1
View File
@@ -29,3 +29,4 @@ nav_external_links:
favicon_ico: "/assets/images/favicon.png"
plugins:
- jekyll-redirect-from
- jekyll-target-blank
+11
View File
@@ -0,0 +1,11 @@
require 'fileutils'

# Jekyll generator plugin that copies every JSON schema file from the site
# source's `schemas/` directory verbatim into the built site, so each schema
# is served at /schemas/<basename>.json (e.g. the classifier JSON schema).
module Schemas
  class Generator < Jekyll::Generator
    # site - the Jekyll::Site being built; reads from site.source, writes
    #        into site.dest. Returns nothing; side effect is file copies.
    def generate(site)
      dest_dir = File.join(site.dest, 'schemas')
      # Ensure the target directory exists: on a clean build site.dest may
      # not yet contain a `schemas` subdirectory, and File.open(..., 'w')
      # would raise Errno::ENOENT without it.
      FileUtils.mkdir_p(dest_dir)
      Dir.glob(File.join(site.source, 'schemas', '*.json')).each do |json_file|
        # Copy each schema byte-for-byte; no templating/transformation needed.
        FileUtils.cp(json_file, File.join(dest_dir, File.basename(json_file)))
      end
      # NOTE(review): generators run before Jekyll's site cleaner/writer;
      # presumably these paths survive because they're outside the managed
      # static-file set — confirm files persist in the final _site output.
    end
  end
end
+3 -3
View File
@@ -8,7 +8,7 @@ nav_order: 7
Community members have developed the following resources, tools and packages; these are not maintained under the **bitmagnet** project:
- [@davispuh](https://github.com/davispuh){:target="\_blank"} has published an Arch package, `bitmagnet-git`, [in the AUR repository](https://aur.archlinux.org/packages/bitmagnet-git){:target="\_blank"}.
- [@DyonR](https://github.com/DyonR){:target="\_blank"} has developed [magnetico2bitmagnet](https://github.com/DyonR/magnetico2bitmagnet){:target="\_blank"}, a collection of scripts for importing into **bitmagnet** from Magnetico and other sources.
- [@DyonR](https://github.com/DyonR){:target="\_blank"} has written [a **bitmagnet** on Unraid guide](https://github.com/DyonR/bitmagnet-unraid){:target="\_blank"}.
- [@davispuh](https://github.com/davispuh) has published an Arch package, `bitmagnet-git`, [in the AUR repository](https://aur.archlinux.org/packages/bitmagnet-git).
- [@DyonR](https://github.com/DyonR) has developed [magnetico2bitmagnet](https://github.com/DyonR/magnetico2bitmagnet), a collection of scripts for importing into **bitmagnet** from Magnetico and other sources.
- [@DyonR](https://github.com/DyonR) has written [a **bitmagnet** on Unraid guide](https://github.com/DyonR/bitmagnet-unraid).
- Your link could be here!
+3 -3
View File
@@ -12,7 +12,7 @@ No. **bitmagnet** does not download, store or distribute any content _at all_. I
## Should I use a VPN with **bitmagnet**?
It is recommended to use a VPN: **bitmagnet** may download **metadata about** illegal and copyrighted content. It is possible that rudimentary law enforcement and anti-piracy tracking tools would incorrectly flag this activity, although we've never heard about anyone getting into trouble for using this or similar metadata crawlers. Setting up a VPN is simple and cheap, and it's better to be safe than sorry. We are not affiliated with any VPN providers, but if you're unsure which provider to choose, we can recommend [Mullvad](https://mullvad.net/){:target="\_blank"}.
It is recommended to use a VPN: **bitmagnet** may download **metadata about** illegal and copyrighted content. It is possible that rudimentary law enforcement and anti-piracy tracking tools would incorrectly flag this activity, although we've never heard about anyone getting into trouble for using this or similar metadata crawlers. Setting up a VPN is simple and cheap, and it's better to be safe than sorry. We are not affiliated with any VPN providers, but if you're unsure which provider to choose, we can recommend [Mullvad](https://mullvad.net/).
## Is **bitmagnet** intended to be used as a public service?
@@ -47,7 +47,7 @@ Visit the metrics endpoint at `/metrics` and check the metric `bitmagnet_dht_cra
## How are the seeders/leechers numbers determined for torrents crawled from the DHT?
The DHT crawler uses a [BEP33 scrape request](https://www.bittorrent.org/beps/bep_0033.html){:target="\_blank"} to provide a very rough estimate of the current seeders/leechers.
The DHT crawler uses a [BEP33 scrape request](https://www.bittorrent.org/beps/bep_0033.html) to provide a very rough estimate of the current seeders/leechers.
## How do I know if a torrent crawled by **bitmagnet** is being actively seeded, and that I'll be able to download it?
@@ -59,7 +59,7 @@ No. The DHT crawler works by sampling random info hashes from the network, and w
## I'm seeing a lot of torrents in the "Unknown" category, that are clearly of a particular content type - what's wrong?
**bitmagnet** is in early development, and improving the classifier will be an ongoing effort. When new versions are released, you can follow the [reclassify tutorial](/tutorials/reprocess-reclassify.html) to reclassify torrents.
**bitmagnet** is in early development, and improving the classifier will be an ongoing effort. When new versions are released, you can follow the [reclassify tutorial](/tutorials/reprocess-reclassify.html) to reclassify torrents. If you'd like to [improve or customize the classifier](/tutorials/classifier.html), this is also possible.
## Can I run multiple **bitmagnet** instances pointing to the same database?
+4 -4
View File
@@ -14,7 +14,7 @@ nav_order: -1
> Important
>
> This software is currently in alpha. It is ready to preview some interesting and unique features, but there will likely be bugs, as well as API and database schema changes before the (currently theoretical) 1.0 release. If you'd like to support this project and help it gain momentum, **[please give it a star on GitHub](https://github.com/bitmagnet-io/bitmagnet){:target="\_blank"}**.
> This software is currently in alpha. It is ready to preview some interesting and unique features, but there will likely be bugs, as well as API and database schema changes before the (currently theoretical) 1.0 release. If you'd like to support this project and help it gain momentum, **[please give it a star on GitHub](https://github.com/bitmagnet-io/bitmagnet)**.
>
> [If you're interested in getting involved and you're a backend GoLang or frontend TypeScript/Angular developer, or you're knowledgeable about BitTorrent protocols then **I'd like to hear from you**](/internals-development.html) - let's get this thing over the line!
@@ -51,7 +51,7 @@ This means that **bitmagnet** is not reliant on any external trackers or torrent
- [ ] A more complete web UI
- [ ] Saved searches for content of particular interest, enabling custom feeds in addition to the following feature
- [ ] Smart deletion: there's a lot of crap out there; crawling DHT can quickly use lots of database disk space, and search becomes slower with millions of indexed torrents of which 90% are of no interest. A smart deletion feature would use saved searches to identify content that you're _not_ interested in, including low quality content (such as low resolution movies). It would automatically delete associated metadata and add the info hash to a bloom filter, preventing the torrent from being re-indexed in future.
- [ ] Bi-directional integration with the [Prowlarr indexer proxy](https://prowlarr.com/){:target="\_blank"}: Currently **bitmagnet** can be added as an indexer in Prowlarr; bi-directional integration would allow **bitmagnet** to crawl content from any indexer configured in Prowlarr, unlocking many new sources of content
- [ ] Bi-directional integration with the [Prowlarr indexer proxy](https://prowlarr.com/): Currently **bitmagnet** can be added as an indexer in Prowlarr; bi-directional integration would allow **bitmagnet** to crawl content from any indexer configured in Prowlarr, unlocking many new sources of content
- [ ] More documentation and more tests!
### Pipe dream features
@@ -61,5 +61,5 @@ This is where things start to get a bit nebulous. For now all focus is on delive
- [ ] In-place seeding: identify files on your computer that are part of an indexed torrent, and allow them to be seeded in place after having moved, renamed or deleted parts of the torrent
- [ ] Integration with popular BitTorrent clients
- [ ] Federation of some sort: allow friends to connect instances and pool the indexing effort, perhaps involving crowd sourcing manual content curation to supplement the automated classifiers
- [ ] Something that looks like a decentralized private tracker; by this I probably mean something that's based partly on personal trust and manually weeding out any bad actors; I'd be wary of creating something that looks a bit like [Tribler](https://github.com/Tribler/tribler){:target="\_blank"}, which while an interesting project seems to have demonstrated that implementing trust, reputation and privacy at the protocol level carries too much overhead to be a compelling alternative to plain old BitTorrent, for all its imperfections
- [ ] Support for the [BitTorrent v2 protocol](https://blog.libtorrent.org/2020/09/bittorrent-v2/){:target="\_blank"}: It remains to be seen if wider adoption will ever make this a valuable feature
- [ ] Something that looks like a decentralized private tracker; by this I probably mean something that's based partly on personal trust and manually weeding out any bad actors; I'd be wary of creating something that looks a bit like [Tribler](https://github.com/Tribler/tribler), which while an interesting project seems to have demonstrated that implementing trust, reputation and privacy at the protocol level carries too much overhead to be a compelling alternative to plain old BitTorrent, for all its imperfections
- [ ] Support for the [BitTorrent v2 protocol](https://blog.libtorrent.org/2020/09/bittorrent-v2/): It remains to be seen if wider adoption will ever make this a valuable feature
+1 -1
View File
@@ -8,4 +8,4 @@ has_children: true
# Internals & Development
{: .highlight }
Are you an experienced developer with knowledge of GoLang, Postgres, TypeScript/Angular and/or BitTorrent protocols? I'm currently a lone developer with a full time job and many other commitments, and have been working on this in spare moments for the past few months. This project is too big for one person! If you're interested in contributing please [review the open issues](https://github.com/bitmagnet-io/bitmagnet/issues){:target="\_blank"} and feel free to open a PR!
Are you an experienced developer with knowledge of GoLang, Postgres, TypeScript/Angular and/or BitTorrent protocols? I'm currently a lone developer with a full time job and many other commitments, and have been working on this in spare moments for the past few months. This project is too big for one person! If you're interested in contributing please [review the open issues](https://github.com/bitmagnet-io/bitmagnet/issues) and feel free to open a PR!
@@ -7,15 +7,15 @@ nav_order: 2
# Architecture & Lifecycle of the DHT Crawler
The DHT and BitTorrent protocols are (rather impenetrably) documented at [bittorrent.org](http://bittorrent.org/beps/bep_0000.html){:target="\_blank"}. Relevant resources include:
The DHT and BitTorrent protocols are (rather impenetrably) documented at [bittorrent.org](http://bittorrent.org/beps/bep_0000.html). Relevant resources include:
- [BEP 5: DHT Protocol](http://bittorrent.org/beps/bep_0005.html){:target="\_blank"}
- [BEP 51: Infohash Indexing](https://www.bittorrent.org/beps/bep_0051.html){:target="\_blank"}
- [BEP 33: DHT Scrapes](https://www.bittorrent.org/beps/bep_0033.html){:target="\_blank"}
- [BEP 10: Extension Protocol](https://www.bittorrent.org/beps/bep_0010.html){:target="\_blank"}
- [The Kademlia paper](https://pdos.csail.mit.edu/~petar/papers/maymounkov-kademlia-lncs.pdf){:target="\_blank"}
- [BEP 5: DHT Protocol](http://bittorrent.org/beps/bep_0005.html)
- [BEP 51: Infohash Indexing](https://www.bittorrent.org/beps/bep_0051.html)
- [BEP 33: DHT Scrapes](https://www.bittorrent.org/beps/bep_0033.html)
- [BEP 10: Extension Protocol](https://www.bittorrent.org/beps/bep_0010.html)
- [The Kademlia paper](https://pdos.csail.mit.edu/~petar/papers/maymounkov-kademlia-lncs.pdf)
The rest of what I've figured out about how to implement a DHT crawler was cobbled together from [the now archived **magnetico** project](https://github.com/boramalper/magnetico){:target="\_blank"} and [anacrolix's BitTorrent libraries](https://github.com/anacrolix){:target="\_blank"}.
The rest of what I've figured out about how to implement a DHT crawler was cobbled together from [the now archived **magnetico** project](https://github.com/boramalper/magnetico) and [anacrolix's BitTorrent libraries](https://github.com/anacrolix).
The following diagram illustrates roughly how the crawler has been implemented within **bitmagnet**. It's debatable if this will help stop anyone's brain from melting, including my own.
@@ -9,22 +9,22 @@ nav_order: 3
## Grafana stack & Prometheus integration
**bitmagnet** can integrate with the [Grafana stack](https://grafana.com/){:target="\_blank"} and [Prometheus](https://prometheus.io/){:target="\_blank"} for monitoring and building observability dashboards for the DHT crawler and other components. See the "Optional observability services" section of the [example docker compose configuration](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml){:target="\_blank"} and [example Grafana / Prometheus configuration files and a provisioned Grafana dashboard](https://github.com/bitmagnet-io/bitmagnet/tree/main/observability){:target="\_blank"}.
**bitmagnet** can integrate with the [Grafana stack](https://grafana.com/) and [Prometheus](https://prometheus.io/) for monitoring and building observability dashboards for the DHT crawler and other components. See the "Optional observability services" section of the [example docker compose configuration](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml) and [example Grafana / Prometheus configuration files and a provisioned Grafana dashboard](https://github.com/bitmagnet-io/bitmagnet/tree/main/observability).
![Grafana dashboard](/assets/images/grafana-1.png)
The example integration includes:
- [Grafana](https://grafana.com/oss/grafana/){:target="\_blank"} - A dashboarding and visualization tool
- [Grafana Agent](https://grafana.com/oss/agent/){:target="\_blank"} - Collects metrics and logs, and forwards them to storage backends
- [Prometheus](https://prometheus.io/){:target="\_blank"} - A time series database for metrics
- [Loki](https://grafana.com/oss/loki/){:target="\_blank"} - A log aggregation system
- [Pyroscope](https://pyroscope.io/){:target="\_blank"} - A continuous profiling tool
- [Postgres exporter](https://github.com/prometheus-community/postgres_exporter){:target="\_blank"} - Exposes Postgres metrics to Prometheus
- [Grafana](https://grafana.com/oss/grafana/) - A dashboarding and visualization tool
- [Grafana Agent](https://grafana.com/oss/agent/) - Collects metrics and logs, and forwards them to storage backends
- [Prometheus](https://prometheus.io/) - A time series database for metrics
- [Loki](https://grafana.com/oss/loki/) - A log aggregation system
- [Pyroscope](https://pyroscope.io/) - A continuous profiling tool
- [Postgres exporter](https://github.com/prometheus-community/postgres_exporter) - Exposes Postgres metrics to Prometheus
# Profiling with pprof
**bitmagnet** exposes [Go pprof](https://golang.org/pkg/net/http/pprof/){:target="\_blank"} profiling endpoints at `/debug/pprof/*`, for example:
**bitmagnet** exposes [Go pprof](https://golang.org/pkg/net/http/pprof/) profiling endpoints at `/debug/pprof/*`, for example:
```sh
go tool pprof http://localhost:3333/debug/pprof/heap
@@ -15,21 +15,21 @@ Postgres is the primary data store, and powers the search engine and message que
Some key libraries used include:
- [anacrolix/torrent](https://github.com/anacrolix/torrent){:target="\_blank"} not heavily used right now, but contains many useful BitTorrent utilities and could drive future features such as in-place seeding
- [fx](https://uber-go.github.io/fx/){:target="\_blank"} for dependency injection and management of the application lifecycle
- [gin](https://gin-gonic.com/){:target="\_blank"} for the HTTP server
- [goose](https://pressly.github.io/goose/){:target="\_blank"} for database migrations
- [gorm](https://gorm.io/){:target="\_blank"} for database access
- [gqlgen](https://gqlgen.com/){:target="\_blank"} for the GraphQL server implementation
- [rex](https://github.com/hedhyw/rex){:target="\_blank"} a regular expression library that makes some of the monstrous classification regexes more manageable
- [urfave/cli](https://cli.urfave.org/){:target="\_blank"} for the command line interface
- [zap](https://github.com/uber-go/zap){:target="\_blank"} for logging
- [anacrolix/torrent](https://github.com/anacrolix/torrent) not heavily used right now, but contains many useful BitTorrent utilities and could drive future features such as in-place seeding
- [fx](https://uber-go.github.io/fx/) for dependency injection and management of the application lifecycle
- [gin](https://gin-gonic.com/) for the HTTP server
- [goose](https://pressly.github.io/goose/) for database migrations
- [gorm](https://gorm.io/) for database access
- [gqlgen](https://gqlgen.com/) for the GraphQL server implementation
- [rex](https://github.com/hedhyw/rex) a regular expression library that makes some of the monstrous classification regexes more manageable
- [urfave/cli](https://cli.urfave.org/) for the command line interface
- [zap](https://github.com/uber-go/zap) for logging
## TypeScript/Angular Web UI
Using [Angular Material components](https://material.angular.io/){:target="\_blank"}. The web UI is embedded in the GoLang binary and served by the Gin web framework, and hence the build artifacts are committed into the repository.
Using [Angular Material components](https://material.angular.io/). The web UI is embedded in the GoLang binary and served by the Gin web framework, and hence the build artifacts are committed into the repository.
## Other tooling
- The repository includes a [Taskfile](https://taskfile.dev/){:target="\_blank"} containing several useful development scripts
- The repository includes a [Taskfile](https://taskfile.dev/) containing several useful development scripts
- GitHub actions are used for CI, building the Docker image and for building this website
+331
View File
@@ -0,0 +1,331 @@
{
"$id": "https://bitmagnet.io/schemas/classifier-0.1.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"additionalProperties": false,
"definitions": {
"action": {
"oneOf": [
{
"$ref": "#/definitions/action_single"
},
{
"$ref": "#/definitions/action_multi"
}
]
},
"action__add_tag": {
"additionalProperties": false,
"description": "Add one or more tags to the current torrent",
"properties": {
"add_tag": {
"items": {
"type": "string"
},
"type": "array"
}
},
"required": [
"add_tag"
],
"type": "object"
},
"action__attach_local_content_by_id": {
"const": "attach_local_content_by_id",
"description": "Use the torrent hint to attach locally stored content by ID"
},
"action__attach_local_content_by_search": {
"const": "attach_local_content_by_search",
"description": "Attempt to attach local content with a search on the torrent name"
},
"action__attach_tmdb_content_by_id": {
"const": "attach_tmdb_content_by_id",
"description": "Use the torrent hint to attach content from the TMDB API by ID"
},
"action__attach_tmdb_content_by_search": {
"const": "attach_tmdb_content_by_search",
"description": "Attempt to attach content from the TMDB API with a search on the torrent name"
},
"action__delete": {
"const": "delete",
"description": "Delete the current torrent"
},
"action__find_match": {
"additionalProperties": false,
"description": "Iterate through a series of actions to find the first that does not return an unmatched error",
"properties": {
"find_match": {
"items": {
"$ref": "#/definitions/action_single"
},
"type": "array"
}
},
"required": [
"find_match"
],
"type": "object"
},
"action__if_else": {
"additionalProperties": false,
"description": "Execute an action based on a condition",
"properties": {
"if_else": {
"additionalProperties": false,
"properties": {
"condition": {
"$ref": "#/definitions/condition"
},
"else_action": {
"$ref": "#/definitions/action"
},
"if_action": {
"$ref": "#/definitions/action"
}
},
"required": [
"condition"
],
"type": "object"
}
},
"required": [
"if_else"
],
"type": "object"
},
"action__parse_date": {
"const": "parse_date",
"description": "Try to parse a date from the name of the current torrent"
},
"action__parse_video_content": {
"const": "parse_video_content",
"description": "Parse video-related attributes from the name of the current torrent"
},
"action__run_workflow": {
"additionalProperties": false,
"description": "Run a different workflow within the current workflow",
"properties": {
"run_workflow": {
"items": {
"minLength": 1,
"type": "string"
},
"type": "array"
}
},
"required": [
"run_workflow"
],
"type": "object"
},
"action__set_content_type": {
"additionalProperties": false,
"description": "Set the content type of the current torrent",
"properties": {
"set_content_type": {
"enum": [
"movie",
"tv_show",
"music",
"ebook",
"comic",
"audiobook",
"game",
"software",
"xxx",
"unknown"
],
"type": "string"
}
},
"required": [
"set_content_type"
],
"type": "object"
},
"action__unmatched": {
"const": "unmatched",
    "description": "Return an unmatched error for the current torrent"
},
"action_multi": {
"items": {
"$ref": "#/definitions/action_single"
},
"type": "array"
},
"action_single": {
"oneOf": [
{
"$ref": "#/definitions/action__add_tag"
},
{
"$ref": "#/definitions/action__attach_local_content_by_id"
},
{
"$ref": "#/definitions/action__attach_local_content_by_search"
},
{
"$ref": "#/definitions/action__attach_tmdb_content_by_id"
},
{
"$ref": "#/definitions/action__attach_tmdb_content_by_search"
},
{
"$ref": "#/definitions/action__delete"
},
{
"$ref": "#/definitions/action__find_match"
},
{
"$ref": "#/definitions/action__if_else"
},
{
"$ref": "#/definitions/action__unmatched"
},
{
"$ref": "#/definitions/action__parse_date"
},
{
"$ref": "#/definitions/action__parse_video_content"
},
{
"$ref": "#/definitions/action__run_workflow"
},
{
"$ref": "#/definitions/action__set_content_type"
}
]
},
"condition": {
"oneOf": [
{
"$ref": "#/definitions/condition__and"
},
{
"$ref": "#/definitions/condition__not"
},
{
"$ref": "#/definitions/condition__or"
},
{
"$ref": "#/definitions/condition__expression"
}
]
},
"condition__and": {
"additionalProperties": false,
"properties": {
"and": {
"description": "A condition that is satisfied if all conditions in a list are satisfied",
"items": {
"$ref": "#/definitions/condition"
},
"type": "array"
}
},
"required": [
"and"
],
"type": "object"
},
"condition__expression": {
"oneOf": [
{
"additionalProperties": false,
"properties": {
"expression": {
"description": "A CEL expression describing a condition",
"minLength": 1,
"type": "string"
}
},
"required": [
"expression"
],
"type": "object"
},
{
"description": "A CEL expression describing a condition",
"minLength": 1,
"type": "string"
}
]
},
"condition__not": {
"additionalProperties": false,
"description": "A condition that negates the provided condition",
"properties": {
"not": {
"$ref": "#/definitions/condition"
}
},
"required": [
"not"
],
"type": "object"
},
"condition__or": {
"additionalProperties": false,
"properties": {
"or": {
"description": "A condition that is satisfied if any of the conditions in a list are satisfied",
"items": {
"$ref": "#/definitions/condition"
},
"type": "array"
}
},
"required": [
"or"
],
"type": "object"
}
},
"properties": {
"$schema": {
"const": "https://bitmagnet.io/schemas/classifier-0.1.json"
},
"extensions": {
"additionalProperties": {
"items": {
"type": "string"
},
"type": "array"
},
"type": "object"
},
"flag_definitions": {
"additionalProperties": {
"enum": [
"bool",
"string",
"int",
"string_list",
"content_type_list"
],
"type": "string"
},
"type": "object"
},
"flags": {
"additionalProperties": true,
"type": "object"
},
"keywords": {
"additionalProperties": {
"items": {
"type": "string"
},
"type": "array"
},
"type": "object"
},
"workflows": {
"additionalProperties": {
"$ref": "#/definitions/action"
},
"type": "object"
}
},
"type": "object"
}
+8 -4
View File
@@ -11,6 +11,7 @@ nav_order: 2
- `postgres.host`, `postgres.name` `postgres.user` `postgres.password` (default: `localhost`, `bitmagnet`, `postgres`, _empty_): Set these values to configure connection to your Postgres database.
- `tmdb.api_key`: This is quite an important one, please [see below](#obtaining-a-tmdb-api-key) for more details.
- `tmdb.enabled` (default: `true`): Specify `false` to disable the TMDB API integration.
- `dht_crawler.save_files_threshold` (default: `100`): Some torrents contain many thousands of files, which impacts performance and uses a lot of database disk space. This parameter sets a maximum limit for the number of files saved by the crawler with each torrent.
- `dht_crawler.save_pieces` (default: `false`): If true, the DHT crawler will save the pieces bytes from the torrent metadata. The pieces take up quite a lot of space, and aren't currently very useful, but they may be used by future features.
- `log.level` (default: `info`): If you're developing or just curious then you may want to set this to `debug`; note that `debug` output will be very verbose.
@@ -78,15 +79,18 @@ Environment variables can be used to configure simple scalar types (strings, num
## VPN configuration
It's recommended that you run **bitmagnet** behind a VPN. If you're using Docker then [gluetun](https://github.com/qdm12/gluetun-wiki){:target="\_blank"} is a good solution for this, although the networking settings can be tricky. The [example docker-compose file](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml){:target="\_blank"} demonstrates this.
It's recommended that you run **bitmagnet** behind a VPN. If you're using Docker then [gluetun](https://github.com/qdm12/gluetun-wiki) is a good solution for this, although the networking settings can be tricky. The [example docker-compose file](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml) demonstrates this.
## Obtaining a TMDB API Key
{: .highlight }
**bitmagnet** uses [the TMDB API](https://developer.themoviedb.org/docs){:target="\_blank"} to fetch metadata for movies and TV shows. By default you'll be sharing an API key with other users. If you're using this app and its content classifier heavily then you'll need to get a personal TMDB API key. Until you do this you'll see a warning message in the logs on startup, and you'll be limited to 1 TMDB API request per second. This is just about enough for running the DHT crawler, but if you're importing and classifying a lot of content this will be a major bottleneck. If many people are using this app with the default API key then that could add up to many requests per second, so please get your own API key if you are using this app more than casually!
**bitmagnet** uses [the TMDB API](https://developer.themoviedb.org/docs) to fetch metadata for movies and TV shows. By default you'll be sharing an API key with other users. If you're using this app and its content classifier heavily then you'll need to get a personal TMDB API key. Until you do this you'll see a warning message in the logs on startup, and you'll be limited to 1 TMDB API request per second. This is just about enough for running the DHT crawler, but if you're importing and classifying a lot of content this will be a major bottleneck. If many people are using this app with the default API key then that could add up to many requests per second, so please get your own API key if you are using this app more than casually!
Obtaining an API key is free and relatively easy, but you'll have to register for a TMDB account, provide them with some personal information such as contact details, a website URL (such as your GitHub account or social media profile) and a short description of your use case (**tip:** this app provides _"A content classifier that identifies movies and TV shows based on filenames"_). Once you've filled in the request form, approval should be instant.
[Synology have provided a full tutorial on obtaining a TMDB API key](https://kb.synology.com/en-au/DSM/tutorial/How_to_apply_for_a_personal_API_key_to_get_video_info){:target="\_blank"}.
[Synology have provided a full tutorial on obtaining a TMDB API key](https://kb.synology.com/en-au/DSM/tutorial/How_to_apply_for_a_personal_API_key_to_get_video_info).
Once you've obtained your API key you'll need to configure the `tmdb.api_key` value. Your rate limit will then default to 20 requests per second, which is well within [TMDB's stated fair usage limit](https://developer.themoviedb.org/docs/rate-limiting){:target="\_blank"}.
Once you've obtained your API key you'll need to configure the `tmdb.api_key` value. Your rate limit will then default to 20 requests per second, which is well within [TMDB's stated fair usage limit](https://developer.themoviedb.org/docs/rate-limiting).
{: .highlight }
The TMDB API integration can be disabled altogether by setting `tmdb.enabled` to `false`.
+2 -2
View File
@@ -9,7 +9,7 @@ nav_order: 1
## Docker
The quickest way to get up-and-running with **bitmagnet** is with [Docker Compose](https://docs.docker.com/compose/){:target="\_blank"}. The following `docker-compose.yml` is a minimal example. For a more full-featured example including VPN routing and observability services see the [docker compose configuration in the GitHub repository](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml){:target="\_blank"}.
The quickest way to get up-and-running with **bitmagnet** is with [Docker Compose](https://docs.docker.com/compose/). The following `docker-compose.yml` is a minimal example. For a more full-featured example including VPN routing and observability services see the [docker compose configuration in the GitHub repository](https://github.com/bitmagnet-io/bitmagnet/blob/main/docker-compose.yml).
```yml
services:
@@ -77,7 +77,7 @@ You can also install **bitmagnet** natively with `go install github.com/bitmagne
The **bitmagnet** CLI is the entrypoint into the application. Take note of the command needed to run the CLI, depending on your installation method.
- If you are using the docker-compose example above, you can run the CLI (while the stack is started) with `docker exec -it bitmagnet /bitmagnet`.
- If you are using the docker-compose example above, you can run the CLI (while the stack is started) with `docker exec -it bitmagnet bitmagnet`.
- If you installed bitmagnet with `go install`, you can run the CLI with `bitmagnet`.
When referring to CLI commands in the rest of the documentation, for simplicity we will use `bitmagnet`; please substitute this for the correct command. For example, to show the CLI help, run:
@@ -23,7 +23,7 @@ This tutorial will show you how to backup, restore and merge **bitmagnet** datab
The following command will take a backup of the critical **bitmagnet** data and save it to a file named `export.sql`. (note this is not a full backup of the database which would include creation of tables, indexes etc.). By exporting with the `--data-only` flag the resulting file can be imported into a new or existing installation, after **bitmagnet** has run its migrations to set up the database and tables.
Please refer to [the `pg_dump` documentation](https://www.postgresql.org/docs/current/app-pgdump.html){:target="\_blank"} and ensure to specify the correct values (e.g. `host`, `username` and `password`) for the source database.
Please refer to [the `pg_dump` documentation](https://www.postgresql.org/docs/current/app-pgdump.html) and ensure to specify the correct values (e.g. `host`, `username` and `password`) for the source database.
```sh
pg_dump \
@@ -53,7 +53,7 @@ First, ensure you have a target **bitmagnet** instance up and running, _of the s
The following command will import the backup file into the target database, merging the data with any existing data.
Please refer to [the `psql` documentation](https://www.postgresql.org/docs/current/app-psql.html){:target="\_blank"} and ensure to specify the correct values (e.g. `host`, `username` and `password`) for the target database.
Please refer to [the `psql` documentation](https://www.postgresql.org/docs/current/app-psql.html) and ensure to specify the correct values (e.g. `host`, `username` and `password`) for the target database.
```sh
psql bitmagnet < backup.sql
+380
View File
@@ -0,0 +1,380 @@
---
title: Classifier
parent: Tutorials
layout: default
nav_order: 3
---
# Classifier
{: .note-title }
> tl;dr:
>
> The classifier can be configured and customized to do things like:
>
> - automatically delete torrents you don't want in your index
> - add custom tags to torrents you're interested in
> - customize the keywords and file extensions used for determining a torrent's content type
> - specify completely custom logic to classify and perform other actions on torrents
>
> Skip to [practical use cases and examples](#practical-use-cases-and-examples)
## Background
After a torrent is crawled or imported, some further processing must be done to gather metadata, have a guess at the torrent's contents and finally index it in the database, allowing it to be searched and displayed in the UI/API.
**bitmagnet**'s classifier is powered by a [Domain Specific Language](https://en.wikipedia.org/wiki/Domain-specific_language). The aim of this is to provide a high level of customisability, along with transparency into the classification process which will hopefully aid collaboration on improvements to the core classifier logic.
The classifier is declared in YAML format. The application includes a [core classifier](https://github.com/bitmagnet-io/bitmagnet/blob/main/internal/classifier/classifier.core.yaml) that can be configured, extended or completely replaced with a custom classifier. This page documents the required format.
## Source precedence
**bitmagnet** will attempt to load classifier source code from all the following locations. Any discovered classifier source will be merged with other sources in the following order of precedence:
- [the core classifier](https://github.com/bitmagnet-io/bitmagnet/blob/main/internal/classifier/classifier.core.yaml)
- `classifier.yml` in the [XDG-compliant](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) config location for the current user (for example on MacOS this is `~/Library/Application Support/bitmagnet/classifier.yml`)
- `classifier.yml` in the current working directory
- [Classifier configuration](#configuration)
Note that multiple sources will be merged, not replaced. For example, keywords added to the classifier configuration will be merged with the core keywords.
The merged classifier source can be viewed with the CLI command `bitmagnet classifier show`.
{% include callout_cli.md %}
## Schema
A [JSON schema for the classifier](https://bitmagnet.io/schemas/classifier-0.1.json) is available; some editors and IDEs will be able to validate the structure of your classifier document by specifying the `$schema` attribute:
```yaml
$schema: https://bitmagnet.io/schemas/classifier-0.1.json
```
The classifier schema can also be viewed by running the cli command `bitmagnet classifier schema`.
{% include callout_cli.md %}
The classifier declaration comprises the following components:
## Workflows
A workflow is a list of [actions](#actions) that will be executed on all torrents when they are classified. When no custom configuration is provided, the `default` workflow will be run. To use a different workflow instead, specify the `classifier.workflow` configuration option with the name of your custom workflow.
## Actions
An action is a piece of [workflow](#workflows) to be executed. All actions either return an updated classification result or an error.
For example, the following action will set the content type of the current torrent to `audiobook`:
```yaml
set_content_type: audiobook
```
The following action will return an `unmatched` error:
```yaml
unmatched
```
And the following action will delete the current torrent being classified (returning a `delete` error):
```yaml
delete
```
These actions aren't much use on their own - we'd want to check some conditions are satisfied before setting a content type or deleting a torrent, and for this we'd use the `if_else` action. For example, the following action will set the content type to `audiobook` if the torrent name contains audiobook-related keywords, and will otherwise return an `unmatched` error:
```yaml
if_else:
condition: "torrent.baseName.matches(keywords.audiobook)"
if_action:
set_content_type: audiobook
else_action: unmatched
```
The following action will delete a torrent if its name matches the list of `banned` keywords:
```yaml
if_else:
condition: "torrent.baseName.matches(keywords.banned)"
if_action: delete
```
Actions may return the following types of error:
- An `unmatched` error indicates that the current action did not match for the current torrent
- A `delete` error indicates that the torrent should be deleted
- An unhandled error may occur, for example if the TMDB API was unreachable
Whenever an error is returned, the current classification will be terminated.
Note that a workflow should never return an `unmatched` error. We expect to iterate through a series of checks corresponding to each content type. If the current torrent does not match the content type being checked, we'll proceed to the next check until we find a match; if no match can be found, the content type will be `unknown`. To facilitate this, we can use the `find_match` action.
The `find_match` action is a bit like a try/catch block in some programming languages; it will try to match a particular content type, and if an `unmatched` error is returned, it will catch the `unmatched` error and proceed to the next check. For example, the following action will attempt to classify a torrent as an `audiobook`, and then as an `ebook`. If both checks fail, the content type will be `unknown`:
```yaml
find_match:
# match audiobooks:
- if_else:
condition: "torrent.baseName.matches(keywords.audiobook)"
if_action:
set_content_type: audiobook
else_action: unmatched
# match ebooks:
- if_else:
condition: "torrent.files.map(f, f.extension in extensions.ebook ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: ebook
else_action: unmatched
```
For a full list of available actions, please refer to [the JSON schema](https://bitmagnet.io/schemas/classifier-0.1.json).
## Conditions
Conditions are used in conjunction with the `if_else` [action](#actions), in order to execute an action if a particular condition is satisfied.
The conditions in the examples above use [CEL (Common Expression Language) expressions](https://cel.dev/).
### The CEL environment
CEL is already a [well-documented](https://github.com/google/cel-spec/blob/master/doc/intro.md) language, so this page won't go into detail about the CEL syntax. In the context of the **bitmagnet** classifier, the CEL environment exposes a number of variables:
- `torrent`: The current torrent being classified (protobuf type: `bitmagnet.Torrent`)
- `result`: The current classification result (protobuf type: `bitmagnet.Classification`)
- `keywords`: A map of strings to regular expressions, representing named lists of [keywords](#keywords)
- `extensions`: A map of strings to string lists, representing named lists of [extensions](#extensions)
- `contentType`: A map of strings to enum values representing content types (e.g. `contentType.movie`, `contentType.music`)
- `fileType`: A map of strings to enum values representing file types (e.g. `fileType.video`, `fileType.audio`)
- `flags`: A map of strings to the configured values of [flags](#flags)
- `kb`, `mb`, `gb`: Variables defined for convenience, equal to the number of bytes in a kilobyte, megabyte and gigabyte respectively
For more details on the protocol buffer types, please refer to [the protobuf schema](https://github.com/bitmagnet-io/bitmagnet/blob/main/internal/protobuf/bitmagnet.proto).
### Boolean logic (`or`, `and` & `not`)
In addition to CEL expressions, conditions may be declared using the boolean logic operators `or`, `and` and `not`. For example, the following condition evaluates to true if either the torrent consists mostly of file extensions very commonly used for music (e.g. `flac`), OR if the torrent both has a name that includes music-related keywords, and consists mostly of audio files:
```yaml
or:
- "torrent.files.map(f, f.extension in extensions.music ? f.size : - f.size).sum() > 0"
- and:
- "torrent.baseName.matches(keywords.music)"
- "torrent.files.map(f, f.fileType == fileType.audio ? f.size : - f.size).sum() > 0"
```
Note that we could also have specified the above condition using just one CEL expression, but breaking up complex conditions like this is more readable.
## Keywords
The classifier includes lists of keywords associated with different types of torrents. These aim to provide a simpler alternative to regular expressions, and the classifier will compile all keyword lists to regular expressions that can be used within CEL expressions. In order for a keyword to match, it must appear as an isolated token in the test string - that is, it must be either at the beginning or preceded by a non-word character, and either at the end or followed by a non-word character.
Reserved characters in the syntax are:
- parentheses `(` and `)` enclose a group
- `|` is an OR operator
- `*` is a wildcard operator
- `?` makes the previous character or group optional
- `+` specifies one or more of the previous character
- `#` specifies any number
- ` ` specifies any non-word or non-number character
For example, to define some music- and audiobook-related keywords:
```yaml
keywords:
music: # define music-related keywords
- music # all letters are case-insensitive, and must be defined in lowercase unless escaped
- discography
- album
- \V.?\A # escaped letters are case-sensitive; matches "VA", "V.A" and "V.A.", but not "va"
- various artists # matches "various artists" and "Various.Artists"
audiobook: # define audiobook-related keywords
- (audio)?books?
- (un)?abridged
- narrated
- novels?
- (auto)?biograph(y|ies) # matches "biography", "autobiographies" etc.
```
{: .note }
> If you'd rather use plain old regular expressions, the CEL syntax supports that too, for example `torrent.baseName.matches("^myregex$")`.
## Extensions
The classifier includes lists of file extensions associated with different types of content. For example, to identify torrents of type `comic` by their file extensions, the extensions are first declared:
```yaml
extensions:
comic:
- cb7
- cba
- cbr
- cbt
- cbz
```
The extensions can now be used as part of a condition within an `if_else` action:
```yaml
if_else:
condition: "torrent.files.map(f, f.extension in extensions.comic ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: comic
else_action: unmatched
```
## Flags
Flags can be used to configure workflows. In order to use a flag in a workflow, it must first be defined. For example, the core classifier defines the following flags that are used in the `default` workflow:
```yaml
flag_definitions:
tmdb_enabled: bool
delete_content_types: content_type_list
delete_xxx: bool
```
These flags can be referenced within CEL expressions, for example to delete adult content if the `delete_xxx` flag is set to `true`:
```yaml
if_else:
condition: "flags.delete_xxx && result.contentType == contentType.xxx"
if_action: delete
```
## Configuration
The classifier can be customized by providing a `classifier.yml` file in a supported location [as described above](#source-precedence). If you only want to make some minor modifications, it may be convenient to specify these [using the main application configuration](/setup/configuration.html) instead, by providing values in either `config.yml` or as environment variables. The application configuration exposes some but not all properties of the classifier.
For example, in your `config.yml` you could specify:
```yaml
classifier:
# specify a custom workflow to be used:
workflow: custom
# add to the core list of music keywords:
keywords:
music:
- my-custom-music-keyword
# add a file extension to the list of audiobook-related extensions:
extensions:
audiobook:
- abc
# auto-delete all comics
flags:
delete_content_types:
- comics
```
Or as environment variables you could specify:
```sh
TMDB_ENABLED=false \ # disable the TMDB API integration
CLASSIFIER_WORKFLOW=custom \ # specify a custom workflow to be used
CLASSIFIER_DELETE_XXX=true \ # auto-delete all adult content
bitmagnet worker run --all
```
## Validation
The classifier source is compiled on initial load, and all structural and syntax errors should be caught at compile time. If there are errors in your classifier source, **bitmagnet** should exit with an error message indicating the location of the problem.
## Testing on individual torrents
You can test the classifier on an individual torrent or torrents using the `bitmagnet process` CLI command:
```sh
bitmagnet process --infoHash=aaaaaaaaaaaaaaaaaaaa --infoHash=bbbbbbbbbbbbbbbbbbbb
```
{% include callout_cli.md %}
## Reclassify all torrents
Read how to [reclassify all torrents](/tutorials/reprocess-reclassify.html).
## Practical use cases and examples
### Auto-delete specific content types
The default workflow provides a flag that allows for automatically deleting specific content types. For example, to delete all `comic`, `software` and `xxx` torrents:
```yaml
flags:
delete_content_types:
- comic
- software
- xxx
```
Auto-deleting adult content has been one of the most requested features. For convenience, this is exposed as the configuration option `classifier.delete_xxx`, and can be specified with the environment variable `CLASSIFIER_DELETE_XXX=true`.
### Auto-delete torrents containing specific keywords
Any torrents containing keywords in the `banned` list will be automatically deleted. This is primarily used for deleting <abbr title="Child Sexual Abuse Material">CSAM</abbr> content, but the list can be extended to auto-delete any other keywords:
```yaml
keywords:
banned:
- my-hated-keyword
```
### Disable the TMDB API integration
The `tmdb_enabled` flag can be used to disable the TMDB API integration:
```yaml
flags:
tmdb_enabled: false
```
For convenience, this is also exposed as the configuration option `tmdb.enabled`, and can be specified with the environment variable `TMDB_ENABLED=false`.
### Extend the default workflow with custom logic
Custom workflows can be added in the `workflows` section of the classifier document. It is possible to extend the default workflow by using the `run_workflow` action within your custom workflow, for example:
```yaml
workflows:
custom:
- <my custom action to be executed before the default workflow>
- run_workflow: default
- <my custom action to be executed after the default workflow>
```
A concrete example of this is adding tags to torrents based on custom criteria.
### Use tags to create custom torrent categories
Is there a category of torrent you're interested in that isn't captured by one of the core content types? Torrent tags are intended to capture custom categories and content types.
Let's imagine you'd like to surface torrents containing interesting documents. The interesting documents have specific file extensions, and their filenames contain specific keywords. Let's create a custom action to tag torrents containing interesting documents:
```yaml
# define file extensions for the documents we're interested in:
extensions:
interesting_documents:
- doc
- docx
- pdf
# define keywords that must be present in the filenames of the interesting documents:
keywords:
interesting_documents:
- interesting
- fascinating
# extend the default workflow with a custom workflow to tag torrents containing interesting documents:
workflows:
custom:
# first run the default workflow:
- run_workflow: default
# then add the tag to any torrents containing interesting documents:
- if_else:
condition: "torrent.files.filter(f, f.extension in extensions.interesting_documents && f.basePath.matches(keywords.interesting_documents)).size() > 0"
if_action:
add_tag: interesting-documents
```
To specify that the custom workflow should be used, remember to specify the `classifier.workflow` configuration option, e.g. `CLASSIFIER_WORKFLOW=custom bitmagnet worker run --all`.
+6 -6
View File
@@ -2,7 +2,7 @@
title: Import
parent: Tutorials
layout: default
nav_order: 4
nav_order: 5
redirect_from:
- /tutorials/importing.html
---
@@ -13,7 +13,7 @@ redirect_from:
> Important
>
> Before continuing with this tutorial, please [obtain and configure a personal TMDB API key]({% link setup/configuration.md %}#obtaining-a-tmdb-api-key).
> Before continuing with this tutorial, please [obtain and configure a personal TMDB API key, or disable the TMDB API integration]({% link setup/configuration.md %}#obtaining-a-tmdb-api-key).
**bitmagnet** includes an import endpoint at `/import`; this can be used for importing Torrent files from any source.
@@ -29,10 +29,10 @@ For the purposes of this tutorial we'll use the RARBG SQLite backup, but you can
> Pre-requisites
>
> - [x] You have [obtained and configured a personal TMDB API key]({% link setup/configuration.md %}#obtaining-a-tmdb-api-key)
> - [x] You have [obtained and configured a personal TMDB API key, or disabled the TMDB API integration]({% link setup/configuration.md %}#obtaining-a-tmdb-api-key)
> - [x] You have obtained a copy of the RARBG SQLite backup (I can't assist you in getting a copy of this, but it's generally available)
> - [x] You have [installed the SQLite3 CLI](https://www.tutorialspoint.com/sqlite/sqlite_installation.htm){:target="\_blank"}
> - [x] You have [installed jq](https://jqlang.github.io/jq/download/){:target="\_blank"}
> - [x] You have [installed the SQLite3 CLI](https://www.tutorialspoint.com/sqlite/sqlite_installation.htm)
> - [x] You have [installed jq](https://jqlang.github.io/jq/download/)
Let's start by writing a SQLite query in a file named `rarbg-import.sql`. This will extract the data we need and get it looking a bit more like the format that **bitmagnet** expects. The following is a starting point, please adapt it to your requirements:
@@ -111,7 +111,7 @@ sqlite3 -json -batch /path/to/your/rarbg_db.sqlite "$(cat rarbg-import.sql)" \
So what's happening here?
- First we are executing the SQL query we made above against the backup database; we tell SQLite to output the result as JSON. To test this bit in isolation you might try running just `sqlite3 -json -batch /path/to/your/rarbg_db.sqlite "$(cat rarbg-import.sql)"` (while testing you'll probably want to `limit` your results to say 10 or 100)
- Next we need to make some tweaks to the JSON structure, so we'll pipe the result into [jq](https://jqlang.github.io/jq/){:target="\_blank"}. You can add the line beginning `| jq` to the previous part to test what we have so far. Here we will:
- Next we need to make some tweaks to the JSON structure, so we'll pipe the result into [jq](https://jqlang.github.io/jq/). You can add the line beginning `| jq` to the previous part to test what we have so far. Here we will:
- Add a `source` field with value `rarbg`: each torrent stored in **bitmagnet** is associated with one or more sources, this association allows filtering by source within the search facility, and can carry some source-specific information such as an import ID, and numbers of seeders and leechers (more docs needed here!)
- Add the `contentSource` and `contentId` fields which **bitmagnet** expects, containing the IMDB ID, if it exists; these are not a required field, but if you know the external IMDB or TMDB ID of your content then it will give the classifier an easier job
- Delete the `imdb` field which won't be recognised by **bitmagnet**
@@ -2,9 +2,7 @@
title: Reprocess & Reclassify
parent: Tutorials
layout: default
nav_order: 3
redirect_from:
- /tutorials/importing.html
nav_order: 4
---
# Reprocess & Reclassify Torrents
@@ -28,6 +26,5 @@ To reprocess all torrents in your index, simply run `bitmagnet reprocess`. If yo
- `classifyMode`: This controls how already matched torrents are handled. A torrent is "matched" if it's associated with a specific piece of content from one of the API integrations (currently only TMDB). Making a lot of API calls can take a long time, so if items are already matched you might want to just do the other processing steps without re-matching them. The available modes are:
- `default`: Only attempt to match previously unmatched torrents
- `rematch`: Ignore any pre-existing match and always classify from scratch
- `skip`: Skip classification for previously unmatched torrents, unless they have a hint\* attached to them.
\*hints tell the classifier to use the hinted information instead of any classification results, which can save a lot of work for the classifier and help fix errors. Currently, the only way to add hints is by using [the `/import` endpoint](/tutorials/import.html).
@@ -2,12 +2,12 @@
title: Servarr Integration
parent: Tutorials
layout: default
nav_order: 5
nav_order: 6
---
# Servarr Integration
**bitmagnet**'s HTTP server exposes an endpoint at `/torznab`, allowing it to integrate with any application that supports [the Torznab specification](https://torznab.github.io/spec-1.3-draft/index.html){:target="\_blank"}, most notably apps in [the Servarr stack](https://wiki.servarr.com/){:target="\_blank"} (Prowlarr, Sonarr, Radarr etc.).
**bitmagnet**'s HTTP server exposes an endpoint at `/torznab`, allowing it to integrate with any application that supports [the Torznab specification](https://torznab.github.io/spec-1.3-draft/index.html), most notably apps in [the Servarr stack](https://wiki.servarr.com/) (Prowlarr, Sonarr, Radarr etc.).
## Adding **bitmagnet** as an indexer in Prowlarr
@@ -19,4 +19,4 @@ The required settings are fairly basic. Assuming you've adapted from the [exampl
![Prowlarr configure bitmagnet](/assets/images/prowlarr-2.png)
[Depending on your Prowlarr configuration](https://wiki.servarr.com/prowlarr/settings#applications){:target="\_blank"}, the **bitmagnet** indexer should now be synced to your other \*arr applications. Alternatively, you can add **bitmagnet** as an indexer directly in those applications, following the same steps as above.
[Depending on your Prowlarr configuration](https://wiki.servarr.com/prowlarr/settings#applications), the **bitmagnet** indexer should now be synced to your other \*arr applications. Alternatively, you can add **bitmagnet** as an indexer directly in those applications, following the same steps as above.
+11 -2
View File
@@ -16,6 +16,7 @@ require (
github.com/gin-gonic/gin v1.9.1
github.com/go-playground/validator/v10 v10.17.0
github.com/go-resty/resty/v2 v2.11.0
github.com/google/cel-go v0.20.0
github.com/grafana/pyroscope-go/godeltaprof v0.1.7
github.com/hashicorp/golang-lru/v2 v2.0.7
github.com/hedhyw/rex v0.6.0
@@ -37,12 +38,14 @@ require (
github.com/urfave/cli/v2 v2.27.1
github.com/vektah/gqlparser/v2 v2.5.11
github.com/vektra/mockery/v2 v2.40.1
github.com/xeipuuv/gojsonschema v1.2.0
go.uber.org/fx v1.20.1
go.uber.org/zap v1.26.0
golang.org/x/sync v0.6.0
golang.org/x/sys v0.16.0
golang.org/x/text v0.14.0
golang.org/x/time v0.5.0
google.golang.org/protobuf v1.33.0
gopkg.in/yaml.v3 v3.0.1
gorm.io/driver/postgres v1.5.4
gorm.io/gen v0.3.25
@@ -63,6 +66,7 @@ require (
github.com/anacrolix/multiless v0.3.1-0.20221221005021-2d12701f83f7 // indirect
github.com/anacrolix/stm v0.5.0 // indirect
github.com/anacrolix/sync v0.5.1 // indirect
github.com/antlr4-go/antlr/v4 v4.13.0 // indirect
github.com/benbjohnson/immutable v0.4.3 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/bits-and-blooms/bitset v1.13.0 // indirect
@@ -84,6 +88,7 @@ require (
github.com/go-sql-driver/mysql v1.7.1 // indirect
github.com/goccy/go-json v0.10.2 // indirect
github.com/golang/mock v1.6.0 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/gorilla/websocket v1.5.1 // indirect
@@ -121,7 +126,7 @@ require (
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.46.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
github.com/rivo/uniseg v0.4.7 // indirect
github.com/rogpeppe/go-internal v1.11.0 // indirect
github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529 // indirect
github.com/rs/zerolog v1.31.0 // indirect
@@ -137,10 +142,13 @@ require (
github.com/spf13/cobra v1.8.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/spf13/viper v1.18.2 // indirect
github.com/stoewer/go-strcase v1.2.0 // indirect
github.com/stretchr/objx v0.5.0 // indirect
github.com/subosito/gotenv v1.6.0 // indirect
github.com/twitchyliquid64/golang-asm v0.15.1 // indirect
github.com/ugorji/go/codec v1.2.12 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xrash/smetrics v0.0.0-20231213231151-1d8dd44e695e // indirect
go.opentelemetry.io/otel v1.22.0 // indirect
go.opentelemetry.io/otel/trace v1.22.0 // indirect
@@ -156,7 +164,8 @@ require (
golang.org/x/term v0.16.0 // indirect
golang.org/x/tools v0.17.0 // indirect
golang.org/x/tools/cmd/cover v0.1.0-deprecated // indirect
google.golang.org/protobuf v1.32.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231211222908-989df2bf70f3 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gorm.io/datatypes v1.2.0 // indirect
+15 -6
View File
@@ -92,6 +92,8 @@ github.com/andybalholm/brotli v1.0.6 h1:Yf9fFpf49Zrxb9NlQaluyE92/+X7UVHlhMNJN2sx
github.com/andybalholm/brotli v1.0.6/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
github.com/apache/thrift v0.12.0/go.mod h1:cp2SuWMxlEZw2r+iP2GNCdIi4C1qmUzdZFSVb+bacwQ=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0 h1:jfIu9sQUG6Ig+0+Ap1h4unLjW6YQJpKZVmUzxsD4E/Q=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
@@ -239,12 +241,14 @@ github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:x
github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs=
github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w=
github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0=
github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg=
github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q=
github.com/google/btree v0.0.0-20180124185431-e89373fe6b4a/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
github.com/google/cel-go v0.20.0 h1:h4n6DOCppEMpWERzllyNkntl7JrDyxoE543KWS6BLpc=
github.com/google/cel-go v0.20.0/go.mod h1:kWcIzTsPX0zmQ+H3TirHstLLf9ep5QTsZBN9u4dOYLg=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
@@ -452,8 +456,9 @@ github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqn
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis=
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M=
github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA=
@@ -507,6 +512,8 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg=
github.com/spf13/viper v1.18.2 h1:LUXCnvUvSM6FXAsj6nnfc8Q2tp1dIgUfY9Kc8GsSOiQ=
github.com/spf13/viper v1.18.2/go.mod h1:EKmWIqdnk5lOcmR72yw6hS+8OPYcwD0jteitLMVB+yk=
github.com/stoewer/go-strcase v1.2.0 h1:Z2iHWqGXH00XYgqDmNgQbIBxf3wrNq0F3feEy0ainaU=
github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag1KpM8ahLw8=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
@@ -547,6 +554,7 @@ github.com/vertica/vertica-sql-go v1.3.3 h1:fL+FKEAEy5ONmsvya2WH5T8bhkvY27y/Ik3R
github.com/vertica/vertica-sql-go v1.3.3/go.mod h1:jnn2GFuv+O2Jcjktb7zyc4Utlbu9YVqpHH/lx63+1M4=
github.com/willf/bitset v1.1.9/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/willf/bitset v1.1.10/go.mod h1:RjeCKbqT1RxIR/KWY6phxZiaY1IyutSBfGjNPySAYV4=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 h1:EzJWgHovont7NscjpAxXsDA8S8BMYve8Y5+7cuRE7R0=
@@ -716,7 +724,8 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7
google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc=
google.golang.org/genproto v0.0.0-20190307195333-5fe7a883aa19/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20190425155659-357c62f0e4bb/go.mod h1:VzzqZJRnGkLBvHegQrXjBqPurQTc5/KpmUdxsrq26oE=
google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17 h1:wpZ8pe2x1Q3f2KyT5f8oP/fa9rHAKgFPr/HZdNuS+PQ=
google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17 h1:JpwMPBpFN3uKhdaekDpiNlImDdkUAyiJ6ez/uxGaUSo=
google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17/go.mod h1:0xJLfVdJqpAPl8tDg1ujOCGzx6LFLttXT5NhllGOXY4=
google.golang.org/genproto/googleapis/rpc v0.0.0-20231211222908-989df2bf70f3 h1:kzJAXnzZoFbe5bhZd4zjUuHos/I31yH4thfMb/13oVY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20231211222908-989df2bf70f3/go.mod h1:eJVxU6o+4G1PSczBr85xmyvSNYAKvAYgkub40YGomFM=
google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
@@ -729,8 +738,8 @@ google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ
google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM=
google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE=
google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo=
google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I=
google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+5 -3
View File
@@ -1,8 +1,9 @@
package appfx
import (
"github.com/bitmagnet-io/bitmagnet/internal/app/cmd/classifiercmd"
"github.com/bitmagnet-io/bitmagnet/internal/app/cmd/processcmd"
"github.com/bitmagnet-io/bitmagnet/internal/app/cmd/reprocesscmd"
"github.com/bitmagnet-io/bitmagnet/internal/app/cmd/torrentcmd"
"github.com/bitmagnet-io/bitmagnet/internal/blocking/blockingfx"
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/app/boilerplateappfx"
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/httpserver/httpserverfx"
@@ -29,7 +30,6 @@ func New() fx.Option {
"app",
blockingfx.New(),
boilerplateappfx.New(),
classifierfx.New(),
dhtcrawlerfx.New(),
dhtfx.New(),
databasefx.New(),
@@ -43,10 +43,12 @@ func New() fx.Option {
tmdbfx.New(),
torznabfx.New(),
versionfx.New(),
classifierfx.New(),
// cli commands:
fx.Provide(
classifiercmd.New,
reprocesscmd.New,
torrentcmd.New,
processcmd.New,
),
fx.Provide(webui.New),
fx.Decorate(migrations.NewDecorator),
+81
View File
@@ -0,0 +1,81 @@
package classifiercmd
import (
"encoding/json"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/urfave/cli/v2"
"go.uber.org/fx"
"gopkg.in/yaml.v3"
"io"
)
// Params collects the fx-injected dependencies for the classifier CLI command.
type Params struct {
	fx.In
	// WorkflowSource lazily resolves the classifier workflow source document.
	WorkflowSource lazy.Lazy[classifier.Source]
}

// Result provides the constructed CLI command to the "commands" fx group.
type Result struct {
	fx.Out
	Command *cli.Command `group:"commands"`
}

// formatFlag selects the output encoding shared by both subcommands; yaml is the default.
var formatFlag = cli.StringFlag{
	Name:  "format",
	Usage: "Output format (json or yaml)",
	Value: "yaml",
}
// New constructs the "classifier" CLI command, with subcommands to display
// the active classifier workflow source and the JSON schema used to validate
// classifier documents.
func New(p Params) (Result, error) {
	showCommand := &cli.Command{
		Name:  "show",
		Usage: "Show the classifier workflow source",
		Flags: []cli.Flag{&formatFlag},
		Action: func(ctx *cli.Context) error {
			src, err := p.WorkflowSource.Get()
			if err != nil {
				return err
			}
			return write(ctx.App.Writer, src, ctx.String("format"))
		},
	}
	schemaCommand := &cli.Command{
		Name:  "schema",
		Usage: "Show the classifier JSON schema",
		Flags: []cli.Flag{&formatFlag},
		Action: func(ctx *cli.Context) error {
			return write(ctx.App.Writer, classifier.DefaultJsonSchema(), ctx.String("format"))
		},
	}
	return Result{Command: &cli.Command{
		Name:        "classifier",
		Subcommands: []*cli.Command{showCommand, schemaCommand},
	}}, nil
}
// write serializes src in the requested format ("json" or "yaml") and writes
// it to writer. JSON output is indented and newline-terminated; any other
// format is rejected with an error.
func write(writer io.Writer, src any, format string) error {
	var (
		data []byte
		err  error
	)
	switch format {
	case "json":
		data, err = json.MarshalIndent(src, "", "  ")
		// Terminate JSON output with a newline (yaml.Marshal already does this).
		data = append(data, '\n')
	case "yaml":
		data, err = yaml.Marshal(src)
	default:
		err = fmt.Errorf("unsupported format: %s", format)
	}
	if err != nil {
		return err
	}
	_, err = writer.Write(data)
	return err
}
+54
View File
@@ -0,0 +1,54 @@
package processcmd
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/processor"
"github.com/bitmagnet-io/bitmagnet/internal/protocol"
"github.com/urfave/cli/v2"
"go.uber.org/fx"
"go.uber.org/zap"
)
// Params collects the fx-injected dependencies for the process CLI command.
type Params struct {
	fx.In
	// Processor lazily resolves the torrent processing/classification pipeline.
	Processor lazy.Lazy[processor.Processor]
	Logger    *zap.SugaredLogger
}

// Result provides the constructed CLI command to the "commands" fx group.
type Result struct {
	fx.Out
	Command *cli.Command `group:"commands"`
}
// New constructs the "process" CLI command, which (re)classifies torrents.
// The --infoHash flag may be repeated to restrict processing to specific
// torrents; with no flags, all torrents are processed.
func New(p Params) (Result, error) {
	return Result{Command: &cli.Command{
		Name: "process",
		Flags: []cli.Flag{
			&cli.StringSliceFlag{
				Name: "infoHash",
			},
		},
		Action: func(ctx *cli.Context) error {
			pr, err := p.Processor.Get()
			if err != nil {
				return err
			}
			// Parse each --infoHash value; an empty slice means "process everything".
			var infoHashes []protocol.ID
			for _, infoHash := range ctx.StringSlice("infoHash") {
				id, parseErr := protocol.ParseID(infoHash)
				if parseErr != nil {
					return parseErr
				}
				infoHashes = append(infoHashes, id)
			}
			// Note: the dead `if err != nil` check that followed the loop has
			// been removed; err was already proven nil above and the loop's
			// error variable was scoped to the loop body.
			return pr.Process(ctx.Context, processor.MessageParams{
				ClassifyMode: processor.ClassifyModeRematch,
				InfoHashes:   infoHashes,
			})
		},
	}}, nil
}
+1 -4
View File
@@ -45,8 +45,7 @@ func New(p Params) (Result, error) {
Name: "classifyMode",
Value: "default",
Usage: "default (only attempt to match previously unmatched torrents);\n" +
"rematch (ignore any pre-existing match and always classify from scratch);\n" +
"skip (skip classification for previously unmatched torrents that don't have any hint)",
"rematch (ignore any pre-existing match and always classify from scratch)",
},
},
Action: func(ctx *cli.Context) error {
@@ -56,8 +55,6 @@ func New(p Params) (Result, error) {
classifyMode = processor.ClassifyModeDefault
case "rematch":
classifyMode = processor.ClassifyModeRematch
case "skip":
classifyMode = processor.ClassifyModeSkipUnmatched
default:
return cli.Exit("invalid classifyMode", 1)
}
-88
View File
@@ -1,88 +0,0 @@
package torrentcmd
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/processor"
"github.com/bitmagnet-io/bitmagnet/internal/protocol"
"github.com/bitmagnet-io/bitmagnet/internal/protocol/metainfo/metainforequester"
"github.com/urfave/cli/v2"
"go.uber.org/fx"
"go.uber.org/zap"
"net/netip"
)
type Params struct {
fx.In
MetaInfoRequester metainforequester.Requester
Processor lazy.Lazy[processor.Processor]
Logger *zap.SugaredLogger
}
type Result struct {
fx.Out
Command *cli.Command `group:"commands"`
}
func New(p Params) (Result, error) {
return Result{Command: &cli.Command{
Name: "torrent",
Subcommands: []*cli.Command{
{
Name: "process",
Flags: []cli.Flag{
&cli.StringSliceFlag{
Name: "infoHash",
},
},
Action: func(ctx *cli.Context) error {
pr, err := p.Processor.Get()
if err != nil {
return err
}
var infoHashes []protocol.ID
for _, infoHash := range ctx.StringSlice("infoHash") {
id, err := protocol.ParseID(infoHash)
if err != nil {
return err
}
infoHashes = append(infoHashes, id)
}
if err != nil {
return err
}
return pr.Process(ctx.Context, processor.MessageParams{
ClassifyMode: processor.ClassifyModeRematch,
InfoHashes: infoHashes,
})
},
},
{
Name: "requestMetaInfo",
Flags: []cli.Flag{
&cli.StringFlag{
Name: "infoHash",
},
&cli.StringFlag{
Name: "address",
},
},
Action: func(ctx *cli.Context) error {
infoHash, err := protocol.ParseID(ctx.String("infoHash"))
if err != nil {
return err
}
addr, err := netip.ParseAddrPort(ctx.String("address"))
if err != nil {
return err
}
info, err := p.MetaInfoRequester.Request(ctx.Context, protocol.ID(infoHash), addr)
if err != nil {
return err
}
p.Logger.Infow("got infoBytes", "info", info)
return nil
},
},
},
}}, nil
}
@@ -117,7 +117,7 @@ func CustomRecoveryWithZap(logger ZapLogger, stack bool, recovery gin.RecoveryFu
return func(c *gin.Context) {
defer func() {
if err := recover(); err != nil {
// Check for a broken connection, as it is not really a
// check for a broken connection, as it is not really a
// condition that warrants a panic stack trace.
var brokenPipe bool
if ne, ok := err.(*net.OpError); ok {
+72
View File
@@ -0,0 +1,72 @@
package classifier
import (
"errors"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
)
// actions returns a feature that registers the given action definitions with
// the compiler's feature set.
func actions(defs ...actionDefinition) feature {
	return func(c *features) {
		c.actions = append(c.actions, defs...)
	}
}

// actionCompiler compiles a raw source payload into an executable action.
type actionCompiler interface {
	compileAction(ctx compilerContext) (action, error)
}

// actionDefinition is a named, schema-documented action type that the
// compiler can attempt to match against a source payload.
type actionDefinition interface {
	HasJsonSchema
	name() string
	actionCompiler
}
// compileAction compiles a raw source payload — either a single action or a
// list of actions — into one composite action that runs each compiled action
// in sequence, threading the accumulated classification result through.
// For each raw action, every registered action definition is tried in turn;
// the first that compiles without error wins. A fatal compiler error aborts
// immediately; otherwise a "no action matched" error is recorded per
// unmatched payload and all such errors are joined and returned.
func (c compilerContext) compileAction(ctx compilerContext) (action, error) {
	var rawActions []any
	isArray := false
	if s, ok := ctx.source.([]any); ok {
		rawActions = s
		isArray = true
	} else {
		// A single action payload is treated as a one-element list.
		rawActions = []any{ctx.source}
	}
	var actions []action
	var errs []error
outer:
	for i, rawAction := range rawActions {
		actionCtx := ctx
		if isArray {
			// Extend the source path with the list index for error reporting.
			actionCtx = ctx.child(numericPathPart(i), rawAction)
		}
		for _, def := range c.actions {
			a, err := def.compileAction(actionCtx.child(def.name(), rawAction))
			if err == nil {
				actions = append(actions, a)
				continue outer
			}
			if asFatalCompilerError(err) != nil {
				return action{}, err
			}
		}
		errs = append(errs, fmt.Errorf("no action matched: %v", ctx.source))
	}
	if len(errs) > 0 {
		return action{}, errors.Join(errs...)
	}
	return action{func(ctx executionContext) (classification.Result, error) {
		for _, a := range actions {
			result, err := a.run(ctx)
			if err != nil {
				return classification.Result{}, err
			}
			ctx = ctx.withResult(result)
		}
		return ctx.result, nil
	}}, nil
	// Fix: the original returned errors.Join(errs...) here, which is always
	// nil at this point (the len(errs) > 0 case returned above) — replaced
	// with an explicit nil.
}
// action is a compiled classifier action: run executes it against the current
// execution context and returns the (possibly updated) classification result.
type action struct {
	run func(executionContext) (classification.Result, error)
}
+61
View File
@@ -0,0 +1,61 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
const addTagName = "add_tag"

// addTagAction adds one or more tags to the classification result.
type addTagAction struct{}

func (addTagAction) name() string {
	return addTagName
}

// tagPayloadSpec decodes a single tag string, validating it as a legal tag name.
var tagPayloadSpec = payloadTransformer[string, string]{
	spec: payloadGeneric[string]{
		jsonSchema: JsonSchema{
			"type": "string",
		},
	},
	transform: func(str string, _ compilerContext) (string, error) {
		if err := model.ValidateTagName(str); err != nil {
			return "", err
		}
		return str, nil
	},
}

// addTagPayloadSpec decodes the add_tag payload: a list of validated tag names.
var addTagPayloadSpec = payloadSingleKeyValue[[]string]{
	key: addTagName,
	valueSpec: payloadMustSucceed[[]string]{
		payloadList[string]{
			itemSpec: tagPayloadSpec,
		},
	},
	description: "Add one or more tags to the current torrent",
}
// compileAction decodes the tag list at compile time and returns an action
// that adds each tag to the result's tag set.
func (addTagAction) compileAction(ctx compilerContext) (action, error) {
	tags, unmarshalErr := addTagPayloadSpec.Unmarshal(ctx)
	if unmarshalErr != nil {
		return action{}, ctx.error(unmarshalErr)
	}
	return action{
		run: func(ec executionContext) (classification.Result, error) {
			result := ec.result
			// Lazily initialise the tag set on first use.
			if result.Tags == nil {
				result.Tags = make(map[string]struct{})
			}
			for _, t := range tags {
				result.Tags[t] = struct{}{}
			}
			return result, nil
		},
	}, nil
}
// JsonSchema returns the JSON schema fragment documenting the add_tag action.
func (addTagAction) JsonSchema() JsonSchema {
	return addTagPayloadSpec.JsonSchema()
}
@@ -0,0 +1,47 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
const attachLocalContentByIdName = "attach_local_content_by_id"

// attachLocalContentByIdAction attaches locally stored content to the result
// using the torrent's hint (content type/source/ID).
type attachLocalContentByIdAction struct{}

func (attachLocalContentByIdAction) name() string {
	return attachLocalContentByIdName
}

var attachLocalContentByIdPayloadSpec = payloadLiteral[string]{
	literal:     attachLocalContentByIdName,
	description: "Use the torrent hint to attach locally stored content by ID",
}

// compileAction validates the payload and returns an action that looks up
// content by the hint's (type, source, ID) triple via the local search service.
func (a attachLocalContentByIdAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := attachLocalContentByIdPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			// Without a usable hint there is nothing to look up.
			if ctx.torrent.Hint.IsNil() || !ctx.torrent.Hint.ContentSource.Valid {
				return cl, classification.ErrUnmatched
			}
			content, err := ctx.search.ContentById(ctx, model.ContentRef{
				Type:   ctx.torrent.Hint.ContentType,
				Source: ctx.torrent.Hint.ContentSource.String,
				ID:     ctx.torrent.Hint.ContentID.String,
			})
			if err != nil {
				return cl, err
			}
			cl.AttachContent(&content)
			return cl, nil
		},
	}, nil
}

func (a attachLocalContentByIdAction) JsonSchema() JsonSchema {
	return attachLocalContentByIdPayloadSpec.JsonSchema()
}
@@ -0,0 +1,42 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
)
const attachLocalContentBySearchName = "attach_local_content_by_search"

// attachLocalContentBySearchAction attaches locally stored content found by
// searching on the classified base title.
type attachLocalContentBySearchAction struct{}

func (attachLocalContentBySearchAction) name() string {
	return attachLocalContentBySearchName
}

var attachLocalContentBySearchPayloadSpec = payloadLiteral[string]{
	literal:     attachLocalContentBySearchName,
	description: "Attempt to attach local content with a search on the torrent name",
}

// compileAction validates the payload and returns an action that searches the
// local database by content type, base title and year.
func (a attachLocalContentBySearchAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := attachLocalContentBySearchPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			// A search needs both a content type and a base title on the result.
			if !cl.ContentType.Valid || !cl.BaseTitle.Valid {
				return cl, classification.ErrUnmatched
			}
			content, err := ctx.search.ContentBySearch(ctx.Context, cl.ContentType.ContentType, cl.BaseTitle.String, cl.Date.Year)
			if err != nil {
				return cl, err
			}
			cl.AttachContent(&content)
			return cl, nil
		},
	}, nil
}

func (attachLocalContentBySearchAction) JsonSchema() JsonSchema {
	return attachLocalContentBySearchPayloadSpec.JsonSchema()
}
@@ -0,0 +1,78 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"strconv"
)
const attachTmdbContentByIdName = "attach_tmdb_content_by_id"

// attachTmdbContentByIdAction attaches content fetched from the TMDB API,
// resolving the ID from the torrent's hint.
type attachTmdbContentByIdAction struct{}

func (attachTmdbContentByIdAction) name() string {
	return attachTmdbContentByIdName
}

var attachTmdbContentByIdPayloadSpec = payloadLiteral[string]{
	literal:     attachTmdbContentByIdName,
	description: "Use the torrent hint to attach content from the TMDB API by ID",
}

// compileAction validates the payload and returns an action that resolves the
// hint's content reference to a TMDB ID (directly for TMDB-sourced hints,
// otherwise via an external-ID lookup) and fetches the movie or TV show.
func (a attachTmdbContentByIdAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := attachTmdbContentByIdPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			var ref model.ContentRef
			if maybeRef := ctx.torrent.Hint.ContentRef(); !maybeRef.Valid {
				return cl, classification.ErrUnmatched
			} else {
				ref = maybeRef.Val
			}
			// A content type already established on the result overrides the hint's.
			if cl.ContentType.Valid {
				ref.Type = cl.ContentType.ContentType
			}
			var tmdbId int64
			switch ref.Source {
			case model.SourceTmdb:
				// The hint already carries a TMDB ID; a malformed one is unmatched.
				if id, err := strconv.Atoi(ref.ID); err != nil {
					return cl, classification.ErrUnmatched
				} else {
					tmdbId = int64(id)
				}
			default:
				// Resolve an external-source ID to a TMDB ID via the API.
				if id, err := ctx.tmdb_getTmdbIdByExternalId(ref); err != nil {
					return cl, err
				} else {
					tmdbId = id
				}
			}
			var content *model.Content
			switch ref.Type {
			case model.ContentTypeMovie, model.ContentTypeXxx:
				// NOTE(review): "Tmbd" in the helper names looks like a typo for
				// "Tmdb" but matches the project-defined method — do not change here.
				if c, err := ctx.tmdb_getMovieByTmbdId(tmdbId); err != nil {
					return cl, err
				} else {
					content = &c
				}
			case model.ContentTypeTvShow:
				if c, err := ctx.tmdb_getTvShowByTmbdId(tmdbId); err != nil {
					return cl, err
				} else {
					content = &c
				}
			default:
				// Only movies, xxx and TV shows can be fetched by TMDB ID.
				return cl, classification.ErrUnmatched
			}
			cl.AttachContent(content)
			return cl, nil
		},
	}, nil
}

func (attachTmdbContentByIdAction) JsonSchema() JsonSchema {
	return attachTmdbContentByIdPayloadSpec.JsonSchema()
}
@@ -0,0 +1,57 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
const attachTmdbContentBySearchName = "attach_tmdb_content_by_search"

// attachTmdbContentBySearchAction attaches content by searching the TMDB API
// with the classified base title (and year, when known).
type attachTmdbContentBySearchAction struct{}

func (attachTmdbContentBySearchAction) name() string {
	return attachTmdbContentBySearchName
}

var attachTmdbContentBySearchPayloadSpec = payloadLiteral[string]{
	literal:     attachTmdbContentBySearchName,
	description: "Attempt to attach content from the TMDB API with a search on the torrent name",
}

// compileAction validates the payload and returns an action that searches
// TMDB: a TV-show result searches the TV endpoint; anything else searches
// movies, unless episodes were parsed (which rules a movie out).
func (a attachTmdbContentBySearchAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := attachTmdbContentBySearchPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			if !cl.BaseTitle.Valid {
				return cl, classification.ErrUnmatched
			}
			var content *model.Content
			switch cl.ContentType.ContentType {
			case model.ContentTypeTvShow:
				if result, searchErr := ctx.tmdb_searchTvShow(cl.BaseTitle.String, cl.Date.Year); searchErr != nil {
					return cl, searchErr
				} else {
					content = &result
				}
			default:
				// A torrent with parsed episodes cannot be a movie.
				if len(cl.Episodes) > 0 {
					return cl, classification.ErrUnmatched
				}
				if result, searchErr := ctx.tmdb_searchMovie(cl.BaseTitle.String, cl.Date.Year); searchErr != nil {
					return cl, searchErr
				} else {
					content = &result
				}
			}
			cl.AttachContent(content)
			return cl, nil
		},
	}, nil
}

func (attachTmdbContentBySearchAction) JsonSchema() JsonSchema {
	return attachTmdbContentBySearchPayloadSpec.JsonSchema()
}
+32
View File
@@ -0,0 +1,32 @@
package classifier
import "github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
const deleteName = "delete"

// deleteAction flags the current torrent for deletion.
type deleteAction struct{}

func (deleteAction) name() string {
	return deleteName
}

var deletePayloadSpec = payloadLiteral[string]{
	literal:     deleteName,
	description: "Delete the current torrent",
}

// compileAction validates the payload and returns an action that signals
// deletion via ErrDeleteTorrent, annotated with the source path for diagnostics.
func (deleteAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := deletePayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	// Capture the compile-time path so runtime errors can point at the source.
	path := ctx.path
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			return ctx.result, classification.RuntimeError{Cause: classification.ErrDeleteTorrent, Path: path}
		},
	}, nil
}

func (deleteAction) JsonSchema() JsonSchema {
	return deletePayloadSpec.JsonSchema()
}
+63
View File
@@ -0,0 +1,63 @@
package classifier
import (
"errors"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
)
const findMatchName = "find_match"

// findMatchAction tries a list of actions in order, committing to the first
// one that does not fail with an unmatched error.
type findMatchAction struct{}

func (findMatchAction) name() string {
	return findMatchName
}

var findMatchActionPayloadSpec = payloadSingleKeyValue[[]any]{
	key: findMatchName,
	valueSpec: payloadMustSucceed[[]any]{payloadList[any]{itemSpec: payloadGeneric[any]{
		jsonSchema: map[string]any{
			"$ref": "#/definitions/action_single",
		},
	}}},
	description: "Iterate through a series of actions to find the first that does not return an unmatched error",
}

// compileAction compiles each child action, returning an action that runs
// them in sequence until one matches.
func (findMatchAction) compileAction(ctx compilerContext) (action, error) {
	payload, err := findMatchActionPayloadSpec.Unmarshal(ctx)
	if err != nil {
		return action{}, ctx.error(err)
	}
	actions := make([]action, len(payload))
	for i, actionPayload := range payload {
		a, err := ctx.compileAction(ctx.child(numericPathPart(i), actionPayload))
		if err != nil {
			return action{}, err
		}
		actions[i] = a
	}
	// Capture the compile-time path so runtime errors can point at the source.
	path := ctx.path
	return action{
		func(ctx executionContext) (classification.Result, error) {
			for _, action := range actions {
				result, err := action.run(ctx)
				if err != nil {
					// An unmatched error means "try the next action"; any other
					// error aborts with the source path attached.
					if errors.Is(err, classification.ErrUnmatched) {
						continue
					}
					return classification.Result{}, classification.RuntimeError{
						Cause: err,
						Path:  path,
					}
				} else {
					return result, nil
				}
			}
			// No action matched: leave the result unchanged.
			return ctx.result, nil
		},
	}, nil
}

func (findMatchAction) JsonSchema() JsonSchema {
	return findMatchActionPayloadSpec.JsonSchema()
}
+86
View File
@@ -0,0 +1,86 @@
package classifier
import "github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
// ifElseAction executes one of two optional actions depending on a condition.
type ifElseAction struct{}

const ifElseName = "if_else"

func (ifElseAction) name() string {
	return ifElseName
}

// ifElsePayload holds the raw (uncompiled) condition and branch actions;
// either branch may be nil, in which case that branch is a no-op.
type ifElsePayload struct {
	Condition  any
	IfAction   any
	ElseAction any
}

var ifElsePayloadSpec = payloadSingleKeyValue[ifElsePayload]{
	key: ifElseName,
	valueSpec: payloadMustSucceed[ifElsePayload]{payloadStruct[ifElsePayload]{
		jsonSchema: map[string]any{
			"type": "object",
			"properties": map[string]any{
				"condition": map[string]any{
					"$ref": "#/definitions/condition",
				},
				"if_action": map[string]any{
					"$ref": "#/definitions/action",
				},
				"else_action": map[string]any{
					"$ref": "#/definitions/action",
				},
			},
			"required":             []string{"condition"},
			"additionalProperties": false,
		},
	}},
	description: "Execute an action based on a condition",
}

// compileAction compiles the condition and both optional branches, returning
// an action that evaluates the condition and runs the matching branch (or
// leaves the result unchanged when that branch is absent).
func (ifElseAction) compileAction(ctx compilerContext) (action, error) {
	p, decodeErr := ifElsePayloadSpec.Unmarshal(ctx)
	if decodeErr != nil {
		return action{}, ctx.error(decodeErr)
	}
	cond, cErr := ctx.compileCondition(ctx.child("condition", p.Condition))
	if cErr != nil {
		return action{}, ctx.error(cErr)
	}
	var ifAction, elseAction action
	if p.IfAction != nil {
		pIfAction, ifErr := ctx.compileAction(ctx.child("if_action", p.IfAction))
		if ifErr != nil {
			return action{}, ctx.error(ifErr)
		}
		ifAction = pIfAction
	}
	if p.ElseAction != nil {
		pElseAction, err := ctx.compileAction(ctx.child("else_action", p.ElseAction))
		if err != nil {
			return action{}, ctx.error(err)
		}
		elseAction = pElseAction
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			if result, err := cond.check(ctx); err != nil {
				return classification.Result{}, err
			} else if result {
				// A nil run means the branch was omitted: fall through unchanged.
				if ifAction.run != nil {
					return ifAction.run(ctx)
				}
			} else {
				if elseAction.run != nil {
					return elseAction.run(ctx)
				}
			}
			return ctx.result, nil
		},
	}, nil
}

func (ifElseAction) JsonSchema() JsonSchema {
	return ifElsePayloadSpec.JsonSchema()
}
+40
View File
@@ -0,0 +1,40 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/parsers"
)
const parseDateName = "parse_date"

// parseDateAction extracts a release date from the torrent name.
type parseDateAction struct{}

func (parseDateAction) name() string {
	return parseDateName
}

var parseDatePayloadSpec = payloadLiteral[string]{
	literal:     parseDateName,
	description: "Try to parse a date from the name of the current torrent",
}

// compileAction validates the payload and returns an action that parses a
// date from the torrent name, failing as unmatched when none is found.
func (parseDateAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := parseDatePayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			parsed := parsers.ParseDate(ctx.torrent.Name)
			if parsed.IsNil() {
				return ctx.result, classification.ErrUnmatched
			}
			cl := ctx.result
			cl.Date = parsed
			return cl, nil
		},
	}, nil
}

func (parseDateAction) JsonSchema() JsonSchema {
	return parseDatePayloadSpec.JsonSchema()
}
@@ -0,0 +1,40 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/parsers"
)
const parseVideoContentName = "parse_video_content"

// parseVideoContentAction parses video-related attributes from the torrent
// and merges them into the classification result.
type parseVideoContentAction struct{}

func (parseVideoContentAction) name() string {
	return parseVideoContentName
}

var parseVideoContentPayloadSpec = payloadLiteral[string]{
	literal:     parseVideoContentName,
	description: "Parse video-related attributes from the name of the current torrent",
}

// compileAction validates the payload and returns an action that runs the
// video parser and merges its output into the current result.
func (parseVideoContentAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := parseVideoContentPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			parsed, err := parsers.ParseVideoContent(ctx.torrent, ctx.result)
			cl := ctx.result
			if err != nil {
				return cl, err
			}
			cl.Merge(parsed)
			return cl, nil
		},
	}, nil
}

func (parseVideoContentAction) JsonSchema() JsonSchema {
	return parseVideoContentPayloadSpec.JsonSchema()
}
@@ -0,0 +1,58 @@
package classifier
import (
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
)
const runWorkflowName = "run_workflow"

// runWorkflowAction invokes one or more named workflows from within the
// current workflow.
type runWorkflowAction struct{}

func (runWorkflowAction) name() string {
	return runWorkflowName
}

var runWorkflowPayloadSpec = payloadSingleKeyValue[[]string]{
	key: runWorkflowName,
	valueSpec: payloadMustSucceed[[]string]{
		payloadList[string]{
			itemSpec: payloadGeneric[string]{
				jsonSchema: map[string]interface{}{
					"type":      "string",
					"minLength": 1,
				},
			},
		},
	},
	description: "Run a different workflow within the current workflow",
}

// compileAction verifies the referenced workflow names exist at compile time
// (a missing name is a fatal error) and returns an action that runs each
// named workflow in turn, threading the result through.
func (runWorkflowAction) compileAction(ctx compilerContext) (action, error) {
	names, err := runWorkflowPayloadSpec.Unmarshal(ctx)
	if err != nil {
		return action{}, ctx.error(err)
	}
	for _, name := range names {
		if _, ok := ctx.workflowNames[name]; !ok {
			return action{}, ctx.fatal(fmt.Errorf("workflow %s not found", name))
		}
	}
	return action{
		func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			for _, name := range names {
				if nextCl, err := ctx.workflows[name].run(ctx.withResult(cl)); err != nil {
					return cl, err
				} else {
					cl = nextCl
				}
			}
			return cl, nil
		},
	}, nil
}

func (runWorkflowAction) JsonSchema() JsonSchema {
	return runWorkflowPayloadSpec.JsonSchema()
}
@@ -0,0 +1,40 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
const setContentTypeName = "set_content_type"

// setContentTypeAction overwrites the content type on the result.
type setContentTypeAction struct{}

func (setContentTypeAction) name() string {
	return setContentTypeName
}

var setContentTypePayloadSpec = payloadSingleKeyValue[model.NullContentType]{
	key: setContentTypeName,
	valueSpec: payloadMustSucceed[model.NullContentType]{
		payload: contentTypePayloadSpec,
	},
	description: "Set the content type of the current torrent",
}

// compileAction decodes the target content type at compile time and returns
// an action that sets it on the result.
func (setContentTypeAction) compileAction(ctx compilerContext) (action, error) {
	contentType, err := setContentTypePayloadSpec.Unmarshal(ctx)
	if err != nil {
		return action{}, ctx.error(err)
	}
	return action{
		func(ctx executionContext) (classification.Result, error) {
			cl := ctx.result
			cl.ContentType = contentType
			return cl, nil
		},
	}, nil
}

func (setContentTypeAction) JsonSchema() JsonSchema {
	return setContentTypePayloadSpec.JsonSchema()
}
+32
View File
@@ -0,0 +1,32 @@
package classifier
import "github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
const unmatchedName = "unmatched"

// unmatchedAction always fails classification with an unmatched error.
type unmatchedAction struct{}

func (unmatchedAction) name() string {
	return unmatchedName
}
// unmatchedPayloadSpec matches the bare "unmatched" literal in a workflow source.
var unmatchedPayloadSpec = payloadLiteral[string]{
	literal: unmatchedName,
	// Grammar fix: "a unmatched" -> "an unmatched" (this text is surfaced in
	// the generated JSON schema).
	description: "Return an unmatched error for the current torrent",
}
// compileAction validates the payload and returns an action that always
// returns ErrUnmatched, annotated with the source path for diagnostics.
func (unmatchedAction) compileAction(ctx compilerContext) (action, error) {
	if _, err := unmatchedPayloadSpec.Unmarshal(ctx); err != nil {
		return action{}, ctx.error(err)
	}
	// Capture the compile-time path so runtime errors can point at the source.
	path := ctx.path
	return action{
		run: func(ctx executionContext) (classification.Result, error) {
			return ctx.result, classification.RuntimeError{Cause: classification.ErrUnmatched, Path: path}
		},
	}, nil
}

// JsonSchema returns the JSON schema fragment documenting the unmatched action.
func (unmatchedAction) JsonSchema() JsonSchema {
	return unmatchedPayloadSpec.JsonSchema()
}
+112
View File
@@ -0,0 +1,112 @@
package classifier
import (
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/keywords"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/protobuf"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/ext"
)
// celEnvOption builds the CEL environment used to compile workflow condition
// expressions — exposing the torrent/result proto types plus constants
// derived from the source document (flags, keywords, extensions, file and
// content type enums, and byte-size units) — and stores it on the compiler
// context. Returns an error if any flag value or keyword regex is invalid.
func celEnvOption(src Source, ctx *compilerContext) error {
	options := []cel.EnvOption{
		cel.StdLib(),
		Lists(),
		cel.EagerlyValidateDeclarations(true),
		cel.ExtendedValidations(),
		ext.Strings(ext.StringsValidateFormatCalls(true)),
		cel.Types(&protobuf.Torrent{}, &protobuf.Classification{}),
		cel.Variable("torrent", cel.ObjectType("bitmagnet.Torrent")),
		cel.Variable("result", cel.ObjectType("bitmagnet.Classification")),
	}
	// `flags` is masquerading as a map of strings to regexes, but it's actually individual string constants defined with a dot in the name,
	// along with a placeholder map of strings to nulls. This achieves correct compile-time checking with acceptable error messages.
	for name, tp := range src.FlagDefinitions {
		rawVal := src.Flags[name]
		val, err := tp.celVal(rawVal)
		if err != nil {
			return err
		}
		options = append(
			options,
			cel.Constant("flags."+name, tp.celType(), val),
		)
	}
	options = append(
		options,
		cel.Constant("flags", cel.MapType(cel.StringType, cel.NullType), types.NullValue),
	)
	// `keywords`, `extensions` etc use a similar trick.
	for group, kws := range src.Keywords {
		// Each keyword group is pre-compiled into a single regex string constant.
		r, err := keywords.NewRegexFromKeywords(kws...)
		if err != nil {
			return err
		}
		options = append(
			options,
			cel.Constant("keywords."+group, cel.StringType, types.String(r.String())),
		)
	}
	options = append(
		options,
		cel.Constant("keywords", cel.MapType(cel.StringType, cel.NullType), types.NullValue),
	)
	for group, extensions := range src.Extensions {
		options = append(
			options,
			cel.Constant("extensions."+group, cel.ListType(cel.StringType), types.NewStringList(types.DefaultTypeAdapter, extensions)),
		)
	}
	options = append(
		options,
		cel.Constant("extensions", cel.MapType(cel.StringType, cel.NullType), types.NullValue),
	)
	// Expose fileType.* constants mirroring the proto file-type enum.
	options = append(
		options,
		cel.Constant("fileType.unknown", cel.IntType, types.Int(protobuf.Torrent_File_unknown)),
	)
	for _, ft := range model.FileTypeValues() {
		options = append(
			options,
			cel.Constant(fmt.Sprintf("fileType.%s", ft.String()), cel.IntType, types.Int(protobuf.NewFileType(model.NullFileType{Valid: true, FileType: ft}))),
		)
	}
	options = append(
		options,
		cel.Constant("fileType", cel.MapType(cel.StringType, cel.NullType), types.NullValue),
	)
	// Expose contentType.* constants mirroring the proto content-type enum.
	options = append(
		options,
		cel.Constant("contentType.unknown", cel.IntType, types.Int(protobuf.Classification_unknown)),
	)
	for _, ct := range model.ContentTypeValues() {
		options = append(
			options,
			cel.Constant(fmt.Sprintf("contentType.%s", ct.String()), cel.IntType, types.Int(protobuf.NewContentType(model.NullContentType{Valid: true, ContentType: ct}))),
		)
	}
	options = append(
		options,
		cel.Constant("contentType", cel.MapType(cel.StringType, cel.NullType), types.NullValue),
	)
	// Byte-size convenience constants (decimal units) for size comparisons.
	options = append(
		options,
		cel.Constant("kb", cel.IntType, types.Int(1_000)),
	)
	options = append(
		options,
		cel.Constant("mb", cel.IntType, types.Int(1_000_000)),
	)
	options = append(
		options,
		cel.Constant("gb", cel.IntType, types.Int(1_000_000_000)),
	)
	env, err := cel.NewCustomEnv(options...)
	if err != nil {
		return err
	}
	ctx.celEnv = env
	return nil
}
+315
View File
@@ -0,0 +1,315 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package classifier
import (
"fmt"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/common/types/ref"
"github.com/google/cel-go/common/types/traits"
"github.com/google/cel-go/interpreter/functions"
)
// Lists provides a CEL function library extension of list utility functions.
//
// isSorted
//
// Returns true if the provided list of comparable elements is sorted, else returns false.
//
// <list<T>>.isSorted() <bool>, T must be a comparable type
//
// Examples:
//
// [1, 2, 3].isSorted() // return true
// ['a', 'b', 'b', 'c'].isSorted() // return true
// [2.0, 1.0].isSorted() // return false
// [1].isSorted() // return true
// [].isSorted() // return true
//
// sum
//
// Returns the sum of the elements of the provided list. Supports CEL number (int, uint, double) and duration types.
//
// <list<T>>.sum() <T>, T must be a numeric type or a duration
//
// Examples:
//
// [1, 3].sum() // returns 4
// [1.0, 3.0].sum() // returns 4.0
// ['1m', '1s'].sum() // returns '1m1s'
// emptyIntList.sum() // returns 0
// emptyDoubleList.sum() // returns 0.0
// [].sum() // returns 0
//
// min / max
//
// Returns the minimum/maximum valued element of the provided list. Supports all comparable types.
// If the list is empty, an error is returned.
//
// <list<T>>.min() <T>, T must be a comparable type
// <list<T>>.max() <T>, T must be a comparable type
//
// Examples:
//
// [1, 3].min() // returns 1
// [1, 3].max() // returns 3
// [].min() // error
// [1].min() // returns 1
// ([0] + emptyList).min() // returns 0
//
// indexOf / lastIndexOf
//
// Returns either the first or last positional index of the provided element in the list.
// If the element is not found, -1 is returned. Supports all equatable types.
//
// <list<T>>.indexOf(<T>) <int>, T must be an equatable type
// <list<T>>.lastIndexOf(<T>) <int>, T must be an equatable type
//
// Examples:
//
// [1, 2, 2, 3].indexOf(2) // returns 1
// ['a', 'b', 'b', 'c'].lastIndexOf('b') // returns 2
// [1.0].indexOf(1.1) // returns -1
// [].indexOf('string') // returns -1
// Lists returns a CEL environment option that installs this list-helper
// function library (isSorted, sum, min, max, indexOf, lastIndexOf).
func Lists() cel.EnvOption {
	return cel.Lib(listsLib)
}

// listsLib is the shared, stateless singleton instance of the library.
var listsLib = &lists{}

// lists implements cel.Library; it carries no state.
type lists struct{}

// LibraryName identifies this library to the CEL environment.
func (*lists) LibraryName() string {
	return "k8s.lists"
}
// paramA is the generic CEL type parameter used by the indexOf/lastIndexOf overloads.
var paramA = cel.TypeParamType("A")

// CEL typeParams can be used to constraint to a specific trait (e.g. traits.ComparableType) if the 1st operand is the type to constrain.
// But the functions we need to constrain are <list<paramType>>, not just <paramType>.
// Make sure the order of overload set is deterministic

// namedCELType pairs a CEL type with a stable name used to build
// deterministic overload identifiers.
type namedCELType struct {
	typeName string
	celType  *cel.Type
}

// summableTypes are the element types supported by sum().
var summableTypes = []namedCELType{
	{typeName: "int", celType: cel.IntType},
	{typeName: "uint", celType: cel.UintType},
	{typeName: "double", celType: cel.DoubleType},
	{typeName: "duration", celType: cel.DurationType},
}

// zeroValuesOfSummableTypes supplies the additive identity for each summable
// type, used to seed the accumulator so summing an empty list yields zero.
var zeroValuesOfSummableTypes = map[string]ref.Val{
	"int":      types.Int(0),
	"uint":     types.Uint(0),
	"double":   types.Double(0.0),
	"duration": types.Duration{Duration: 0},
}

// comparableTypes are the element types supported by isSorted/min/max.
var comparableTypes = []namedCELType{
	{typeName: "int", celType: cel.IntType},
	{typeName: "uint", celType: cel.UintType},
	{typeName: "double", celType: cel.DoubleType},
	{typeName: "bool", celType: cel.BoolType},
	{typeName: "duration", celType: cel.DurationType},
	{typeName: "timestamp", celType: cel.TimestampType},
	{typeName: "string", celType: cel.StringType},
	{typeName: "bytes", celType: cel.BytesType},
}
// WARNING: All library additions or modifications must follow
// https://github.com/kubernetes/enhancements/tree/master/keps/sig-api-machinery/2876-crd-validation-expression-language#function-library-updates

// listsLibraryDecls maps each exposed function name to its member overloads.
// One overload is generated per supported element type (so overload IDs are
// stable and deterministic); indexOf/lastIndexOf instead use a single
// overload generic over the element type paramA.
var listsLibraryDecls = map[string][]cel.FunctionOpt{
	"isSorted": templatedOverloads(comparableTypes, func(name string, paramType *cel.Type) cel.FunctionOpt {
		return cel.MemberOverload(fmt.Sprintf("list_%s_is_sorted_bool", name),
			[]*cel.Type{cel.ListType(paramType)}, cel.BoolType, cel.UnaryBinding(isSorted))
	}),
	"sum": templatedOverloads(summableTypes, func(name string, paramType *cel.Type) cel.FunctionOpt {
		return cel.MemberOverload(fmt.Sprintf("list_%s_sum_%s", name, name),
			[]*cel.Type{cel.ListType(paramType)}, paramType, cel.UnaryBinding(func(list ref.Val) ref.Val {
				// Seed the accumulator with the zero value of this element type.
				return sum(
					func() ref.Val {
						return zeroValuesOfSummableTypes[name]
					})(list)
			}))
	}),
	"max": templatedOverloads(comparableTypes, func(name string, paramType *cel.Type) cel.FunctionOpt {
		return cel.MemberOverload(fmt.Sprintf("list_%s_max_%s", name, name),
			[]*cel.Type{cel.ListType(paramType)}, paramType, cel.UnaryBinding(max()))
	}),
	"min": templatedOverloads(comparableTypes, func(name string, paramType *cel.Type) cel.FunctionOpt {
		return cel.MemberOverload(fmt.Sprintf("list_%s_min_%s", name, name),
			[]*cel.Type{cel.ListType(paramType)}, paramType, cel.UnaryBinding(min()))
	}),
	"indexOf": {
		cel.MemberOverload("list_a_index_of_int", []*cel.Type{cel.ListType(paramA), paramA}, cel.IntType,
			cel.BinaryBinding(indexOf)),
	},
	"lastIndexOf": {
		cel.MemberOverload("list_a_last_index_of_int", []*cel.Type{cel.ListType(paramA), paramA}, cel.IntType,
			cel.BinaryBinding(lastIndexOf)),
	},
}
// CompileOptions declares every function in this library to the CEL compiler.
func (*lists) CompileOptions() []cel.EnvOption {
	opts := make([]cel.EnvOption, 0, len(listsLibraryDecls))
	for fnName, fnOverloads := range listsLibraryDecls {
		opts = append(opts, cel.Function(fnName, fnOverloads...))
	}
	return opts
}

// ProgramOptions contributes no program-time options for this library.
func (*lists) ProgramOptions() []cel.ProgramOption {
	return []cel.ProgramOption{}
}
// isSorted reports whether the iterable's elements are in non-decreasing
// order. Non-iterable inputs and non-comparable elements yield a
// no-such-overload error.
func isSorted(val ref.Val) ref.Val {
	iterable, ok := val.(traits.Iterable)
	if !ok {
		return types.MaybeNoSuchOverloadErr(val)
	}
	var last traits.Comparer
	it := iterable.Iterator()
	for it.HasNext() == types.True {
		cur := it.Next()
		curCmp, ok := cur.(traits.Comparer)
		if !ok {
			return types.MaybeNoSuchOverloadErr(cur)
		}
		// A previous element strictly greater than the current one means unsorted.
		if last != nil && last.Compare(cur) == types.IntOne {
			return types.False
		}
		last = curCmp
	}
	return types.True
}
// sum returns a unary op that folds the elements of an iterable with Add,
// starting from the value produced by init (the type's additive identity, so
// an empty list sums to zero).
//
// Fix: the original kept a dead `acc != nil` / `else` branch — acc is
// assigned from init() and type-checked before the loop, so it can never be
// nil; the unreachable branch has been removed with no behavior change.
func sum(init func() ref.Val) functions.UnaryOp {
	return func(val ref.Val) ref.Val {
		i := init()
		acc, ok := i.(traits.Adder)
		if !ok {
			// Should never happen since all passed in init values are valid
			return types.MaybeNoSuchOverloadErr(i)
		}
		iterable, ok := val.(traits.Iterable)
		if !ok {
			return types.MaybeNoSuchOverloadErr(val)
		}
		for it := iterable.Iterator(); it.HasNext() == types.True; {
			next := it.Next()
			if _, ok := next.(traits.Adder); !ok {
				// Should never happen for type checked CEL programs
				return types.MaybeNoSuchOverloadErr(next)
			}
			s := acc.Add(next)
			sAdder, ok := s.(traits.Adder)
			if !ok {
				// Should never happen for type checked CEL programs
				return types.MaybeNoSuchOverloadErr(s)
			}
			acc = sAdder
		}
		return acc.(ref.Val)
	}
}
// min returns an op selecting the smallest element: the current best is
// replaced when it compares greater (IntOne) than the candidate.
func min() functions.UnaryOp {
	return cmp("min", types.IntOne)
}

// max returns an op selecting the largest element: the current best is
// replaced when it compares smaller (IntNegOne) than the candidate.
func max() functions.UnaryOp {
	return cmp("max", types.IntNegOne)
}
// cmp builds a unary op that scans an iterable and keeps the element
// preferred by opPreferCmpResult (IntOne selects minima, IntNegOne maxima).
// An empty list produces an error naming opName.
func cmp(opName string, opPreferCmpResult ref.Val) functions.UnaryOp {
	return func(val ref.Val) ref.Val {
		iterable, ok := val.(traits.Iterable)
		if !ok {
			return types.MaybeNoSuchOverloadErr(val)
		}
		var best traits.Comparer
		for it := iterable.Iterator(); it.HasNext() == types.True; {
			candidate := it.Next()
			candidateCmp, ok := candidate.(traits.Comparer)
			if !ok {
				// Should never happen for type checked CEL programs
				return types.MaybeNoSuchOverloadErr(candidate)
			}
			switch {
			case best == nil:
				best = candidateCmp
			case best.Compare(candidate) == opPreferCmpResult:
				best = candidateCmp
			}
		}
		if best == nil {
			return types.NewErr("%s called on empty list", opName)
		}
		return best.(ref.Val)
	}
}
// indexOf returns the first position of item in list, or -1 if absent.
func indexOf(list ref.Val, item ref.Val) ref.Val {
	lister, ok := list.(traits.Lister)
	if !ok {
		return types.MaybeNoSuchOverloadErr(list)
	}
	size := lister.Size().(types.Int)
	for idx := types.Int(0); idx < size; idx++ {
		if lister.Get(idx).Equal(item) == types.True {
			return idx
		}
	}
	return types.Int(-1)
}
// lastIndexOf returns the last position of item in list, or -1 if absent.
func lastIndexOf(list ref.Val, item ref.Val) ref.Val {
	lister, ok := list.(traits.Lister)
	if !ok {
		return types.MaybeNoSuchOverloadErr(list)
	}
	size := lister.Size().(types.Int)
	for idx := size - 1; idx >= 0; idx-- {
		if lister.Get(idx).Equal(item) == types.True {
			return idx
		}
	}
	return types.Int(-1)
}
// templatedOverloads returns overloads for each of the provided types. The template function is called with each type
// name (map key) and type to construct the overloads.
//
// Fix: replaced the manual index counter (`i := 0; ... i++`) with the
// idiomatic indexed range loop; behavior is unchanged.
func templatedOverloads(types []namedCELType, template func(name string, t *cel.Type) cel.FunctionOpt) []cel.FunctionOpt {
	overloads := make([]cel.FunctionOpt, len(types))
	for i, t := range types {
		overloads[i] = template(t.typeName, t.celType)
	}
	return overloads
}
-61
View File
@@ -1,61 +0,0 @@
package classifier
import "github.com/bitmagnet-io/bitmagnet/internal/model"
// ContentAttributes holds the language/episode/video attributes inferred for
// a torrent's content.
type ContentAttributes struct {
	Languages model.Languages
	// LanguageMulti indicates a multi-language release.
	LanguageMulti   bool
	Episodes        model.Episodes
	VideoResolution model.NullVideoResolution
	VideoSource     model.NullVideoSource
	VideoCodec      model.NullVideoCodec
	Video3d         model.NullVideo3d
	VideoModifier   model.NullVideoModifier
	ReleaseGroup    model.NullString
}

// Classification couples an optional matched content record with the
// inferred attributes and content type.
type Classification struct {
	ContentType model.NullContentType
	Content     *model.Content
	ContentAttributes
}
// ApplyHint overlays the hint's explicitly-set fields onto the attributes;
// hinted values take precedence over previously inferred ones, while unset
// hint fields leave the existing values untouched.
func (a *ContentAttributes) ApplyHint(h model.TorrentHint) {
	if len(h.Episodes) > 0 {
		a.Episodes = h.Episodes
	}
	if len(h.Languages) > 0 {
		a.Languages = h.Languages
	}
	if h.VideoResolution.Valid {
		a.VideoResolution = h.VideoResolution
	}
	if h.VideoSource.Valid {
		a.VideoSource = h.VideoSource
	}
	if h.VideoCodec.Valid {
		a.VideoCodec = h.VideoCodec
	}
	if h.Video3d.Valid {
		a.Video3d = h.Video3d
	}
	if h.VideoModifier.Valid {
		a.VideoModifier = h.VideoModifier
	}
	if h.ReleaseGroup.Valid {
		a.ReleaseGroup = h.ReleaseGroup
	}
}
// InferVideoAttributes populates video-related attributes by parsing the
// input string (typically a torrent or release name) with the model package's
// inference helpers.
func (a *ContentAttributes) InferVideoAttributes(input string) {
	a.VideoResolution = model.InferVideoResolution(input)
	a.VideoSource = model.InferVideoSource(input)
	a.VideoCodec, a.ReleaseGroup = model.InferVideoCodecAndReleaseGroup(input)
	a.Video3d = model.InferVideo3d(input)
	a.VideoModifier = model.InferVideoModifier(input)
}

// ApplyHint applies the hint's content type and then overlays its attribute
// overrides.
func (c *Classification) ApplyHint(h model.TorrentHint) {
	c.ContentType = h.NullContentType()
	c.ContentAttributes.ApplyHint(h)
}
@@ -0,0 +1,48 @@
package classification
import (
"fmt"
"strings"
)
// Error is a classification error that additionally carries a stable,
// machine-readable key identifying the failure kind.
type Error interface {
	error
	Key() string
}
// WorkflowError is a keyed error raised by classifier workflow actions.
// The key is a stable machine-readable identifier; message, when set,
// overrides the default human-readable rendering.
type WorkflowError struct {
	key     string
	message string
}

// Error returns the explicit message when set, otherwise a generic message
// derived from the key.
func (e WorkflowError) Error() string {
	if e.message != "" {
		return e.message
	}
	// Fixed message: previously read "workflow unmarshalError: %s", an
	// apparent leftover from an earlier identifier rename.
	return fmt.Sprintf("workflow error: %s", e.key)
}

// Key returns the stable machine-readable key identifying this error.
func (e WorkflowError) Key() string {
	return e.key
}
// ErrUnmatched signals that no workflow rule matched the torrent.
var ErrUnmatched = WorkflowError{
	key: "unmatched",
}

// ErrDeleteTorrent signals that the workflow decided the torrent should be deleted.
var ErrDeleteTorrent = WorkflowError{
	key: "delete_torrent",
}
// RuntimeError wraps an error that occurred while executing a workflow,
// recording the workflow path at which it happened.
type RuntimeError struct {
	Path  []string
	Cause error
}

// Error reports the dotted workflow path alongside the underlying cause.
func (e RuntimeError) Error() string {
	// Fixed message casing: "at Path" echoed the Go field name; use plain
	// lowercase "path" in the user-facing message.
	return fmt.Sprintf("runtime error at path %s: %s", strings.Join(e.Path, "."), e.Cause)
}

// Unwrap exposes the underlying cause for errors.Is / errors.As.
func (e RuntimeError) Unwrap() error {
	return e.Cause
}
@@ -0,0 +1,114 @@
package classification
import "github.com/bitmagnet-io/bitmagnet/internal/model"
// Result is the outcome of running a classification workflow on a torrent.
type Result struct {
	ContentAttributes
	// Content is the matched content record, if any was attached.
	Content *model.Content
	// Tags is a set of free-form tags applied by the workflow.
	Tags map[string]struct{}
}

// ApplyHint applies the hint's content type and then overlays its attribute
// overrides; hinted values take precedence over inferred ones.
func (r *Result) ApplyHint(h model.TorrentHint) {
	r.ContentType = h.NullContentType()
	r.ContentAttributes.ApplyHint(h)
}
// AttachContent attaches a matched content record to the result, adopting its
// content type. The content's original language is added to the result's
// language set only when no languages were detected yet, or when the release
// is flagged multi-language.
func (r *Result) AttachContent(content *model.Content) {
	r.Content = content
	r.ContentAttributes.ContentType = model.NewNullContentType(content.Type)
	if !content.OriginalLanguage.Valid {
		return
	}
	if len(r.Languages) > 0 && !r.LanguageMulti {
		return
	}
	if r.Languages == nil {
		r.Languages = make(model.Languages)
	}
	r.Languages[content.OriginalLanguage.Language] = struct{}{}
}
// ContentAttributes holds the attributes inferred for a torrent's content:
// content type, parsed base title and date, languages, episodes and
// video-related release attributes.
type ContentAttributes struct {
	ContentType model.NullContentType
	// BaseTitle is the title parsed from the torrent name, used for content search.
	BaseTitle model.NullString
	Date      model.Date
	Languages model.Languages
	// LanguageMulti indicates a multi-language release.
	LanguageMulti   bool
	Episodes        model.Episodes
	VideoResolution model.NullVideoResolution
	VideoSource     model.NullVideoSource
	VideoCodec      model.NullVideoCodec
	Video3d         model.NullVideo3d
	VideoModifier   model.NullVideoModifier
	ReleaseGroup    model.NullString
}
// Merge fills in each attribute from other only when it is not already set on
// a — existing values always win. The one exception is LanguageMulti, which
// is OR-combined.
func (a *ContentAttributes) Merge(other ContentAttributes) {
	if !a.ContentType.Valid {
		a.ContentType = other.ContentType
	}
	if !a.BaseTitle.Valid {
		a.BaseTitle = other.BaseTitle
	}
	if a.Date.IsNil() {
		a.Date = other.Date
	}
	if len(a.Languages) == 0 {
		a.Languages = other.Languages
	}
	a.LanguageMulti = a.LanguageMulti || other.LanguageMulti
	if len(a.Episodes) == 0 {
		a.Episodes = other.Episodes
	}
	if !a.VideoResolution.Valid {
		a.VideoResolution = other.VideoResolution
	}
	if !a.VideoSource.Valid {
		a.VideoSource = other.VideoSource
	}
	if !a.VideoCodec.Valid {
		a.VideoCodec = other.VideoCodec
	}
	if !a.Video3d.Valid {
		a.Video3d = other.Video3d
	}
	if !a.VideoModifier.Valid {
		a.VideoModifier = other.VideoModifier
	}
	if !a.ReleaseGroup.Valid {
		a.ReleaseGroup = other.ReleaseGroup
	}
}
// ApplyHint overlays the hint's explicitly-set fields onto the attributes;
// hinted values take precedence, while unset hint fields leave the existing
// values untouched.
func (a *ContentAttributes) ApplyHint(h model.TorrentHint) {
	if len(h.Episodes) > 0 {
		a.Episodes = h.Episodes
	}
	if len(h.Languages) > 0 {
		a.Languages = h.Languages
	}
	if h.VideoResolution.Valid {
		a.VideoResolution = h.VideoResolution
	}
	if h.VideoSource.Valid {
		a.VideoSource = h.VideoSource
	}
	if h.VideoCodec.Valid {
		a.VideoCodec = h.VideoCodec
	}
	if h.Video3d.Valid {
		a.Video3d = h.Video3d
	}
	if h.VideoModifier.Valid {
		a.VideoModifier = h.VideoModifier
	}
	if h.ReleaseGroup.Valid {
		a.ReleaseGroup = h.ReleaseGroup
	}
}
// InferVideoAttributes populates video-related attributes by parsing the
// input string (typically a torrent or release name) with the model package's
// inference helpers.
func (a *ContentAttributes) InferVideoAttributes(input string) {
	a.VideoResolution = model.InferVideoResolution(input)
	a.VideoSource = model.InferVideoSource(input)
	a.VideoCodec, a.ReleaseGroup = model.InferVideoCodecAndReleaseGroup(input)
	a.Video3d = model.InferVideo3d(input)
	a.VideoModifier = model.InferVideoModifier(input)
}
+228
View File
@@ -0,0 +1,228 @@
$schema: "https://bitmagnet.io/schemas/classifier-0.1.json"
workflows:
default:
# delete torrents containing banned keywords:
- if_else:
condition: "([torrent.baseName] + torrent.files.map(f, f.basePath)).join(' ').matches(keywords.banned)"
if_action: delete
# try to identify content type for torrents without a hinted content type:
- if_else:
condition: "result.contentType == contentType.unknown"
if_action:
find_match:
# match audiobooks:
- if_else:
condition:
and:
- "torrent.files.map(f, f.fileType == fileType.audio ? f.size : - f.size).sum() > 50*mb"
- or:
- "torrent.baseName.matches(keywords.audiobook)"
- "torrent.files.filter(f, f.extension in extensions.audiobook).size() > 0"
if_action:
set_content_type: audiobook
else_action: unmatched
# match comics:
- if_else:
condition: "torrent.files.map(f, f.extension in extensions.comic ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: comic
else_action: unmatched
# match ebooks:
- if_else:
condition: "torrent.files.map(f, f.extension in extensions.ebook ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: ebook
else_action: unmatched
# match music:
- if_else:
condition:
or:
- "torrent.files.map(f, f.extension in extensions.music ? f.size : - f.size).sum() > 0"
- and:
- "torrent.baseName.matches(keywords.music)"
- "torrent.files.map(f, f.fileType == fileType.audio ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: music
else_action: unmatched
# match software:
- if_else:
condition: "torrent.files.map(f, f.fileType == fileType.software ? f.size : - f.size).sum() > 0"
if_action:
set_content_type: software
else_action: unmatched
# match xxx:
- if_else:
condition: "([torrent.baseName] + torrent.files.map(f, f.basePath)).join(' ').matches(keywords.xxx)"
if_action:
set_content_type: xxx
else_action: unmatched
# if the name contains a full date, attach it to the result as it's a good indicator of content type:
- find_match:
- parse_date
# if we know a content ID that isn't already attached, try to attach it, either from the local `content` table or an API integration:
- if_else:
condition:
and:
- "torrent.hasHintedContentId && !result.hasAttachedContent"
# we might as well save work by restricting this to content types that have an API integration:
- "result.contentType in [contentType.movie, contentType.tv_show, contentType.xxx]"
if_action:
find_match:
- attach_local_content_by_id
- if_else:
condition: "flags.tmdb_enabled"
if_action: attach_tmdb_content_by_id
else_action: unmatched
# parse video-related attributes for video torrents (including the base title, needed for the next step):
- if_else:
condition:
or:
- "result.contentType in [contentType.movie, contentType.tv_show]"
- "torrent.files.map(f, f.fileType == fileType.video ? f.size : - f.size).sum() > 100*mb"
if_action:
find_match:
# parse video-related attributes from the torrent name;
# if the content type wasn't previously specified and the name format doesn't suggest a movie or TV show, a "no match" will be returned:
- parse_video_content
# if content isn't already attached, and a base title has been parsed, then search for the content, either from the local `content` table or an API integration:
- if_else:
condition: "!result.hasAttachedContent && result.hasBaseTitle"
if_action:
find_match:
- attach_local_content_by_search
- if_else:
condition: "flags.tmdb_enabled"
if_action: attach_tmdb_content_by_search
else_action: unmatched
# delete specific content types based on the configured flags:
- if_else:
condition:
or:
- "result.contentType in flags.delete_content_types"
- "flags.delete_xxx && result.contentType == contentType.xxx"
if_action: delete
extensions:
audiobook:
- m4b
comic:
- cb7
- cba
- cbr
- cbt
- cbz
ebook:
- azw
- azw3
- azw4
- azw8
- chm
- doc
- docx
- djvu
- epub
- lit
- mobi
- odt
- pdf
- rtf
music:
- ape
- dsf
- flac
software:
- apk
- app
- bat
- bin
- deb
- dll
- dmg
- exe
- iso
- jar
- lua
- msi
- package
- pkg
- rpm
- sh
keywords:
audiobook:
- audiobooks?
- books?
- (un)?abridged
- narrated
- novels?
- "*biograph*"
music:
- discography
- music
- album
- \V.?\A.?
- various artists
- compilation
- ep
- lp
- single
- vinyl
- classical
- disco
- folk
- hits
- house
- indie
- jazz
- metal
- pop
- reggae
- rock
- trance
xxx:
- anal
- ass
- blowjobs?
- boob*
- cocks?
- cum*
- dicks?
- erotic*
- "*fuck*"
- "*gloryhole*"
- hardcore
- kink*
- milf*
- nubile*
- onlyfans
- orgasm*
- orgy
- "*porn*"
- pov
- pussy
- seduc*
- sex*
- slut*
- tits?
- threesome
- "*wank*"
- "*xxx*"
banned:
- pa?edo(fil*|phil*)?
- preteen
- pthc
- ptsc
- lsbar
- lsm
- underage
- hebefilia
- opva
- child porn*
- (#|10|11|12|13|14|15|16|17) ?y ?o
flag_definitions:
tmdb_enabled: bool
delete_content_types: content_type_list
delete_xxx: bool
flags:
tmdb_enabled: true
delete_content_types: []
delete_xxx: false
+145 -24
View File
@@ -3,39 +3,160 @@ package classifier
import (
"context"
"errors"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"go.uber.org/zap"
"github.com/bitmagnet-io/bitmagnet/internal/protobuf"
"github.com/google/cel-go/cel"
"strings"
)
// ErrNoMatch is the sentinel error returned when no matcher or rule applies.
var (
	ErrNoMatch = errors.New("no match")
)
type Classifier interface {
Classify(ctx context.Context, torrent model.Torrent) (Classification, error)
type Compiler interface {
Compile(source Source) (Runner, error)
}
type SubClassifier interface {
Classifier
Key() string
Priority() int
type Runner interface {
Run(ctx context.Context, workflow string, t model.Torrent) (classification.Result, error)
}
type classifier struct {
subClassifiers []SubClassifier
logger *zap.SugaredLogger
// compiler builds Runners from classifier Sources. options register compile
// features on the context; dependencies are threaded through to the runner.
type compiler struct {
	options      []compilerOption
	dependencies dependencies
}
func (c classifier) Classify(ctx context.Context, t model.Torrent) (Classification, error) {
for _, sc := range c.subClassifiers {
tc, err := sc.Classify(ctx, t)
if err == nil {
return tc, nil
}
if !errors.Is(err, ErrNoMatch) {
c.logger.Errorw("error classifying content", "classifier", sc.Key(), "torrent", t, "error", err)
return Classification{}, err
// compilerContext carries per-compilation state: the registered features, the
// CEL environment, the raw source node currently being compiled, the path to
// that node (for error reporting), and the declared workflow names.
type compilerContext struct {
	features
	celEnv        *cel.Env
	source        any
	path          []string
	workflowNames map[string]struct{}
}

// compilerOption is a compile-time hook applied to the context before the
// workflows themselves are compiled.
type compilerOption func(Source, *compilerContext) error
// executionContext carries everything a single classification run needs: the
// ambient context, shared dependencies, the compiled workflows, the torrent
// being classified (plus its protobuf projection for CEL), and the
// in-progress result (likewise mirrored as a protobuf for CEL expressions).
type executionContext struct {
	context.Context
	dependencies
	workflows map[string]action
	torrent   model.Torrent
	torrentPb *protobuf.Torrent
	result    classification.Result
	resultPb  *protobuf.Classification
}

// withResult returns a copy of the execution context with the result replaced
// and its protobuf mirror regenerated, keeping the two views in sync.
func (c executionContext) withResult(result classification.Result) executionContext {
	c.result = result
	c.resultPb = protobuf.NewClassification(result)
	return c
}
// child returns a copy of the context focused on the given source node, with
// pathPart appended to a freshly-allocated path slice so that sibling
// contexts never share a backing array.
func (c compilerContext) child(pathPart string, source any) compilerContext {
	c.source = source
	branch := make([]string, 0, len(c.path)+1)
	branch = append(branch, c.path...)
	c.path = append(branch, pathPart)
	return c
}
// error wraps cause in a compilerError annotated with the current path,
// unless cause is already a compilerError (avoids double-wrapping).
func (c compilerContext) error(cause error) error {
	if asCompilerError(cause) != nil {
		return cause
	}
	return compilerError{c.path, cause}
}

// fatal escalates cause to a fatalCompilerError, which aborts compilation;
// an existing compilerError's path is preserved when present.
func (c compilerContext) fatal(cause error) error {
	if asFatalCompilerError(cause) != nil {
		return cause
	}
	cErr := asCompilerError(cause)
	if cErr != nil {
		return fatalCompilerError{compilerError: *cErr}
	}
	return fatalCompilerError{compilerError{c.path, cause}}
}
// Compile decodes and validates a classifier Source, applies the registered
// compiler options (features, CEL environment, ...), compiles every workflow,
// and returns a Runner. All failures at this stage are escalated to fatal
// compiler errors.
func (c compiler) Compile(source Source) (Runner, error) {
	ctx := &compilerContext{
		source:        source,
		workflowNames: source.workflowNames(),
	}
	// Re-decode the source through the decoder to normalise/validate it.
	source, sourceErr := decode[Source](*ctx)
	if sourceErr != nil {
		return nil, ctx.fatal(sourceErr)
	}
	for _, opt := range c.options {
		if err := opt(source, ctx); err != nil {
			return nil, ctx.fatal(err)
		}
	}
	return Classification{}, ErrNoMatch
	// NOTE(review): the line above looks like a stray remnant of the removed
	// classifier.Classify implementation (its return types don't match this
	// signature and it makes the code below unreachable) — likely a
	// diff/merge artifact; confirm against the repository and remove.
	workflowsCtx := ctx.child("workflows", source.Workflows)
	workflows := make(map[string]action)
	for name, src := range source.Workflows {
		a, err := ctx.compileAction(workflowsCtx.child(name, src))
		if err != nil {
			return nil, ctx.fatal(err)
		}
		workflows[name] = a
	}
	return runner{
		dependencies: c.dependencies,
		workflows:    workflows,
	}, nil
}
// decodeTo decodes the context's raw source node into target, wrapping
// decoder-construction failures with the current compiler path.
func decodeTo[T any](ctx compilerContext, target *T) error {
	d, newErr := newDecoder(target)
	if newErr != nil {
		return ctx.error(newErr)
	}
	return d.Decode(ctx.source)
}

// decode decodes the context's raw source node into a fresh value of type T.
func decode[T any](ctx compilerContext) (T, error) {
	var out T
	if err := decodeTo(ctx, &out); err != nil {
		return out, err
	}
	return out, nil
}
// compilerError annotates a compilation failure with the source path at which
// it occurred.
type compilerError struct {
	path  []string
	cause error
}

// Error reports the dotted source path and the underlying cause.
func (e compilerError) Error() string {
	return fmt.Sprintf("compiler error at path '%s': %s", strings.Join(e.path, "."), e.cause)
}

// Unwrap exposes the cause for errors.Is / errors.As.
func (e compilerError) Unwrap() error {
	return e.cause
}
// asCompilerError returns the compilerError found in err's chain, or nil when
// there is none.
func asCompilerError(err error) *compilerError {
	var cErr compilerError
	if errors.As(err, &cErr) {
		return &cErr
	}
	return nil
}
// fatalCompilerError marks a compiler error that must abort compilation
// entirely rather than allow fallback to another definition.
type fatalCompilerError struct {
	compilerError
}

// Unwrap exposes the wrapped compilerError (and, transitively, its cause).
func (e fatalCompilerError) Unwrap() error {
	return e.compilerError
}
// asFatalCompilerError returns the fatalCompilerError found in err's chain,
// or nil when there is none.
func asFatalCompilerError(err error) *fatalCompilerError {
	var fErr fatalCompilerError
	if errors.As(err, &fErr) {
		return &fErr
	}
	return nil
}
// numericPathPart renders a list index as a bracketed path segment, e.g. "[3]".
func numericPathPart(idx int) string {
	return fmt.Sprintf("[%d]", idx)
}
+244
View File
@@ -0,0 +1,244 @@
package classifier
import (
"context"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
classifier_mocks "github.com/bitmagnet-io/bitmagnet/internal/classifier/mocks"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
tmdb_mocks "github.com/bitmagnet-io/bitmagnet/internal/tmdb/mocks"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"testing"
)
// TestClassifier compiles the core YAML classifier source and runs the
// "default" workflow against a table of example torrents, with the local
// content search and the TMDB client mocked out.
func TestClassifier(t *testing.T) {
	// Argument matcher accepting any context.Context passed to the mocks.
	matchContext := mock.MatchedBy(func(ctx any) bool {
		_, ok := ctx.(context.Context)
		return ok
	})
	testCases := []struct {
		torrent      model.Torrent
		prepareMocks func(mocks testClassifierMocks)
		expected     classification.Result
		expectedErr  error
	}{
		{
			// Neither local search nor TMDB match: attributes are still parsed
			// from the name, but no content record is attached.
			torrent: model.Torrent{
				Name:        "The Regular Movie (2000).mkv",
				FilesStatus: model.FilesStatusSingle,
				Extension:   model.NewNullString("mkv"),
				Size:        1000000000,
			},
			prepareMocks: func(mocks testClassifierMocks) {
				mocks.search.On(
					"ContentBySearch",
					matchContext,
					model.ContentTypeMovie,
					"The Regular Movie",
					model.Year(2000),
				).
					Return(model.Content{}, classification.ErrUnmatched)
				mocks.tmdbClient.On(
					"SearchMovie",
					matchContext,
					tmdb.SearchMovieRequest{
						Query:        "The Regular Movie",
						Year:         2000,
						IncludeAdult: true,
					},
				).
					Return(tmdb.SearchMovieResponse{}, nil)
			},
			expected: classification.Result{
				ContentAttributes: classification.ContentAttributes{
					ContentType: model.NewNullContentType(model.ContentTypeMovie),
					BaseTitle:   model.NewNullString("The Regular Movie"),
					Date: model.Date{
						Year: 2000,
					},
				},
			},
		},
		{
			// Matched by the local content search; TMDB is never consulted.
			torrent: model.Torrent{
				Name:        "The Regular Local Movie (2000).mkv",
				FilesStatus: model.FilesStatusSingle,
				Extension:   model.NewNullString("mkv"),
				Size:        1000000000,
			},
			prepareMocks: func(mocks testClassifierMocks) {
				mocks.search.On(
					"ContentBySearch",
					matchContext,
					model.ContentTypeMovie,
					"The Regular Local Movie",
					model.Year(2000),
				).
					Return(model.Content{
						Type:        model.ContentTypeMovie,
						Source:      "local",
						ID:          "123",
						Title:       "The Regular Local Movie",
						ReleaseYear: 2000,
					}, nil)
			},
			expected: classification.Result{
				ContentAttributes: classification.ContentAttributes{
					ContentType: model.NewNullContentType(model.ContentTypeMovie),
					BaseTitle:   model.NewNullString("The Regular Local Movie"),
					Date: model.Date{
						Year: 2000,
					},
				},
				Content: &model.Content{
					Type:        model.ContentTypeMovie,
					Source:      "local",
					ID:          "123",
					Title:       "The Regular Local Movie",
					ReleaseYear: 2000,
				},
			},
		},
		{
			// Unmatched locally, matched via TMDB search + details lookup.
			torrent: model.Torrent{
				Name:        "The Regular TMDB Movie (2000).mkv",
				FilesStatus: model.FilesStatusSingle,
				Extension:   model.NewNullString("mkv"),
				Size:        1000000000,
			},
			prepareMocks: func(mocks testClassifierMocks) {
				mocks.search.On(
					"ContentBySearch",
					matchContext,
					model.ContentTypeMovie,
					"The Regular TMDB Movie",
					model.Year(2000),
				).
					Return(model.Content{}, classification.ErrUnmatched)
				mocks.tmdbClient.On(
					"SearchMovie",
					matchContext,
					tmdb.SearchMovieRequest{
						Query:        "The Regular TMDB Movie",
						Year:         2000,
						IncludeAdult: true,
					},
				).
					Return(tmdb.SearchMovieResponse{
						Results: []tmdb.SearchMovieResult{
							{
								ID:          123,
								Title:       "The Regular TMDB Movie",
								ReleaseDate: "2000-01-01",
							},
						},
					}, nil)
				mocks.tmdbClient.On(
					"MovieDetails",
					matchContext,
					tmdb.MovieDetailsRequest{
						ID: 123,
					},
				).
					Return(tmdb.MovieDetailsResponse{
						ID:            123,
						Title:         "The Regular TMDB Movie",
						OriginalTitle: "The Regular TMDB Movie Original",
						ReleaseDate:   "2000-01-01",
					}, nil)
			},
			expected: classification.Result{
				ContentAttributes: classification.ContentAttributes{
					ContentType: model.NewNullContentType(model.ContentTypeMovie),
					BaseTitle:   model.NewNullString("The Regular TMDB Movie"),
					Date: model.Date{
						Year: 2000,
					},
				},
				Content: &model.Content{
					Type:   model.ContentTypeMovie,
					Source: "tmdb",
					ID:     "123",
					Title:  "The Regular TMDB Movie",
					ReleaseDate: model.Date{
						Year:  2000,
						Month: 1,
						Day:   1,
					},
					ReleaseYear:   2000,
					Adult:         model.NewNullBool(false),
					OriginalTitle: model.NewNullString("The Regular TMDB Movie Original"),
					Popularity:    model.NewNullFloat32(0),
					VoteAverage:   model.NewNullFloat32(0),
					VoteCount:     model.NewNullUint(0),
				},
			},
		},
		{
			// Classified as xxx by keyword match; no external lookups expected.
			torrent: model.Torrent{
				Name:        "The XXX Movie 1080p.mkv",
				FilesStatus: model.FilesStatusSingle,
				Extension:   model.NewNullString("mkv"),
				Size:        1000000000,
			},
			expected: classification.Result{
				ContentAttributes: classification.ContentAttributes{
					ContentType:     model.NewNullContentType(model.ContentTypeXxx),
					VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
				},
			},
		},
	}
	for _, tc := range testCases {
		t.Run(fmt.Sprintf("torrent: %s", tc.torrent.Name), func(t *testing.T) {
			mocks := newTestClassifierMocks(t)
			// Compile the real core classifier source so the test exercises the
			// shipped workflow definition end to end.
			source, sourceErr := yamlSourceProvider{rawSourceProvider: coreSourceProvider{}}.source()
			if sourceErr != nil {
				t.Fatal(sourceErr)
				return
			}
			workflow, compileErr := mocks.compiler.Compile(source)
			if compileErr != nil {
				t.Fatal(compileErr)
				return
			}
			if tc.prepareMocks != nil {
				tc.prepareMocks(mocks)
			}
			result, runErr := workflow.Run(context.Background(), "default", tc.torrent)
			if runErr != nil {
				assert.Equal(t, tc.expectedErr, runErr)
				t.Log(runErr)
			} else {
				assert.Equal(t, tc.expected, result)
			}
		})
	}
}
// testClassifierMocks bundles a compiler wired with mocked dependencies plus
// handles to those mocks for setting expectations.
type testClassifierMocks struct {
	compiler   Compiler
	search     *classifier_mocks.LocalSearch
	tmdbClient *tmdb_mocks.Client
}

// newTestClassifierMocks builds a compiler with the default features and CEL
// environment, backed by mockery-generated LocalSearch and TMDB clients.
func newTestClassifierMocks(t *testing.T) testClassifierMocks {
	search := classifier_mocks.NewLocalSearch(t)
	tmdbClient := tmdb_mocks.NewClient(t)
	return testClassifierMocks{
		compiler: compiler{
			options: []compilerOption{
				compilerFeatures(defaultFeatures),
				celEnvOption,
			},
			dependencies: dependencies{
				search:     search,
				tmdbClient: tmdbClient,
			},
		},
		search:     search,
		tmdbClient: tmdbClient,
	}
}
+3 -7
View File
@@ -1,21 +1,17 @@
package classifierfx
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/config/configfx"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/extension"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/keywords"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video/videofx"
"go.uber.org/fx"
)
func New() fx.Option {
return fx.Module(
"classifier",
"workflow",
configfx.NewConfigModule[classifier.Config]("classifier", classifier.NewDefaultConfig()),
fx.Provide(
classifier.New,
extension.New,
keywords.New,
),
videofx.New(),
)
}
+41
View File
@@ -0,0 +1,41 @@
package classifier
import (
"errors"
)
// conditions returns a feature that registers the given condition definitions
// with the compiler's feature set.
func conditions(defs ...conditionDefinition) feature {
	return func(c *features) {
		c.conditions = append(c.conditions, defs...)
	}
}

// conditionCompiler compiles a raw source node into an executable condition.
type conditionCompiler interface {
	compileCondition(ctx compilerContext) (condition, error)
}

// conditionDefinition is a named, JSON-schema-documented condition compiler.
type conditionDefinition interface {
	HasJsonSchema
	name() string
	conditionCompiler
}
// compileCondition tries each registered condition definition against the
// source node at ctx and returns the first one that compiles. A fatal
// compiler error aborts immediately; otherwise all per-definition failures
// are joined into a single error.
func (c compilerContext) compileCondition(ctx compilerContext) (condition, error) {
	var errs []error
	for _, def := range c.conditions {
		c, err := def.compileCondition(ctx.child(def.name(), ctx.source))
		if err == nil {
			return c, nil
		}
		if asFatalCompilerError(err) != nil {
			return condition{}, err
		}
		errs = append(errs, err)
	}
	errs = append(errs, errors.New("no condition matched"))
	return condition{}, errors.Join(errs...)
}

// condition is a compiled predicate evaluated against an execution context.
type condition struct {
	check func(executionContext) (bool, error)
}
+52
View File
@@ -0,0 +1,52 @@
package classifier
// andName is the source key identifying the "and" condition.
const andName = "and"

// andCondition compiles a conjunction of child conditions.
type andCondition struct{}

// name returns the source key for this condition definition.
func (andCondition) name() string {
	return andName
}

// andConditionPayloadSpec expects `and: [<condition>, ...]`.
var andConditionPayloadSpec = payloadSingleKeyValue[[]any]{
	key: andName,
	valueSpec: payloadMustSucceed[[]any]{payloadList[any]{
		itemSpec: payloadGeneric[any]{
			jsonSchema: map[string]any{
				"$ref": "#/definitions/condition",
			},
		},
		description: "A condition that is satisfied if all conditions in a list are satisfied",
	}},
}
// compileCondition compiles each child condition and returns a condition that
// short-circuits to false on the first unsatisfied (or failing) child.
func (andCondition) compileCondition(ctx compilerContext) (condition, error) {
	rawConds, err := andConditionPayloadSpec.Unmarshal(ctx)
	if err != nil {
		return condition{}, ctx.error(err)
	}
	compiled := make([]condition, 0, len(rawConds))
	for i, rawCond := range rawConds {
		cond, err := ctx.compileCondition(ctx.child(numericPathPart(i), rawCond))
		if err != nil {
			return condition{}, ctx.fatal(err)
		}
		compiled = append(compiled, cond)
	}
	return condition{
		check: func(ec executionContext) (bool, error) {
			for _, cond := range compiled {
				ok, err := cond.check(ec)
				if err != nil {
					return false, err
				}
				if !ok {
					return false, nil
				}
			}
			return true, nil
		},
	}, nil
}
// JsonSchema documents the accepted payload shape for the "and" condition.
func (andCondition) JsonSchema() JsonSchema {
	return andConditionPayloadSpec.JsonSchema()
}
@@ -0,0 +1,79 @@
package classifier
import (
"errors"
"fmt"
"github.com/google/cel-go/cel"
"reflect"
)
// expressionName is the source key identifying the CEL expression condition.
const expressionName = "expression"

// expressionCondition compiles a CEL expression into a boolean condition.
type expressionCondition struct{}

// celProgramPayload compiles a source string into a CEL program, rejecting
// expressions whose static output type is not bool.
var celProgramPayload = payloadTransformer[string, cel.Program]{
	spec: payloadGeneric[string]{
		jsonSchema: JsonSchema{
			"type":        "string",
			"minLength":   1,
			"description": "A CEL expression describing a condition",
		},
	},
	transform: func(s string, ctx compilerContext) (cel.Program, error) {
		ast, issues := ctx.celEnv.Compile(s)
		if issues != nil && issues.Err() != nil {
			return nil, ctx.error(fmt.Errorf("type-check error: %w", issues.Err()))
		}
		// The expression must statically evaluate to a boolean.
		if !reflect.DeepEqual(ast.OutputType(), cel.BoolType) {
			return nil, ctx.error(fmt.Errorf("got %v, wanted %v output type", ast.OutputType(), cel.BoolType))
		}
		prg, prgErr := ctx.celEnv.Program(ast,
			cel.EvalOptions(cel.OptOptimize),
		)
		if prgErr != nil {
			return nil, ctx.error(fmt.Errorf("program construction error: %w", prgErr))
		}
		return prg, nil
	},
}

// expressionConditionPayload accepts either `expression: "<cel>"` or a bare
// CEL expression string.
var expressionConditionPayload = payloadUnion[cel.Program]{
	oneOf: []TypedPayload[cel.Program]{
		payloadSingleKeyValue[cel.Program]{
			key:       expressionName,
			valueSpec: payloadMustSucceed[cel.Program]{celProgramPayload},
		},
		payloadMustSucceed[cel.Program]{celProgramPayload},
	},
}
func (c expressionCondition) name() string {
	return expressionName
}

// compileCondition compiles the expression payload into a condition that
// evaluates the CEL program with "torrent" and "result" bound to the
// execution context's protobuf representations.
func (c expressionCondition) compileCondition(ctx compilerContext) (condition, error) {
	prg, err := expressionConditionPayload.Unmarshal(ctx)
	if err != nil {
		return condition{}, ctx.error(err)
	}
	return condition{
		check: func(ctx executionContext) (bool, error) {
			result, _, err := prg.Eval(map[string]any{
				"torrent": ctx.torrentPb,
				"result":  ctx.resultPb,
			})
			if err != nil {
				return false, err
			}
			bl, ok := result.Value().(bool)
			if !ok {
				// Should be unreachable: the output type is checked when the
				// program is compiled. Report something clearer than "not bool"
				// if it ever happens.
				return false, errors.New("expression did not evaluate to a boolean")
			}
			return bl, nil
		},
	}, nil
}

// JsonSchema exposes the JSON schema fragment for the "expression" condition.
func (c expressionCondition) JsonSchema() JsonSchema {
	return expressionConditionPayload.JsonSchema()
}
+40
View File
@@ -0,0 +1,40 @@
package classifier
const notName = "not"

// notCondition negates a wrapped condition.
type notCondition struct{}

func (notCondition) name() string {
	return notName
}

// notConditionPayloadSpec accepts a single "not" key whose value is any
// condition, validated against the shared condition schema definition.
var notConditionPayloadSpec = payloadSingleKeyValue[any]{
	key: notName,
	valueSpec: payloadMustSucceed[any]{payloadGeneric[any]{
		jsonSchema: map[string]any{
			"$ref": "#/definitions/condition",
		},
	}},
	description: "A condition that negates the provided condition",
}
// compileCondition compiles the wrapped condition and returns its negation.
func (notCondition) compileCondition(ctx compilerContext) (condition, error) {
	p, decodeErr := notConditionPayloadSpec.Unmarshal(ctx)
	if decodeErr != nil {
		return condition{}, ctx.error(decodeErr)
	}
	cond, cErr := ctx.compileCondition(ctx.child("not", p))
	if cErr != nil {
		return condition{}, ctx.error(cErr)
	}
	return condition{
		check: func(ctx executionContext) (bool, error) {
			result, err := cond.check(ctx)
			if err != nil {
				// Don't negate a meaningless result: the original returned
				// !result (i.e. true) alongside a non-nil error; return the
				// zero value instead, per Go convention.
				return false, err
			}
			return !result, nil
		},
	}, nil
}

// JsonSchema exposes the JSON schema fragment for the "not" condition.
func (notCondition) JsonSchema() JsonSchema {
	return notConditionPayloadSpec.JsonSchema()
}
+50
View File
@@ -0,0 +1,50 @@
package classifier
const orName = "or"

// orCondition is satisfied when any sub-condition in a list is satisfied.
type orCondition struct{}

func (orCondition) name() string {
	return orName
}

// orConditionSpec accepts a single "or" key whose value is a list of
// conditions, each validated against the shared condition schema definition.
var orConditionSpec = payloadSingleKeyValue[[]any]{
	key: orName,
	valueSpec: payloadMustSucceed[[]any]{payloadList[any]{
		itemSpec: payloadGeneric[any]{
			jsonSchema: map[string]any{
				"$ref": "#/definitions/condition",
			},
		},
		description: "A condition that is satisfied if any of the conditions in a list are satisfied",
	}},
}
// compileCondition compiles the "or" payload into a condition that
// short-circuits: the check returns true at the first satisfied
// sub-condition (or stops at the first error), and false only if none match.
func (orCondition) compileCondition(ctx compilerContext) (condition, error) {
	rawConds, err := orConditionSpec.Unmarshal(ctx)
	if err != nil {
		// Wrap with path context — the original returned the error bare,
		// inconsistent with the "and" and "not" conditions.
		return condition{}, ctx.error(err)
	}
	conds := make([]condition, len(rawConds))
	for i, rawCond := range rawConds {
		cond, err := ctx.compileCondition(ctx.child(numericPathPart(i), rawCond))
		if err != nil {
			// Payload shape matched, so a bad sub-condition is a hard failure
			// (mirrors andCondition.compileCondition).
			return condition{}, ctx.fatal(err)
		}
		conds[i] = cond
	}
	return condition{
		check: func(ctx executionContext) (bool, error) {
			for _, c := range conds {
				if result, err := c.check(ctx); err != nil {
					return false, err
				} else if result {
					return true, nil
				}
			}
			return false, nil
		},
	}, nil
}

// JsonSchema exposes the JSON schema fragment for the "or" condition.
func (orCondition) JsonSchema() JsonSchema {
	return orConditionSpec.JsonSchema()
}
+15
View File
@@ -0,0 +1,15 @@
package classifier
// Config holds user-facing classifier configuration.
type Config struct {
	// Workflow is the name of the workflow to run; it must exist in the
	// compiled classifier Source.
	Workflow string
	// Keywords maps keyword-group names to lists of keywords.
	Keywords map[string][]string
	// Extensions maps extension-group names to lists of file extensions.
	Extensions map[string][]string
	// Flags holds values for flags declared by the classifier source.
	Flags map[string]any
	// DeleteXxx presumably enables deletion of adult content — confirm
	// against the workflow that reads this flag.
	DeleteXxx bool
}

// NewDefaultConfig returns a Config that runs the "default" workflow.
func NewDefaultConfig() Config {
	return Config{
		Workflow: "default",
	}
}
+17
View File
@@ -0,0 +1,17 @@
package classifier
import (
"github.com/iancoleman/strcase"
"github.com/mitchellh/mapstructure"
)
func newDecoder[T any](target *T) (*mapstructure.Decoder, error) {
return mapstructure.NewDecoder(&mapstructure.DecoderConfig{
Result: target,
MatchName: func(mapKey, fieldName string) bool {
return mapKey == strcase.ToSnake(fieldName)
},
ErrorUnused: true,
TagName: "json",
})
}
+10
View File
@@ -0,0 +1,10 @@
package classifier
import (
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
)
// dependencies holds the external services a compiled classifier needs
// at execution time: local database search and the TMDB API client.
type dependencies struct {
	search     LocalSearch
	tmdbClient tmdb.Client
}
@@ -1,63 +0,0 @@
package extension
import (
"context"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
type extensionClassifier struct {
}
func (c extensionClassifier) Key() string {
return "extension"
}
func (c extensionClassifier) Priority() int {
return 10
}
func (c extensionClassifier) Classify(_ context.Context, t model.Torrent) (classifier.Classification, error) {
if !t.Hint.IsNil() || t.FilesStatus == model.FilesStatusNoInfo || t.FilesStatus == model.FilesStatusOverThreshold {
return classifier.Classification{}, classifier.ErrNoMatch
}
if t.FilesStatus == model.FilesStatusSingle {
if t.Extension.Valid {
ct := model.ContentTypeFromExtension(t.Extension.String)
if ct.Valid {
return classifier.Classification{
ContentType: ct,
}, nil
}
}
return classifier.Classification{}, classifier.ErrNoMatch
}
var unknownSize uint64
sizeMap := make(map[model.ContentType]uint64)
for _, f := range t.Files {
if f.Size == 0 {
unknownSize++
continue
}
ct := model.ContentTypeFromExtension(f.Extension.String)
if ct.Valid {
sizeMap[ct.ContentType] += f.Size
} else {
unknownSize += f.Size
}
}
var maxSize uint64
var maxType model.ContentType
for k, v := range sizeMap {
if v > maxSize {
maxSize = v
maxType = k
}
}
if maxSize > 0 && maxSize > unknownSize {
return classifier.Classification{
ContentType: model.NewNullContentType(maxType),
}, nil
}
return classifier.Classification{}, classifier.ErrNoMatch
}
-20
View File
@@ -1,20 +0,0 @@
package extension
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"go.uber.org/fx"
)
type Result struct {
fx.Out
Classifier lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
}
func New() Result {
return Result{
Classifier: lazy.New(func() (classifier.SubClassifier, error) {
return extensionClassifier{}, nil
}),
}
}
+52 -19
View File
@@ -1,39 +1,72 @@
package classifier
import (
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
"go.uber.org/fx"
"go.uber.org/zap"
"sort"
)
type Params struct {
fx.In
SubClassifiers []lazy.Lazy[SubClassifier] `group:"content_classifiers"`
Logger *zap.SugaredLogger
Config Config
TmdbConfig tmdb.Config
Search lazy.Lazy[search.Search]
TmdbClient lazy.Lazy[tmdb.Client]
}
type Result struct {
fx.Out
Classifier lazy.Lazy[Classifier]
Compiler lazy.Lazy[Compiler]
Source lazy.Lazy[Source]
Runner lazy.Lazy[Runner]
}
func New(p Params) Result {
func New(params Params) Result {
lc := lazy.New(func() (Compiler, error) {
s, err := params.Search.Get()
if err != nil {
return nil, err
}
tmdbClient, err := params.TmdbClient.Get()
if err != nil {
return nil, err
}
return compiler{
options: []compilerOption{
compilerFeatures(defaultFeatures),
celEnvOption,
},
dependencies: dependencies{
search: localSearch{s},
tmdbClient: tmdbClient,
},
}, nil
})
lsrc := lazy.New[Source](func() (Source, error) {
src, err := newSourceProvider(params.Config, params.TmdbConfig).source()
if err != nil {
return Source{}, err
}
if _, ok := src.Workflows[params.Config.Workflow]; !ok {
return Source{}, fmt.Errorf("default workflow '%s' not found", params.Config.Workflow)
}
return src, nil
})
return Result{
Classifier: lazy.New(func() (Classifier, error) {
subClassifiers := make([]SubClassifier, 0, len(p.SubClassifiers)+1)
for _, subResolver := range p.SubClassifiers {
r, err := subResolver.Get()
if err != nil {
return nil, err
}
subClassifiers = append(subClassifiers, r)
Compiler: lc,
Source: lsrc,
Runner: lazy.New(func() (Runner, error) {
src, err := lsrc.Get()
if err != nil {
return nil, err
}
subClassifiers = append(subClassifiers, FallbackClassifier{})
sort.Slice(subClassifiers, func(i, j int) bool {
return subClassifiers[i].Priority() < subClassifiers[j].Priority()
})
return classifier{subClassifiers, p.Logger}, nil
c, err := lc.Get()
if err != nil {
return nil, err
}
return c.Compile(src)
}),
}
}
-27
View File
@@ -1,27 +0,0 @@
package classifier
import (
"context"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"math"
)
type FallbackClassifier struct{}
func (c FallbackClassifier) Key() string {
return "fallback"
}
func (c FallbackClassifier) Priority() int {
return math.MaxInt
}
func (c FallbackClassifier) Classify(_ context.Context, t model.Torrent) (Classification, error) {
cl := Classification{}
cl.ApplyHint(t.Hint)
hasVideo := t.HasFileType(model.FileTypeVideo)
if hasVideo.Valid && hasVideo.Bool {
cl.InferVideoAttributes(t.Name)
}
return cl, nil
}
+47
View File
@@ -0,0 +1,47 @@
package classifier
// features is the registry of condition and action definitions available
// to the classifier DSL compiler.
type features struct {
	conditions []conditionDefinition
	actions    []actionDefinition
}

// feature mutates a features registry; applied via newFeatures.
type feature func(*features)

// newFeatures builds a registry by applying the given options to an
// empty features value.
func newFeatures(fs ...feature) features {
	result := features{}
	for _, f := range fs {
		f(&result)
	}
	return result
}

// compilerFeatures returns a compiler option that installs the given
// feature registry on the compiler context.
func compilerFeatures(features features) compilerOption {
	return func(_ Source, c *compilerContext) error {
		c.features = features
		return nil
	}
}

// defaultFeatures is the built-in set of conditions and actions shipped
// with the classifier.
var defaultFeatures = newFeatures(
	conditions(
		andCondition{},
		notCondition{},
		orCondition{},
		expressionCondition{},
	),
	actions(
		addTagAction{},
		attachLocalContentByIdAction{},
		attachLocalContentBySearchAction{},
		attachTmdbContentByIdAction{},
		attachTmdbContentBySearchAction{},
		deleteAction{},
		findMatchAction{},
		ifElseAction{},
		unmatchedAction{},
		parseDateAction{},
		parseVideoContentAction{},
		runWorkflowAction{},
		setContentTypeAction{},
	),
)
+117
View File
@@ -0,0 +1,117 @@
package classifier
import (
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/protobuf"
"github.com/google/cel-go/cel"
"github.com/google/cel-go/common/types"
"github.com/google/cel-go/common/types/ref"
)
// flagDefinitions maps flag names to their declared types.
type flagDefinitions map[string]FlagType

// merge combines two sets of flag definitions, with an error if the same
// flag is declared with two different types. Definitions unique to either
// side are kept.
func (d flagDefinitions) merge(other flagDefinitions) (flagDefinitions, error) {
	// Pre-size for the upper bound; removes the unidiomatic else-after-return
	// of the original.
	result := make(flagDefinitions, len(d)+len(other))
	for k, v := range d {
		if tp, ok := other[k]; ok && tp != v {
			return nil, fmt.Errorf("conflicting flag definition %s", k)
		}
		result[k] = v
	}
	for k, v := range other {
		if _, ok := result[k]; !ok {
			result[k] = v
		}
	}
	return result, nil
}
// flags maps flag names to their configured values.
type flags map[string]any

// merge returns a new flag set containing every key from f and other;
// on conflicting keys the value from other wins.
func (f flags) merge(other flags) flags {
	result := make(flags, len(f)+len(other))
	for k, v := range f {
		result[k] = v
	}
	// Overwriting with other's entries implements "other takes precedence"
	// without the original's per-key membership branching.
	for k, v := range other {
		result[k] = v
	}
	return result
}
// celType maps a FlagType to its corresponding CEL type, or nil for an
// unrecognised flag type.
func (t FlagType) celType() *cel.Type {
	switch t {
	case FlagTypeBool:
		return cel.BoolType
	case FlagTypeString:
		return cel.StringType
	case FlagTypeInt:
		return cel.IntType
	case FlagTypeStringList:
		return cel.ListType(cel.StringType)
	case FlagTypeContentTypeList:
		// Content types are exposed to CEL as their protobuf enum values,
		// hence a list of ints (see celVal).
		return cel.ListType(cel.IntType)
	default:
		return nil
	}
}
// celVal converts a raw configured flag value into a CEL value of the
// flag's declared type. It returns ErrInvalidFlagType for an unknown
// flag type, and a conversion error when the dynamic type of rawVal
// doesn't match the declaration.
func (t FlagType) celVal(rawVal any) (ref.Val, error) {
	switch t {
	case FlagTypeBool:
		if nativeVal, ok := rawVal.(bool); ok {
			return types.Bool(nativeVal), nil
		}
	case FlagTypeString:
		if nativeVal, ok := rawVal.(string); ok {
			return types.String(nativeVal), nil
		}
	case FlagTypeInt:
		// NOTE(review): only a native int is accepted; numbers decoded from
		// JSON arrive as float64 and would be rejected here — confirm flag
		// values are always sourced from YAML/native config.
		if nativeVal, ok := rawVal.(int); ok {
			return types.Int(nativeVal), nil
		}
	case FlagTypeStringList:
		if sliceVal, ok := rawVal.([]any); ok {
			nativeVal := make([]string, len(sliceVal))
			for i, v := range sliceVal {
				if strVal, ok := v.(string); ok {
					nativeVal[i] = strVal
				} else {
					return nil, fmt.Errorf("could not convert type %T to string", v)
				}
			}
			return types.NewStringList(types.DefaultTypeAdapter, nativeVal), nil
		}
	case FlagTypeContentTypeList:
		if sliceVal, ok := rawVal.([]any); ok {
			celVal := make([]protobuf.Classification_ContentType, len(sliceVal))
			for i, v := range sliceVal {
				if strVal, ok := v.(string); ok {
					var ct model.NullContentType
					// "unknown" maps to the null content type rather than
					// being parsed.
					if strVal != "unknown" {
						if parsed, parseErr := model.ParseContentType(strVal); parseErr != nil {
							return nil, fmt.Errorf("could not parse content type %s: %w", strVal, parseErr)
						} else {
							ct = model.NewNullContentType(parsed)
						}
					}
					celVal[i] = protobuf.NewContentType(ct)
				} else {
					return nil, fmt.Errorf("could not convert type %T to content type", v)
				}
			}
			return types.NewDynamicList(types.DefaultTypeAdapter, celVal), nil
		}
	default:
		return nil, ErrInvalidFlagType
	}
	// A known flag type whose value had the wrong dynamic type falls
	// through to this generic conversion error.
	return nil, fmt.Errorf("could not convert type %T to %s", rawVal, t)
}
+7
View File
@@ -0,0 +1,7 @@
package classifier
//go:generate go run github.com/abice/go-enum --marshal --names --nocase --nocomments --sql --sqlnullstr --values -f flag_type.go
// FlagType represents the type of a flag
// ENUM(bool, string, int, string_list, content_type_list)
type FlagType string
+191
View File
@@ -0,0 +1,191 @@
// Code generated by go-enum DO NOT EDIT.
// Version:
// Revision:
// Build Date:
// Built By:
package classifier
import (
"database/sql/driver"
"encoding/json"
"errors"
"fmt"
"strings"
)
const (
FlagTypeBool FlagType = "bool"
FlagTypeString FlagType = "string"
FlagTypeInt FlagType = "int"
FlagTypeStringList FlagType = "string_list"
FlagTypeContentTypeList FlagType = "content_type_list"
)
var ErrInvalidFlagType = fmt.Errorf("not a valid FlagType, try [%s]", strings.Join(_FlagTypeNames, ", "))
var _FlagTypeNames = []string{
string(FlagTypeBool),
string(FlagTypeString),
string(FlagTypeInt),
string(FlagTypeStringList),
string(FlagTypeContentTypeList),
}
// FlagTypeNames returns a list of possible string values of FlagType.
func FlagTypeNames() []string {
tmp := make([]string, len(_FlagTypeNames))
copy(tmp, _FlagTypeNames)
return tmp
}
// FlagTypeValues returns a list of the values for FlagType
func FlagTypeValues() []FlagType {
return []FlagType{
FlagTypeBool,
FlagTypeString,
FlagTypeInt,
FlagTypeStringList,
FlagTypeContentTypeList,
}
}
// String implements the Stringer interface.
func (x FlagType) String() string {
return string(x)
}
// IsValid provides a quick way to determine if the typed value is
// part of the allowed enumerated values
func (x FlagType) IsValid() bool {
_, err := ParseFlagType(string(x))
return err == nil
}
var _FlagTypeValue = map[string]FlagType{
"bool": FlagTypeBool,
"string": FlagTypeString,
"int": FlagTypeInt,
"string_list": FlagTypeStringList,
"content_type_list": FlagTypeContentTypeList,
}
// ParseFlagType attempts to convert a string to a FlagType.
func ParseFlagType(name string) (FlagType, error) {
if x, ok := _FlagTypeValue[name]; ok {
return x, nil
}
// Case insensitive parse, do a separate lookup to prevent unnecessary cost of lowercasing a string if we don't need to.
if x, ok := _FlagTypeValue[strings.ToLower(name)]; ok {
return x, nil
}
return FlagType(""), fmt.Errorf("%s is %w", name, ErrInvalidFlagType)
}
// MarshalText implements the text marshaller method.
func (x FlagType) MarshalText() ([]byte, error) {
return []byte(string(x)), nil
}
// UnmarshalText implements the text unmarshaller method.
func (x *FlagType) UnmarshalText(text []byte) error {
tmp, err := ParseFlagType(string(text))
if err != nil {
return err
}
*x = tmp
return nil
}
var errFlagTypeNilPtr = errors.New("value pointer is nil") // one per type for package clashes
// Scan implements the Scanner interface.
func (x *FlagType) Scan(value interface{}) (err error) {
if value == nil {
*x = FlagType("")
return
}
// A wider range of scannable types.
// driver.Value values at the top of the list for expediency
switch v := value.(type) {
case string:
*x, err = ParseFlagType(v)
case []byte:
*x, err = ParseFlagType(string(v))
case FlagType:
*x = v
case *FlagType:
if v == nil {
return errFlagTypeNilPtr
}
*x = *v
case *string:
if v == nil {
return errFlagTypeNilPtr
}
*x, err = ParseFlagType(*v)
default:
return errors.New("invalid type for FlagType")
}
return
}
// Value implements the driver Valuer interface.
func (x FlagType) Value() (driver.Value, error) {
return x.String(), nil
}
type NullFlagType struct {
FlagType FlagType
Valid bool
Set bool
}
func NewNullFlagType(val interface{}) (x NullFlagType) {
err := x.Scan(val) // yes, we ignore this error, it will just be an invalid value.
_ = err // make any errcheck linters happy
return
}
// Scan implements the Scanner interface.
func (x *NullFlagType) Scan(value interface{}) (err error) {
if value == nil {
x.FlagType, x.Valid = FlagType(""), false
return
}
err = x.FlagType.Scan(value)
x.Valid = (err == nil)
return
}
// Value implements the driver Valuer interface.
func (x NullFlagType) Value() (driver.Value, error) {
if !x.Valid {
return nil, nil
}
return x.FlagType.String(), nil
}
// MarshalJSON correctly serializes a NullFlagType to JSON.
func (n NullFlagType) MarshalJSON() ([]byte, error) {
const nullStr = "null"
if n.Valid {
return json.Marshal(n.FlagType)
}
return []byte(nullStr), nil
}
// UnmarshalJSON correctly deserializes a NullFlagType from JSON.
func (n *NullFlagType) UnmarshalJSON(b []byte) error {
n.Set = true
var x interface{}
err := json.Unmarshal(b, &x)
if err != nil {
return err
}
err = n.Scan(x)
return err
}
@@ -0,0 +1,166 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$id": "http://json-schema.org/draft-07/schema#",
"title": "Core schema meta-schema",
"definitions": {
"schemaArray": {
"type": "array",
"minItems": 1,
"items": { "$ref": "#" }
},
"nonNegativeInteger": {
"type": "integer",
"minimum": 0
},
"nonNegativeIntegerDefault0": {
"allOf": [
{ "$ref": "#/definitions/nonNegativeInteger" },
{ "default": 0 }
]
},
"simpleTypes": {
"enum": [
"array",
"boolean",
"integer",
"null",
"number",
"object",
"string"
]
},
"stringArray": {
"type": "array",
"items": { "type": "string" },
"uniqueItems": true,
"default": []
}
},
"type": ["object", "boolean"],
"properties": {
"$id": {
"type": "string",
"format": "uri-reference"
},
"$schema": {
"type": "string",
"format": "uri"
},
"$ref": {
"type": "string",
"format": "uri-reference"
},
"$comment": {
"type": "string"
},
"title": {
"type": "string"
},
"description": {
"type": "string"
},
"default": true,
"readOnly": {
"type": "boolean",
"default": false
},
"writeOnly": {
"type": "boolean",
"default": false
},
"examples": {
"type": "array",
"items": true
},
"multipleOf": {
"type": "number",
"exclusiveMinimum": 0
},
"maximum": {
"type": "number"
},
"exclusiveMaximum": {
"type": "number"
},
"minimum": {
"type": "number"
},
"exclusiveMinimum": {
"type": "number"
},
"maxLength": { "$ref": "#/definitions/nonNegativeInteger" },
"minLength": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"pattern": {
"type": "string",
"format": "regex"
},
"additionalItems": { "$ref": "#" },
"items": {
"anyOf": [{ "$ref": "#" }, { "$ref": "#/definitions/schemaArray" }],
"default": true
},
"maxItems": { "$ref": "#/definitions/nonNegativeInteger" },
"minItems": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"uniqueItems": {
"type": "boolean",
"default": false
},
"contains": { "$ref": "#" },
"maxProperties": { "$ref": "#/definitions/nonNegativeInteger" },
"minProperties": { "$ref": "#/definitions/nonNegativeIntegerDefault0" },
"required": { "$ref": "#/definitions/stringArray" },
"additionalProperties": { "$ref": "#" },
"definitions": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"properties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"default": {}
},
"patternProperties": {
"type": "object",
"additionalProperties": { "$ref": "#" },
"propertyNames": { "format": "regex" },
"default": {}
},
"dependencies": {
"type": "object",
"additionalProperties": {
"anyOf": [{ "$ref": "#" }, { "$ref": "#/definitions/stringArray" }]
}
},
"propertyNames": { "$ref": "#" },
"const": true,
"enum": {
"type": "array",
"items": true,
"minItems": 1,
"uniqueItems": true
},
"type": {
"anyOf": [
{ "$ref": "#/definitions/simpleTypes" },
{
"type": "array",
"items": { "$ref": "#/definitions/simpleTypes" },
"minItems": 1,
"uniqueItems": true
}
]
},
"format": { "type": "string" },
"contentMediaType": { "type": "string" },
"contentEncoding": { "type": "string" },
"if": { "$ref": "#" },
"then": { "$ref": "#" },
"else": { "$ref": "#" },
"allOf": { "$ref": "#/definitions/schemaArray" },
"anyOf": { "$ref": "#/definitions/schemaArray" },
"oneOf": { "$ref": "#/definitions/schemaArray" },
"not": { "$ref": "#" }
},
"default": true
}
+115
View File
@@ -0,0 +1,115 @@
package classifier
import (
"encoding/json"
)
// JsonSchema is a JSON schema document (or fragment) represented as a
// generic map.
type JsonSchema map[string]any

// MarshalJSON renders the schema as indented JSON. Converting the
// receiver to a plain map first strips this method from the value's
// method set, preventing infinite recursion.
func (s JsonSchema) MarshalJSON() ([]byte, error) {
	plain := map[string]any(s)
	return json.MarshalIndent(plain, "", "  ")
}

// schemaId is the canonical published URL of the classifier schema.
const schemaId = "https://bitmagnet.io/schemas/classifier-0.1.json"
// JsonSchema builds the full draft-07 JSON schema for a classifier
// source document: top-level properties (workflows, flag definitions,
// flags, keywords, extensions) plus one schema definition per registered
// condition and action.
func (f features) JsonSchema() JsonSchema {
	return map[string]any{
		"$schema": "http://json-schema.org/draft-07/schema#",
		"$id":     schemaId,
		"type":    "object",
		"properties": map[string]any{
			"$schema": map[string]any{
				"const": schemaId,
			},
			"workflows": map[string]any{
				"type": "object",
				"additionalProperties": map[string]any{
					"$ref": "#/definitions/action",
				},
			},
			"flag_definitions": map[string]any{
				"type": "object",
				"additionalProperties": map[string]any{
					"type": "string",
					"enum": FlagTypeValues(),
				},
			},
			"flags": map[string]any{
				"type":                 "object",
				"additionalProperties": true,
			},
			"keywords": map[string]any{
				"type": "object",
				"additionalProperties": map[string]any{
					"type": "array",
					"items": map[string]any{
						"type": "string",
					},
				},
			},
			"extensions": map[string]any{
				"type": "object",
				"additionalProperties": map[string]any{
					"type": "array",
					"items": map[string]any{
						"type": "string",
					},
				},
			},
		},
		"additionalProperties": false,
		// definitions are built in an immediately-invoked function so the
		// per-feature action__/condition__ entries can be added in a loop.
		"definitions": func() map[string]any {
			defs := map[string]any{
				"action": map[string]any{
					"oneOf": []map[string]any{
						{
							"$ref": "#/definitions/action_single",
						},
						{
							"$ref": "#/definitions/action_multi",
						},
					},
				},
				"action_multi": map[string]any{
					"type": "array",
					"items": map[string]any{
						"$ref": "#/definitions/action_single",
					},
				},
				"action_single": map[string]any{
					"oneOf": func() []map[string]any {
						var result []map[string]any
						for _, def := range f.actions {
							result = append(result, map[string]any{
								"$ref": "#/definitions/action__" + def.name(),
							})
						}
						return result
					}(),
				},
				"condition": map[string]any{
					"oneOf": func() []map[string]any {
						var result []map[string]any
						for _, def := range f.conditions {
							result = append(result, map[string]any{
								"$ref": "#/definitions/condition__" + def.name(),
							})
						}
						return result
					}(),
				},
			}
			// Each registered feature contributes its own schema fragment,
			// referenced above by naming convention.
			for _, def := range f.actions {
				defs["action__"+def.name()] = def.JsonSchema()
			}
			for _, def := range f.conditions {
				defs["condition__"+def.name()] = def.JsonSchema()
			}
			return defs
		}(),
	}
}

// DefaultJsonSchema returns the schema for the built-in feature set.
func DefaultJsonSchema() JsonSchema {
	return defaultFeatures.JsonSchema()
}
+38
View File
@@ -0,0 +1,38 @@
package classifier
import (
_ "embed"
"encoding/json"
"github.com/stretchr/testify/assert"
"github.com/xeipuuv/gojsonschema"
"testing"
)
//go:embed json-schema.draft-07.json
var metaSchemaJson []byte

// TestJsonSchema checks that the generated classifier schema is itself a
// valid JSON schema (validated against the embedded draft-07 meta-schema)
// and that the core classifier source validates against that schema.
func TestJsonSchema(t *testing.T) {
	schemaJson, err := DefaultJsonSchema().MarshalJSON()
	assert.NoError(t, err)
	schemaLoader := gojsonschema.NewBytesLoader(schemaJson)
	metaSchemaLoader := gojsonschema.NewBytesLoader(metaSchemaJson)
	// validate the schema against the meta schema
	metaResult, err := gojsonschema.Validate(metaSchemaLoader, schemaLoader)
	assert.NoError(t, err)
	assert.True(t, metaResult.Valid())
	coreClassifier, err := yamlSourceProvider{rawSourceProvider: coreSourceProvider{}}.source()
	assert.NoError(t, err)
	coreClassifierJson, err := json.Marshal(coreClassifier)
	assert.NoError(t, err)
	documentLoader := gojsonschema.NewBytesLoader(coreClassifierJson)
	// validate the classifier against the schema
	result, err := gojsonschema.Validate(schemaLoader, documentLoader)
	assert.NoError(t, err)
	assert.True(t, result.Valid())
}
@@ -1,46 +0,0 @@
package keywords
import (
"context"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"regexp"
)
type keywordsClassifier struct {
contentType model.ContentType
priority int
regex *regexp.Regexp
requiredFileTypes []model.FileType
}
func (c keywordsClassifier) Key() string {
return "keywords_" + c.contentType.String()
}
func (c keywordsClassifier) Priority() int {
return c.priority
}
func (c keywordsClassifier) Classify(_ context.Context, t model.Torrent) (classifier.Classification, error) {
if !t.Hint.IsNil() || !c.regex.MatchString(t.Name) {
return classifier.Classification{}, classifier.ErrNoMatch
}
if len(c.requiredFileTypes) > 0 {
hasRequiredFileTypes := t.HasFileType(c.requiredFileTypes...)
if hasRequiredFileTypes.Valid && !hasRequiredFileTypes.Bool {
return classifier.Classification{}, classifier.ErrNoMatch
}
}
cl := classifier.Classification{
ContentType: model.NullContentType{
Valid: true,
ContentType: c.contentType,
},
}
hasVideo := t.HasFileType(model.FileTypeVideo)
if hasVideo.Valid && hasVideo.Bool {
cl.InferVideoAttributes(t.Name)
}
return cl, nil
}
-53
View File
@@ -1,53 +0,0 @@
package keywords
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/regex"
"go.uber.org/fx"
)
type Result struct {
fx.Out
Music lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
Audiobook lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
Ebook lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
Xxx lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
}
func New() Result {
return Result{
Audiobook: lazy.New(func() (classifier.SubClassifier, error) {
return keywordsClassifier{
contentType: model.ContentTypeAudiobook,
regex: regex.NewRegexFromNames(audiobookWords...),
priority: 20,
requiredFileTypes: []model.FileType{model.FileTypeAudio},
}, nil
}),
Music: lazy.New(func() (classifier.SubClassifier, error) {
return keywordsClassifier{
contentType: model.ContentTypeMusic,
regex: regex.NewRegexFromNames(musicWords...),
priority: 21,
requiredFileTypes: []model.FileType{model.FileTypeAudio},
}, nil
}),
Ebook: lazy.New(func() (classifier.SubClassifier, error) {
return keywordsClassifier{
contentType: model.ContentTypeAudiobook,
regex: regex.NewRegexFromNames(ebookWords...),
priority: 22,
requiredFileTypes: []model.FileType{model.FileTypeDocument},
}, nil
}),
Xxx: lazy.New(func() (classifier.SubClassifier, error) {
return keywordsClassifier{
contentType: model.ContentTypeXxx,
regex: regex.NewRegexFromNames(xxxWords...),
priority: 23,
}, nil
}),
}
}
-53
View File
@@ -1,53 +0,0 @@
package keywords
var musicWords = []string{
"discography",
"music",
"album",
"va",
"various",
"compilation",
"ep",
"lp",
"single",
"vinyl",
"classical",
"disco",
"folk",
"hits",
"house",
"indie",
"jazz",
"metal",
"pop",
"jazz",
"reggae",
"rock",
"trance",
}
var audiobookWords = []string{
"audiobook",
"audiobooks",
"book",
"books",
"abridged",
"unabridged",
"narrated",
}
var ebookWords = []string{
"book",
"books",
"ebook",
"ebooks",
"abridged",
"unabridged",
}
var xxxWords = []string{
"xxx",
"porn",
"porno",
"sex",
}
+153
View File
@@ -0,0 +1,153 @@
// Code generated by mockery v2.40.1. DO NOT EDIT.
package classifier_mocks
import (
context "context"
model "github.com/bitmagnet-io/bitmagnet/internal/model"
mock "github.com/stretchr/testify/mock"
)
// LocalSearch is an autogenerated mock type for the LocalSearch type
type LocalSearch struct {
mock.Mock
}
type LocalSearch_Expecter struct {
mock *mock.Mock
}
func (_m *LocalSearch) EXPECT() *LocalSearch_Expecter {
return &LocalSearch_Expecter{mock: &_m.Mock}
}
// ContentById provides a mock function with given fields: _a0, _a1
func (_m *LocalSearch) ContentById(_a0 context.Context, _a1 model.ContentRef) (model.Content, error) {
ret := _m.Called(_a0, _a1)
if len(ret) == 0 {
panic("no return value specified for ContentById")
}
var r0 model.Content
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, model.ContentRef) (model.Content, error)); ok {
return rf(_a0, _a1)
}
if rf, ok := ret.Get(0).(func(context.Context, model.ContentRef) model.Content); ok {
r0 = rf(_a0, _a1)
} else {
r0 = ret.Get(0).(model.Content)
}
if rf, ok := ret.Get(1).(func(context.Context, model.ContentRef) error); ok {
r1 = rf(_a0, _a1)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LocalSearch_ContentById_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ContentById'
type LocalSearch_ContentById_Call struct {
*mock.Call
}
// ContentById is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 model.ContentRef
func (_e *LocalSearch_Expecter) ContentById(_a0 interface{}, _a1 interface{}) *LocalSearch_ContentById_Call {
return &LocalSearch_ContentById_Call{Call: _e.mock.On("ContentById", _a0, _a1)}
}
func (_c *LocalSearch_ContentById_Call) Run(run func(_a0 context.Context, _a1 model.ContentRef)) *LocalSearch_ContentById_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(model.ContentRef))
})
return _c
}
func (_c *LocalSearch_ContentById_Call) Return(_a0 model.Content, _a1 error) *LocalSearch_ContentById_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *LocalSearch_ContentById_Call) RunAndReturn(run func(context.Context, model.ContentRef) (model.Content, error)) *LocalSearch_ContentById_Call {
_c.Call.Return(run)
return _c
}
// ContentBySearch provides a mock function with given fields: _a0, _a1, _a2, _a3
func (_m *LocalSearch) ContentBySearch(_a0 context.Context, _a1 model.ContentType, _a2 string, _a3 model.Year) (model.Content, error) {
ret := _m.Called(_a0, _a1, _a2, _a3)
if len(ret) == 0 {
panic("no return value specified for ContentBySearch")
}
var r0 model.Content
var r1 error
if rf, ok := ret.Get(0).(func(context.Context, model.ContentType, string, model.Year) (model.Content, error)); ok {
return rf(_a0, _a1, _a2, _a3)
}
if rf, ok := ret.Get(0).(func(context.Context, model.ContentType, string, model.Year) model.Content); ok {
r0 = rf(_a0, _a1, _a2, _a3)
} else {
r0 = ret.Get(0).(model.Content)
}
if rf, ok := ret.Get(1).(func(context.Context, model.ContentType, string, model.Year) error); ok {
r1 = rf(_a0, _a1, _a2, _a3)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LocalSearch_ContentBySearch_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ContentBySearch'
type LocalSearch_ContentBySearch_Call struct {
*mock.Call
}
// ContentBySearch is a helper method to define mock.On call
// - _a0 context.Context
// - _a1 model.ContentType
// - _a2 string
// - _a3 model.Year
func (_e *LocalSearch_Expecter) ContentBySearch(_a0 interface{}, _a1 interface{}, _a2 interface{}, _a3 interface{}) *LocalSearch_ContentBySearch_Call {
return &LocalSearch_ContentBySearch_Call{Call: _e.mock.On("ContentBySearch", _a0, _a1, _a2, _a3)}
}
func (_c *LocalSearch_ContentBySearch_Call) Run(run func(_a0 context.Context, _a1 model.ContentType, _a2 string, _a3 model.Year)) *LocalSearch_ContentBySearch_Call {
_c.Call.Run(func(args mock.Arguments) {
run(args[0].(context.Context), args[1].(model.ContentType), args[2].(string), args[3].(model.Year))
})
return _c
}
func (_c *LocalSearch_ContentBySearch_Call) Return(_a0 model.Content, _a1 error) *LocalSearch_ContentBySearch_Call {
_c.Call.Return(_a0, _a1)
return _c
}
func (_c *LocalSearch_ContentBySearch_Call) RunAndReturn(run func(context.Context, model.ContentType, string, model.Year) (model.Content, error)) *LocalSearch_ContentBySearch_Call {
_c.Call.Return(run)
return _c
}
// NewLocalSearch creates a new instance of LocalSearch. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations.
// The first argument is typically a *testing.T value.
func NewLocalSearch(t interface {
mock.TestingT
Cleanup(func())
}) *LocalSearch {
mock := &LocalSearch{}
mock.Mock.Test(t)
t.Cleanup(func() { mock.AssertExpectations(t) })
return mock
}
+200
View File
@@ -0,0 +1,200 @@
package parsers
import (
"github.com/bitmagnet-io/bitmagnet/internal/lexer"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"regexp"
"strconv"
"strings"
"time"
)
// dateLexer scans a string for an embedded date.
type dateLexer struct {
	lexer.Lexer
}

// ParseDate extracts the first recognisable date from str, returning the
// zero Date when none is found.
func ParseDate(str string) model.Date {
	l := dateLexer{Lexer: lexer.NewLexer(str)}
	return l.lexDate()
}
var strMonths = map[string]time.Month{
"jan": time.January, "feb": time.February, "mar": time.March,
"apr": time.April, "may": time.May, "jun": time.June,
"jul": time.July, "aug": time.August, "sep": time.September,
"oct": time.October, "nov": time.November, "dec": time.December,
"january": time.January, "february": time.February, "march": time.March,
"april": time.April, "june": time.June,
"july": time.July, "august": time.August, "september": time.September,
"october": time.October, "november": time.November, "december": time.December,
}
var separators = map[string]struct{}{
".": {}, "-": {}, "/": {}, " ": {},
}
const minParts = 5
// lexDate scans the token stream for the first run of the form
// <part><sep><part><sep><part> where both separators are the same accepted
// separator character, the run begins at the start of the input or after a
// non-word break, and is followed by end of input or a non-word token (so
// digits embedded inside longer words never match). The first run that
// yields a valid calendar date is returned; otherwise the zero model.Date.
func (l *dateLexer) lexDate() model.Date {
	parts := l.lexDateParts()
	// isStartOrWordBreak gates matching so a date candidate cannot begin in
	// the middle of a word (e.g. the "19.0.2" inside "v19.0.2" must not match).
	isStartOrWordBreak := true
	for i := 0; i < len(parts)-minParts+1; i++ {
		part1 := parts[i]
		if !isStartOrWordBreak {
			// Skip forward until a non-word token re-enables matching.
			if part1.format == datePartNonWordChars {
				isStartOrWordBreak = true
			}
			continue
		}
		if !part1.IsNil() {
			i++
			sep := parts[i]
			if sep.format == datePartNonWordChars {
				if _, ok := separators[sep.literal]; ok {
					i++
					part2 := parts[i]
					if !part2.IsNil() {
						i++
						sep2 := parts[i]
						if sep2.literal != sep.literal {
							// Mixed separators (e.g. "1-2.3") do not form a date.
							isStartOrWordBreak = sep2.format == datePartNonWordChars
							continue
						}
						i++
						part3 := parts[i]
						if !part3.IsNil() && (i == len(parts)-1 || parts[i+1].format == datePartNonWordChars) {
							if date := findFirstValidDate(part1.Date, part2.Date, part3.Date); !date.IsNil() {
								return date
							} else {
								// Shaped like a date but not a valid one; the
								// following tokens are mid-word now.
								isStartOrWordBreak = false
								continue
							}
						} else {
							isStartOrWordBreak = part3.format == datePartNonWordChars
							continue
						}
					} else {
						isStartOrWordBreak = part2.format == datePartNonWordChars
						continue
					}
				} else {
					// A non-word run that is not an accepted separator still
					// counts as a word break for the next candidate.
					isStartOrWordBreak = true
					continue
				}
			} else {
				isStartOrWordBreak = false
				continue
			}
		} else {
			isStartOrWordBreak = part1.format == datePartNonWordChars
		}
	}
	return model.Date{}
}
// findFirstValidDate tries the three supported component orderings —
// year-month-day, day-month-year, month-day-year — and returns the first
// combination whose components are all present and form a valid calendar
// date, or the zero model.Date when none do.
func findFirstValidDate(part1, part2, part3 model.Date) model.Date {
	orderings := [3]model.Date{
		{Year: part1.Year, Month: part2.Month, Day: part3.Day}, // Y-M-D
		{Year: part3.Year, Month: part2.Month, Day: part1.Day}, // D-M-Y
		{Year: part3.Year, Month: part1.Month, Day: part2.Day}, // M-D-Y
	}
	for _, candidate := range orderings {
		if candidate.Year == 0 || candidate.Month == 0 || candidate.Day == 0 {
			continue
		}
		if candidate.IsValid() {
			return candidate
		}
	}
	return model.Date{}
}
// datePartFormat classifies a lexed token by its shape.
type datePartFormat int

const (
	datePart1Digit       datePartFormat = 1 + iota // single digit: possible day or month
	datePart2Digits                                // two digits: 2000-based year, plus day/month when in range
	datePart4Digits                                // four digits: year
	datePartStrMonth                               // month name or abbreviation
	datePartWordChars                              // other word characters (not a date component)
	datePartNonWordChars                           // separator / punctuation run
)

// datePart is one lexed token: the (possibly ambiguous) date components it
// could represent via the embedded Date — e.g. "5" is both day 5 and May —
// plus its shape and literal text.
type datePart struct {
	model.Date
	format  datePartFormat
	literal string
}
// lexDateParts tokenises the entire remaining input into date parts.
func (l *dateLexer) lexDateParts() []datePart {
	var parts []datePart
	for !l.IsEof() {
		parts = append(parts, l.lexDatePart())
	}
	return parts
}
var regex1Digit = regexp.MustCompile(`^\d$`)
var regex2Digits = regexp.MustCompile(`^\d{2}$`)
var regex4Digits = regexp.MustCompile(`^\d{4}$`)

// lexDatePart reads the next token and classifies it as a potential date
// component. Numeric tokens are deliberately ambiguous: a 1-digit token may
// be a day or a month, a 2-digit token is a 2000-based year (so "99" means
// 2099, not 1999) and also a day/month when in range, and a 4-digit token is
// a year. findFirstValidDate later picks the interpretation that forms a
// valid date.
func (l *dateLexer) lexDatePart() datePart {
	str := l.ReadWhile(lexer.IsWordChar)
	if str == "" {
		// Not at a word character: consume the whole non-word run as one token.
		str = l.ReadWhile(lexer.IsNonWordChar)
		return datePart{
			format:  datePartNonWordChars,
			literal: str,
		}
	}
	if m, ok := strMonths[strings.ToLower(str)]; ok {
		return datePart{
			Date:    model.Date{Month: m},
			format:  datePartStrMonth,
			literal: str,
		}
	}
	if regex1Digit.MatchString(str) {
		i, _ := strconv.Atoi(str)
		return datePart{
			Date:    model.Date{Day: uint8(i), Month: time.Month(i)},
			format:  datePart1Digit,
			literal: str,
		}
	}
	if regex2Digits.MatchString(str) {
		i, _ := strconv.Atoi(str)
		date := model.Date{Year: model.Year(2000 + i)}
		if i >= 1 && i <= 12 {
			date.Month = time.Month(i)
		}
		if i >= 1 && i <= 31 {
			date.Day = uint8(i)
		}
		return datePart{
			Date:    date,
			format:  datePart2Digits,
			literal: str,
		}
	}
	if regex4Digits.MatchString(str) {
		i, _ := strconv.Atoi(str)
		return datePart{
			Date:    model.Date{Year: model.Year(i)},
			format:  datePart4Digits,
			literal: str,
		}
	}
	return datePart{
		format:  datePartWordChars,
		literal: str,
	}
}
+33
View File
@@ -0,0 +1,33 @@
package parsers
import (
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/stretchr/testify/assert"
"testing"
)
// TestParseDate exercises ParseDate against release-name style inputs.
// A case whose expected Date is the zero value asserts that no date is
// falsely detected in the input.
func TestParseDate(t *testing.T) {
	tests := []struct {
		input    string
		expected model.Date
	}{
		{"2020-01-01", model.Date{Year: 2020, Month: 1, Day: 1}},
		{"01-01-2020", model.Date{Year: 2020, Month: 1, Day: 1}},
		{"01-Jan-2020", model.Date{Year: 2020, Month: 1, Day: 1}},
		{"Jan-01-2020", model.Date{Year: 2020, Month: 1, Day: 1}},
		{"MP3-daily-2019-July-16-Disco", model.Date{Year: 2019, Month: 7, Day: 16}},
		{"XXX Video (2022-09-21) 1080p.mp4", model.Date{Year: 2022, Month: 9, Day: 21}},
		{"Exxtra.23.02.01.Bla.Bla.Bla.XXX.1080p.HEVC.x265.PRT[XvX]", model.Date{Year: 2023, Month: 2, Day: 1}},
		{"The Movie (13.10.2017)_1080p.mp4", model.Date{Year: 2017, Month: 10, Day: 13}},
		{"Movie.23.05.15..The.Best.Of.XXX.1080p.MP4-WRB[rarbg]", model.Date{Year: 2023, Month: 5, Day: 15}},
		{"2021.09.11_Serie_C_2021.22_R.03_Xxx_FC_vs_Xxx_FC_[football.net]_720p.50_RUS.mkv", model.Date{Year: 2021, Month: 9, Day: 11}},
		// "Month day, year" dates are not yet supported:
		//{"Bla Bla June 27, 2015", model.Date{Year: 2015, Month: 6, Day: 27}},
		{input: "Software.Pro.X2.Suite.v19.0.2.23117-R2R"},
	}
	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			result := ParseDate(test.input)
			assert.Equal(t, test.expected, result)
		})
	}
}
@@ -1,7 +1,8 @@
package video
package parsers
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/keywords"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/regex"
"github.com/hedhyw/rex/pkg/dialect"
@@ -59,7 +60,7 @@ var titleEpisodesRegex = rex.New(
),
).MustCompile()
var multiRegex = regex.NewRegexFromNames("multi", "dual")
var multiRegex = keywords.MustNewRegexFromKeywords("multi", "dual")
var separatorToken = rex.Chars.Runes(" ._")
@@ -115,7 +116,7 @@ func parseTitleYear(input string) (string, model.Year, string, error) {
return title, model.Year(yearMatch), input[len(match[0]):], nil
}
}
return "", 0, "", classifier.ErrNoMatch
return "", 0, "", classification.ErrUnmatched
}
func parseTitle(input string) (string, string, error) {
@@ -125,7 +126,7 @@ func parseTitle(input string) (string, string, error) {
return title, input[len(match[0]):], nil
}
}
return "", "", classifier.ErrNoMatch
return "", "", classification.ErrUnmatched
}
func parseTitleYearEpisodes(input string) (string, model.Year, model.Episodes, string, error) {
@@ -141,7 +142,7 @@ func parseTitleYearEpisodes(input string) (string, model.Year, model.Episodes, s
episodes := model.EpisodesMatchToEpisodes(match[2:])
return title, year, episodes, input[len(match[0]):], nil
}
return "", 0, nil, "", classifier.ErrNoMatch
return "", 0, nil, "", classification.ErrUnmatched
}
func ParseTitleYearEpisodes(contentType model.NullContentType, input string) (string, model.Year, model.Episodes, string, error) {
@@ -156,30 +157,40 @@ func ParseTitleYearEpisodes(contentType model.NullContentType, input string) (st
if title, rest, err := parseTitle(input); err == nil {
return title, 0, nil, rest, nil
}
return "", 0, nil, "", classifier.ErrNoMatch
return "", 0, nil, "", classification.ErrUnmatched
}
func ParseContent(hintCt model.NullContentType, input string) (model.ContentType, string, model.Year, classifier.ContentAttributes, error) {
title, year, episodes, rest, err := ParseTitleYearEpisodes(hintCt, input)
func ParseVideoContent(torrent model.Torrent, result classification.Result) (classification.ContentAttributes, error) {
title, year, episodes, rest, err := ParseTitleYearEpisodes(result.ContentType, torrent.Name)
if err != nil {
return "", "", 0, classifier.ContentAttributes{}, err
if !result.ContentType.Valid {
return classification.ContentAttributes{}, err
}
rest = torrent.Name
}
var ct model.ContentType
if hintCt.Valid {
ct = hintCt.ContentType
} else if len(episodes) > 0 {
ct = model.ContentTypeTvShow
} else {
ct = model.ContentTypeMovie
ct := model.NullContentType{}
if result.ContentType.Valid {
ct = model.NullContentType{Valid: true, ContentType: result.ContentType.ContentType}
} else if len(episodes) > 0 || result.Date.IsValid() {
ct = model.NullContentType{Valid: true, ContentType: model.ContentTypeTvShow}
} else if !year.IsNil() {
ct = model.NullContentType{Valid: true, ContentType: model.ContentTypeMovie}
}
if ct != model.ContentTypeTvShow {
if ct.ContentType != model.ContentTypeTvShow {
episodes = nil
if year.IsNil() {
title = ""
rest = torrent.Name
}
}
attrs := classifier.ContentAttributes{
attrs := classification.ContentAttributes{
ContentType: ct,
BaseTitle: model.NullString{Valid: title != "", String: title},
Date: model.Date{Year: year},
Episodes: episodes,
Languages: model.InferLanguages(rest),
LanguageMulti: multiRegex.MatchString(rest),
}
attrs.InferVideoAttributes(rest)
return ct, title, year, attrs, nil
return attrs, nil
}
+245
View File
@@ -0,0 +1,245 @@
package classifier
import (
"errors"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
// HasJsonSchema is implemented by every payload spec so a full JSON schema
// for the classifier DSL can be generated from the spec tree.
type HasJsonSchema interface {
	JsonSchema() JsonSchema
}

// TypedPayload is a payload spec that can unmarshal the compiler context's
// current source value into a T.
type TypedPayload[T any] interface {
	HasJsonSchema
	Unmarshal(ctx compilerContext) (T, error)
}

// PayloadTransformerFunc converts an unmarshalled From value into a To
// value, with access to the compiler context for error annotation.
type PayloadTransformerFunc[From any, To any] func(From, compilerContext) (To, error)

// payloadTransformer decorates a spec of From with a conversion to To while
// exposing the inner spec's JSON schema unchanged.
type payloadTransformer[From any, To any] struct {
	spec      TypedPayload[From]
	transform PayloadTransformerFunc[From, To]
}

func (s payloadTransformer[From, To]) JsonSchema() JsonSchema {
	return s.spec.JsonSchema()
}

// Unmarshal unmarshals with the inner spec, then applies the transform.
func (s payloadTransformer[From, To]) Unmarshal(ctx compilerContext) (to To, _ error) {
	from, err := s.spec.Unmarshal(ctx)
	if err != nil {
		return to, err
	}
	return s.transform(from, ctx)
}
// payloadUnion tries each member spec in order and yields the first that
// unmarshals successfully; its schema is the JSON-schema oneOf of all
// member schemas.
type payloadUnion[T any] struct {
	oneOf []TypedPayload[T]
}

func (s payloadUnion[T]) JsonSchema() JsonSchema {
	memberSchemas := make([]any, len(s.oneOf))
	for i, member := range s.oneOf {
		memberSchemas[i] = member.JsonSchema()
	}
	return map[string]any{
		"oneOf": memberSchemas,
	}
}

// Unmarshal returns the first member spec's successful result; when every
// member fails, all member errors are joined into one.
func (s payloadUnion[T]) Unmarshal(ctx compilerContext) (to T, _ error) {
	errs := make([]error, 0, len(s.oneOf)+1)
	for _, member := range s.oneOf {
		result, err := member.Unmarshal(ctx)
		if err == nil {
			return result, nil
		}
		errs = append(errs, err)
	}
	errs = append(errs, errors.New("no definition matched"))
	return to, errors.Join(errs...)
}
// payloadGeneric accepts any raw source value that is already of type T
// (checked with a direct type assertion), described by a hand-supplied
// jsonSchema.
type payloadGeneric[T any] struct {
	jsonSchema map[string]any
}

func (s payloadGeneric[T]) JsonSchema() JsonSchema {
	return s.jsonSchema
}

// Unmarshal asserts the raw source to T, failing with a path-annotated error
// that names both the expected and actual dynamic type (the previous "not ok"
// message gave no diagnostic information at all).
func (s payloadGeneric[T]) Unmarshal(ctx compilerContext) (to T, err error) {
	to, ok := ctx.source.(T)
	if !ok {
		err = ctx.error(fmt.Errorf("expected value of type %T, got %T", to, ctx.source))
	}
	return to, err
}
// payloadStruct decodes the raw source into struct type T via the package's
// decode helper, described by a hand-supplied jsonSchema.
type payloadStruct[T any] struct {
	jsonSchema map[string]any
}

func (s payloadStruct[T]) JsonSchema() JsonSchema {
	return s.jsonSchema
}

func (s payloadStruct[T]) Unmarshal(ctx compilerContext) (to T, err error) {
	return decode[T](ctx)
}
// payloadLiteral matches exactly one constant value, rendered as a JSON
// schema "const".
type payloadLiteral[T comparable] struct {
	literal     T
	description string
}

func (s payloadLiteral[T]) JsonSchema() JsonSchema {
	schema := map[string]any{
		"const": s.literal,
	}
	if s.description != "" {
		schema["description"] = s.description
	}
	return schema
}

// Unmarshal decodes the source as T and requires it to equal the literal.
func (s payloadLiteral[T]) Unmarshal(ctx compilerContext) (to T, _ error) {
	typedPayload, err := decode[T](ctx)
	if err != nil {
		return to, err
	}
	if typedPayload != s.literal {
		// Annotate with the compiler path and name the expected value, for
		// consistency with the other payload specs (was a bare
		// errors.New("value mismatch") with no context).
		return to, ctx.error(fmt.Errorf("expected literal value '%v'", s.literal))
	}
	return typedPayload, nil
}
// payloadList decodes a list of T. A nil source yields a nil list, and a
// non-list source is coerced to a single-element list, so a scalar may be
// written where a list is expected.
// NOTE(review): the schema declares only "type": "array", while Unmarshal
// also accepts a scalar — schema validation is stricter than the decoder;
// confirm this is intended.
type payloadList[T any] struct {
	itemSpec    TypedPayload[T]
	description string
}

func (s payloadList[T]) JsonSchema() JsonSchema {
	schema := map[string]any{
		"type":  "array",
		"items": s.itemSpec.JsonSchema(),
	}
	if s.description != "" {
		schema["description"] = s.description
	}
	return schema
}

func (s payloadList[T]) Unmarshal(ctx compilerContext) (to []T, _ error) {
	if ctx.source == nil {
		return nil, nil
	}
	rawList, ok := ctx.source.([]any)
	if !ok {
		// coerce a scalar source to a single-element list
		rawList = []any{ctx.source}
	}
	to = make([]T, len(rawList))
	for i, rawItem := range rawList {
		// each item is decoded under a numeric path segment for error reporting
		item, err := s.itemSpec.Unmarshal(ctx.child(numericPathPart(i), rawItem))
		if err != nil {
			return to, err
		}
		to[i] = item
	}
	return to, nil
}
// payloadSingleKeyValue decodes an object holding exactly one key (s.key),
// whose value is decoded with valueSpec — the "{keyword: payload}" shape
// used throughout the classifier DSL.
type payloadSingleKeyValue[T any] struct {
	key         string
	valueSpec   TypedPayload[T]
	description string
}

func (s payloadSingleKeyValue[T]) JsonSchema() JsonSchema {
	schema := map[string]any{
		"type": "object",
		"properties": map[string]any{
			s.key: s.valueSpec.JsonSchema(),
		},
		"required":             []string{s.key},
		"additionalProperties": false,
	}
	if s.description != "" {
		schema["description"] = s.description
	}
	return schema
}

// Unmarshal requires the source to be a map with s.key as its only entry,
// then decodes that entry's value under a child path.
func (s payloadSingleKeyValue[T]) Unmarshal(ctx compilerContext) (to T, _ error) {
	rawMap, err := decode[map[string]any](ctx)
	if err != nil {
		return to, err
	}
	if len(rawMap) != 1 {
		return to, ctx.error(errors.New("expected a single key"))
	}
	rawValue, ok := rawMap[s.key]
	if !ok {
		return to, ctx.error(fmt.Errorf("missing expected key: '%s' %+v", s.key, rawMap))
	}
	value, err := s.valueSpec.Unmarshal(ctx.child(s.key, rawValue))
	if err != nil {
		return to, err
	}
	return value, nil
}
// payloadEnum accepts one of a fixed set of string-kinded values, rendered
// as a JSON schema string enum.
type payloadEnum[T string] struct {
	values []T
}

func (s payloadEnum[T]) JsonSchema() JsonSchema {
	return map[string]any{
		"type": "string",
		"enum": s.values,
	}
}

// Unmarshal decodes the source as T and rejects any value outside the enum.
func (s payloadEnum[T]) Unmarshal(ctx compilerContext) (to T, _ error) {
	decoded, err := decode[T](ctx)
	if err != nil {
		return to, ctx.error(err)
	}
	for _, allowed := range s.values {
		if decoded == allowed {
			return decoded, nil
		}
	}
	return to, ctx.error(fmt.Errorf("value not in enum: '%s'", decoded))
}
// payloadMustSucceed wraps another payload spec and escalates any unmarshal
// failure to a fatal compiler error, aborting alternative attempts.
type payloadMustSucceed[T any] struct {
	payload TypedPayload[T]
}

func (p payloadMustSucceed[T]) JsonSchema() JsonSchema {
	return p.payload.JsonSchema()
}

func (p payloadMustSucceed[T]) Unmarshal(ctx compilerContext) (t T, _ error) {
	result, err := p.payload.Unmarshal(ctx)
	if err == nil {
		return result, nil
	}
	return t, ctx.fatal(err)
}
// contentTypePayloadSpec decodes a content-type name into a NullContentType;
// the extra "unknown" name maps to the invalid (null) content type.
var contentTypePayloadSpec = payloadTransformer[string, model.NullContentType]{
	spec: payloadEnum[string]{append(model.ContentTypeNames(), "unknown")},
	transform: func(str string, _ compilerContext) (model.NullContentType, error) {
		if str == "unknown" {
			return model.NullContentType{}, nil
		}
		contentType, err := model.ParseContentType(str)
		if err != nil {
			return model.NullContentType{}, err
		}
		return model.NullContentType{ContentType: contentType, Valid: true}, nil
	},
}
+49
View File
@@ -0,0 +1,49 @@
package classifier
import (
"context"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/protobuf"
)
// runner executes named classifier workflows against torrents.
type runner struct {
	dependencies
	workflows map[string]action
}

// Run executes the named workflow against torrent t. The starting result is
// seeded from the torrent's hint (when present), and a matching
// already-loaded content record is attached up front so the workflow need
// not resolve it again.
func (r runner) Run(ctx context.Context, workflow string, t model.Torrent) (classification.Result, error) {
	w, ok := r.workflows[workflow]
	if !ok {
		return classification.Result{}, fmt.Errorf("workflow not found: %s", workflow)
	}
	cl := classification.Result{}
	if !t.Hint.IsNil() {
		cl.ApplyHint(t.Hint)
	}
	// if possible, attach the existing content to the result to save some work:
	if !t.Hint.IsNil() && t.Hint.ContentSource.Valid {
		for _, tc := range t.Contents {
			if tc.ContentType.Valid &&
				tc.ContentType.ContentType == t.Hint.ContentType &&
				tc.ContentSource.Valid &&
				tc.ContentSource.String == t.Hint.ContentSource.String &&
				tc.ContentID.String == t.Hint.ContentID.String &&
				tc.Content.Source == tc.ContentSource.String {
				// copy before taking the address so the attached content does
				// not alias the loop variable's storage
				content := tc.Content
				cl.AttachContent(&content)
				break
			}
		}
	}
	exCtx := executionContext{
		Context:      ctx,
		dependencies: r.dependencies,
		workflows:    r.workflows,
		torrent:      t,
		torrentPb:    protobuf.NewTorrent(t),
		result:       cl,
	}
	return w.run(exCtx)
}
+90
View File
@@ -0,0 +1,90 @@
package classifier
import (
"context"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/database/query"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
// LocalSearch resolves classifier matches against content already stored in
// the local database, by external id or by title search.
type LocalSearch interface {
	ContentById(context.Context, model.ContentRef) (model.Content, error)
	ContentBySearch(context.Context, model.ContentType, string, model.Year) (model.Content, error)
}

// localSearch implements LocalSearch on top of the database search layer.
type localSearch struct {
	search.Search
}

// ContentById looks up locally-stored content by an external reference.
// A "tmdb" reference is matched against the content's canonical identifier;
// any other source is matched against its alternative identifiers.
// Returns classification.ErrUnmatched when nothing is found.
func (l localSearch) ContentById(ctx context.Context, ref model.ContentRef) (model.Content, error) {
	options := []query.Option{
		query.Where(
			search.ContentTypeCriteria(ref.Type),
		),
		search.ContentDefaultPreload(),
		search.ContentDefaultHydrate(),
		query.Limit(1),
	}
	if ref.Source == "tmdb" {
		// tmdb ids are stored as the canonical identifier; other sources
		// (e.g. imdb) live in the alternative-identifier set
		options = append(options, query.Where(
			search.ContentCanonicalIdentifierCriteria(model.ContentRef{
				Source: ref.Source,
				ID:     ref.ID,
			}),
		))
	} else {
		options = append(options, query.Where(
			search.ContentAlternativeIdentifierCriteria(model.ContentRef{
				Source: ref.Source,
				ID:     ref.ID,
			}),
		))
	}
	result, err := l.Search.Content(ctx, options...)
	if err != nil {
		return model.Content{}, err
	}
	if len(result.Items) == 0 {
		return model.Content{}, classification.ErrUnmatched
	}
	return result.Items[0].Content, nil
}
// ContentBySearch full-text searches locally-stored content of the given
// type for baseTitle (optionally constrained to the release year), then
// accepts the first of the top 5 ranked hits whose title or original title
// passes the Levenshtein closeness check against baseTitle.
// Returns classification.ErrUnmatched when no candidate is close enough.
func (l localSearch) ContentBySearch(ctx context.Context, ct model.ContentType, baseTitle string, year model.Year) (model.Content, error) {
	options := []query.Option{
		query.Where(search.ContentTypeCriteria(ct)),
		// quote the title so it is searched as a phrase
		query.QueryString(fmt.Sprintf("\"%s\"", baseTitle)),
		query.OrderByQueryStringRank(),
		query.Limit(5),
		search.ContentDefaultPreload(),
		search.ContentDefaultHydrate(),
	}
	if !year.IsNil() {
		options = append(options, query.Where(search.ContentReleaseDateCriteria(model.NewDateRangeFromYear(year))))
	}
	result, searchErr := l.Search.Content(
		ctx,
		options...,
	)
	if searchErr != nil {
		return model.Content{}, searchErr
	}
	var content *model.Content
	for _, item := range result.Items {
		candidates := []string{item.Title}
		if item.OriginalTitle.Valid {
			candidates = append(candidates, item.OriginalTitle.String)
		}
		if levenshteinCheck(baseTitle, candidates, levenshteinThreshold) {
			// copy the item so the returned content does not alias the loop variable
			c := item.Content
			content = &c
			break
		}
	}
	if content == nil {
		return model.Content{}, classification.ErrUnmatched
	}
	return *content, nil
}
+83
View File
@@ -0,0 +1,83 @@
package classifier
// Source is the raw, mergeable representation of a classifier definition as
// loaded from YAML or application config, before compilation.
type Source struct {
	Schema          string          `json:"$schema,omitempty" yaml:"$schema,omitempty"`
	Workflows       workflowSources `json:"workflows"`
	FlagDefinitions flagDefinitions `json:"flag_definitions"`
	Flags           flags           `json:"flags"`
	Keywords        keywordGroups   `json:"keywords"`
	Extensions      extensionGroups `json:"extensions"`
}

// merge combines this source with other, with other taking precedence per
// each member's merge rules; only the flag-definition merge can fail.
// NOTE(review): the Schema field is not carried into the merged result —
// presumably intentional since $schema is editor metadata; confirm.
func (s Source) merge(other Source) (Source, error) {
	flagDefs, err := s.FlagDefinitions.merge(other.FlagDefinitions)
	if err != nil {
		return Source{}, err
	}
	return Source{
		FlagDefinitions: flagDefs,
		Flags:           s.Flags.merge(other.Flags),
		Keywords:        s.Keywords.merge(other.Keywords),
		Extensions:      s.Extensions.merge(other.Extensions),
		Workflows:       s.Workflows.merge(other.Workflows),
	}, nil
}

// workflowNames returns the set of defined workflow names.
func (s Source) workflowNames() map[string]struct{} {
	result := make(map[string]struct{})
	for k := range s.Workflows {
		result[k] = struct{}{}
	}
	return result
}
// keywordGroups maps a keyword group name to its list of keywords.
type keywordGroups map[string][]string

// merge returns a new map combining g and other: groups present in both
// sides get other's keywords appended after g's; groups present on only one
// side are taken as-is. A fresh slice is allocated for combined groups so
// the result never aliases g's backing arrays — the previous
// append(v, other[k]...) could write into spare capacity of g's slice,
// silently corrupting the result of an earlier merge of the same receiver.
func (g keywordGroups) merge(other keywordGroups) keywordGroups {
	result := make(keywordGroups, len(g)+len(other))
	for k, v := range g {
		if o, ok := other[k]; ok {
			combined := make([]string, 0, len(v)+len(o))
			combined = append(combined, v...)
			combined = append(combined, o...)
			result[k] = combined
		} else {
			result[k] = v
		}
	}
	for k, v := range other {
		if _, ok := result[k]; !ok {
			result[k] = v
		}
	}
	return result
}
// extensionGroups maps a file-extension group name to its list of extensions.
type extensionGroups map[string][]string

// merge returns a new map combining g and other: groups present in both
// sides get other's extensions appended after g's; groups present on only
// one side are taken as-is. A fresh slice is allocated for combined groups
// so the result never aliases g's backing arrays — the previous
// append(v, other[k]...) could write into spare capacity of g's slice,
// silently corrupting the result of an earlier merge of the same receiver.
func (g extensionGroups) merge(other extensionGroups) extensionGroups {
	result := make(extensionGroups, len(g)+len(other))
	for k, v := range g {
		if o, ok := other[k]; ok {
			combined := make([]string, 0, len(v)+len(o))
			combined = append(combined, v...)
			combined = append(combined, o...)
			result[k] = combined
		} else {
			result[k] = v
		}
	}
	for k, v := range other {
		if _, ok := result[k]; !ok {
			result[k] = v
		}
	}
	return result
}
// workflowSources maps a workflow name to its raw (undecoded) definition.
type workflowSources map[string]any

// merge returns a new map holding every workflow from s and other, with
// other's definition winning when both define the same name.
func (s workflowSources) merge(other workflowSources) workflowSources {
	merged := make(workflowSources, len(s)+len(other))
	for name, def := range s {
		merged[name] = def
	}
	for name, def := range other {
		merged[name] = def
	}
	return merged
}
+8
View File
@@ -0,0 +1,8 @@
package classifier
import (
_ "embed"
)
// classifierCoreYaml is the built-in core classifier definition, embedded
// from classifier.core.yaml at build time; it is always loaded as the first
// (lowest-precedence) source layer.
//
//go:embed classifier.core.yaml
var classifierCoreYaml []byte
+128
View File
@@ -0,0 +1,128 @@
package classifier
import (
"github.com/adrg/xdg"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
"gopkg.in/yaml.v3"
"os"
)
// newSourceProvider assembles the classifier source chain, lowest precedence
// first: the embedded core definition, the user's XDG config file,
// ./classifier.yml in the working directory, and finally overrides derived
// from application config; each layer is merged over the previous ones.
func newSourceProvider(config Config, tmdbConfig tmdb.Config) sourceProvider {
	return mergeSourceProvider{
		providers: []sourceProvider{
			yamlSourceProvider{rawSourceProvider: coreSourceProvider{}},
			yamlSourceProvider{rawSourceProvider: xdgSourceProvider{}},
			yamlSourceProvider{rawSourceProvider: cwdSourceProvider{}},
			configSourceProvider{
				config:      config,
				tmdbEnabled: tmdbConfig.Enabled,
			},
		},
	}
}

// sourceProvider yields one (possibly partial) classifier Source.
type sourceProvider interface {
	source() (Source, error)
}
// mergeSourceProvider folds the sources of several providers together in
// order, later providers taking precedence via Source.merge.
type mergeSourceProvider struct {
	providers []sourceProvider
}

// source merges each provider's Source into an accumulator, returning the
// first provider or merge error encountered.
func (m mergeSourceProvider) source() (Source, error) {
	accumulated := Source{}
	for _, provider := range m.providers {
		next, err := provider.source()
		if err != nil {
			return accumulated, err
		}
		combined, mergeErr := accumulated.merge(next)
		if mergeErr != nil {
			return accumulated, mergeErr
		}
		accumulated = combined
	}
	return accumulated, nil
}
// rawSourceProvider yields the raw YAML bytes of a classifier source.
type rawSourceProvider interface {
	source() ([]byte, error)
}

// yamlSourceProvider parses a raw provider's YAML into a Source.
type yamlSourceProvider struct {
	rawSourceProvider
}

// source reads the raw YAML, unmarshals it into a generic map, then decodes
// that map into a Source via the package's custom decoder.
func (y yamlSourceProvider) source() (Source, error) {
	raw, err := y.rawSourceProvider.source()
	if err != nil {
		return Source{}, err
	}
	rawWorkflow := make(map[string]interface{})
	parseErr := yaml.Unmarshal(raw, &rawWorkflow)
	if parseErr != nil {
		return Source{}, parseErr
	}
	src := Source{}
	decoder, decoderErr := newDecoder(&src)
	if decoderErr != nil {
		return Source{}, decoderErr
	}
	if decodeErr := decoder.Decode(rawWorkflow); decodeErr != nil {
		return Source{}, decodeErr
	}
	return src, nil
}

// coreSourceProvider serves the embedded built-in classifier definition.
type coreSourceProvider struct{}

func (c coreSourceProvider) source() ([]byte, error) {
	return classifierCoreYaml, nil
}

// xdgSourceProvider reads the user's classifier config from the XDG config
// directory (bitmagnet/classifier.yml). A missing or unresolvable file is
// not an error: an empty YAML document ("{}") is returned so merging can
// proceed; any other read error is propagated.
type xdgSourceProvider struct{}

func (_ xdgSourceProvider) source() ([]byte, error) {
	if path, pathErr := xdg.ConfigFile("bitmagnet/classifier.yml"); pathErr == nil {
		if bytes, readErr := os.ReadFile(path); readErr == nil {
			return bytes, nil
		} else if !os.IsNotExist(readErr) {
			return nil, readErr
		}
	}
	return []byte{'{', '}'}, nil
}
// cwdSourceProvider reads ./classifier.yml from the working directory.
// A missing file is not an error: an empty YAML document ("{}") is returned
// so merging can proceed; any other read error is propagated.
type cwdSourceProvider struct{}

func (cwdSourceProvider) source() ([]byte, error) {
	raw, err := os.ReadFile("./classifier.yml")
	if err == nil {
		return raw, nil
	}
	if !os.IsNotExist(err) {
		return nil, err
	}
	return []byte("{}"), nil
}
// configSourceProvider derives a partial Source from application config:
// user-supplied keyword and extension groups plus flag overrides. The
// delete_xxx flag is forced on when configured, and tmdb_enabled is forced
// off when the TMDB integration is disabled.
type configSourceProvider struct {
	config      Config
	tmdbEnabled bool
}

func (c configSourceProvider) source() (Source, error) {
	fs := make(flags)
	for k, v := range c.config.Flags {
		fs[k] = v
	}
	if c.config.DeleteXxx {
		fs["delete_xxx"] = true
	}
	if !c.tmdbEnabled {
		fs["tmdb_enabled"] = false
	}
	return Source{
		Keywords:   c.config.Keywords,
		Extensions: c.config.Extensions,
		Flags:      fs,
	}, nil
}
+105
View File
@@ -0,0 +1,105 @@
package classifier
import (
"errors"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/classification"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
)
// tmdb_searchMovie searches TMDB for a movie by title (optionally
// constrained to a release year), including adult titles, and resolves the
// first result whose title or original title passes the Levenshtein
// closeness check into the full content model.
// Returns classification.ErrUnmatched when no result is close enough.
func (c executionContext) tmdb_searchMovie(title string, year model.Year) (model.Content, error) {
	req := tmdb.SearchMovieRequest{
		Query:        title,
		IncludeAdult: true,
	}
	if !year.IsNil() {
		req.Year = year
	}
	searchResult, searchErr := c.tmdbClient.SearchMovie(c.Context, req)
	if searchErr != nil {
		return model.Content{}, searchErr
	}
	for _, item := range searchResult.Results {
		if levenshteinCheck(title, []string{item.Title, item.OriginalTitle}, levenshteinThreshold) {
			return c.tmdb_getMovieByTmbdId(item.ID)
		}
	}
	return model.Content{}, classification.ErrUnmatched
}

// tmdb_searchTvShow is the TV counterpart of tmdb_searchMovie: it matches
// on name/original name and constrains by first-air-date year when given.
func (c executionContext) tmdb_searchTvShow(title string, year model.Year) (model.Content, error) {
	req := tmdb.SearchTvRequest{
		Query:        title,
		IncludeAdult: true,
	}
	if !year.IsNil() {
		req.FirstAirDateYear = year
	}
	searchResult, searchErr := c.tmdbClient.SearchTv(c.Context, req)
	if searchErr != nil {
		return model.Content{}, searchErr
	}
	for _, item := range searchResult.Results {
		if levenshteinCheck(title, []string{item.Name, item.OriginalName}, levenshteinThreshold) {
			return c.tmdb_getTvShowByTmbdId(item.ID)
		}
	}
	return model.Content{}, classification.ErrUnmatched
}
// tmdb_getMovieByTmbdId fetches full movie details for a TMDB id and
// converts them to the internal content model. tmdb.ErrNotFound is
// translated to classification.ErrUnmatched so callers treat it as "no
// match" rather than a hard failure.
// NOTE(review): "Tmbd" is a typo for "Tmdb"; kept to avoid breaking callers
// elsewhere — rename across the package in a follow-up.
func (c executionContext) tmdb_getMovieByTmbdId(id int64) (movie model.Content, err error) {
	d, getDetailsErr := c.tmdbClient.MovieDetails(c.Context, tmdb.MovieDetailsRequest{
		ID: id,
	})
	if getDetailsErr != nil {
		if errors.Is(getDetailsErr, tmdb.ErrNotFound) {
			getDetailsErr = classification.ErrUnmatched
		}
		err = getDetailsErr
		return
	}
	return tmdb.MovieDetailsToMovieModel(d)
}
// tmdb_getTvShowByTmbdId fetches full TV-show details (including external
// ids) for a TMDB id and converts them to the internal content model.
// tmdb.ErrNotFound is translated to classification.ErrUnmatched so callers
// treat it as "no match" rather than a hard failure.
// The named result was renamed from the copy-pasted "movie" to "show";
// named results are not part of the caller-visible interface.
// NOTE(review): "Tmbd" is a typo for "Tmdb"; kept to avoid breaking callers
// elsewhere — rename across the package in a follow-up.
func (c executionContext) tmdb_getTvShowByTmbdId(id int64) (show model.Content, err error) {
	d, getDetailsErr := c.tmdbClient.TvDetails(c.Context, tmdb.TvDetailsRequest{
		SeriesID:         id,
		AppendToResponse: []string{"external_ids"},
	})
	if getDetailsErr != nil {
		if errors.Is(getDetailsErr, tmdb.ErrNotFound) {
			getDetailsErr = classification.ErrUnmatched
		}
		err = getDetailsErr
		return
	}
	return tmdb.TvShowDetailsToTvShowModel(d)
}
// tmdb_getTmdbIdByExternalId resolves an external content reference (e.g.
// an IMDb id) to a TMDB id using TMDB's find-by-external-id endpoint.
// Movie and XXX content resolve through the movie results, TV shows through
// the TV results; any other content type — or an empty result set — yields
// classification.ErrUnmatched.
func (c executionContext) tmdb_getTmdbIdByExternalId(ref model.ContentRef) (int64, error) {
	externalSource, externalId, externalSourceErr := tmdb.ExternalSource(ref)
	if externalSourceErr != nil {
		return 0, externalSourceErr
	}
	byIdResult, byIdErr := c.tmdbClient.FindByID(c.Context, tmdb.FindByIDRequest{
		ExternalSource: externalSource,
		ExternalID:     externalId,
	})
	if byIdErr != nil {
		return 0, byIdErr
	}
	switch ref.Type {
	case model.ContentTypeMovie, model.ContentTypeXxx:
		if len(byIdResult.MovieResults) == 0 {
			return 0, classification.ErrUnmatched
		}
		return byIdResult.MovieResults[0].ID, nil
	case model.ContentTypeTvShow:
		if len(byIdResult.TvResults) == 0 {
			return 0, classification.ErrUnmatched
		}
		return byIdResult.TvResults[0].ID, nil
	default:
		return 0, classification.ErrUnmatched
	}
}
@@ -1,10 +1,12 @@
package tmdb
package classifier
import (
"github.com/agnivade/levenshtein"
"github.com/bitmagnet-io/bitmagnet/internal/regex"
)
const levenshteinThreshold = 5
func levenshteinCheck(target string, candidates []string, threshold uint) bool {
normTarget := regex.NormalizeString(target)
triedCandidates := make(map[string]struct{}, len(candidates))
-94
View File
@@ -1,94 +0,0 @@
package video
import (
"context"
"errors"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video/tmdb"
"github.com/bitmagnet-io/bitmagnet/internal/model"
)
type videoClassifier struct {
tmdbClient tmdb.Client
}
func (c videoClassifier) Key() string {
return "video"
}
func (c videoClassifier) Priority() int {
return 1
}
func (c videoClassifier) Classify(ctx context.Context, t model.Torrent) (classifier.Classification, error) {
if hasVideo := t.HasFileType(model.FileTypeVideo); hasVideo.Valid && !hasVideo.Bool {
return classifier.Classification{}, classifier.ErrNoMatch
}
if !t.Hint.IsNil() && !t.Hint.ContentType.IsVideo() {
return classifier.Classification{}, classifier.ErrNoMatch
}
ct, title, year, attrs, err := ParseContent(t.Hint.NullContentType(), t.Name)
if err != nil {
return classifier.Classification{}, err
}
ref := t.Hint.ContentRef()
if t.Hint.Title.Valid {
title = t.Hint.Title.String
}
cl := classifier.Classification{
ContentAttributes: attrs,
}
if content, err := c.resolveContent(ctx, ct, ref, title, year); err == nil {
cl.Content = &content
} else if !errors.Is(err, classifier.ErrNoMatch) {
return classifier.Classification{}, err
}
cl.ApplyHint(t.Hint)
if cl.Content != nil {
cl.ContentType = model.NewNullContentType(cl.Content.Type)
if cl.Content.OriginalLanguage.Valid {
if len(cl.Languages) == 0 || cl.LanguageMulti {
if cl.Languages == nil {
cl.Languages = make(model.Languages)
}
cl.Languages[cl.Content.OriginalLanguage.Language] = struct{}{}
}
}
}
if !cl.ContentType.Valid {
return classifier.Classification{}, classifier.ErrNoMatch
}
return cl, nil
}
func (c videoClassifier) resolveContent(
ctx context.Context,
ct model.ContentType,
ref model.Maybe[model.ContentRef],
title string,
year model.Year,
) (model.Content, error) {
if ct == model.ContentTypeMovie || ct == model.ContentTypeXxx {
if ref.Valid {
return c.tmdbClient.GetMovieByExternalId(ctx, ref.Val.Source, ref.Val.ID)
}
return c.tmdbClient.SearchMovie(ctx, tmdb.SearchMovieParams{
Title: title,
Year: year,
IncludeAdult: true,
LevenshteinThreshold: 5,
})
}
if ct == model.ContentTypeTvShow {
if ref.Valid {
return c.tmdbClient.GetTvShowByExternalId(ctx, ref.Val.Source, ref.Val.ID)
}
return c.tmdbClient.SearchTvShow(ctx, tmdb.SearchTvShowParams{
Name: title,
Year: year,
IncludeAdult: true,
LevenshteinThreshold: 5,
})
}
return model.Content{}, classifier.ErrNoMatch
}
-32
View File
@@ -1,32 +0,0 @@
package video
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video/tmdb"
"go.uber.org/fx"
)
type Params struct {
fx.In
TmdbClient lazy.Lazy[tmdb.Client]
}
type Result struct {
fx.Out
Classifier lazy.Lazy[classifier.SubClassifier] `group:"content_classifiers"`
}
func New(p Params) Result {
return Result{
Classifier: lazy.New(func() (classifier.SubClassifier, error) {
tmdbClient, err := p.TmdbClient.Get()
if err != nil {
return nil, err
}
return videoClassifier{
tmdbClient: tmdbClient,
}, nil
}),
}
}
-330
View File
@@ -1,330 +0,0 @@
package video
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/stretchr/testify/assert"
"testing"
)
func TestParse(t *testing.T) {
type output struct {
contentType model.ContentType
title string
releaseYear model.Year
attrs classifier.ContentAttributes
}
type parseTest struct {
contentType model.NullContentType
inputString string
expectedOutput output
}
var parseTests = []parseTest{
{
inputString: "Mission.Impossible",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "Mission Impossible",
},
},
{
inputString: "Mission.Impossible.2023",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "Mission Impossible",
releaseYear: 2023,
},
},
{
inputString: "Mission.Impossible.2023.1080p.BluRay.x264-SPARKS",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "Mission Impossible",
releaseYear: 2023,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceBluRay),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecX264),
ReleaseGroup: model.NullString{
String: "SPARKS",
Valid: true,
},
},
},
},
{
inputString: "Die.Hard.(With.A.Vengeance!).And.A.Suffix.2023.1080p.BluRay.x264-SPARKS",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "Die Hard (With A Vengeance!) And A Suffix",
releaseYear: 2023,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceBluRay),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecX264),
ReleaseGroup: model.NullString{
String: "SPARKS",
Valid: true,
},
},
},
},
{
inputString: "The.Movie.from.U.N.C.L.E.2015.1080p.BluRay.x264-SPARKS",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "The Movie from U.N.C.L.E.",
releaseYear: 2015,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceBluRay),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecX264),
ReleaseGroup: model.NullString{
String: "SPARKS",
Valid: true,
},
},
},
},
{
inputString: "1776.1979.EXTENDED.HD.BluRay.X264-AMIABLE",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "1776",
releaseYear: 1979,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoSource: model.NewNullVideoSource(model.VideoSourceBluRay),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecX264),
ReleaseGroup: model.NullString{
String: "AMIABLE",
Valid: true,
},
},
},
},
{
inputString: "MY MOVIE (2016) [R][Action, Horror][720p.WEB-DL.AVC.8Bit.6ch.AC3].mkv",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "MY MOVIE",
releaseYear: 2016,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBDL),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
},
},
},
{
inputString: "R.I.P.D.2013.720p.BluRay.x264-SPARKS",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "R.I.P.D.",
releaseYear: 2013,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoSource: model.NewNullVideoSource(model.VideoSourceBluRay),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecX264),
ReleaseGroup: model.NullString{
String: "SPARKS",
Valid: true,
},
},
},
},
{
inputString: "This Is A Movie (1999) [IMDB #] <Genre, Genre, Genre> {ACTORS} !DIRECTOR +MORE_SILLY_STUFF_NO_ONE_NEEDS ?",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "This Is A Movie",
releaseYear: 1999,
},
},
{
inputString: "We Are the Movie!.2013.720p.H264.mkv",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "We Are the Movie!",
releaseYear: 2013,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
},
},
},
{
inputString: "[ example.com ] We Are the Movie!.2013.720p.H264.mkv",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "We Are the Movie!",
releaseYear: 2013,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
},
},
},
{
inputString: "【 example.com 】We Are the Movie!.2013.720p.H264.mkv",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "We Are the Movie!",
releaseYear: 2013,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV720p),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
},
},
},
{
inputString: "Маша и Медведь в кино-12 месяцев.2022.WEBRip.1080p_от New-Team.mkv",
expectedOutput: output{
contentType: model.ContentTypeMovie,
title: "Маша и Медведь в кино-12 месяцев",
releaseYear: 2022,
attrs: classifier.ContentAttributes{
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
//ReleaseGroup: "New-Team",
},
},
},
{
inputString: "The.Series.name.S04E08.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddEpisode(4, 8),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.S03-5.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddSeason(3).AddSeason(4).AddSeason(5),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.S03,4,5,6.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddSeason(3).AddSeason(4).AddSeason(5).AddSeason(6),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.S04E03-5.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddEpisode(4, 3).AddEpisode(4, 4).AddEpisode(4, 5),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.S04E03,4,5.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddEpisode(4, 3).AddEpisode(4, 4).AddEpisode(4, 5),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.1x02.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddEpisode(1, 2),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
{
inputString: "The.Series.name.1x01-03.1080p.WEB.h264-GRP[eztv.re].mkv",
expectedOutput: output{
contentType: model.ContentTypeTvShow,
title: "The Series name",
attrs: classifier.ContentAttributes{
Episodes: make(model.Episodes).AddEpisode(1, 1).AddEpisode(1, 2).AddEpisode(1, 3),
VideoResolution: model.NewNullVideoResolution(model.VideoResolutionV1080p),
VideoSource: model.NewNullVideoSource(model.VideoSourceWEBRip),
VideoCodec: model.NewNullVideoCodec(model.VideoCodecH264),
ReleaseGroup: model.NullString{
String: "GRP",
Valid: true,
},
},
},
},
}
for _, test := range parseTests {
t.Run(test.inputString, func(t *testing.T) {
ct, title, year, attrs, err := ParseContent(
test.contentType,
test.inputString,
)
assert.NoError(t, err)
assert.Equal(t, test.expectedOutput.contentType, ct)
assert.Equal(t, test.expectedOutput.title, title)
assert.Equal(t, test.expectedOutput.releaseYear, year)
assert.Equal(t, test.expectedOutput.attrs, attrs)
})
}
}
-23
View File
@@ -1,23 +0,0 @@
package tmdb
import (
"errors"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
)
// Client combines movie and TV-show lookup behaviour behind a single
// interface, so callers can resolve either kind of content through one value.
type Client interface {
MovieClient
TvShowClient
}
// client is the concrete Client implementation, backed by the TMDB HTTP
// client (c) for remote lookups and the local search service (s) so that
// already-imported content is resolved without a network round trip.
type client struct {
c tmdb.Client
s search.Search
}
// SourceTmdb identifies TMDB as the canonical content source.
const SourceTmdb = "tmdb"
// ErrUnknownSource is returned when an external identifier names a source
// this package cannot translate (see getExternalSource).
var (
ErrUnknownSource = errors.New("unknown source")
)
-40
View File
@@ -1,40 +0,0 @@
package tmdb
import (
"github.com/bitmagnet-io/bitmagnet/internal/boilerplate/lazy"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
"go.uber.org/fx"
"go.uber.org/zap"
)
// Params declares the fx-injected dependencies required to build a Client.
type Params struct {
fx.In
// Search lazily resolves the local database search service.
Search lazy.Lazy[search.Search]
// TmdbClient lazily resolves the underlying TMDB HTTP client.
TmdbClient lazy.Lazy[tmdb.Client]
Logger *zap.SugaredLogger
}
// Result is the fx output struct exposing the lazily-constructed Client.
type Result struct {
fx.Out
Client lazy.Lazy[Client]
}
// New wires a lazily-initialised Client from its fx dependencies.
// Construction of the underlying search service and TMDB client is deferred
// until the Client is first requested, matching the lazy.Lazy contract.
func New(p Params) Result {
	build := func() (Client, error) {
		searchSvc, searchErr := p.Search.Get()
		if searchErr != nil {
			return nil, searchErr
		}
		tmdbClient, clientErr := p.TmdbClient.Get()
		if clientErr != nil {
			return nil, clientErr
		}
		return &client{c: tmdbClient, s: searchSvc}, nil
	}
	return Result{Client: lazy.New(build)}
}
-263
View File
@@ -1,263 +0,0 @@
package tmdb
import (
"context"
"errors"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/database/query"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
"strconv"
)
// MovieClient resolves movie content either by fuzzy title search or by an
// external identifier (tmdb/imdb/tvdb).
type MovieClient interface {
SearchMovie(ctx context.Context, p SearchMovieParams) (model.Content, error)
GetMovieByExternalId(ctx context.Context, source, id string) (model.Content, error)
}
// SearchMovieParams are the inputs to SearchMovie.
type SearchMovieParams struct {
Title string
// Year restricts matches to a release year; a nil year disables the filter.
Year model.Year
IncludeAdult bool
// LevenshteinThreshold is the maximum edit distance accepted when comparing
// the requested title against candidate titles.
LevenshteinThreshold uint
}
// SearchMovie resolves a movie by title/year, preferring content already in
// the local database and falling back to a TMDB search only when the local
// lookup reports no match.
func (c *client) SearchMovie(ctx context.Context, p SearchMovieParams) (model.Content, error) {
	local, err := c.searchMovieLocal(ctx, p)
	if err == nil {
		return local, nil
	}
	if !errors.Is(err, classifier.ErrNoMatch) {
		return model.Content{}, err
	}
	return c.searchMovieTmdb(ctx, p)
}
// searchMovieLocal tries to resolve the movie against content already stored
// in the local database, returning classifier.ErrNoMatch when no candidate
// passes the Levenshtein title check.
func (c *client) searchMovieLocal(ctx context.Context, p SearchMovieParams) (model.Content, error) {
	opts := []query.Option{
		query.Where(search.ContentTypeCriteria(model.ContentTypeMovie, model.ContentTypeXxx)),
		// Quote the title so it is treated as a phrase by the query parser.
		query.QueryString(fmt.Sprintf("\"%s\"", p.Title)),
		query.OrderByQueryStringRank(),
		query.Limit(5),
		search.ContentDefaultPreload(),
		search.ContentDefaultHydrate(),
	}
	if !p.Year.IsNil() {
		opts = append(opts, query.Where(search.ContentReleaseDateCriteria(model.NewDateRangeFromYear(p.Year))))
	}
	result, err := c.s.Content(ctx, opts...)
	if err != nil {
		return model.Content{}, err
	}
	for _, item := range result.Items {
		candidates := []string{item.Title}
		if item.OriginalTitle.Valid {
			candidates = append(candidates, item.OriginalTitle.String)
		}
		if levenshteinCheck(p.Title, candidates, p.LevenshteinThreshold) {
			return item.Content, nil
		}
	}
	return model.Content{}, classifier.ErrNoMatch
}
// searchMovieTmdb queries the TMDB API for the movie and, on a sufficiently
// close title match, resolves the full content record via its TMDB id.
// Returns classifier.ErrNoMatch when nothing passes the Levenshtein check.
func (c *client) searchMovieTmdb(ctx context.Context, p SearchMovieParams) (model.Content, error) {
	res, err := c.c.SearchMovie(ctx, tmdb.SearchMovieRequest{
		Query:        p.Title,
		IncludeAdult: p.IncludeAdult,
		Year:         p.Year,
	})
	if err != nil {
		return model.Content{}, err
	}
	for _, candidate := range res.Results {
		if !levenshteinCheck(p.Title, []string{candidate.Title, candidate.OriginalTitle}, p.LevenshteinThreshold) {
			continue
		}
		return c.GetMovieByExternalId(ctx, SourceTmdb, strconv.Itoa(int(candidate.ID)))
	}
	return model.Content{}, classifier.ErrNoMatch
}
// GetMovieByExternalId resolves a movie by an external identifier. It first
// checks the local database (canonical identifiers for TMDB ids, alternative
// identifiers otherwise), then falls back to the TMDB API: directly by TMDB
// id, or via TMDB's find-by-external-id endpoint for other sources.
func (c *client) GetMovieByExternalId(ctx context.Context, source, id string) (model.Content, error) {
options := []query.Option{
query.Where(
search.ContentTypeCriteria(model.ContentTypeMovie, model.ContentTypeXxx),
),
search.ContentDefaultPreload(),
search.ContentDefaultHydrate(),
query.Limit(1),
}
// Local lookup: a TMDB id is the canonical identifier; anything else is
// stored as an alternative identifier.
if source == SourceTmdb {
canonicalResult, canonicalErr := c.s.Content(ctx,
append(options, query.Where(
search.ContentCanonicalIdentifierCriteria(model.ContentRef{
Source: source,
ID: id,
}),
))...,
)
if canonicalErr != nil {
return model.Content{}, canonicalErr
}
if len(canonicalResult.Items) > 0 {
return canonicalResult.Items[0].Content, nil
}
} else {
alternativeResult, alternativeErr := c.s.Content(ctx,
append(options, query.Where(
search.ContentAlternativeIdentifierCriteria(model.ContentRef{
Source: source,
ID: id,
}),
))...,
)
if alternativeErr != nil {
return model.Content{}, alternativeErr
}
if len(alternativeResult.Items) > 0 {
return alternativeResult.Items[0].Content, nil
}
}
// Remote fallback: a TMDB id can be fetched directly once parsed.
if source == SourceTmdb {
intId, idErr := strconv.Atoi(id)
if idErr != nil {
return model.Content{}, idErr
}
return c.getMovieByTmbdId(ctx, intId)
}
// Other sources go through TMDB's find-by-external-id endpoint, which maps
// e.g. an IMDB id onto a TMDB record.
externalSource, externalId, externalSourceErr := getExternalSource(source, id)
if externalSourceErr != nil {
return model.Content{}, externalSourceErr
}
byIdResult, byIdErr := c.c.FindByID(ctx, tmdb.FindByIDRequest{
ExternalSource: externalSource,
ExternalID: externalId,
})
if byIdErr != nil {
return model.Content{}, byIdErr
}
if len(byIdResult.MovieResults) == 0 {
return model.Content{}, classifier.ErrNoMatch
}
return c.getMovieByTmbdId(ctx, int(byIdResult.MovieResults[0].ID))
}
// SourceImdb identifies IMDB as an alternative identifier source.
const SourceImdb = "imdb"
// SourceTvdb identifies TheTVDB as an alternative identifier source.
const SourceTvdb = "tvdb"
// getExternalSource translates a source name used by this package into the
// external-source key expected by TMDB's find-by-id endpoint, returning
// ErrUnknownSource for sources it cannot translate.
func getExternalSource(source string, id string) (string, string, error) {
	switch source {
	case SourceImdb:
		return "imdb_id", id, nil
	case SourceTvdb:
		return "tvdb_id", id, nil
	default:
		return "", "", ErrUnknownSource
	}
}
// getMovieByTmbdId fetches full movie details from TMDB by numeric id and
// maps them onto the internal content model. A TMDB not-found error is
// normalised to classifier.ErrNoMatch.
// NOTE(review): "Tmbd" looks like a typo for "Tmdb"; renaming would touch
// every call site, so it is only flagged here.
func (c *client) getMovieByTmbdId(ctx context.Context, id int) (movie model.Content, err error) {
d, getDetailsErr := c.c.MovieDetails(ctx, tmdb.MovieDetailsRequest{
ID: int64(id),
})
if getDetailsErr != nil {
// A missing TMDB record is "no match", not a hard failure.
if errors.Is(getDetailsErr, tmdb.ErrNotFound) {
getDetailsErr = classifier.ErrNoMatch
}
err = getDetailsErr
return
}
return MovieDetailsToMovieModel(d)
}
// MovieDetailsToMovieModel maps a TMDB movie-details response onto the
// internal content model, collecting franchise/genre collections and
// imdb-id/poster/backdrop attributes along the way. It fails only when the
// release date cannot be parsed.
func MovieDetailsToMovieModel(details tmdb.MovieDetailsResponse) (model.Content, error) {
	releaseDate := model.Date{}
	if details.ReleaseDate != "" {
		parsedDate, err := model.NewDateFromIsoString(details.ReleaseDate)
		if err != nil {
			return model.Content{}, err
		}
		releaseDate = parsedDate
	}
	var collections []model.ContentCollection
	// A non-zero collection id means the movie belongs to a franchise.
	if details.BelongsToCollection.ID != 0 {
		collections = append(collections, model.ContentCollection{
			Type:   "franchise",
			Source: SourceTmdb,
			ID:     strconv.Itoa(int(details.BelongsToCollection.ID)),
			Name:   details.BelongsToCollection.Name,
		})
	}
	for _, genre := range details.Genres {
		collections = append(collections, model.ContentCollection{
			Type:   "genre",
			Source: SourceTmdb,
			ID:     strconv.Itoa(int(genre.ID)),
			Name:   genre.Name,
		})
	}
	var attributes []model.ContentAttribute
	if details.IMDbID != "" {
		attributes = append(attributes, model.ContentAttribute{
			// Use the declared source constants (previously the literals
			// "imdb"/"tmdb") for consistency with the rest of the package;
			// the stored values are unchanged.
			Source: SourceImdb,
			Key:    "id",
			Value:  details.IMDbID,
		})
	}
	if details.PosterPath != "" {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceTmdb,
			Key:    "poster_path",
			Value:  details.PosterPath,
		})
	}
	if details.BackdropPath != "" {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceTmdb,
			Key:    "backdrop_path",
			Value:  details.BackdropPath,
		})
	}
	// Adult titles are classified as XXX rather than movie content.
	contentType := model.ContentTypeMovie
	if details.Adult {
		contentType = model.ContentTypeXxx
	}
	return model.Content{
		Type:             contentType,
		Source:           SourceTmdb,
		ID:               strconv.Itoa(int(details.ID)),
		Title:            details.Title,
		ReleaseDate:      releaseDate,
		ReleaseYear:      releaseDate.Year,
		Adult:            model.NewNullBool(details.Adult),
		OriginalLanguage: model.ParseLanguage(details.OriginalLanguage),
		OriginalTitle:    model.NewNullString(details.OriginalTitle),
		Overview: model.NullString{
			String: details.Overview,
			Valid:  details.Overview != "",
		},
		Runtime: model.NullUint16{
			Uint16: uint16(details.Runtime),
			Valid:  details.Runtime > 0,
		},
		Popularity:  model.NewNullFloat32(details.Popularity),
		VoteAverage: model.NewNullFloat32(details.VoteAverage),
		VoteCount:   model.NewNullUint(uint(details.VoteCount)),
		Collections: collections,
		Attributes:  attributes,
	}, nil
}
-242
View File
@@ -1,242 +0,0 @@
package tmdb
import (
"context"
"errors"
"fmt"
"github.com/bitmagnet-io/bitmagnet/internal/classifier"
"github.com/bitmagnet-io/bitmagnet/internal/database/query"
"github.com/bitmagnet-io/bitmagnet/internal/database/search"
"github.com/bitmagnet-io/bitmagnet/internal/model"
"github.com/bitmagnet-io/bitmagnet/internal/tmdb"
"strconv"
)
// TvShowClient resolves TV-show content either by fuzzy name search or by an
// external identifier (tmdb/imdb/tvdb).
type TvShowClient interface {
SearchTvShow(ctx context.Context, p SearchTvShowParams) (model.Content, error)
GetTvShowByExternalId(ctx context.Context, source, id string) (model.Content, error)
}
// SearchTvShowParams are the inputs to SearchTvShow.
type SearchTvShowParams struct {
Name string
// Year restricts matches to a first-air year; a nil year disables the filter.
Year model.Year
IncludeAdult bool
// LevenshteinThreshold is the maximum edit distance accepted when comparing
// the requested name against candidate names.
LevenshteinThreshold uint
}
// SearchTvShow resolves a TV show by name/year, preferring content already
// in the local database and falling back to a TMDB search only when the
// local lookup reports no match.
func (c *client) SearchTvShow(ctx context.Context, p SearchTvShowParams) (model.Content, error) {
	local, err := c.searchTvShowLocal(ctx, p)
	if err == nil {
		return local, nil
	}
	if !errors.Is(err, classifier.ErrNoMatch) {
		return model.Content{}, err
	}
	return c.searchTvShowTmdb(ctx, p)
}
// searchTvShowLocal tries to resolve the TV show against content already
// stored in the local database, returning classifier.ErrNoMatch when no
// candidate passes the Levenshtein name check.
func (c *client) searchTvShowLocal(ctx context.Context, p SearchTvShowParams) (model.Content, error) {
	opts := []query.Option{
		query.Where(search.ContentTypeCriteria(model.ContentTypeTvShow)),
		// Quote the name so it is treated as a phrase by the query parser.
		query.QueryString(fmt.Sprintf("\"%s\"", p.Name)),
		query.OrderByQueryStringRank(),
		query.Limit(5),
		search.ContentDefaultPreload(),
		search.ContentDefaultHydrate(),
	}
	if !p.Year.IsNil() {
		opts = append(opts, query.Where(search.ContentReleaseDateCriteria(model.NewDateRangeFromYear(p.Year))))
	}
	result, err := c.s.Content(ctx, opts...)
	if err != nil {
		return model.Content{}, err
	}
	for _, item := range result.Items {
		candidates := []string{item.Title}
		if item.OriginalTitle.Valid {
			candidates = append(candidates, item.OriginalTitle.String)
		}
		if levenshteinCheck(p.Name, candidates, p.LevenshteinThreshold) {
			return item.Content, nil
		}
	}
	return model.Content{}, classifier.ErrNoMatch
}
// searchTvShowTmdb queries the TMDB API for the TV show and, on a
// sufficiently close name match, resolves the full content record via its
// TMDB id. Returns classifier.ErrNoMatch when nothing passes the check.
//
// The previous implementation also built a urlOptions map here
// (first_air_date_year / include_adult) that was never passed anywhere; the
// year and adult filters travel in SearchTvRequest, so that dead code is
// removed.
func (c *client) searchTvShowTmdb(ctx context.Context, p SearchTvShowParams) (tvShow model.Content, err error) {
	searchResult, searchErr := c.c.SearchTv(ctx, tmdb.SearchTvRequest{
		Query:        p.Name,
		Year:         p.Year,
		IncludeAdult: p.IncludeAdult,
	})
	if searchErr != nil {
		err = searchErr
		return
	}
	for _, item := range searchResult.Results {
		if levenshteinCheck(p.Name, []string{item.Name, item.OriginalName}, p.LevenshteinThreshold) {
			return c.GetTvShowByExternalId(ctx, SourceTmdb, strconv.Itoa(int(item.ID)))
		}
	}
	err = classifier.ErrNoMatch
	return
}
// GetTvShowByExternalId resolves a TV show by an external identifier. It
// first checks the local database (canonical identifiers for TMDB ids,
// alternative identifiers otherwise), then falls back to the TMDB API:
// directly by TMDB id, or via TMDB's find-by-external-id endpoint.
func (c *client) GetTvShowByExternalId(ctx context.Context, source, id string) (tvShow model.Content, err error) {
options := []query.Option{
search.ContentDefaultPreload(),
search.ContentDefaultHydrate(),
query.Limit(1),
}
// Local lookup: a TMDB id is the canonical identifier; anything else is
// stored as an alternative identifier.
if source == SourceTmdb {
canonicalResult, canonicalErr := c.s.Content(ctx,
append(options, query.Where(search.ContentCanonicalIdentifierCriteria(model.ContentRef{
Type: model.ContentTypeTvShow,
Source: source,
ID: id,
})))...,
)
if canonicalErr != nil {
return model.Content{}, canonicalErr
}
if len(canonicalResult.Items) > 0 {
return canonicalResult.Items[0].Content, nil
}
} else {
alternativeResult, alternativeErr := c.s.Content(ctx,
append(options, query.Where(search.ContentAlternativeIdentifierCriteria(model.ContentRef{
Type: model.ContentTypeTvShow,
Source: source,
ID: id,
})))...,
)
if alternativeErr != nil {
return model.Content{}, alternativeErr
}
if len(alternativeResult.Items) > 0 {
return alternativeResult.Items[0].Content, nil
}
}
// Remote fallback: a TMDB id can be fetched directly once parsed.
if source == SourceTmdb {
intId, idErr := strconv.Atoi(id)
if idErr != nil {
err = idErr
return
}
return c.getTvShowByTmdbId(ctx, intId)
}
// Other sources go through TMDB's find-by-external-id endpoint.
externalSource, externalId, externalSourceErr := getExternalSource(source, id)
if externalSourceErr != nil {
err = externalSourceErr
return
}
byIdResult, byIdErr := c.c.FindByID(ctx, tmdb.FindByIDRequest{
ExternalSource: externalSource,
ExternalID: externalId,
})
if byIdErr != nil {
err = byIdErr
return
}
if len(byIdResult.TvResults) == 0 {
err = classifier.ErrNoMatch
return
}
return c.getTvShowByTmdbId(ctx, int(byIdResult.TvResults[0].ID))
}
// getTvShowByTmdbId fetches full series details (including external ids)
// from TMDB by numeric id and maps them onto the internal content model.
// A TMDB not-found error is normalised to classifier.ErrNoMatch.
func (c *client) getTvShowByTmdbId(ctx context.Context, id int) (tvShow model.Content, err error) {
d, getDetailsErr := c.c.TvDetails(ctx, tmdb.TvDetailsRequest{
SeriesID: int64(id),
AppendToResponse: []string{"external_ids"},
})
if getDetailsErr != nil {
// A missing TMDB record is "no match", not a hard failure.
if errors.Is(getDetailsErr, tmdb.ErrNotFound) {
getDetailsErr = classifier.ErrNoMatch
}
err = getDetailsErr
return
}
return TvShowDetailsToTvShowModel(d)
}
// TvShowDetailsToTvShowModel maps a TMDB TV-series details response onto the
// internal content model, collecting genre collections and
// imdb-id/tvdb-id/poster/backdrop attributes along the way. It fails only
// when the first-air date cannot be parsed.
//
// Fixes from the previous version: the named return was misleadingly called
// "movie" (copy-paste from the movie mapper), and the attribute sources used
// string literals instead of the declared Source* constants; stored values
// are unchanged.
func TvShowDetailsToTvShowModel(details tmdb.TvDetailsResponse) (model.Content, error) {
	firstAirDate := model.Date{}
	if details.FirstAirDate != "" {
		parsedDate, err := model.NewDateFromIsoString(details.FirstAirDate)
		if err != nil {
			return model.Content{}, err
		}
		firstAirDate = parsedDate
	}
	var collections []model.ContentCollection
	for _, genre := range details.Genres {
		collections = append(collections, model.ContentCollection{
			Type:   "genre",
			Source: SourceTmdb,
			ID:     strconv.Itoa(int(genre.ID)),
			Name:   genre.Name,
		})
	}
	var attributes []model.ContentAttribute
	if details.ExternalIDs.IMDbID != "" {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceImdb,
			Key:    "id",
			Value:  details.ExternalIDs.IMDbID,
		})
	}
	if details.ExternalIDs.TVDBID != 0 {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceTvdb,
			Key:    "id",
			Value:  strconv.Itoa(int(details.ExternalIDs.TVDBID)),
		})
	}
	if details.PosterPath != "" {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceTmdb,
			Key:    "poster_path",
			Value:  details.PosterPath,
		})
	}
	if details.BackdropPath != "" {
		attributes = append(attributes, model.ContentAttribute{
			Source: SourceTmdb,
			Key:    "backdrop_path",
			Value:  details.BackdropPath,
		})
	}
	return model.Content{
		Type:             model.ContentTypeTvShow,
		Source:           SourceTmdb,
		ID:               strconv.Itoa(int(details.ID)),
		Title:            details.Name,
		ReleaseDate:      firstAirDate,
		ReleaseYear:      firstAirDate.Year,
		OriginalLanguage: model.ParseLanguage(details.OriginalLanguage),
		OriginalTitle:    model.NewNullString(details.OriginalName),
		Overview: model.NullString{
			String: details.Overview,
			Valid:  details.Overview != "",
		},
		Popularity:  model.NewNullFloat32(details.Popularity),
		VoteAverage: model.NewNullFloat32(details.VoteAverage),
		VoteCount:   model.NewNullUint(uint(details.VoteCount)),
		Collections: collections,
		Attributes:  attributes,
	}, nil
}
@@ -1,17 +0,0 @@
package videofx
import (
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video"
"github.com/bitmagnet-io/bitmagnet/internal/classifier/video/tmdb"
"go.uber.org/fx"
)
// New assembles the fx module wiring the TMDB-backed video classifier
// providers (tmdb client wrapper and video resolver).
func New() fx.Option {
return fx.Module(
// NOTE(review): the module is named "movie" although it wires generic
// video/tmdb providers — presumably historical; confirm before renaming.
"movie",
fx.Provide(
tmdb.New,
video.New,
),
)
}
+13 -1
View File
@@ -43,6 +43,9 @@ func newTorrentContent(db *gorm.DB, opts ...gen.DOOption) torrentContent {
_torrentContent.CreatedAt = field.NewTime(tableName, "created_at")
_torrentContent.UpdatedAt = field.NewTime(tableName, "updated_at")
_torrentContent.Tsv = field.NewField(tableName, "tsv")
_torrentContent.Seeders = field.NewField(tableName, "seeders")
_torrentContent.Leechers = field.NewField(tableName, "leechers")
_torrentContent.PublishedAt = field.NewTime(tableName, "published_at")
_torrentContent.Torrent = torrentContentBelongsToTorrent{
db: db.Session(&gorm.Session{}),
@@ -149,6 +152,9 @@ type torrentContent struct {
CreatedAt field.Time
UpdatedAt field.Time
Tsv field.Field
Seeders field.Field
Leechers field.Field
PublishedAt field.Time
Torrent torrentContentBelongsToTorrent
Content torrentContentBelongsToContent
@@ -184,6 +190,9 @@ func (t *torrentContent) updateTableName(table string) *torrentContent {
t.CreatedAt = field.NewTime(table, "created_at")
t.UpdatedAt = field.NewTime(table, "updated_at")
t.Tsv = field.NewField(table, "tsv")
t.Seeders = field.NewField(table, "seeders")
t.Leechers = field.NewField(table, "leechers")
t.PublishedAt = field.NewTime(table, "published_at")
t.fillFieldMap()
@@ -200,7 +209,7 @@ func (t *torrentContent) GetFieldByName(fieldName string) (field.OrderExpr, bool
}
func (t *torrentContent) fillFieldMap() {
t.fieldMap = make(map[string]field.Expr, 18)
t.fieldMap = make(map[string]field.Expr, 21)
t.fieldMap["id"] = t.ID
t.fieldMap["info_hash"] = t.InfoHash
t.fieldMap["content_type"] = t.ContentType
@@ -217,6 +226,9 @@ func (t *torrentContent) fillFieldMap() {
t.fieldMap["created_at"] = t.CreatedAt
t.fieldMap["updated_at"] = t.UpdatedAt
t.fieldMap["tsv"] = t.Tsv
t.fieldMap["seeders"] = t.Seeders
t.fieldMap["leechers"] = t.Leechers
t.fieldMap["published_at"] = t.PublishedAt
}
@@ -30,11 +30,9 @@ func newTorrentsTorrentSource(db *gorm.DB, opts ...gen.DOOption) torrentsTorrent
_torrentsTorrentSource.Source = field.NewString(tableName, "source")
_torrentsTorrentSource.InfoHash = field.NewField(tableName, "info_hash")
_torrentsTorrentSource.ImportID = field.NewField(tableName, "import_id")
_torrentsTorrentSource.Bfsd = field.NewBytes(tableName, "bfsd")
_torrentsTorrentSource.Bfpe = field.NewBytes(tableName, "bfpe")
_torrentsTorrentSource.Seeders = field.NewField(tableName, "seeders")
_torrentsTorrentSource.Leechers = field.NewField(tableName, "leechers")
_torrentsTorrentSource.PublishedAt = field.NewTime(tableName, "published_at")
_torrentsTorrentSource.PublishedAt = field.NewField(tableName, "published_at")
_torrentsTorrentSource.CreatedAt = field.NewTime(tableName, "created_at")
_torrentsTorrentSource.UpdatedAt = field.NewTime(tableName, "updated_at")
_torrentsTorrentSource.TorrentSource = torrentsTorrentSourceHasOneTorrentSource{
@@ -55,11 +53,9 @@ type torrentsTorrentSource struct {
Source field.String
InfoHash field.Field
ImportID field.Field
Bfsd field.Bytes
Bfpe field.Bytes
Seeders field.Field
Leechers field.Field
PublishedAt field.Time
PublishedAt field.Field
CreatedAt field.Time
UpdatedAt field.Time
TorrentSource torrentsTorrentSourceHasOneTorrentSource
@@ -82,11 +78,9 @@ func (t *torrentsTorrentSource) updateTableName(table string) *torrentsTorrentSo
t.Source = field.NewString(table, "source")
t.InfoHash = field.NewField(table, "info_hash")
t.ImportID = field.NewField(table, "import_id")
t.Bfsd = field.NewBytes(table, "bfsd")
t.Bfpe = field.NewBytes(table, "bfpe")
t.Seeders = field.NewField(table, "seeders")
t.Leechers = field.NewField(table, "leechers")
t.PublishedAt = field.NewTime(table, "published_at")
t.PublishedAt = field.NewField(table, "published_at")
t.CreatedAt = field.NewTime(table, "created_at")
t.UpdatedAt = field.NewTime(table, "updated_at")
@@ -105,12 +99,10 @@ func (t *torrentsTorrentSource) GetFieldByName(fieldName string) (field.OrderExp
}
func (t *torrentsTorrentSource) fillFieldMap() {
t.fieldMap = make(map[string]field.Expr, 11)
t.fieldMap = make(map[string]field.Expr, 9)
t.fieldMap["source"] = t.Source
t.fieldMap["info_hash"] = t.InfoHash
t.fieldMap["import_id"] = t.ImportID
t.fieldMap["bfsd"] = t.Bfsd
t.fieldMap["bfpe"] = t.Bfpe
t.fieldMap["seeders"] = t.Seeders
t.fieldMap["leechers"] = t.Leechers
t.fieldMap["published_at"] = t.PublishedAt
+9 -99
View File
@@ -1,122 +1,32 @@
package fts
import (
"bufio"
"errors"
"io"
"strconv"
"strings"
"unicode"
"github.com/bitmagnet-io/bitmagnet/internal/lexer"
)
func newLexer(str string) lexer {
return lexer{
reader: bufio.NewReader(strings.NewReader(str)),
}
func newLexer(str string) ftsLexer {
return ftsLexer{lexer.NewLexer(str)}
}
type lexer struct {
pos int
reader *bufio.Reader
type ftsLexer struct {
lexer.Lexer
}
// read consumes and returns the next rune, advancing pos. The boolean is
// false at end of input. Any error other than io.EOF panics — the underlying
// reader wraps an in-memory string, so other failures indicate a bug.
func (l *lexer) read() (rune, bool) {
r, _, err := l.reader.ReadRune()
if err != nil {
if errors.Is(err, io.EOF) {
return 0, false
}
panic(err)
}
l.pos++
return r, true
}
// backup un-reads the most recently read rune and rewinds pos by one.
// bufio.Reader guarantees only a single rune of pushback, so calling backup
// twice without an intervening read panics.
func (l *lexer) backup() {
if err := l.reader.UnreadRune(); err != nil {
panic(err)
}
l.pos--
}
// isEof reports whether the input is exhausted without consuming anything:
// it reads one rune and, on success, immediately pushes it back.
func (l *lexer) isEof() bool {
_, ok := l.read()
if !ok {
return true
}
l.backup()
return false
}
// readIf consumes and returns the next rune only when fn accepts it;
// otherwise the rune is pushed back and the boolean is false.
func (l *lexer) readIf(fn func(rune) bool) (rune, bool) {
r, ok := l.read()
if !ok {
return 0, false
}
if !fn(r) {
l.backup()
return 0, false
}
return r, true
}
// readWhile consumes runes for as long as fn accepts them and returns the
// accumulated string (possibly empty when the first rune is rejected).
func (l *lexer) readWhile(fn func(rune) bool) string {
var str string
for {
r, ok := l.readIf(fn)
if !ok {
break
}
str = str + string(r)
}
return str
}
// readInt consumes a run of ASCII digits and returns its integer value; the
// boolean is false when no digit was present. Atoi cannot fail on a
// non-empty all-digit string short of overflow, hence the panic.
func (l *lexer) readInt() (int, bool) {
str := l.readWhile(isInt)
if str == "" {
return 0, false
}
n, err := strconv.Atoi(str)
if err != nil {
panic(err)
}
return n, true
}
// readChar consumes the next rune only when it equals r1, reporting whether
// it did; on mismatch the rune is left unconsumed.
func (l *lexer) readChar(r1 rune) bool {
_, ok := l.readIf(isChar(r1))
return ok
}
func (l *lexer) readQuotedString(quoteChar rune) (string, error) {
if !l.readChar(quoteChar) {
func (l *ftsLexer) readQuotedString(quoteChar rune) (string, error) {
if !l.ReadChar(quoteChar) {
return "", errors.New("missing opening quote")
}
var str string
for {
ch, ok := l.read()
ch, ok := l.Read()
if !ok {
return str, errors.New("unexpected EOF")
}
if ch == quoteChar && !l.readChar(quoteChar) {
if ch == quoteChar && !l.ReadChar(quoteChar) {
break
}
str = str + string(ch)
}
return str, nil
}
// isInt reports whether r is an ASCII decimal digit ('0'..'9'); non-ASCII
// Unicode digits deliberately do not count.
func isInt(r rune) bool {
	return !(r < '0' || r > '9')
}
// isChar returns a predicate that matches exactly the rune r1.
func isChar(r1 rune) func(rune) bool {
	return func(r2 rune) bool {
		return r2 == r1
	}
}
// IsWordChar reports whether r belongs to a word token: any Unicode letter
// or digit.
func IsWordChar(r rune) bool {
	switch {
	case unicode.IsLetter(r), unicode.IsDigit(r):
		return true
	default:
		return false
	}
}
+5 -4
View File
@@ -1,6 +1,7 @@
package fts
import (
"github.com/bitmagnet-io/bitmagnet/internal/lexer"
"github.com/mozillazg/go-unidecode/table"
"strings"
"unicode"
@@ -20,7 +21,7 @@ func Tokenize(str string) [][]string {
}
type tokenizerLexer struct {
lexer
ftsLexer
}
func TokenizeFlat(str string) []string {
@@ -44,11 +45,11 @@ func (l *tokenizerLexer) readPhrase() []string {
lexeme = lexeme + str
}
for {
if l.isEof() {
if l.IsEof() {
breakWord()
return phrase
}
if ch, ok := l.readIf(IsWordChar); ok {
if ch, ok := l.ReadIf(lexer.IsWordChar); ok {
ch = unicode.ToLower(ch)
if ch < unicode.MaxASCII {
appendStr(string(ch))
@@ -81,6 +82,6 @@ func (l *tokenizerLexer) readPhrase() []string {
if len(phrase) > 0 {
return phrase
}
l.read()
l.Read()
}
}

Some files were not shown because too many files have changed in this diff Show More