From f6a8e0896a7962b57609f32f67021ae1311fc1ad Mon Sep 17 00:00:00 2001 From: mcneb10 Date: Mon, 24 Jun 2024 17:00:30 -0500 Subject: [PATCH] Write the code --- README.md | 12 +++----- clean | 2 -- config.lua | 56 +----------------------------------- get_frontends | 6 ---- main.lua | 26 +++++++++++++---- run | 5 ---- utils.lua | 78 ++++++++++++--------------------------------------- 7 files changed, 43 insertions(+), 142 deletions(-) delete mode 100755 get_frontends diff --git a/README.md b/README.md index e5554f3..d0c9c83 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,13 @@ -# Lua XMPP Privacy Bot +# Lua XMPP Sed Bot -This bot replaces links to popular sites such as youtube with privacy respecting front ends such as invidious. It is written in 100% pure lua +This bot replaces text similar to the 'sed' tool # How to run Make sure `make`, `tar`, `gzip`, `lua`, and `luarocks` are installed. -Then do `luarocks install luasocket luaexpat luasec` +Then do `luarocks install luasocket luaexpat luasec lrexlib-pcre2` Next configure the bot to your liking in `config.lua`. Also don't forget to copy `config_private_example.lua` to `config_private.lua` and fill that out as well. -Then run the `./run` script. It will download the farside `services.json` list and compile the `verse.lua` xmpp library if it doesn't exist. Then it will start the bot. - -# List of supported front ends - -**TODO** \ No newline at end of file +Then run the `./run` script. It will download and compile the `verse.lua` xmpp library if it doesn't exist. Then it will start the bot. diff --git a/clean b/clean index c4e5514..4f64968 100755 --- a/clean +++ b/clean @@ -2,5 +2,3 @@ print("Deleting compiled verse") os.remove("verse.lua") -print("Deleting services.json") -os.remove("services.json") diff --git a/config.lua b/config.lua index 1cac8b6..5efcacc 100644 --- a/config.lua +++ b/config.lua @@ -3,7 +3,7 @@ config = { -- Log verbosity, 1 will print debug 0 will not. TODO: give more control over log output verbosity = 1, -- Bot nickname - name = "Privacy Link Bot", + name = "Sed Bot", --[[ This will set the type of url to replace the service domain with. Can be: - clearnet @@ -12,62 +12,8 @@ config = { - yggdrasil - TODO: make it work and more types? ]]-- - prefered_website_medium = "clearnet", - -- Choose random frontend instead of fallback one, will force clearnet - random_frontend = true, -- Reply using XEP-0461 instead of just quoting use_reply_xep = true, - -- List of desired frontends to extract from `services.json` - sites = { - -- Key is domain pattern - ["reddit[.]com"] = { - -- Specify which frontents should be used - frontends = { "libreddit", "redlib" } - }, - ["instagram[.]com"] = { - frontends = { "proxigram" } - }, - ["github[.]com"] = { - frontends = { "gothub" } - }, - ["google[.]com"] = { - frontends = { "searxng" } - }, - ["youtube[.]com"] = { - frontends = { "piped", "invidious"} - }, - ["www[.]youtube[.]com"] = { - frontends = { "piped", "invidious"} - }, - ["youtu[.]be"] = { - frontends = { "piped", "invidious", } - }, - ["twitter[.]com"] = { - frontends = { "nitter", } - }, - ["x[.]com"] = { - frontends = { "nitter", } - }, - ["wikipedia[.]org"] = { - frontends = { "wikiless", } - }, - ["medium[.]com"] = { - frontends = { "scribe", } - }, - ["imgur[.]com"] = { - frontends = { "rimgo", } - }, - ["translate[.]google[.]com"] = { - frontends = { "lingva", } - }, - ["tiktok[.]com"] = { - frontends = { "proxitok", } - }, - ["fandom[.]com"] = { - frontends = { "breezewiki", } - }, - -- TODO: the rest - } } -- Load config file with private information diff --git a/get_frontends b/get_frontends deleted file mode 100755 index 0fa456d..0000000 --- a/get_frontends +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env lua - -local farside_instance_json_url = "https://git.sr.ht/~benbusby/farside/blob/HEAD/services.json" - -os.remove("services.json") -os.execute(string.format("wget \"%s\"", farside_instance_json_url)) diff --git a/main.lua b/main.lua index 38de5fe..f20084d 100755 --- a/main.lua +++ b/main.lua @@ -1,5 +1,7 @@ -- Get the verse lib verse = require("verse") +-- Get pcre2 lib +re = require("rex_pcre2") -- Setup logging and config require("utils") local log = setup_log(string.format("%s_main", config.name)) @@ -38,13 +40,25 @@ client:hook("ready", function() and not event.stanza:get_child("delay", "urn:xmpp:delay") then local body = event.stanza:get_child_text("body") if body then - for site, services in pairs(config.sites) do - local instance = choose_instance(services.frontends) - for match in string.gmatch(body, string.format("%%s(%s/%%S+)", site)) do - send_reply_link(room, match, site, instance, event) + -- Try to get pairs of sed patterns and quotes + for text, pattern, replacement, flags in re.gmatch(body, sed_expresion_regex, "gx") do + -- Remove block quote markets + text = re.gsub(text, "^>", "", nil, "m") + -- Get compilation flags + local compilation_flags = string.gsub(flags, "[^imsxU]", "") + -- u flag? + local n = nil + if not string.find(flags, "g") then + n = 1 end - for match in string.gmatch(body, string.format("(https?://%s/%%S+)", site)) do - send_reply_link(room, match, site, instance, event) + -- Run the expression and send result + local result, new_text = pcall(re.gsub, text, pattern, replacement, n, compilation_flags) + if result then + if new_text then + send_sed_output(room, new_text, event) + end + else + send_sed_output(room, string.format("PCRE2 Error: %s", new_text), event) end end end diff --git a/run b/run index 80dac12..5ee9b08 100755 --- a/run +++ b/run @@ -2,11 +2,6 @@ -- TODO: luarocks? --- Download frontends list -if not os.execute(string.format("ls services.json 2>/dev/null >/dev/null")) then - dofile("get_frontends") -end - -- Squish commit hash local squish_version = "tip" -- Squish script url diff --git a/utils.lua b/utils.lua index fd83a38..03d8fd6 100644 --- a/utils.lua +++ b/utils.lua @@ -1,5 +1,14 @@ -- Various utility functions used by the bot +sed_expresion_regex = [[ +((?:>[^\r\n]*\R+)+) +s\/ +((?:[^\r\n\[\/\\]|\\.|\[(?:[^\r\n\]\\]|\\.)*\])+) +\/ +((?:[^\r\n\[\/\\]|\\.|\[(?:[^\r\n\]\\]|\\.)*\])*) +\/(.*?)(\s+|$) +]] + function log_callback(source, level, message, ...) local output = string.format( "%s %s [%s]: %s", @@ -41,8 +50,9 @@ function read_all_text(file) return text end -function send_reply_link(room, match, site, instance, event) - local msg = string.format("> %s\nPrivate frontend: %s", match, string.gsub(match, site, instance)) +function send_sed_output(room, result, event) + --local msg = string.format("> s/%s/%s/%s\n%s", pattern, replacement, flags, result) + local msg = result if config.use_reply_xep then room:send(verse.message() -- Set message text @@ -51,66 +61,14 @@ function send_reply_link(room, match, site, instance, event) :tag("reply", { xmlns = 'urn:xmpp:reply:0', to = event.stanza.attr.from, - id = event.stanza.attr.id, - })) + id = event.stanza:get_child("stanza-id", "urn:xmpp:sid:0").attr.id, + }):up() + :tag("markable", { + xmlns = "urn:xmpp:chat-markers:0" + })); else room:send_message(msg) end end --- Choose instance from available services -function choose_instance(services) - -- TODO: make it try all available services before falling back - -- Choose a random service - local service = services[math.random(#services)] - -- Get list of instances for service - for _, service_instance_list in pairs(config.instances) do - if service_instance_list.type == service then - -- Based on config choose instance - if config.random_frontend then - -- TODO: cache this? - local usable_instances = {} - for _, instance_url_list in pairs(service_instance_list.instances) do - -- Instance URLs are split by pipes - for instance in string.gmatch(instance_url_list, "[^|]+") do - if instance.match(instance, "[.]onion$") then - if config.prefered_website_medium == "onion" then - table.insert(usable_instances, instance) - end - elseif instance.match(instance, "[.]i2p$") then - if config.prefered_website_medium == "eepsite" then - table.insert(usable_instances, instance) - end - elseif instance.match(instance, "[[][%d:]+[]]") then - if config.prefered_website_medium == "yggdrasil" then - table.insert(usable_instances, instance) - end - else - -- Assume clearnet - if config.prefered_website_medium == "clearnet" then - table.insert(usable_instances, instance) - end - end - end - end - return string.gsub(usable_instances[math.random(#usable_instances)], "https?://", "") - else - return string.gsub(service_instance_list.fallback, "https?://", "") - end - end - end - return string.format("%s-no-instances-available", service) -end - --- Config file -dofile("config.lua") - --- Load subsitutions -local services_text = read_all_text("services.json") -local json = require("util.json") -local services, err = json.decode(services_text) -if services then - config.instances = services -else - print(string.format("Error loading \"services.json\": %s", err)) -end +dofile("config.lua") \ No newline at end of file