Write the code

This commit is contained in:
mcneb10 2024-06-24 17:00:30 -05:00
parent b85b1aa080
commit f6a8e0896a
7 changed files with 43 additions and 142 deletions

View file

@ -1,17 +1,13 @@
# Lua XMPP Privacy Bot
# Lua XMPP Sed Bot
This bot replaces links to popular sites such as YouTube with privacy-respecting front ends such as Invidious. It is written in 100% pure Lua.
This bot performs text replacement in the style of the `sed` tool.
# How to run
Make sure `make`, `tar`, `gzip`, `lua`, and `luarocks` are installed.
Then do `luarocks install luasocket luaexpat luasec`
Then do `luarocks install luasocket luaexpat luasec lrexlib-pcre2`
Next configure the bot to your liking in `config.lua`. Also don't forget to copy `config_private_example.lua` to `config_private.lua` and fill that out as well.
Then run the `./run` script. It will download the farside `services.json` list and compile the `verse.lua` xmpp library if it doesn't exist. Then it will start the bot.
# List of supported front ends
**TODO**
Then run the `./run` script. It will download and compile the `verse.lua` xmpp library if it doesn't exist. Then it will start the bot.

2
clean
View file

@ -2,5 +2,3 @@
print("Deleting compiled verse")
os.remove("verse.lua")
print("Deleting services.json")
os.remove("services.json")

View file

@ -3,7 +3,7 @@ config = {
-- Log verbosity: 1 prints debug output, 0 does not. TODO: give more control over log output
verbosity = 1,
-- Bot nickname
name = "Privacy Link Bot",
name = "Sed Bot",
--[[
This will set the type of url to replace the service domain with. Can be:
- clearnet
@ -12,62 +12,8 @@ config = {
- yggdrasil
- TODO: make it work and more types?
]]--
prefered_website_medium = "clearnet",
-- Choose random frontend instead of fallback one, will force clearnet
random_frontend = true,
-- Reply using XEP-0461 instead of just quoting
use_reply_xep = true,
-- List of desired frontends to extract from `services.json`
sites = {
-- Key is domain pattern
["reddit[.]com"] = {
-- Specify which frontends should be used
frontends = { "libreddit", "redlib" }
},
["instagram[.]com"] = {
frontends = { "proxigram" }
},
["github[.]com"] = {
frontends = { "gothub" }
},
["google[.]com"] = {
frontends = { "searxng" }
},
["youtube[.]com"] = {
frontends = { "piped", "invidious"}
},
["www[.]youtube[.]com"] = {
frontends = { "piped", "invidious"}
},
["youtu[.]be"] = {
frontends = { "piped", "invidious", }
},
["twitter[.]com"] = {
frontends = { "nitter", }
},
["x[.]com"] = {
frontends = { "nitter", }
},
["wikipedia[.]org"] = {
frontends = { "wikiless", }
},
["medium[.]com"] = {
frontends = { "scribe", }
},
["imgur[.]com"] = {
frontends = { "rimgo", }
},
["translate[.]google[.]com"] = {
frontends = { "lingva", }
},
["tiktok[.]com"] = {
frontends = { "proxitok", }
},
["fandom[.]com"] = {
frontends = { "breezewiki", }
},
-- TODO: the rest
}
}
-- Load config file with private information

View file

@ -1,6 +0,0 @@
#!/usr/bin/env lua
-- Refresh the local copy of the farside service list: build the wget
-- command for the upstream URL, delete any existing services.json, then
-- run the download.
local services_url = "https://git.sr.ht/~benbusby/farside/blob/HEAD/services.json"
local fetch_command = string.format("wget \"%s\"", services_url)
os.remove("services.json")
os.execute(fetch_command)

View file

@ -1,5 +1,7 @@
-- Get the verse lib
verse = require("verse")
-- Get pcre2 lib
re = require("rex_pcre2")
-- Setup logging and config
require("utils")
local log = setup_log(string.format("%s_main", config.name))
@ -38,13 +40,25 @@ client:hook("ready", function()
and not event.stanza:get_child("delay", "urn:xmpp:delay") then
local body = event.stanza:get_child_text("body")
if body then
for site, services in pairs(config.sites) do
local instance = choose_instance(services.frontends)
for match in string.gmatch(body, string.format("%%s(%s/%%S+)", site)) do
send_reply_link(room, match, site, instance, event)
-- Try to get pairs of sed patterns and quotes
for text, pattern, replacement, flags in re.gmatch(body, sed_expresion_regex, "gx") do
-- Remove block quote markers
text = re.gsub(text, "^>", "", nil, "m")
-- Get compilation flags
local compilation_flags = string.gsub(flags, "[^imsxU]", "")
-- u flag?
local n = nil
if not string.find(flags, "g") then
n = 1
end
for match in string.gmatch(body, string.format("(https?://%s/%%S+)", site)) do
send_reply_link(room, match, site, instance, event)
-- Run the expression and send result
local result, new_text = pcall(re.gsub, text, pattern, replacement, n, compilation_flags)
if result then
if new_text then
send_sed_output(room, new_text, event)
end
else
send_sed_output(room, string.format("PCRE2 Error: %s", new_text), event)
end
end
end

5
run
View file

@ -2,11 +2,6 @@
-- TODO: luarocks?
-- Download frontends list
if not os.execute(string.format("ls services.json 2>/dev/null >/dev/null")) then
dofile("get_frontends")
end
-- Squish commit hash
local squish_version = "tip"
-- Squish script url

View file

@ -1,5 +1,14 @@
-- Various utility functions used by the bot
-- PCRE2 pattern that recognises a quoted block followed by a sed-style
-- `s/pattern/replacement/flags` command. Assigned as a global (no `local`).
-- Capture groups, in order:
--   1. one or more consecutive lines beginning with `>` (the quoted text),
--      including the line breaks that follow each of them
--   2. the search pattern (must be non-empty); backslash escapes and
--      bracketed `[...]` classes are consumed as units, so a `/` after a
--      backslash or inside brackets does not terminate it
--   3. the replacement, using the same grammar as group 2 but allowed to
--      be empty
--   4. whatever follows the final `/` (the flags), matched lazily
--   5. the whitespace run, or end of subject, that terminates the command
-- NOTE(review): the pattern is laid out across several lines, so it
-- presumably must be compiled with the extended (`x`) option for the literal
-- newlines to be ignored — confirm at the call site.
sed_expresion_regex = [[
((?:>[^\r\n]*\R+)+)
s\/
((?:[^\r\n\[\/\\]|\\.|\[(?:[^\r\n\]\\]|\\.)*\])+)
\/
((?:[^\r\n\[\/\\]|\\.|\[(?:[^\r\n\]\\]|\\.)*\])*)
\/(.*?)(\s+|$)
]]
function log_callback(source, level, message, ...)
local output = string.format(
"%s %s [%s]: %s",
@ -41,8 +50,9 @@ function read_all_text(file)
return text
end
function send_reply_link(room, match, site, instance, event)
local msg = string.format("> %s\nPrivate frontend: %s", match, string.gsub(match, site, instance))
function send_sed_output(room, result, event)
--local msg = string.format("> s/%s/%s/%s\n%s", pattern, replacement, flags, result)
local msg = result
if config.use_reply_xep then
room:send(verse.message()
-- Set message text
@ -51,66 +61,14 @@ function send_reply_link(room, match, site, instance, event)
:tag("reply", {
xmlns = 'urn:xmpp:reply:0',
to = event.stanza.attr.from,
id = event.stanza.attr.id,
}))
id = event.stanza:get_child("stanza-id", "urn:xmpp:sid:0").attr.id,
}):up()
:tag("markable", {
xmlns = "urn:xmpp:chat-markers:0"
}));
else
room:send_message(msg)
end
end
-- Classify an instance URL by the network medium it is reachable over.
-- Returns "onion", "eepsite", "yggdrasil" or "clearnet", i.e. the values
-- that config.prefered_website_medium is compared against.
local function instance_medium(instance)
    if string.match(instance, "[.]onion$") then
        return "onion"
    elseif string.match(instance, "[.]i2p$") then
        return "eepsite"
    elseif string.match(instance, "[[][%x:]+[]]") then
        -- Bracketed IPv6 literal. Addresses are hexadecimal, so the class
        -- must be %x; %d would reject any address containing a-f.
        return "yggdrasil"
    end
    -- Anything else is assumed to be clearnet
    return "clearnet"
end

--- Choose an instance host for one of the given frontend services.
-- Picks a random service name from `services`, looks it up in
-- `config.instances` (entries with `type`, `fallback` and `instances`
-- fields) and returns an instance URL with its http(s):// scheme removed.
-- When `config.random_frontend` is set, a random instance matching
-- `config.prefered_website_medium` is used; otherwise the entry's
-- `fallback` is used.
-- @param services array of frontend service names (must be non-empty)
-- @return a single string: the chosen host, or
--         "<service>-no-instances-available" when nothing usable is found
function choose_instance(services)
    -- TODO: make it try all available services before falling back
    -- Choose a random service
    local service = services[math.random(#services)]
    -- Get list of instances for service
    for _, service_instance_list in pairs(config.instances) do
        if service_instance_list.type == service then
            if not config.random_frontend then
                -- Parentheses drop gsub's match count so every path
                -- returns exactly one value
                return (string.gsub(service_instance_list.fallback, "https?://", ""))
            end
            -- TODO: cache this?
            local usable_instances = {}
            for _, instance_url_list in pairs(service_instance_list.instances) do
                -- Instance URLs are split by pipes
                for instance in string.gmatch(instance_url_list, "[^|]+") do
                    if instance_medium(instance) == config.prefered_website_medium then
                        usable_instances[#usable_instances + 1] = instance
                    end
                end
            end
            -- Only pick when something matched: math.random(0) raises (or
            -- yields an out-of-range index), which used to crash here.
            -- With no match we keep scanning and end at the sentinel below.
            if #usable_instances > 0 then
                local chosen = usable_instances[math.random(#usable_instances)]
                return (string.gsub(chosen, "https?://", ""))
            end
        end
    end
    return string.format("%s-no-instances-available", service)
end
-- Config file
dofile("config.lua")
-- Load substitutions
local services_text = read_all_text("services.json")
local json = require("util.json")
local services, err = json.decode(services_text)
if services then
config.instances = services
else
print(string.format("Error loading \"services.json\": %s", err))
end
dofile("config.lua")