diff --git a/.gitignore b/.gitignore index c8d5b9e..5889518 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,4 @@ package-lock.json dump.rdb .vscode megalinter-reports/ +dhat-heap.json diff --git a/.gitpod.Dockerfile b/.gitpod.Dockerfile index c479341..f64d765 100644 --- a/.gitpod.Dockerfile +++ b/.gitpod.Dockerfile @@ -1,3 +1,3 @@ FROM gitpod/workspace-rust -RUN sudo install-packages redis-server nodejs npm +RUN sudo install-packages redis-server nodejs npm liblua5.4-dev liblua5.3-dev liblua5.2-dev liblua5.1-0-dev libluajit-5.1-dev diff --git a/Cargo.lock b/Cargo.lock index 6bbaab6..ce683cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9,7 +9,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "617a8268e3537fe1d8c9ead925fca49ef6400927ee7bc26750e90ecee14ce4b8" dependencies = [ "bitflags 1.3.2", - "bytes 1.4.0", + "bytes 1.5.0", "futures-core", "futures-sink", "memchr", @@ -46,7 +46,7 @@ dependencies = [ "actix-web", "askama_escape", "bitflags 1.3.2", - "bytes 1.4.0", + "bytes 1.5.0", "derive_more", "futures-core", "http-range", @@ -57,6 +57,18 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "actix-governor" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46ff2d40f2bc627b8054c5e20fa6b0b0cf9428699b54bd41634e9ae3098ad555" +dependencies = [ + "actix-http", + "actix-web", + "futures 0.3.28", + "governor", +] + [[package]] name = "actix-http" version = "3.4.0" @@ -68,10 +80,10 @@ dependencies = [ "actix-service", "actix-utils", "ahash", - "base64 0.21.3", + "base64 0.21.4", "bitflags 2.4.0", "brotli", - "bytes 1.4.0", + "bytes 1.5.0", "bytestring", "derive_more", "encoding_rs", @@ -103,7 +115,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e01ed3140b2f8d422c68afa1ed2e85d996ea619c988ac834d255db32138655cb" dependencies = [ "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -141,7 +153,7 @@ dependencies = [ "futures-core", "futures-util", "mio 0.8.8", - "socket2 0.5.3", + "socket2 0.5.4", "tokio 1.32.0", "tracing", ] @@ -183,7 +195,7 @@ dependencies = [ "actix-utils", "actix-web-codegen", "ahash", - "bytes 1.4.0", + "bytes 1.5.0", "bytestring", "cfg-if 1.0.0", "cookie 0.16.2", @@ -202,7 +214,7 @@ dependencies = [ "serde_json", "serde_urlencoded 0.7.1", "smallvec 1.11.0", - "socket2 0.5.3", + "socket2 0.5.4", "time 0.3.28", "url 2.4.1", ] @@ -216,7 +228,7 @@ dependencies = [ "actix-router", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -278,9 +290,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anstyle" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15c4c2c83f81532e5845a733998b6971faca23490340a418e9b72a3ec9de12ea" +checksum = "b84bf0a05bbb2a83e5eb6fa36bb6e87baa08193c35ff52bbf6b38d8af2890e46" [[package]] name = "anyhow" @@ -288,12 +300,24 @@ version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" +[[package]] +name = "arc-swap" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" + [[package]] name = "askama_escape" version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "619743e34b5ba4e9703bba34deac3427c72507c7159f5fd030aea8cac0cfe341" 
+[[package]] +name = "async-once-cell" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9338790e78aa95a416786ec8389546c4b6a1dfc3dc36071ed9518a9413a542eb" + [[package]] name = "async-trait" version = "0.1.73" @@ -302,7 +326,7 @@ checksum = "bc00ceb34980c03614e35a3a4e218276a0a824e911d07651cd0d858a51e8c0f0" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -346,9 +370,9 @@ dependencies = [ [[package]] name = "base64" -version = "0.21.3" +version = "0.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "414dcefbc63d77c526a76b3afcf6fbb9b5e2791c19c3aa2297733208750c6e53" +checksum = "9ba43ea6f343b788c8764558649e08df62f86c6ef251fdaeb1ffd010a9ae50a2" [[package]] name = "bit-set" @@ -441,9 +465,9 @@ dependencies = [ [[package]] name = "bytes" -version = "1.4.0" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89b2fd2a0dcf38d7971e2194b6b6eebab45ae01067456a7fd93d5547a61b70be" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" [[package]] name = "bytestring" @@ -451,7 +475,7 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "238e4886760d98c4f899360c834fa93e62cf7f721ac3c2da375cbdf4b8679aae" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", ] [[package]] @@ -520,18 +544,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.4.1" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c8d502cbaec4595d2e7d5f61e318f05417bd2b66fdc3809498f0d3fdf0bea27" +checksum = "6a13b88d2c62ff462f88e4a121f17a82c1af05693a2f192b5c38d14de73c19f6" dependencies = [ "clap_builder", ] [[package]] name = "clap_builder" -version = "4.4.1" +version = "4.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5891c7bc0edb3e1c2204fc5e94009affabeb1821c9e5fdc3959536c5c0bb984d" +checksum = "2bb9faaa7c2ef94b2743a21f5a29e6f0010dff4caa69ac8e9d6cf8b6fa74da08" dependencies = [ "anstyle", "clap_lex", @@ -558,8 +582,12 @@ version = "4.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "35ed6e9d84f0b51a7f52daf1c7d71dd136fd7a3f41a8462b8cdb8c78d920fad4" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", + "futures-core", "memchr", + "pin-project-lite", + "tokio 1.32.0", + "tokio-util", ] [[package]] @@ -574,7 +602,7 @@ version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "888604f00b3db336d2af898ec3c1d5d0ddf5e6d462220f2ededc33a87ac4bbd5" dependencies = [ - "time 0.1.45", + "time 0.1.43", "url 1.7.2", ] @@ -602,7 +630,7 @@ dependencies = [ "publicsuffix", "serde", "serde_json", - "time 0.1.45", + "time 0.1.43", "try_from", "url 1.7.2", ] @@ -798,7 +826,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" dependencies = [ "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if 1.0.0", + "hashbrown 0.14.0", + "lock_api 0.4.10", + "once_cell", + "parking_lot_core 0.9.8", ] [[package]] @@ -820,6 +861,22 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "dhat" +version = "0.3.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f2aaf837aaf456f6706cb46386ba8dffd4013a757e36f4ea05c20dd46b209a3" +dependencies = [ + "backtrace", + "lazy_static", + "mintex", + "parking_lot 0.12.1", + "rustc-hash", + "serde", + "serde_json", + "thousands", +] + [[package]] name = "digest" version = "0.10.7" @@ -918,9 +975,9 @@ dependencies = [ [[package]] name = "error-stack" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6a37ef405b504fc3b87a24fa52906d98cdd1a7d4e5ef2b49f0d5fead138fced" +checksum = "27a72baa257b5e0e2de241967bc5ee8f855d6072351042688621081d66b2a76b" dependencies = [ "anyhow", "rustc_version 0.4.0", @@ -1049,6 +1106,21 @@ version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a471a38ef8ed83cd6e40aa59c1ffe17db6855c18e3604d9c4ed8c08ebc28678" +[[package]] +name = "futures" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23342abe12aba583913b2e62f22225ff9c950774065e4bfb61a19cd9770fec40" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.28" @@ -1056,6 +1128,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "955518d47e09b25bbebc7a18df10b81f0c766eaf4c4f1cccef2fca5f2a4fb5f2" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -1070,10 +1143,38 @@ version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab90cde24b3319636588d0c35fe03b1333857621051837ed769faefb4c2162e4" dependencies = [ - "futures", + "futures 0.1.31", "num_cpus", ] +[[package]] +name = "futures-executor" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccecee823288125bd88b4d7f565c9e58e41858e47ab72e8ea2d64e93624386e0" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fff74096e71ed47f8e023204cfd0aa1289cd54ae5430a9523be060cdb849964" + +[[package]] +name = "futures-macro" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89ca545a94061b6365f2c7355b4b32bd20df3ff95f02da9329b34ccc3bd6ee72" +dependencies = [ + "proc-macro2 1.0.66", + "quote 1.0.33", + "syn 2.0.32", +] + [[package]] name = "futures-sink" version = "0.3.28" @@ -1086,16 +1187,28 @@ version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76d3d132be6c0e6aa1534069c705a74a5997a356c0dc2f86a47765e5617c5b65" +[[package]] +name = "futures-timer" +version = "3.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e64b03909df88034c26dc1547e8970b91f98bdb65165d6a4e9110d94263dbb2c" + [[package]] name = "futures-util" version = "0.3.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26b01e40b772d54cf6c6d721c1d1abd0647a0106a12ecaa1c186273392a69533" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -1143,6 +1256,24 @@ version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6fb8d784f27acf97159b40fc4db5ecd8aa23b9ad5ef69cdd136d3bc80665f0c0" +[[package]] +name = "governor" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c390a940a5d157878dd057c78680a33ce3415bcd05b4799509ea44210914b4d5" +dependencies = [ + "cfg-if 1.0.0", + "dashmap", + "futures 0.3.28", + "futures-timer", + "no-std-compat", + "nonzero_ext", + "parking_lot 0.12.1", + "quanta", + "rand 0.8.5", + "smallvec 1.11.0", +] + [[package]] name = "h2" version = "0.1.26" @@ -1152,7 +1283,7 @@ dependencies = [ "byteorder", "bytes 0.4.12", "fnv", - "futures", + "futures 0.1.31", "http 0.1.21", "indexmap", "log", @@ -1167,7 +1298,7 @@ version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "fnv", "futures-core", "futures-sink", @@ -1188,9 +1319,9 @@ checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" [[package]] name = "handlebars" -version = "4.3.7" +version = "4.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83c3372087601b532857d332f5957cbae686da52bb7810bf038c3e3c3cc2fa0d" +checksum = "c39b3bc2a8f715298032cf5087e58573809374b08160aa7d750582bdb82d2683" dependencies = [ "log", "pest", @@ -1207,6 +1338,12 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + [[package]] name = "hermit-abi" version = "0.3.2" @@ -1258,7 +1395,7 @@ version = "0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "fnv", "itoa 1.0.9", ] @@ -1270,7 +1407,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6741c859c1b2463a423a1dbce98d418e6c3c3fc720fb0d45528657320920292d" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "http 0.1.21", "tokio-buf", ] @@ -1281,7 +1418,7 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "http 0.2.9", "pin-project-lite", ] @@ -1317,7 +1454,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c843caf6296fc1f93444735205af9ed4e109a539005abb2564ae1d6fad34c52" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "futures-cpupool", "h2 0.1.26", "http 0.1.21", @@ -1328,7 +1465,7 @@ dependencies = [ "log", "net2", "rustc_version 0.2.3", - "time 0.1.45", + "time 0.1.43", "tokio 0.1.22", "tokio-buf", "tokio-executor", @@ -1346,7 +1483,7 @@ version = "0.14.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffb1cfd654a8219eaef89881fdb3bb3b1cdc5fa75ded05d6933b2b382e395468" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "futures-channel", "futures-core", "futures-util", @@ -1371,7 +1508,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a800d6aa50af4b5850b2b0f659625ce9504df908e9733b635720483be26174f" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "hyper 0.12.36", 
"native-tls", "tokio-io", @@ -1383,7 +1520,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "hyper 0.14.27", "native-tls", "tokio 1.32.0", @@ -1429,7 +1566,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" dependencies = [ "autocfg 1.1.0", - "hashbrown", + "hashbrown 0.12.3", ] [[package]] @@ -1526,10 +1663,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3" [[package]] -name = "linux-raw-sys" -version = "0.4.5" +name = "libmimalloc-sys" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +checksum = "25d058a81af0d1c22d7a1c948576bee6d673f7af3c0f35564abd6c81122f513d" +dependencies = [ + "cc", + "libc", +] + +[[package]] +name = "linux-raw-sys" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a9bad9f94746442c783ca431b22403b519cd7fbeed0533fdd6328b2f2212128" [[package]] name = "local-channel" @@ -1580,6 +1727,15 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" +[[package]] +name = "mach" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b823e83b2affd8f40a9ee8c29dbc56404c1e34cd2710921f2801e2cf29527afa" +dependencies = [ + "libc", +] + [[package]] name = "markup5ever" version = "0.8.1" @@ -1631,9 +1787,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "memchr" -version = "2.6.2" +version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5486aed0026218e61b8a01d5fbd5a0a134649abb71a0e53b7bc088529dced86e" +checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" [[package]] name = "memoffset" @@ -1653,6 +1809,15 @@ dependencies = [ "autocfg 1.1.0", ] +[[package]] +name = "mimalloc" +version = "0.1.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "972e5f23f6716f62665760b0f4cbf592576a80c7b879ba9beaafc0e558894127" +dependencies = [ + "libmimalloc-sys", +] + [[package]] name = "mime" version = "0.3.17" @@ -1678,6 +1843,16 @@ dependencies = [ "adler", ] +[[package]] +name = "mintex" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd7c5ba1c3b5a23418d7bbf98c71c3d4946a0125002129231da8d6b723d559cb" +dependencies = [ + "once_cell", + "sys-info", +] + [[package]] name = "mio" version = "0.6.23" @@ -1721,6 +1896,20 @@ dependencies = [ "ws2_32-sys", ] +[[package]] +name = "mlua" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bb37b0ba91f017aa7ca2b98ef99496827770cd635b4a932a6047c5b4bbe678e" +dependencies = [ + "bstr", + "cc", + "num-traits", + "once_cell", + "pkg-config", + "rustc-hash", +] + [[package]] name = "native-tls" version = "0.2.11" @@ -1762,6 +1951,18 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab250442c86f1850815b5d268639dff018c0627022bc1940eb2d642ca1ce12f0" +[[package]] +name = "no-std-compat" +version = "0.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c" + +[[package]] +name = "nonzero_ext" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" + [[package]] name = "num-traits" version = "0.2.16" @@ -1783,9 +1984,9 @@ dependencies = [ [[package]] name = "object" -version = "0.32.0" +version = "0.32.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77ac5bbd07aea88c60a577a1ce218075ffd59208b2d7ca97adf9bfc5aeb21ebe" +checksum = "9cf5f9dd3933bd50a9e1f149ec995f39ae2c496d31fd772c1fd45ebc27e902b0" dependencies = [ "memchr", ] @@ -1825,7 +2026,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -1836,9 +2037,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.92" +version = "0.9.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db7e971c2c2bba161b2d2fdf37080177eff520b3bc044787c7f1f5f9e78d869b" +checksum = "db4d56a4c0478783083cfafcc42493dd4a981d41669da64b4572a2a089b51b1d" dependencies = [ "cc", "libc", @@ -1944,7 +2145,7 @@ dependencies = [ "pest_meta", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -2046,7 +2247,7 @@ dependencies = [ "phf_shared 0.11.2", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -2076,6 +2277,26 @@ dependencies = [ "siphasher 0.3.11", ] +[[package]] +name = "pin-project" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fda4ed1c6c173e3fc7a83629421152e01d7b1f9b7f65fb301e490e8cfc656422" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" +dependencies = [ + "proc-macro2 1.0.66", + "quote 1.0.33", + "syn 2.0.32", +] + [[package]] name = "pin-project-lite" version = "0.2.13" @@ -2162,6 +2383,22 @@ dependencies = [ "url 2.4.1", ] +[[package]] +name = "quanta" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20afe714292d5e879d8b12740aa223c6a88f118af41870e8b6196e39a02238a8" +dependencies = [ + "crossbeam-utils 0.8.16", + "libc", + "mach", + "once_cell", + "raw-cpuid", + "wasi 0.10.2+wasi-snapshot-preview1", + "web-sys", + "winapi 0.3.9", +] + [[package]] name = "quote" version = "0.6.13" @@ -2316,6 +2553,15 @@ dependencies = [ "rand_core 0.3.1", ] +[[package]] +name = "raw-cpuid" +version = "10.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c297679cb867470fa8c9f67dbba74a78d78e3e98d7cf2b08d6d71540f797332" +dependencies = [ + "bitflags 1.3.2", +] + [[package]] name = "rayon" version = "1.7.0" @@ -2349,16 +2595,25 @@ dependencies = [ [[package]] name = "redis" -version = "0.23.2" +version = "0.23.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffd6543a7bc6428396845f6854ccf3d1ae8823816592e2cbe74f20f50f209d02" +checksum = "4f49cdc0bb3f412bf8e7d1bd90fe1d9eb10bc5c399ba90973c14662a27b3f8ba" dependencies = [ + "arc-swap", + "async-trait", + "bytes 1.5.0", "combine", + "futures 0.3.28", + 
"futures-util", "itoa 1.0.9", "percent-encoding 2.3.0", + "pin-project-lite", "ryu", "sha1_smol", "socket2 0.4.9", + "tokio 1.32.0", + "tokio-retry", + "tokio-util", "url 2.4.1", ] @@ -2379,9 +2634,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.9.4" +version = "1.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12de2eff854e5fa4b1295edd650e227e9d8fb0c9e90b12e7f36d6a6811791a29" +checksum = "697061221ea1b4a94a624f67d0ae2bfe4e22b8a17b6a192afb11046542cc8c47" dependencies = [ "aho-corasick", "memchr", @@ -2391,9 +2646,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.3.7" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49530408a136e16e5b486e883fbb6ba058e8e4e8ae6621a77b048b314336e629" +checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" dependencies = [ "aho-corasick", "memchr", @@ -2418,7 +2673,7 @@ dependencies = [ "cookie_store", "encoding_rs", "flate2", - "futures", + "futures 0.1.31", "http 0.1.21", "hyper 0.12.36", "hyper-tls 0.3.2", @@ -2429,7 +2684,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded 0.5.5", - "time 0.1.45", + "time 0.1.43", "tokio 0.1.22", "tokio-executor", "tokio-io", @@ -2446,8 +2701,8 @@ version = "0.11.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3e9ad3fe7488d7e34558a2033d45a0c90b72d97b4f80705666fea71472e2e6a1" dependencies = [ - "base64 0.21.3", - "bytes 1.4.0", + "base64 0.21.4", + "bytes 1.5.0", "encoding_rs", "futures-core", "futures-util", @@ -2477,36 +2732,18 @@ dependencies = [ "winreg 0.50.0", ] -[[package]] -name = "rlua" -version = "0.19.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d33e5ba15c3d43178f283ed5863d4531e292fc0e56fb773f3bea45f18e3a42a" -dependencies = [ - "bitflags 1.3.2", - "bstr", - "libc", - "num-traits", - "rlua-lua54-sys", -] - -[[package]] -name = "rlua-lua54-sys" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7aafabafe1895cb4a2be81a56d7ff3d46bf4b5d2f9cfdbea2ed404cdabe96474" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "rustc-demangle" version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d626bb9dae77e28219937af045c257c28bfd3f69333c512553507f5f9798cb76" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustc_version" version = "0.2.3" @@ -2527,9 +2764,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.10" +version = "0.38.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed6248e1caa625eb708e266e06159f135e8c26f2bb7ceb72dc4b2766d0340964" +checksum = "d7db8590df6dfcd144d22afd1b83b36c21a18d7cbc1dc4bb5295a8712e9eb662" dependencies = [ "bitflags 2.4.0", "errno", @@ -2687,14 +2924,14 @@ checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] name = "serde_json" -version = "1.0.105" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693151e1ac27563d6dbcec9dee9fbd5da8539b20fa14ad3752b2e6d363ace360" +checksum = "2cc66a619ed80bf7a0f6b17dd063a84b88f6dea1813737cf469aef1d081142c2" dependencies = [ "itoa 1.0.9", "ryu", @@ 
-2806,6 +3043,9 @@ name = "smallvec" version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" +dependencies = [ + "serde", +] [[package]] name = "socket2" @@ -2819,9 +3059,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2538b18701741680e0322a2302176d3253a35388e2e62f172f64f4f16605f877" +checksum = "4031e820eb552adee9295814c0ced9e5cf38ddf1e8b7d566d6de8e2538ea989e" dependencies = [ "libc", "windows-sys", @@ -2926,9 +3166,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.29" +version = "2.0.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c324c494eba9d92503e6f1ef2e6df781e78f6a7705a0202d9801b198807d518a" +checksum = "239814284fd6f1a4ffe4ca893952cdd93c224b6a1571c9a9eadd670295c0c9e2" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", @@ -2947,6 +3187,16 @@ dependencies = [ "unicode-xid 0.2.4", ] +[[package]] +name = "sys-info" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b3a0d0aba8bf96a0e1ddfdc352fc53b3df7f39318c71854910c3c4b024ae52c" +dependencies = [ + "cc", + "libc", +] + [[package]] name = "tempfile" version = "3.8.0" @@ -2982,32 +3232,37 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.47" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97a802ec30afc17eee47b2855fc72e0c4cd62be9b4efe6591edde0ec5bd68d8f" +checksum = "9d6d7a740b8a666a7e828dd00da9c0dc290dff53154ea77ac109281de90589b7" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.47" +version = "1.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bb623b56e39ab7dcd4b1b98bb6c8f8d907ed255b18de254088016b27a8ee19b" +checksum = "49922ecae66cc8a249b77e68d1d0623c1b2c514f0060c27cdc68bd62a1219d35" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] -name = "time" -version = "0.1.45" +name = "thousands" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b797afad3f312d1c66a56d11d0316f916356d11bd158fbc6ca6389ff6bf805a" +checksum = "3bf63baf9f5039dadc247375c29eb13706706cfde997d0330d05aa63a77d8820" + +[[package]] +name = "time" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" dependencies = [ "libc", - "wasi 0.10.0+wasi-snapshot-preview1", "winapi 0.3.9", ] @@ -3071,7 +3326,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a09c0b5bb588872ab2f09afa13ee6e9dac11e10a0ec9e8e3ba39a5a5d530af6" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "mio 0.6.23", "num_cpus", "tokio-current-thread", @@ -3090,14 +3345,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17ed6077ed6cd6c74735e21f37eb16dc3935f96878b1fe961074089cc80893f9" dependencies = [ "backtrace", - "bytes 1.4.0", + "bytes 1.5.0", "libc", "mio 0.8.8", "num_cpus", "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.3", + "socket2 0.5.4", "tokio-macros", "windows-sys", ] @@ -3110,7 +3365,7 @@ checksum = "8fb220f46c53859a4b7ec083e41dec9778ff0b1851c0942b211edb89e0ccdc46" dependencies = [ "bytes 0.4.12", "either", - "futures", + "futures 
0.1.31", ] [[package]] @@ -3119,7 +3374,7 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1de0e32a83f131e002238d7ccde18211c0a5397f60cbfffcb112868c2e0e20e" dependencies = [ - "futures", + "futures 0.1.31", "tokio-executor", ] @@ -3130,7 +3385,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fb2d1b8f4548dbf5e1f7818512e9c406860678f29c300cdf0ebac72d1a3a1671" dependencies = [ "crossbeam-utils 0.7.2", - "futures", + "futures 0.1.31", ] [[package]] @@ -3140,7 +3395,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57fc868aae093479e3131e3d165c93b1c7474109d13c90ec0dda2a1bbfff0674" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "log", ] @@ -3152,7 +3407,7 @@ checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", ] [[package]] @@ -3172,7 +3427,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09bc590ec4ba8ba87652da2068d150dcada2cfa2e07faae270a5e0409aa51351" dependencies = [ "crossbeam-utils 0.7.2", - "futures", + "futures 0.1.31", "lazy_static", "log", "mio 0.6.23", @@ -3184,6 +3439,17 @@ dependencies = [ "tokio-sync", ] +[[package]] +name = "tokio-retry" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f57eb36ecbe0fc510036adff84824dd3c24bb781e21bfa67b69d556aa85214f" +dependencies = [ + "pin-project", + "rand 0.8.5", + "tokio 1.32.0", +] + [[package]] name = "tokio-sync" version = "0.1.8" @@ -3191,7 +3457,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "edfe50152bc8164fcc456dab7891fa9bf8beaf01c5ee7e1dd43a397c3cf87dee" dependencies = [ "fnv", - "futures", + "futures 0.1.31", ] [[package]] @@ -3201,7 +3467,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "98df18ed66e3b72e742f185882a9e201892407957e45fbff8da17ae7a7c51f72" dependencies = [ "bytes 0.4.12", - "futures", + "futures 0.1.31", "iovec", "mio 0.6.23", "tokio-io", @@ -3217,7 +3483,7 @@ dependencies = [ "crossbeam-deque 0.7.4", "crossbeam-queue", "crossbeam-utils 0.7.2", - "futures", + "futures 0.1.31", "lazy_static", "log", "num_cpus", @@ -3232,7 +3498,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "93044f2d313c95ff1cb7809ce9a7a05735b012288a888b62d4434fd58c94f296" dependencies = [ "crossbeam-utils 0.7.2", - "futures", + "futures 0.1.31", "slab", "tokio-executor", ] @@ -3243,7 +3509,7 @@ version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "806fe8c2c87eccc8b3267cbae29ed3ab2d0bd37fca70ab622e46aaa9375ddb7d" dependencies = [ - "bytes 1.4.0", + "bytes 1.5.0", "futures-core", "futures-sink", "pin-project-lite", @@ -3413,9 +3679,9 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "walkdir" -version = "2.3.3" +version = "2.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" dependencies = [ "same-file", "winapi-util", @@ -3427,7 +3693,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6395efa4784b027708f7451087e647ec73cc74f5d9bc2e418404248d679a230" dependencies = [ - "futures", + "futures 0.1.31", "log", "try-lock", ] @@ 
-3443,9 +3709,9 @@ dependencies = [ [[package]] name = "wasi" -version = "0.10.0+wasi-snapshot-preview1" +version = "0.10.2+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" [[package]] name = "wasi" @@ -3474,7 +3740,7 @@ dependencies = [ "once_cell", "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", "wasm-bindgen-shared", ] @@ -3508,7 +3774,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2 1.0.66", "quote 1.0.33", - "syn 2.0.29", + "syn 2.0.32", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -3531,29 +3797,35 @@ dependencies = [ [[package]] name = "websurfx" -version = "0.18.6" +version = "0.20.7" dependencies = [ "actix-cors", "actix-files", + "actix-governor", "actix-web", + "async-once-cell", "async-trait", "criterion", + "dhat", "env_logger", "error-stack", "fake-useragent", + "futures 0.3.28", "handlebars", "log", "md5", + "mimalloc", + "mlua", "once_cell", "rand 0.8.5", "redis", "regex", "reqwest 0.11.20", - "rlua", "rusty-hook", "scraper", "serde", "serde_json", + "smallvec 1.11.0", "tempfile", "tokio 1.32.0", ] diff --git a/Cargo.toml b/Cargo.toml index ed0e24b..d686dd5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "websurfx" -version = "0.18.6" +version = "0.20.7" edition = "2021" description = "An open-source alternative to Searx that provides clean, ad-free, and organic results with incredible speed while keeping privacy and security in mind." repository = "https://github.com/neon-mmd/websurfx" @@ -8,9 +8,9 @@ license = "AGPL-3.0" [dependencies] reqwest = {version="0.11.20",features=["json"]} -tokio = {version="1.32.0",features=["full"]} +tokio = {version="1.32.0",features=["rt-multi-thread","macros"]} serde = {version="1.0.188",features=["derive"]} -handlebars = { version = "4.3.7", features = ["dir_source"] } +handlebars = { version = "4.4.0", features = ["dir_source"] } scraper = {version="0.17.1"} actix-web = {version="4.4.0", features = ["cookies"]} actix-files = {version="0.6.2"} @@ -19,14 +19,20 @@ serde_json = {version="1.0.105"} fake-useragent = {version="0.1.3"} env_logger = {version="0.10.0"} log = {version="0.4.20"} -rlua = {version="0.19.7"} -redis = {version="0.23.2"} +mlua = {version="0.8.10", features=["luajit"]} +redis = {version="0.23.3", features=["tokio-comp","connection-manager"]} md5 = {version="0.7.0"} rand={version="0.8.5"} once_cell = {version="1.18.0"} error-stack = {version="0.4.0"} async-trait = {version="0.1.73"} regex = {version="1.9.4", features=["perf"]} +smallvec = {version="1.11.0", features=["union", "serde"]} +futures = {version="0.3.28"} +dhat = {version="0.3.2", optional = true} +mimalloc = { version = "0.1.38", default-features = false } +async-once-cell = {version="0.5.3"} +actix-governor = {version="0.4.1"} [dev-dependencies] rusty-hook = "^0.11.2" @@ -47,13 +53,17 @@ rpath = false [profile.release] opt-level = 3 -debug = false +debug = false # This should only be commented when testing with dhat profiler +# debug = 1 # This should only be uncommented when testing with dhat profiler split-debuginfo = '...' 
debug-assertions = false
overflow-checks = false
-lto = 'thin'
+lto = true
panic = 'abort'
incremental = false
-codegen-units = 16
+codegen-units = 1
rpath = false
strip = "debuginfo"
+
+[features]
+dhat-heap = ["dep:dhat"]
diff --git a/README.md b/README.md
index 0391aba..891b01d 100644
--- a/README.md
+++ b/README.md
@@ -51,7 +51,7 @@
 - **Getting Started**
   - [🔭 Preview](#preview-)
   - [🚀 Features](#features-)
-  - [🛠️ Installation and Testing](#installation-and-testing-)
+  - [🛠️ Installation and Testing](#installation-and-testing-%EF%B8%8F)
   - [🔧 Configuration](#configuration-)
 - **Feature Overview**
   - [🎨 Theming](#theming-)
diff --git a/docs/installation.md b/docs/installation.md
index 8f2ee2e..54d4355 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -109,7 +109,7 @@ colorscheme = "catppuccin-mocha" -- the colorscheme name which should be used fo
 theme = "simple" -- the theme name which should be used for the website
 
 -- ### Caching ###
-redis_connection_url = "redis://redis:6379" -- redis connection url address on which the client should connect on.
+redis_url = "redis://redis:6379" -- the redis connection url address to which the client should connect.
 
 -- ### Search Engines ###
 upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched.
diff --git a/public/images/barricade.png b/public/images/barricade.png
new file mode 100644
index 0000000..eef097b
Binary files /dev/null and b/public/images/barricade.png differ
diff --git a/public/images/filter.png b/public/images/filter.png
new file mode 100644
index 0000000..5325c27
Binary files /dev/null and b/public/images/filter.png differ
diff --git a/public/static/themes/simple.css b/public/static/themes/simple.css
index 37e3c21..3d9c3b9 100644
--- a/public/static/themes/simple.css
+++ b/public/static/themes/simple.css
@@ -132,6 +132,35 @@ body {
   width: 1.2rem;
   height: 1.2rem;
 }
+
+.results .result_disallowed,
+.results .result_filtered {
+  display: flex;
+  justify-content: center;
+  align-items: center;
+  gap: 10rem;
+  font-size: 2rem;
+  color: var(--foreground-color);
+  margin: 0rem 7rem;
+}
+
+.results .result_disallowed .user_query,
+.results .result_filtered .user_query {
+  color: var(--background-color);
+  font-weight: 300;
+}
+
+.results .result_disallowed img,
+.results .result_filtered img {
+  width: 30rem;
+}
+
+.results .result_disallowed div,
+.results .result_filtered div {
+  display: flex;
+  flex-direction: column;
+  gap: 1rem;
+  line-break: strict;
+}
 
 /* styles for the footer and header */
diff --git a/public/templates/search.html b/public/templates/search.html
index e6fd4e8..8a79d69 100644
--- a/public/templates/search.html
+++ b/public/templates/search.html
@@ -1,37 +1,69 @@
 {{>header this.style}}
- {{>search_bar this}} -
- {{#if results}} {{#each results}} -
-

{{{this.title}}}

- {{{this.url}}} -

{{{this.description}}}

-
- {{#each engine}} - {{{this}}} - {{/each}} -
+ {{>search_bar this}} +
+ {{#if results}} {{#each results}} +
+

{{{this.title}}}

+ {{{this.url}}} +

{{{this.description}}}

+
+ {{#each engine}} + {{{this}}} + {{/each}} +
+
+ {{/each}} {{else}} {{#if disallowed}} +
+
+

+ Your search - {{{this.pageQuery}}} - + has been disallowed. +

+

Dear user,

+

+              The query - {{{this.pageQuery}}} - has
+              been blacklisted via the server configuration and hence disallowed by the
+              server. As a result, no results can be displayed for your query.

+
+ Image of a Barricade +
+ {{else}} {{#if filtered}} +
+
+

+ Your search - {{{this.pageQuery}}} - + has been filtered. +

+

Dear user,

+

+              All of the results for this search have been configured to be
+              filtered out via the server configuration and have therefore been
+              completely filtered out.

+
+ Image of a paper inside a funnel +
+ {{else}} +
+

Your search - {{{this.pageQuery}}} - did not match any documents.

+

Suggestions:

+
    +
  • Make sure that all words are spelled correctly.
  • +
  • Try different keywords.
  • +
  • Try more general keywords.
  • +
+ Man fishing gif +
+ {{/if}} {{/if}} {{/if}}
- {{/each}} {{else}} -
-

Your search - {{{this.pageQuery}}} - did not match any documents.

-

Suggestions:

-
    -
  • Make sure that all words are spelled correctly.
  • -
  • Try different keywords.
  • -
  • Try more general keywords.
  • -
- Man fishing gif + - {{/if}} -
-
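For orientation: the three fallback branches in the template above key off `results`, `disallowed` and `filtered` in the handlebars context, echoing the user's query back through `pageQuery`. A minimal sketch of the context shape those branches assume follows; the struct name and exact field types are illustrative guesses taken from the template itself, not the crate's actual model definition:

    // Hypothetical shape of the context rendered into search.html.
    #[derive(serde::Serialize)]
    struct SearchPageContext {
        /// Scraped results; empty when one of the fallback branches renders.
        results: Vec<serde_json::Value>,
        /// The user's query, echoed back as {{{this.pageQuery}}}.
        #[serde(rename = "pageQuery")]
        page_query: String,
        /// Set when the query is blacklisted via the server configuration.
        disallowed: bool,
        /// Set when every fetched result was filtered out.
        filtered: bool,
    }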
diff --git a/src/bin/websurfx.rs b/src/bin/websurfx.rs
index 75d0b8d..bc8e7ce 100644
--- a/src/bin/websurfx.rs
+++ b/src/bin/websurfx.rs
@@ -3,9 +3,19 @@
 //! This module contains the main function which handles the logging of the application to the
 //! stdout and handles the command line arguments provided and launches the `websurfx` server.
 
+use mimalloc::MiMalloc;
 use std::net::TcpListener;
 use websurfx::{config::parser::Config, run};
 
+/// A dhat heap memory profiler
+#[cfg(feature = "dhat-heap")]
+#[global_allocator]
+static ALLOC: dhat::Alloc = dhat::Alloc;
+
+#[cfg(not(feature = "dhat-heap"))]
+#[global_allocator]
+static GLOBAL: MiMalloc = MiMalloc;
+
 /// The function that launches the main server and registers all the routes of the website.
 ///
 /// # Error
@@ -14,6 +24,10 @@ use websurfx::{config::parser::Config, run};
 /// available for being used for other applications.
 #[actix_web::main]
 async fn main() -> std::io::Result<()> {
+    // A dhat heap profiler initialization.
+    #[cfg(feature = "dhat-heap")]
+    let _profiler = dhat::Profiler::new_heap();
+
     // Initialize the parsed config file.
     let config = Config::parse(false).unwrap();
diff --git a/src/cache/cacher.rs b/src/cache/cacher.rs
index 44d0710..57351cd 100644
--- a/src/cache/cacher.rs
+++ b/src/cache/cacher.rs
@@ -1,17 +1,24 @@
 //! This module provides the functionality to cache the aggregated results fetched and aggregated
 //! from the upstream search engines in a json format.
 
+use error_stack::Report;
+use futures::future::try_join_all;
 use md5::compute;
-use redis::{Client, Commands, Connection};
+use redis::{aio::ConnectionManager, AsyncCommands, Client, RedisError};
+
+use super::error::PoolError;
 
 /// A named struct which stores the redis Connection url address to which the client will
 /// connect.
-///
-/// # Fields
-///
-/// * `redis_connection_url` - It stores the redis Connection url address.
+#[derive(Clone)]
 pub struct RedisCache {
-    connection: Connection,
+    /// It stores a pool of connections ready to be used.
+    connection_pool: Vec<ConnectionManager>,
+    /// It stores the size of the connection pool (in other words the number of
+    /// connections that should be stored in the pool).
+    pool_size: u8,
+    /// It stores the index of which connection is being used at the moment.
+    current_connection: u8,
 }
 
 impl RedisCache {
@@ -19,11 +26,25 @@ impl RedisCache {
     ///
     /// # Arguments
     ///
-    /// * `redis_connection_url` - It stores the redis Connection url address.
-    pub fn new(redis_connection_url: String) -> Result<Self, Box<dyn std::error::Error>> {
+    /// * `redis_connection_url` - It takes the redis Connection url address.
+    /// * `pool_size` - It takes the size of the connection pool (in other words the number of
+    /// connections that should be stored in the pool).
+    pub async fn new(
+        redis_connection_url: &str,
+        pool_size: u8,
+    ) -> Result<Self, Box<dyn std::error::Error>> {
         let client = Client::open(redis_connection_url)?;
-        let connection = client.get_connection()?;
-        let redis_cache = RedisCache { connection };
+        let mut tasks: Vec<_> = Vec::new();
+
+        for _ in 0..pool_size {
+            tasks.push(client.get_tokio_connection_manager());
+        }
+
+        let redis_cache = RedisCache {
+            connection_pool: try_join_all(tasks).await?,
+            pool_size,
+            current_connection: Default::default(),
+        };
         Ok(redis_cache)
     }
 
@@ -32,7 +53,7 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes a url as string.
-    fn hash_url(url: &str) -> String {
+    fn hash_url(&self, url: &str) -> String {
         format!("{:?}", compute(url))
     }
 
@@ -41,9 +62,42 @@ impl RedisCache {
     /// # Arguments
     ///
     /// * `url` - It takes a url as a string.
-    pub fn cached_json(&mut self, url: &str) -> Result<String, Box<dyn std::error::Error>> {
-        let hashed_url_string = Self::hash_url(url);
-        Ok(self.connection.get(hashed_url_string)?)
+    pub async fn cached_json(&mut self, url: &str) -> Result<String, Report<PoolError>> {
+        self.current_connection = Default::default();
+        let hashed_url_string: &str = &self.hash_url(url);
+
+        let mut result: Result<String, RedisError> = self.connection_pool
+            [self.current_connection as usize]
+            .get(hashed_url_string)
+            .await;
+
+        // Check whether the current connection failed with a connection-dropped error. If it
+        // did, switch to the next connection in the pool and rerun the redis command, checking
+        // each connection in turn. The first successful run returns its `Result`, any other
+        // kind of redis error is returned immediately, and if every connection in the pool
+        // drops, a custom pool exhaustion error is returned.
+        loop {
+            match result {
+                Err(error) => match error.is_connection_dropped() {
+                    true => {
+                        self.current_connection += 1;
+                        if self.current_connection == self.pool_size {
+                            return Err(Report::new(
+                                PoolError::PoolExhaustionWithConnectionDropError,
+                            ));
+                        }
+                        result = self.connection_pool[self.current_connection as usize]
+                            .get(hashed_url_string)
+                            .await;
+                        continue;
+                    }
+                    false => return Err(Report::new(PoolError::RedisError(error))),
+                },
+                Ok(res) => return Ok(res),
+            }
+        }
     }
 
     /// A function which caches the results by using the hashed `url` as the key and
@@ -54,21 +108,45 @@ impl RedisCache {
     ///
     /// * `json_results` - It takes the json results string as an argument.
     /// * `url` - It takes the url as a String.
-    pub fn cache_results(
+    pub async fn cache_results(
         &mut self,
-        json_results: String,
+        json_results: &str,
         url: &str,
-    ) -> Result<(), Box<dyn std::error::Error>> {
-        let hashed_url_string = Self::hash_url(url);
+    ) -> Result<(), Report<PoolError>> {
+        self.current_connection = Default::default();
+        let hashed_url_string: &str = &self.hash_url(url);
 
-        // put results_json into cache
-        self.connection.set(&hashed_url_string, json_results)?;
+        let mut result: Result<(), RedisError> = self.connection_pool
+            [self.current_connection as usize]
+            .set_ex(hashed_url_string, json_results, 60)
+            .await;
 
-        // Set the TTL for the key to 60 seconds
-        self.connection
-            .expire::<String, u64>(hashed_url_string, 60)
-            .unwrap();
-
-        Ok(())
+        // Check whether the current connection failed with a connection-dropped error. If it
+        // did, switch to the next connection in the pool and rerun the redis command, checking
+        // each connection in turn. The first successful run returns its `Result`, any other
+        // kind of redis error is returned immediately, and if every connection in the pool
+        // drops, a custom pool exhaustion error is returned.
+        loop {
+            match result {
+                Err(error) => match error.is_connection_dropped() {
+                    true => {
+                        self.current_connection += 1;
+                        if self.current_connection == self.pool_size {
+                            return Err(Report::new(
+                                PoolError::PoolExhaustionWithConnectionDropError,
+                            ));
+                        }
+                        result = self.connection_pool[self.current_connection as usize]
+                            .set_ex(hashed_url_string, json_results, 60)
+                            .await;
+                        continue;
+                    }
+                    false => return Err(Report::new(PoolError::RedisError(error))),
+                },
+                Ok(_) => return Ok(()),
+            }
+        }
     }
 }
diff --git a/src/cache/error.rs b/src/cache/error.rs
new file mode 100644
index 0000000..8bdb977
--- /dev/null
+++ b/src/cache/error.rs
@@ -0,0 +1,37 @@
+//! This module provides the error enum to handle different errors associated while requesting data from
+//! the redis server using an async connection pool.
+use std::fmt;
+
+use redis::RedisError;
+
+/// A custom error type used for handling redis async pool associated errors.
+#[derive(Debug)]
+pub enum PoolError {
+    /// This variant handles all errors related to `RedisError`.
+    RedisError(RedisError),
+    /// This variant handles the errors which occur when all the connections
+    /// in the connection pool return a connection dropped redis error.
+    PoolExhaustionWithConnectionDropError,
+}
+
+impl fmt::Display for PoolError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            PoolError::RedisError(redis_error) => {
+                if let Some(detail) = redis_error.detail() {
+                    write!(f, "{}", detail)
+                } else {
+                    write!(f, "")
+                }
+            }
+            PoolError::PoolExhaustionWithConnectionDropError => {
+                write!(
+                    f,
+                    "Error: all the connections from the pool dropped with a connection error"
+                )
+            }
+        }
+    }
+}
+
+impl error_stack::Context for PoolError {}
diff --git a/src/cache/mod.rs b/src/cache/mod.rs
index de7dd4e..f40369f 100644
--- a/src/cache/mod.rs
+++ b/src/cache/mod.rs
@@ -1 +1,5 @@
+//! This module provides the modules which provide the functionality to cache the aggregated
+//! results fetched and aggregated from the upstream search engines in a json format.
+
 pub mod cacher;
+pub mod error;
diff --git a/src/config/mod.rs b/src/config/mod.rs
index 11ce559..babc54f 100644
--- a/src/config/mod.rs
+++ b/src/config/mod.rs
@@ -1,2 +1,4 @@
+//! This module provides the modules which handle the functionality to parse the lua config
+//! and convert the config options into rust readable form.
+
 pub mod parser;
-pub mod parser_models;
diff --git a/src/config/parser.rs b/src/config/parser.rs
index 4639013..782b026 100644
--- a/src/config/parser.rs
+++ b/src/config/parser.rs
@@ -3,49 +3,40 @@
 
 use crate::handler::paths::{file_path, FileType};
 
-use super::parser_models::Style;
+use crate::models::parser_models::{AggregatorConfig, RateLimiter, Style};
 use log::LevelFilter;
-use rlua::Lua;
+use mlua::Lua;
 use std::{collections::HashMap, fs, thread::available_parallelism};
 
 /// A named struct which stores the parsed config file options.
-///
-/// # Fields
-//
-/// * `port` - It stores the parsed port number option on which the server should launch.
-/// * `binding_ip` - It stores the parsed ip address option on which the server should launch
-/// * `style` - It stores the theming options for the website.
-/// * `redis_url` - It stores the redis connection url address on which the redis
-/// client should connect.
-/// * `aggregator` - It stores the option to whether enable or disable production use.
-/// * `logging` - It stores the option to whether enable or disable logs.
-/// * `debug` - It stores the option to whether enable or disable debug mode.
-/// * `upstream_search_engines` - It stores all the engine names that were enabled by the user.
-/// * `request_timeout` - It stores the time (secs) which controls the server request timeout.
-/// * `threads` - It stores the number of threads which controls the app will use to run.
 #[derive(Clone)]
 pub struct Config {
+    /// It stores the parsed port number option on which the server should launch.
     pub port: u16,
+    /// It stores the parsed ip address option on which the server should launch.
     pub binding_ip: String,
+    /// It stores the theming options for the website.
     pub style: Style,
+    /// It stores the redis connection url address on which the redis
+    /// client should connect.
     pub redis_url: String,
+    /// It stores the option to whether enable or disable production use.
     pub aggregator: AggregatorConfig,
+    /// It stores the option to whether enable or disable logs.
     pub logging: bool,
+    /// It stores the option to whether enable or disable debug mode.
     pub debug: bool,
-    pub upstream_search_engines: Vec<crate::engines::engine_models::EngineHandler>,
+    /// It stores all the engine names that were enabled by the user.
+    pub upstream_search_engines: Vec<crate::models::engine_models::EngineHandler>,
+    /// It stores the time (secs) which controls the server request timeout.
     pub request_timeout: u8,
+    /// It stores the number of threads which the app will use to run.
     pub threads: u8,
-}
-
-/// Configuration options for the aggregator.
-///
-/// # Fields
-///
-/// * `random_delay` - It stores the option to whether enable or disable random delays between
-/// requests.
-#[derive(Clone)]
-pub struct AggregatorConfig {
-    pub random_delay: bool,
+    /// It stores configuration options for the ratelimiting middleware.
+    pub rate_limiter: RateLimiter,
+    /// It stores the level of safe search to be used for restricting content in the
+    /// search results.
+    pub safe_search: u8,
 }
 
 impl Config {
@@ -63,58 +54,80 @@ impl Config {
     /// or io error if the config.lua file doesn't exist otherwise it returns a newly constructed
     /// Config struct with all the parsed config options from the parsed config file.
     pub fn parse(logging_initialized: bool) -> Result<Self, Box<dyn std::error::Error>> {
-        Lua::new().context(|context| -> Result<Self, Box<dyn std::error::Error>> {
-            let globals = context.globals();
+        let lua = Lua::new();
+        let globals = lua.globals();
 
-            context
-                .load(&fs::read_to_string(file_path(FileType::Config)?)?)
-                .exec()?;
+        lua.load(&fs::read_to_string(file_path(FileType::Config)?)?)
+            .exec()?;
 
-            let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
+        let parsed_threads: u8 = globals.get::<_, u8>("threads")?;
 
-            let debug: bool = globals.get::<_, bool>("debug")?;
-            let logging:bool= globals.get::<_, bool>("logging")?;
+        let debug: bool = globals.get::<_, bool>("debug")?;
+        let logging: bool = globals.get::<_, bool>("logging")?;
 
-            if !logging_initialized {
-                set_logging_level(debug, logging);
+        if !logging_initialized {
+            set_logging_level(debug, logging);
+        }
+
+        let threads: u8 = if parsed_threads == 0 {
+            let total_num_of_threads: usize = available_parallelism()?.get() / 2;
+            log::error!(
+                "Config Error: The value of `threads` option should be a non zero positive integer"
+            );
+            log::error!("Falling back to using {} threads", total_num_of_threads);
+            total_num_of_threads as u8
+        } else {
+            parsed_threads
+        };
+
+        let rate_limiter = globals.get::<_, HashMap<String, u8>>("rate_limiter")?;
+
+        let parsed_safe_search: u8 = globals.get::<_, u8>("safe_search")?;
+        let safe_search: u8 = match parsed_safe_search {
+            0..=4 => parsed_safe_search,
+            _ => {
+                log::error!("Config Error: The value of `safe_search` option should be an integer from 0 to 4.");
+                log::error!("Falling back to using the value `1` for the option");
+                1
             }
+        };
 
-            let threads: u8 = if parsed_threads == 0 {
-                let total_num_of_threads: usize = available_parallelism()?.get() / 2;
-                log::error!("Config Error: The value of `threads` option should be a non zero positive integer");
-                log::error!("Falling back to using {} threads", total_num_of_threads);
-                total_num_of_threads as u8
-            } else {
-                parsed_threads
-            };
-
-            Ok(Config {
-                port: globals.get::<_, u16>("port")?,
-                binding_ip: globals.get::<_, String>("binding_ip")?,
-                style: Style::new(
-                    globals.get::<_, String>("theme")?,
-                    globals.get::<_, String>("colorscheme")?,
-                ),
-                redis_url: globals.get::<_, String>("redis_url")?,
-                aggregator: AggregatorConfig {
-                    random_delay: globals.get::<_, bool>("production_use")?,
-                },
-                logging,
-                debug,
-                upstream_search_engines: globals
-                    .get::<_, HashMap<String, bool>>("upstream_search_engines")?
-                    .into_iter()
-                    .filter_map(|(key, value)| value.then_some(key))
-                    .filter_map(|engine| crate::engines::engine_models::EngineHandler::new(&engine))
-                    .collect(),
-                request_timeout: globals.get::<_, u8>("request_timeout")?,
-                threads,
-            })
+        Ok(Config {
+            port: globals.get::<_, u16>("port")?,
+            binding_ip: globals.get::<_, String>("binding_ip")?,
+            style: Style::new(
+                globals.get::<_, String>("theme")?,
+                globals.get::<_, String>("colorscheme")?,
+            ),
+            redis_url: globals.get::<_, String>("redis_url")?,
+            aggregator: AggregatorConfig {
+                random_delay: globals.get::<_, bool>("production_use")?,
+            },
+            logging,
+            debug,
+            upstream_search_engines: globals
+                .get::<_, HashMap<String, bool>>("upstream_search_engines")?
+                .into_iter()
+                .filter_map(|(key, value)| value.then_some(key))
+                .filter_map(|engine| crate::models::engine_models::EngineHandler::new(&engine))
+                .collect(),
+            request_timeout: globals.get::<_, u8>("request_timeout")?,
+            threads,
+            rate_limiter: RateLimiter {
+                number_of_requests: rate_limiter["number_of_requests"],
+                time_limit: rate_limiter["time_limit"],
+            },
+            safe_search,
+        })
     }
 }
 
 /// A helper function that sets the proper logging level
+///
+/// # Arguments
+///
+/// * `debug` - It takes the option to whether enable or disable debug mode.
+/// * `logging` - It takes the option to whether enable or disable logs.
 fn set_logging_level(debug: bool, logging: bool) {
     if let Ok(pkg_env_var) = std::env::var("PKG_ENV") {
         if pkg_env_var.to_lowercase() == "dev" {
diff --git a/src/engines/duckduckgo.rs b/src/engines/duckduckgo.rs
index 11b7d86..0f06ea4 100644
--- a/src/engines/duckduckgo.rs
+++ b/src/engines/duckduckgo.rs
@@ -4,14 +4,14 @@
 
 use std::collections::HashMap;
 
-use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT};
+use reqwest::header::HeaderMap;
 use scraper::{Html, Selector};
 
-use crate::results::aggregation_models::SearchResult;
+use crate::models::aggregation_models::SearchResult;
 
-use super::engine_models::{EngineError, SearchEngine};
+use crate::models::engine_models::{EngineError, SearchEngine};
 
-use error_stack::{IntoReport, Report, Result, ResultExt};
+use error_stack::{Report, Result, ResultExt};
 
 /// A new DuckDuckGo engine type defined in-order to implement the `SearchEngine` trait which allows to
 /// reduce code duplication as well as allows to create vector of different search engines easily.
@@ -19,30 +19,13 @@ pub struct DuckDuckGo;
 
 #[async_trait::async_trait]
 impl SearchEngine for DuckDuckGo {
-    /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped
-    /// results like title, visiting_url (href in html),engine (from which engine it was fetched from)
-    /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and
-    /// values are RawSearchResult struct and then returns it within a Result enum.
-    ///
-    /// # Arguments
-    ///
-    /// * `query` - Takes the user provided query to query to the upstream search engine with.
-    /// * `page` - Takes an u32 as an argument.
-    /// * `user_agent` - Takes a random user agent string as an argument.
-    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
-    ///
-    /// # Errors
-    ///
-    /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to
-    /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to
-    /// provide results for the requested search query and also returns error if the scraping selector
-    /// or HeaderMap fails to initialize.
     async fn results(
         &self,
-        query: String,
+        query: &str,
         page: u32,
-        user_agent: String,
+        user_agent: &str,
         request_timeout: u8,
+        _safe_search: u8,
     ) -> Result<HashMap<String, SearchResult>, EngineError> {
         // Page number can be missing or empty string and so appropriate handling is required
         // so that upstream server receives valid page number.
@@ -61,38 +44,19 @@ impl SearchEngine for DuckDuckGo {
         };
 
         // initializing HeaderMap and adding appropriate headers.
- let mut header_map = HeaderMap::new(); - header_map.insert( - USER_AGENT, - user_agent - .parse() - .into_report() - .change_context(EngineError::UnexpectedError)?, - ); - header_map.insert( - REFERER, - "https://google.com/" - .parse() - .into_report() - .change_context(EngineError::UnexpectedError)?, - ); - header_map.insert( - CONTENT_TYPE, - "application/x-www-form-urlencoded" - .parse() - .into_report() - .change_context(EngineError::UnexpectedError)?, - ); - header_map.insert( - COOKIE, - "kl=wt-wt" - .parse() - .into_report() - .change_context(EngineError::UnexpectedError)?, - ); + let header_map = HeaderMap::try_from(&HashMap::from([ + ("USER_AGENT".to_string(), user_agent.to_string()), + ("REFERER".to_string(), "https://google.com/".to_string()), + ( + "CONTENT_TYPE".to_string(), + "application/x-www-form-urlencoded".to_string(), + ), + ("COOKIE".to_string(), "kl=wt-wt".to_string()), + ])) + .change_context(EngineError::UnexpectedError)?; let document: Html = Html::parse_document( - &DuckDuckGo::fetch_html_from_upstream(self, url, header_map, request_timeout).await?, + &DuckDuckGo::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?, ); let no_result: Selector = Selector::parse(".no-results") @@ -126,8 +90,7 @@ impl SearchEngine for DuckDuckGo { .next() .unwrap() .inner_html() - .trim() - .to_string(), + .trim(), format!( "https://{}", result @@ -136,15 +99,15 @@ impl SearchEngine for DuckDuckGo { .unwrap() .inner_html() .trim() - ), + ) + .as_str(), result .select(&result_desc) .next() .unwrap() .inner_html() - .trim() - .to_string(), - vec!["duckduckgo".to_string()], + .trim(), + &["duckduckgo"], ) }) .map(|search_result| (search_result.url.clone(), search_result)) diff --git a/src/engines/engine_models.rs b/src/engines/engine_models.rs deleted file mode 100644 index d33d13c..0000000 --- a/src/engines/engine_models.rs +++ /dev/null @@ -1,109 +0,0 @@ -//! This module provides the error enum to handle different errors associated while requesting data from -//! the upstream search engines with the search query provided by the user. - -use crate::results::aggregation_models::SearchResult; -use error_stack::{IntoReport, Result, ResultExt}; -use std::{collections::HashMap, fmt, time::Duration}; - -/// A custom error type used for handle engine associated errors. -/// -/// This enum provides variants three different categories of errors: -/// * `RequestError` - This variant handles all request related errors like forbidden, not found, -/// etc. -/// * `EmptyResultSet` - This variant handles the not results found error provide by the upstream -/// search engines. -/// * `UnexpectedError` - This variant handles all the errors which are unexpected or occur rarely -/// and are errors mostly related to failure in initialization of HeaderMap, Selector errors and -/// all other errors occurring within the code handling the `upstream search engines`. 
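The rewrite above swaps a dozen repeated `header_map.insert(...)` calls for a single fallible conversion. A small standalone sketch of the same `HeaderMap`-from-`HashMap` conversion (the header values here are illustrative; the map keys must be valid HTTP header names):

    use reqwest::header::HeaderMap;
    use std::collections::HashMap;

    fn build_headers(user_agent: &str) -> Result<HeaderMap, Box<dyn std::error::Error>> {
        // `TryFrom<&HashMap<String, String>>` parses every key into a
        // `HeaderName` and every value into a `HeaderValue`, failing on the
        // first entry that is not a valid header.
        let headers = HeaderMap::try_from(&HashMap::from([
            ("user-agent".to_string(), user_agent.to_string()),
            ("referer".to_string(), "https://google.com/".to_string()),
        ]))?;
        Ok(headers)
    }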
-#[derive(Debug)]
-pub enum EngineError {
-    EmptyResultSet,
-    RequestError,
-    UnexpectedError,
-}
-
-impl fmt::Display for EngineError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            EngineError::EmptyResultSet => {
-                write!(f, "The upstream search engine returned an empty result set")
-            }
-            EngineError::RequestError => {
-                write!(
-                    f,
-                    "Error occurred while requesting data from upstream search engine"
-                )
-            }
-            EngineError::UnexpectedError => {
-                write!(f, "An unexpected error occurred while processing the data")
-            }
-        }
-    }
-}
-
-impl error_stack::Context for EngineError {}
-
-/// A trait to define common behavior for all search engines.
-#[async_trait::async_trait]
-pub trait SearchEngine: Sync + Send {
-    async fn fetch_html_from_upstream(
-        &self,
-        url: String,
-        header_map: reqwest::header::HeaderMap,
-        request_timeout: u8,
-    ) -> Result<String, EngineError> {
-        // fetch the html from upstream search engine
-        Ok(reqwest::Client::new()
-            .get(url)
-            .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
-            .headers(header_map) // add spoofed headers to emulate human behavior
-            .send()
-            .await
-            .into_report()
-            .change_context(EngineError::RequestError)?
-            .text()
-            .await
-            .into_report()
-            .change_context(EngineError::RequestError)?)
-    }
-
-    async fn results(
-        &self,
-        query: String,
-        page: u32,
-        user_agent: String,
-        request_timeout: u8,
-    ) -> Result<HashMap<String, SearchResult>, EngineError>;
-}
-
-pub struct EngineHandler {
-    engine: Box<dyn SearchEngine>,
-    name: &'static str,
-}
-
-impl Clone for EngineHandler {
-    fn clone(&self) -> Self {
-        Self::new(self.name).unwrap()
-    }
-}
-
-impl EngineHandler {
-    /// parses an engine name into an engine handler, returns none if the engine is unknown
-    pub fn new(engine_name: &str) -> Option<Self> {
-        let engine: (&'static str, Box<dyn SearchEngine>) =
-            match engine_name.to_lowercase().as_str() {
-                "duckduckgo" => ("duckduckgo", Box::new(super::duckduckgo::DuckDuckGo)),
-                "searx" => ("searx", Box::new(super::searx::Searx)),
-                _ => return None,
-            };
-
-        Some(Self {
-            engine: engine.1,
-            name: engine.0,
-        })
-    }
-
-    pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
-        (self.name, self.engine)
-    }
-}
diff --git a/src/engines/mod.rs b/src/engines/mod.rs
index f9bb8ad..0016728 100644
--- a/src/engines/mod.rs
+++ b/src/engines/mod.rs
@@ -1,3 +1,7 @@
+//! This module provides different modules which handle the functionality to fetch results from the
+//! upstream search engines based on user requested queries. Also provides different models to
+//! provide standard functions to be implemented for all the upstream search engine handling
+//! code. Moreover, it also provides a custom error for the upstream search engine handling code.
+
 pub mod duckduckgo;
-pub mod engine_models;
 pub mod searx;
diff --git a/src/engines/searx.rs b/src/engines/searx.rs
index 4ad41f5..6ab0469 100644
--- a/src/engines/searx.rs
+++ b/src/engines/searx.rs
@@ -2,14 +2,13 @@
 //! by querying the upstream searx search engine instance with user provided query and with a page
 //! number if provided.
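The file deleted above (recreated later as `src/models/engine_models.rs`) is built on the `error_stack` pattern of a plain enum acting as a report context. A condensed sketch, assuming error-stack 0.4, where `IntoReport` is gone and `change_context` applies directly to `Result`s whose error type implements `std::error::Error`:

    use error_stack::{Context, Result, ResultExt};
    use std::fmt;

    #[derive(Debug)]
    enum FetchError {
        Request,
    }

    impl fmt::Display for FetchError {
        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
            f.write_str("error occurred while requesting data")
        }
    }

    // Marks the enum as a report context; no further methods are required.
    impl Context for FetchError {}

    fn parse_port(raw: &str) -> Result<u16, FetchError> {
        // The inner `ParseIntError` is wrapped into a `Report<FetchError>`.
        raw.parse::<u16>().change_context(FetchError::Request)
    }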
-use reqwest::header::{HeaderMap, CONTENT_TYPE, COOKIE, REFERER, USER_AGENT}; +use reqwest::header::HeaderMap; use scraper::{Html, Selector}; use std::collections::HashMap; -use crate::results::aggregation_models::SearchResult; - -use super::engine_models::{EngineError, SearchEngine}; -use error_stack::{IntoReport, Report, Result, ResultExt}; +use crate::models::aggregation_models::SearchResult; +use crate::models::engine_models::{EngineError, SearchEngine}; +use error_stack::{Report, Result, ResultExt}; /// A new Searx engine type defined in-order to implement the `SearchEngine` trait which allows to /// reduce code duplication as well as allows to create vector of different search engines easily. @@ -17,66 +16,40 @@ pub struct Searx; #[async_trait::async_trait] impl SearchEngine for Searx { - /// This function scrapes results from the upstream engine duckduckgo and puts all the scraped - /// results like title, visiting_url (href in html),engine (from which engine it was fetched from) - /// and description in a RawSearchResult and then adds that to HashMap whose keys are url and - /// values are RawSearchResult struct and then returns it within a Result enum. - /// - /// # Arguments - /// - /// * `query` - Takes the user provided query to query to the upstream search engine with. - /// * `page` - Takes an u32 as an argument. - /// * `user_agent` - Takes a random user agent string as an argument. - /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout. - /// - /// # Errors - /// - /// Returns an `EngineErrorKind` if the user is not connected to the internet or if their is failure to - /// reach the above `upstream search engine` page or if the `upstream search engine` is unable to - /// provide results for the requested search query and also returns error if the scraping selector - /// or HeaderMap fails to initialize. - async fn results( &self, - query: String, + query: &str, page: u32, - user_agent: String, + user_agent: &str, request_timeout: u8, + mut safe_search: u8, ) -> Result, EngineError> { // Page number can be missing or empty string and so appropriate handling is required // so that upstream server recieves valid page number. + if safe_search == 3 { + safe_search = 2; + }; + let url: String = match page { - 0 | 1 => format!("https://searx.work/search?q={query}&pageno=1"), - _ => format!("https://searx.work/search?q={query}&pageno={page}"), + 0 | 1 => { + format!("https://searx.work/search?q={query}&pageno=1&safesearch={safe_search}") + } + _ => format!( + "https://searx.work/search?q={query}&pageno={page}&safesearch={safe_search}" + ), }; // initializing headers and adding appropriate headers. 
- let mut header_map = HeaderMap::new(); - header_map.insert( - USER_AGENT, - user_agent - .parse() - .into_report() - .change_context(EngineError::UnexpectedError)?, - ); - header_map.insert( - REFERER, - "https://google.com/" - .parse() - .into_report() - .change_context(EngineError::UnexpectedError)?, - ); - header_map.insert( - CONTENT_TYPE, - "application/x-www-form-urlencoded" - .parse() - .into_report() - .change_context(EngineError::UnexpectedError)?, - ); - header_map.insert(COOKIE, "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".parse().into_report().change_context(EngineError::UnexpectedError)?); + let header_map = HeaderMap::try_from(&HashMap::from([ + ("USER_AGENT".to_string(), user_agent.to_string()), + ("REFERER".to_string(), "https://google.com/".to_string()), + ("CONTENT_TYPE".to_string(), "application/x-www-form-urlencoded".to_string()), + ("COOKIE".to_string(), "categories=general; language=auto; locale=en; autocomplete=duckduckgo; image_proxy=1; method=POST; safesearch=2; theme=simple; results_on_new_tab=1; doi_resolver=oadoi.org; simple_style=auto; center_alignment=1; query_in_title=1; infinite_scroll=0; disabled_engines=; enabled_engines=\"archive is__general\\054yep__general\\054curlie__general\\054currency__general\\054ddg definitions__general\\054wikidata__general\\054duckduckgo__general\\054tineye__general\\054lingva__general\\054startpage__general\\054yahoo__general\\054wiby__general\\054marginalia__general\\054alexandria__general\\054wikibooks__general\\054wikiquote__general\\054wikisource__general\\054wikiversity__general\\054wikivoyage__general\\054dictzone__general\\054seznam__general\\054mojeek__general\\054naver__general\\054wikimini__general\\054brave__general\\054petalsearch__general\\054goo__general\"; disabled_plugins=; enabled_plugins=\"searx.plugins.hostname_replace\\054searx.plugins.oa_doi_rewrite\\054searx.plugins.vim_hotkeys\"; tokens=; maintab=on; enginetab=on".to_string()) + ])) + .change_context(EngineError::UnexpectedError)?; let document: Html = Html::parse_document( - &Searx::fetch_html_from_upstream(self, url, header_map, request_timeout).await?, + &Searx::fetch_html_from_upstream(self, &url, header_map, request_timeout).await?, ); let no_result: Selector = Selector::parse("#urls>.dialog-error>p") @@ -117,24 +90,21 @@ impl SearchEngine for Searx { .next() .unwrap() .inner_html() - .trim() - .to_string(), + .trim(), result .select(&result_url) .next() .unwrap() .value() .attr("href") - .unwrap() - .to_string(), + .unwrap(), result .select(&result_desc) .next() 
.unwrap() .inner_html() - .trim() - .to_string(), - vec!["searx".to_string()], + .trim(), + &["searx"], ) }) .map(|search_result| (search_result.url.clone(), search_result)) diff --git a/src/handler/mod.rs b/src/handler/mod.rs index 8118b29..188767d 100644 --- a/src/handler/mod.rs +++ b/src/handler/mod.rs @@ -1 +1,5 @@ +//! This module provides modules which provide the functionality to handle paths for different +//! files present on different paths and provide one appropriate path on which it is present and +//! can be used. + pub mod paths; diff --git a/src/handler/paths.rs b/src/handler/paths.rs index 9b4fa07..9ea5fff 100644 --- a/src/handler/paths.rs +++ b/src/handler/paths.rs @@ -4,108 +4,116 @@ use std::collections::HashMap; use std::io::Error; use std::path::Path; +use std::sync::OnceLock; // ------- Constants -------- -static PUBLIC_DIRECTORY_NAME: &str = "public"; -static COMMON_DIRECTORY_NAME: &str = "websurfx"; -static CONFIG_FILE_NAME: &str = "config.lua"; -static ALLOWLIST_FILE_NAME: &str = "allowlist.txt"; -static BLOCKLIST_FILE_NAME: &str = "blocklist.txt"; +/// The constant holding the name of the theme folder. +const PUBLIC_DIRECTORY_NAME: &str = "public"; +/// The constant holding the name of the common folder. +const COMMON_DIRECTORY_NAME: &str = "websurfx"; +/// The constant holding the name of the config file. +const CONFIG_FILE_NAME: &str = "config.lua"; +/// The constant holding the name of the AllowList text file. +const ALLOWLIST_FILE_NAME: &str = "allowlist.txt"; +/// The constant holding the name of the BlockList text file. +const BLOCKLIST_FILE_NAME: &str = "blocklist.txt"; +/// An enum type which provides different variants to handle paths for various files/folders. #[derive(Hash, PartialEq, Eq, Debug)] pub enum FileType { + /// This variant handles all the paths associated with the config file. Config, + /// This variant handles all the paths associated with the Allowlist text file. AllowList, + /// This variant handles all the paths associated with the BlockList text file. BlockList, + /// This variant handles all the paths associated with the public folder (Theme folder). Theme, } -static FILE_PATHS_FOR_DIFF_FILE_TYPES: once_cell::sync::Lazy>> = - once_cell::sync::Lazy::new(|| { - HashMap::from([ - ( - FileType::Config, - vec![ - format!( - "{}/.config/{}/{}", - std::env::var("HOME").unwrap(), - COMMON_DIRECTORY_NAME, - CONFIG_FILE_NAME - ), - format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME), - format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME), - ], - ), - ( - FileType::Theme, - vec![ - format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME), - format!("./{}/", PUBLIC_DIRECTORY_NAME), - ], - ), - ( - FileType::AllowList, - vec![ - format!( - "{}/.config/{}/{}", - std::env::var("HOME").unwrap(), - COMMON_DIRECTORY_NAME, - ALLOWLIST_FILE_NAME - ), - format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME), - format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME), - ], - ), - ( - FileType::BlockList, - vec![ - format!( - "{}/.config/{}/{}", - std::env::var("HOME").unwrap(), - COMMON_DIRECTORY_NAME, - BLOCKLIST_FILE_NAME - ), - format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME), - format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME), - ], - ), - ]) - }); +/// A static variable which stores the different filesystem paths for various file/folder types. 
+static FILE_PATHS_FOR_DIFF_FILE_TYPES: OnceLock<HashMap<FileType, Vec<String>>> = OnceLock::new();
 
-/// A helper function which returns an appropriate config file path checking if the config
-/// file exists on that path.
+/// A function which returns an appropriate path for the provided file type by checking if the path
+/// for the given file type exists on that path.
 ///
 /// # Error
 ///
-/// Returns a `config file not found!!` error if the config file is not present under following
-/// paths which are:
-/// 1. `~/.config/websurfx/` if it not present here then it fallbacks to the next one (2)
-/// 2. `/etc/xdg/websurfx/config.lua` if it is not present here then it fallbacks to the next
-/// one (3).
-/// 3. `websurfx/` (under project folder ( or codebase in other words)) if it is not present
-/// here then it returns an error as mentioned above.
-
-/// A function which returns an appropriate theme directory path checking if the theme
-/// directory exists on that path.
+/// Returns a `<file_type> folder/file not found!!` error if the given file_type folder/file is not
+/// present on the path on which it is being tested.
 ///
-/// # Error
+/// # Example
+///
+/// If this function is given the file_type of the Theme variant then the theme folder is checked by
+/// the following steps:
 ///
-/// Returns a `Theme (public) folder not found!!` error if the theme folder is not present under following
-/// paths which are:
 /// 1. `/opt/websurfx` if it not present here then it fallbacks to the next one (2)
 /// 2. Under project folder ( or codebase in other words) if it is not present
 /// here then it returns an error as mentioned above.
-pub fn file_path(file_type: FileType) -> Result<String, Error> {
-    let file_path = FILE_PATHS_FOR_DIFF_FILE_TYPES.get(&file_type).unwrap();
+pub fn file_path(file_type: FileType) -> Result<&'static str, Error> {
+    let file_path: &Vec<String> = FILE_PATHS_FOR_DIFF_FILE_TYPES
+        .get_or_init(|| {
+            HashMap::from([
+                (
+                    FileType::Config,
+                    vec![
+                        format!(
+                            "{}/.config/{}/{}",
+                            std::env::var("HOME").unwrap(),
+                            COMMON_DIRECTORY_NAME,
+                            CONFIG_FILE_NAME
+                        ),
+                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
+                        format!("./{}/{}", COMMON_DIRECTORY_NAME, CONFIG_FILE_NAME),
+                    ],
+                ),
+                (
+                    FileType::Theme,
+                    vec![
+                        format!("/opt/websurfx/{}/", PUBLIC_DIRECTORY_NAME),
+                        format!("./{}/", PUBLIC_DIRECTORY_NAME),
+                    ],
+                ),
+                (
+                    FileType::AllowList,
+                    vec![
+                        format!(
+                            "{}/.config/{}/{}",
+                            std::env::var("HOME").unwrap(),
+                            COMMON_DIRECTORY_NAME,
+                            ALLOWLIST_FILE_NAME
+                        ),
+                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
+                        format!("./{}/{}", COMMON_DIRECTORY_NAME, ALLOWLIST_FILE_NAME),
+                    ],
+                ),
+                (
+                    FileType::BlockList,
+                    vec![
+                        format!(
+                            "{}/.config/{}/{}",
+                            std::env::var("HOME").unwrap(),
+                            COMMON_DIRECTORY_NAME,
+                            BLOCKLIST_FILE_NAME
+                        ),
+                        format!("/etc/xdg/{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
+                        format!("./{}/{}", COMMON_DIRECTORY_NAME, BLOCKLIST_FILE_NAME),
+                    ],
+                ),
+            ])
+        })
+        .get(&file_type)
+        .unwrap();
+
     for (idx, _) in file_path.iter().enumerate() {
         if Path::new(file_path[idx].as_str()).exists() {
-            return Ok(file_path[idx].clone());
+            return Ok(std::mem::take(&mut &*file_path[idx]));
         }
     }
 
     // if no of the configs above exist, return error
     Err(Error::new(
         std::io::ErrorKind::NotFound,
-        format!("{:?} file not found!!", file_type),
+        format!("{:?} file/folder not found!!", file_type),
     ))
 }
diff --git a/src/lib.rs b/src/lib.rs
index cd83d8a..8c74e6a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,19 +1,25 @@ //!
This main library module provides the functionality to provide and handle the Tcp server //! and register all the routes for the `websurfx` meta search engine website. +#![forbid(unsafe_code, clippy::panic)] +#![deny(missing_docs, clippy::missing_docs_in_private_items, clippy::perf)] +#![warn(clippy::cognitive_complexity, rust_2018_idioms)] + pub mod cache; pub mod config; pub mod engines; pub mod handler; +pub mod models; pub mod results; pub mod server; use std::net::TcpListener; -use crate::server::routes; +use crate::server::router; use actix_cors::Cors; use actix_files as fs; +use actix_governor::{Governor, GovernorConfigBuilder}; use actix_web::{dev::Server, http::header, middleware::Logger, web, App, HttpServer}; use config::parser::Config; use handlebars::Handlebars; @@ -40,15 +46,15 @@ use handler::paths::{file_path, FileType}; /// let server = run(listener,config).expect("Failed to start server"); /// ``` pub fn run(listener: TcpListener, config: Config) -> std::io::Result { - let mut handlebars: Handlebars = Handlebars::new(); + let mut handlebars: Handlebars<'_> = Handlebars::new(); - let public_folder_path: String = file_path(FileType::Theme)?; + let public_folder_path: &str = file_path(FileType::Theme)?; handlebars .register_templates_directory(".html", format!("{}/templates", public_folder_path)) .unwrap(); - let handlebars_ref: web::Data = web::Data::new(handlebars); + let handlebars_ref: web::Data> = web::Data::new(handlebars); let cloned_config_threads_opt: u8 = config.threads; @@ -64,10 +70,17 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result { ]); App::new() + .wrap(Logger::default()) // added logging middleware for logging. .app_data(handlebars_ref.clone()) .app_data(web::Data::new(config.clone())) .wrap(cors) - .wrap(Logger::default()) // added logging middleware for logging. + .wrap(Governor::new( + &GovernorConfigBuilder::default() + .per_second(config.rate_limiter.time_limit as u64) + .burst_size(config.rate_limiter.number_of_requests as u32) + .finish() + .unwrap(), + )) // Serve images and static files (css and js files). .service( fs::Files::new("/static", format!("{}/static", public_folder_path)) @@ -77,12 +90,12 @@ pub fn run(listener: TcpListener, config: Config) -> std::io::Result { fs::Files::new("/images", format!("{}/images", public_folder_path)) .show_files_listing(), ) - .service(routes::robots_data) // robots.txt - .service(routes::index) // index page - .service(routes::search) // search page - .service(routes::about) // about page - .service(routes::settings) // settings page - .default_service(web::route().to(routes::not_found)) // error page + .service(router::robots_data) // robots.txt + .service(router::index) // index page + .service(server::routes::search::search) // search page + .service(router::about) // about page + .service(router::settings) // settings page + .default_service(web::route().to(router::not_found)) // error page }) .workers(cloned_config_threads_opt as usize) // Start server on 127.0.0.1 with the user provided port number. for example 127.0.0.1:8080. diff --git a/src/models/aggregation_models.rs b/src/models/aggregation_models.rs new file mode 100644 index 0000000..ea4a914 --- /dev/null +++ b/src/models/aggregation_models.rs @@ -0,0 +1,181 @@ +//! This module provides public models for handling, storing and serializing of search results +//! data scraped from the upstream search engines. 
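The `Governor` middleware wired into `run` above can be tried in isolation; a minimal sketch assuming actix-governor 0.4 as pinned in the lock file, with illustrative limits:

    use actix_governor::{Governor, GovernorConfigBuilder};
    use actix_web::{web, App, HttpServer};

    #[actix_web::main]
    async fn main() -> std::io::Result<()> {
        // Replenish one request slot every 3 seconds, allow bursts of 20.
        let governor_conf = GovernorConfigBuilder::default()
            .per_second(3)
            .burst_size(20)
            .finish()
            .unwrap(); // `finish` returns None only for invalid (zero) settings

        HttpServer::new(move || {
            App::new()
                .wrap(Governor::new(&governor_conf))
                .route("/", web::get().to(|| async { "hello" }))
        })
        .bind(("127.0.0.1", 8080))?
        .run()
        .await
    }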
+
+use serde::{Deserialize, Serialize};
+use smallvec::SmallVec;
+
+use super::{engine_models::EngineError, parser_models::Style};
+
+/// A named struct to store the raw search results scraped from the upstream search engines
+/// before aggregating them. It derives the Clone trait which is needed to write idiomatic
+/// Rust using `Iterators`.
+#[derive(Clone, Serialize, Deserialize)]
+#[serde(rename_all = "camelCase")]
+pub struct SearchResult {
+    /// The title of the search result.
+    pub title: String,
+    /// The url which is accessed when clicked on it
+    /// (href url in html in simple words).
+    pub url: String,
+    /// The description of the search result.
+    pub description: String,
+    /// The names of the upstream engines from which these results were provided.
+    pub engine: SmallVec<[String; 0]>,
+}
+
+impl SearchResult {
+    /// Constructs a new `SearchResult` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `title` - The title of the search result.
+    /// * `url` - The url which is accessed when clicked on it
+    /// (href url in html in simple words).
+    /// * `description` - The description of the search result.
+    /// * `engine` - The names of the upstream engines from which these results were provided.
+    pub fn new(title: &str, url: &str, description: &str, engine: &[&str]) -> Self {
+        SearchResult {
+            title: title.to_owned(),
+            url: url.to_owned(),
+            description: description.to_owned(),
+            engine: engine.iter().map(|name| name.to_string()).collect(),
+        }
+    }
+
+    /// A function which adds the engine name provided as a string into a vector of strings.
+    ///
+    /// # Arguments
+    ///
+    /// * `engine` - Takes an engine name provided as a String.
+    pub fn add_engines(&mut self, engine: &str) {
+        self.engine.push(engine.to_owned())
+    }
+
+    /// A function which returns the engine name stored from the struct as a string.
+    ///
+    /// # Returns
+    ///
+    /// An engine name stored as a string from the struct.
+    pub fn engine(&mut self) -> String {
+        std::mem::take(&mut self.engine[0])
+    }
+}
+
+/// A named struct that stores the error info related to the upstream search engines.
+#[derive(Serialize, Deserialize, Clone)]
+pub struct EngineErrorInfo {
+    /// It stores the error type which occurred while fetching the result from a particular search
+    /// engine.
+    pub error: String,
+    /// It stores the name of the engine that failed to provide the requested search results.
+    pub engine: String,
+    /// It stores the name of the color to indicate how severe the particular error is (in
+    /// other words it indicates the severity of the error/issue).
+    pub severity_color: String,
+}
+
+impl EngineErrorInfo {
+    /// Constructs a new `EngineErrorInfo` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `error` - It takes the error type which occurred while fetching the result from a particular
+    /// search engine.
+    /// * `engine` - It takes the name of the engine that failed to provide the requested search results.
+    pub fn new(error: &EngineError, engine: &str) -> Self {
+        Self {
+            error: match error {
+                EngineError::RequestError => "RequestError".to_owned(),
+                EngineError::EmptyResultSet => "EmptyResultSet".to_owned(),
+                EngineError::UnexpectedError => "UnexpectedError".to_owned(),
+            },
+            engine: engine.to_owned(),
+            severity_color: match error {
+                EngineError::RequestError => "green".to_owned(),
+                EngineError::EmptyResultSet => "blue".to_owned(),
+                EngineError::UnexpectedError => "red".to_owned(),
+            },
+        }
+    }
+}
+
+/// A named struct to store, serialize, and deserialize all the search results scraped and
+/// aggregated from the upstream search engines.
+#[derive(Serialize, Deserialize, Default)]
+#[serde(rename_all = "camelCase")]
+pub struct SearchResults {
+    /// Stores the individual serializable `SearchResult` structs in a vector.
+    pub results: Vec<SearchResult>,
+    /// Stores the current page's search query `q` provided in the search url.
+    pub page_query: String,
+    /// Stores the theming options for the website.
+    pub style: Style,
+    /// Stores the information on which engines failed with their engine name
+    /// and the type of error that caused it.
+    pub engine_errors_info: Vec<EngineErrorInfo>,
+    /// Stores the flag option which holds the check value that the following
+    /// search query was disallowed when the safe search level was set to 4 and it
+    /// was present in the `Blocklist` file.
+    pub disallowed: bool,
+    /// Stores the flag option which holds the check value that the following
+    /// search query was filtered when the safe search level was set to 3 and it
+    /// was present in the `Blocklist` file.
+    pub filtered: bool,
+}
+
+impl SearchResults {
+    /// Constructs a new `SearchResults` with the given arguments needed for the struct.
+    ///
+    /// # Arguments
+    ///
+    /// * `results` - Takes an argument of individual serializable `SearchResult` structs
+    /// and stores them into a vector of `SearchResult` structs.
+    /// * `page_query` - Takes an argument of the current page's search query `q` provided in
+    /// the search url.
+    /// * `engine_errors_info` - Takes an array of structs which contains information regarding
+    /// which engines failed with their names, reason and their severity color name.
+    pub fn new(
+        results: Vec<SearchResult>,
+        page_query: &str,
+        engine_errors_info: &[EngineErrorInfo],
+    ) -> Self {
+        Self {
+            results,
+            page_query: page_query.to_owned(),
+            style: Style::default(),
+            engine_errors_info: engine_errors_info.to_owned(),
+            disallowed: Default::default(),
+            filtered: Default::default(),
+        }
+    }
+
+    /// A setter function to add website style to the return search results.
+    pub fn add_style(&mut self, style: &Style) {
+        self.style = style.clone();
+    }
+
+    /// A setter function that sets disallowed to true.
+    pub fn set_disallowed(&mut self) {
+        self.disallowed = true;
+    }
+
+    /// A setter function to set the current page search query.
+    pub fn set_page_query(&mut self, page: &str) {
+        self.page_query = page.to_owned();
+    }
+
+    /// A setter function that sets the filtered to true.
+    pub fn set_filtered(&mut self) {
+        self.filtered = true;
+    }
+
+    /// A getter function that gets the value of `engine_errors_info`.
+    pub fn engine_errors_info(&mut self) -> Vec<EngineErrorInfo> {
+        std::mem::take(&mut self.engine_errors_info)
+    }
+    /// A getter function that gets the value of `results`.
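A short usage sketch for the model above, assuming the crate's public module paths as declared in this patch (`websurfx::models::aggregation_models`): `SearchResult::new` borrows its inputs and owns the stored data, and further engines accumulate through `add_engines`.

    use websurfx::models::aggregation_models::SearchResult;

    fn main() {
        let mut result = SearchResult::new(
            "Example Domain",
            "https://www.example.com",
            "This domain is for use in illustrative examples.",
            &["duckduckgo"],
        );
        // A second upstream engine returned the same URL, so record it too.
        result.add_engines("searx");
        assert_eq!(result.engine(), "duckduckgo");
    }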
+    pub fn results(&mut self) -> Vec<SearchResult> {
+        self.results.clone()
+    }
+}
diff --git a/src/models/engine_models.rs b/src/models/engine_models.rs
new file mode 100644
index 0000000..d4a4e72
--- /dev/null
+++ b/src/models/engine_models.rs
@@ -0,0 +1,159 @@
+//! This module provides the error enum to handle different errors associated with requesting data
+//! from the upstream search engines with the search query provided by the user.
+
+use super::aggregation_models::SearchResult;
+use error_stack::{Result, ResultExt};
+use std::{collections::HashMap, fmt, time::Duration};
+
+/// A custom error type used to handle engine-associated errors.
+#[derive(Debug)]
+pub enum EngineError {
+    /// This variant handles the no-results-found error provided by the upstream
+    /// search engines.
+    EmptyResultSet,
+    /// This variant handles all request related errors like forbidden, not found,
+    /// etc.
+    RequestError,
+    /// This variant handles all the errors which are unexpected or occur rarely
+    /// and are errors mostly related to failure in initialization of HeaderMap,
+    /// Selector errors and all other errors occurring within the code handling
+    /// the `upstream search engines`.
+    UnexpectedError,
+}
+
+impl fmt::Display for EngineError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            EngineError::EmptyResultSet => {
+                write!(f, "The upstream search engine returned an empty result set")
+            }
+            EngineError::RequestError => {
+                write!(
+                    f,
+                    "Error occurred while requesting data from upstream search engine"
+                )
+            }
+            EngineError::UnexpectedError => {
+                write!(f, "An unexpected error occurred while processing the data")
+            }
+        }
+    }
+}
+
+impl error_stack::Context for EngineError {}
+
+/// A trait to define common behavior for all search engines.
+#[async_trait::async_trait]
+pub trait SearchEngine: Sync + Send {
+    /// This helper function fetches/requests the search results from the upstream search engine in
+    /// an html form.
+    ///
+    /// # Arguments
+    ///
+    /// * `url` - It takes the url of the upstream search engine with the user requested search
+    /// query appended in the search parameters.
+    /// * `header_map` - It takes the http request headers to be sent to the upstream engine in
+    /// order to prevent being detected as a bot. It takes the header as a HeaderMap type.
+    /// * `request_timeout` - It takes the request timeout value in seconds, which is used to limit
+    /// the amount of time each request may remain connected until the results can be provided
+    /// by the upstream engine.
+    ///
+    /// # Error
+    ///
+    /// It returns the html data as a string if the upstream engine provides the data as expected,
+    /// otherwise it returns a custom `EngineError`.
+    async fn fetch_html_from_upstream(
+        &self,
+        url: &str,
+        header_map: reqwest::header::HeaderMap,
+        request_timeout: u8,
+    ) -> Result<String, EngineError> {
+        // fetch the html from upstream search engine
+        Ok(reqwest::Client::new()
+            .get(url)
+            .timeout(Duration::from_secs(request_timeout as u64)) // Add timeout to request to avoid DDOSing the server
+            .headers(header_map) // add spoofed headers to emulate human behavior
+            .send()
+            .await
+            .change_context(EngineError::RequestError)?
+            .text()
+            .await
+            .change_context(EngineError::RequestError)?)
+    }
+
+    /// This function scrapes results from the upstream engine and puts all the scraped results like
+    /// title, visiting_url (href in html), engine (from which engine it was fetched) and description
+    /// in a RawSearchResult and then adds that to a HashMap whose keys are url and values are
+    /// RawSearchResult structs and then returns it within a Result enum.
+    ///
+    /// # Arguments
+    ///
+    /// * `query` - Takes the user provided query to query the upstream search engine with.
+    /// * `page` - Takes a u32 as an argument.
+    /// * `user_agent` - Takes a random user agent string as an argument.
+    /// * `request_timeout` - Takes a time (secs) as a value which controls the server request timeout.
+    ///
+    /// # Errors
+    ///
+    /// Returns an `EngineError` if the user is not connected to the internet or if there is a failure
+    /// to reach the above `upstream search engine` page or if the `upstream search engine` is unable
+    /// to provide results for the requested search query and also returns an error if the scraping
+    /// selector or HeaderMap fails to initialize.
+    async fn results(
+        &self,
+        query: &str,
+        page: u32,
+        user_agent: &str,
+        request_timeout: u8,
+        safe_search: u8,
+    ) -> Result<HashMap<String, SearchResult>, EngineError>;
+}
+
+/// A named struct which stores the engine struct with the name of the associated engine.
+pub struct EngineHandler {
+    /// It stores the engine struct wrapped in a box smart pointer as the engine struct implements
+    /// the `SearchEngine` trait.
+    engine: Box<dyn SearchEngine>,
+    /// It stores the name of the engine with which the struct is associated.
+    name: &'static str,
+}
+
+impl Clone for EngineHandler {
+    fn clone(&self) -> Self {
+        Self::new(self.name).unwrap()
+    }
+}
+
+impl EngineHandler {
+    /// Parses an engine name into an engine handler.
+    ///
+    /// # Arguments
+    ///
+    /// * `engine_name` - It takes the name of the engine with which the struct is to be associated.
+    ///
+    /// # Returns
+    ///
+    /// It returns an `Option` either containing the value or `None` if the engine is unknown.
+    pub fn new(engine_name: &str) -> Option<Self> {
+        let engine: (&'static str, Box<dyn SearchEngine>) =
+            match engine_name.to_lowercase().as_str() {
+                "duckduckgo" => (
+                    "duckduckgo",
+                    Box::new(crate::engines::duckduckgo::DuckDuckGo),
+                ),
+                "searx" => ("searx", Box::new(crate::engines::searx::Searx)),
+                _ => return None,
+            };
+
+        Some(Self {
+            engine: engine.1,
+            name: engine.0,
+        })
+    }
+
+    /// This function converts the EngineHandler type into a tuple containing the engine name and
+    /// the associated engine struct.
+    pub fn into_name_engine(self) -> (&'static str, Box<dyn SearchEngine>) {
+        (self.name, self.engine)
+    }
+}
diff --git a/src/models/mod.rs b/src/models/mod.rs
new file mode 100644
index 0000000..6a7d235
--- /dev/null
+++ b/src/models/mod.rs
@@ -0,0 +1,8 @@
+//! This module provides modules which in turn provide various models for aggregating search
+//! results, parsing the config file, a trait to standardize search engine handling code, a
+//! custom engine error for the search engine, etc.
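A sketch of how a user-facing engine list maps onto handlers through `EngineHandler::new` above; unknown names are silently dropped by `filter_map`, which is exactly how both the config parser and the cookie path use it (crate path assumed from this patch):

    use websurfx::models::engine_models::EngineHandler;

    fn parse_engines(names: &[&str]) -> Vec<EngineHandler> {
        names
            .iter()
            .filter_map(|name| EngineHandler::new(name))
            .collect()
    }

    fn main() {
        // Matching is case-insensitive; the unknown engine is ignored.
        let engines = parse_engines(&["DuckDuckGo", "searx", "not-an-engine"]);
        assert_eq!(engines.len(), 2);
    }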
+
+pub mod aggregation_models;
+pub mod engine_models;
+pub mod parser_models;
+pub mod server_models;
diff --git a/src/config/parser_models.rs b/src/models/parser_models.rs
similarity index 65%
rename from src/config/parser_models.rs
rename to src/models/parser_models.rs
index 0bc52d8..9dad348 100644
--- a/src/config/parser_models.rs
+++ b/src/models/parser_models.rs
@@ -12,15 +12,12 @@ use serde::{Deserialize, Serialize};
 /// order to allow the deserializing the json back to struct in aggregate function in
 /// aggregator.rs and create a new struct out of it and then serialize it back to json and pass
 /// it to the template files.
-///
-/// # Fields
-//
-/// * `theme` - It stores the parsed theme option used to set a theme for the website.
-/// * `colorscheme` - It stores the parsed colorscheme option used to set a colorscheme for the
-/// theme being used.
-#[derive(Serialize, Deserialize, Clone)]
+#[derive(Serialize, Deserialize, Clone, Default)]
 pub struct Style {
+    /// It stores the parsed theme option used to set a theme for the website.
     pub theme: String,
+    /// It stores the parsed colorscheme option used to set a colorscheme for the
+    /// theme being used.
     pub colorscheme: String,
 }
 
@@ -36,3 +33,20 @@ impl Style {
         Style { theme, colorscheme }
     }
 }
+
+/// Configuration options for the aggregator.
+#[derive(Clone)]
+pub struct AggregatorConfig {
+    /// Whether to enable or disable random delays between requests.
+    pub random_delay: bool,
+}
+
+/// Configuration options for the rate limiter middleware.
+#[derive(Clone)]
+pub struct RateLimiter {
+    /// The number of requests that are allowed within the provided time limit.
+    pub number_of_requests: u8,
+    /// The time limit (in seconds) within which the above number of requests must fit.
+    pub time_limit: u8,
+}
diff --git a/src/models/server_models.rs b/src/models/server_models.rs
new file mode 100644
index 0000000..3da6717
--- /dev/null
+++ b/src/models/server_models.rs
@@ -0,0 +1,26 @@
+//! This module provides the models to parse cookies and search parameters from the search
+//! engine website.
+use serde::Deserialize;
+
+/// A named struct which deserializes all the user provided search parameters and stores them.
+#[derive(Deserialize)]
+pub struct SearchParams {
+    /// It stores the search parameter option `q` (or query in simple words)
+    /// of the search url.
+    pub q: Option<String>,
+    /// It stores the search parameter `page` (or pageno in simple words)
+    /// of the search url.
+    pub page: Option<u32>,
+}
+
+/// A named struct which is used to deserialize the cookies fetched from the client side.
+#[allow(dead_code)]
+#[derive(Deserialize)]
+pub struct Cookie {
+    /// It stores the theme name used in the website.
+    pub theme: String,
+    /// It stores the colorscheme name used for the website theme.
+    pub colorscheme: String,
+    /// It stores the user selected upstream search engines selected from the UI.
+    pub engines: Vec<String>,
+}
diff --git a/src/results/aggregation_models.rs b/src/results/aggregation_models.rs
deleted file mode 100644
index e985765..0000000
--- a/src/results/aggregation_models.rs
+++ /dev/null
@@ -1,142 +0,0 @@
-//! This module provides public models for handling, storing and serializing of search results
-//! data scraped from the upstream search engines.
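A quick check of how a query string deserializes into the `SearchParams` shape above, assuming the `serde_urlencoded` crate already present in the lock file; absent parameters simply become `None`:

    use serde::Deserialize;

    #[derive(Deserialize, Debug)]
    struct SearchParams {
        q: Option<String>,
        page: Option<u32>,
    }

    fn main() {
        let params: SearchParams = serde_urlencoded::from_str("q=rust&page=2").unwrap();
        assert_eq!(params.q.as_deref(), Some("rust"));
        assert_eq!(params.page, Some(2));

        // Missing fields are tolerated because every field is an Option.
        let empty: SearchParams = serde_urlencoded::from_str("").unwrap();
        assert!(empty.q.is_none() && empty.page.is_none());
    }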
- -use serde::{Deserialize, Serialize}; - -use crate::{config::parser_models::Style, engines::engine_models::EngineError}; - -/// A named struct to store the raw scraped search results scraped search results from the -/// upstream search engines before aggregating it.It derives the Clone trait which is needed -/// to write idiomatic rust using `Iterators`. -/// -/// # Fields -/// -/// * `title` - The title of the search result. -/// * `url` - The url which is accessed when clicked on it -/// (href url in html in simple words). -/// * `description` - The description of the search result. -/// * `engine` - The names of the upstream engines from which this results were provided. -#[derive(Clone, Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct SearchResult { - pub title: String, - pub url: String, - pub description: String, - pub engine: Vec, -} - -impl SearchResult { - /// Constructs a new `RawSearchResult` with the given arguments needed for the struct. - /// - /// # Arguments - /// - /// * `title` - The title of the search result. - /// * `url` - The url which is accessed when clicked on it - /// (href url in html in simple words). - /// * `description` - The description of the search result. - /// * `engine` - The names of the upstream engines from which this results were provided. - pub fn new(title: String, url: String, description: String, engine: Vec) -> Self { - SearchResult { - title, - url, - description, - engine, - } - } - - /// A function which adds the engine name provided as a string into a vector of strings. - /// - /// # Arguments - /// - /// * `engine` - Takes an engine name provided as a String. - pub fn add_engines(&mut self, engine: String) { - self.engine.push(engine) - } - - /// A function which returns the engine name stored from the struct as a string. - /// - /// # Returns - /// - /// An engine name stored as a string from the struct. - pub fn engine(self) -> String { - self.engine.get(0).unwrap().to_string() - } -} - -/// -#[derive(Serialize, Deserialize)] -pub struct EngineErrorInfo { - pub error: String, - pub engine: String, - pub severity_color: String, -} - -impl EngineErrorInfo { - pub fn new(error: &EngineError, engine: String) -> Self { - Self { - error: match error { - EngineError::RequestError => String::from("RequestError"), - EngineError::EmptyResultSet => String::from("EmptyResultSet"), - EngineError::UnexpectedError => String::from("UnexpectedError"), - }, - engine, - severity_color: match error { - EngineError::RequestError => String::from("green"), - EngineError::EmptyResultSet => String::from("blue"), - EngineError::UnexpectedError => String::from("red"), - }, - } - } -} - -/// A named struct to store, serialize, deserialize the all the search results scraped and -/// aggregated from the upstream search engines. -/// -/// # Fields -/// -/// * `results` - Stores the individual serializable `SearchResult` struct into a vector of -/// `SearchResult` structs. -/// * `page_query` - Stores the current pages search query `q` provided in the search url. -/// * `style` - Stores the theming options for the website. -/// * `engine_errors_info` - Stores the information on which engines failed with their engine name -/// and the type of error that caused it. -/// * `empty_result_set` - Stores a boolean which indicates that no engines gave a result for the -/// given search query. 
-#[derive(Serialize, Deserialize)] -#[serde(rename_all = "camelCase")] -pub struct SearchResults { - pub results: Vec, - pub page_query: String, - pub style: Style, - pub engine_errors_info: Vec, -} - -impl SearchResults { - /// Constructs a new `SearchResult` with the given arguments needed for the struct. - /// - /// # Arguments - /// - /// * `results` - Takes an argument of individual serializable `SearchResult` struct - /// and stores it into a vector of `SearchResult` structs. - /// * `page_query` - Takes an argument of current page`s search query `q` provided in - /// the search url. - /// * `empty_result_set` - Takes a boolean which indicates that no engines gave a result for the - /// given search query. - pub fn new( - results: Vec, - page_query: String, - engine_errors_info: Vec, - ) -> Self { - SearchResults { - results, - page_query, - style: Style::new("".to_string(), "".to_string()), - engine_errors_info, - } - } - - /// A setter function to add website style to the return search results. - pub fn add_style(&mut self, style: Style) { - self.style = style; - } -} diff --git a/src/results/aggregator.rs b/src/results/aggregator.rs index 3f06ecb..8c9be2c 100644 --- a/src/results/aggregator.rs +++ b/src/results/aggregator.rs @@ -1,27 +1,23 @@ //! This module provides the functionality to scrape and gathers all the results from the upstream //! search engines and then removes duplicate results. +use super::user_agent::random_user_agent; +use crate::handler::paths::{file_path, FileType}; +use crate::models::{ + aggregation_models::{EngineErrorInfo, SearchResult, SearchResults}, + engine_models::{EngineError, EngineHandler}, +}; +use error_stack::Report; +use rand::Rng; +use regex::Regex; use std::{ collections::HashMap, io::{BufReader, Read}, time::Duration, }; - -use super::{ - aggregation_models::{EngineErrorInfo, SearchResult, SearchResults}, - user_agent::random_user_agent, -}; -use error_stack::Report; -use rand::Rng; -use regex::Regex; use std::{fs::File, io::BufRead}; use tokio::task::JoinHandle; -use crate::{ - engines::engine_models::{EngineError, EngineHandler}, - handler::paths::{file_path, FileType}, -}; - /// Aliases for long type annotations type FutureVec = Vec, Report>>>; @@ -64,14 +60,15 @@ type FutureVec = Vec, Report, + upstream_search_engines: &[EngineHandler], request_timeout: u8, + safe_search: u8, ) -> Result> { - let user_agent: String = random_user_agent(); + let user_agent: &str = random_user_agent(); // Add a random delay before making the request. 
if random_delay || !debug { @@ -80,19 +77,24 @@ pub async fn aggregate( tokio::time::sleep(Duration::from_secs(delay_secs)).await; } - let mut names: Vec<&str> = vec![]; + let mut names: Vec<&str> = Vec::with_capacity(0); // create tasks for upstream result fetching let mut tasks: FutureVec = FutureVec::new(); for engine_handler in upstream_search_engines { - let (name, search_engine) = engine_handler.into_name_engine(); + let (name, search_engine) = engine_handler.to_owned().into_name_engine(); names.push(name); - let query: String = query.clone(); - let user_agent: String = user_agent.clone(); + let query: String = query.to_owned(); tasks.push(tokio::spawn(async move { search_engine - .results(query, page, user_agent.clone(), request_timeout) + .results( + &query, + page, + user_agent.clone(), + request_timeout, + safe_search, + ) .await })); } @@ -110,7 +112,7 @@ pub async fn aggregate( let mut result_map: HashMap = HashMap::new(); let mut engine_errors_info: Vec = Vec::new(); - let mut handle_error = |error: Report, engine_name: String| { + let mut handle_error = |error: &Report, engine_name: &'static str| { log::error!("Engine Error: {:?}", error); engine_errors_info.push(EngineErrorInfo::new( error.downcast_ref::().unwrap(), @@ -120,7 +122,7 @@ pub async fn aggregate( for _ in 0..responses.len() { let response = responses.pop().unwrap(); - let engine = names.pop().unwrap().to_string(); + let engine = names.pop().unwrap(); if result_map.is_empty() { match response { @@ -128,7 +130,7 @@ pub async fn aggregate( result_map = results.clone(); } Err(error) => { - handle_error(error, engine); + handle_error(&error, engine); } } continue; @@ -140,39 +142,37 @@ pub async fn aggregate( result_map .entry(key) .and_modify(|result| { - result.add_engines(engine.clone()); + result.add_engines(engine); }) .or_insert_with(|| -> SearchResult { value }); }); } Err(error) => { - handle_error(error, engine); + handle_error(&error, engine); } } } - let mut blacklist_map: HashMap = HashMap::new(); - filter_with_lists( - &mut result_map, - &mut blacklist_map, - &file_path(FileType::BlockList)?, - )?; + if safe_search >= 3 { + let mut blacklist_map: HashMap = HashMap::new(); + filter_with_lists( + &mut result_map, + &mut blacklist_map, + file_path(FileType::BlockList)?, + )?; - filter_with_lists( - &mut blacklist_map, - &mut result_map, - &file_path(FileType::AllowList)?, - )?; + filter_with_lists( + &mut blacklist_map, + &mut result_map, + file_path(FileType::AllowList)?, + )?; - drop(blacklist_map); + drop(blacklist_map); + } let results: Vec = result_map.into_values().collect(); - Ok(SearchResults::new( - results, - query.to_string(), - engine_errors_info, - )) + Ok(SearchResults::new(results, query, &engine_errors_info)) } /// Filters a map of search results using a list of regex patterns. 
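The merge loop shown above folds each engine's response into one map keyed by URL; a reduced sketch of that step with the types simplified to strings:

    use std::collections::HashMap;

    fn merge(
        result_map: &mut HashMap<String, Vec<&'static str>>,
        incoming: HashMap<String, Vec<&'static str>>,
        engine: &'static str,
    ) {
        for (url, engines) in incoming {
            result_map
                .entry(url)
                // Duplicate URL: just record the extra engine name.
                .and_modify(|existing| existing.push(engine))
                // First sighting: keep the incoming entry as-is.
                .or_insert(engines);
        }
    }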
@@ -194,7 +194,7 @@ pub fn filter_with_lists( let mut reader = BufReader::new(File::open(file_path)?); for line in reader.by_ref().lines() { - let re = Regex::new(&line?)?; + let re = Regex::new(line?.trim())?; // Iterate over each search result in the map and check if it matches the regex pattern for (url, search_result) in map_to_be_filtered.clone().into_iter() { @@ -203,7 +203,10 @@ pub fn filter_with_lists( || re.is_match(&search_result.description.to_lowercase()) { // If the search result matches the regex pattern, move it from the original map to the resultant map - resultant_map.insert(url.clone(), map_to_be_filtered.remove(&url).unwrap()); + resultant_map.insert( + url.to_owned(), + map_to_be_filtered.remove(&url.to_owned()).unwrap(), + ); } } } @@ -214,6 +217,7 @@ pub fn filter_with_lists( #[cfg(test)] mod tests { use super::*; + use smallvec::smallvec; use std::collections::HashMap; use std::io::Write; use tempfile::NamedTempFile; @@ -223,22 +227,22 @@ mod tests { // Create a map of search results to filter let mut map_to_be_filtered = HashMap::new(); map_to_be_filtered.insert( - "https://www.example.com".to_string(), + "https://www.example.com".to_owned(), SearchResult { - title: "Example Domain".to_string(), - url: "https://www.example.com".to_string(), + title: "Example Domain".to_owned(), + url: "https://www.example.com".to_owned(), description: "This domain is for use in illustrative examples in documents." - .to_string(), - engine: vec!["Google".to_string(), "Bing".to_string()], + .to_owned(), + engine: smallvec!["Google".to_owned(), "Bing".to_owned()], }, ); map_to_be_filtered.insert( - "https://www.rust-lang.org/".to_string(), + "https://www.rust-lang.org/".to_owned(), SearchResult { - title: "Rust Programming Language".to_string(), - url: "https://www.rust-lang.org/".to_string(), - description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(), - engine: vec!["Google".to_string(), "DuckDuckGo".to_string()], + title: "Rust Programming Language".to_owned(), + url: "https://www.rust-lang.org/".to_owned(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), + engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], }, ); @@ -267,22 +271,22 @@ mod tests { fn test_filter_with_lists_wildcard() -> Result<(), Box> { let mut map_to_be_filtered = HashMap::new(); map_to_be_filtered.insert( - "https://www.example.com".to_string(), + "https://www.example.com".to_owned(), SearchResult { - title: "Example Domain".to_string(), - url: "https://www.example.com".to_string(), + title: "Example Domain".to_owned(), + url: "https://www.example.com".to_owned(), description: "This domain is for use in illustrative examples in documents." 
- .to_string(), - engine: vec!["Google".to_string(), "Bing".to_string()], + .to_owned(), + engine: smallvec!["Google".to_owned(), "Bing".to_owned()], }, ); map_to_be_filtered.insert( - "https://www.rust-lang.org/".to_string(), + "https://www.rust-lang.org/".to_owned(), SearchResult { - title: "Rust Programming Language".to_string(), - url: "https://www.rust-lang.org/".to_string(), - description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_string(), - engine: vec!["Google".to_string(), "DuckDuckGo".to_string()], + title: "Rust Programming Language".to_owned(), + url: "https://www.rust-lang.org/".to_owned(), + description: "A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.".to_owned(), + engine: smallvec!["Google".to_owned(), "DuckDuckGo".to_owned()], }, ); @@ -327,13 +331,13 @@ mod tests { fn test_filter_with_lists_invalid_regex() { let mut map_to_be_filtered = HashMap::new(); map_to_be_filtered.insert( - "https://www.example.com".to_string(), + "https://www.example.com".to_owned(), SearchResult { - title: "Example Domain".to_string(), - url: "https://www.example.com".to_string(), + title: "Example Domain".to_owned(), + url: "https://www.example.com".to_owned(), description: "This domain is for use in illustrative examples in documents." - .to_string(), - engine: vec!["Google".to_string(), "Bing".to_string()], + .to_owned(), + engine: smallvec!["Google".to_owned(), "Bing".to_owned()], }, ); diff --git a/src/results/mod.rs b/src/results/mod.rs index 0c13442..9ec3229 100644 --- a/src/results/mod.rs +++ b/src/results/mod.rs @@ -1,3 +1,6 @@ -pub mod aggregation_models; +//! This module provides modules that handle the functionality to aggregate the fetched search +//! results from the upstream search engines and filters it if safe search is set to 3 or 4. Also, +//! provides various models to aggregate search results into a standardized form. + pub mod aggregator; pub mod user_agent; diff --git a/src/results/user_agent.rs b/src/results/user_agent.rs index 13166bf..ab2811b 100644 --- a/src/results/user_agent.rs +++ b/src/results/user_agent.rs @@ -1,28 +1,34 @@ //! This module provides the functionality to generate random user agent string. +use std::sync::OnceLock; + use fake_useragent::{Browsers, UserAgents, UserAgentsBuilder}; -static USER_AGENTS: once_cell::sync::Lazy = once_cell::sync::Lazy::new(|| { - UserAgentsBuilder::new() - .cache(false) - .dir("/tmp") - .thread(1) - .set_browsers( - Browsers::new() - .set_chrome() - .set_safari() - .set_edge() - .set_firefox() - .set_mozilla(), - ) - .build() -}); +/// A static variable which stores the initially build `UserAgents` struct. So as it can be resused +/// again and again without the need of reinitializing the `UserAgents` struct. +static USER_AGENTS: OnceLock = OnceLock::new(); /// A function to generate random user agent to improve privacy of the user. /// /// # Returns /// /// A randomly generated user agent string. 
-pub fn random_user_agent() -> String { - USER_AGENTS.random().to_string() +pub fn random_user_agent() -> &'static str { + USER_AGENTS + .get_or_init(|| { + UserAgentsBuilder::new() + .cache(false) + .dir("/tmp") + .thread(1) + .set_browsers( + Browsers::new() + .set_chrome() + .set_safari() + .set_edge() + .set_firefox() + .set_mozilla(), + ) + .build() + }) + .random() } diff --git a/src/server/mod.rs b/src/server/mod.rs index 6a664ab..7f4274f 100644 --- a/src/server/mod.rs +++ b/src/server/mod.rs @@ -1 +1,7 @@ +//! This module provides modules that handle the functionality of handling different routes/paths +//! for the `websurfx` search engine website. Also it handles the parsing of search parameters in +//! the search route. Also, caches the next, current and previous search results in the search +//! routes with the help of the redis server. + +pub mod router; pub mod routes; diff --git a/src/server/router.rs b/src/server/router.rs new file mode 100644 index 0000000..69a3ede --- /dev/null +++ b/src/server/router.rs @@ -0,0 +1,64 @@ +//! This module provides the functionality to handle different routes of the `websurfx` +//! meta search engine website and provide appropriate response to each route/page +//! when requested. + +use crate::{ + config::parser::Config, + handler::paths::{file_path, FileType}, +}; +use actix_web::{get, web, HttpRequest, HttpResponse}; +use handlebars::Handlebars; +use std::fs::read_to_string; + +/// Handles the route of index page or main page of the `websurfx` meta search engine website. +#[get("/")] +pub async fn index( + hbs: web::Data>, + config: web::Data, +) -> Result> { + let page_content: String = hbs.render("index", &config.style).unwrap(); + Ok(HttpResponse::Ok().body(page_content)) +} + +/// Handles the route of any other accessed route/page which is not provided by the +/// website essentially the 404 error page. +pub async fn not_found( + hbs: web::Data>, + config: web::Data, +) -> Result> { + let page_content: String = hbs.render("404", &config.style)?; + + Ok(HttpResponse::Ok() + .content_type("text/html; charset=utf-8") + .body(page_content)) +} + +/// Handles the route of robots.txt page of the `websurfx` meta search engine website. +#[get("/robots.txt")] +pub async fn robots_data(_req: HttpRequest) -> Result> { + let page_content: String = + read_to_string(format!("{}/robots.txt", file_path(FileType::Theme)?))?; + Ok(HttpResponse::Ok() + .content_type("text/plain; charset=ascii") + .body(page_content)) +} + +/// Handles the route of about page of the `websurfx` meta search engine website. +#[get("/about")] +pub async fn about( + hbs: web::Data>, + config: web::Data, +) -> Result> { + let page_content: String = hbs.render("about", &config.style)?; + Ok(HttpResponse::Ok().body(page_content)) +} + +/// Handles the route of settings page of the `websurfx` meta search engine website. +#[get("/settings")] +pub async fn settings( + hbs: web::Data>, + config: web::Data, +) -> Result> { + let page_content: String = hbs.render("settings", &config.style)?; + Ok(HttpResponse::Ok().body(page_content)) +} diff --git a/src/server/routes/mod.rs b/src/server/routes/mod.rs new file mode 100644 index 0000000..6bc5750 --- /dev/null +++ b/src/server/routes/mod.rs @@ -0,0 +1,3 @@ +//! This module provides modules to handle various routes in the search engine website. 
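Both `paths.rs` and `user_agent.rs` now share the same std-only idiom: a `OnceLock` populated on first access, after which callers receive `&'static` borrows of the cached data. A stripped-down sketch with a stand-in for the expensive builder:

    use std::sync::OnceLock;

    static EXPENSIVE: OnceLock<Vec<String>> = OnceLock::new();

    fn expensive_data() -> &'static [String] {
        EXPENSIVE.get_or_init(|| {
            // Runs exactly once, even under concurrent first calls.
            (0..3).map(|i| format!("item-{i}")).collect()
        })
    }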
+ +pub mod search; diff --git a/src/server/routes.rs b/src/server/routes/search.rs similarity index 54% rename from src/server/routes.rs rename to src/server/routes/search.rs index 8910f8f..254c038 100644 --- a/src/server/routes.rs +++ b/src/server/routes/search.rs @@ -1,33 +1,38 @@ -//! This module provides the functionality to handle different routes of the `websurfx` -//! meta search engine website and provide appropriate response to each route/page -//! when requested. - -use std::fs::read_to_string; +//! This module handles the search route of the search engine website. use crate::{ cache::cacher::RedisCache, config::parser::Config, - engines::engine_models::EngineHandler, handler::paths::{file_path, FileType}, - results::{aggregation_models::SearchResults, aggregator::aggregate}, + models::{aggregation_models::SearchResults, engine_models::EngineHandler}, + results::aggregator::aggregate, }; use actix_web::{get, web, HttpRequest, HttpResponse}; use handlebars::Handlebars; +use regex::Regex; use serde::Deserialize; +use std::{ + fs::{read_to_string, File}, + io::{BufRead, BufReader, Read}, +}; use tokio::join; +// ---- Constants ---- +/// Initialize redis cache connection once and store it on the heap. +static REDIS_CACHE: async_once_cell::OnceCell = async_once_cell::OnceCell::new(); + /// A named struct which deserializes all the user provided search parameters and stores them. -/// -/// # Fields -/// -/// * `q` - It stores the search parameter option `q` (or query in simple words) -/// of the search url. -/// * `page` - It stores the search parameter `page` (or pageno in simple words) -/// of the search url. #[derive(Deserialize)] -struct SearchParams { +pub struct SearchParams { + /// It stores the search parameter option `q` (or query in simple words) + /// of the search url. q: Option, + /// It stores the search parameter `page` (or pageno in simple words) + /// of the search url. page: Option, + /// It stores the search parameter `safesearch` (or safe search level in simple words) of the + /// search url. + safesearch: Option, } /// Handles the route of index page or main page of the `websurfx` meta search engine website. @@ -54,18 +59,15 @@ pub async fn not_found( } /// A named struct which is used to deserialize the cookies fetched from the client side. -/// -/// # Fields -/// -/// * `theme` - It stores the theme name used in the website. -/// * `colorscheme` - It stores the colorscheme name used for the website theme. -/// * `engines` - It stores the user selected upstream search engines selected from the UI. #[allow(dead_code)] #[derive(Deserialize)] -struct Cookie { - theme: String, - colorscheme: String, - engines: Vec, +struct Cookie<'a> { + /// It stores the theme name used in the website. + theme: &'a str, + /// It stores the colorscheme name used for the website theme. + colorscheme: &'a str, + /// It stores the user selected upstream search engines selected from the UI. 
@@ -101,42 +103,58 @@ pub async fn search(
         None => 1,
     };
 
+    let safe_search: u8 = match config.safe_search {
+        3..=4 => config.safe_search,
+        _ => match &params.safesearch {
+            Some(safesearch) => match safesearch {
+                0..=2 => *safesearch,
+                _ => 1,
+            },
+            None => config.safe_search,
+        },
+    };
+
     let (_, results, _) = join!(
         results(
             format!(
-                "http://{}:{}/search?q={}&page={}",
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
                 config.binding_ip,
                 config.port,
                 query,
-                page - 1
+                page - 1,
+                safe_search
             ),
             &config,
-            query.to_string(),
+            query,
             page - 1,
             req.clone(),
+            safe_search
         ),
         results(
             format!(
-                "http://{}:{}/search?q={}&page={}",
-                config.binding_ip, config.port, query, page
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
+                config.binding_ip, config.port, query, page, safe_search
             ),
             &config,
-            query.to_string(),
+            query,
             page,
             req.clone(),
+            safe_search
         ),
         results(
             format!(
-                "http://{}:{}/search?q={}&page={}",
+                "http://{}:{}/search?q={}&page={}&safesearch={}",
                 config.binding_ip,
                 config.port,
                 query,
-                page + 1
+                page + 1,
+                safe_search
            ),
             &config,
-            query.to_string(),
+            query,
             page + 1,
             req.clone(),
+            safe_search
         )
     );
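The `safe_search` resolution above encodes a small precedence rule: a server-configured level of 3 or 4 is enforced unconditionally; otherwise a valid `safesearch` query parameter (0..=2) wins, an out-of-range parameter degrades to 1, and a missing parameter falls back to the configured level. Restated as a standalone, testable function (the function name is mine, not the patch's):

/// Standalone restatement of the safe-search precedence implemented above.
fn resolve_safe_search(config_level: u8, param: Option<u8>) -> u8 {
    match config_level {
        // Levels 3 and 4 are server-enforced and cannot be lowered by the client.
        3..=4 => config_level,
        _ => match param {
            // A valid client-supplied level (0..=2) takes precedence.
            Some(level @ 0..=2) => level,
            // An out-of-range value degrades to the moderate default of 1.
            Some(_) => 1,
            // No parameter: use the configured level.
            None => config_level,
        },
    }
}

fn main() {
    assert_eq!(resolve_safe_search(2, Some(0)), 0); // client may relax a soft default
    assert_eq!(resolve_safe_search(4, Some(0)), 4); // enforced level ignores the client
    assert_eq!(resolve_safe_search(2, Some(9)), 1); // out-of-range input clamps to 1
    assert_eq!(resolve_safe_search(2, None), 2); // absent parameter: config wins
}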
@@ -149,35 +167,72 @@ pub async fn search(
     }
 }
 
-/// Fetches the results for a query and page.
-/// First checks the redis cache, if that fails it gets proper results
+/// Fetches the results for a query and page. It first checks the redis cache, if that
+/// fails it gets proper results by requesting from the upstream search engines.
+///
+/// # Arguments
+///
+/// * `url` - It takes the url of the current page that requested the search results for a
+/// particular search query.
+/// * `config` - It takes a parsed config struct.
+/// * `query` - It takes the user provided search query to fetch results for.
+/// * `req` - It takes the `HttpRequest` struct as a value.
+///
+/// # Error
+///
+/// It returns the `SearchResults` struct if the search results could be successfully fetched from
+/// the cache or from the upstream search engines otherwise it returns an appropriate error.
 async fn results(
     url: String,
     config: &Config,
-    query: String,
+    query: &str,
     page: u32,
     req: HttpRequest,
+    safe_search: u8,
 ) -> Result<SearchResults, Box<dyn std::error::Error>> {
-    //Initialize redis cache connection struct
-    let mut redis_cache = RedisCache::new(config.redis_url.clone())?;
+    // Initialize redis cache connection struct
+    let mut redis_cache: RedisCache = REDIS_CACHE
+        .get_or_init(async {
+            // Initialize the redis cache connection pool only once and store it on the heap.
+            RedisCache::new(&config.redis_url, 5).await.unwrap()
+        })
+        .await
+        .clone();
     // fetch the cached results json.
-    let cached_results_json = redis_cache.cached_json(&url);
+    let cached_results_json: Result<String, Box<dyn std::error::Error>> =
+        redis_cache.clone().cached_json(&url).await;
     // check if fetched cache results was indeed fetched or it was an error and if so
     // handle the data accordingly.
     match cached_results_json {
-        Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results).unwrap()),
+        Ok(results) => Ok(serde_json::from_str::<SearchResults>(&results)?),
         Err(_) => {
+            if safe_search == 4 {
+                let mut results: SearchResults = SearchResults::default();
+                let mut _flag: bool =
+                    is_match_from_filter_list(file_path(FileType::BlockList)?, query)?;
+                _flag = !is_match_from_filter_list(file_path(FileType::AllowList)?, query)?;
+
+                if _flag {
+                    results.set_disallowed();
+                    results.add_style(&config.style);
+                    results.set_page_query(query);
+                    redis_cache
+                        .cache_results(&serde_json::to_string(&results)?, &url)
+                        .await?;
+                    return Ok(results);
+                }
+            }
+
             // check if the cookie value is empty or not if it is empty then use the
             // default selected upstream search engines from the config file otherwise
             // parse the non-empty cookie and grab the user selected engines from the
             // UI and use that.
-            let mut results: crate::results::aggregation_models::SearchResults = match req
-                .cookie("appCookie")
-            {
+            let mut results: SearchResults = match req.cookie("appCookie") {
                 Some(cookie_value) => {
-                    let cookie_value: Cookie = serde_json::from_str(cookie_value.name_value().1)?;
+                    let cookie_value: Cookie<'_> =
+                        serde_json::from_str(cookie_value.name_value().1)?;
 
-                    let engines = cookie_value
+                    let engines: Vec<EngineHandler> = cookie_value
                         .engines
                         .iter()
                         .filter_map(|name| EngineHandler::new(name))
@@ -188,8 +243,9 @@ async fn results(
                         page,
                         config.aggregator.random_delay,
                         config.debug,
-                        engines,
+                        &engines,
                        config.request_timeout,
+                        safe_search,
                     )
                     .await?
                 }
@@ -199,19 +255,43 @@ async fn results(
                         page,
                         config.aggregator.random_delay,
                         config.debug,
-                        config.upstream_search_engines.clone(),
+                        &config.upstream_search_engines,
                         config.request_timeout,
+                        safe_search,
                     )
                     .await?
                 }
             };
 
-            results.add_style(config.style.clone());
-            redis_cache.cache_results(serde_json::to_string(&results)?, &url)?;
+            if results.engine_errors_info().is_empty() && results.results().is_empty() {
+                results.set_filtered();
+            }
+            results.add_style(&config.style);
+            redis_cache
+                .cache_results(&serde_json::to_string(&results)?, &url)
+                .await?;
             Ok(results)
         }
     }
 }
 
+/// A helper function which checks whether the search query contains any keywords which should be
+/// disallowed/allowed based on the regex-based rules present in the blocklist and allowlist files.
+fn is_match_from_filter_list(
+    file_path: &str,
+    query: &str,
+) -> Result<bool, Box<dyn std::error::Error>> {
+    let mut flag = false;
+    let mut reader = BufReader::new(File::open(file_path)?);
+    for line in reader.by_ref().lines() {
+        let re = Regex::new(&line?)?;
+        if re.is_match(query) {
+            flag = true;
+            break;
+        }
+    }
+    Ok(flag)
+}
+
 /// Handles the route of robots.txt page of the `websurfx` meta search engine website.
 #[get("/robots.txt")]
 pub async fn robots_data(_req: HttpRequest) -> Result<HttpResponse, Box<dyn std::error::Error>> {
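For `is_match_from_filter_list` above: each line of the blocklist/allowlist file is compiled as its own regex and tested against the query, returning on the first hit. The same logic, generalized over any `BufRead` source so it can be exercised without files, and with one small hardening not in the patch (blank lines are skipped, since an empty regex matches every query); the rules shown are invented examples:

use std::io::{BufRead, Cursor};

use regex::Regex;

// Same shape as is_match_from_filter_list, but generic over the rule source.
fn is_match_from_rules<R: BufRead>(
    reader: R,
    query: &str,
) -> Result<bool, Box<dyn std::error::Error>> {
    for line in reader.lines() {
        let rule = line?;
        // Skip blank lines so an empty rule ("") cannot match everything.
        if rule.trim().is_empty() {
            continue;
        }
        // Each remaining line is treated as a complete regex rule.
        if Regex::new(&rule)?.is_match(query) {
            return Ok(true);
        }
    }
    Ok(false)
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Two illustrative rules, one case-insensitive.
    let rules = Cursor::new("(?i)forbidden\nbad[-_ ]?word\n");
    assert!(is_match_from_rules(rules, "a Forbidden query")?);
    Ok(())
}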
diff --git a/websurfx/config.lua b/websurfx/config.lua
index 4f2633c..09b418d 100644
--- a/websurfx/config.lua
+++ b/websurfx/config.lua
@@ -10,6 +10,21 @@ production_use = false -- whether to use production mode or not (in other words
 -- if production_use is set to true
 -- There will be a random delay before sending the request to the search engines, this is to prevent DDoSing the upstream search engines from a large number of simultaneous requests.
 request_timeout = 30 -- timeout for the search requests sent to the upstream search engines to be fetched (value in seconds).
+rate_limiter = {
+    number_of_requests = 20, -- The number of requests that are allowed within the provided time limit.
+    time_limit = 3, -- The time limit (in seconds) within which the number of requests set above should be accepted.
+}
+
+-- ### Search ###
+-- Filter results based on different levels. The levels provided are:
+-- {{
+-- 0 - None
+-- 1 - Low
+-- 2 - Moderate
+-- 3 - High
+-- 4 - Aggressive
+-- }}
+safe_search = 2
 
 -- ### Website ###
 -- The different colorschemes provided are:
@@ -34,4 +49,7 @@ theme = "simple" -- the theme name which should be used for the website
 redis_url = "redis://127.0.0.1:8082" -- redis connection url address on which the client should connect on.
 
 -- ### Search Engines ###
-upstream_search_engines = { DuckDuckGo = true, Searx = false } -- select the upstream search engines from which the results should be fetched.
+upstream_search_engines = {
+    DuckDuckGo = true,
+    Searx = false,
+} -- select the upstream search engines from which the results should be fetched.
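The new `rate_limiter` table describes token-bucket style limiting: allow up to `number_of_requests` within `time_limit` seconds per client. How websurfx consumes these two values is outside this diff, so the following actix-governor wiring is only a plausible sketch of the mapping under that assumption, not the project's confirmed implementation:

use actix_governor::{Governor, GovernorConfigBuilder};
use actix_web::{web, App, HttpResponse, HttpServer};

#[actix_web::main]
async fn main() -> std::io::Result<()> {
    // Assumed mapping: time_limit -> replenish interval, number_of_requests -> burst size.
    // The literals mirror the Lua defaults above (20 requests, 3 seconds).
    let governor_conf = GovernorConfigBuilder::default()
        .per_second(3)
        .burst_size(20)
        .finish()
        .unwrap();

    HttpServer::new(move || {
        App::new()
            // Requests beyond the quota receive 429 Too Many Requests.
            .wrap(Governor::new(&governor_conf))
            .route("/", web::get().to(|| async { HttpResponse::Ok().body("ok") }))
    })
    .bind(("127.0.0.1", 8080))? // assumed binding for the sketch
    .run()
    .await
}

Note that a token bucket is not a strict fixed window: with this configuration a client can burst 20 requests, after which capacity refills at one request per 3 seconds, which approximates but does not exactly equal "20 requests per 3-second window".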