generate include and match rules separately

This can be useful for copy-pasting these rules into the script settings in Violentmonkey.
2025-01-30 14:47:27 +05:00 · 2024-01-29 18:32:19 +01:00 · 2024-01-29 18:32:19 +01:00 · 66391fb7c0
commit 66391fb7c0
parent 74d6bc83b2
2 changed files with 34 additions and 37 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,4 @@
 untouched_Bypass_All_Shortlinks.user.js
 includes.txt
+match_rules.txt
+include_rules.txt
--- a/2_generate_includes.py
+++ b/2_generate_includes.py
@ -4,10 +4,9 @@ def extract_regex_from_js(js_code):
    pattern1 = r'(?<!//)BypassedByBloggerPemula\((.*?),'
    matches1 = re.findall(pattern1, js_code)
    matches1 = [match.strip('/') for match in matches1]
-    
+
    pattern2 = r"(?<!//)BloggerPemula\('([^']+)',"
    matches2 = re.findall(pattern2, js_code)
-    #matches2 = ['/' + s + '/' for s in matches2]

    pattern3 = r"(?<!//)RemoveBp\('([^']+)',"
    matches3 = re.findall(pattern3, js_code)
@ -20,49 +19,45 @@ def extract_regex_from_js(js_code):

    return matches1+matches2+matches3+matches4+matches5

-def regex_to_include_line(regex):
-    #regex = regex.strip("/")
-
-    #Use @include for more complex regex
-    if any(char in regex for char in ['|', '(', ')', '*']):
-        regex = '(' + regex + ')'
-        include_line = "// @include /^(https?:\/\/)(.+)?" + regex + "(\/.*)/"
-        include_line = include_line.replace( "\.*)(\/.*)/", "\.*)/" ) #clean excess in the regex
-
-    #Use @match for simpler regex
-    else:
-        include_line = '// @match *://*.' + regex + '/*'
-
-    return include_line
-
-def generate_include_lines(regex_list):
-    include_lines = []
-    for regex in regex_list:
-        include_line = regex_to_include_line(regex)
-        include_lines.append(include_line)
-
-    return include_lines
-
-def write_to_file(filename, lines):
+def write_list_of_strings_to_file(filename, lines):
    with open(filename, 'w', encoding='utf-8') as file:
        for line in lines:
            file.write(line + '\n')
    print(f"OK: Generated {filename}")

-def compile_and_print(regex_strings):
-    #for regex_string in regex_strings: print(regex_string)
-    write_to_file('supported_sites.txt', regex_strings)
+def generate_include_lines(regex_list):
+    include_rules = []
+    match_rules = []
+    include_and_match_lines = []

-    include_lines = generate_include_lines(regex_strings)
-    print(f"OK: Generated {len(include_lines)} include lines.")
-    
-    #for line in include_lines: print(line)
-    write_to_file('includes.txt', include_lines)
+    for regex in regex_list:
+
+        #Use @include for more complex regex
+        if any(char in regex for char in ['|', '(', ')', '*']):
+            regex = '(' + regex + ')'
+            include_rule = "/^(https?:\/\/)(.+)?" + regex + "(\/.*)/"
+            include_rule = include_rule.replace( "\.*)(\/.*)/", "\.*)/" ) #clean excess in the regex
+            include_rules.append(include_rule)
+            include_line = "// @include " + include_rule
+            include_and_match_lines.append(include_line)
+
+        #Use @match for simpler regex
+        else:
+            match_rule = '*://*.' + regex + '/*'
+            match_rules.append(match_rule)
+            match_line =  '// @match ' + match_rule
+            include_and_match_lines.append(match_line)
+
+    #Output results to txt files
+    write_list_of_strings_to_file('supported_sites.txt', regex_list)
+    write_list_of_strings_to_file('match_rules.txt', match_rules)
+    write_list_of_strings_to_file('include_rules.txt', include_rules)
+    write_list_of_strings_to_file('includes.txt', include_and_match_lines)


 def main():
    file_path = 'untouched_Bypass_All_Shortlinks.user.js'
-    
+
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            js_code = file.read()
@ -71,7 +66,7 @@ def main():

            # remove short domains (errors)
            regex_strings = [s for s in regex_strings if "." in s and len(s) >= 5]
-            
+
            # remove domains with blocked words
            blocked_words_for_includes = [
                "google",
@ -79,7 +74,7 @@ def main():
            ]
            regex_strings = [s for s in regex_strings if not any(word in s for word in blocked_words_for_includes)]

-            compile_and_print(regex_strings)
+            generate_include_lines(regex_strings)
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    except Exception as e: