From eea3d2f3e55c784816881a31541ed53794f6b8af Mon Sep 17 00:00:00 2001
From: Hikari
Date: Sun, 10 Apr 2022 21:44:20 +0000
Subject: [PATCH] 1.0.27: New requirement: grapheme. New `utils.smart_split`,
 authored by t.me/bsolute. Minor improvements

---
 hikka/__main__.py     |  53 +++++++++---
 hikka/inline/utils.py |   5 +-
 hikka/utils.py        | 183 +++++++++++++++++++++++++++++++-----------
 hikka/version.py      |   2 +-
 requirements.txt      |   1 +
 5 files changed, 183 insertions(+), 61 deletions(-)

diff --git a/hikka/__main__.py b/hikka/__main__.py
index 0e8aa04..68c845f 100755
--- a/hikka/__main__.py
+++ b/hikka/__main__.py
@@ -29,6 +29,7 @@
 import sys
 import getpass
 import os
+import subprocess
 
 if (
     getpass.getuser() == "root"
@@ -46,6 +47,30 @@ if (
     if input("> ").lower() != "force_insecure":
         sys.exit(1)
 
+
+def deps(e):
+    print(
+        "Error: you have not installed all dependencies correctly.\n"
+        f"{str(e)}\n"
+        "Attempting to install dependencies... Just wait."
+    )
+
+    subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "pip",
+            "install",
+            "--upgrade",
+            "-q",
+            "--disable-pip-version-check",
+            "--no-warn-script-location",
+            "-r",
+            "requirements.txt",
+        ]
+    )
+
+
 if sys.version_info < (3, 8, 0):
     print("Error: you must use at least Python version 3.8.0")  # pragma: no cover
 elif __package__ != "hikka":  # In case they did python __main__.py
@@ -53,21 +78,29 @@ elif __package__ != "hikka":  # In case they did python __main__.py
         "Error: you cannot run this as a script; you must execute as a package"
     )  # pragma: no cover
 else:
-    from . import log
+    try:
+        from . import log
 
-    log.init()
+        log.init()
+    except ModuleNotFoundError as e:  # pragma: no cover
+        deps(e)
+        try:
+            from . import log
+
+            log.init()
+        except ModuleNotFoundError as e2:
+            print(
+                "Error while installing dependencies. Please do this manually:\n"
+                f"{str(e2)}\n"
+                "pip3 install -r requirements.txt"
+            )
+
+            sys.exit(1)
 
     try:
         from . import main
     except ModuleNotFoundError as e:  # pragma: no cover
-        print(
-            "Error: you have not installed all dependencies correctly.\n"
-            f"{str(e)}\n"
-            "Attempting dependencies installation... Just wait."
-        )
-
-        os.popen("pip3 install -r requirements.txt").read()
-
+        deps(e)
         try:
             from . import main
         except ModuleNotFoundError as e2:
diff --git a/hikka/inline/utils.py b/hikka/inline/utils.py
index 72f7999..968aae1 100644
--- a/hikka/inline/utils.py
+++ b/hikka/inline/utils.py
@@ -10,6 +10,7 @@ import logging
 from typing import Union
 from types import FunctionType
 from .. import security
+from .._types import Module
 import inspect
 
 logger = logging.getLogger(__name__)
@@ -168,7 +169,7 @@ class Utils(InlineUnit):
         return next(
             next(
                 lambda: self._db.get(security.__name__, "masks", {}).get(
-                    f"{getattr(cls_, stack_entry.function).__module__}.{getattr(cls_, stack_entry.function).__name__}",
+                    f"{getattr(cls_, stack_entry.function).__module__}.{stack_entry.function}",
                     getattr(
                         getattr(cls_, stack_entry.function),
                         "security",
@@ -176,7 +177,7 @@ class Utils(InlineUnit):
                     ),
                 )
                 for name, cls_ in stack_entry.frame.f_globals.items()
-                if name.endswith("Mod") and hasattr(cls_, "strings")
+                if name.endswith("Mod") and isinstance(cls_, type) and issubclass(cls_, Module)
             )
             for stack_entry in inspect.stack()
             if hasattr(stack_entry, "function")
diff --git a/hikka/utils.py b/hikka/utils.py
index 4a4d12a..4f424ec 100755
--- a/hikka/utils.py
+++ b/hikka/utils.py
@@ -47,7 +47,7 @@ from telethon.tl.types import (
     MessageEntityMentionName,
 )
 
-import copy
+import grapheme
 
 from telethon.hints import Entity
 
@@ -293,8 +293,9 @@ async def answer(
 
     try:
         list_ = await message.client.loader.inline.list(
             message=message,
-            strings=smart_split(text, 4096),
+            strings=list(smart_split(text, entity, 4096)),
         )
+
         if not message.client.loader.inline.init_complete or not list_:
             raise
@@ -575,54 +576,140 @@ def rand(size: int, /) -> str:
     )
 
 
-def change_attribute(obj, attribute: str, value: str):
-    object_ = copy.deepcopy(obj)
-    setattr(object_, attribute, value)
-    return object_
+def smart_split(text: str, entities: list, length: int = 4096, split_on=("\n", " "), min_length: int = 1):
+    """
+    Split the message into smaller messages.
+    A grapheme will never be broken. Entities will be displaced to match the right location. No inputs will be mutated.
+    The end of each message except the last one is stripped of characters from [split_on].
+    :param text: the plain text input
+    :param entities: the entities
+    :param length: the maximum length of a single message
+    :param split_on: characters (or strings) which are preferred for a message break
+    :param min_length: ignore any matches on [split_on] strings before this number of characters into each message
+    :return: a generator yielding the formatted text of each message part
+    """
+
+    # Authored by @bsolute
+    # https://t.me/LonamiWebs/27777
+
+    encoded = text.encode("utf-16le")
+    pending_entities = entities
+    text_offset = 0
+    bytes_offset = 0
+    text_length = len(text)
+    bytes_length = len(encoded)
+
+    while text_offset < text_length:
+        if bytes_offset + length * 2 >= bytes_length:
+            yield parser.unparse(
+                text[text_offset:],
+                sorted(pending_entities, key=lambda x: x.offset),
+            )
+            break
+
+        codepoint_count = len(
+            encoded[bytes_offset : bytes_offset + length * 2].decode(
+                "utf-16le",
+                errors="ignore",
+            )
+        )
+
+        for search in split_on:
+            search_index = text.rfind(
+                search,
+                text_offset + min_length,
+                text_offset + codepoint_count,
+            )
+            if search_index != -1:
+                break
+        else:
+            search_index = text_offset + codepoint_count
+
+        split_index = grapheme.safe_split_index(text, search_index)
+
+        assert split_index > text_offset
+
+        split_offset_utf16 = (
+            len(text[text_offset:split_index].encode("utf-16le"))
+        ) // 2
+        exclude = 0
+        while (
+            split_index + exclude < text_length
+            and text[split_index + exclude] in split_on
+        ):
+            exclude += 1
+
+        current_entities = []
+        entities = pending_entities.copy()
+        pending_entities = []
+
+        for entity in entities:
+            if (
+                entity.offset < split_offset_utf16
+                and entity.offset + entity.length > split_offset_utf16 + exclude
+            ):
+                # spans boundary
+                current_entities.append(
+                    _copy_tl(
+                        entity,
+                        length=split_offset_utf16 - entity.offset,
+                    )
+                )
+                pending_entities.append(
+                    _copy_tl(
+                        entity,
+                        offset=0,
+                        length=entity.offset
+                        + entity.length
+                        - split_offset_utf16
+                        - exclude,
+                    )
+                )
+            elif entity.offset < split_offset_utf16 < entity.offset + entity.length:
+                # overlaps boundary
+                current_entities.append(
+                    _copy_tl(
+                        entity,
+                        length=split_offset_utf16 - entity.offset,
+                    )
+                )
+            elif entity.offset < split_offset_utf16:
+                # wholly left
+                current_entities.append(entity)
+            elif (
+                entity.offset + entity.length
+                > split_offset_utf16 + exclude
+                > entity.offset
+            ):
+                # overlaps right boundary
+                pending_entities.append(
+                    _copy_tl(
+                        entity,
+                        offset=0,
+                        length=entity.offset
+                        + entity.length
+                        - split_offset_utf16
+                        - exclude,
+                    )
+                )
+            elif entity.offset + entity.length > split_offset_utf16 + exclude:
+                # wholly right
+                pending_entities.append(
+                    _copy_tl(
+                        entity,
+                        offset=entity.offset - split_offset_utf16 - exclude,
+                    )
+                )
+            else:
+                # entity lies entirely inside the stripped whitespace; drop it
+                assert entity.length <= exclude
+
+        current_text = text[text_offset:split_index]
+        yield parser.unparse(
+            current_text,
+            sorted(current_entities, key=lambda x: x.offset),
+        )
+
+        text_offset = split_index + exclude
+        bytes_offset += len(current_text.encode("utf-16le"))
+        assert bytes_offset % 2 == 0
 
 
-def smart_split(text: str, chunk_size: int) -> List[str]:
-    text = emoji_pattern.sub(r"", text)
-    text, entities = parser.parse(text)
-    result = []
-
-    chunk_begin_offset = 0
-
-    for chunk in chunks(text, chunk_size):
-        chunk_end_offset = chunk_begin_offset + chunk_size
-        # Find all entities which are located in this chunk in particular
-        this_chunk_entities = [
-            copy.deepcopy(entity)
-            for entity in entities
-            if entity.offset + entity.length > chunk_begin_offset
-            and entity.offset < chunk_end_offset
-        ]
-
-        for entity in this_chunk_entities:
-            # If entity starts *before* the chunk
-            if entity.offset < chunk_begin_offset:
-                if entity.offset + entity.length in range(
-                    chunk_begin_offset,
-                    chunk_end_offset + 1,
-                ):
-                    # Entity ends *inside* of the chunk
-                    entity.length = entity.offset + entity.length - chunk_begin_offset
-                else:
-                    # Entity ends *outside* of the chunk
-                    entity.length = chunk_size
-                entity.offset = 0
-            # If entity starts *inside* of chunk
-            elif entity.offset in range(chunk_begin_offset, chunk_end_offset + 1):
-                entity.offset -= chunk_begin_offset
-                if entity.length > chunk_size - entity.offset:
-                    entity.length = chunk_size - entity.offset
-
-        this_chunk_entities.sort(key=lambda x: x.offset)
-
-        result += [[chunk, this_chunk_entities]]
-        chunk_begin_offset += chunk_size
-
-    return [parser.unparse(*i) for i in result]
+def _copy_tl(o, **kwargs):
+    d = o.to_dict()
+    del d["_"]  # to_dict() injects the TL type name, which the constructor rejects
+    d.update(kwargs)
+    return o.__class__(**d)
 
 
 init_ts = time.perf_counter()
diff --git a/hikka/version.py b/hikka/version.py
index 14b5546..c17c37c 100644
--- a/hikka/version.py
+++ b/hikka/version.py
@@ -1 +1 @@
-__version__ = (1, 0, 26)
+__version__ = (1, 0, 27)
diff --git a/requirements.txt b/requirements.txt
index 5e08fc8..68cef3e 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,5 +9,6 @@ Jinja2==3.0.3
 requests==2.27.1
 aiogram==2.19
 websockets==10.2
+grapheme==0.6.0
 
 # Python 3.8+
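-- 
Usage sketch for the new splitter (illustrative only, not part of the patch). It assumes `parser` in hikka/utils.py refers to Telethon's HTML parser (telethon.extensions.html), so `smart_split` yields re-serialized HTML chunks; that binding and the `hikka.utils` import path are assumptions based on how the patched code calls `parser.unparse`:

    # Hypothetical example: split one long formatted message into sendable parts.
    from telethon.extensions import html as parser  # assumed to match hikka's parser
    from hikka.utils import smart_split

    long_html = "<b>report</b>\n" + "line of output\n" * 4000
    # parse() returns plain text plus entities whose offsets are UTF-16 based,
    # which is what smart_split's utf-16le arithmetic expects
    text, entities = parser.parse(long_html)

    for chunk in smart_split(text, entities, length=4096):
        # entities in each chunk were re-offset relative to the chunk start;
        # grapheme clusters are never cut, and breaks prefer "\n", then " "
        print(len(chunk))

Because every yielded part carries its own re-offset entities, each chunk can be sent as an independent message without breaking bold/italic/link formatting at the split point.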