1.0.27: New requirement - grapheme. New `utils.smart_split`, authored by t.me/bsolute. Minor improvements

pull/1/head
Hikari 2022-04-10 21:44:20 +00:00
parent fdef2634de
commit eea3d2f3e5
5 changed files with 183 additions and 61 deletions
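The headline addition is the grapheme-aware `smart_split` helper in utils.py: it takes already-parsed message text plus its Telegram entities and yields re-serialized chunks that each stay under the length limit without breaking a grapheme cluster or losing formatting. A rough usage sketch follows; the import path `hikka.utils` and the use of Telethon's HTML parser as `parser` are assumptions inferred from this diff, not verbatim project code.

# Usage sketch only: "hikka.utils" as the import path and Telethon's HTML
# parser standing in for the module-level "parser" are assumptions.
from telethon.extensions import html as parser
from hikka import utils

text, entities = parser.parse("<b>" + "a very long formatted message " * 400 + "</b>")

for chunk in utils.smart_split(text, entities, 4096):
    # Each chunk is serialized back through parser.unparse, so bold/italic/link
    # entities keep pointing at the right characters after the split.
    print(len(chunk))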

View File

@@ -29,6 +29,7 @@
 import sys
 import getpass
 import os
+import subprocess
 
 if (
     getpass.getuser() == "root"
@@ -46,6 +47,30 @@ if (
     if input("> ").lower() != "force_insecure":
         sys.exit(1)
 
 
+def deps(e):
+    print(
+        "Error: you have not installed all dependencies correctly.\n"
+        f"{str(e)}\n"
+        "Attempting dependencies installation... Just wait."
+    )
+    subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "pip",
+            "install",
+            "--upgrade",
+            "-q",
+            "--disable-pip-version-check",
+            "--no-warn-script-location",
+            "-r",
+            "requirements.txt",
+        ]
+    )
+
+
 if sys.version_info < (3, 8, 0):
     print("Error: you must use at least Python version 3.8.0")  # pragma: no cover
 elif __package__ != "hikka":  # In case they did python __main__.py
@@ -53,21 +78,29 @@ elif __package__ != "hikka":  # In case they did python __main__.py
         "Error: you cannot run this as a script; you must execute as a package"
     )  # pragma: no cover
 else:
-    from . import log
-
-    log.init()
+    try:
+        from . import log
+
+        log.init()
+    except ModuleNotFoundError as e:  # pragma: no cover
+        deps(e)
+
+        try:
+            from . import log
+
+            log.init()
+        except ModuleNotFoundError as e2:
+            print(
+                "Error while installing dependencies. Please, do this manually!\n"
+                f"{str(e2)}\n"
+                "pip3 install -r requirements.txt"
+            )
+            sys.exit(1)
 
     try:
         from . import main
     except ModuleNotFoundError as e:  # pragma: no cover
-        print(
-            "Error: you have not installed all dependencies correctly.\n"
-            f"{str(e)}\n"
-            "Attempting dependencies installation... Just wait."
-        )
-        os.popen("pip3 install -r requirements.txt").read()
+        deps(e)
 
         try:
             from . import main
         except ModuleNotFoundError as e2:
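The bootstrap above now installs requirements through subprocess.run([sys.executable, "-m", "pip", ...]) rather than os.popen("pip3 install ..."), so packages land in the interpreter that is actually running the bot instead of whatever pip3 is first on PATH. The same retry-after-install pattern in isolation (the package name below is a hypothetical placeholder, not a real Hikka dependency):

# Standalone sketch of the retry-after-install pattern; "somepackage" is hypothetical.
import importlib
import subprocess
import sys

try:
    import somepackage  # noqa: F401
except ModuleNotFoundError:
    subprocess.run([sys.executable, "-m", "pip", "install", "somepackage"], check=False)
    importlib.invalidate_caches()  # pick up the freshly installed package
    import somepackage  # noqa: F401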

View File

@@ -10,6 +10,7 @@ import logging
 from typing import Union
 from types import FunctionType
 from .. import security
+from .._types import Module
 import inspect
 
 logger = logging.getLogger(__name__)
@@ -168,7 +169,7 @@ class Utils(InlineUnit):
         return next(
             next(
                 lambda: self._db.get(security.__name__, "masks", {}).get(
-                    f"{getattr(cls_, stack_entry.function).__module__}.{getattr(cls_, stack_entry.function).__name__}",
+                    f"{getattr(cls_, stack_entry.function).__module__}.{stack_entry.function}",
                     getattr(
                         getattr(cls_, stack_entry.function),
                         "security",
@@ -176,7 +177,7 @@
                 ),
             )
             for name, cls_ in stack_entry.frame.f_globals.items()
-            if name.endswith("Mod") and hasattr(cls_, "strings")
+            if name.endswith("Mod") and issubclass(cls_, Module)
         )
         for stack_entry in inspect.stack()
         if hasattr(stack_entry, "function")

View File

@@ -47,7 +47,7 @@ from telethon.tl.types import (
     MessageEntityMentionName,
 )
 
-import copy
+import grapheme
 from telethon.hints import Entity
@@ -293,8 +293,9 @@ async def answer(
     try:
         list_ = await message.client.loader.inline.list(
            message=message,
-            strings=smart_split(text, 4096),
+            strings=list(smart_split(text, entity, 4096)),
         )
 
         if not message.client.loader.inline.init_complete or not list_:
             raise
@@ -575,54 +576,140 @@ def rand(size: int, /) -> str:
     )
 
 
-def change_attribute(obj, attribute: str, value: str):
-    object_ = copy.deepcopy(obj)
-    setattr(object_, attribute, value)
-    return object_
+def smart_split(text, entities, length=4096, split_on=("\n", " "), min_length=1):
+    """
+    Split the message into smaller messages.
+    A grapheme will never be broken. Entities will be displaced to match the right location. No inputs will be mutated.
+    The end of each message except the last one is stripped of characters from [split_on]
+    :param text: the plain text input
+    :param entities: the entities
+    :param length: the maximum length of a single message
+    :param split_on: characters (or strings) which are preferred for a message break
+    :param min_length: ignore any matches on [split_on] strings before this number of characters into each message
+    :return:
+    """
+    # Authored by @bsolute
+    # https://t.me/LonamiWebs/27777
+
+    encoded = text.encode("utf-16le")
+    pending_entities = entities
+    text_offset = 0
+    bytes_offset = 0
+    text_length = len(text)
+    bytes_length = len(encoded)
+
+    while text_offset < text_length:
+        if bytes_offset + length * 2 >= bytes_length:
+            yield parser.unparse(
+                text[text_offset:],
+                list(sorted(pending_entities, key=lambda x: x.offset)),
+            )
+            break
+
+        codepoint_count = len(
+            encoded[bytes_offset : bytes_offset + length * 2].decode(
+                "utf-16le",
+                errors="ignore",
+            )
+        )
+
+        for search in split_on:
+            search_index = text.rfind(
+                search,
+                text_offset + min_length,
+                text_offset + codepoint_count,
+            )
+            if search_index != -1:
+                break
+        else:
+            search_index = text_offset + codepoint_count
+
+        split_index = grapheme.safe_split_index(text, search_index)
+        assert split_index > text_offset
+
+        split_offset_utf16 = (
+            len(text[text_offset:split_index].encode("utf-16le"))
+        ) // 2
+        exclude = 0
+        while (
+            split_index + exclude < text_length
+            and text[split_index + exclude] in split_on
+        ):
+            exclude += 1
+
+        current_entities = []
+        entities = pending_entities.copy()
+        pending_entities = []
+
+        for entity in entities:
+            if (
+                entity.offset < split_offset_utf16
+                and entity.offset + entity.length > split_offset_utf16 + exclude
+            ):
+                # spans boundary
+                current_entities.append(
+                    _copy_tl(
+                        entity,
+                        length=split_offset_utf16 - entity.offset,
+                    )
+                )
+                pending_entities.append(
+                    _copy_tl(
+                        entity,
+                        offset=0,
+                        length=entity.offset
+                        + entity.length
+                        - split_offset_utf16
+                        - exclude,
+                    )
+                )
+            elif entity.offset < split_offset_utf16 < entity.offset + entity.length:
+                # overlaps boundary
+                current_entities.append(
+                    _copy_tl(
+                        entity,
+                        length=split_offset_utf16 - entity.offset,
+                    )
+                )
+            elif entity.offset < split_offset_utf16:
+                # wholly left
+                current_entities.append(entity)
+            elif (
+                entity.offset + entity.length
+                > split_offset_utf16 + exclude
+                > entity.offset
+            ):
+                # overlaps right boundary
+                pending_entities.append(
+                    _copy_tl(
+                        entity,
+                        offset=0,
+                        length=entity.offset
+                        + entity.length
+                        - split_offset_utf16
+                        - exclude,
+                    )
+                )
+            elif entity.offset + entity.length > split_offset_utf16 + exclude:
+                # wholly right
+                pending_entities.append(
+                    _copy_tl(
+                        entity,
+                        offset=entity.offset - split_offset_utf16 - exclude,
+                    )
+                )
+            else:
+                assert entity.length <= exclude
+                # ignore entity in whitespace
+
+        current_text = text[text_offset:split_index]
+        yield parser.unparse(
+            current_text,
+            list(sorted(current_entities, key=lambda x: x.offset)),
+        )
+
+        text_offset = split_index + exclude
+        bytes_offset += len(current_text.encode("utf-16le"))
+        assert bytes_offset % 2 == 0
-def smart_split(text: str, chunk_size: int) -> List[str]:
-    text = emoji_pattern.sub(r"", text)
-    text, entities = parser.parse(text)
-    result = []
-
-    chunk_begin_offset = 0
-    for chunk in chunks(text, chunk_size):
-        chunk_end_offset = chunk_begin_offset + chunk_size
-
-        # Find all entities which are located in this chunk in particular
-        this_chunk_entities = [
-            copy.deepcopy(entity)
-            for entity in entities
-            if entity.offset + entity.length > chunk_begin_offset
-            and entity.offset < chunk_end_offset
-        ]
-
-        for entity in this_chunk_entities:
-            # If entity starts *before* the chunk
-            if entity.offset < chunk_begin_offset:
-                if entity.offset + entity.length in range(
-                    chunk_begin_offset,
-                    chunk_end_offset + 1,
-                ):
-                    # Entity ends *inside* of the chunk
-                    entity.length = entity.offset + entity.length - chunk_begin_offset
-                else:
-                    # Entity ends *outside* of the chunk
-                    entity.length = chunk_size
-                entity.offset = 0
-            # If entity starts *inside* of chunk
-            elif entity.offset in range(chunk_begin_offset, chunk_end_offset + 1):
-                entity.offset -= chunk_begin_offset
-                if entity.length > chunk_size - entity.offset:
-                    entity.length = chunk_size - entity.offset
-
-        this_chunk_entities.sort(key=lambda x: x.offset)
-        result += [[chunk, this_chunk_entities]]
-        chunk_begin_offset += chunk_size
-
-    return [parser.unparse(*i) for i in result]
+def _copy_tl(o, **kwargs):
+    d = o.to_dict()
+    del d["_"]
+    d.update(kwargs)
+    return o.__class__(**d)
 
 
 init_ts = time.perf_counter()
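One detail worth calling out in `smart_split`: Telegram measures entity offset and length in UTF-16 code units, while Python string indexing counts code points, which is why the code above round-trips through encode("utf-16le") and halves the byte count before comparing anything against entity boundaries. A small self-contained illustration:

# Why the split position is converted to UTF-16 code units before it is
# compared against entity.offset / entity.length.
s = "hi 😀 there"

print(len(s))                          # 10 code points as Python sees them
print(len(s.encode("utf-16le")) // 2)  # 11 UTF-16 code units: 😀 is a surrogate pair

# A Telegram entity covering "😀 there" therefore has length 8, not 7, and
# naive code-point arithmetic would shift every entity that follows it.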

View File

@@ -1 +1 @@
-__version__ = (1, 0, 26)
+__version__ = (1, 0, 27)

View File

@@ -9,5 +9,6 @@ Jinja2==3.0.3
 requests==2.27.1
 aiogram==2.19
 websockets==10.2
+grapheme==0.6.0
 
 # Python 3.8+