1 files changed, 421 insertions, 0 deletions
diff --git a/.config/mpv/scripts/uosc_shared/lib/text.lua b/.config/mpv/scripts/uosc_shared/lib/text.lua
new file mode 100644
index 0000000..7bb456f
--- /dev/null
+++ b/.config/mpv/scripts/uosc_shared/lib/text.lua
@@ -0,0 +1,421 @@
+-- https://en.wikipedia.org/wiki/Unicode_block
+---@alias CodePointRange {[1]: integer; [2]: integer}
+
+---@type CodePointRange[]
+local zero_width_blocks = {
+	{0x0000, 0x001F}, -- C0
+	{0x007F, 0x009F}, -- Delete + C1
+	{0x034F, 0x034F}, -- combining grapheme joiner
+	{0x061C, 0x061C}, -- Arabic Letter	Strong
+	{0x200B, 0x200F}, -- {zero-width space, zero-width non-joiner, zero-width joiner, left-to-right mark, right-to-left mark}
+	{0x2028, 0x202E}, -- {line separator, paragraph separator, Left-to-Right Embedding, Right-to-Left Embedding, Pop Directional Format, Left-to-Right Override, Right-to-Left Override}
+	{0x2060, 0x2060}, -- word joiner
+	{0x2066, 0x2069}, -- {Left-to-Right Isolate, Right-to-Left Isolate, First Strong Isolate, Pop Directional Isolate}
+	{0xFEFF, 0xFEFF}, -- zero-width non-breaking space
+	-- Some other characters can also be combined https://en.wikipedia.org/wiki/Combining_character
+	{0x0300, 0x036F}, -- Combining Diacritical Marks	 0 BMP	Inherited
+	{0x1AB0, 0x1AFF}, -- Combining Diacritical Marks Extended	 0 BMP	Inherited
+	{0x1DC0, 0x1DFF}, -- Combining Diacritical Marks Supplement	 0 BMP	Inherited
+	{0x20D0, 0x20FF}, -- Combining Diacritical Marks for Symbols	 0 BMP	Inherited
+	{0xFE20, 0xFE2F}, -- Combining Half Marks	 0 BMP	Cyrillic (2 characters), Inherited (14 characters)
+	-- Egyptian Hieroglyph Format Controls and Shorthand format Controls
+	{0x13430, 0x1345F}, -- Egyptian Hieroglyph Format Controls	 1 SMP	Egyptian Hieroglyphs
+	{0x1BCA0, 0x1BCAF}, -- Shorthand Format Controls	 1 SMP	Common
+	-- not sure how to deal with those https://en.wikipedia.org/wiki/Spacing_Modifier_Letters
+	{0x02B0, 0x02FF}, -- Spacing Modifier Letters	 0 BMP	Bopomofo (2 characters), Latin (14 characters), Common (64 characters)
+}
+
+-- All characters have the same width as the first one
+---@type CodePointRange[]
+local same_width_blocks = {
+	{0x3400, 0x4DBF}, -- CJK Unified Ideographs Extension A	 0 BMP	Han
+	{0x4E00, 0x9FFF}, -- CJK Unified Ideographs	 0 BMP	Han
+	{0x20000, 0x2A6DF}, -- CJK Unified Ideographs Extension B	 2 SIP	Han
+	{0x2A700, 0x2B73F}, -- CJK Unified Ideographs Extension C	 2 SIP	Han
+	{0x2B740, 0x2B81F}, -- CJK Unified Ideographs Extension D	 2 SIP	Han
+	{0x2B820, 0x2CEAF}, -- CJK Unified Ideographs Extension E	 2 SIP	Han
+	{0x2CEB0, 0x2EBEF}, -- CJK Unified Ideographs Extension F	 2 SIP	Han
+	{0x2F800, 0x2FA1F}, -- CJK Compatibility Ideographs Supplement	 2 SIP	Han
+	{0x30000, 0x3134F}, -- CJK Unified Ideographs Extension G	 3 TIP	Han
+	{0x31350, 0x323AF}, -- CJK Unified Ideographs Extension H	 3 TIP	Han
+}
+
+---Get byte count of utf-8 character at index i in str
+---@param str string
+---@param i integer?
+---@return integer
+local function utf8_char_bytes(str, i)
+	local char_byte = str:byte(i)
+	if char_byte < 0xC0 then return 1
+	elseif char_byte < 0xE0 then return 2
+	elseif char_byte < 0xF0 then return 3
+	elseif char_byte < 0xF8 then return 4
+	else return 1 end
+end
+
+---Creates an iterator for an utf-8 encoded string
+---Iterates over utf-8 characters instead of bytes
+---@param str string
+---@return fun(): integer?, string?
+local function utf8_iter(str)
+	local byte_start = 1
+	return function()
+		local start = byte_start
+		if #str < start then return nil end
+		local byte_count = utf8_char_bytes(str, start)
+		byte_start = start + byte_count
+		return start, str:sub(start, start + byte_count - 1)
+	end
+end
+
+---Extract Unicode code point from utf-8 character at index i in str
+---@param str string
+---@param i integer
+---@return integer
+local function utf8_to_unicode(str, i)
+	local byte_count = utf8_char_bytes(str, i)
+	local char_byte = str:byte(i)
+	local unicode = char_byte
+	if byte_count ~= 1 then
+		local shift = 2 ^ (8 - byte_count)
+		char_byte = char_byte - math.floor(0xFF / shift) * shift
+		unicode = char_byte * (2 ^ 6) ^ (byte_count - 1)
+	end
+	for j = 2, byte_count do
+		char_byte = str:byte(i + j - 1) - 0x80
+		unicode = unicode + char_byte * (2 ^ 6) ^ (byte_count - j)
+	end
+	return round(unicode)
+end
+
+---Convert Unicode code point to utf-8 string
+---@param unicode integer
+---@return string?
+local function unicode_to_utf8(unicode)
+	if unicode < 0x80 then return string.char(unicode)
+	else
+		local byte_count
+		if unicode < 0x800 then byte_count = 2
+		elseif unicode < 0x10000 then byte_count = 3
+		elseif unicode < 0x110000 then byte_count = 4
+		else return end -- too big
+
+		local res = {}
+		local shift = 2 ^ 6
+		local after_shift = unicode
+		for _ = byte_count, 2, -1 do
+			local before_shift = after_shift
+			after_shift = math.floor(before_shift / shift)
+			table.insert(res, 1, before_shift - after_shift * shift + 0x80)
+		end
+		shift = 2 ^ (8 - byte_count)
+		table.insert(res, 1, after_shift + math.floor(0xFF / shift) * shift)
+		---@diagnostic disable-next-line: deprecated
+		return string.char(unpack(res))
+	end
+end
+
+---Update osd resolution if valid
+---@param width integer
+---@param height integer
+local function update_osd_resolution(width, height)
+	if width > 0 and height > 0 then osd_width, osd_height = width, height end
+end
+
+local text_osd = mp.create_osd_overlay("ass-events")
+text_osd.compute_bounds, text_osd.hidden = true, true
+---@type integer, integer
+local osd_width, osd_height = 100, 100
+mp.observe_property('osd-dimensions', 'native', function (_, dim)
+	if dim then update_osd_resolution(dim.w, dim.h) end
+end)
+
+---@param ass_text string
+---@return integer, integer, integer, integer
+local function measure_bounds(ass_text)
+	update_osd_resolution(mp.get_osd_size())
+	text_osd.res_x, text_osd.res_y = osd_width, osd_height
+	text_osd.data = ass_text
+	local res = text_osd:update()
+	return res.x0, res.y0, res.x1, res.y1
+end
+
+---@type {wrap: integer; bold: boolean; italic: boolean, rotate: number; size: number}
+local bounds_opts = {wrap = 2, bold = false, italic = false, rotate = 0, size = 0}
+
+---Measure text width and normalize to a font size of 1
+---text has to be ass safe
+---@param text string
+---@param size number
+---@param bold boolean
+---@param italic boolean
+---@param horizontal boolean
+---@return number, integer
+local function normalized_text_width(text, size, bold, italic, horizontal)
+	bounds_opts.bold, bounds_opts.italic, bounds_opts.rotate = bold, italic, horizontal and 0 or -90
+	local x1, y1 = nil, nil
+	size = size / 0.8
+	-- prevent endless loop
+	local repetitions_left = 5
+	repeat
+		size = size * 0.8
+		bounds_opts.size = size
+		local ass = assdraw.ass_new()
+		ass:txt(0, 0, horizontal and 7 or 1, text, bounds_opts)
+		_, _, x1, y1 = measure_bounds(ass.text)
+		repetitions_left = repetitions_left - 1
+		-- make sure nothing got clipped
+	until (x1 and x1 < osd_width and y1 < osd_height) or repetitions_left == 0
+	local width = (repetitions_left == 0 and not x1) and 0 or (horizontal and x1 or y1)
+	return width / size, horizontal and osd_width or osd_height
+end
+
+---Estimates character length based on utf8 byte count
+---1 character length is roughly the size of a latin character
+---@param char string
+---@return number
+local function char_length(char)
+	return #char > 2 and 2 or 1
+end
+
+---Estimates string length based on utf8 byte count
+---Note: Making a string in the iterator with the character is a waste here,
+---but as this function is only used when measuring whole string widths it's fine
+---@param text string
+---@return number
+local function text_length(text)
+	if not text or text == '' then return 0 end
+	local text_length = 0
+	for _, char in utf8_iter(tostring(text)) do text_length = text_length + char_length(char) end
+	return text_length
+end
+
+local width_length_ratio = 0.5
+---@type {[boolean]: {[string]: {[1]: number, [2]: integer}}}
+local char_width_cache = {}
+
+---Finds the best orientation of text on screen and returns the estimated max size
+---and if the text should be drawn horizontally
+---@param text string
+---@return number, boolean
+local function fit_on_screen(text)
+	local estimated_width = text_length(text) * width_length_ratio
+	if osd_width >= osd_height then
+		-- Fill the screen as much as we can, bigger is more accurate.
+		return math.min(osd_width / estimated_width, osd_height), true
+	else
+		return math.min(osd_height / estimated_width, osd_width), false
+	end
+end
+
+---Gets next stage from cache
+---@param cache {[any]: table}
+---@param value any
+local function get_cache_stage(cache, value)
+	local stage = cache[value]
+	if not stage then
+		stage = {}
+		cache[value] = stage
+	end
+	return stage
+end
+
+---Is measured resolution sufficient
+---@param px integer
+---@return boolean
+local function no_remeasure_required(px)
+	return px >= 800 or (px * 1.1 >= osd_width and px * 1.1 >= osd_height)
+end
+
+---Get measured width of character
+---@param char string
+---@param bold boolean
+---@return number, integer
+local function character_width(char, bold)
+	---@type {[string]: {[1]: number, [2]: integer}}
+	local char_widths = get_cache_stage(char_width_cache, bold)
+	local width_px = char_widths[char]
+	if width_px and no_remeasure_required(width_px[2]) then return width_px[1], width_px[2] end
+
+	local unicode = utf8_to_unicode(char, 1)
+	for _, block in ipairs(zero_width_blocks) do
+		if unicode >= block[1] and unicode <= block[2] then
+			char_widths[char] = {0, infinity}
+			return 0, infinity
+		end
+	end
+
+	local measured_char = nil
+	for _, block in ipairs(same_width_blocks) do
+		if unicode >= block[1] and unicode <= block[2] then
+			measured_char = unicode_to_utf8(block[1])
+			width_px = char_widths[measured_char]
+			if width_px and no_remeasure_required(width_px[2]) then
+				char_widths[char] = width_px
+				return width_px[1], width_px[2]
+			end
+			break
+		end
+	end
+
+	if not measured_char then measured_char = char end
+	-- half as many repetitions for wide characters
+	local char_count = 10 / char_length(char)
+	local max_size, horizontal = fit_on_screen(measured_char:rep(char_count))
+	local size = math.min(max_size * 0.9, 50)
+	char_count = math.min(math.floor(char_count * max_size / size * 0.8), 100)
+	local enclosing_char, enclosing_width, next_char_count = '|', 0, char_count
+	if measured_char == enclosing_char then enclosing_char = ''
+	else enclosing_width = 2 * character_width(enclosing_char, bold) end
+	local width_ratio, width, px = nil, nil, nil
+	repeat
+		char_count = next_char_count
+		local str = enclosing_char .. measured_char:rep(char_count) .. enclosing_char
+		width, px = normalized_text_width(str, size, bold, false, horizontal)
+		width = width - enclosing_width
+		width_ratio = width * size / (horizontal and osd_width or osd_height)
+		next_char_count = math.min(math.floor(char_count / width_ratio * 0.9), 100)
+	until width_ratio < 0.05 or width_ratio > 0.5 or char_count == next_char_count
+	width = width / char_count
+
+	width_px = {width, px}
+	if char ~= measured_char then char_widths[measured_char] = width_px end
+	char_widths[char] = width_px
+	return width, px
+end
+
+---Calculate text width from individual measured characters
+---@param text string|number
+---@param bold boolean
+---@return number, integer
+local function character_based_width(text, bold)
+	local max_width = 0
+	local min_px = infinity
+	for line in tostring(text):gmatch("([^\n]*)\n?") do
+		local total_width = 0
+		for _, char in utf8_iter(line) do
+			local width, px = character_width(char, bold)
+			total_width = total_width + width
+			if px < min_px then min_px = px end
+		end
+		if total_width > max_width then max_width = total_width end
+	end
+	return max_width, min_px
+end
+
+---Measure width of whole text
+---@param text string|number
+---@param bold boolean
+---@param italic boolean
+---@return number, integer
+local function whole_text_width(text, bold, italic)
+	text = tostring(text)
+	local size, horizontal = fit_on_screen(text)
+	return normalized_text_width(ass_escape(text), size * 0.9, bold, italic, horizontal)
+end
+
+---Get scale factor calculated from font size, bold and italic
+---@param opts {size: number; bold?: boolean; italic?: boolean}
+local function opts_scale_factor(opts)
+	return (opts.italic and 1.01 or 1) * opts.size
+end
+
+---@type {[boolean]: {[boolean]: {[string|number]: {[1]: number, [2]: integer}}}} | {[boolean]: {[string|number]: {[1]: number, [2]: integer}}}
+local width_cache = {}
+
+---Calculate width of text with the given opts
+---@param text string|number
+---@return number
+---@param opts {size: number; bold?: boolean; italic?: boolean}
+function text_width(text, opts)
+	if not text or text == '' then return 0 end
+
+	---@type boolean, boolean
+	local bold, italic = opts.bold or options.font_bold, opts.italic or false
+
+	if options.text_width_estimation then
+		---@type {[string|number]: {[1]: number, [2]: integer}}
+		local text_width = get_cache_stage(width_cache, bold)
+		local width_px = text_width[text]
+		if width_px and no_remeasure_required(width_px[2]) then return width_px[1] * opts_scale_factor(opts) end
+
+		local width, px = character_based_width(text, bold)
+		width_cache[bold][text] = {width, px}
+		return width * opts_scale_factor(opts)
+	else
+		---@type {[string|number]: {[1]: number, [2]: integer}}
+		local text_width = get_cache_stage(get_cache_stage(width_cache, bold), italic)
+		local width_px = text_width[text]
+		if width_px and no_remeasure_required(width_px[2]) then return width_px[1] * opts.size end
+
+		local width, px = whole_text_width(text, bold, italic)
+		width_cache[bold][italic][text] = {width, px}
+		return width * opts.size
+	end
+end
+
+---Wrap the text at the closest opportunity to target_line_length
+---@param text string
+---@param opts {size: number; bold?: boolean; italic?: boolean}
+---@param target_line_length number
+---@return string
+function wrap_text(text, opts, target_line_length)
+	local target_line_width = target_line_length * width_length_ratio * opts.size
+	local bold, scale_factor = opts.bold or false, opts_scale_factor(opts)
+	local wrap_at_chars = {' ', '　', '-', '–'}
+	local remove_when_wrap = {' ', '　'}
+	local lines = {}
+	for text_line in text:gmatch("([^\n]*)\n?") do
+		local line_width = 0
+		local line_start = 1
+		local before_end = nil
+		local before_width = 0
+		local before_line_start = 0
+		local before_removed_width = 0
+		for char_start, char in utf8_iter(text_line) do
+			local char_end = char_start + #char - 1
+			local can_wrap = false
+			for _, c in ipairs(wrap_at_chars) do
+				if char == c then
+					can_wrap = true
+					break
+				end
+			end
+			local char_width = character_width(char, bold) * scale_factor
+			line_width = line_width + char_width
+			if can_wrap or (char_end == #text_line) then
+				local remove = false
+				for _, c in ipairs(remove_when_wrap) do
+					if char == c then
+						remove = true
+						break
+					end
+				end
+				local line_width_after_remove = line_width - (remove and char_width or 0)
+				if line_width_after_remove < target_line_width then
+					before_end = remove and char_start - 1 or char_end
+					before_width = line_width_after_remove
+					before_line_start = char_end + 1
+					before_removed_width = remove and char_width or 0
+				else
+					if (target_line_width - before_width) <
+						(line_width_after_remove - target_line_width) then
+						lines[#lines + 1] = text_line:sub(line_start, before_end)
+						line_start = before_line_start
+						line_width = line_width - before_width - before_removed_width
+					else
+						lines[#lines + 1] = text_line:sub(line_start, remove and char_start - 1 or char_end)
+						line_start = char_end + 1
+						line_width = remove and line_width - char_width or line_width
+						line_width = 0
+					end
+					before_end = line_start
+					before_width = 0
+				end
+			end
+		end
+		if #text_line >= line_start then lines[#lines + 1] = text_line:sub(line_start)
+		elseif text_line == '' then lines[#lines + 1] = '' end
+	end
+	return table.concat(lines, '\n')
+end