summaryrefslogtreecommitdiffstats
path: root/.config/mpv/scripts/uosc_shared/lib/text.lua
blob: 7bb456f8c9d0c2e0a8246c8d7dd18c1c4fd770d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
-- https://en.wikipedia.org/wiki/Unicode_block
---@alias CodePointRange {[1]: integer; [2]: integer}

---Code point ranges whose characters are treated as taking up no width
---Each entry is an inclusive {first, last} pair of code points
---@type CodePointRange[]
local zero_width_blocks = {
	{0x0000, 0x001F}, -- C0 control characters
	{0x007F, 0x009F}, -- Delete + C1 control characters
	{0x034F, 0x034F}, -- combining grapheme joiner
	{0x061C, 0x061C}, -- Arabic letter mark
	{0x200B, 0x200F}, -- {zero-width space, zero-width non-joiner, zero-width joiner, left-to-right mark, right-to-left mark}
	{0x2028, 0x202E}, -- {line separator, paragraph separator, Left-to-Right Embedding, Right-to-Left Embedding, Pop Directional Format, Left-to-Right Override, Right-to-Left Override}
	{0x2060, 0x2060}, -- word joiner
	{0x2066, 0x2069}, -- {Left-to-Right Isolate, Right-to-Left Isolate, First Strong Isolate, Pop Directional Isolate}
	{0xFEFF, 0xFEFF}, -- zero-width non-breaking space
	-- Combining characters are drawn on top of the preceding character
	-- https://en.wikipedia.org/wiki/Combining_character
	{0x0300, 0x036F}, -- Combining Diacritical Marks (BMP)
	{0x1AB0, 0x1AFF}, -- Combining Diacritical Marks Extended (BMP)
	{0x1DC0, 0x1DFF}, -- Combining Diacritical Marks Supplement (BMP)
	{0x20D0, 0x20FF}, -- Combining Diacritical Marks for Symbols (BMP)
	{0xFE20, 0xFE2F}, -- Combining Half Marks (BMP)
	-- Egyptian Hieroglyph Format Controls and Shorthand Format Controls
	{0x13430, 0x1345F}, -- Egyptian Hieroglyph Format Controls (SMP)
	{0x1BCA0, 0x1BCAF}, -- Shorthand Format Controls (SMP)
	-- not sure how to deal with those https://en.wikipedia.org/wiki/Spacing_Modifier_Letters
	-- they are currently counted as zero width like everything else in this list
	{0x02B0, 0x02FF}, -- Spacing Modifier Letters (BMP)
}

---Code point ranges in which every character is assumed to be as wide as the first one of the range
---Only the first character of a range gets measured, the result is reused for the rest of the range
---Each entry is an inclusive {first, last} pair of code points
---@type CodePointRange[]
local same_width_blocks = {
	{0x3400, 0x4DBF}, -- CJK Unified Ideographs Extension A (BMP, Han)
	{0x4E00, 0x9FFF}, -- CJK Unified Ideographs (BMP, Han)
	{0x20000, 0x2A6DF}, -- CJK Unified Ideographs Extension B (SIP, Han)
	{0x2A700, 0x2B73F}, -- CJK Unified Ideographs Extension C (SIP, Han)
	{0x2B740, 0x2B81F}, -- CJK Unified Ideographs Extension D (SIP, Han)
	{0x2B820, 0x2CEAF}, -- CJK Unified Ideographs Extension E (SIP, Han)
	{0x2CEB0, 0x2EBEF}, -- CJK Unified Ideographs Extension F (SIP, Han)
	{0x2F800, 0x2FA1F}, -- CJK Compatibility Ideographs Supplement (SIP, Han)
	{0x30000, 0x3134F}, -- CJK Unified Ideographs Extension G (TIP, Han)
	{0x31350, 0x323AF}, -- CJK Unified Ideographs Extension H (TIP, Han)
}

---Determine how many bytes the utf-8 character starting at byte index i occupies
---The answer is derived from the lead byte only; continuation bytes and
---bytes that cannot start a sequence are reported as a single byte
---@param str string
---@param i integer?
---@return integer
local function utf8_char_bytes(str, i)
	local lead = str:byte(i)
	if lead >= 0xF8 then return 1 end
	if lead >= 0xF0 then return 4 end
	if lead >= 0xE0 then return 3 end
	if lead >= 0xC0 then return 2 end
	return 1
end

---Build an iterator that walks an utf-8 encoded string character by character
---Every call yields the byte index at which the next character starts together
---with that character, and nil once the end of the string has been passed
---@param str string
---@return fun(): integer?, string?
local function utf8_iter(str)
	local next_index = 1
	return function()
		local char_start = next_index
		if char_start > #str then return nil end
		next_index = char_start + utf8_char_bytes(str, char_start)
		return char_start, str:sub(char_start, next_index - 1)
	end
end

---Extract Unicode code point from utf-8 character at index i in str
---A multi-byte sequence that is cut short, or whose trailing bytes are not
---continuation bytes (0x80-0xBF), yields the replacement character U+FFFD
---instead of raising an error or returning a meaningless number
---@param str string
---@param i integer
---@return integer
local function utf8_to_unicode(str, i)
	local byte_count = utf8_char_bytes(str, i)
	local lead = str:byte(i)
	if byte_count == 1 then return round(lead) end

	-- Strip the length marker bits from the lead byte, keeping only its payload bits
	local marker_shift = 2 ^ (8 - byte_count)
	local unicode = lead - math.floor(0xFF / marker_shift) * marker_shift
	for j = 2, byte_count do
		local continuation = str:byte(i + j - 1)
		-- str:byte() returns nil past the end of the string (truncated sequence)
		if not continuation or continuation < 0x80 or continuation > 0xBF then
			return 0xFFFD
		end
		-- Every continuation byte contributes 6 payload bits
		unicode = unicode * 2 ^ 6 + (continuation - 0x80)
	end
	return round(unicode)
end

---Convert Unicode code point to utf-8 string
---Returns nil for values that are negative or above 0x10FFFF
---@param unicode integer
---@return string?
local function unicode_to_utf8(unicode)
	if unicode < 0 or unicode >= 0x110000 then return end -- not encodable
	if unicode < 0x80 then return string.char(unicode) end

	-- The lead byte starts with as many set bits as the sequence has bytes
	local byte_count, lead_marker
	if unicode < 0x800 then byte_count, lead_marker = 2, 0xC0
	elseif unicode < 0x10000 then byte_count, lead_marker = 3, 0xE0
	else byte_count, lead_marker = 4, 0xF0 end

	local bytes = {}
	local remaining = unicode
	-- Fill the continuation bytes back to front, 6 payload bits each
	for position = byte_count, 2, -1 do
		bytes[position] = 0x80 + remaining % 0x40
		remaining = math.floor(remaining / 0x40)
	end
	bytes[1] = lead_marker + remaining
	-- `unpack` is a global only in Lua 5.1/LuaJIT, later versions provide `table.unpack`
	---@diagnostic disable-next-line: deprecated
	local unpack_values = table.unpack or unpack
	return string.char(unpack_values(bytes, 1, byte_count))
end

---Last valid OSD resolution, used as the resolution of the measuring overlay
---Has to be declared before `update_osd_resolution`: a local only becomes visible
---after its declaration, so declaring it later would make the function assign
---globals of the same name while these locals would keep their initial values
---@type integer, integer
local osd_width, osd_height = 100, 100

---Update osd resolution if valid
---Missing or non-positive dimensions are ignored and the previous resolution is kept
---@param width integer
---@param height integer
local function update_osd_resolution(width, height)
	if width and height and width > 0 and height > 0 then
		osd_width, osd_height = width, height
	end
end

-- Hidden overlay that is only used to compute the bounds of rendered text
local text_osd = mp.create_osd_overlay("ass-events")
text_osd.compute_bounds, text_osd.hidden = true, true
mp.observe_property('osd-dimensions', 'native', function (_, dim)
	if dim then update_osd_resolution(dim.w, dim.h) end
end)

---Hand ass_text to the measuring overlay and report the bounds it computed
---The overlay resolution is set to the most recent valid OSD resolution first
---@param ass_text string
---@return integer, integer, integer, integer
local function measure_bounds(ass_text)
	-- refresh the stored resolution before it is applied to the overlay
	update_osd_resolution(mp.get_osd_size())
	text_osd.res_x = osd_width
	text_osd.res_y = osd_height
	text_osd.data = ass_text
	local bounds = text_osd:update()
	return bounds.x0, bounds.y0, bounds.x1, bounds.y1
end

---Options passed to `ass:txt` while measuring
---bold, italic, rotate and size are overwritten for every measurement
---@type {wrap: integer; bold: boolean; italic: boolean, rotate: number; size: number}
local bounds_opts = {wrap = 2, bold = false, italic = false, rotate = 0, size = 0}

---Measure text width and normalize to a font size of 1
---text has to be ass safe
---For a vertical measurement the text is rotated by -90 and its vertical extent is used
---If the measured bounds reach the OSD resolution the text may have been clipped,
---so the font size is reduced by 20% and the text measured again (5 measurements at most)
---@param text string
---@param size number
---@param bold boolean
---@param italic boolean
---@param horizontal boolean
---@return number, integer # normalized width (0 if no bounds could be measured), OSD resolution along the measured direction
local function normalized_text_width(text, size, bold, italic, horizontal)
	bounds_opts.bold, bounds_opts.italic, bounds_opts.rotate = bold, italic, horizontal and 0 or -90
	local x1, y1 = nil, nil
	-- compensates the reduction applied at the start of the first iteration
	size = size / 0.8
	-- prevent endless loop
	local repetitions_left = 5
	repeat
		size = size * 0.8
		bounds_opts.size = size
		local ass = assdraw.ass_new()
		ass:txt(0, 0, horizontal and 7 or 1, text, bounds_opts)
		-- only x1 and y1 are needed; select() skips x0 and y0 without
		-- assigning them to a global placeholder variable
		x1, y1 = select(3, measure_bounds(ass.text))
		repetitions_left = repetitions_left - 1
		-- make sure nothing got clipped
	until (x1 and x1 < osd_width and y1 < osd_height) or repetitions_left == 0
	local width = (repetitions_left == 0 and not x1) and 0 or (horizontal and x1 or y1)
	return width / size, horizontal and osd_width or osd_height
end

---Rough length estimate of a single character derived from its utf-8 byte count
---A length of 1 corresponds roughly to a latin character; characters encoded
---with more than two bytes count as 2
---@param char string
---@return number
local function char_length(char)
	if #char > 2 then return 2 end
	return 1
end

---Rough length estimate of a string: the sum of `char_length` over all of its characters
---Note: the iterator builds a string for every character, which is wasteful,
---but as this function is only used when measuring whole string widths it's fine
---@param text string
---@return number
local function text_length(text)
	if not text or text == '' then return 0 end
	local total = 0
	for _, char in utf8_iter(tostring(text)) do
		total = total + char_length(char)
	end
	return total
end

---Estimated width of one unit of text length at a font size of 1
local width_length_ratio = 0.5
---Measured character widths, first keyed by bold and then by character
---@type {[boolean]: {[string]: {[1]: number, [2]: integer}}}
local char_width_cache = {}

---Picks the orientation in which text has the most room on screen
---Text runs along the longer side of the OSD; the returned size is limited by
---the estimated text width along that side and by the length of the shorter side
---@param text string
---@return number, boolean # estimated max font size, whether to draw horizontally
local function fit_on_screen(text)
	local estimated_width = text_length(text) * width_length_ratio
	local horizontal = osd_width >= osd_height
	local long_side, short_side
	if horizontal then
		long_side, short_side = osd_width, osd_height
	else
		long_side, short_side = osd_height, osd_width
	end
	-- Fill the screen as much as we can, bigger is more accurate.
	return math.min(long_side / estimated_width, short_side), horizontal
end

---Look up the nested cache table stored under value, creating it on first use
---@param cache {[any]: table}
---@param value any
---@return table
local function get_cache_stage(cache, value)
	if not cache[value] then cache[value] = {} end
	return cache[value]
end

---Tells whether a measurement taken at a resolution of px is still good enough
---That is the case from 800 upwards, or when px scaled by 1.1 reaches both
---the current OSD width and height
---@param px integer
---@return boolean
local function no_remeasure_required(px)
	if px >= 800 then return true end
	local scaled = px * 1.1
	return scaled >= osd_width and scaled >= osd_height
end

---Get measured width of character
---Results are cached per bold setting; a cached value is reused as long as the
---resolution it was measured at passes `no_remeasure_required`
---@param char string
---@param bold boolean
---@return number, integer # width normalized to a font size of 1, resolution the width was measured at
local function character_width(char, bold)
	---@type {[string]: {[1]: number, [2]: integer}}
	local char_widths = get_cache_stage(char_width_cache, bold)
	local width_px = char_widths[char]
	if width_px and no_remeasure_required(width_px[2]) then return width_px[1], width_px[2] end

	-- Characters from the zero width blocks never get measured; the infinite
	-- resolution stored with them means they never have to be remeasured
	local unicode = utf8_to_unicode(char, 1)
	for _, block in ipairs(zero_width_blocks) do
		if unicode >= block[1] and unicode <= block[2] then
			char_widths[char] = {0, infinity}
			return 0, infinity
		end
	end

	-- Characters from the same width blocks share the measurement of the
	-- first character of their block
	local measured_char = nil
	for _, block in ipairs(same_width_blocks) do
		if unicode >= block[1] and unicode <= block[2] then
			measured_char = unicode_to_utf8(block[1])
			width_px = char_widths[measured_char]
			if width_px and no_remeasure_required(width_px[2]) then
				char_widths[char] = width_px
				return width_px[1], width_px[2]
			end
			break
		end
	end

	if not measured_char then measured_char = char end
	-- half as many repetitions for wide characters
	local char_count = 10 / char_length(char)
	local max_size, horizontal = fit_on_screen(measured_char:rep(char_count))
	-- the font size used for measuring is capped at 50, the repetition count at 100
	local size = math.min(max_size * 0.9, 50)
	char_count = math.min(math.floor(char_count * max_size / size * 0.8), 100)
	-- The repeated character is measured between two '|' whose width is
	-- subtracted again afterwards; '|' itself is measured without enclosing
	-- characters, which also ends the recursion
	local enclosing_char, enclosing_width, next_char_count = '|', 0, char_count
	if measured_char == enclosing_char then enclosing_char = ''
	else enclosing_width = 2 * character_width(enclosing_char, bold) end
	local width_ratio, width, px = nil, nil, nil
	-- Adjust the repetition count until the text covers between 5% and 50% of
	-- the measured direction or the count stops changing
	repeat
		char_count = next_char_count
		-- NOTE(review): str is not passed through ass_escape although
		-- normalized_text_width documents that text has to be ass safe —
		-- verify the result for characters such as '\' or '{'
		local str = enclosing_char .. measured_char:rep(char_count) .. enclosing_char
		width, px = normalized_text_width(str, size, bold, false, horizontal)
		width = width - enclosing_width
		-- share of the measured direction that is covered by the repeated character
		width_ratio = width * size / (horizontal and osd_width or osd_height)
		next_char_count = math.min(math.floor(char_count / width_ratio * 0.9), 100)
	until width_ratio < 0.05 or width_ratio > 0.5 or char_count == next_char_count
	width = width / char_count

	-- Store the result for the character and, if a different character was
	-- measured in its place, for that one as well
	width_px = {width, px}
	if char ~= measured_char then char_widths[measured_char] = width_px end
	char_widths[char] = width_px
	return width, px
end

---Estimate the width of text by adding up the measured widths of its characters
---For text with several lines the width of the widest line is returned
---@param text string|number
---@param bold boolean
---@return number, integer # width of the widest line, lowest resolution any of the characters was measured at
local function character_based_width(text, bold)
	local widest_line, lowest_px = 0, infinity
	for line in tostring(text):gmatch("([^\n]*)\n?") do
		local line_width = 0
		for _, char in utf8_iter(line) do
			local char_width, char_px = character_width(char, bold)
			line_width = line_width + char_width
			if char_px < lowest_px then lowest_px = char_px end
		end
		if line_width > widest_line then widest_line = line_width end
	end
	return widest_line, lowest_px
end

---Measure the width of the complete text in one go
---The text is ass escaped and measured at 90% of the size estimated to fit on screen
---@param text string|number
---@param bold boolean
---@param italic boolean
---@return number, integer
local function whole_text_width(text, bold, italic)
	local str = tostring(text)
	local max_size, horizontal = fit_on_screen(str)
	local measure_size = max_size * 0.9
	return normalized_text_width(ass_escape(str), measure_size, bold, italic, horizontal)
end

---Factor by which a width normalized to font size 1 gets scaled for the given opts
---This is the font size, enlarged by 1% for italic text
---@param opts {size: number; bold?: boolean; italic?: boolean}
---@return number
local function opts_scale_factor(opts)
	local italic_factor = 1
	if opts.italic then italic_factor = 1.01 end
	return italic_factor * opts.size
end

---Measured text widths
---Keyed by bold and then text when widths are estimated from characters,
---keyed by bold, italic and then text when whole texts are measured
---@type {[boolean]: {[boolean]: {[string|number]: {[1]: number, [2]: integer}}}} | {[boolean]: {[string|number]: {[1]: number, [2]: integer}}}
local width_cache = {}

---Calculate width of text with the given opts
---Depending on `options.text_width_estimation` the width is either put together
---from individually measured characters or the text is measured as a whole
---@param text string|number
---@return number
---@param opts {size: number; bold?: boolean; italic?: boolean}
function text_width(text, opts)
	if not text or text == '' then return 0 end

	---@type boolean, boolean
	local bold, italic = opts.bold or options.font_bold, opts.italic or false
	local estimate = options.text_width_estimation

	-- italic only gets a cache stage of its own when whole texts are measured
	---@type {[string|number]: {[1]: number, [2]: integer}}
	local cache = get_cache_stage(width_cache, bold)
	if not estimate then cache = get_cache_stage(cache, italic) end

	local entry = cache[text]
	if not (entry and no_remeasure_required(entry[2])) then
		local width, px
		if estimate then
			width, px = character_based_width(text, bold)
		else
			width, px = whole_text_width(text, bold, italic)
		end
		entry = {width, px}
		cache[text] = entry
	end

	if estimate then return entry[1] * opts_scale_factor(opts) end
	return entry[1] * opts.size
end

---Wrap the text at the closest opportunity to target_line_length
---Lines are only broken at the characters listed in `wrap_at_chars`; a character
---that is also listed in `remove_when_wrap` is dropped when a line is broken at it
---A stretch of text without any break opportunity stays on one line, even if it
---is wider than the target
---@param text string
---@param opts {size: number; bold?: boolean; italic?: boolean}
---@param target_line_length number
---@return string
function wrap_text(text, opts, target_line_length)
	local target_line_width = target_line_length * width_length_ratio * opts.size
	local bold, scale_factor = opts.bold or false, opts_scale_factor(opts)
	-- NOTE(review): the first two entries of both lists look identical here;
	-- presumably the second one is meant to be a different kind of space —
	-- verify the bytes in the file
	local wrap_at_chars = {' ', ' ', '-', '–'}
	local remove_when_wrap = {' ', ' '}

	---Is char one of the entries of list
	---@param char string
	---@param list string[]
	---@return boolean
	local function is_one_of(char, list)
		for _, c in ipairs(list) do
			if char == c then return true end
		end
		return false
	end

	local lines = {}
	for text_line in text:gmatch("([^\n]*)\n?") do
		-- width of the text from line_start up to and including the current character
		local line_width = 0
		local line_start = 1
		-- Last break opportunity that still fit into the target width:
		-- end of the line if broken there (nil while there is no such opportunity),
		-- width of that line, start of the following line, and width of the
		-- character that gets removed by breaking there
		local before_end = nil
		local before_width = 0
		local before_line_start = 0
		local before_removed_width = 0
		for char_start, char in utf8_iter(text_line) do
			local char_end = char_start + #char - 1
			local can_wrap = is_one_of(char, wrap_at_chars)
			local char_width = character_width(char, bold) * scale_factor
			line_width = line_width + char_width
			-- the end of the text line counts as a break opportunity as well
			if can_wrap or (char_end == #text_line) then
				local remove = is_one_of(char, remove_when_wrap)
				local removed_width = remove and char_width or 0
				local line_width_after_remove = line_width - removed_width
				if line_width_after_remove < target_line_width then
					-- still fits, remember as fallback for when the next opportunity doesn't
					before_end = remove and char_start - 1 or char_end
					before_width = line_width_after_remove
					before_line_start = char_end + 1
					before_removed_width = removed_width
				else
					-- Break at whichever opportunity is closer to the target width.
					-- Without a remembered opportunity the only choice is the current
					-- one; falling back to a missing or outdated one would emit wrong
					-- text and move line_start to an unrelated position.
					if before_end and (target_line_width - before_width) <
						(line_width_after_remove - target_line_width) then
						lines[#lines + 1] = text_line:sub(line_start, before_end)
						line_start = before_line_start
						-- what follows the break is already part of the next line
						line_width = line_width - before_width - before_removed_width
					else
						lines[#lines + 1] = text_line:sub(line_start, remove and char_start - 1 or char_end)
						line_start = char_end + 1
						line_width = 0
					end
					-- the remembered opportunity belongs to a line that has been emitted
					before_end = nil
					before_width = 0
				end
			end
		end
		-- NOTE(review): on Lua 5.1/LuaJIT the pattern above presumably also yields
		-- an empty match at the very end of text, which adds an empty last line
		-- here — confirm whether that is intended
		if #text_line >= line_start then lines[#lines + 1] = text_line:sub(line_start)
		elseif text_line == '' then lines[#lines + 1] = '' end
	end
	return table.concat(lines, '\n')
end