Merge pull request #64563 from timothyqiu/word-wrap

Fix `String::word_wrap()` for long words
2022-12-16 13:47:05 +07:00 · 2022-12-16 13:47:05 +07:00 · 912fd3f0e1
parent 49a60b1d1c 207e52c161
commit 912fd3f0e1
16 changed files with 204 additions and 105 deletions
--- a/core/string/ustring.cpp
+++ b/core/string/ustring.cpp
@ -220,37 +220,6 @@ void CharString::copy_from(const char *p_cstr) {
 /*  String                                                               */
 /*************************************************************************/

-//kind of poor should be rewritten properly
-String String::word_wrap(int p_chars_per_line) const {
-	int from = 0;
-	int last_space = 0;
-	String ret;
-	for (int i = 0; i < length(); i++) {
-		if (i - from >= p_chars_per_line) {
-			if (last_space == -1) {
-				ret += substr(from, i - from + 1) + "\n";
-			} else {
-				ret += substr(from, last_space - from) + "\n";
-				i = last_space; //rewind
-			}
-			from = i + 1;
-			last_space = -1;
-		} else if (operator[](i) == ' ' || operator[](i) == '\t') {
-			last_space = i;
-		} else if (operator[](i) == '\n') {
-			ret += substr(from, i - from) + "\n";
-			from = i + 1;
-			last_space = -1;
-		}
-	}
-
-	if (from < length()) {
-		ret += substr(from, length());
-	}
-
-	return ret;
-}
-
 Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const {
 	// Splits the URL into scheme, host, port, path. Strip credentials when present.
 	String base = *this;
--- a/core/string/ustring.h
+++ b/core/string/ustring.h
@ -425,7 +425,6 @@ public:
 	String c_escape_multiline() const;
 	String c_unescape() const;
 	String json_escape() const;
-	String word_wrap(int p_chars_per_line) const;
 	Error parse_url(String &r_scheme, String &r_host, int &r_port, String &r_path) const;

 	String property_name_encode() const;
--- a/doc/classes/TextServer.xml
+++ b/doc/classes/TextServer.xml
@ -1548,8 +1548,15 @@
 			<return type="PackedInt32Array" />
 			<param index="0" name="string" type="String" />
 			<param index="1" name="language" type="String" default="&quot;&quot;" />
-			<description>
-				Returns array of the word break character offsets.
+			<param index="2" name="chars_per_line" type="int" default="0" />
+			<description>
+				Returns an array of word break boundaries. Elements come in pairs: the start offset (inclusive) and the end offset (exclusive) of each word, so the length of the array is always even.
+				When [param chars_per_line] is greater than zero, line break boundaries are returned instead, in the same start/end pair format.
+				[codeblock]
+				var ts = TextServerManager.get_primary_interface()
+				print(ts.string_get_word_breaks("Godot Engine")) # Prints [0, 5, 6, 12]
+				print(ts.string_get_word_breaks("Godot Engine", "en", 5)) # Prints [0, 5, 6, 11, 11, 12]
+				[/codeblock]
 			</description>
 		</method>
 		<method name="string_to_lower" qualifiers="const">
--- a/doc/classes/TextServerExtension.xml
+++ b/doc/classes/TextServerExtension.xml
@ -1346,6 +1346,7 @@
 			<return type="PackedInt32Array" />
 			<param index="0" name="string" type="String" />
 			<param index="1" name="language" type="String" />
+			<param index="2" name="chars_per_line" type="int" />
 			<description>
 			</description>
 		</method>
--- a/editor/debugger/script_editor_debugger.cpp
+++ b/editor/debugger/script_editor_debugger.cpp
@ -751,7 +751,16 @@ void ScriptEditorDebugger::_set_reason_text(const String &p_reason, MessageType
 			reason->add_theme_color_override("font_color", get_theme_color(SNAME("success_color"), SNAME("Editor")));
 	}
 	reason->set_text(p_reason);
-	reason->set_tooltip_text(p_reason.word_wrap(80));
+
+	const PackedInt32Array boundaries = TS->string_get_word_breaks(p_reason, "", 80);
+	PackedStringArray lines;
+	for (int i = 0; i < boundaries.size(); i += 2) {
+		const int start = boundaries[i];
+		const int end = boundaries[i + 1];
+		lines.append(p_reason.substr(start, end - start + 1));
+	}
+
+	reason->set_tooltip_text(String("\n").join(lines));
 }

 void ScriptEditorDebugger::_notification(int p_what) {
--- a/editor/scene_tree_editor.cpp
+++ b/editor/scene_tree_editor.cpp
@ -132,8 +132,16 @@ void SceneTreeEditor::_cell_button_pressed(Object *p_item, int p_column, int p_i
 		if (config_err.is_empty()) {
 			return;
 		}
-		config_err = config_err.word_wrap(80);
-		warning->set_text(config_err);
+
+		const PackedInt32Array boundaries = TS->string_get_word_breaks(config_err, "", 80);
+		PackedStringArray lines;
+		for (int i = 0; i < boundaries.size(); i += 2) {
+			const int start = boundaries[i];
+			const int end = boundaries[i + 1];
+			lines.append(config_err.substr(start, end - start + 1));
+		}
+
+		warning->set_text(String("\n").join(lines));
 		warning->popup_centered();

 	} else if (p_id == BUTTON_SIGNALS) {
--- a/modules/text_server_adv/text_server_adv.cpp
+++ b/modules/text_server_adv/text_server_adv.cpp
@ -6246,7 +6246,7 @@ String TextServerAdvanced::_string_to_lower(const String &p_string, const String
 	return String::utf16(lower.ptr(), len);
 }

-PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language) const {
+PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const {
 	const String lang = (p_language.is_empty()) ? TranslationServer::get_singleton()->get_tool_locale() : p_language;
 	// Convert to UTF-16.
 	Char16String utf16 = p_string.utf16();
@ -6254,15 +6254,7 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str
 	HashSet<int> breaks;
 	UErrorCode err = U_ZERO_ERROR;
 	UBreakIterator *bi = ubrk_open(UBRK_LINE, lang.ascii().get_data(), (const UChar *)utf16.get_data(), utf16.length(), &err);
-	if (U_FAILURE(err)) {
-		// No data loaded - use fallback.
-		for (int i = 0; i < p_string.length(); i++) {
-			char32_t c = p_string[i];
-			if (is_whitespace(c) || is_linebreak(c)) {
-				breaks.insert(i);
-			}
-		}
-	} else {
+	if (U_SUCCESS(err)) {
 		while (ubrk_next(bi) != UBRK_DONE) {
 			int pos = _convert_pos(p_string, utf16, ubrk_current(bi)) - 1;
 			if (pos != p_string.length() - 1) {
@ -6273,24 +6265,80 @@ PackedInt32Array TextServerAdvanced::_string_get_word_breaks(const String &p_str
 	ubrk_close(bi);

 	PackedInt32Array ret;
+
+	int line_start = 0;
+	int line_end = 0; // End of last word on current line.
+	int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
+	int word_length = 0;
+
 	for (int i = 0; i < p_string.length(); i++) {
-		char32_t c = p_string[i];
-		if (c == 0xfffc) {
-			continue;
-		}
-		if (u_ispunct(c) && c != 0x005F) {
-			ret.push_back(i);
-			continue;
-		}
-		if (is_underscore(c)) {
-			ret.push_back(i);
-			continue;
-		}
-		if (breaks.has(i)) {
+		const char32_t c = p_string[i];
+
+		if (is_linebreak(c)) {
+			// Force newline.
+			ret.push_back(line_start);
 			ret.push_back(i);
+			line_start = i + 1;
+			line_end = line_start;
+			word_start = line_start;
+			word_length = 0;
+		} else if (c == 0xfffc) {
 			continue;
+		} else if ((u_ispunct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
+			// A whitespace ends current word.
+			if (word_length > 0) {
+				line_end = i - 1;
+				word_start = -1;
+				word_length = 0;
+			}
+		} else if (breaks.has(i)) {
+			// End current word, no space.
+			if (word_length > 0) {
+				line_end = i;
+				word_start = i + 1;
+				word_length = 0;
+			}
+			if (p_chars_per_line <= 0) {
+				ret.push_back(line_start);
+				ret.push_back(line_end + 1);
+				line_start = word_start;
+				line_end = line_start;
+			}
+		} else {
+			if (word_start == -1) {
+				word_start = i;
+				if (p_chars_per_line <= 0) {
+					ret.push_back(line_start);
+					ret.push_back(line_end + 1);
+					line_start = word_start;
+					line_end = line_start;
+				}
+			}
+			word_length += 1;
+
+			if (p_chars_per_line > 0) {
+				if (word_length > p_chars_per_line) {
+					// Word too long: wrap before current character.
+					ret.push_back(line_start);
+					ret.push_back(i);
+					line_start = i;
+					line_end = i;
+					word_start = i;
+					word_length = 1;
+				} else if (i - line_start + 1 > p_chars_per_line) {
+					// Line too long: wrap after the last word.
+					ret.push_back(line_start);
+					ret.push_back(line_end + 1);
+					line_start = word_start;
+					line_end = line_start;
+				}
+			}
 		}
 	}
+	if (line_start < p_string.length()) {
+		ret.push_back(line_start);
+		ret.push_back(p_string.length());
+	}
 	return ret;
 }

--- a/modules/text_server_adv/text_server_adv.h
+++ b/modules/text_server_adv/text_server_adv.h
@ -915,7 +915,7 @@ public:
 	MODBIND2RC(String, parse_number, const String &, const String &);
 	MODBIND1RC(String, percent_sign, const String &);

-	MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &);
+	MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int);

 	MODBIND2RC(int64_t, is_confusable, const String &, const PackedStringArray &);
 	MODBIND1RC(bool, spoof_check, const String &);
--- a/modules/text_server_fb/text_server_fb.cpp
+++ b/modules/text_server_fb/text_server_fb.cpp
@ -4099,26 +4099,69 @@ String TextServerFallback::_string_to_lower(const String &p_string, const String
 	return lower;
 }

-PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language) const {
+PackedInt32Array TextServerFallback::_string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const {
 	PackedInt32Array ret;
+
+	int line_start = 0;
+	int line_end = 0; // End of last word on current line.
+	int word_start = 0; // -1 if no word encountered. Leading spaces are part of a word.
+	int word_length = 0;
+
 	for (int i = 0; i < p_string.length(); i++) {
-		char32_t c = p_string[i];
-		if (c == 0xfffc) {
-			continue;
-		}
-		if (is_punct(c) && c != 0x005F) {
-			ret.push_back(i);
-			continue;
-		}
-		if (is_underscore(c)) {
-			ret.push_back(i);
-			continue;
-		}
-		if (is_whitespace(c) || is_linebreak(c)) {
+		const char32_t c = p_string[i];
+
+		if (is_linebreak(c)) {
+			// Force newline.
+			ret.push_back(line_start);
 			ret.push_back(i);
+			line_start = i + 1;
+			line_end = line_start;
+			word_start = line_start;
+			word_length = 0;
+		} else if (c == 0xfffc) {
 			continue;
+		} else if ((is_punct(c) && c != 0x005F) || is_underscore(c) || c == '\t' || is_whitespace(c)) {
+			// A whitespace ends current word.
+			if (word_length > 0) {
+				line_end = i - 1;
+				word_start = -1;
+				word_length = 0;
+			}
+		} else {
+			if (word_start == -1) {
+				word_start = i;
+				if (p_chars_per_line <= 0) {
+					ret.push_back(line_start);
+					ret.push_back(line_end + 1);
+					line_start = word_start;
+					line_end = line_start;
+				}
+			}
+			word_length += 1;
+
+			if (p_chars_per_line > 0) {
+				if (word_length > p_chars_per_line) {
+					// Word too long: wrap before current character.
+					ret.push_back(line_start);
+					ret.push_back(i);
+					line_start = i;
+					line_end = i;
+					word_start = i;
+					word_length = 1;
+				} else if (i - line_start + 1 > p_chars_per_line) {
+					// Line too long: wrap after the last word.
+					ret.push_back(line_start);
+					ret.push_back(line_end + 1);
+					line_start = word_start;
+					line_end = line_start;
+				}
+			}
 		}
 	}
+	if (line_start < p_string.length()) {
+		ret.push_back(line_start);
+		ret.push_back(p_string.length());
+	}
 	return ret;
 }

--- a/modules/text_server_fb/text_server_fb.h
+++ b/modules/text_server_fb/text_server_fb.h
@ -786,7 +786,7 @@ public:
 	MODBIND1RC(double, shaped_text_get_underline_position, const RID &);
 	MODBIND1RC(double, shaped_text_get_underline_thickness, const RID &);

-	MODBIND2RC(PackedInt32Array, string_get_word_breaks, const String &, const String &);
+	MODBIND3RC(PackedInt32Array, string_get_word_breaks, const String &, const String &, int);

 	MODBIND2RC(String, string_to_upper, const String &, const String &);
 	MODBIND2RC(String, string_to_lower, const String &, const String &);
--- a/platform/linuxbsd/tts_linux.cpp
+++ b/platform/linuxbsd/tts_linux.cpp
@ -117,13 +117,12 @@ void TTS_Linux::speech_event_callback(size_t p_msg_id, size_t p_client_id, SPDNo
 				free_spd_voices(voices);
 			}
 			PackedInt32Array breaks = TS->string_get_word_breaks(message.text, language);
-			int prev = 0;
-			for (int i = 0; i < breaks.size(); i++) {
-				text += message.text.substr(prev, breaks[i] - prev);
-				text += "<mark name=\"" + String::num_int64(breaks[i], 10) + "\"/>";
-				prev = breaks[i];
+			for (int i = 0; i < breaks.size(); i += 2) {
+				const int start = breaks[i];
+				const int end = breaks[i + 1];
+				text += message.text.substr(start, end - start + 1);
+				text += "<mark name=\"" + String::num_int64(end, 10) + "\"/>";
 			}
-			text += message.text.substr(prev, -1);

 			spd_set_synthesis_voice(tts->synth, message.voice.utf8().get_data());
 			spd_set_volume(tts->synth, message.volume * 2 - 100);
--- a/servers/text/text_server_extension.cpp
+++ b/servers/text/text_server_extension.cpp
@ -308,7 +308,7 @@ void TextServerExtension::_bind_methods() {
 	GDVIRTUAL_BIND(_strip_diacritics, "string");
 	GDVIRTUAL_BIND(_is_valid_identifier, "string");

-	GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language");
+	GDVIRTUAL_BIND(_string_get_word_breaks, "string", "language", "chars_per_line");

 	GDVIRTUAL_BIND(_is_confusable, "string", "dict");
 	GDVIRTUAL_BIND(_spoof_check, "string");
@ -1379,9 +1379,9 @@ TypedArray<Vector2i> TextServerExtension::parse_structured_text(StructuredTextPa
 	return ret;
 }

-PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language) const {
+PackedInt32Array TextServerExtension::string_get_word_breaks(const String &p_string, const String &p_language, int p_chars_per_line) const {
 	PackedInt32Array ret;
-	GDVIRTUAL_CALL(_string_get_word_breaks, p_string, p_language, ret);
+	GDVIRTUAL_CALL(_string_get_word_breaks, p_string, p_language, p_chars_per_line, ret);
 	return ret;
 }

--- a/servers/text/text_server_extension.h
+++ b/servers/text/text_server_extension.h
@ -510,8 +510,8 @@ public:
 	virtual String strip_diacritics(const String &p_string) const override;
 	GDVIRTUAL1RC(String, _strip_diacritics, const String &);

-	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const override;
-	GDVIRTUAL2RC(PackedInt32Array, _string_get_word_breaks, const String &, const String &);
+	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "", int p_chars_per_line = 0) const override;
+	GDVIRTUAL3RC(PackedInt32Array, _string_get_word_breaks, const String &, const String &, int);

 	virtual bool is_valid_identifier(const String &p_string) const override;
 	GDVIRTUAL1RC(bool, _is_valid_identifier, const String &);
--- a/servers/text_server.cpp
+++ b/servers/text_server.cpp
@ -454,7 +454,7 @@ void TextServer::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("parse_number", "number", "language"), &TextServer::parse_number, DEFVAL(""));
 	ClassDB::bind_method(D_METHOD("percent_sign", "language"), &TextServer::percent_sign, DEFVAL(""));

-	ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language"), &TextServer::string_get_word_breaks, DEFVAL(""));
+	ClassDB::bind_method(D_METHOD("string_get_word_breaks", "string", "language", "chars_per_line"), &TextServer::string_get_word_breaks, DEFVAL(""), DEFVAL(0));

 	ClassDB::bind_method(D_METHOD("is_confusable", "string", "dict"), &TextServer::is_confusable);
 	ClassDB::bind_method(D_METHOD("spoof_check", "string"), &TextServer::spoof_check);
--- a/servers/text_server.h
+++ b/servers/text_server.h
@ -493,7 +493,7 @@ public:
 	virtual String percent_sign(const String &p_language = "") const = 0;

 	// String functions.
-	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "") const = 0;
+	virtual PackedInt32Array string_get_word_breaks(const String &p_string, const String &p_language = "", int p_chars_per_line = 0) const = 0;

 	virtual int64_t is_confusable(const String &p_string, const PackedStringArray &p_dict) const { return -1; };
 	virtual bool spoof_check(const String &p_string) const { return false; };
--- a/tests/servers/test_text_server.h
+++ b/tests/servers/test_text_server.h
@ -593,12 +593,18 @@ TEST_SUITE("[TextServer]") {
 					String text1 = U"linguistically similar and effectively form";
 					//                           14^     22^ 26^         38^
 					PackedInt32Array breaks = ts->string_get_word_breaks(text1, "en");
-					CHECK(breaks.size() == 4);
-					if (breaks.size() == 4) {
-						CHECK(breaks[0] == 14);
-						CHECK(breaks[1] == 22);
-						CHECK(breaks[2] == 26);
-						CHECK(breaks[3] == 38);
+					CHECK(breaks.size() == 10);
+					if (breaks.size() == 10) {
+						CHECK(breaks[0] == 0);
+						CHECK(breaks[1] == 14);
+						CHECK(breaks[2] == 15);
+						CHECK(breaks[3] == 22);
+						CHECK(breaks[4] == 23);
+						CHECK(breaks[5] == 26);
+						CHECK(breaks[6] == 27);
+						CHECK(breaks[7] == 38);
+						CHECK(breaks[8] == 39);
+						CHECK(breaks[9] == 43);
 					}
 				}

@ -608,16 +614,26 @@ TEST_SUITE("[TextServer]") {
 					//                 3^   7^    13^ 16^  20^   25^ 29^ 32^

 					PackedInt32Array breaks = ts->string_get_word_breaks(text2, "th");
-					CHECK(breaks.size() == 8);
-					if (breaks.size() == 8) {
-						CHECK(breaks[0] == 3);
-						CHECK(breaks[1] == 7);
-						CHECK(breaks[2] == 13);
-						CHECK(breaks[3] == 16);
-						CHECK(breaks[4] == 20);
-						CHECK(breaks[5] == 25);
-						CHECK(breaks[6] == 29);
-						CHECK(breaks[7] == 32);
+					CHECK(breaks.size() == 18);
+					if (breaks.size() == 18) {
+						CHECK(breaks[0] == 0);
+						CHECK(breaks[1] == 4);
+						CHECK(breaks[2] == 4);
+						CHECK(breaks[3] == 8);
+						CHECK(breaks[4] == 8);
+						CHECK(breaks[5] == 14);
+						CHECK(breaks[6] == 14);
+						CHECK(breaks[7] == 17);
+						CHECK(breaks[8] == 17);
+						CHECK(breaks[9] == 21);
+						CHECK(breaks[10] == 21);
+						CHECK(breaks[11] == 26);
+						CHECK(breaks[12] == 26);
+						CHECK(breaks[13] == 30);
+						CHECK(breaks[14] == 30);
+						CHECK(breaks[15] == 33);
+						CHECK(breaks[16] == 33);
+						CHECK(breaks[17] == 42);
 					}
 				}
 			}