2019-10-22 07:01:37 +08:00
|
|
|
|
// Copyright (c) ppy Pty Ltd <contact@ppy.sh>. Licensed under the MIT Licence.
|
2019-01-24 16:43:03 +08:00
|
|
|
|
// See the LICENCE file in the repository root for full licence text.
|
2018-04-13 17:19:50 +08:00
|
|
|
|
|
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
Add backslash escaping to new link format
For users to be able to add square brackets inside of links using
the new format, the regular expression used for parsing those links
contained a balancing group, which can be used for matching pairs
of tokens (in this case, opening and closing brackets, in that order).
However, this means that users could not post links with unmatched
brackets inside of them (ie. ones that contain single brackets, or
a closing bracket and then an opening one). Allow for escaping opening
and closing brackets using the backslash character.
The change substitutes this old fragment of the regex in the display
text group:
[^\[\]]* // any character other than closing/opening bracket
for this one:
(((?<=\\)[\[\]])|[^\[\]])*
The second pattern in the alternative remains the same; the first one
performs the escaping, as follows:
(
(?<=\\) // positive lookbehind expression:
// this match will succeed, if the next expression
// is preceded by a single backslash
[\[\]] // either an opening or closing brace
)
Since the entire display group is matched, unfortunately the lookbehind
expression does not actually strip the backslashes, so they are
manually stripped in handleMatches.
As demonstrated in the unit tests attached, this also allows balanced
brackets to be mixed with escaped ones.
2019-09-04 05:18:39 +08:00
|
|
|
|
using System.Linq;
|
2018-04-13 17:19:50 +08:00
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
|
|
|
|
|
|
namespace osu.Game.Online.Chat
|
|
|
|
|
{
|
|
|
|
|
public static class MessageFormatter
|
|
|
|
|
{
|
|
|
|
|
// Wiki link.
// [[Performance Points]] -> wiki:Performance Points (https://osu.ppy.sh/wiki/Performance_Points)
private static readonly Regex wiki_regex = new Regex(@"\[\[(?<text>[^\]]+)\]\]");

// Old link format: display text in round brackets, followed by the URL in square brackets.
// Round brackets inside the display text must either be balanced or escaped with a backslash.
// (test)[https://osu.ppy.sh/b/1234] -> test (https://osu.ppy.sh/b/1234)
private static readonly Regex old_link_regex = new Regex(@"\((?<text>(((?<=\\)[\(\)])|[^\(\)])*(((?<open>\()(((?<=\\)[\(\)])|[^\(\)])*)+((?<close-open>\))(((?<=\\)[\(\)])|[^\(\)])*)+)*(?(open)(?!)))\)\[(?<url>[a-z]+://[^ ]+)\]");

// New link format: URL and display text, separated by a single space, inside one pair of square brackets.
// Square brackets inside the display text must either be balanced or escaped with a backslash.
// [https://osu.ppy.sh/b/1234 Beatmap [Hard] (poop)] -> Beatmap [hard] (poop) (https://osu.ppy.sh/b/1234)
private static readonly Regex new_link_regex = new Regex(@"\[(?<url>[a-z]+://[^ ]+) (?<text>(((?<=\\)[\[\]])|[^\[\]])*(((?<open>\[)(((?<=\\)[\[\]])|[^\[\]])*)+((?<close-open>\])(((?<=\\)[\[\]])|[^\[\]])*)+)*(?(open)(?!)))\]");

// Markdown link format, with an optional double-quoted title after the URL.
// [test](https://osu.ppy.sh/b/1234) -> test (https://osu.ppy.sh/b/1234) aka correct markdown format
private static readonly Regex markdown_link_regex = new Regex(@"\[(?<text>(((?<=\\)[\[\]])|[^\[\]])*(((?<open>\[)(((?<=\\)[\[\]])|[^\[\]])*)+((?<close-open>\])(((?<=\\)[\[\]])|[^\[\]])*)+)*(?(open)(?!)))\]\((?<url>[a-z]+://[^ ]+)(\s+(?<title>""([^""]|(?<=\\)"")*""))?\)");

// Bare URLs.
// Advanced, RFC-compatible regular expression that matches any possible URL, *but* allows certain invalid characters that are widely used.
// This is in the format (<required>, [optional]):
//      http[s]://<domain>.<tld>[:port][/path][?query][#fragment]
private static readonly Regex advanced_link_regex = new Regex(
    // protocol
    @"(?<link>[a-z]*?:\/\/" +
    // domain + tld
    @"(?<domain>(?:[a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*[a-z0-9-]*[a-z0-9]" +
    // port (optional)
    @"(?::\d+)?)" +
    // path (optional)
    @"(?<path>(?:(?:\/+(?:[a-z0-9$_\.\+!\*\',;:\(\)@&~=-]|%[0-9a-f]{2})*)*" +
    // query (optional)
    @"(?:\?(?:[a-z0-9$_\+!\*\',;:\(\)@&=\/~-]|%[0-9a-f]{2})*)?)?" +
    // fragment (optional)
    @"(?:#(?:[a-z0-9$_\+!\*\',;:\(\)@&=\/~-]|%[0-9a-f]{2})*)?)?)",
    RegexOptions.IgnoreCase);

// Editor timestamps; the match runs up to (but excluding) the first '-' that follows.
// 00:00:000 (1,2,3) - test
private static readonly Regex time_regex = new Regex(@"\d\d:\d\d:\d\d\d? [^-]*");

// Channel names.
// #osu
private static readonly Regex channel_regex = new Regex(@"(#[a-zA-Z]+[a-zA-Z0-9]+)");

// Unicode emojis (surrogate pairs with a \uD83D high surrogate and a low surrogate in \uDC00-\uDE4F).
private static readonly Regex emoji_regex = new Regex(@"(\uD83D[\uDC00-\uDE4F])");
|
|
|
|
|
|
Add backslash escaping to new link format
For users to be able to add square brackets inside of links using
the new format, the regular expression used for parsing those links
contained a balancing group, which can be used for matching pairs
of tokens (in this case, opening and closing brackets, in that order).
However, this means that users could not post links with unmatched
brackets inside of them (ie. ones that contain single brackets, or
a closing bracket and then an opening one). Allow for escaping opening
and closing brackets using the backslash character.
The change substitutes this old fragment of the regex in the display
text group:
[^\[\]]* // any character other than closing/opening bracket
for this one:
(((?<=\\)[\[\]])|[^\[\]])*
The second pattern in the alternative remains the same; the first one
performs the escaping, as follows:
(
(?<=\\) // positive lookbehind expression:
// this match will succeed, if the next expression
// is preceded by a single backslash
[\[\]] // either an opening or closing brace
)
Since the entire display group is matched, unfortunately the lookbehind
expression does not actually strip the backslashes, so they are
manually stripped in handleMatches.
As demonstrated in the unit tests attached, this also allows balanced
brackets to be mixed with escaped ones.
2019-09-04 05:18:39 +08:00
|
|
|
|
/// <summary>
/// Rewrites every match of <paramref name="regex"/> in <paramref name="result"/>'s text into its display form
/// and registers a <see cref="Link"/> covering the rewritten span.
/// </summary>
/// <param name="regex">The pattern to search for.</param>
/// <param name="display">Composite format string for the visible text. {0} is the whole match, {1} the "text" group, {2} the "url" group.</param>
/// <param name="link">Composite format string for the link target, using the same placeholders as <paramref name="display"/>.</param>
/// <param name="result">The result whose text and link list are updated in place.</param>
/// <param name="startIndex">The position in the text at which matching begins.</param>
/// <param name="linkActionOverride">When set, used as the link action instead of the one derived from the link target.</param>
/// <param name="escapeChars">Characters whose preceding backslash is removed from the visible text. Null disables unescaping.</param>
private static void handleMatches(Regex regex, string display, string link, MessageFormatterResult result, int startIndex = 0, LinkAction? linkActionOverride = null, char[] escapeChars = null)
{
    // total number of characters removed from the text by replacements made so far in this call.
    int removedSoFar = 0;

    foreach (Match match in regex.Matches(result.Text, startIndex))
    {
        // match positions refer to the text as it was when matching started, so shift them to the current text.
        int position = match.Index - removedSoFar;

        string textGroup = match.Groups["text"].Value;
        string urlGroup = match.Groups["url"].Value;

        string shown = string.Format(display, match.Value, textGroup, urlGroup).Trim();
        string target = string.Format(link, match.Value, textGroup, urlGroup).Trim();

        if (shown.Length == 0 || target.Length == 0)
            continue;

        // Remove backslash escapes in front of the characters provided in escapeChars
        if (escapeChars != null)
        {
            foreach (char escaped in escapeChars)
                shown = shown.Replace($"\\{escaped}", escaped.ToString());
        }

        // Check for encapsulated links: skip the match if an existing link fully contains it, or is fully contained by it.
        int end = position + match.Length;
        bool encapsulated = result.Links.Exists(l => (l.Index <= position && l.Index + l.Length >= end) || (position <= l.Index && end >= l.Index + l.Length));

        if (encapsulated)
            continue;

        result.Text = result.Text.Remove(position, match.Length).Insert(position, shown);

        int shrinkage = match.Length - shown.Length;

        // since we just changed the line display text, offset any already processed links.
        foreach (var existing in result.Links)
        {
            if (existing.Index > position)
                existing.Index -= shrinkage;
        }

        var details = GetLinkDetails(target);
        result.Links.Add(new Link(target, position, shown.Length, linkActionOverride ?? details.Action, details.Argument));

        // adjust the offset for processing the remaining matches of this pattern.
        removedSoFar += shrinkage;
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Registers a <see cref="Link"/> for every bare URL matched by <paramref name="regex"/>, leaving the text unchanged.
/// </summary>
/// <param name="regex">The pattern to search for; its "link" group supplies the URL.</param>
/// <param name="result">The result whose link list is extended.</param>
/// <param name="startIndex">The position in the text at which matching begins.</param>
private static void handleAdvanced(Regex regex, MessageFormatterResult result, int startIndex = 0)
{
    foreach (Match match in regex.Matches(result.Text, startIndex))
    {
        string url = match.Groups["link"].Value;
        var details = GetLinkDetails(url);
        var candidate = new Link(url, match.Index, url.Length, details.Action, details.Argument);

        // sometimes an already-processed formatted link can reduce to a simple URL, too
        // (example: [mean example - https://osu.ppy.sh](https://osu.ppy.sh))
        // therefore the raw URL is dropped if it overlaps any of the pre-existing links
        if (result.Links.Any(existing => existing.Overlaps(candidate)))
            continue;

        result.Links.Add(candidate);
    }
}
|
|
|
|
|
|
2019-11-01 10:40:51 +08:00
|
|
|
|
/// <summary>
/// Works out which <see cref="LinkAction"/> a URL should trigger and which argument accompanies it.
/// </summary>
/// <remarks>
/// The URL is split on '/' (empty segments discarded) and dispatched on its scheme.
/// Anything that is not recognised, or that lacks the argument its action requires,
/// is reported as <see cref="LinkAction.External"/> with a null argument.
/// </remarks>
/// <param name="url">The URL to inspect. Null, empty and slash-only values are treated as external links.</param>
/// <returns>The action and argument for the URL.</returns>
public static LinkDetails GetLinkDetails(string url)
{
    var args = (url ?? string.Empty).Split(new[] { '/' }, StringSplitOptions.RemoveEmptyEntries);

    // nothing left after splitting (null, empty or slash-only input): there is no scheme to dispatch on.
    if (args.Length == 0)
        return new LinkDetails(LinkAction.External, null);

    args[0] = args[0].TrimEnd(':');

    switch (args[0])
    {
        case "http":
        case "https":
            // length > 3 since all these links need another argument to work
            if (args.Length > 3 && (args[1] == "osu.ppy.sh" || args[1] == "new.ppy.sh"))
            {
                switch (args[2])
                {
                    case "b":
                    case "beatmaps":
                        return new LinkDetails(LinkAction.OpenBeatmap, args[3]);

                    case "s":
                    case "beatmapsets":
                    case "d":
                        return new LinkDetails(LinkAction.OpenBeatmapSet, args[3]);

                    case "u":
                    case "users":
                        return new LinkDetails(LinkAction.OpenUserProfile, args[3]);
                }
            }

            return new LinkDetails(LinkAction.External, null);

        case "osu":
            // every internal link also needs some kind of argument
            if (args.Length < 3)
                return new LinkDetails(LinkAction.External, null);

            LinkAction linkType;

            switch (args[1])
            {
                case "chan":
                    linkType = LinkAction.OpenChannel;
                    break;

                case "edit":
                    linkType = LinkAction.OpenEditorTimestamp;
                    break;

                case "b":
                    linkType = LinkAction.OpenBeatmap;
                    break;

                case "s":
                case "dl":
                    linkType = LinkAction.OpenBeatmapSet;
                    break;

                case "spectate":
                    linkType = LinkAction.Spectate;
                    break;

                case "u":
                    linkType = LinkAction.OpenUserProfile;
                    break;

                default:
                    linkType = LinkAction.External;
                    break;
            }

            return new LinkDetails(linkType, args[2]);

        case "osump":
            // a multiplayer link without a match argument (e.g. "osump:///") cannot be joined;
            // treat it as external rather than indexing past the end of args.
            if (args.Length < 2)
                return new LinkDetails(LinkAction.External, null);

            return new LinkDetails(LinkAction.JoinMultiplayerMatch, args[1]);

        default:
            return new LinkDetails(LinkAction.External, null);
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Runs every link pattern over <paramref name="toFormat"/> and returns the rewritten text together with the links found.
/// </summary>
/// <remarks>
/// The order of the passes matters: each pass sees the text and links produced by the ones before it.
/// </remarks>
/// <param name="toFormat">The raw text to process.</param>
/// <param name="startIndex">The position in the text at which matching begins.</param>
/// <param name="space">The number of NUL characters substituted for each matched emoji; values of zero or less leave emojis untouched.</param>
/// <returns>The formatted text and its (unsorted) links.</returns>
private static MessageFormatterResult format(string toFormat, int startIndex = 0, int space = 3)
{
    var formatted = new MessageFormatterResult(toFormat);

    char[] squareBrackets = { '[', ']' };
    char[] roundBrackets = { '(', ')' };

    // handle the [link display] format
    handleMatches(new_link_regex, "{1}", "{2}", formatted, startIndex, escapeChars: squareBrackets);

    // handle the standard markdown []() format
    handleMatches(markdown_link_regex, "{1}", "{2}", formatted, startIndex, escapeChars: squareBrackets);

    // handle the ()[] link format
    handleMatches(old_link_regex, "{1}", "{2}", formatted, startIndex, escapeChars: roundBrackets);

    // handle wiki links
    handleMatches(wiki_regex, "{1}", "https://osu.ppy.sh/wiki/{1}", formatted, startIndex);

    // handle bare links
    handleAdvanced(advanced_link_regex, formatted, startIndex);

    // handle editor times
    handleMatches(time_regex, "{0}", "osu://edit/{0}", formatted, startIndex, LinkAction.OpenEditorTimestamp);

    // handle channels
    handleMatches(channel_regex, "{0}", "osu://chan/{0}", formatted, startIndex, LinkAction.OpenChannel);

    // handle emojis: each one is replaced by a run of NUL characters of the requested width
    string placeholder = space > 0 ? new string('\0', space) : string.Empty;
    handleMatches(emoji_regex, placeholder, "{0}", formatted, startIndex);

    return formatted;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Formats the content of <paramref name="inputMessage"/> and stores the display text and links back on it.
/// </summary>
/// <param name="inputMessage">The message to format; it is modified in place.</param>
/// <returns>The same <paramref name="inputMessage"/> instance.</returns>
public static Message FormatMessage(Message inputMessage)
{
    var formatted = format(inputMessage.Content);
    var links = formatted.Links;

    inputMessage.DisplayContent = formatted.Text;

    // The passes in format() run pattern by pattern, so links are not necessarily in positional order yet.
    links.Sort();
    inputMessage.Links = links;

    return inputMessage;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Formats a plain string, returning the display text together with its links sorted using <see cref="Link"/>'s comparison.
/// </summary>
/// <param name="text">The raw text to format.</param>
/// <returns>The formatting result.</returns>
public static MessageFormatterResult FormatText(string text)
{
    var formatted = format(text);
    formatted.Links.Sort();
    return formatted;
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// The outcome of a formatting run: the text to display and the links found within it.
/// </summary>
public class MessageFormatterResult
{
    /// <summary>
    /// The links found so far. Starts out empty.
    /// </summary>
    public List<Link> Links = new List<Link>();

    /// <summary>
    /// The text to display. Initially equal to <see cref="OriginalText"/>.
    /// </summary>
    public string Text;

    /// <summary>
    /// The text exactly as it was passed to the constructor.
    /// </summary>
    public string OriginalText;

    /// <summary>
    /// Creates a result whose display text and original text are both <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The unformatted text.</param>
    public MessageFormatterResult(string text)
    {
        Text = text;
        OriginalText = text;
    }
}
|
2019-11-01 10:40:51 +08:00
|
|
|
|
}
|
2018-04-13 17:19:50 +08:00
|
|
|
|
|
2019-11-01 10:40:51 +08:00
|
|
|
|
/// <summary>
/// Pairs a <see cref="LinkAction"/> with the argument it operates on.
/// </summary>
public class LinkDetails
{
    /// <summary>
    /// The action the link triggers.
    /// </summary>
    public LinkAction Action;

    /// <summary>
    /// The argument accompanying <see cref="Action"/>; may be null.
    /// </summary>
    public string Argument;

    /// <summary>
    /// Creates a new set of link details.
    /// </summary>
    /// <param name="action">The action the link triggers.</param>
    /// <param name="argument">The argument accompanying the action.</param>
    public LinkDetails(LinkAction action, string argument)
    {
        Argument = argument;
        Action = action;
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// The kinds of action a <see cref="Link"/> can trigger.
/// </summary>
public enum LinkAction
{
    /// <summary>
    /// Fallback for any URL that is not recognised or lacks the argument its action would need.
    /// </summary>
    External,

    /// <summary>
    /// Produced for "b"/"beatmaps" website URLs and "osu://b/" URLs.
    /// </summary>
    OpenBeatmap,

    /// <summary>
    /// Produced for "s"/"beatmapsets"/"d" website URLs and "osu://s/" or "osu://dl/" URLs.
    /// </summary>
    OpenBeatmapSet,

    /// <summary>
    /// Produced for "osu://chan/" URLs and for "#channel" mentions.
    /// </summary>
    OpenChannel,

    /// <summary>
    /// Produced for "osu://edit/" URLs and for editor timestamps such as "00:00:000 (1,2,3)".
    /// </summary>
    OpenEditorTimestamp,

    /// <summary>
    /// Produced for "osump://" URLs.
    /// </summary>
    JoinMultiplayerMatch,

    /// <summary>
    /// Produced for "osu://spectate/" URLs.
    /// </summary>
    Spectate,

    /// <summary>
    /// Produced for "u"/"users" website URLs and "osu://u/" URLs.
    /// </summary>
    OpenUserProfile,

    // NOTE(review): never produced by MessageFormatter.GetLinkDetails; presumably assigned by callers elsewhere - confirm.
    Custom
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// A clickable span within a formatted message.
/// </summary>
public class Link : IComparable<Link>
{
    /// <summary>
    /// The target of the link.
    /// </summary>
    public string Url;

    /// <summary>
    /// The position of the first character of the span within the display text.
    /// </summary>
    public int Index;

    /// <summary>
    /// The number of characters the span covers.
    /// </summary>
    public int Length;

    /// <summary>
    /// The action the link triggers.
    /// </summary>
    public LinkAction Action;

    /// <summary>
    /// The argument accompanying <see cref="Action"/>; may be null.
    /// </summary>
    public string Argument;

    /// <summary>
    /// Creates a link covering <paramref name="length"/> characters starting at <paramref name="startIndex"/>.
    /// </summary>
    /// <param name="url">The target of the link.</param>
    /// <param name="startIndex">The position of the first character of the span.</param>
    /// <param name="length">The number of characters the span covers.</param>
    /// <param name="action">The action the link triggers.</param>
    /// <param name="argument">The argument accompanying the action.</param>
    public Link(string url, int startIndex, int length, LinkAction action, string argument)
    {
        Url = url;
        Index = startIndex;
        Length = length;
        Action = action;
        Argument = argument;
    }

    /// <summary>
    /// Whether the span of this link and the span of <paramref name="otherLink"/> share at least one character.
    /// </summary>
    /// <param name="otherLink">The link to test against.</param>
    public bool Overlaps(Link otherLink) => Index < otherLink.Index + otherLink.Length && otherLink.Index < Index + Length;

    /// <summary>
    /// Orders links by <see cref="Index"/>.
    /// </summary>
    /// <remarks>
    /// Links starting at the same index compare as equal (0), as the <see cref="IComparable{T}"/> contract requires
    /// for sorting to be well-defined. A null <paramref name="otherLink"/> sorts before any instance.
    /// </remarks>
    /// <param name="otherLink">The link to compare with.</param>
    /// <returns>A negative value, zero or a positive value if this link starts before, at or after <paramref name="otherLink"/>.</returns>
    public int CompareTo(Link otherLink) => otherLink == null ? 1 : Index.CompareTo(otherLink.Index);
}
|
|
|
|
|
}
|