2019-10-22 07:01:37 +08:00
// Copyright (c) ppy Pty Ltd <contact@ppy.sh>. Licensed under the MIT Licence.
2019-01-24 16:43:03 +08:00
// See the LICENCE file in the repository root for full licence text.
2018-04-13 17:19:50 +08:00
using System ;
using System.Collections.Generic ;
Add backslash escaping to new link format
For users to be able to add square brackets inside of links using
the new format, the regular expression used for parsing those links
contained a balancing group, which can be used for matching pairs
of tokens (in this case, opening and closing brackets, in that order).
However, this means that users could not post links with unmatched
brackets inside of them (i.e. ones that contain a single bracket, or
a closing bracket followed by an opening one). Allow for escaping opening
and closing brackets using the backslash character.
The change substitutes this old fragment of the regex in the display
text group:
[^\[\]]* // any character other than closing/opening bracket
for this one:
(((?<=\\)[\[\]])|[^\[\]])*
The second pattern in the alternative remains the same; the first one
performs the escaping, as follows:
(
(?<=\\) // positive lookbehind expression:
// this match will succeed, if the next expression
// is preceded by a single backslash
[\[\]] // either an opening or a closing bracket
)
Since the entire display group is matched, unfortunately the lookbehind
expression does not actually strip the backslashes, so they are
manually stripped in handleMatches.
As demonstrated in the unit tests attached, this also allows balanced
brackets to be mixed with escaped ones.
2019-09-04 05:18:39 +08:00
using System.Linq ;
2018-04-13 17:19:50 +08:00
using System.Text.RegularExpressions ;
2021-06-01 13:09:35 +08:00
#nullable enable
2018-04-13 17:19:50 +08:00
namespace osu.Game.Online.Chat
{
public static class MessageFormatter
{
// Wiki shorthand: double square brackets around a page title.
// [[Performance Points]] -> wiki:Performance Points (https://osu.ppy.sh/wiki/Performance_Points)
private static readonly Regex wiki_regex = new Regex(@"\[\[(?<text>[^\]]+)\]\]");

// "(text)[url]" links. The "text" group uses a balancing group so that nested parentheses must pair up,
// while a parenthesis preceded by a backslash is accepted on its own.
// (test)[https://osu.ppy.sh/b/1234] -> test (https://osu.ppy.sh/b/1234)
private static readonly Regex old_link_regex = new Regex(@"\((?<text>(((?<=\\)[\(\)])|[^\(\)])*(((?<open>\()(((?<=\\)[\(\)])|[^\(\)])*)+((?<close-open>\))(((?<=\\)[\(\)])|[^\(\)])*)+)*(?(open)(?!)))\)\[(?<url>[a-z]+://[^ ]+)\]");

// "[url text]" links. Same bracket handling as above, applied to square brackets.
// [https://osu.ppy.sh/b/1234 Beatmap [Hard] (poop)] -> Beatmap [hard] (poop) (https://osu.ppy.sh/b/1234)
private static readonly Regex new_link_regex = new Regex(@"\[(?<url>[a-z]+://[^ ]+) (?<text>(((?<=\\)[\[\]])|[^\[\]])*(((?<open>\[)(((?<=\\)[\[\]])|[^\[\]])*)+((?<close-open>\])(((?<=\\)[\[\]])|[^\[\]])*)+)*(?(open)(?!)))\]");

// "[text](url)" links, i.e. the standard markdown form, with an optional double-quoted title after the URL.
// [test](https://osu.ppy.sh/b/1234) -> test (https://osu.ppy.sh/b/1234)
private static readonly Regex markdown_link_regex = new Regex(@"\[(?<text>(((?<=\\)[\[\]])|[^\[\]])*(((?<open>\[)(((?<=\\)[\[\]])|[^\[\]])*)+((?<close-open>\])(((?<=\\)[\[\]])|[^\[\]])*)+)*(?(open)(?!)))\]\((?<url>[a-z]+://[^ ]+)(\s+(?<title>""([^""]|(?<=\\)"")*""))?\)");

// Bare URLs. An advanced, RFC-compatible expression that matches any possible URL, *but* tolerates certain
// invalid characters that are widely used. Matched case-insensitively.
// Format (<required>, [optional]):
//     <protocol>://<domain>.<tld>[:port][/path][?query][#fragment]
// Note that the protocol part accepts any run of letters, not only http/https.
private static readonly Regex advanced_link_regex = new Regex(
    // protocol
    @"(?<link>[a-z]*?:\/\/" +
    // domain + tld
    @"(?<domain>(?:[a-z0-9]\.|[a-z0-9][a-z0-9-]*[a-z0-9]\.)*[a-z0-9-]*[a-z0-9]" +
    // port (optional)
    @"(?::\d+)?)" +
    // path (optional)
    @"(?<path>(?:(?:\/+(?:[a-z0-9$_\.\+!\*\',;:\(\)@&~=-]|%[0-9a-f]{2})*)*" +
    // query (optional)
    @"(?:\?(?:[a-z0-9$_\+!\*\',;:\(\)@&=\/~-]|%[0-9a-f]{2})*)?)?" +
    // fragment (optional)
    @"(?:#(?:[a-z0-9$_\+!\*\',;:\(\)@&=\/~-]|%[0-9a-f]{2})*)?)?)",
    RegexOptions.IgnoreCase);

// Editor timestamps, optionally followed by a parenthesised list of numbers.
// 00:00:000 (1,2,3) - test
// regex from https://github.com/ppy/osu-web/blob/651a9bac2b60d031edd7e33b8073a469bf11edaa/resources/assets/coffee/_classes/beatmap-discussion-helper.coffee#L10
private static readonly Regex time_regex = new Regex(@"\b(((\d{2,}):([0-5]\d)[:.](\d{3}))(\s\((?:\d+[,|])*\d+\))?)");

// Channel names.
// #osu
private static readonly Regex channel_regex = new Regex(@"(#[a-zA-Z]+[a-zA-Z0-9]+)");

// Unicode emojis: the surrogate pairs covering U+1F400 to U+1F64F.
private static readonly Regex emoji_regex = new Regex(@"(\uD83D[\uDC00-\uDE4F])");
2021-02-12 13:54:19 +08:00
/// <summary>
/// The root URL for the website, used for chat link matching.
/// </summary>
/// <remarks>
/// Only the last '/'-separated segment of an assigned value is stored, after leading and trailing slashes
/// have been trimmed. For example, "https://osu.ppy.sh/" is stored as "osu.ppy.sh".
/// </remarks>
public static string WebsiteRootUrl
{
    get
    {
        return websiteRootUrl;
    }
    set
    {
        // drop surrounding slashes first so a trailing one does not leave an empty final segment.
        string withoutOuterSlashes = value.Trim('/');

        // whatever follows the last remaining slash is kept as the domain name; the protocol is discarded.
        string[] segments = withoutOuterSlashes.Split('/');
        websiteRootUrl = segments.Last();
    }
}

// backing store for WebsiteRootUrl; always holds the already-reduced value.
private static string websiteRootUrl = "osu.ppy.sh";
2021-02-12 13:54:19 +08:00
2021-06-01 13:09:35 +08:00
/// <summary>
/// Replaces every match of <paramref name="regex"/> in <see cref="MessageFormatterResult.Text"/> with its display
/// text and records a <see cref="Link"/> covering the replaced span.
/// </summary>
/// <param name="regex">The expression to search with.</param>
/// <param name="display">Composite format string producing the visible text. {0} is the whole match, {1} the "text" group, {2} the "url" group.</param>
/// <param name="link">Composite format string producing the link target, using the same placeholders as <paramref name="display"/>.</param>
/// <param name="result">The result being built; its text and links are modified in place.</param>
/// <param name="startIndex">Position in the text at which matching starts.</param>
/// <param name="linkActionOverride">When not null, used as the link's action instead of the one derived from the link target.</param>
/// <param name="escapeChars">Characters whose preceding backslash should be removed from the display text.</param>
private static void handleMatches(Regex regex, string display, string link, MessageFormatterResult result, int startIndex = 0, LinkAction? linkActionOverride = null, char[]? escapeChars = null)
{
    // match positions refer to the text as it was when matching began;
    // this tracks how many characters earlier replacements in this pass have removed since.
    int removedSoFar = 0;

    foreach (Match match in regex.Matches(result.Text, startIndex))
    {
        int start = match.Index - removedSoFar;
        int end = start + match.Length;

        string textGroup = match.Groups["text"].Value;
        string urlGroup = match.Groups["url"].Value;

        string displayText = string.Format(display, match.Groups[0], textGroup, urlGroup).Trim();
        string linkText = string.Format(link, match.Groups[0], textGroup, urlGroup).Trim();

        if (displayText.Length == 0 || linkText.Length == 0)
            continue;

        // The link expressions accept backslash-escaped brackets via a lookbehind, but since the whole display
        // group is captured the backslashes are still part of it, so they have to be stripped manually here.
        if (escapeChars != null)
        {
            foreach (char escaped in escapeChars)
                displayText = displayText.Replace($"\\{escaped}", escaped.ToString());
        }

        // Check for encapsulated links: skip this match if it lies fully inside an existing link,
        // or if an existing link lies fully inside it.
        bool encapsulated = false;

        foreach (Link existing in result.Links)
        {
            bool existingContainsMatch = existing.Index <= start && existing.Index + existing.Length >= end;
            bool matchContainsExisting = start <= existing.Index && end >= existing.Index + existing.Length;

            if (existingContainsMatch || matchContainsExisting)
            {
                encapsulated = true;
                break;
            }
        }

        if (encapsulated)
            continue;

        int shrinkage = match.Length - displayText.Length;

        result.Text = result.Text.Remove(start, match.Length).Insert(start, displayText);

        // since we just changed the line display text, offset any already processed links located after it.
        foreach (Link existing in result.Links)
        {
            if (existing.Index > start)
                existing.Index -= shrinkage;
        }

        var details = GetLinkDetails(linkText);
        result.Links.Add(new Link(linkText, start, displayText.Length, linkActionOverride ?? details.Action, details.Argument));

        // adjust the offset for the remaining matches of this pass.
        removedSoFar += shrinkage;
    }
}
/// <summary>
/// Registers a <see cref="Link"/> for every match of <paramref name="regex"/> without altering the text.
/// </summary>
/// <param name="regex">The expression to search with; the URL is read from its "link" group.</param>
/// <param name="result">The result whose links are appended to.</param>
/// <param name="startIndex">Position in the text at which matching starts.</param>
private static void handleAdvanced(Regex regex, MessageFormatterResult result, int startIndex = 0)
{
    foreach (Match match in regex.Matches(result.Text, startIndex))
    {
        string rawUrl = match.Groups["link"].Value;

        var details = GetLinkDetails(rawUrl);
        var candidate = new Link(rawUrl, match.Index, rawUrl.Length, details.Action, details.Argument);

        // sometimes an already-processed formatted link can reduce to a simple URL, too
        // (example: [mean example - https://osu.ppy.sh](https://osu.ppy.sh))
        // therefore the raw link is only kept when none of the pre-existing links overlaps it.
        bool clearOfExistingLinks = result.Links.All(existing => !existing.Overlaps(candidate));

        if (!clearOfExistingLinks)
            continue;

        result.Links.Add(candidate);
    }
}
2019-11-01 10:40:51 +08:00
/// <summary>
/// Determines which <see cref="LinkAction"/> a URL should trigger, along with the argument for that action.
/// </summary>
/// <param name="url">The URL to inspect. It is split on '/' and the leading segment (minus a trailing ':') is treated as the protocol.</param>
/// <returns>
/// The resolved action and argument. Anything that is not recognised, including an empty or slash-only string,
/// is returned as <see cref="LinkAction.External"/> with the unmodified <paramref name="url"/> as argument.
/// </returns>
public static LinkDetails GetLinkDetails(string url)
{
    string[] args = url.Split('/', StringSplitOptions.RemoveEmptyEntries);

    // with empty entries removed, an empty or slash-only string yields no segments at all,
    // so there is no protocol to look at.
    if (args.Length == 0)
        return new LinkDetails(LinkAction.External, url);

    args[0] = args[0].TrimEnd(':');

    switch (args[0])
    {
        case "http":
        case "https":
            // length > 3 since all these links need another argument to work.
            // note that the host only has to *end* with the website root.
            if (args.Length > 3 && args[1].EndsWith(WebsiteRootUrl, StringComparison.OrdinalIgnoreCase))
            {
                string mainArg = args[3];

                switch (args[2])
                {
                    // old site only
                    case "b":
                    case "beatmaps":
                    {
                        // ignore any query string following the ID.
                        string trimmed = mainArg.Split('?').First();
                        if (int.TryParse(trimmed, out int id))
                            return new LinkDetails(LinkAction.OpenBeatmap, id.ToString());

                        break;
                    }

                    case "s":
                    case "beatmapsets":
                    case "d":
                    {
                        if (mainArg == "discussions")
                            // handle discussion links externally for now
                            return new LinkDetails(LinkAction.External, url);

                        if (args.Length > 4 && int.TryParse(args[4], out int id))
                            // https://osu.ppy.sh/beatmapsets/1154158#osu/2768184
                            return new LinkDetails(LinkAction.OpenBeatmap, id.ToString());

                        // https://osu.ppy.sh/beatmapsets/1154158#whatever
                        string trimmed = mainArg.Split('#').First();
                        if (int.TryParse(trimmed, out id))
                            return new LinkDetails(LinkAction.OpenBeatmapSet, id.ToString());

                        break;
                    }

                    case "u":
                    case "users":
                        return new LinkDetails(LinkAction.OpenUserProfile, mainArg);

                    case "wiki":
                        // everything after "wiki" forms the page path.
                        return new LinkDetails(LinkAction.OpenWiki, string.Join('/', args.Skip(3)));

                    case "home":
                        if (mainArg != "changelog")
                            // handle link other than changelog as external for now
                            return new LinkDetails(LinkAction.External, url);

                        switch (args.Length)
                        {
                            case 4:
                                // https://osu.ppy.sh/home/changelog
                                return new LinkDetails(LinkAction.OpenChangelog, string.Empty);

                            case 6:
                                // https://osu.ppy.sh/home/changelog/lazer/2021.1006
                                return new LinkDetails(LinkAction.OpenChangelog, $"{args[4]}/{args[5]}");
                        }

                        break;
                }
            }

            break;

        case "osu":
            // every internal link also needs some kind of argument
            if (args.Length < 3)
                break;

            LinkAction linkType;

            switch (args[1])
            {
                case "chan":
                    linkType = LinkAction.OpenChannel;
                    break;

                case "edit":
                    linkType = LinkAction.OpenEditorTimestamp;
                    break;

                case "b":
                    linkType = LinkAction.OpenBeatmap;
                    break;

                case "s":
                case "dl":
                    linkType = LinkAction.OpenBeatmapSet;
                    break;

                case "spectate":
                    linkType = LinkAction.Spectate;
                    break;

                case "u":
                    linkType = LinkAction.OpenUserProfile;
                    break;

                default:
                    linkType = LinkAction.External;
                    break;
            }

            return new LinkDetails(linkType, args[2]);

        case "osump":
            // without a segment after the protocol there is nothing to join; treat the link as external.
            if (args.Length < 2)
                break;

            return new LinkDetails(LinkAction.JoinMultiplayerMatch, args[1]);
    }

    return new LinkDetails(LinkAction.External, url);
}
/// <summary>
/// Runs every link-detection pass over <paramref name="toFormat"/> and collects the outcome.
/// </summary>
/// <remarks>
/// The passes run in a fixed order. Links registered by an earlier pass prevent later passes from
/// registering a link that encapsulates, is encapsulated by, or (for bare URLs) overlaps them.
/// </remarks>
/// <param name="toFormat">The raw text.</param>
/// <param name="startIndex">Position in the text at which every pass starts matching.</param>
/// <param name="space">Number of null characters substituted for each matched emoji.</param>
/// <returns>The rewritten text together with the links found, in order of discovery (not sorted).</returns>
private static MessageFormatterResult format(string toFormat, int startIndex = 0, int space = 3)
{
    var result = new MessageFormatterResult(toFormat);

    char[] squareBrackets = { '[', ']' };
    char[] roundBrackets = { '(', ')' };

    // handle the [link display] format
    handleMatches(new_link_regex, "{1}", "{2}", result, startIndex, escapeChars: squareBrackets);

    // handle the standard markdown []() format
    handleMatches(markdown_link_regex, "{1}", "{2}", result, startIndex, escapeChars: squareBrackets);

    // handle the ()[] link format
    handleMatches(old_link_regex, "{1}", "{2}", result, startIndex, escapeChars: roundBrackets);

    // handle wiki links
    handleMatches(wiki_regex, "{1}", $"https://{WebsiteRootUrl}/wiki/{{1}}", result, startIndex);

    // handle bare links
    handleAdvanced(advanced_link_regex, result, startIndex);

    // handle editor times
    handleMatches(time_regex, "{0}", "osu://edit/{0}", result, startIndex, LinkAction.OpenEditorTimestamp);

    // handle channels
    handleMatches(channel_regex, "{0}", "osu://chan/{0}", result, startIndex, LinkAction.OpenChannel);

    // handle emojis: each one is replaced by `space` null characters (nothing is built for space <= 0).
    string emojiPlaceholder = "";

    for (int i = 0; i < space; i++)
        emojiPlaceholder += "\0";

    handleMatches(emoji_regex, emojiPlaceholder, "{0}", result, startIndex);

    return result;
}
/// <summary>
/// Formats the content of <paramref name="inputMessage"/> and stores the display text and links back on it.
/// </summary>
/// <param name="inputMessage">The message to format; it is modified in place.</param>
/// <returns>The same <paramref name="inputMessage"/> instance.</returns>
public static Message FormatMessage(Message inputMessage)
{
    MessageFormatterResult formatted = format(inputMessage.Content);

    inputMessage.DisplayContent = formatted.Text;

    // each link format is matched in its own pass, so the links are not collected in positional order.
    formatted.Links.Sort();
    inputMessage.Links = formatted.Links;

    return inputMessage;
}
/// <summary>
/// Formats a plain string, returning the display text together with its links sorted via <see cref="Link.CompareTo"/>.
/// </summary>
/// <param name="text">The raw text.</param>
public static MessageFormatterResult FormatText(string text)
{
    MessageFormatterResult formatted = format(text);

    // each link format is matched in its own pass, so the links are not collected in positional order.
    formatted.Links.Sort();

    return formatted;
}
/// <summary>
/// The outcome of a formatting run: the text to display and the links located within it.
/// </summary>
public class MessageFormatterResult
{
    /// <summary>
    /// The links found so far.
    /// </summary>
    public List<Link> Links;

    /// <summary>
    /// The text after link markup has been replaced by its display form.
    /// </summary>
    public string Text;

    /// <summary>
    /// The text exactly as it was supplied to the constructor.
    /// </summary>
    public string OriginalText;

    public MessageFormatterResult(string text)
    {
        Links = new List<Link>();
        OriginalText = text;
        Text = text;
    }
}
2019-11-01 10:40:51 +08:00
}
2018-04-13 17:19:50 +08:00
2019-11-01 10:40:51 +08:00
/// <summary>
/// An action paired with the argument it should be invoked with, as resolved from a URL.
/// </summary>
public class LinkDetails
{
    /// <summary>
    /// What should happen when the link is activated.
    /// </summary>
    public readonly LinkAction Action;

    /// <summary>
    /// The value accompanying <see cref="Action"/>.
    /// </summary>
    public readonly object Argument;

    public LinkDetails(LinkAction action, object argument)
        => (Action, Argument) = (action, argument);
}
/// <summary>
/// The kinds of action a link can trigger.
/// </summary>
public enum LinkAction
{
    // fallback for anything MessageFormatter.GetLinkDetails does not recognise.
    External,

    // website "b"/"beatmaps" links, beatmapset links carrying a beatmap ID, and osu://b/.
    OpenBeatmap,

    // website "s"/"beatmapsets"/"d" links, and osu://s/ or osu://dl/.
    OpenBeatmapSet,

    // osu://chan/ links, including those generated for "#channel" mentions.
    OpenChannel,

    // osu://edit/ links, including those generated for editor timestamps.
    OpenEditorTimestamp,

    // osump:// links.
    JoinMultiplayerMatch,

    // osu://spectate/ links.
    Spectate,

    // website "u"/"users" links, and osu://u/.
    OpenUserProfile,

    // not produced by any code in this file.
    SearchBeatmapSet,

    // website "wiki" links.
    OpenWiki,

    // not produced by any code in this file.
    Custom,

    // website "home/changelog" links.
    OpenChangelog,
}
/// <summary>
/// A span of display text that triggers an action. Links are ordered by <see cref="Index"/>.
/// </summary>
public class Link : IComparable<Link>
{
    /// <summary>
    /// The target of the link.
    /// </summary>
    public string Url;

    /// <summary>
    /// Position of the first character of the link.
    /// </summary>
    public int Index;

    /// <summary>
    /// Number of characters the link covers.
    /// </summary>
    public int Length;

    /// <summary>
    /// What should happen when the link is activated.
    /// </summary>
    public LinkAction Action;

    /// <summary>
    /// The value accompanying <see cref="Action"/>.
    /// </summary>
    public object Argument;

    public Link(string url, int startIndex, int length, LinkAction action, object argument)
    {
        Url = url;
        Index = startIndex;
        Length = length;
        Action = action;
        Argument = argument;
    }

    /// <summary>
    /// Whether this link and <paramref name="otherLink"/> share at least one character position.
    /// </summary>
    public bool Overlaps(Link otherLink) => Index < otherLink.Index + otherLink.Length && otherLink.Index < Index + Length;

    /// <summary>
    /// Orders links by ascending <see cref="Index"/>.
    /// </summary>
    /// <param name="otherLink">The link to compare against.</param>
    /// <returns>
    /// A negative value if this link starts earlier, zero if both start at the same index and a positive value
    /// if this link starts later. Any link sorts after null.
    /// </returns>
    public int CompareTo(Link? otherLink)
    {
        if (otherLink is null)
            return 1;

        // must yield zero for equal indices: sorting relies on a consistent ordering,
        // and a comparison where both sides claim to come first is not one.
        return Index.CompareTo(otherLink.Index);
    }
}
}