library-ts/browser/text/RegExpUtitlity.ts

958 lines
20 KiB
TypeScript

import { MathX } from "../math/MathX";
import { Arrays } from "../tools/Arrays";
import { LevenshteinDistance } from "./Levehshtein";
import { LexerMatcher } from "./lexer/LexerMatcher";
/**
 * Static helpers for building regular expressions and for common string,
 * number-formatting and path manipulations.
 *
 * All members are static; sibling helpers are always called through
 * `RegExpUtility.` so a method keeps working when it is passed around detached
 * from the class (e.g. as a callback).
 */
export class RegExpUtility
{
  /**
   * Concatenates `text` with itself.
   *
   * @param text  The string to repeat.
   * @param times Number of copies. The loop runs while the counter is above zero,
   *              so fractional counts round up and non-positive counts give "".
   * @returns The repeated string.
   */
  static repeat( text:string, times:number )
  {
    let output:string[] = [];

    while ( times > 0 )
    {
      output.push( text );
      times--;
    }

    return output.join( "" );
  }

  /**
   * Replaces every run of one or more consecutive occurrences of
   * `collapsingPattern` with a single occurrence.
   *
   * @param text              The text to process.
   * @param collapsingPattern Literal text (not a regex) whose repetitions are collapsed.
   * @returns The collapsed text.
   */
  static collapse( text:string, collapsingPattern:string )
  {
    let escapedPattern = RegExpUtility.toRegexSource( collapsingPattern );
    let regex = new RegExp( "((?:" + escapedPattern + ")+)", "g" );

    // A replacer function keeps "$" sequences of the pattern literal
    // (a replacement string would turn "$$" into "$", "$&" into the match, ...).
    return text.replace( regex, () => collapsingPattern );
  }

  /**
   * Replaces each run of whitespace (including line breaks and tabs) with one
   * space and trims the result.
   */
  static collapseWhiteSpace( text:string )
  {
    return text.replace( /\s+/g, " " ).trim();
  }

  /**
   * Replaces every character that is not an ASCII letter, digit or "-" with "_".
   * Works per UTF-16 code unit, so the output has the same length as the input.
   */
  static toFileName( text:string )
  {
    return text.replace( /[^a-zA-Z0-9\-]/g, "_" );
  }

  /**
   * Removes `fromStart` characters from the beginning and `fromEnd` characters
   * from the end of `text` (using `String.substring` semantics).
   */
  static trimCharacters( text:string, fromStart:number, fromEnd:number )
  {
    return text.substring( fromStart, text.length - fromEnd );
  }

  /** Splits `text` at "\r\n", "\n" or "\r"; the line breaks are dropped. */
  static splitLines( text:string )
  {
    return text.split( /(?:\r\n)|\n|\r/g );
  }

  /**
   * Splits `text` at "\r\n", "\n" or "\r" and keeps the line breaks as separate
   * entries between the lines (the separator is a capturing group).
   */
  static splitLinesCaptureBreaks( text:string )
  {
    return text.split( /((?:\r\n)|\n|\r)/g );
  }

  /**
   * Builds a regex that matches any of the given literal words.
   *
   * When `noSubmatches` is falsy the alternation is wrapped as `^(...)$`, so only
   * a whole-string match succeeds; when it is truthy the bare alternation is used.
   * NOTE(review): the flag name reads like the opposite of this behavior - confirm
   * against callers before changing it.
   *
   * @param words        Literal words; each one is regex-escaped.
   * @param noSubmatches See above.
   */
  static createMatcherFromCombiningWords( words:string[], noSubmatches:boolean ):RegExp
  {
    let regexSource = words.map( w => RegExpUtility.toRegexSource( w ) ).join( "|" );

    if ( ! noSubmatches )
    {
      regexSource = `^(${regexSource})$`;
    }

    return new RegExp( regexSource );
  }

  /**
   * Tests `value` against `matcher`.
   *
   * @returns `true` when `matcher` is falsy (no constraint), the result of a strict
   *          equality check for a string matcher, otherwise `matcher.test( value )`.
   */
  static matches( value:string, matcher:string|RegExp )
  {
    if ( ! matcher )
    {
      return true;
    }

    if ( typeof matcher === "string" )
    {
      return value === matcher;
    }

    return matcher.test( value );
  }

  /**
   * Removes the first match of `regex` from `value`.
   *
   * @returns `[ valueWithoutMatch, matchedText ]`, or `[ value, null ]` when the
   *          regex does not match.
   */
  static cutout( value:string, regex:RegExp ):string[]
  {
    let result = regex.exec( value );

    if ( ! result )
    {
      return [ value, null ];
    }

    let match = result[ 0 ];

    return [ RegExpUtility.cutoutRange( value, result.index, match.length ), match ];
  }

  /**
   * Chops `value` into the parts before, inside and after the range `[start, end)`.
   *
   * @returns A three-element array; the first entry is `null` when `start` is not
   *          above 0 and the last entry is `null` when `end` reaches the end of `value`.
   */
  static chopRange( value:string, start:number, end:number )
  {
    let chopped:string[] = [];

    chopped.push( start > 0 ? value.substring( 0, start ) : null );
    chopped.push( value.substring( start, end ) );
    chopped.push( end < value.length ? value.substring( end, value.length ) : null );

    return chopped;
  }

  /** Returns `value` without the `length` characters starting at `start`. */
  static cutoutRange( value:string, start:number, length:number ):string
  {
    return value.substring( 0, start ) + value.substring( start + length );
  }

  /**
   * Parses a duration given either as a plain number or as "minutes:seconds".
   *
   * @param value       The text to parse.
   * @param alternative Returned when `value` is empty/falsy.
   * @returns The duration as `minutes * 60 + seconds`, or `parseFloat( value )` when
   *          there is no ":". Fields after the second one are ignored.
   *          NOTE(review): "h:mm:ss" input is therefore not supported - confirm
   *          whether callers need it.
   */
  static parseDuration( value:string, alternative:number = 0 )
  {
    if ( ! value )
    {
      return alternative;
    }

    if ( value.indexOf( ":" ) === -1 )
    {
      return parseFloat( value );
    }

    let values = RegExpUtility.parseNumbers( value, [ 0, 0 ], ":" );

    return values[ 0 ] * 60 + values[ 1 ];
  }

  /**
   * Formats a duration as `minutes + delimiter + seconds` with two-digit seconds.
   *
   * @param duration        The duration; divided by 60 for the minutes part.
   * @param delimiter       Text placed between minutes and seconds.
   * @param zerofillMinutes When true, minutes are padded to at least two digits.
   */
  static asDurationString( duration:number, delimiter:string = ":", zerofillMinutes:boolean = false )
  {
    // MathX.repeat is defined in ../math/MathX; presumably it wraps the value
    // into the range [0, 60) - verify there.
    let seconds = Math.floor( MathX.repeat( duration, 60 ) ) + "";
    let minutes = Math.floor( duration / 60 ) + "";

    if ( seconds.length < 2 )
    {
      seconds = "0" + seconds;
    }

    if ( zerofillMinutes && minutes.length < 2 )
    {
      minutes = "0" + minutes;
    }

    return minutes + delimiter + seconds;
  }

  /**
   * Splits `value` at `delimiter` and parses every trimmed part with `parseFloat`.
   *
   * @param value       The text to parse.
   * @param alternative Returned when `value` is empty/falsy.
   * @param delimiter   Separator between the numbers.
   * @returns The parsed numbers; unparsable parts become `NaN`.
   */
  static parseNumbers( value:string, alternative:number[] = [], delimiter = "," )
  {
    if ( ! value )
    {
      return alternative;
    }

    return value.split( delimiter ).map( part => parseFloat( part.trim() ) );
  }

  /** Splits a path at every "/" or "\". */
  static splitPath( path:string )
  {
    return path.split( /\\|\// );
  }

  /** Returns true when `text` starts with at least one entry of `starts`. */
  static startsWithSome( text:string, starts:string[] )
  {
    return starts.some( s => text.startsWith( s ) );
  }

  /**
   * Replaces the characters Windows reserves in file names
   * (`\ / ? : | * < > "`) with `replacement`.
   *
   * @param fragmentWithoutSlashes A single path fragment; slashes in it are replaced too.
   * @param replacement            Replacement, used as a `String.replace` replacement string.
   */
  static escapePathFragmentForWindows( fragmentWithoutSlashes:string, replacement:string = "_" ):string
  {
    return fragmentWithoutSlashes.replace( /(\\|\/|\?|\:|\||\*|\<|\>|\")/g, replacement );
  }

  /**
   * Resolves ".." fragments of a "/"-separated path.
   *
   * A ".." removes the preceding fragment; when there is nothing left to remove
   * (or the preceding fragment is itself an unresolved "..") it is kept.
   */
  static resolvePath( path:string )
  {
    let resolvedFragments:string[] = [];

    for ( let fragment of path.split( "/" ) )
    {
      if ( fragment !== ".." )
      {
        resolvedFragments.push( fragment );
        continue;
      }

      // Look at the last *resolved* fragment, not at the input position.
      let last = resolvedFragments.length === 0 ? null : resolvedFragments[ resolvedFragments.length - 1 ];

      if ( last === null || last === ".." )
      {
        resolvedFragments.push( ".." );
      }
      else
      {
        resolvedFragments.pop();
      }
    }

    return resolvedFragments.join( "/" );
  }

  /**
   * Creates the relative path that leads from `sourceDirectory` to `targetDirectory`.
   *
   * Both paths are normalized first; the common leading directories are removed and
   * every remaining source directory becomes one "..".
   *
   * @returns "" when both directories are equal, otherwise the relative path.
   */
  static createRelativeDirectoryPath( sourceDirectory:string, targetDirectory:string)
  {
    if ( sourceDirectory === targetDirectory )
    {
      return "";
    }

    sourceDirectory = RegExpUtility.normalizePath( sourceDirectory );
    targetDirectory = RegExpUtility.normalizePath( targetDirectory );

    if ( sourceDirectory === targetDirectory )
    {
      return "";
    }

    let matching = RegExpUtility.getMatchingDirectories( sourceDirectory, targetDirectory );

    let shortSource = RegExpUtility.normalizePath( sourceDirectory.substring( matching.length ) );
    let shortTarget = RegExpUtility.normalizePath( targetDirectory.substring( matching.length ) );

    if ( RegExpUtility.isEmptyPath( shortSource ) )
    {
      return shortTarget;
    }

    // One ".." per remaining source directory (splitPath never returns an empty array).
    let upwards = RegExpUtility.splitPath( shortSource ).map( () => ".." ).join( "/" );

    return upwards + "/" + shortTarget;
  }

  /** Returns true when `path` holds only whitespace and at most one slash. */
  static isEmptyPath( path:string )
  {
    return /^\s*(\\|\/)?\s*$/.test( path );
  }

  /**
   * Collects all non-overlapping matches of `regex` in `source`, scanning from
   * left to right.
   *
   * The regex is made sticky and wrapped in a LexerMatcher (see ./lexer/LexerMatcher);
   * `getMatchResult( source, offset )` is presumably a match anchored at `offset`
   * or a falsy value - verify there.
   */
  static getAllMatchResultsOf( source:string, regex:RegExp )
  {
    let matcher = new LexerMatcher( "match", RegExpUtility.makeSticky( regex ) );
    let results:RegExpExecArray[] = [];
    let offset = 0;

    while ( offset < source.length )
    {
      let result = matcher.getMatchResult( source, offset );

      if ( ! result )
      {
        offset ++;
        continue;
      }

      results.push( result );

      // A zero-length match would leave the offset unchanged and loop forever.
      offset += Math.max( 1, result[ 0 ].length );
    }

    return results;
  }

  /**
   * Removes leading whitespace followed by one "/" or "\" from the start of `text`.
   * NOTE(review): despite the plural name only a single slash is removed.
   */
  static removeLeadingSlashes( text:string )
  {
    return text.replace( /^\s*(\\|\/)/, "" );
  }

  /**
   * Removes one "/" or "\" followed by trailing whitespace from the end of `text`.
   * NOTE(review): despite the plural name only a single slash is removed.
   */
  static removeTrailingSlashes( text:string )
  {
    return text.replace( /(\\|\/)\s*$/, "" );
  }

  /** Applies `removeLeadingSlashes` and then `removeTrailingSlashes`. */
  static trimSlashes( text:string )
  {
    return RegExpUtility.removeTrailingSlashes( RegExpUtility.removeLeadingSlashes( text ) );
  }

  /**
   * Removes the common indentation from the indented lines of `textContent`.
   *
   * The indentation width is the smallest leading-whitespace length among the
   * non-blank lines that start with whitespace. Blank lines and lines without
   * leading whitespace are neither measured nor changed.
   *
   * @param textContent      The text to dedent.
   * @param removeFirstEmpty When true, a blank first line is dropped.
   * @returns The lines joined with "\n".
   */
  static removeLeadingSpace( textContent:string, removeFirstEmpty:boolean = true )
  {
    let blankLine = /^\s*$/;
    let leadingSpace = /^\s+/;

    let lines = textContent.split( /(?:\r\n|\r|\n)/ );

    if ( removeFirstEmpty && blankLine.test( lines[ 0 ] ) )
    {
      lines.shift();
    }

    let offset:number|null = null;

    for ( let line of lines )
    {
      if ( blankLine.test( line ) )
      {
        continue;
      }

      let result = leadingSpace.exec( line );

      if ( result === null )
      {
        continue;
      }

      let numSpaces = result[ 0 ].length;
      offset = offset === null ? numSpaces : Math.min( offset, numSpaces );
    }

    if ( offset === null )
    {
      return lines.join( "\n" );
    }

    let indentation = offset;

    let dedented = lines.map(
      line =>
      {
        if ( blankLine.test( line ) || ! leadingSpace.test( line ) )
        {
          return line;
        }

        return line.substring( indentation );
      }
    );

    return dedented.join( "\n" );
  }

  /**
   * Returns the longest prefix shared by all strings of `s`.
   *
   * @returns "" for a missing/empty list or when any entry is null, undefined or "".
   */
  static getMatchingStartOfAll( s:string[] ):string
  {
    if ( s === null || s === undefined || s.length === 0 )
    {
      return "";
    }

    if ( s.some( e => e === null || e === undefined || e === "" ) )
    {
      return "";
    }

    if ( s.length === 1 )
    {
      return s[ 0 ];
    }

    let first = s[ 0 ];
    let length = s.map( e => e.length ).reduce( ( a, b ) => Math.min( a, b ) );

    for ( let i = 0; i < length; i++ )
    {
      for ( let j = 1; j < s.length; j ++ )
      {
        if ( s[ j ][ i ] !== first[ i ] )
        {
          return first.substring( 0, i );
        }
      }
    }

    return first.substring( 0, length );
  }

  /**
   * Returns the leading directories that `a` and `b` have in common,
   * joined with "/". Both paths are split at "/" and "\".
   */
  static getMatchingDirectories( a:string, b:string )
  {
    let matching:string[] = [];
    let directoriesA = RegExpUtility.splitPath( a );
    let directoriesB = RegExpUtility.splitPath( b );

    for ( let i = 0; i < directoriesA.length && i < directoriesB.length; i++ )
    {
      if ( directoriesA[ i ] !== directoriesB[ i ] )
      {
        break;
      }

      matching.push( directoriesA[ i ] );
    }

    return matching.join( "/" );
  }

  /** Returns the longest common prefix of `a` and `b` ("" when either is empty/falsy). */
  static getMatchingStart( a:string, b:string )
  {
    if ( ! a || ! b )
    {
      return "";
    }

    let length = Math.min( a.length, b.length );

    for ( let i = 0; i < length; i++ )
    {
      if ( a[ i ] !== b[ i ] )
      {
        return a.substring( 0, i );
      }
    }

    // No difference found: the shorter string is the common prefix.
    return length === a.length ? a : b;
  }

  /** Returns the longest common suffix of `a` and `b` ("" when either is empty/falsy). */
  static getMatchingEnd( a:string, b:string )
  {
    if ( ! a || ! b )
    {
      return "";
    }

    let length = Math.min( a.length, b.length );

    for ( let i = 0 ; i < length; i++ )
    {
      let aIndex = a.length - ( 1 + i );
      let bIndex = b.length - ( 1 + i );

      if ( a[ aIndex ] !== b[ bIndex ] )
      {
        return a.substring( aIndex + 1, a.length );
      }
    }

    // No difference found: the shorter string is the common suffix.
    return length === a.length ? a : b;
  }

  /**
   * Template regex for `<tag ...>content</tag>`; the literal text "tag" is
   * substituted by `getTagRegex`. Groups: 1 = opening tag, 2 = content, 3 = closing tag.
   */
  static readonly tagMatchingRegex =
    /(<tag(?: (?:\w|\s|\-|\=|"(?:\.|(?:\\\")|[^\""\n])*"|'(?:\.|(?:\\\')|[^\''\n])*')*)?>)((?:.|\n)*)(<\/tag>)/;

  /**
   * Creates the tag-matching regex for a concrete tag name.
   *
   * @param tag Inserted into the regex source as is (it is not regex-escaped).
   */
  static getTagRegex( tag:string )
  {
    return new RegExp( RegExpUtility.tagMatchingRegex.source.replace( /tag/g, () => tag ) );
  }

  /**
   * Returns the text between the opening and closing `tag` in `source`,
   * or `null` when the tag is not found.
   */
  static getTagContent( source:string, tag:string )
  {
    let result = RegExpUtility.getTagRegex( tag ).exec( source );

    if ( ! result )
    {
      return null;
    }

    return result[ 2 ];
  }

  /**
   * Replaces the text between the opening and closing `tag` in `source` with `value`.
   * `value` is inserted literally; `source` is returned unchanged when the tag is not found.
   */
  static setTagContent( source:string, tag:string, value:string )
  {
    let regex = RegExpUtility.getTagRegex( tag );

    // A replacer function prevents "$1", "$&", "$$" ... inside value from being expanded.
    return source.replace(
      regex,
      ( _match:string, openingTag:string, _content:string, closingTag:string ) => openingTag + value + closingTag
    );
  }

  /** Appends "/" to `path` unless it already ends with one. */
  static ensureSlash( path:string )
  {
    return path.endsWith( "/" ) ? path : path + "/";
  }

  /** Rounds `value` to an integer and left-pads it with "0" to at least `digits` characters. */
  static minimumDigitsInt( value:number, digits:number = 2 )
  {
    let stringValue = Math.round( value ) + "";

    while ( stringValue.length < digits )
    {
      stringValue = "0" + stringValue;
    }

    return stringValue;
  }

  /**
   * Formats `value` rounded to a fixed number of decimal places.
   *
   * @param value    The number to format.
   * @param numZeros Number of decimal places; rounded to an integer first. When the
   *                 rounded count is zero or negative the result has no decimal point.
   * @returns The formatted number.
   */
  static round( value:number, numZeros:number )
  {
    // Round the digit count before testing it, otherwise e.g. 0.4 would
    // pass the test and produce a trailing "." without decimals.
    numZeros = Math.round( numZeros );

    if ( numZeros <= 0 )
    {
      return Math.round( value ) + "";
    }

    let sign = value < 0 ? "-" : "";
    let stringValue = Math.round( Math.abs( value ) * Math.pow( 10, numZeros ) ) + "";

    // Keep at least one digit in front of the decimal point.
    let minimumLength = numZeros + 1;

    while ( stringValue.length < minimumLength )
    {
      stringValue = "0" + stringValue;
    }

    let split = stringValue.length - numZeros;

    return sign + stringValue.substring( 0, split ) + "." + stringValue.substring( split );
  }

  /** Creates a regex that matches any of the given literal strings. */
  static createFromList( list:string[] )
  {
    return new RegExp( list.map( l => RegExpUtility.toRegexSource( l ) ).join( "|" ) );
  }

  /**
   * Escapes `source` so that it matches literally when used as regex source.
   *
   * Escaped characters: `. ( ) [ ] ^ $ * + - ? / | \ { }`.
   */
  static toRegexSource( source:string )
  {
    // Single pass, so the backslashes that are inserted are never escaped again.
    return source.replace( /[.()[\]^$*+\-?\/|\\{}]/g, "\\$&" );
  }

  /**
   * Upper-cases the first character of `source`.
   *
   * @returns `null` when `source` is null, undefined or empty.
   */
  static upperCaseFirst( source:string )
  {
    if ( source === null || source === undefined || source.length === 0 )
    {
      return null;
    }

    return source[ 0 ].toUpperCase() + source.substring( 1 );
  }

  /**
   * Lower-cases the first character of `source`.
   *
   * @returns `null` when `source` is null, undefined or empty.
   */
  static lowerCaseFirst( source:string )
  {
    if ( source === null || source === undefined || source.length === 0 )
    {
      return null;
    }

    return source[ 0 ].toLowerCase() + source.substring( 1 );
  }

  /** Matches a trailing ".extension" made of word characters. */
  private static fileTypeExtensionRegex = /\.(\w+)$/;

  /**
   * Removes a leading "protocol://" from `link`. The protocol part itself is
   * optional in the pattern, so a bare leading "://" is removed as well.
   *
   * @param link      The link to trim.
   * @param protocols Literal protocol names to recognize.
   */
  static trimProtocols( link:string, protocols:string[]=["https","http","ftp","sftp"] )
  {
    let escapedProtocols = protocols.map( p => RegExpUtility.toRegexSource( p ) );
    let regex = new RegExp( "^(" + escapedProtocols.join( "|" ) + ")?\\:\\/\\/" );

    return link.replace( regex, "" );
  }

  /** Removes a trailing ".extension" (word characters only) from `source`. */
  static trimFileTypeExtension( source:string )
  {
    return source.replace( RegExpUtility.fileTypeExtensionRegex, "" );
  }

  /**
   * Replaces all occurrences of every key of `replacements` with its value.
   * Keys and values are literal text; the entries are applied one after another
   * in the iteration order of the map.
   */
  static replaceAll( text:string, replacements:Map<string,string> )
  {
    for ( let [ variable, replacement ] of replacements )
    {
      let regex = new RegExp( RegExpUtility.toRegexSource( variable ), "g" );

      // A replacer function keeps "$" sequences of the replacement literal.
      text = text.replace( regex, () => replacement );
    }

    return text;
  }

  /**
   * Returns the text after the last "." of `source`.
   *
   * @returns "" when there is no "." or when the last "." belongs to a directory
   *          name (a "/" or "\" follows it).
   */
  static getFileTypeExtension( source:string )
  {
    let lastDot = source.lastIndexOf( "." );

    if ( lastDot === -1 )
    {
      return "";
    }

    let extension = source.substring( lastDot + 1 );

    // "dir.v2/file" has no extension; the dot is part of the directory name.
    if ( /[\\\/]/.test( extension ) )
    {
      return "";
    }

    return extension;
  }

  /** Removes `start` from the beginning of `text` when `text` starts with it. */
  static removeStarting( text:string, start:string )
  {
    return text.startsWith( start ) ? text.substring( start.length ) : text;
  }

  /** Returns `text` without the `length` characters starting at `start`. */
  static removeInner( text:string, start:number, length:number )
  {
    return text.substring( 0, start ) + text.substring( start + length );
  }

  /** Returns `regexp` itself when it is sticky, otherwise a copy with the "y" flag added. */
  static makeSticky( regexp:RegExp )
  {
    if ( regexp.sticky )
    {
      return regexp;
    }

    return new RegExp( regexp.source, regexp.flags + "y" );
  }

  /**
   * Returns the list entry whose text is closest to `matching`
   * (see `getClosestIndex`); `undefined` when no entry qualifies.
   */
  static getClosestMatching<T>( list:T[], matching:string, getText:(t:T)=>string = null ):T
  {
    let index = RegExpUtility.getClosestIndex( list, matching, getText );

    return list[ index ];
  }

  /**
   * Returns the index of the list entry whose text is closest to `matching`.
   *
   * An exact match wins; otherwise the first entry with the smallest
   * LevenshteinDistance.compute result is chosen.
   *
   * @param list     The candidates.
   * @param matching The text to look for.
   * @param getText  Converts an entry to text; defaults to string concatenation.
   * @returns The index, or -1 when the list is empty or every distance is 100000 or more.
   */
  static getClosestIndex<T>( list:T[], matching:string, getText:(t:T)=>string = null ):number
  {
    if ( ! getText )
    {
      getText = ( t )=>{ return t + "" };
    }

    let exactIndex = list.findIndex( l => getText( l ) === matching );

    if ( exactIndex !== -1 )
    {
      return exactIndex;
    }

    let closestIndex = -1;
    let closestValue = 100000;

    for ( let i = 0; i < list.length; i++ )
    {
      let distance = LevenshteinDistance.compute( getText( list[ i ] ), matching );

      if ( distance < closestValue )
      {
        closestValue = distance;
        closestIndex = i;
      }
    }

    return closestIndex;
  }

  /** Returns `regexp` itself when it is global, otherwise a copy with the "g" flag added. */
  static makeGlobal( regexp:RegExp )
  {
    if ( regexp.global )
    {
      return regexp;
    }

    return new RegExp( regexp.source, regexp.flags + "g" );
  }

  /** Returns `regexp` itself when it ignores case, otherwise a copy with the "i" flag added. */
  static makeIgnoreCase( regexp:RegExp )
  {
    if ( regexp.ignoreCase )
    {
      return regexp;
    }

    return new RegExp( regexp.source, regexp.flags + "i" );
  }

  /** Left-pads `source` with "0" until it has at least `minimumLength` characters. */
  static prependZeros( source:string, minimumLength:number = 2 )
  {
    while ( source.length < minimumLength )
    {
      source = "0" + source;
    }

    return source;
  }

  /** Creates a regex matching the literal `source` between word boundaries. */
  static createWordMatcher( source:string )
  {
    return new RegExp( "\\b" + RegExpUtility.toRegexSource( source ) + "\\b" );
  }

  /**
   * Creates a regex matching the literal `source` delimited by whitespace or the
   * string boundaries (the way a name appears in a space-separated class list).
   */
  static createClassMatcher( source:string )
  {
    return new RegExp( "(^|\\s)" + RegExpUtility.toRegexSource( source ) + "(\\s|$)" );
  }

  /** Creates a regex matching the literal `source` anywhere. */
  static createMatcher( source:string )
  {
    return new RegExp( RegExpUtility.toRegexSource( source ) );
  }

  /**
   * Creates a new regex from `regexp` by replacing the first occurrence of the
   * literal text `matching` in its source with `replacement`; flags are kept.
   *
   * @param regexp      The template regex.
   * @param matching    Placeholder text inside the regex source.
   * @param replacement Regex source inserted literally for the placeholder.
   */
  static createRegExp( regexp:RegExp, matching:string, replacement:string )
  {
    // A replacer function keeps "$" sequences (e.g. "$'" inside a character
    // class) of the inserted regex source literal.
    let source = regexp.source.replace( matching, () => replacement );

    return new RegExp( source, regexp.flags );
  }

  /**
   * Pattern for a decimal number with an optional fraction and an optional signed
   * exponent. It is not anchored.
   */
  static readonly ES_NUMBER_REGEX = /((\d+)?\.)?\d+(e(\+|\-)\d+)?/;

  /**
   * Tests `value` against `ES_NUMBER_REGEX`.
   * NOTE(review): the pattern is unanchored, so this is true for any text that
   * contains a digit (e.g. "12px") - confirm whether a whole-string test was intended.
   */
  static isESNumber( value:string )
  {
    return RegExpUtility.ES_NUMBER_REGEX.test( value );
  }

  /**
   * Returns everything before the last "/" or "\" of `path`.
   *
   * @param path        The path.
   * @param alternative Returned when `path` contains no slash at all.
   */
  static parentPath( path:string, alternative:string ="")
  {
    let lastSeparator = Math.max( path.lastIndexOf( "/" ), path.lastIndexOf( "\\" ) );

    if ( lastSeparator === -1 )
    {
      return alternative;
    }

    return path.substring( 0, lastSeparator );
  }

  /** Returns the last fragment of `path` without its file type extension. */
  static fileNameWithoutExtension( path:string )
  {
    return RegExpUtility.trimFileTypeExtension( RegExpUtility.fileNameOrLastPath( path ) );
  }

  /**
   * Returns the last fragment of the normalized `path`: the file name or, for a
   * directory path, the name of the last directory.
   */
  static fileNameOrLastPath( path:string )
  {
    path = RegExpUtility.normalizePath( path );

    let lastSlash = path.lastIndexOf( "/" );

    if ( lastSlash === -1 )
    {
      return path;
    }

    return path.substring( lastSlash + 1 );
  }

  /**
   * Normalizes every given path and joins them with "/".
   * Empty paths are not filtered out (see `joinPaths` for that).
   */
  static join( pathA:string, pathB:string, ...paths:string[] )
  {
    return [ pathA, pathB ]
      .concat( paths )
      .map( p => RegExpUtility.normalizePath( p ) )
      .join( "/" );
  }

  /**
   * Drops the empty entries of `paths` (see `isEmptyPath`), normalizes the
   * remaining ones and joins them with "/". The passed array is not modified.
   */
  static joinPaths( paths:string[] )
  {
    // Work on a copy so the caller's array stays untouched.
    let normalizedPaths = paths.slice();

    // Iterate backwards because entries are removed while iterating.
    for ( let i = normalizedPaths.length - 1; i >= 0; i-- )
    {
      if ( RegExpUtility.isEmptyPath( normalizedPaths[ i ] ) )
      {
        // Arrays.removeAt is defined in ../tools/Arrays; presumably it removes
        // the element at index i in place - verify there.
        Arrays.removeAt( normalizedPaths, i );
      }
    }

    for ( let i = 0; i < normalizedPaths.length; i++ )
    {
      normalizedPaths[ i ] = RegExpUtility.normalizePath( normalizedPaths[ i ] );
    }

    return normalizedPaths.join( "/" );
  }

  /**
   * Normalizes a path: "\" becomes "/", repeated slashes are merged and all
   * leading and trailing slashes are removed.
   */
  static normalizePath( path:string )
  {
    return path
      .replace( /\\/g, "/" )
      .replace( /\/\/+/g, "/" )
      .replace( /^\/+/, "" )
      .replace( /\/+$/, "" );
  }
}