Показать сообщение отдельно
Старый 17.11.2010, 17:08   #2
Randomize
[object Object]
 
Аватар для Randomize
 
Регистрация: 01.08.2008
Адрес: В России
Сообщений: 4,355
Написано 2,471 полезных сообщений
(для 6,852 пользователей)
Ответ: Поиск Dll регулярных выражений

;Demo: each line compiles a pattern, matches it against the start of the given string
;and prints the matched text (an empty line means the pattern did not match).
Print RegEx_Process("H(д|ae?)ndel","Handel")
Print RegEx_Process("H(д|ae?)ndel","Hдendel")
Print RegEx_Process("H(д|ae?)ndel","Hendel")
Print RegEx_Process("H(д|ae?)ndel","HaendelASDFGHB!")
Print RegEx_Process("[hc]+at","hat")
Print RegEx_Process("\\","\")
Print RegEx_Process("[hc]+at","chat")
Print RegEx_Process("[hc]+at\n","cat"+Chr(13))
Print RegEx_Process("-?[:digit:]+","1256")
Print RegEx_Process("bo+o","boogle!")
Print RegEx_Process(Chr(34)+"[^\n]*"+Chr(34),Chr(34)+"String literal!.><"+Chr(34)+"wakakaka")
Print RegEx_Process("0[0-9]*(([uU][lL]?)|([lL][uU]?))?","012")
Print RegEx_Process("0[0-9]*(([uU][lL]?)|([lL][uU]?))?","012u")
Print RegEx_Process("0[0-9]*(([uU][lL]?)|([lL][uU]?))?","012l")
Print RegEx_Process("0[0-9]*(([uU][lL]?)|([lL][uU]?))?","012LU")
Print RegEx_Process("0[0-9]*(([uU][lL]?)|([lL][uU]?))?","012uL")
Print RegEx_Process("N(u|l)*","Null")


;Keep the window open until a key is pressed, then quit.
WaitKey
End





;===============================================================================
;Regular expressions
;===============================================================================


;One node of a compiled pattern. RegEx_Parse builds these; RegEx_Match walks them.
Type RegEx_NFA
    Field nType                ;Node kind: -1 literal ("simple") pattern, 0 character-set node, 3 "|", 4 "*", 5 "+", 6 "?", 8 concatenation (removed after parsing), 12 accept node
    Field state                ;On the root: flag bits %001 line-start only, %010 line-end only, %100 case-insensitive. Also used as a visited mark by RegEx_Append and RegEx_Delete
    Field stack                ;Bank handle: the literal bytes for a simple pattern; saved input positions for operator nodes while matching
    Field value$               ;Set of characters a character-set node accepts (operator nodes hold their operator character)
    Field lNode.RegEx_NFA      ;Operand / branch entered first by operator nodes
    Field rNode.RegEx_NFA      ;Node that follows this one (also the alternative taken on backtracking)
    Field root.RegEx_NFA       ;Enclosing "*" or "+" node to return to when this node ends a loop body (set by RegEx_SetRoot)
End Type


Function RegEx_Process$(regIn$,inputString$)    ;Compile a pattern, match it at the start of a string, return the matched text
    ;regIn       - regular expression source
    ;inputString - text to test; matching starts at its first character
    ;Returns the matched text, or "" if the pattern does not match.
    Local oNode.RegEx_NFA,char$,inputBank
    
    oNode=RegEx_Parse(regIn)
    inputBank=StrToBank(inputString)    ;Keep the handle so the bank can be released below
    char=RegEx_Match(Handle(oNode),inputBank,0)
    
    RegEx_Delete(oNode)
    FreeBank inputBank
    Return char
End Function

Function RegEx_Match$(RegExH,inputStringBank,startPtr)    ;Match a compiled pattern against the bytes of a bank
    ;RegExH          - Handle() of the root RegEx_NFA node returned by RegEx_Parse
    ;inputStringBank - bank holding the text to test
    ;startPtr        - byte offset in the bank at which the match has to begin
    ;Returns the matched text, or "" when the pattern does not match at startPtr.
    ;
    ;nodeStack is a bank used as a stack of 14-byte records:
    ;  +0  Int  handle of the node entered
    ;  +4  Int  input position when it was entered
    ;  +8  Byte node type
    ;  +9  Byte 1 while the node still has an alternative (its rNode) to try on backtracking
    ;  +10 Int  number of input positions saved on that node's own stack
    Local node.RegEx_NFA,oNode.RegEx_NFA,cPtr,maxPtr,i,char$,cAsc
    Local nodeStack,stackSize,caseSen
    
    oNode=Object.RegEx_NFA(RegExH)
    If oNode\nType<0 Then Return RegEx_MatchSimple(oNode,inputStringBank,startPtr)    ;Literal pattern: take the fast path
    
    If (oNode\state And %01)    ;If it's a line start pattern only...
        If startPtr>1
            ;$A0D is the byte pair 13,10 (CR,LF) when read as a little-endian short
            If PeekShort(inputStringBank,startPtr-2)<>$A0D Then Return ""
        Else
            If startPtr=1 Then Return ""
        EndIf
    EndIf
    caseSen=Not(oNode\state And %100)
    
    node=oNode
    maxPtr=BankSize(inputStringBank)
    cPtr=startPtr
    ;Bottom record: its type byte (+8) is never written, so the backtracking loop below never resumes from it
    nodeStack=CreateBank(14)
    PokeInt nodeStack,0,Handle(node)
    PokeInt nodeStack,4,cPtr
    PokeByte nodeStack,9,node\nType
    
    
    While BankSize(nodeStack)>0
        ;Push a record for the node about to be processed
        stackSize=BankSize(nodeStack)
        ResizeBank nodeStack,stackSize+14
        PokeInt nodeStack,stackSize,Handle(node)
        PokeInt nodeStack,stackSize+4,cPtr
        PokeByte nodeStack,stackSize+8,node\nType
        PokeInt nodeStack,stackSize+10,1
        
        Select node\nType
            Case 3,4,6        ;"|", "*", "?": remember the position, try lNode first, rNode stays available as the alternative
                If node\stack=0 Then node\stack=CreateBank(4):Else ResizeBank node\stack,4
                PokeInt node\stack,0,cPtr
                node=node\lNode
                PokeByte nodeStack,stackSize+9,1
            Case 5            ;"+": the first pass through lNode is mandatory, so no alternative yet
                If node\stack=0 Then node\stack=CreateBank(4):Else ResizeBank node\stack,4
                PokeInt node\stack,0,cPtr
                node=node\lNode
                PokeByte nodeStack,stackSize+9,0
            Case 12            ;Accept node reached: the pattern matched
                Exit
            Default            ;Character-set node: test the next input byte
                If cPtr<maxPtr
                    cAsc=PeekByte(inputStringBank,cPtr)
                    If caseSen=0
                        If cAsc>=65 And cAsc<=90 Then cAsc=cAsc+32    ;Fold A-Z to a-z
                    EndIf
                    char=Chr(cAsc)
                Else
                    char=""        ;End of input
                EndIf
                
                If Instr(node\value,char)>0 And char<>""
                    cPtr=cPtr+1
                    
                    If node\rNode<>Null
                        node=node\rNode
                    Else
                        ;End of a loop body: unwind to the record of the enclosing loop node (node\root)
                        Repeat
                            ResizeBank nodeStack,stackSize
                            stackSize=stackSize-14
                        Until PeekInt(nodeStack,stackSize)=Handle(node\root)
                        node=Object.RegEx_NFA(PeekInt(nodeStack,stackSize))
                        
                        ;Save the new position on the loop node and go round its body again
                        PokeByte nodeStack,stackSize+9,node\nType<>3
                        ResizeBank node\stack,BankSize(node\stack)+4
                        PokeInt node\stack,BankSize(node\stack)-4,cPtr
                        PokeInt nodeStack,stackSize+10,PeekInt(nodeStack,stackSize+10)+1
                        
                        node=node\lNode
                    EndIf
                Else
                    ;Mismatch: pop records until one still offers an alternative
                    Repeat
                        ResizeBank nodeStack,stackSize
                        If stackSize=0 Then Exit        ;Nothing left to try; the outer While ends as well
                        stackSize=stackSize-14
                        
                        If PeekByte(nodeStack,stackSize+8)<>0 And PeekByte(nodeStack,stackSize+9)=1
                            ;Restore the most recently saved position and continue with the node's rNode
                            node=Object.RegEx_NFA(PeekInt(nodeStack,stackSize))
                            cPtr=PeekInt(node\stack,BankSize(node\stack)-4)
                            ResizeBank node\stack,BankSize(node\stack)-4
                            node=node\rNode
                            PokeInt nodeStack,stackSize+10,PeekInt(nodeStack,stackSize+10)-1
                            PokeByte nodeStack,stackSize+9,PeekInt(nodeStack,stackSize+10)>0
                            Exit
                        EndIf
                    Forever
                EndIf
        End Select
    Wend
    
    ;Build the result from the original bytes (not the case-folded ones)
    If node\nType=12
        char=""
        For i=startPtr To cPtr-1
            char=char+Chr(PeekByte(inputStringBank,i))
        Next
    Else
        char=""
    End If
    
    If (oNode\state And %010)        ;If it's a line end pattern only
        If startPtr+Len(char)<BankSize(inputStringBank)
            If startPtr+Len(char)=BankSize(inputStringBank)-1
                char=""
            Else
                If PeekShort(inputStringBank,startPtr+Len(char))<>$A0D Then char=""
            EndIf
        EndIf
    EndIf
    
    RegEx_ClearStacks(oNode)
    FreeBank nodeStack
    Return char
End Function

Function RegEx_MatchSimple$(node.RegEx_NFA,inputStringBank,startPtr)    ;Quicker test for simple patterns
    ;node            - simple-pattern node (nType -1); the literal bytes are stored in node\stack
    ;inputStringBank - bank holding the text to test
    ;startPtr        - byte offset in the bank at which the literal has to begin
    ;Returns the matched characters, or "" when the literal is not found at startPtr.
    ;NOTE(review): in case-insensitive mode the returned text is the lower-cased form,
    ;whereas RegEx_Match returns the original bytes.
    Local caseSen,i,c,inputString$
    
    ;The literal lives in node\stack (node\value is never set for simple nodes), so compare
    ;against that length; otherwise a short input is read past the end of the bank below.
    If BankSize(inputStringBank)<startPtr+BankSize(node\stack) Then Return ""    ;If the remaining string is shorter than the pattern
    
    If (node\state And %01)    ;If it's a line start pattern only...
        If startPtr>1
            ;$A0D is the byte pair 13,10 (CR,LF) when read as a little-endian short
            If PeekShort(inputStringBank,startPtr-2)<>$A0D Then Return ""
        Else
            If startPtr=1 Then Return ""
        EndIf
    EndIf
    caseSen=Not(node\state And %100)
    
    For i=0 To BankSize(node\stack)-1
        c=PeekByte(inputStringBank,startPtr+i)
        If caseSen=0
            If (c>=65 And c<=90) Then c=c+32    ;Fold A-Z to a-z; RegEx_Parse lower-cases the literal in this mode
        EndIf
        If c<>PeekByte(node\stack,i) Then Return "":Else inputString=inputString+Chr(c)
    Next
    
    If (node\state And %010)        ;If it's a line end pattern only
        i=BankSize(node\stack)
        If startPtr+i<BankSize(inputStringBank)
            If startPtr+i=BankSize(inputStringBank)-1
                Return ""
            Else
                If PeekShort(inputStringBank,startPtr+i)<>$A0D Then Return ""
            EndIf
        EndIf
    EndIf
    
    Return inputString
End Function

Function RegEx_Parse.RegEx_NFA(regIn$,caseSensitive=1)        ;Compile a regular expression into a graph of RegEx_NFA nodes
    ;regIn         - pattern source. A leading "^" / trailing unescaped "$" restrict the match to a line start / line end.
    ;caseSensitive - pass 0 to match letters regardless of case
    ;Returns the root node; release it with RegEx_Delete when done.
    ;Operators are converted with two stacks (operators and operands), then the explicit
    ;concatenation nodes are folded into rNode links and an accept node (nType 12) is appended.
    Local cPtr,opStack,valStack,tok$,opt$,inClass
    Local oNode.RegEx_NFA,lNode.RegEx_NFA
    Local lineStart,lineEnd
    
    If Left(regIn,1)="^"    ;This pattern may only match the start of a line
        regIn=Mid(regIn,2)
        lineStart=True
    EndIf
    If Right(regIn,1)="$"    ;This pattern may only match the end of a line
        If Right(regIn,2)<>"\$"
            regIn=Left(regIn,Len(regIn)-1)
            lineEnd=True
        EndIf
    EndIf
    
    If RegEx_IsSimple(regIn)    ;No metacharacters: store the literal bytes and let RegEx_MatchSimple compare them
        oNode=New RegEx_NFA
        oNode\nType=-1
        If lineStart Then oNode\state=1
        If lineEnd Then oNode\state=oNode\state+2
        If caseSensitive=0 Then oNode\state=oNode\state+4:regIn=Lower(regIn)
        oNode\stack=StrToBank(regIn)
        Return oNode
    EndIf
    
    While cPtr<Len(regIn)        ;Preprocess regIn to insert explicit concatenation operator Chr(8)
        cPtr=cPtr+1
        tok=tok+Mid(regIn,cPtr,1)
        If Right(tok,1)<>"\" Or Right(tok,2)="\\"
            If cPtr=Len(regIn) Then Exit
            If Mid(regIn,cPtr,1)="["
                If cPtr>1
                    If Mid(regIn,cPtr-1,1)<>"\" Then inClass=inClass+1
                Else
                    inClass=inClass+1
                EndIf
            EndIf
            opt=Mid(regIn,cPtr+1,1)
            If inClass=False
                If opt<>"*" And opt<>"+" And opt<>"?" And opt<>"|" And opt<>")"
                    If Mid(regIn,cPtr,1)<>"(" And Mid(regIn,cPtr,1)<>"|"
                        tok=tok+Chr(8)
                    EndIf
                EndIf
            Else
                If opt="]" And Mid(regIn,cPtr,1)<>"\" Then inClass=inClass-1
            EndIf
        EndIf
    Wend
    
    regIn=tok
    valStack=CreateBank()
    opStack=CreateBank()
    cPtr=1
    inClass=False
    
    While cPtr<=Len(regIn)
        tok=""        ;Get token
        If Mid(regIn,cPtr,1)="["        ;Character class
            cPtr=cPtr+1
            inClass=inClass+1
            Repeat
                If Mid(regIn,cPtr,1)="]"
                    inClass=inClass-1
                    If inClass Then tok=tok+"]"
                    cPtr=cPtr+1
                    If inClass=0 Then Exit
                ElseIf Mid(regIn,cPtr,1)="["
                    If inClass Then tok=tok+"["
                    inClass=inClass+1
                    cPtr=cPtr+1
                ElseIf Mid(regIn,cPtr,1)="\"
                    Select Mid(regIn,cPtr+1,1)
                        Case "[","]"        ;Keep the escape; RegEx_GetCharacterClass resolves it
                            tok=tok+"\"+Mid(regIn,cPtr+1,1)
                        Case "n"
                            tok=tok+Chr(13)
                        Case "r"
                            tok=tok+Chr(10)
                        Case "t"
                            tok=tok+Chr(9)
                        Default
                            tok=tok+Mid(regIn,cPtr+1,1)
                    End Select
                    cPtr=cPtr+2
                Else
                    tok=tok+Mid(regIn,cPtr,1)
                    cPtr=cPtr+1
                EndIf
            Forever
            tok=RegEx_GetCharacterClass(tok)
        ElseIf Mid(regIn,cPtr,1)="\"    ;Escape character
            If Mid(regIn,cPtr+1,1)="n"
                tok=Chr(13)
            ElseIf Mid(regIn,cPtr+1,1)="r"
                tok=Chr(10)
            ElseIf Mid(regIn,cPtr+1,1)="t"
                tok=Chr(9)
            Else
                tok=Mid(regIn,cPtr+1,1)
            EndIf
            cPtr=cPtr+2
        Else    ;Single character
            tok=Mid(regIn,cPtr,1)
            Select tok        ;Operators are re-coded as Chr(1)..Chr(6) so they cannot be confused with literals
                Case "(":tok=Chr(1)
                Case ")":tok=Chr(2)
                Case "|":tok=Chr(3)
                Case "*":tok=Chr(4)
                Case "+":tok=Chr(5)
                Case "?":tok=Chr(6)
                Case ".":tok=RegEx_GetCharacterClass("...")
            End Select
            cPtr=cPtr+1
        EndIf
        
        If Asc(tok)<9        ;Token is operator
            If tok=Chr(1) Or RegEx_Precedence(tok)>RegEx_Precedence(RegEx_StackTop(opStack))
                RegEx_StackPush opStack,tok
            Else
                If tok=Chr(2)        ;")": reduce back to the matching "(" and discard it
                    While RegEx_StackTop(opStack)<>Chr(1)
                        RegEx_BuildOperatorNode opStack,valStack
                    Wend
                    RegEx_StackPop(opStack)
                ElseIf RegEx_Precedence(tok)<=RegEx_Precedence(RegEx_StackTop(opStack))
                    While RegEx_Precedence(tok)<=RegEx_Precedence(RegEx_StackTop(opStack))
                        RegEx_BuildOperatorNode opStack,valStack
                    Wend
                    RegEx_StackPush opStack,tok
                EndIf
            EndIf
        Else        ;Token is value
            lNode=New RegEx_NFA
            ;RegEx_Match folds input letters to lower case in case-insensitive mode,
            ;so the accepted character set has to be lower case too
            If caseSensitive=0 Then tok=Lower(tok)
            lNode\value=tok
            RegEx_StackPush valStack,Handle(lNode)        ;Push to valstack
        EndIf
    Wend
    
    While RegEx_StackTop(opStack)<>""        ;Reduce whatever operators are left
        RegEx_BuildOperatorNode opStack,valStack
    Wend
    
    oNode=Object.RegEx_NFA(RegEx_StackPop(valStack))
    FreeBank valStack
    FreeBank opStack
    
    RegEx_SetState(oNode,0)
    oNode=RegEx_RemoveConcatenations(oNode)
    lNode=New RegEx_NFA
    lNode\nType=12        ;Accept node, appended to every open end of the graph
    RegEx_Append(oNode,lNode)
    RegEx_SetState(oNode,0)
    RegEx_SetRoots oNode
    
    If lineStart Then oNode\state=1
    If lineEnd Then oNode\state=oNode\state+2
    If caseSensitive=0 Then oNode\state=oNode\state+4
    
    Return oNode
End Function

Function RegEx_BuildOperatorNode(opStack,valStack)    ;Helper for RegEx_Parse: pop one operator, build its node, push the node's handle
    Local opt$,lOp$,rOp$,oNode.RegEx_NFA
    
    opt=RegEx_StackPop(opStack)
    rOp=RegEx_StackPop(valStack)
    ;"|" and concatenation take two operands; "*", "+" and "?" take one, which becomes lNode
    If RegEx_Precedence(opt)<3 Then lOp=RegEx_StackPop(valStack):Else lOp=rOp:rOp=""
    oNode=New RegEx_NFA
    oNode\value=opt
    oNode\nType=Asc(opt)
    oNode\lNode=Object.RegEx_NFA(lOp)
    oNode\rNode=Object.RegEx_NFA(rOp)
    RegEx_StackPush valStack,Handle(oNode)
End Function

Function RegEx_IsSimple(regIn$)    ;True when the pattern contains none of the metacharacters ( * + . ? [ \ |
    Local pos,meta$
    meta="(*+.?[\|"
    pos=1
    While pos<=Len(regIn)
        ;Any metacharacter means the pattern needs the full NFA matcher
        If Instr(meta,Mid(regIn,pos,1))>0 Then Return False
        pos=pos+1
    Wend
    Return True
End Function

Function RegEx_RemoveConcatenations.RegEx_NFA(node.RegEx_NFA)    ;Replace concatenation nodes (nType 8) by rNode links; returns the new top node
    Local keep.RegEx_NFA
    
    ;While the top node is a concatenation: chain its right operand after its left one,
    ;discard the concatenation node and carry on with the left operand
    Repeat
        If node\nType<>8 Then Exit
        keep=node\lNode
        RegEx_Append(keep,node\rNode)
        RegEx_SetState(node,0)        ;Clear the visited marks RegEx_Append left behind
        Delete node
        node=keep
    Forever
    
    ;Same treatment for both children
    If node\lNode<>Null
        node\lNode=RegEx_RemoveConcatenations(node\lNode)
    EndIf
    If node\rNode<>Null
        node\rNode=RegEx_RemoveConcatenations(node\rNode)
    EndIf
    Return node
End Function

Function RegEx_Append(parent.RegEx_NFA,child.RegEx_NFA)    ;Link child after every open end reachable from parent
    ;state doubles as a visited mark so that no node is processed twice
    If parent\state<>0 Then Return
    parent\state=1
    
    If parent\rNode<>Null
        RegEx_Append(parent\rNode,child)
    Else
        parent\rNode=child
    EndIf
    
    ;For "|" (3) and "?" (6) the lNode branch is followed by child as well
    Select parent\nType
        Case 3,6
            RegEx_Append(parent\lNode,child)
    End Select
End Function

Function RegEx_SetRoots(node.RegEx_NFA)    ;Give the end nodes of every "*" / "+" body a link back to their loop node
    Select node\nType
        Case 4,5        ;"*" and "+": the body is the lNode subtree
            RegEx_SetRoot node\lNode,node
    End Select
    
    ;Visit the rest of the graph
    If node\lNode<>Null
        RegEx_SetRoots node\lNode
    EndIf
    If node\rNode<>Null
        RegEx_SetRoots node\rNode
    EndIf
End Function

Function RegEx_SetRoot(node.RegEx_NFA,root.RegEx_NFA)    ;Store root in every node that ends the chain starting at node
    If node\rNode<>Null
        ;Not the end yet: follow the chain
        RegEx_SetRoot node\rNode,root
        Select node\nType
            Case 3,6        ;"|" and "?": the lNode branch has end nodes of its own
                RegEx_SetRoot node\lNode,root
        End Select
    Else
        node\root=root
    EndIf
End Function

Function RegEx_SetState(node.RegEx_NFA,state)    ;Write state into node and everything reachable through lNode / rNode
    node\state=state
    If node\lNode<>Null
        RegEx_SetState(node\lNode,state)
    EndIf
    If node\rNode<>Null
        RegEx_SetState(node\rNode,state)
    EndIf
End Function

Function RegEx_Delete(node.RegEx_NFA)    ;Release a compiled pattern (node and everything below it)
    node\state=1        ;Mark this node before descending
    
    ;Children whose state is still 0 are released recursively; marked ones are deleted directly
    If node\lNode<>Null
        If node\lNode\state=0
            RegEx_Delete node\lNode
        Else
            Delete node\lNode
        EndIf
    EndIf
    If node\rNode<>Null
        If node\rNode\state=0
            RegEx_Delete node\rNode
        Else
            Delete node\rNode
        EndIf
    EndIf
    
    ;Only simple-pattern nodes (nType -1) free their bank here
    If node\nType<0
        FreeBank node\stack
    EndIf
    Delete node
End Function

Function RegEx_Precedence(tok$)    ;Binding strength of an operator token; 0 for anything that is not an operator
    Local code
    code=Asc(tok)
    If code>=4 And code<=6 Then Return 3    ;"*", "+", "?"
    If code=8 Then Return 2                 ;concatenation
    If code=3 Then Return 1                 ;"|"
    Return 0
End Function

Function RegEx_GetCharacterClass$(cClass$)    ;Returns a full character list from a character class
    ;cClass - the text between "[" and "]": single characters, ranges ("a-z"), a named class
    ;         (":digit:"), nested classes ("[:digit:]"), optionally with a leading "^" to invert.
    ;         The special value "..." stands for "any character".
    ;Returns a string containing every character the class accepts (only codes 9-127 when inverted).
    Local invert,cList$,char$,cPtr,i
    
    If cClass="..."        ;Not an expected input class - this is shorthand for "any character"
        For i=9 To 127        ;...any character above 8. 1-8 are used as operators
            cList=cList+Chr(i)
        Next
        Return cList
    EndIf
    
    If Left(cClass,1)=":" And Right(cClass,1)=":"        ;Named class
        cClass=Lower(Mid(cClass,2,Len(cClass)-2))
        If Left(cClass,1)="^"
            invert=True
            cClass=Mid(cClass,2)
        EndIf
        Select cClass
            Case "alnum":cClass="A-Za-z0-9"
            Case "word":cClass="A-Za-z0-9_"
            Case "alpha":cClass="A-Za-z"
            Case "blank":cClass=" "+Chr(9)
            Case "cntrl":cClass=Chr(0)+"-"+Chr($1F)+Chr($7F)
            Case "digit":cClass="0-9"
            Case "graph":cClass=Chr($21)+"-"+Chr($7E)
            Case "lower":cClass="a-z"
            Case "print":cClass=Chr($20)+"-"+Chr($7E)
            Case "punct":cClass="-!"+Chr(34)+"#$%&'()*+,./:;<=>?@\[\\\]_`{|}~"
            Case "space":cClass=" "+Chr(13)+Chr(12)+Chr(11)+Chr(10)+Chr(9)
            Case "upper":cClass="A-Z"
            Case "xdigit":cClass="A-Fa-f0-9"
            Default:cClass=""
        End Select
        ;invert is already set above. Putting "^" back in front here would only add a
        ;literal "^" to the list (and so drop "^" from the inverted result).
    Else
        If Left(cClass,1)="^"
            invert=True
            cClass=Mid(cClass,2)
        EndIf
    EndIf
    
    For cPtr=1 To Len(cClass)
        If Mid(cClass,cPtr+1,1)="-" And cPtr<(Len(cClass)-1)        ;Range "x-y"
            For i=Asc(Mid(cClass,cPtr,1)) To Asc(Mid(cClass,cPtr+2,1))
                cList=cList+Chr(i)
            Next
            cPtr=cPtr+2
        Else
            char=Mid(cClass,cPtr,1)
            If char="["        ;Nested class: collect its text up to the closing "]" and expand it
                char=""
                cPtr=cPtr+1
                ;Stop at the closing "]" - or at the end of the text if it is missing
                While Mid(cClass,cPtr,1)<>"]" And cPtr<=Len(cClass)
                    If Mid(cClass,cPtr,1)="\"
                        If Mid(cClass,cPtr+1,1)="]" Then cPtr=cPtr+1        ;"\]" is a literal "]" inside the nested class
                    EndIf
                    char=char+Mid(cClass,cPtr,1)
                    cPtr=cPtr+1
                Wend
                char=RegEx_GetCharacterClass(char)
            ElseIf char="\"
                If Mid(cClass,cPtr+1,1)="["        ;"\[" is a literal "["
                    char="["
                    cPtr=cPtr+1
                EndIf
            EndIf
            cList=cList+char
        EndIf
    Next
    
    If invert        ;Replace the list by every character from 9 to 127 that is not in it
        cClass=cList
        cList=""
        For i=9 To 127
            If Not Instr(cClass,Chr(i)) Then cList=cList+Chr(i)
        Next
    EndIf
    
    Return cList
End Function

Function RegEx_StackPush(stack,value$)    ;Push a string onto a bank used as a stack (4 bytes per entry)
    Local top
    top=BankSize(stack)
    ResizeBank stack,top+4
    ;Each entry is the handle of a separate bank holding the string's bytes
    PokeInt stack,top,StrToBank(value)
End Function

Function RegEx_StackPop$(stack)    ;Remove the top entry and return its string ("" when the stack is empty)
    Local value$,bank,newSize
    
    If BankSize(stack)=0 Then Return ""
    
    newSize=BankSize(stack)-4
    bank=PeekInt(stack,newSize)
    ResizeBank stack,newSize
    value=BankToStr(bank)
    FreeBank bank        ;The entry's bank is no longer needed once popped
    Return value
End Function

Function RegEx_StackTop$(stack)    ;Return the top entry's string without removing it ("" when there is no entry)
    Local size
    size=BankSize(stack)
    If size<4 Then Return ""
    Return BankToStr(PeekInt(stack,size-4))
End Function

Function RegEx_ClearStacks(node.RegEx_NFA)    ;Free the position banks that matching attached to the nodes
    If node\stack<>0
        FreeBank node\stack
        node\stack=0
    EndIf
    If node\lNode<>Null
        RegEx_ClearStacks node\lNode
    EndIf
    If node\rNode<>Null
        RegEx_ClearStacks node\rNode
    EndIf
End Function


;===============================================================================



;===============================================================================
;General functions
;===============================================================================


Function StrToBank(s$)        ;Create a bank holding one byte per character of s; the caller frees it
    Local pos,result
    result=CreateBank(Len(s))
    pos=1
    While pos<=Len(s)
        PokeByte result,pos-1,Asc(Mid(s,pos,1))
        pos=pos+1
    Wend
    Return result
End Function

Function BankToStr$(bank)        ;Build a string with one character per byte of the bank
    Local offset,text$
    offset=0
    While offset<BankSize(bank)
        text=text+Chr(PeekByte(bank,offset))
        offset=offset+1
    Wend
    Return text
End Function

Function LoadFileBank(filename$)    ;Load a file straight into a bank
    ;filename - path of the file to read
    ;Returns a new bank with the file's contents (the caller frees it), or 0 if the file could not be opened.
    Local bank,file
    file=ReadFile(filename)
    If file=0 Then Return 0        ;Do not read from / close a stream that was never opened
    bank=CreateBank(FileSize(filename))
    ReadBytes bank,file,0,BankSize(bank)
    CloseFile file
    Return bank
End Function


;=============================================================================== 
__________________
Retry, Abort, Ignore? █
Intel Core i7-9700 4.70 Ghz; 64Gb; Nvidia RTX 3070
AMD Ryzen 7 3800X 4.3Ghz; 64Gb; Nvidia 1070Ti
AMD Ryzen 7 1700X 3.4Ghz; 8Gb; AMD RX 570
AMD Athlon II 2.6Ghz; 8Gb; Nvidia GTX 750 Ti
(Offline)
 
Ответить с цитированием
Эти 4 пользователя(ей) сказали Спасибо Randomize за это полезное сообщение:
impersonalis (18.11.2010), Reizel (31.12.2010), Spy4433 (19.11.2010), Tadeus (18.11.2010)