空腹おやじのログと備忘録

VBA(主にExcel)でいろいろな実験的な事とか、Linuxのコマンドとか設定とかについて忘れないように、あれこれと・・・

【VBA】Twitter お題 「280バイトを超えない範囲で区切りのよいところで分割したい」を解いてみた

Twitter のお題、
280バイトを超えない範囲で区切りのよいところで分割したい
を解いてみました。

お題は、こちら

処理の概要

  1. 先頭から、280バイト切り出し(末尾の2バイト文字がバラバラにならないように)
  2. 後ろから、区切り文字("。" or LF)を検索
  3. その位置まで、配列に格納
  4. 先頭位置を、移動して、280バイト未満になるまで繰り返し
  5. 280バイト未満の文字列が残っていれば、それも追加

ある程度の汎用性を持たせているので
正直なところ、効率は良くない方法ですw

専用の関数にして、効率よくする方法はいくつかあるけのだけれど、あえてやってません。

Option Explicit

Public Sub hoge()

    Const MAX_BYTES As Long = 280&

    Dim sResults()  As String
    Dim sSrc        As String
    Dim lElems      As Long
    Dim i           As Long

    sSrc = Sheet1.Range("A1").Value

    lElems = splitCustom(sSrc, MAX_BYTES, sResults)

    For i = 0 To lElems - 1
        Sheet1.Range("A2").Offset(i).Value = sResults(i)
    Next i

    Debug.Print "Done."

End Sub

Public Function splitCustom(ByVal sSrc As String, _
                            ByVal lMaxBytes As Long, _
                            ByRef sResultArray() As String) As Long

    Dim sTarget As String
    Dim sElem   As String
    Dim lDelimPos   As Long
    Dim lIndex      As Long
    Dim lOffset     As Long

    lIndex = 0
    ReDim sResultArray(lIndex)

    sTarget = sSrc
    Do Until LenByte(sTarget) < lMaxBytes
        sElem = LeftByte(sTarget, lMaxBytes)
        lDelimPos = getLastTargetPos(sElem)
        ReDim Preserve sResultArray(lIndex)

        If lDelimPos > 0 Then
            sResultArray(lIndex) = Left$(sElem, lDelimPos)
            lOffset = lDelimPos + 1
        Else
            sResultArray(lIndex) = sElem
            lOffset = Len(sElem) + 1
        End If

        lIndex = lIndex + 1
        sTarget = Mid$(sTarget, lOffset)
    Loop

    If Len(sTarget) > 0 Then
        ReDim Preserve sResultArray(lIndex)
        sResultArray(lIndex) = sTarget
        lIndex = lIndex + 1
    End If

    splitCustom = lIndex

End Function

Private Function getLastTargetPos(ByVal sSrc As String) As Long

    Dim sDelimiters(1)  As String
    Dim lDelimPos       As Long
    Dim lResult         As Long
    Dim i               As Long

    sDelimiters(0) = "。"
    sDelimiters(1) = vbLf

    lResult = -1

    For i = 0 To UBound(sDelimiters)
        lDelimPos = InStrRev(sSrc, sDelimiters(i))

        If lDelimPos > 0 Then
            If lDelimPos > lResult Then
                lResult = lDelimPos
            End If
        End If
    Next i

    getLastTargetPos = lResult

End Function

Private Function LenByte(ByVal sSrc As String) As Long

    Dim sConverted  As String

    sConverted = StrConv(sSrc, vbFromUnicode)

    LenByte = LenB(sConverted)

End Function

Private Function LeftByte(ByVal sSrc As String, ByVal lBytes As Long) As String

    Dim sConverted  As String
    Dim sResult     As String

    sConverted = StrConv(sSrc, vbFromUnicode)

    If LenB(sConverted) <= lBytes Then
        LeftByte = sSrc

        Exit Function
    End If

    sResult = StrConv(LeftB(sConverted, lBytes + 1), vbUnicode)

    LeftByte = Left(sResult, Len(sResult) - 1)

End Function

実行結果
f:id:Z1000S:20200614185128p:plain

2020/6/15 追記
正規表現を使用した処理も書いてみました。

Public Function splitCusom2(ByVal sSrc As String, _
                            ByVal lMaxBytes As Long, _
                            ByRef sResultArray() As String) As Long

    Dim re      As RegExp
    Dim mc      As MatchCollection
    Dim sTarget As String
    Dim sElem   As String
    Dim lIndex      As Long
    Dim lOffset     As Long

    Set re = New RegExp

    With re
        .Global = True
        .MultiLine = True
        '"."は、"。"にはマッチするが、改行にマッチしないので、途中に改行が含まれていてもマッチさせる
        .Pattern = "^(.|\n)+(。|\n)"
    End With

    lIndex = 0
    ReDim sResultArray(lIndex)

    sTarget = sSrc
    Do Until LenByte(sTarget) < lMaxBytes
        ReDim Preserve sResultArray(lIndex)

        sElem = LeftByte(sTarget, lMaxBytes)

        Set mc = re.Execute(sElem)
        If mc.Count > 0 Then
            sResultArray(lIndex) = mc.Item(0)
            lOffset = Len(sResultArray(lIndex)) + 1
        Else
            sResultArray(lIndex) = sElem
            lOffset = Len(sElem) + 1
        End If

        lIndex = lIndex + 1
        sTarget = Mid$(sTarget, lOffset)
    Loop

    If Len(sTarget) > 0 Then
        ReDim Preserve sResultArray(lIndex)
        sResultArray(lIndex) = sTarget
        lIndex = lIndex + 1
    End If

    splitCusom2 = lIndex

End Function