空腹おやじのログと備忘録

VBA(主にExcel)でいろいろな実験的な事とか、Linuxのコマンドとか設定とかについて忘れないように、あれこれと・・・

VBAでFindFirstFile、FindNextFileを使ってファイルリストを取得する

ことりちゅんさん(id:Kotori-ChunChun)のところで、FileSystemObjectとDirを使って、ファイルパスの一覧を取得する速度の比較をしている記事を見つけました。

kotori-chunchun.hatenablog.com
www.excel-chunchun.com

個人的には、FindFirstFileFindNextFileという選択肢もあったので、比べてみることにしました。

データ

62,200個の0Byteファイルを作成しました。(ファイル名の取得だけだから、中身はいらないでしょ。うちの娘(使用年数から言えば、もう婆さんか?)ssdの空きも少ないし・・・)
f:id:Z1000S:20190409225335j:plain
f:id:Z1000S:20190409224813j:plain

データ生成コード

Private Declare Function MakeSureDirectoryPathExists Lib "imagehlp.dll" _
       (ByVal lpPath As String) As Long

Public Const TARGET_FOLDER_ROOT As String = "C:\Datas\FileListTest\Data\"


Public Sub createData()

    Dim sSubFolder  As String
    Dim sFolderPath As String

    Dim iFileNo As Integer
    Dim i   As Long
    Dim j   As Long
    Dim k   As Long
    Dim l   As Long

    For i = 1 To 10
        For j = 1 To 5
            For k = 1 To 5
                sSubFolder = "A00\B0000\C0000\"

                Mid$(sSubFolder, 2, 2) = Format$(i, "00")
                Mid$(sSubFolder, 6, 4) = Format$(j + 1000, "0000")
                Mid$(sSubFolder, 12, 4) = Format$(k + 2000, "0000")

                sFolderPath = TARGET_FOLDER_ROOT & sSubFolder

                Call createFolder(sFolderPath)

                For l = 1 To 200
                    iFileNo = FreeFile

                    Open sFolderPath & Format$(l, "000") & ".txt" For Output As iFileNo

                    Close iFileNo
                Next l
            Next k

            sSubFolder = "A00\B0000\"

            Mid$(sSubFolder, 2, 2) = Format$(i, "00")
            Mid$(sSubFolder, 6, 4) = Format$(j + 1000, "0000")

            sFolderPath = TARGET_FOLDER_ROOT & sSubFolder

            For l = 1 To 200
                iFileNo = FreeFile

                Open sFolderPath & "C" & Format$(l, "000") & ".txt" For Output As iFileNo

                Close iFileNo
            Next l
        Next j

        sSubFolder = "A00\"

        Mid$(sSubFolder, 2, 2) = Format$(i, "00")

        sFolderPath = TARGET_FOLDER_ROOT & sSubFolder

        For l = 1 To 200
            iFileNo = FreeFile

            Open sFolderPath & "B" & Format$(l, "000") & ".txt" For Output As iFileNo

            Close iFileNo
        Next l
    Next i

    sFolderPath = TARGET_FOLDER_ROOT

    For l = 1 To 200
        iFileNo = FreeFile

        Open sFolderPath & "A" & Format$(l, "000") & ".txt" For Output As iFileNo

        Close iFileNo
    Next l

    Debug.Print "Done."

End Sub

Public Function createFolder(ByVal sFolderPath As String) As Boolean

    Dim sPath   As String
    Dim lResult As Long

    If Right$(sFolderPath, 1) <> "\" Then
        sPath = sFolderPath & "\"
    Else
        sPath = sFolderPath
    End If

    lResult = MakeSureDirectoryPathExists(sPath)

    createFolder = CBool(lResult <> 0)

End Function
テスト用コード(FindFirstFile版)
Private Const MAX_PATH      As Long = 260&

Private Type FILETIME
    dwLowDateTime   As Long
    dwHighDateTime  As Long
End Type

Private Type WIN32_FIND_DATA
    dwFileAttributes        As Long
    ftCreationTime          As FILETIME
    ftLastAccessTime        As FILETIME
    ftLastWriteTime         As FILETIME
    nFileSizeHigh           As Long
    nFileSizeLow            As Long
    dwReserved0             As Long
    dwReserved1             As Long
    cFileName               As String * MAX_PATH
    cAlternateFileName      As String * 14
End Type

Private Declare Function FindFirstFile Lib "kernel32" Alias "FindFirstFileA" _
       (ByVal Filename As String, _
        ByRef FindData As WIN32_FIND_DATA) As Long

Private Declare Function FindNextFile Lib "kernel32" Alias "FindNextFileA" _
       (ByVal hFind As Long, _
        ByRef FindData As WIN32_FIND_DATA) As Long

Private Declare Function FindClose Lib "kernel32" _
       (ByVal hFind As Long) As Long


Private Const INVALID_HANDLE_VALUE  As Long = -1

'Private Const FILE_ATTRIBUTE_ARCHIVE    As Long = &H20
Private Const FILE_ATTRIBUTE_DIRECTORY  As Long = &H10
'Private Const FILE_ATTRIBUTE_HIDDEN     As Long = &H2
'Private Const FILE_ATTRIBUTE_NORMAL     As Long = &H80
'Private Const FILE_ATTRIBUTE_READONLY   As Long = &H1
'Private Const FILE_ATTRIBUTE_SYSTEM     As Long = &H4
'Private Const FILE_ATTRIBUTE_TEMPORARY  As Long = &H100


Public Sub fffTest()

    Dim colPaths    As Collection
    Dim sgStart     As Single
    Dim sgStop      As Single

    sgStart = Timer

    Set colPaths = New Collection

    Call getFilePaths("C:\Datas\FileListTest\Data", colPaths)

    sgStop = Timer

    Debug.Print Format$(sgStop - sgStart, "0.00") & " sec."

    If colPaths.Count > 0 Then
        Debug.Print colPaths.Count
    Else
        Debug.Print "No files found."
    End If

End Sub

Public Function getFilePaths(ByVal sTargetFolder As String, ByRef colFilePaths As Collection, Optional ByVal sTargetPattern As String = "*") As Long

    Dim sTargetFolderY  As String
    Dim sTargetPathName As String
    Dim hFind       As Long
    Dim fd          As WIN32_FIND_DATA
    Dim sFileName   As String
    Dim sPath       As String

    If Right$(sTargetFolder, 1) <> "\" Then
        '
        sTargetFolderY = sTargetFolder & "\"
    Else
        sTargetFolderY = sTargetFolder
    End If

    sTargetPathName = sTargetFolderY & sTargetPattern

    hFind = FindFirstFile(sTargetPathName, fd)

    If hFind = INVALID_HANDLE_VALUE Then
        getFilePaths = INVALID_HANDLE_VALUE

        Exit Function
    End If

    Do
        'fd.cFileNameは固定長文字列で、本来のファイル名以降の部分にはvbNullCharが埋められているので
        'それらを削除する必要がある
        sFileName = deleteNullChar(fd.cFileName)

        If sFileName = "." Then
            'カレントフォルダは処理しない
            GoTo LOOP_CONTINUE
        ElseIf sFileName = ".." Then
            '親フォルダは処理しない
            GoTo LOOP_CONTINUE
        End If

        If (fd.dwFileAttributes And FILE_ATTRIBUTE_DIRECTORY) = FILE_ATTRIBUTE_DIRECTORY Then
            'フォルダなら再帰処理させるためにパスを生成
            sPath = sTargetFolderY & sFileName
    
            '再帰呼び出し
            Call getFilePaths(sPath, colFilePaths, sTargetPattern)
        Else
            'ファイルならコレクションに追加
            colFilePaths.Add sTargetFolderY & sFileName
        End If

LOOP_CONTINUE:
        '次のファイルが見つからなくなるまでループ
    Loop Until FindNextFile(hFind, fd) = 0

    Call FindClose(hFind)

    getFilePaths = 0

End Function

Private Function deleteNullChar(ByVal sSource As String) As String

    Dim lPos    As Long

    lPos = InStr(sSource, vbNullChar)

    If lPos > 0 Then
        deleteNullChar = Left$(sSource, lPos - 1)
    Else
        deleteNullChar = sSource
    End If

End Function
テストコード(Dir版)

ことりちゅんさん(id:Kotori-ChunChun)のところの処理を流用。

但し、test_GetFileListTmpfileの時間計測部分と結果表示部分のみアレンジ。

  • Time→Timer
  • 結果表示フォーマット 0.00 sec.
  • ファイル数表示追加
Const WshHide = 0               '非表示
Const WshNormalFocus = 1        '通常サイズ

Function TrimEx(TargetString As String, Optional TrimLeft As Boolean = True, Optional TrimRight As Boolean = True) As String
    Dim reg_pattern As String

    If TrimLeft And TrimRight Then
        reg_pattern = "(?:^\s+|\s+$)"
    ElseIf TrimLeft Then
        reg_pattern = "^\s+"
    ElseIf TrimRight Then
        reg_pattern = "\s+$"
    Else
        TrimEx = TargetString
        Exit Function
    End If

    With CreateObject("VBScript.RegExp")
        .Pattern = reg_pattern
        .IgnoreCase = False
        .Global = True
        TrimEx = .Replace(TargetString, "")
    End With
End Function

Function GetFileListTmpfile(FolderLocation As String, Optional ShowCmdWindow As Boolean = True) As String()
    GetFileListTmpfile = Split(vbNullString)

    Dim tmpfile As String
    Dim filelist() As String

    With CreateObject("Scripting.FileSystemObject")
        If Not .FolderExists(FolderLocation) Then
            Debug.Print "* Error * Folder(" & FolderLocation & ") not found"
            Exit Function
        End If
        Do
            tmpfile = .GetSpecialFolder(2) & "\" & .GetTempName
        Loop While .FileExists(tmpfile)
    End With

    CreateObject("Wscript.Shell").Run "cmd /U /C dir /S /B /A-D """ & FolderLocation & """ > " & tmpfile, _
                                        IIf(ShowCmdWindow, WshNormalFocus, WshHide), True

    'コマンドプロンプト表示がONで強制終了(中断)された時の対策
    With CreateObject("Scripting.FileSystemObject")
        If Not .FileExists(tmpfile) Then
            Exit Function
        End If
    End With

    With CreateObject("ADODB.Stream")
        .Charset = "Unicode"
        .Open
        .LoadFromFile tmpfile
        filelist = Split(TrimEx(.ReadText), vbCrLf)
        .Close
    End With

    Kill tmpfile

    GetFileListTmpfile = filelist
End Function

Sub test_GetFileListTmpfile()
    Dim t As Single
    t = Timer
    Dim V As Variant
    'エクセルが応答不能になる。使用する場合は超注意
    'Application.Interactive = False
    V = GetFileListTmpfile("C:\Datas\FileListTest\Data", True)
    'Application.Interactive = True
    Debug.Print Format$(Timer - t, "0.00") & "sec."

    Debug.Print UBound(V) - LBound(V) + 1
End Sub
実行結果
call fffTest
0.31 sec.
 62200 

call test_GetFileListTmpfile
7.27sec.
 62200 

やっぱりAPI速いっすねぇ。1桁違います。
  件数は同じですけど、検索順は違いますよ。詳細は省きますけど・・・

ちなみに、Visual Studio 2017 C++で同様の処理を実行してみたら・・・
f:id:Z1000S:20190410095241j:plain
さらにその半分以下になりました。
C++のソースは省略。VBAのソースのCollectionがVectorになったくらいだから、誰も見ないでしょ?

おまけ

FindFirstFile、FindNextFileのファイルの検索順は、ファイルシステムに依存があります。

とりあえず、こんなコードで

Public Sub fffTest2()

    Dim colPaths    As Collection
    Dim v           As Variant

    Dim sFolderPath     As String
    Dim sFileName(4)    As String
    Dim iFileNo As Integer
    Dim i       As Long

    sFolderPath = "D:\FFF_Test\"

    Call createFolder(sFolderPath)

    On Error Resume Next

    Kill sFolderPath & "*"

    On Error GoTo 0

    sFileName(0) = "003.txt"
    sFileName(1) = "005.txt"
    sFileName(2) = "004.txt"
    sFileName(3) = "002.txt"
    sFileName(4) = "001.txt"

    For i = 0 To 4
        iFileNo = FreeFile

        Open sFolderPath & sFileName(i) For Output As iFileNo

        Close iFileNo

        Call Sleep(3000)
    Next i

    Set colPaths = New Collection

    Call getFilePaths("D:\FFF_Test", colPaths)

    If colPaths.Count > 0 Then
        Debug.Print "FileSystem:" & getFileSystemName("D:\")

        For Each v In colPaths
            Debug.Print v
        Next v

        Debug.Print "Done."
    Else
        Debug.Print "No files found."
    End If

End Sub

実行結果は以下の通り。
Dドライブは、USBメモリで、ファイルシステムの違いは、USBメモリの差し替えで実現しています。
(これのためだけに、1個のUSBメモリNTFSでフォーマットしました。)

call fffTest2
FileSystem:exFAT
D:\FFF_Test\003.txt
D:\FFF_Test\005.txt
D:\FFF_Test\004.txt
D:\FFF_Test\002.txt
D:\FFF_Test\001.txt
Done.

call fffTest2
FileSystem:NTFS
D:\FFF_Test\001.txt
D:\FFF_Test\002.txt
D:\FFF_Test\003.txt
D:\FFF_Test\004.txt
D:\FFF_Test\005.txt
Done.

call fffTest2
FileSystem:FAT32
D:\FFF_Test\003.txt
D:\FFF_Test\005.txt
D:\FFF_Test\004.txt
D:\FFF_Test\002.txt
D:\FFF_Test\001.txt
Done.

NTFSファイル名順
exFatFAT32は、ファイル作成順
となってます。
厳密には、CollectionをFor Eachで出力している時点で駄目なんだけど、簡易的な確認なので、まぁ良しとしてしまえ。

ここにも

The order in which this function returns the file names is dependent on the file system type. With the NTFS file system and CDFS file systems, the names are usually returned in alphabetical order. With FAT file systems, the names are usually returned in the order the files were written to the disk, which may or may not be in alphabetical order. However, as stated previously, these behaviors are not guaranteed.

と書いてありました。


さらに、Dir版でも試してみたところ、FindFirstFile版と同じ結果になりました。
これは、ちょっとした発見でした。





あぁ、なんか話が全然別の方に行ってる・・・