Skip to content

Commit

Permalink
Merge pull request #9 from WillySuspension/master
Browse files Browse the repository at this point in the history
BETTER
  • Loading branch information
LAMMJohnson committed Jan 9, 2012
2 parents 4daeeb2 + 10f9f4b commit 726cbf6
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 46 deletions.
68 changes: 24 additions & 44 deletions Haskell/Dupechecker.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,78 +5,58 @@
--
-- This file has been placed in the public domain.

import Control.Monad (foldM)
import Control.Monad (foldM)
import qualified Data.Hash.MD5 as MD5
import Data.List (nub)
import Data.List (intercalate, nub)
import qualified Data.Map as Map
import Data.Maybe (fromMaybe)
import Data.String.Utils (join)
import System (getArgs)
import System.Directory (getDirectoryContents)
import System.FilePath (combine)
import System.IO (withBinaryFile, hGetContents, IOMode(ReadMode))
import Data.Maybe (fromMaybe)
import System.Environment (getArgs)
import System.Directory (getDirectoryContents)
import System.FilePath (combine)
import System.IO (withBinaryFile, hGetContents, IOMode(ReadMode))
import System.IO.PlafCompat (getFileStatus, isDirectory)
import Text.Printf (printf)

-- Maps an MD5 digest (as an Integer) to the paths of the files that hashed to it.
type MD5HashMap = Map.Map Integer [FilePath]

-- | Hash a single file and return its MD5 sum as an 'Integer'.
--
-- The file is opened in binary mode and its contents are fed to
-- 'MD5.md5i'. 'hGetContents' reads lazily, so the digest is forced with
-- '$!' inside the 'withBinaryFile' callback; this makes sure the whole file
-- has been consumed before the handle is closed.
hashFile :: String -> IO Integer
hashFile fileName =
    withBinaryFile fileName ReadMode $ \handle -> do
        contents <- hGetContents handle
        return $! MD5.md5i $ MD5.Str contents

-- | Hash the files of one directory and put them into an MD5 hash map.
--
-- Takes the map accumulated so far and the directory to scan, and returns
-- the map extended with the path of every non-directory entry, keyed by
-- that file's MD5 sum. Sub-directories are skipped, not descended into.
hashDirectory :: MD5HashMap -> FilePath -> IO MD5HashMap
hashDirectory hashes dirName = do
    entries <- getDirectoryContents dirName
    -- The entries are names relative to dirName, so qualify them before
    -- touching the file system; otherwise both the status lookup and the
    -- hashing would be resolved against the current working directory.
    let paths = map (combine dirName) entries
    statuses <- mapM getFileStatus paths
    let files = [ path
                | (path, status) <- zip paths statuses
                , not (isDirectory status)
                ]
    -- now hash the files and throw them into the map
    foldM hashAndPut hashes files
  where
    -- Prepend the path to whatever is already stored under its digest.
    hashAndPut acc path = do
        hash <- hashFile path
        return $ Map.insertWith (++) hash [path] acc

-- | Hash multiple directories, threading a single hash map through all of
-- them so that duplicates are detected across directories as well as
-- within each one.
hashDirectories :: MD5HashMap -> [FilePath] -> IO MD5HashMap
hashDirectories = foldM hashDirectory

-- | Generate a summary of duplicates.
--
-- Prints one line for every digest that is shared by two or more files,
-- listing the paths separated by commas. Digests with fewer than two
-- paths are not duplicates and produce no output.
generateSummary :: MD5HashMap -> IO ()
generateSummary hashes = mapM_ summarize (Map.elems hashes)
  where
    -- Only a list with at least two paths is a real duplicate group.
    summarize fileNames@(_ : _ : _) =
        putStrLn $ intercalate ", " fileNames ++ " have the same hash."
    summarize _ = return ()

-- | Entry point: treat every command-line argument as a directory to scan
-- and report the files that share an MD5 sum.
--
-- Repeated arguments are collapsed with 'nub' so a directory named twice
-- is scanned once. With no arguments the program aborts through 'fail'.
main :: IO ()
main = do
    argv <- getArgs
    let dirs = nub argv

    if null dirs
        then fail "At least one directory should be specified."
        else hashDirectories Map.empty dirs >>= generateSummary
3 changes: 1 addition & 2 deletions Haskell/Scraper.hs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import Network.Curl.Easy
import Text.Regex.TDFA
import System.Environment (getArgs)
import System.Console.CmdArgs
import System.IO
import Download

getImage :: String -> String -> IO ()
Expand Down Expand Up @@ -64,4 +63,4 @@ main = withCurlDo $ do
unless (quiet args) $ putStrLn $ "Downloaded " ++ show a ++
if output args == "./"
then "."
else "to" ++ show (output args) ++ "."
else "to" ++ show (output args) ++ "."

0 comments on commit 726cbf6

Please sign in to comment.