Skip to content

Commit

Permalink
Fix URL handling, download path generation, and error handling
Browse files Browse the repository at this point in the history
  • Loading branch information
OptoCloud committed Mar 13, 2022
1 parent fcc0159 commit 76503b7
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 20 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,8 @@
vendor/

# Go workspace file
go.work
go.work

# h5ai-dl generated files
urls.txt
downloads/
15 changes: 15 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Launch",
"type": "go",
"request": "launch",
"mode": "auto",
"program": "${fileDirname}"
}
]
}
61 changes: 42 additions & 19 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const (
FolderParentEntry = 2
)

var baseUrl string
var hostUrl string

var threads int64 = 0

Expand All @@ -35,6 +35,24 @@ var urlFileMtx sync.Mutex

var wg sync.WaitGroup

// getDownloadPath maps a file URL to the slash-separated local path the file
// should be stored at, rooted at the "downloads" directory.
//
// Only the path component of the URL is used; scheme, host, query and
// fragment are ignored. The path is taken from url.URL.Path, which is the
// percent-decoded form, so "%20" becomes a space in the resulting file name.
// Each path segment is whitespace-trimmed and empty segments (leading,
// trailing or doubled slashes) are dropped.
//
// The "." and ".." segments are dropped as well. The URL originates from a
// remote server's listing, and because the path is percent-decoded an encoded
// "%2e%2e" arrives here as "..". Keeping such segments would let a crafted
// link resolve to a location outside the "downloads" directory.
//
// It returns an error only when fileUrl cannot be parsed as a URL.
func getDownloadPath(fileUrl string) (string, error) {
	u, err := url.Parse(fileUrl)
	if err != nil {
		return "", err
	}

	parts := []string{"downloads"}

	for _, str := range strings.Split(u.Path, "/") {
		str = strings.TrimSpace(str)
		// Skip empty segments and relative-path segments; the latter must
		// never reach the filesystem (see the function comment).
		if str == "" || str == "." || str == ".." {
			continue
		}
		parts = append(parts, str)
	}

	return strings.Join(parts, "/"), nil
}

func GetFileSize(name string) (int64, error) {
stat, err := os.Stat(name)
if err == nil {
Expand Down Expand Up @@ -206,11 +224,7 @@ func ParseEntry(node *html.Node) {
return
}

if entryPath[0] == '/' {
entryPath = entryPath[1:]
}

entryUrl := baseUrl + entryPath
entryUrl := hostUrl + entryPath
if entryType == FolderEntry {
crawlDirectoryAsync(entryUrl)
} else {
Expand All @@ -225,13 +239,11 @@ func writeUrl(fileUrl string) {
urlFile.WriteString(fileUrl + "\n")
}
func downloadUrl(fileUrl string, downloadSize int64) {
fileName, err := url.QueryUnescape(fileUrl[25:])
fileName, err := getDownloadPath(fileUrl)
if err != nil {
return
}

fileName = "downloads/" + fileName

folder := filepath.Dir(fileName)
if os.MkdirAll(folder, os.ModePerm) != nil {
return
Expand Down Expand Up @@ -268,12 +280,14 @@ func downloadUrl(fileUrl string, downloadSize int64) {

for {
n, err := resp.Body.Read(buffer)
if err == io.EOF {
return
} else if err != nil {
fmt.Printf("%s: %s\n", fileName, err.Error())
os.Remove(fileName)
return
if err != nil {
if err == io.EOF {
return
} else {
fmt.Printf("%s: %s\n", fileName, err.Error())
os.Remove(fileName)
return
}
}

if n != 4096 {
Expand Down Expand Up @@ -378,11 +392,20 @@ func main() {
return
}

baseUrl = os.Args[1]
if baseUrl[len(baseUrl)-1] != '/' {
baseUrl = baseUrl + "/"
requestUrl, err := url.Parse(os.Args[1])
if err != nil {
printUsage()
println("Error: 1st argument: " + err.Error())
return
}
if requestUrl.Scheme != "http" && requestUrl.Scheme != "https" {
printUsage()
println("Error: 1st argument is not a http or https url!")
return
}

hostUrl = requestUrl.Scheme + "://" + requestUrl.Host

writeUrlOnly, err = strconv.ParseBool(os.Args[2])
if err != nil {
printUsage()
Expand All @@ -397,7 +420,7 @@ func main() {
}
defer urlFile.Close()

crawlDirectory(baseUrl)
crawlDirectory(requestUrl.String())
time.Sleep(time.Second)
wg.Wait()

Expand Down

0 comments on commit 76503b7

Please sign in to comment.