Skip to content

Commit af7f636

Browse files
file code
1 parent 5028786 commit af7f636

File tree

1 file changed

+80
-0
lines changed

1 file changed

+80
-0
lines changed

blog post html convert code.ps1

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
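<#
Disabled one-off snippet: downloads a single image from $Url into the local assets\uploads folder, keeping the last three segments of the URL as the relative path.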
2+
3+
$Url = 'https://i1.wp.com/sqldbawithabeard.com/wp-content/uploads/2020/08/image-16.png'
4+
$fileName = $Url.Split('/')[-1]
5+
$filepathpart = $Url.Split('/')[-3..(-1)] -join '\'
6+
$OutputFile = 'C:\Users\mrrob\OneDrive\Documents\GitHub\robsewell\assets\uploads\' + $filepathpart
7+
8+
Invoke-WebRequest -Uri $Url -OutFile $OutputFile
9+
10+
#>
11+
12+
# Collect the folders that hold individual posts in the 2020 export.
# Excluded:
#   * folders whose name is exactly two digits (the month / day levels),
#   * anything directly under a 'page' folder,
#   * the 'page', 'amp' and 'feed' folders themselves.
# The digit pattern is anchored on purpose: an unanchored '\d\d' also rejects
# post folders whose slug merely contains two consecutive digits
# (e.g. '12-things-i-learnt-at-sqlbits-xi'), so those posts were never converted.
$posts = Get-ChildItem -Directory 'C:\Users\mrrob\Downloads\static-html-output\2020' -Recurse |
    Where-Object {
        ($_.Name -notmatch '^\d\d$') -and
        ($_.Parent.Name -ne 'page') -and
        ($_.Name -notin @('page', 'amp', 'feed'))
    }
13+
14+
# $post = Get-Item C:\Users\mrrob\Downloads\static-html-output\2013\05\07\12-things-i-learnt-at-sqlbits-xi
15+
# foreach ($post in $posts[6..($posts.Count)]){
16+
# Convert every exported post folder in $posts into a Markdown file with YAML
# front matter (title, categories, tags) followed by the cleaned post HTML.
foreach ($post in $posts) {

    # Date prefix built from the last three segments of the parent path
    # (assumes posts live under ...\<year>\<month>\<day>\<slug> - TODO confirm).
    $Datepath = $post.PSParentPath.Split('\')[-3..-1] -join '-'

    # Destination file: <date>-<folder name>.md in the _posts folder.
    $fileName = 'C:\Users\mrrob\OneDrive\Documents\GitHub\robsewell\_posts\' + $Datepath + '-' + $post.Name + '.md'

    # Location of the exported HTML page for this post.
    $oldpostpath = $post.FullName + '\index.html'

    # Skip folders that have no index.html. Without this guard the read below
    # fails, $src keeps the bytes of the PREVIOUS post, and that stale HTML is
    # written out under this post's file name.
    if (-not (Test-Path -LiteralPath $oldpostpath -PathType Leaf)) {
        Write-Warning "No index.html found in $($post.FullName); skipping."
        continue
    }

    # NOTE(review): Get-Content / Set-Content use the host's default encoding;
    # confirm non-ASCII characters survive the round trip on this machine.
    $htmlrawcontent = Get-Content -Path $oldpostpath -Raw
    if ([string]::IsNullOrEmpty($htmlrawcontent)) {
        # Same stale-$src hazard as above: GetBytes($null) would throw.
        Write-Warning "index.html in $($post.FullName) is empty; skipping."
        continue
    }

    # Load the HTML into an HTMLFile COM document so it can be queried by class name.
    $oldpost = New-Object -ComObject 'HTMLFile'
    $src = [System.Text.Encoding]::Unicode.GetBytes($htmlrawcontent)
    $oldpost.write($src)

    # Grab the inner HTML of the element(s) with class 'entry-content'.
    $entrycontent = $oldpost.getElementsByClassName('entry-content')
    $content = ($entrycontent | Select-Object -Property innerHTML).innerHTML

    # Keep only what precedes the 'sharedaddy' sharing block.
    $notfluff = ($content -split '(.*)\<DIV class="sharedaddy sd-sharing-enabled"\>')[0]

    # Ordered (pattern, replacement) pairs applied with -replace (regex,
    # case-insensitive). They fix the tags URL, strip the crayon-* code-block
    # wrapper markup, and point image URLs at the new assets location.
    # The order is significant: some patterns are substrings of later ones.
    $rewrites = @(
        @('https://blog.robsewell.com/tags/#', 'https://blog.robsewell.com/tags/'),
        @('<SPAN class=crayon-title></SPAN>', ''),
        @('<DIV class=crayon-tools style="FONT-SIZE: 12px !important; HEIGHT: 18px !important; LINE-HEIGHT: 18px !important">', ''),
        @('<DIV class=crayon-button-icon></DIV></DIV>', ''),
        @('<DIV title="Toggle Plain Code" class="crayon-button crayon-plain-button">', ''),
        @('<DIV title="Toggle Line Wrap" class="crayon-button crayon-wrap-button">', ''),
        @('<DIV title="Expand Code" class="crayon-button crayon-expand-button">', ''),
        @('<DIV title=Copy class="crayon-button crayon-copy-button">', ''),
        @('<DIV title="Open Code In New Window" class="crayon-button crayon-popup-button">', ''),
        @('<DIV class=crayon-button-icon></DIV></DIV><SPAN class=crayon-language>PowerShell</SPAN></DIV></DIV>', ''),
        @('<DIV class=crayon-info style="MIN-HEIGHT: 16px !important; LINE-HEIGHT: 16px !important"></DIV>', ''),
        @('<DIV class=crayon-plain-wrap><TEXTAREA class="crayon-plain print-no" style="FONT-SIZE: 12px !important; LINE-HEIGHT: 15px !important; -moz-tab-size: 4; -o-tab-size: 4;', ''),
        @('-webkit-tab-size: 4; tab-size: 4" readOnly data-settings="dblclick">', ''),
        @('</TEXTAREA></DIV>', ''),
        @('<TABLE class=crayon-table>', ''),
        @('<TR class=crayon-row>', ''),
        @('<TD class="crayon-nums " data-settings="show">', ''),
        @('<DIV class=crayon-nums-content style="FONT-SIZE: 12px !important; LINE-HEIGHT: 15px !important">', ''),
        @('<SPAN class=crayon-language>Transact-SQL</SPAN></DIV></DIV>', ''),
        @('<DIV class=crayon-main>', ''),
        @('<TBODY>', ''),
        @('<DIV title="Toggle Line Numbers" class="crayon-button crayon-nums-button">', ''),
        @('https://i1.wp.com/sqldbawithabeard.com/wp-content', 'https://blog.robsewell.com/assets'),
        @('https://i0.wp.com/sqldbawithabeard.com/wp-content', 'https://blog.robsewell.com/assets'),
        @('https://i2.wp.com/sqldbawithabeard.com/wp-content', 'https://blog.robsewell.com/assets'),
        @('<SPAN class=crayon-language>PowerShell</SPAN></DIV></DIV>', '')
    )
    foreach ($rewrite in $rewrites) {
        $notfluff = $notfluff -replace $rewrite[0], $rewrite[1]
    }

    # Categories: every 'category-<name>' followed by whitespace in the page body.
    $categoryHits = [regex]::Matches($oldpost.body.innerHTML, 'category-(\w{0,50})\s')
    $categorynames = foreach ($hit in $categoryHits) { $hit.Groups[1].Value }

    # $null goes on the left so the test is a real null check even when
    # $categorynames is an array.
    if ($null -eq $categorynames) {
        $categorynames = 'Blog'
    }
    elseif ($categorynames -match 'uncategorised') {
        $categorynames = $categorynames -replace 'uncategorised', 'Blog'
    }
    else {
        $categorynames = @($categorynames) + 'Blog'
    }

    # One " - <name>" line per category.
    $categories = $categorynames | ForEach-Object { " - " + $PSItem } | Out-String

    # Tags: every 'tag-<name>' followed by whitespace; default to 'powershell'.
    $tagHits = [regex]::Matches($oldpost.body.innerHTML, 'tag-(\w{0,50})\s')
    $tagnames = foreach ($hit in $tagHits) { $hit.Groups[1].Value }

    if ($null -eq $tagnames) {
        $tagnames = 'powershell'
    }

    $tags = $tagnames | ForEach-Object { " - " + $PSItem } | Out-String

    # Drop the site-name suffix from the page title, then escape backslashes and
    # double quotes so the value stays valid inside the double-quoted YAML
    # scalar below.
    $title = $oldpost.title -replace ' \| SQL DBA with A Beard', ''
    $title = $title -replace '\\', '\\' -replace '"', '\"'

    # Here-string lines must start at column 0; the blank line before the
    # terminator leaves a newline after the closing '---'.
    $Yamlfront = @"
---
title: "$title"
categories:
$categories
tags:
$tags
---

"@

    $filecontent = $Yamlfront + $notfluff

    Set-Content -Value $filecontent -Path $fileName
}

0 commit comments

Comments
 (0)