-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
177 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
siteid,sitename,longitudeeast,latitudenorth,longitudewest,latitudesouth,altitude,area,sitedescription,notes,recdatecreated,recdatemodified,geog,datasetid,collectionunitid,datasettypeid,datasetname,notes-2,recdatecreated-2,recdatemodified-2,embargoid,citation,doi | ||
10330,Lac du Sommet,-70.66468,47.71662,-70.66573,47.71382,830,2,"The small shallow Lac du Sommet (0.02 km2, 4 m maximum depth, elevation of 830 m a.s.l., 47°43′N, 70°40′W) is located in the boreal forest north of the St Lawrence Estuary (Figure 1). It is of glacial origin and situated on granitic-gneissic bedrock of the Canadian Precambrian Shield in the Laurentian Mountains.",,2015-04-16 19:02:09,2016-05-18 21:10:02,0103000020E61000000100000005000000D47D00529BAA51C0889D29745EDB47408A93FB1D8AAA51C0889D29745EDB47408A93FB1D8AAA51C06BD44334BADB4740D47D00529BAA51C06BD44334BADB4740D47D00529BAA51C0889D29745EDB4740,15691,11686,1,,,2015-04-16 19:02:13,2015-04-16 19:02:13,,"Hausmann, S., I. Larocque-Tobler, P.J.H. Richard, R. Pienitz, G. St-Onge, and F. Fye. 2011. Diatom-inferred wind activity at Lac du Sommet, southern Québec, Canada: A multiproxy paleoclimate reconstruction based on diatoms, chironomids, and pollen for the past 9500 years. The Holocene 21(6):925-938. http://hol.sagepub.com/content/21/6/925.full.pdf+html. [DOI: 10.1177/0959683611400199]",10.1177/0959683611400199 | ||
10330,Lac du Sommet,-70.66468,47.71662,-70.66573,47.71382,830,2,"The small shallow Lac du Sommet (0.02 km2, 4 m maximum depth, elevation of 830 m a.s.l., 47°43′N, 70°40′W) is located in the boreal forest north of the St Lawrence Estuary (Figure 1). It is of glacial origin and situated on granitic-gneissic bedrock of the Canadian Precambrian Shield in the Laurentian Mountains.",,2015-04-16 19:02:09,2016-05-18 21:10:02,0103000020E61000000100000005000000D47D00529BAA51C0889D29745EDB47408A93FB1D8AAA51C0889D29745EDB47408A93FB1D8AAA51C06BD44334BADB4740D47D00529BAA51C06BD44334BADB4740D47D00529BAA51C0889D29745EDB4740,15692,11686,11,Lac du Sommet Diatom Data,,2015-04-16 19:02:20,2015-04-16 19:02:20,,"Hausmann, S., I. Larocque-Tobler, P.J.H. Richard, R. Pienitz, G. St-Onge, and F. Fye. 2011. Diatom-inferred wind activity at Lac du Sommet, southern Québec, Canada: A multiproxy paleoclimate reconstruction based on diatoms, chironomids, and pollen for the past 9500 years. The Holocene 21(6):925-938. http://hol.sagepub.com/content/21/6/925.full.pdf+html. [DOI: 10.1177/0959683611400199]",10.1177/0959683611400199 | ||
1729,Myrtle Lake,-93.37853,47.98645,-93.39207,47.97876,393,50,"Lake surrounded by peatland. Physiography: Red Lake lowlands. Surrounding vegetation: Sphagnum, Piceto-Chamaedophnetum.",,2013-09-30 14:03:01,2016-05-18 21:10:02,0103000020E6100000010000000500000089EFC4AC175957C00551F70148FD474083A3E4D5395857C00551F70148FD474083A3E4D5395857C0D1915CFE43FE474089EFC4AC175957C0D1915CFE43FE474089EFC4AC175957C00551F70148FD4740,1786,1728,3,,,2013-09-30 14:02:42,2013-09-30 14:02:42,,"Janssen, C.R. 1968. Myrtle Lake: a late- and post-glacial pollen diagram from northern Minnesota. Canadian Journal of Botany 46(11):1397-1410. [DOI: 10.1139/b68-190]",10.1139/b68-190 | ||
1729,Myrtle Lake,-93.37853,47.98645,-93.39207,47.97876,393,50,"Lake surrounded by peatland. Physiography: Red Lake lowlands. Surrounding vegetation: Sphagnum, Piceto-Chamaedophnetum.",,2013-09-30 14:03:01,2016-05-18 21:10:02,0103000020E6100000010000000500000089EFC4AC175957C00551F70148FD474083A3E4D5395857C00551F70148FD474083A3E4D5395857C0D1915CFE43FE474089EFC4AC175957C0D1915CFE43FE474089EFC4AC175957C00551F70148FD4740,8423,1728,1,,,2013-09-30 14:02:42,2013-09-30 14:02:42,,"Janssen, C.R. 1968. Myrtle Lake: a late- and post-glacial pollen diagram from northern Minnesota. Canadian Journal of Botany 46(11):1397-1410. [DOI: 10.1139/b68-190]",10.1139/b68-190 | ||
269,Billy's Lake,-94.54948,46.27186,-94.55363,46.2693,383,2.5,Shallow depression on St. Croix moraine. Physiography: rugged relief. Surrounding vegetation: white pine/hardwoods.,,2013-09-30 14:03:01,2016-05-18 21:10:02,0103000020E61000000100000005000000680586AC6EA357C00A68226C78224740977329AE2AA357C00A68226C78224740977329AE2AA357C0978BF84ECC224740680586AC6EA357C0978BF84ECC224740680586AC6EA357C00A68226C78224740,275,269,3,,,2013-09-30 14:02:42,2013-09-30 14:02:42,,"Jacobson, G.L., Jr., and E.C. Grimm. 1986. A numerical analysis of Holocene forest and prairie vegetation in central Minnesota. Ecology 67(4):958-966. [DOI: 10.2307/1939818]",10.2307/1939818 | ||
269,Billy's Lake,-94.54948,46.27186,-94.55363,46.2693,383,2.5,Shallow depression on St. Croix moraine. Physiography: rugged relief. Surrounding vegetation: white pine/hardwoods.,,2013-09-30 14:03:01,2016-05-18 21:10:02,0103000020E61000000100000005000000680586AC6EA357C00A68226C78224740977329AE2AA357C00A68226C78224740977329AE2AA357C0978BF84ECC224740680586AC6EA357C0978BF84ECC224740680586AC6EA357C00A68226C78224740,7929,269,1,,,2013-09-30 14:02:42,2013-09-30 14:02:42,,"Jacobson, G.L., Jr., and E.C. Grimm. 1986. A numerical analysis of Holocene forest and prairie vegetation in central Minnesota. Ecology 67(4):958-966. [DOI: 10.2307/1939818]",10.2307/1939818 | ||
1598,Lake of the Clouds,-91.10962,48.14638,-91.11546,48.13873,462,11.6,"Lake with two basins. Physiography: rugged, steep slopes and cliffs. Surrounding vegetation: Pinus banksiana, Fraxinus, Acer, Alnus, Cornus.",Minnesota DNR Lake ID: 38016900.,2013-09-30 14:03:01,2017-12-18 19:05:37,0103000020E61000000100000005000000C1FF56B263C756C0AF7C96E7C111484034BF9A0304C756C0AF7C96E7C111484034BF9A0304C756C0ED647094BC124840C1FF56B263C756C0ED647094BC124840C1FF56B263C756C0AF7C96E7C1114840,1649,1597,3,,Sample depths from core descriptions in LRC files. A reliable radiocarbon chronology does not seem possible (ECG).,2013-09-30 14:02:42,2013-09-30 14:02:42,,"Craig, A.J. 1972. Pollen influx to laminated sediments: a pollen diagram from northeastern Minnesota. Ecology 53(1):46-57. [DOI: 10.2307/1935709]",10.2307/1935709 | ||
1598,Lake of the Clouds,-91.10962,48.14638,-91.11546,48.13873,462,11.6,"Lake with two basins. Physiography: rugged, steep slopes and cliffs. Surrounding vegetation: Pinus banksiana, Fraxinus, Acer, Alnus, Cornus.",Minnesota DNR Lake ID: 38016900.,2013-09-30 14:03:01,2017-12-18 19:05:37,0103000020E61000000100000005000000C1FF56B263C756C0AF7C96E7C111484034BF9A0304C756C0AF7C96E7C111484034BF9A0304C756C0ED647094BC124840C1FF56B263C756C0ED647094BC124840C1FF56B263C756C0AF7C96E7C1114840,3482,3379,3,,Sample depths corrected in January 2018 by E.C. Grimm based on Appendix 1 in Craig's 1969 thesis.,2013-09-30 14:02:42,2018-01-25 23:58:54,,"Craig, A.J. 1972. Pollen influx to laminated sediments: a pollen diagram from northeastern Minnesota. Ecology 53(1):46-57. [DOI: 10.2307/1935709]",10.2307/1935709 | ||
1598,Lake of the Clouds,-91.10962,48.14638,-91.11546,48.13873,462,11.6,"Lake with two basins. Physiography: rugged, steep slopes and cliffs. Surrounding vegetation: Pinus banksiana, Fraxinus, Acer, Alnus, Cornus.",Minnesota DNR Lake ID: 38016900.,2013-09-30 14:03:01,2017-12-18 19:05:37,0103000020E61000000100000005000000C1FF56B263C756C0AF7C96E7C111484034BF9A0304C756C0AF7C96E7C111484034BF9A0304C756C0ED647094BC124840C1FF56B263C756C0ED647094BC124840C1FF56B263C756C0AF7C96E7C1114840,3483,3380,3,,The pollen counts are from the same core as LKCLDSH. The actual depths are not known; samples were recorded as varve counts.,2013-09-30 14:02:42,2013-09-30 14:02:42,,"Craig, A.J. 1972. Pollen influx to laminated sediments: a pollen diagram from northeastern Minnesota. Ecology 53(1):46-57. [DOI: 10.2307/1935709]",10.2307/1935709 | ||
203,Anderson Pond,-85.49868,36.03275,-85.50396,36.02755,303,34.8,,,2013-09-30 14:03:01,2016-05-18 21:10:02,0103000020E61000000100000005000000809F71E1406055C09D8026C286034240D5CA845FEA5F55C09D8026C286034240D5CA845FEA5F55C0D578E92631044240809F71E1406055C0D578E92631044240809F71E1406055C09D8026C286034240,203,203,3,,,2013-09-30 14:02:42,2013-09-30 14:02:42,,"Delcourt, H.R. 1979. Late Quaternary vegetation history of the eastern Highland Rim and adjacent Cumberland Plateau of Tennessee. Ecological Monographs 49(3):255-280. [DOI: 10.2307/1942485]",10.2307/1942485 | ||
195,Hungry Jack Lake,-91.12,48.15,-91.12,48.15,453,,,,2013-09-30 14:03:01,2016-05-18 21:10:02,0101000020E610000048E17A14AEC756C03333333333134840,195,195,7,,,2013-09-30 14:02:42,2013-09-30 14:02:42,,"Swain, A.M. 1973. A history of fire and vegetation in northeastern Minnesota as recorded in lake sediments. Quaternary Research 3(3):383-396. [DOI: 10.1016/0033-5894(73)900]",10.1016/0033-5894(73)900 | ||
1598,Lake of the Clouds,-91.10962,48.14638,-91.11546,48.13873,462,11.6,"Lake with two basins. Physiography: rugged, steep slopes and cliffs. Surrounding vegetation: Pinus banksiana, Fraxinus, Acer, Alnus, Cornus.",Minnesota DNR Lake ID: 38016900.,2013-09-30 14:03:01,2017-12-18 19:05:37,0103000020E61000000100000005000000C1FF56B263C756C0AF7C96E7C111484034BF9A0304C756C0AF7C96E7C111484034BF9A0304C756C0ED647094BC124840C1FF56B263C756C0ED647094BC124840C1FF56B263C756C0AF7C96E7C1114840,1649,1597,3,,Sample depths from core descriptions in LRC files. A reliable radiocarbon chronology does not seem possible (ECG).,2013-09-30 14:02:42,2013-09-30 14:02:42,,"Swain, A.M. 1973. A history of fire and vegetation in northeastern Minnesota as recorded in lake sediments. Quaternary Research 3(3):383-396. [DOI: 10.1016/0033-5894(73)900]",10.1016/0033-5894(73)900 | ||
1598,Lake of the Clouds,-91.10962,48.14638,-91.11546,48.13873,462,11.6,"Lake with two basins. Physiography: rugged, steep slopes and cliffs. Surrounding vegetation: Pinus banksiana, Fraxinus, Acer, Alnus, Cornus.",Minnesota DNR Lake ID: 38016900.,2013-09-30 14:03:01,2017-12-18 19:05:37,0103000020E61000000100000005000000C1FF56B263C756C0AF7C96E7C111484034BF9A0304C756C0AF7C96E7C111484034BF9A0304C756C0ED647094BC124840C1FF56B263C756C0ED647094BC124840C1FF56B263C756C0AF7C96E7C1114840,3482,3379,3,,Sample depths corrected in January 2018 by E.C. Grimm based on Appendix 1 in Craig's 1969 thesis.,2013-09-30 14:02:42,2018-01-25 23:58:54,,"Swain, A.M. 1973. A history of fire and vegetation in northeastern Minnesota as recorded in lake sediments. Quaternary Research 3(3):383-396. [DOI: 10.1016/0033-5894(73)900]",10.1016/0033-5894(73)900 | ||
1598,Lake of the Clouds,-91.10962,48.14638,-91.11546,48.13873,462,11.6,"Lake with two basins. Physiography: rugged, steep slopes and cliffs. Surrounding vegetation: Pinus banksiana, Fraxinus, Acer, Alnus, Cornus.",Minnesota DNR Lake ID: 38016900.,2013-09-30 14:03:01,2017-12-18 19:05:37,0103000020E61000000100000005000000C1FF56B263C756C0AF7C96E7C111484034BF9A0304C756C0AF7C96E7C111484034BF9A0304C756C0ED647094BC124840C1FF56B263C756C0ED647094BC124840C1FF56B263C756C0AF7C96E7C1114840,3483,3380,3,,The pollen counts are from the same core as LKCLDSH. The actual depths are not known; samples were recorded as varve counts.,2013-09-30 14:02:42,2013-09-30 14:02:42,,"Swain, A.M. 1973. A history of fire and vegetation in northeastern Minnesota as recorded in lake sediments. Quaternary Research 3(3):383-396. [DOI: 10.1016/0033-5894(73)900]",10.1016/0033-5894(73)900 | ||
10461,Pigeon Marsh,-85.4009,34.66407,-85.40174,34.66311,660,0.5,"Small marsh located on Pigeon Mountain, the easern ridge of Lookout Mountain, a forested linear ridge. Marsh plants include Cephalanthus occidentalis, Salix, Scirpus cf. S. cyperinus, Carex tussocks, Persicaria hydropiperoides, Proserpinaca, Eleocharis, Bidens, Rhexia, Sagittaria, Sphagnum, Leucobryum, and Osmunda cinnamomea. Dominant tree around the edge is Acer rubrum and Liquidambar styraciflua. Upland vegetation dominated by Quercus and Carya with some Pinus virginiana. ",,2015-06-03 21:40:58,2016-05-18 21:10:02,0103000020E61000000100000005000000AF5FB01BB65955C046D3D9C9E0544140D93D7958A85955C046D3D9C9E0544140D93D7958A85955C09B20EA3E00554140AF5FB01BB65955C09B20EA3E00554140AF5FB01BB65955C046D3D9C9E0544140,16008,11849,1,,,2015-06-03 21:41:00,2015-06-03 21:41:00,,"Watts, W.A. 1975. Vegetation record for the last 20,000 years from a small marsh on Lookout Mountain, northwestern Georgia. Geological Society of America Bulletin 86(3):287-291. [DOI: 10.1130/0016-7606(1975)86<287:VRFTLY>2.0.CO;2]",10.1130/0016-7606(1975)86<287:VRFTLY>2.0.CO;2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,159 @@ | ||
import utils as ard | ||
import os | ||
|
||
def not_in_neotoma(df, df2, path=r'/Users/seiryu8808/Desktop/UWinsc/Github/UnacquiredSites/src/output/eda'):
    """Obtain all the articles that have no coordinates in the Neotoma Database.

    Parameters
    ----------
    df : pd.DataFrame
        Input data frame. Must contain the columns '_gddid' and 'longeast';
        rows whose 'longeast' is null are the ones considered missing.
    df2 : pd.DataFrame
        Input data frame. Must contain the columns '_gddid', 'title', 'year',
        'doi' and 'link_url'.
    path : str
        Directory where the TSV file is written.

    Returns
    -------
    pd.DataFrame
        Rows of ``df2`` whose '_gddid' has at least one null 'longeast' in
        ``df``, restricted to '_gddid', 'title', 'year', 'doi', 'link_url'.
        The same frame is written, tab separated, to
        'articles_wo_neotoma_coordinates.tsv' inside ``path``.
    """
    missing = df[df['longeast'].isnull()]

    # The aggregation only serves to collapse the rows to one per article.
    # reset_index() turns '_gddid' back into a regular column so the merge
    # below joins column-to-column instead of relying on an index level name.
    missing = missing.groupby('_gddid')\
        .agg({'longeast': 'sum'})\
        .reset_index()

    missing = missing.merge(df2, on='_gddid')
    missing = missing[['_gddid', 'title', 'year', 'doi', 'link_url']]

    output_file = os.path.join(path, 'articles_wo_neotoma_coordinates.tsv')
    missing.to_csv(output_file, sep='\t', index=False)
    print("A TSV file with articles not found in Neotoma was created on your EDA output folder.")
    return missing
|
||
def sentences_w_coords_int(df_with_int, path=r'/Users/seiryu8808/Desktop/UWinsc/Github/UnacquiredSites/src/output/eda'):
    """Export the sentence-level coordinate columns next to the Neotoma ones.

    Parameters
    ----------
    df_with_int : pd.DataFrame
        Input data frame. Must contain the columns '_gddid', 'words', 'year',
        'latnorth', 'found_lat', 'longeast', 'found_long', 'dms_regex' and
        'dd_regex'. No row filtering is done here.
    path : str
        Directory where the TSV file is written.

    Returns
    -------
    pd.DataFrame
        The nine columns above, with 'latnorth' renamed to 'expected_lat' and
        'longeast' renamed to 'expected_long'. The same frame is written, tab
        separated, to 'sentences_with_latlong_intersections.tsv' inside
        ``path``.
    """
    wanted_columns = [
        '_gddid', 'words', 'year',
        'latnorth', 'found_lat',
        'longeast', 'found_long',
        'dms_regex', 'dd_regex',
    ]
    sent_with_int_df = df_with_int[wanted_columns].rename(
        columns={'latnorth': 'expected_lat', 'longeast': 'expected_long'}
    )

    output_file = os.path.join(path, 'sentences_with_latlong_intersections.tsv')
    sent_with_int_df.to_csv(output_file, sep='\t', index=False)
    print("A TSV file with sentences that have coordinates was created in your EDA output folder.")
    return sent_with_int_df
|
||
def articles_wo_coords(nlp_bib_neotoma, bibliography, neotoma_joined_df, path=r'/Users/seiryu8808/Desktop/UWinsc/Github/UnacquiredSites/src/output/eda'):
    """Obtain all articles that have no coordinate intersections.

    Parameters
    ----------
    nlp_bib_neotoma : pd.DataFrame
        Input data frame. Must contain '_gddid', 'found_lat' and 'found_long';
        the two 'found_*' columns must hold values that support ``+`` and
        ``len`` (assumed to be lists -- confirm against the caller).
    bibliography : pd.DataFrame
        Input data frame. Must contain '_gddid', 'title', 'year', 'doi' and
        'link_url'.
    neotoma_joined_df : pd.DataFrame
        Input data frame. Must contain 'doi', 'latnorth' and 'longeast'.
    path : str
        Directory where the TSV file is written.

    Returns
    -------
    pd.DataFrame
        Articles whose concatenated 'found_lat' and 'found_long' are both
        empty, with the columns '_gddid', 'title', 'year', 'found_lat',
        'expected_lat', 'found_long', 'expected_long', 'doi', 'link_url'.
        The same frame is written, tab separated, to
        'articles_wo_latlong_intersections.tsv' inside ``path``.
    """
    # Summing concatenates the per-sentence values into one value per article.
    no_inter_df = nlp_bib_neotoma.groupby('_gddid')\
        .agg({'found_lat': 'sum', 'found_long': 'sum'})\
        .reset_index()

    # Keep only the articles where nothing was found for either coordinate.
    lat_is_empty = no_inter_df['found_lat'].apply(len) == 0
    long_is_empty = no_inter_df['found_long'].apply(len) == 0
    no_inter_df = no_inter_df[lat_is_empty & long_is_empty]

    # Join explicitly on the article id instead of on whatever columns the two
    # frames happen to share.
    no_inter_df = no_inter_df.merge(bibliography, on='_gddid')
    # Left join: articles without a Neotoma record are kept with null
    # expected coordinates.
    no_inter_df = no_inter_df.merge(neotoma_joined_df, how='left', on='doi')\
        .rename(columns={"latnorth": "expected_lat", "longeast": "expected_long"})
    no_inter_df = no_inter_df[[
        '_gddid', 'title', 'year',
        'found_lat', 'expected_lat',
        'found_long', 'expected_long',
        'doi', 'link_url',
    ]]

    output_file = os.path.join(path, 'articles_wo_latlong_intersections.tsv')
    no_inter_df.to_csv(output_file, sep='\t', index=False)
    print("A TSV file of articles that have no coordinates was created in your EDA output folder.")
    return no_inter_df
|
||
|
||
|
||
def sentences_w_site_int(nlp_bib_neotoma, path=r'/Users/seiryu8808/Desktop/UWinsc/Github/UnacquiredSites/src/output/eda'):
    """Obtain all sentences that have a site name intersection.

    Parameters
    ----------
    nlp_bib_neotoma : pd.DataFrame
        Input data frame. It is handed to ``ard.find_intersections`` with the
        columns 'words_l' and 'sitenames_l'; the result must also contain
        '_gddid', 'sentid' and 'year'.
    path : str
        Directory where the TSV file is written.

    Returns
    -------
    pd.DataFrame
        Rows whose 'found_sitenames' length is not zero, with the columns
        '_gddid', 'sentid', 'words_l', 'expected_sitename' (renamed from
        'sitenames_l'), 'intersected_sitename' (renamed from
        'found_sitenames') and 'year'. The same frame is written, tab
        separated, to 'sentences_with_sitenames_intersections.tsv' inside
        ``path``.
    """
    # NOTE(review): find_intersections is defined in the utils module; it is
    # presumed to add 'found_sitenames' holding the overlap between the two
    # columns -- confirm against its implementation.
    sn_inter = ard.find_intersections(
        nlp_bib_neotoma,
        cols_to_intersect=['words_l', 'sitenames_l'],
        new_col_name='found_sitenames',
    )

    # Drop the sentences for which no site name was found.
    sn_inter = sn_inter[sn_inter['found_sitenames'].str.len() != 0]

    sn_inter = sn_inter[['_gddid', 'sentid', 'words_l', 'sitenames_l', 'found_sitenames', 'year']]
    sn_inter = sn_inter.rename(columns={
        'sitenames_l': 'expected_sitename',
        'found_sitenames': 'intersected_sitename',
    })

    output_file = os.path.join(path, 'sentences_with_sitenames_intersections.tsv')
    sn_inter.to_csv(output_file, sep='\t', index=False)
    print("A TSV file of sentences with Site intersections was created in your EDA output folder.")
    return sn_inter
|
||
def articles_wo_sites(nlp_bib_neotoma, bibliography, neotoma_joined_df, path=r'/Users/seiryu8808/Desktop/UWinsc/Github/UnacquiredSites/src/output/eda'):
    """Obtain all articles that have no site name intersections.

    Parameters
    ----------
    nlp_bib_neotoma : pd.DataFrame
        Input data frame. Must contain '_gddid' and 'found_sitenames'; the
        latter must hold values that support ``+`` and ``set`` (assumed to be
        lists of strings -- confirm against the caller).
    bibliography : pd.DataFrame
        Input data frame. Must contain '_gddid', 'title', 'year', 'doi' and
        'link_url'.
    neotoma_joined_df : pd.DataFrame
        Input data frame. Must contain 'doi' and 'sitenames'.
    path : str
        Directory where the TSV file is written.

    Returns
    -------
    pd.DataFrame
        Articles whose concatenated 'found_sitenames' is empty and that have
        a matching 'doi' in ``neotoma_joined_df``, with the columns '_gddid',
        'title', 'year', 'found_sitenames', 'exptected_sitename', 'doi',
        'link_url'. The same frame is written, tab separated, to
        'articles_wo_sitename_intersections.tsv' inside ``path``.
    """
    # Summing concatenates the per-sentence values into one value per article.
    arts_wo_sites = nlp_bib_neotoma.groupby('_gddid')\
        .agg({'found_sitenames': 'sum'})\
        .reset_index()

    # De-duplicate the names, then keep the articles where none was found.
    arts_wo_sites['found_sitenames'] = arts_wo_sites['found_sitenames'].apply(lambda names: list(set(names)))
    arts_wo_sites = arts_wo_sites[arts_wo_sites['found_sitenames'].str.len() == 0]

    # Join explicitly on the article id instead of on whatever columns the two
    # frames happen to share; both joins are inner joins.
    arts_wo_sites = arts_wo_sites.merge(bibliography, how='inner', on='_gddid')\
        .merge(neotoma_joined_df, on='doi')

    arts_wo_sites = arts_wo_sites[['_gddid', 'title', 'year', 'found_sitenames', 'sitenames', 'doi', 'link_url']]
    # NOTE(review): 'exptected_sitename' is misspelled, but it is part of the
    # returned/written schema, so it is kept as is until consumers are checked.
    arts_wo_sites = arts_wo_sites.rename(columns={'sitenames': 'exptected_sitename'})

    # Output file
    output_file = os.path.join(path, 'articles_wo_sitename_intersections.tsv')
    arts_wo_sites.to_csv(output_file, sep='\t', index=False)
    print("A TSV file of Articles without Sites was created in your EDA output folder.")

    return arts_wo_sites