-
-
Notifications
You must be signed in to change notification settings - Fork 381
Added Custom-search CLI script #485
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1 @@ | ||
| #,Title,Link |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,41 @@ | ||||||
| # Custom-search CLI | ||||||
| A simple Python script that uses the **Google Custom Search API** to fetch search results and export them into a CSV file. | ||||||
|
|
||||||
|
|
||||||
| ## Requirements | ||||||
| - Python 3.8+ | ||||||
| - A Google API key | ||||||
| - A Google Custom Search Engine (CX) ID | ||||||
| - Install the only external dependency, `requests` (`csv`, `json`, `os`, and `argparse` ship with Python; `beautifulsoup4` is not used by the script): | ||||||
| ```bash | ||||||
| pip install requests | ||||||
| ``` | ||||||
|
|
||||||
| ## Setup | ||||||
| 1. Get a Google API key from [Google Cloud Console](https://console.cloud.google.com/) | ||||||
| 2. Create a Custom Search Engine (CX) at [Google CSE](https://cse.google.com/cse/all) | ||||||
| 3. Run the script with your API key to create setting.json: | ||||||
|
|
||||||
| python main.py -sq [SEARCH_QUERY] --add_api_key [YOUR_API_KEY] | ||||||
|
||||||
| python main.py -sq [SEARCH_QUERY] --add_api_key [YOUR_API_KEY] | |
| python scraper.py -sq [SEARCH_QUERY] --add_api_key [YOUR_API_KEY] |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,101 @@ | ||||||
| import requests | ||||||
| import json | ||||||
| import os | ||||||
| import csv | ||||||
| import argparse | ||||||
| from typing import List, Dict, Tuple, Any | ||||||
|
|
||||||
| # Path of the JSON file that stores the API key ("API_KEY") and search engine ID ("CX"). | ||||||
| SETTING_ROUTE = 'setting.json' | ||||||
| # Search engine (CX) ID written into a newly created settings file. | ||||||
| DEFAULT_CX = 'b0264518c3d104eda' | ||||||
|
|
||||||
|
|
||||||
# The annotation is quoted so that it is not evaluated at definition time:
# a bare ``str | None`` raises TypeError on Python 3.8/3.9.
def load_settings(api_key: "str | None" = None) -> Dict[str, str]:
    """
    Load API settings from setting.json, creating or completing it if needed.

    The returned dictionary always contains a non-empty "API_KEY" and a
    non-empty "CX". A key already stored in the settings file takes
    precedence over ``api_key``; ``api_key`` is only used when the file is
    missing or holds no key. A missing or empty "CX" falls back to
    ``DEFAULT_CX``. Whenever a value had to be filled in, the settings file
    is (re)written so the next run finds it.

    Args:
        api_key: API key to store when the settings file has none.

    Returns:
        The settings dictionary with "API_KEY" and "CX" populated.

    Raises:
        FileNotFoundError: setting.json does not exist and no ``api_key``
            was supplied.
        ValueError: setting.json exists but holds no API key and no
            ``api_key`` was supplied.
    """
    if os.path.exists(SETTING_ROUTE):
        with open(SETTING_ROUTE, 'r', encoding="utf-8") as f:
            settings = json.load(f)
    elif api_key:
        # No file yet: start empty and let the fill-in logic below build it.
        settings = {}
    else:
        raise FileNotFoundError("No setting.json found. Please run with --add_api_key to create one.")

    needs_write = False

    if not settings.get("API_KEY"):
        if not api_key:
            raise ValueError("API_KEY is missing in setting.json. Use --add_api_key to add one.")
        settings["API_KEY"] = api_key
        needs_write = True

    # A settings file with an empty or absent "CX" (such as a blank template)
    # would otherwise hand the caller an unusable search engine ID.
    if not settings.get("CX"):
        settings["CX"] = DEFAULT_CX
        needs_write = True

    if needs_write:
        with open(SETTING_ROUTE, 'w', encoding="utf-8") as f:
            json.dump(settings, f, indent=4)

    return settings
|
|
||||||
|
|
||||||
def scrape(search_query: str, api_key: str, cx: str, pages: int = 1) -> Tuple[List[Dict[str, Any]], float]:
    """
    Perform a Google Custom Search and return the collected results.

    One request is issued per page; the ``start`` index advances by 10 for
    each page (1, 11, 21, ...). Fetching stops early when a response carries
    no "items" entry.

    Args:
        search_query: Text to search for. It is URL-encoded by ``requests``,
            so spaces and characters such as ``&`` or ``#`` are safe.
        api_key: Google API key sent as the ``key`` parameter.
        cx: Custom Search Engine ID sent as the ``cx`` parameter.
        pages: Number of result pages to request.

    Returns:
        A tuple ``(results, search_time)``: the concatenated "items" of all
        fetched pages, and the sum of the ``searchTime`` values reported by
        the API.

    Raises:
        RuntimeError: The API answered with a non-200 status code.
    """
    endpoint = "https://www.googleapis.com/customsearch/v1"
    results: List[Dict[str, Any]] = []
    search_time = 0.0

    for page in range(pages):
        # Pass the query as params rather than formatting it into the URL, so
        # it is percent-encoded and cannot inject extra query parameters.
        params = {
            "key": api_key,
            "q": search_query,
            "cx": cx,
            "start": page * 10 + 1,
        }

        # Without a timeout, requests may wait forever on a stalled connection.
        response = requests.get(endpoint, params=params, timeout=30)
        if response.status_code != 200:
            raise RuntimeError(f"API request failed: {response.status_code} {response.text}")

        data = response.json()

        if "items" not in data:
            print("No results found or error:", data)
            break

        results.extend(data["items"])
        # Tolerate a response that omits the timing information.
        search_time += float(data.get("searchInformation", {}).get("searchTime", 0.0))

    return results, search_time
|
|
||||||
|
|
||||||
def export_to_csv(results: List[Dict[str, Any]], filename: str = "output.csv") -> None:
    """
    Write search results to a CSV file and report how many were written.

    The file starts with the header row ``#, Title, Link``. Each result then
    contributes one row: its 1-based position, its "title", and its "link".
    A result lacking either key gets an empty string in that column.

    Args:
        results: Result dictionaries to export.
        filename: Destination path; an existing file is overwritten.
    """
    # Build the whole table before touching the file system.
    table = [["#", "Title", "Link"]]
    for position, entry in enumerate(results, start=1):
        title = entry.get("title", "")
        link = entry.get("link", "")
        table.append([position, title, link])

    with open(filename, "w", encoding="utf-8", newline="") as csv_file:
        csv.writer(csv_file).writerows(table)

    print(f"Exported {len(results)} results to {filename}")
|
|
||||||
|
|
||||||
| def main(): | ||||||
| parser = argparse.ArgumentParser(description="Google Custom Search scraper") | ||||||
| parser.add_argument("-sq", "--search_query", required=True, help="Search query to search for") | ||||||
| parser.add_argument("--add_api_key", type=str, help="Your Google API key") | ||||||
| parser.add_argument("--pages", type=int, default=1, help="Number of pages of results to fetch") | ||||||
| args = parser.parse_args() | ||||||
|
|
||||||
| settings = load_settings(args.add_api_key) | ||||||
| api_key = settings["API_KEY"] | ||||||
| cx = settings["CX"] | ||||||
|
|
||||||
| print(f"Using API key: {api_key}") | ||||||
|
||||||
| print(f"Using API key: {api_key}") | |
| print(f"Using API key: {api_key[:4]}...{api_key[-4:]} (masked)") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| { | ||
| "API_KEY": "", | ||
| "CX": "" | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -55,6 +55,7 @@ More information on contributing and the general code of conduct for discussion | |||||
| | CSV to Excel | [CSV to Excel](https://github.com/DhanushNehru/Python-Scripts/tree/main/CSV%20to%20Excel) | A Python script to convert a CSV to an Excel file. | | ||||||
| | CSV_TO_NDJSON | [CSV_TO_NDJSON](https://github.com/DhanushNehru/Python-Scripts/tree/main/CSV_TO_NDJSON) | A Python script to convert a CSV file to an NDJSON file. | | ||||||
| | Currency Script | [Currency Script](https://github.com/DhanushNehru/Python-Scripts/tree/main/Currency%20Script) | A Python script to convert the currency of one country to that of another. | | ||||||
| | Custom-search CLI | [Custom-search CLI](https://github.com/DhanushNehru/Python-Scripts/tree/main/Custom-search%20CLI) | Python script to search a query through internet and save the results in a .csv file. | | ||||||
|
||||||
| | Custom-search CLI | [Custom-search CLI](https://github.com/DhanushNehru/Python-Scripts/tree/main/Custom-search%20CLI) | Python script to search a query through internet and save the results in a .csv file. | | |
| | Custom-search CLI | [Custom-search CLI](https://github.com/DhanushNehru/Python-Scripts/tree/main/Custom-search%20CLI) | Python script to search a query through internet and save the results in a .csv file. | |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The dependencies listed are incorrect. The script doesn't use beautifulsoup4 or python-csv (csv is built-in), and argparse is part of the standard library. Only 'requests' is needed as an external dependency.