import json
import os
import re
import textwrap
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from urllib.parse import quote

try:
    import requests
    REQUESTS_OK = True
except ImportError:
    REQUESTS_OK = False
# ============================================================
# CONFIGURATION
# ============================================================
# JSON files (relative to the working directory) holding the view history
# and the user's saved articles.
HISTORY_FILE = "wiki_history.json"
SAVED_FILE = "wiki_saved.json"

# English Wikipedia endpoints; "{}" is the slot for the page title.
WIKI_API = "https://en.wikipedia.org/api/rest_v1/page/summary/{}"
WIKI_SEARCH = "https://en.wikipedia.org/w/api.php"
WIKI_SECTIONS = "https://en.wikipedia.org/api/rest_v1/page/mobile-sections/{}"

# Column at which displayed text is wrapped.
WRAP_WIDTH = 72

# Language support: language code -> summary endpoint template.
# The order of the codes is the order shown in the language menu.
LANG_APIS = {
    code: f"https://{code}.wikipedia.org/api/rest_v1/page/summary/{{}}"
    for code in ("en", "hi", "ta", "fr", "de", "es", "ja", "zh")
}

# Currently selected language code (changed by the language selector).
current_lang = "en"
# ============================================================
# HELPERS
# ============================================================
def clean_text(text):
    """Normalize the whitespace of a text extract.

    Every run of whitespace (spaces, tabs, newlines) is collapsed into a
    single space and leading/trailing whitespace is removed. Nothing else
    is stripped from the text.

    Args:
        text: The raw string; may be ``None`` or empty.

    Returns:
        The normalized string, or ``""`` when *text* is falsy.
    """
    if not text:
        return ""
    return re.sub(r'\s+', ' ', text).strip()
def wrap_text(text, width=WRAP_WIDTH, indent=" "):
    """Word-wrap *text* paragraph by paragraph, indenting every line.

    The text is split on newlines. Each non-blank paragraph is wrapped with
    ``textwrap.fill`` using *indent* for both the first and the following
    lines; blank paragraphs are kept as empty lines.

    Args:
        text: The text to wrap; ``None`` is treated as an empty string.
        width: Maximum line width, indent included.
        indent: Prefix placed in front of every wrapped line.

    Returns:
        The wrapped paragraphs joined with newlines.
    """
    wrapped = []
    for para in (text or "").split("\n"):
        stripped = para.strip()
        if stripped:
            wrapped.append(textwrap.fill(stripped, width=width,
                                         initial_indent=indent,
                                         subsequent_indent=indent))
        else:
            wrapped.append("")
    return "\n".join(wrapped)
# ============================================================
# LOAD & SAVE HISTORY / SAVED ARTICLES
# ============================================================
def load_history():
    """Load the view history from ``HISTORY_FILE``.

    Returns:
        The list stored in the file, or ``[]`` when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON list.
    """
    try:
        with open(HISTORY_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed JSON or
        # undecodable bytes. Either way, start from an empty history.
        return []
    return data if isinstance(data, list) else []
def save_to_history(title, url, lang="en"):
    """Append a viewed article to the history file.

    Nothing is written when the most recent entry already has the same
    title. Only the 50 newest entries are kept.

    Args:
        title: Article title.
        url: Article URL (may be empty).
        lang: Language code the article was fetched in.
    """
    history = load_history()
    if not isinstance(history, list):
        history = []
    # Avoid duplicate consecutive entries
    last = history[-1] if history else None
    if isinstance(last, dict) and last.get("title") == title:
        return
    history.append({
        "title": title,
        "url": url,
        "lang": lang,
        "viewed_at": datetime.now().strftime("%d-%m-%Y %H:%M:%S"),
    })
    history = history[-50:]
    with open(HISTORY_FILE, "w", encoding="utf-8") as f:
        json.dump(history, f, indent=2, ensure_ascii=False)
def load_saved():
    """Load the saved articles from ``SAVED_FILE``.

    Returns:
        The list stored in the file, or ``[]`` when the file is missing,
        unreadable, not valid JSON, or does not contain a JSON list.
    """
    try:
        with open(SAVED_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: malformed JSON or
        # undecodable bytes. Either way, behave as if nothing was saved.
        return []
    return data if isinstance(data, list) else []
def save_article(article):
    """Add *article* to the saved-articles file unless it is already there.

    An article counts as already saved when a stored entry has the same
    title. On success a ``"saved_at"`` timestamp is set on *article* itself
    (the passed dict is modified) before it is written.

    Args:
        article: Article dict; must contain a ``"title"`` key.
    """
    saved = load_saved()
    title = article["title"]
    # .get keeps a malformed stored entry from aborting the duplicate check.
    if any(isinstance(s, dict) and s.get("title") == title for s in saved):
        print(" Already saved.")
        return
    article["saved_at"] = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    saved.append(article)
    with open(SAVED_FILE, "w", encoding="utf-8") as f:
        json.dump(saved, f, indent=2, ensure_ascii=False)
    print(f" Saved: {article['title']}")
# ============================================================
# SEARCH WIKIPEDIA
# ============================================================
def search_wikipedia(query, limit=8, lang=None):
    """Search Wikipedia and return matching titles with their URLs.

    Uses Wikipedia's opensearch API, restricted to namespace 0.

    Args:
        query: Search text.
        limit: Maximum number of results requested.
        lang: Language code of the wiki to search. ``None`` (the default)
            means the currently selected language, so that the titles found
            exist on the same wiki the summary is later fetched from.
            Codes not listed in ``LANG_APIS`` fall back to the English
            endpoint.

    Returns:
        A list of ``(title, url)`` tuples; ``[]`` on any request or parse
        error (the error is printed).
    """
    if lang is None:
        lang = current_lang
    if lang == "en" or lang not in LANG_APIS:
        endpoint = WIKI_SEARCH
    else:
        endpoint = f"https://{lang}.wikipedia.org/w/api.php"
    params = {
        "action": "opensearch",
        "search": query,
        "limit": limit,
        "namespace": 0,
        "format": "json",
    }
    try:
        # Same User-Agent as the summary requests in this file.
        resp = requests.get(endpoint, params=params, timeout=10,
                            headers={"User-Agent": "WikiFetcher/1.0"})
        resp.raise_for_status()
        data = resp.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f" Search error: {e}")
        return []
    # The code below indexes positions 1 (titles) and 3 (URLs), so anything
    # that is not a list (e.g. an error object) is treated as "no results".
    if not isinstance(data, list):
        return []
    titles = data[1] if len(data) > 1 else []
    urls = data[3] if len(data) > 3 else []
    return list(zip(titles, urls))
# ============================================================
# FETCH SUMMARY
# ============================================================
def fetch_summary(title, lang="en"):
    """Fetch the Wikipedia summary for a given title.

    Args:
        title: Page title; spaces are converted to underscores and the
            result is percent-encoded before it is placed in the URL.
        lang: Language code; unknown codes fall back to English.

    Returns:
        An article dict with the keys ``title``, ``description``,
        ``summary``, ``url``, ``thumbnail``, ``lang``, ``type`` and
        ``fetched_at``; or ``None`` when the page does not exist (HTTP 404)
        or the request fails (the failure is printed).
    """
    api_base = LANG_APIS.get(lang, LANG_APIS["en"])
    # safe="" so that "/", "?", "#" and "%" inside a title (e.g. "AC/DC")
    # are encoded instead of being read as URL structure.
    url = api_base.format(quote(title.replace(" ", "_"), safe=""))
    try:
        resp = requests.get(url, timeout=10,
                            headers={"User-Agent": "WikiFetcher/1.0"})
        if resp.status_code == 404:
            return None
        resp.raise_for_status()
        data = resp.json()
    except requests.exceptions.ConnectionError:
        print(" No internet connection.")
        return None
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f" Fetch error: {e}")
        return None
    if not isinstance(data, dict):
        print(" Fetch error: unexpected response format")
        return None
    return {
        "title": data.get("title", title),
        "description": data.get("description", ""),
        "summary": clean_text(data.get("extract", "")),
        "url": data.get("content_urls", {})
                   .get("desktop", {})
                   .get("page", ""),
        "thumbnail": data.get("originalimage", {}).get("source", ""),
        "lang": lang,
        "type": data.get("type", ""),
        "fetched_at": datetime.now().strftime("%d-%m-%Y %H:%M:%S"),
    }
# ============================================================
# DISPLAY ARTICLE
# ============================================================
def display_article(article, full=False):
    """Print an article: header, summary and source links.

    When *full* is false only the first three sentences are shown; if the
    summary has more, the user is asked whether to print all of it. When
    the article has no summary, a notice is printed and the function
    returns before the footer (URL, image) is shown.

    Args:
        article: Article dict; ``"title"`` is required, the other keys are
            optional.
        full: Print the whole summary without asking.
    """
    w = 70
    print("\n" + "="*w)
    print(f" {article['title'].upper()}")
    if article.get("description"):
        print(f" {article['description']}")
    print("="*w)
    summary = article.get("summary", "")
    if not summary:
        print(" No summary available.")
        return
    if full:
        print(wrap_text(summary))
    else:
        # Show first 3 sentences
        sentences = re.split(r'(?<=[.!?])\s+', summary)
        preview = " ".join(sentences[:3])
        print(wrap_text(preview))
        if len(sentences) > 3:
            remaining = len(sentences) - 3
            print(f"\n ... ({remaining} more sentence(s))")
            expand = input("\n Read full summary? (y/n): ").strip().lower()
            if expand == "y":
                print("\n" + "-"*w)
                print(wrap_text(summary))
    print("\n" + "-"*w)
    if article.get("url"):
        print(f" Read more: {article['url']}")
    if article.get("thumbnail"):
        print(f" Image : {article['thumbnail']}")
    print("="*w)
# ============================================================
# FETCH RELATED TOPICS
# ============================================================
def fetch_related(title):
    """Return up to five search hits for *title*, excluding the title itself.

    "Related" here simply means the results of searching for the title;
    a hit whose title equals *title* case-insensitively is dropped.

    Args:
        title: Title of the article to find related topics for.

    Returns:
        A list of at most five ``(title, url)`` tuples.
    """
    wanted = title.lower()
    results = search_wikipedia(title, limit=6)
    # Filter out exact match
    related = [(t, u) for t, u in results if t.lower() != wanted]
    return related[:5]
# ============================================================
# WORD COUNT & STATS
# ============================================================
def article_stats(article):
    """Print word, sentence and character counts for an article summary.

    Sentences are counted by splitting after ``.``, ``!`` or ``?`` followed
    by whitespace; empty pieces are ignored so an empty summary reports
    0 sentences. Read time assumes 200 words per minute, minimum 1 minute.

    Args:
        article: Article dict; ``"title"`` is required, ``"summary"``,
            ``"lang"`` and ``"type"`` are optional.
    """
    summary = article.get("summary") or ""
    words = len(summary.split())
    # re.split("") returns [""], which would count as one sentence.
    sentences = len([s for s in re.split(r'(?<=[.!?])\s+', summary) if s])
    chars = len(summary)
    read_time = max(1, round(words / 200))  # avg 200 wpm
    print("\n" + "="*45)
    print(f" ARTICLE STATS: {article['title']}")
    print("="*45)
    print(f" Words : {words:,}")
    print(f" Sentences : {sentences:,}")
    print(f" Characters : {chars:,}")
    print(f" Read time : ~{read_time} min")
    print(f" Language : {article.get('lang','en')}")
    print(f" Type : {article.get('type','')}")
    print("="*45)
# ============================================================
# COMPARE TWO ARTICLES
# ============================================================
def compare_articles(article1, article2):
    """Print a side-by-side comparison of two articles.

    Shows title, description, summary word count and URL in two columns
    (each value cut to 32 characters), followed by the first two sentences
    of each summary.

    Args:
        article1: First article dict; ``"title"`` is required.
        article2: Second article dict; ``"title"`` is required.
    """
    w = 32  # width of the first value column
    print("\n" + "="*70)
    print(" COMPARISON")
    print("="*70)
    print(f" {'TOPIC':<14} {article1['title'][:w]:<{w}} "
          f"{article2['title'][:w]}")
    print(" " + "-"*65)
    print(f" {'Description':<14} "
          f"{article1.get('description','')[:w]:<{w}} "
          f"{article2.get('description','')[:w]}")
    words1 = len(article1.get("summary", "").split())
    words2 = len(article2.get("summary", "").split())
    print(f" {'Word count':<14} {words1:<{w}} {words2}")
    print(f" {'URL':<14} {article1.get('url','')[:w]:<{w}} "
          f"{article2.get('url','')[:w]}")
    print("="*70)
    # Show both summaries
    for art in (article1, article2):
        print(f"\n [{art['title']}]")
        sentences = re.split(r'(?<=[.!?])\s+', art.get("summary", ""))
        preview = " ".join(sentences[:2])
        print(wrap_text(preview))
# ============================================================
# VIEW HISTORY
# ============================================================
def view_history():
    """Print the 20 most recent history entries, newest first.

    The header shows the total number of stored entries. Entries lacking a
    title or timestamp are printed with an empty value rather than raising.
    """
    history = load_history()
    if not history:
        print("\n No search history yet.")
        return
    print("\n" + "="*60)
    print(f" SEARCH HISTORY ({len(history)} articles)")
    print("="*60)
    print(f" {'#':<4} {'TITLE':<30} {'LANG':<6} VIEWED AT")
    print(" " + "-"*55)
    for i, h in enumerate(reversed(history[-20:]), 1):
        title = str(h.get("title", ""))
        lang = str(h.get("lang", "en"))
        print(f" {i:<4} {title:<30} {lang:<6} "
              f"{h.get('viewed_at', '')}")
    print("="*60)
# ============================================================
# VIEW SAVED ARTICLES
# ============================================================
def view_saved():
    """List the saved articles and optionally display one in full.

    After the list, the user may enter an article number to view it;
    anything else returns to the caller.
    """
    saved = load_saved()
    if not saved:
        print("\n No saved articles yet.")
        return
    print("\n" + "="*60)
    print(f" SAVED ARTICLES ({len(saved)})")
    print("="*60)
    for i, a in enumerate(saved, 1):
        print(f"\n [{i}] {a['title']}")
        if a.get("description"):
            print(f" {a['description']}")
        print(f" Saved: {a.get('saved_at','')} "
              f"URL: {a.get('url','')[:40]}")
    print("="*60)
    choice = input("\n View an article? (number or Enter to back): ").strip()
    # isdecimal(), not isdigit(): isdigit() accepts characters such as "²"
    # that int() cannot parse.
    if choice.isdecimal():
        idx = int(choice) - 1
        if 0 <= idx < len(saved):
            display_article(saved[idx], full=True)
def remove_saved():
    """Interactively remove one article from the saved list.

    The saved list is shown first via ``view_saved()`` (including its own
    "view an article" prompt), then the user is asked for the number of the
    entry to delete. The file is rewritten only when a valid number is
    given.
    """
    saved = load_saved()
    if not saved:
        print("\n No saved articles.")
        return
    view_saved()
    try:
        idx = int(input("\n Enter number to remove: ").strip()) - 1
    except ValueError:
        print(" Invalid input.")
        return
    if not 0 <= idx < len(saved):
        print(" Invalid number.")
        return
    removed = saved.pop(idx)
    with open(SAVED_FILE, "w", encoding="utf-8") as f:
        json.dump(saved, f, indent=2, ensure_ascii=False)
    print(f" Removed: {removed['title']}")
# ============================================================
# MAIN SEARCH FLOW
# ============================================================
def do_search():
    """Run the interactive search flow.

    Asks for a query, lists the hits, fetches and displays the chosen
    article in the current language, records it in the history and offers
    follow-up options (full summary, statistics, save, related topics).

    Returns:
        The fetched article dict, or ``None`` when the user backs out, no
        results are found, the selection is invalid or the fetch fails.
    """
    query = input("\n Search Wikipedia: ").strip()
    if not query:
        return None
    print(" Searching...")
    results = search_wikipedia(query)
    if not results:
        print(" No results found.")
        return None
    # Show results
    print(f"\n Results for '{query}':")
    print(" " + "-"*50)
    for i, (title, url) in enumerate(results, 1):
        print(f" [{i}] {title}")
        print(f" {url}")
    print("\n [0] Back to menu")
    choice = input("\n Select article number: ").strip()
    # isdecimal(), not isdigit(): isdigit() accepts characters such as "²"
    # that int() cannot parse.
    if not choice.isdecimal() or int(choice) == 0:
        return None
    idx = int(choice) - 1
    if idx >= len(results):
        print(" Invalid selection.")
        return None
    title = results[idx][0]
    print(f"\n Fetching: {title}...")
    article = fetch_summary(title, current_lang)
    if not article:
        print(f" Could not fetch article for: {title}")
        return None
    display_article(article)
    save_to_history(article["title"], article.get("url", ""), current_lang)
    # Post-article options
    print("\n Options:")
    print(" 1. Read full summary")
    print(" 2. Article statistics")
    print(" 3. Save this article")
    print(" 4. Show related topics")
    print(" 5. Back")
    sub = input(" > ").strip()
    if sub == "1":
        display_article(article, full=True)
    elif sub == "2":
        article_stats(article)
    elif sub == "3":
        save_article(article)
    elif sub == "4":
        _show_related(title)
    return article


def _show_related(title):
    """List topics related to *title* and optionally open one of them."""
    related = fetch_related(title)
    if not related:
        print(" No related articles found.")
        return
    print(f"\n Related to '{title}':")
    for i, (t, u) in enumerate(related, 1):
        print(f" [{i}] {t}")
        print(f" {u}")
    rel_choice = input("\n Open related article? (number/Enter): ").strip()
    if not rel_choice.isdecimal():
        return
    ri = int(rel_choice) - 1
    if not 0 <= ri < len(related):
        return
    rel_article = fetch_summary(related[ri][0], current_lang)
    if rel_article:
        display_article(rel_article)
        save_to_history(rel_article["title"],
                        rel_article.get("url", ""),
                        current_lang)
# ============================================================
# LANGUAGE SELECTOR
# ============================================================
def select_language():
    """Show the available languages and let the user pick one.

    Updates the module-level ``current_lang`` when a valid number is
    entered; any other input leaves the language unchanged.
    """
    global current_lang
    print("\n Available languages:")
    langs = list(LANG_APIS.keys())
    lang_names = {
        "en": "English", "hi": "Hindi", "ta": "Tamil",
        "fr": "French", "de": "German", "es": "Spanish",
        "ja": "Japanese", "zh": "Chinese",
    }
    for i, lang in enumerate(langs, 1):
        marker = " ← current" if lang == current_lang else ""
        print(f" {i}. {lang_names.get(lang, lang)} ({lang}){marker}")
    choice = input("\n Select language (number): ").strip()
    # isdecimal(), not isdigit(): isdigit() accepts characters such as "²"
    # that int() cannot parse.
    if choice.isdecimal():
        idx = int(choice) - 1
        if 0 <= idx < len(langs):
            current_lang = langs[idx]
            print(f" Language set to: {lang_names.get(current_lang, current_lang)}")
# ============================================================
# MAIN MENU
# ============================================================
def print_menu():
    """Print the main menu.

    The header shows the current language; the saved-articles and history
    entries show their current counts, read from disk on every call.
    """
    lang_names = {
        "en": "English", "hi": "Hindi", "ta": "Tamil",
        "fr": "French", "de": "German", "es": "Spanish",
        "ja": "Japanese", "zh": "Chinese",
    }
    saved_count = len(load_saved())
    history_count = len(load_history())
    print("\n" + "-"*50)
    print(f" WIKIPEDIA SUMMARY FETCHER "
          f"[{lang_names.get(current_lang, current_lang)}]")
    print("-"*50)
    print(" 1. Search & fetch article")
    print(" 2. Fetch by exact title")
    print(" 3. Compare two articles")
    print(f" 4. Saved articles [{saved_count}]")
    print(f" 5. Search history [{history_count}]")
    print(" 6. Change language")
    print(" 7. Random article")
    print(" 0. Exit")
    print("-"*50)
def fetch_random():
    """Fetch a random Wikipedia article in the current language and show it.

    The article is displayed, recorded in the history, and the user is
    asked whether to save it. A request or JSON-parse failure is printed
    and the function returns without displaying anything.
    """
    url = f"https://{current_lang}.wikipedia.org/api/rest_v1/page/random/summary"
    # Only the network call and the JSON decoding are guarded, so a problem
    # while displaying or saving is not reported as a fetch failure.
    try:
        resp = requests.get(url, timeout=10,
                            headers={"User-Agent": "WikiFetcher/1.0"})
        resp.raise_for_status()
        data = resp.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f" Could not fetch random article: {e}")
        return
    if not isinstance(data, dict):
        print(" Could not fetch random article: unexpected response format")
        return
    article = {
        "title": data.get("title", "Random"),
        "description": data.get("description", ""),
        "summary": clean_text(data.get("extract", "")),
        "url": data.get("content_urls", {})
                   .get("desktop", {}).get("page", ""),
        "thumbnail": data.get("originalimage", {}).get("source", ""),
        "lang": current_lang,
        "type": data.get("type", ""),
        "fetched_at": datetime.now().strftime("%d-%m-%Y %H:%M:%S"),
    }
    display_article(article)
    save_to_history(article["title"], article.get("url", ""), current_lang)
    save = input("\n Save this article? (y/n): ").strip().lower()
    if save == "y":
        save_article(article)
def main():
    """Run the interactive menu loop until the user chooses to exit.

    Returns immediately with an install hint when the ``requests`` library
    could not be imported. End-of-input or Ctrl-C at the menu prompt is
    treated like choosing "0. Exit".
    """
    print("\n" + "="*55)
    print(" WIKIPEDIA SUMMARY FETCHER")
    print("="*55)
    if not REQUESTS_OK:
        print("\n requests library not installed!")
        print(" Install it: pip install requests")
        return
    print("\n Fetch Wikipedia summaries for any topic.")
    print(" Supports 8 languages, save & compare articles.\n")
    last_article = None
    while True:
        print_menu()
        try:
            choice = input(" > ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave through the normal exit path.
            choice = "0"
        if choice == "1":
            last_article = do_search() or last_article
        elif choice == "2":
            last_article = _fetch_by_exact_title() or last_article
        elif choice == "3":
            _compare_two_topics()
        elif choice == "4":
            view_saved()
        elif choice == "5":
            view_history()
        elif choice == "6":
            select_language()
        elif choice == "7":
            print("\n Fetching random article...")
            fetch_random()
        elif choice == "0":
            print("\n Goodbye! Keep exploring!\n")
            break
        else:
            print(" Invalid choice.")


def _fetch_by_exact_title():
    """Ask for an exact title, fetch and show it; return the article or None."""
    title = input("\n Exact Wikipedia title: ").strip()
    if not title:
        return None
    print(f" Fetching: {title}...")
    article = fetch_summary(title, current_lang)
    if not article:
        print(f" Article not found: '{title}'")
        print(" Try option 1 (Search) for suggestions.")
        return None
    display_article(article)
    save_to_history(article["title"],
                    article.get("url", ""), current_lang)
    save = input("\n Save this article? (y/n): ").strip().lower()
    if save == "y":
        save_article(article)
    return article


def _compare_two_topics():
    """Ask for two topics, fetch both and print their comparison."""
    print("\n Compare two Wikipedia articles")
    t1 = input(" First topic : ").strip()
    t2 = input(" Second topic: ").strip()
    if not (t1 and t2):
        return
    print(" Fetching...")
    a1 = fetch_summary(t1, current_lang)
    a2 = fetch_summary(t2, current_lang)
    if a1 and a2:
        compare_articles(a1, a2)
    else:
        print(" Could not fetch one or both articles.")
# ============================================================
# RUN
# ============================================================
# Start the interactive menu only when the file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
# No comments:
# Post a Comment