# Python for Engineers: Wikipedia Summary Fetcher

import json

import re

import os

import textwrap

from datetime import datetime

from pathlib import Path

from collections import defaultdict

# `requests` is an optional third-party dependency.  REQUESTS_OK records
# whether it could be imported so the program can refuse to start cleanly
# instead of failing with an ImportError.
try:
    import requests
except ImportError:
    REQUESTS_OK = False
else:
    REQUESTS_OK = True

# ============================================================

# CONFIGURATION

# ============================================================

# Local JSON files used for persistence (relative to the working directory).
HISTORY_FILE = "wiki_history.json"
SAVED_FILE = "wiki_saved.json"

# Column width used when word-wrapping article text for the terminal.
WRAP_WIDTH = 72

# English Wikipedia endpoints; "{}" is the slot for the page title.
WIKI_API = "https://en.wikipedia.org/api/rest_v1/page/summary/{}"
WIKI_SEARCH = "https://en.wikipedia.org/w/api.php"
WIKI_SECTIONS = "https://en.wikipedia.org/api/rest_v1/page/mobile-sections/{}"

# Language support: language code -> summary URL template for that wiki.
# The doubled braces leave a literal "{}" placeholder for the page title.
LANG_APIS = {
    code: f"https://{code}.wikipedia.org/api/rest_v1/page/summary/{{}}"
    for code in ("en", "hi", "ta", "fr", "de", "es", "ja", "zh")
}

# Language code currently selected by the user.
current_lang = "en"

# ============================================================

# HELPERS

# ============================================================

def clean_text(text):
    """Normalise the whitespace of an extracted text snippet.

    Every run of whitespace (spaces, tabs, newlines) collapses to a single
    space and leading/trailing whitespace is removed.  Falsy input such as
    ``None`` or ``""`` yields an empty string.
    """
    if text:
        return re.sub(r'\s+', ' ', text).strip()
    return ""

def wrap_text(text, width=WRAP_WIDTH, indent=" "):
    """Word-wrap *text* paragraph by paragraph.

    The text is split on newlines.  Each non-blank paragraph is stripped and
    filled to *width* columns with *indent* prefixed to every output line;
    blank paragraphs are kept as empty lines.  The pieces are joined back
    with newlines.
    """
    wrapper = textwrap.TextWrapper(
        width=width,
        initial_indent=indent,
        subsequent_indent=indent,
    )
    wrapped_paragraphs = []
    for paragraph in text.split("\n"):
        stripped = paragraph.strip()
        wrapped_paragraphs.append(wrapper.fill(stripped) if stripped else "")
    return "\n".join(wrapped_paragraphs)

# ============================================================

# LOAD & SAVE HISTORY / SAVED ARTICLES

# ============================================================

def load_history():
    """Load the viewing history stored in ``HISTORY_FILE``.

    Returns:
        list: The history entries (dicts) in stored order.  An empty list is
        returned when the file is missing or unreadable, is not valid
        UTF-8/JSON, or does not hold a JSON array.  Array items that are not
        objects are dropped so callers can safely use ``entry.get(...)``.
    """
    try:
        with open(HISTORY_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: json.JSONDecodeError and UnicodeDecodeError both
        # derive from it, i.e. the file content is corrupt.
        return []
    if not isinstance(data, list):
        return []
    return [entry for entry in data if isinstance(entry, dict)]

def save_to_history(title, url, lang="en"):
    """Record a viewed article in the history file.

    Nothing is written when the most recent entry already has the same
    title.  Otherwise a timestamped entry is appended, the list is trimmed
    to its 50 newest entries and written back to ``HISTORY_FILE``.
    """
    entries = load_history()

    # Skip an immediate repeat of the last viewed title.
    is_repeat = bool(entries) and entries[-1].get("title") == title
    if is_repeat:
        return

    viewed_at = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    entries.append({
        "title": title,
        "url": url,
        "lang": lang,
        "viewed_at": viewed_at,
    })

    # Keep only the 50 most recent entries.
    del entries[:-50]

    with open(HISTORY_FILE, "w", encoding="utf-8") as out:
        json.dump(entries, out, indent=2, ensure_ascii=False)

def load_saved():
    """Load the saved articles stored in ``SAVED_FILE``.

    Returns:
        list: The saved article dicts in stored order.  An empty list is
        returned when the file is missing or unreadable, is not valid
        UTF-8/JSON, or does not hold a JSON array.  Array items that are not
        objects are dropped so callers can safely index them by key.
    """
    try:
        with open(SAVED_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: json.JSONDecodeError and UnicodeDecodeError both
        # derive from it, i.e. the file content is corrupt.
        return []
    if not isinstance(data, list):
        return []
    return [item for item in data if isinstance(item, dict)]

def save_article(article):
    """Add *article* to the saved-articles file unless it is already there.

    Args:
        article (dict): Article data; must contain a ``"title"`` key.

    An article counts as already saved when a stored entry has the same
    title.  The stored record is a copy of *article* with a ``"saved_at"``
    timestamp added, so the caller's dict is left unmodified.
    """
    saved = load_saved()
    title = article["title"]

    # .get() tolerates stored entries that lack a title.
    if any(isinstance(s, dict) and s.get("title") == title for s in saved):
        print(" Already saved.")
        return

    record = dict(article)
    record["saved_at"] = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    saved.append(record)

    with open(SAVED_FILE, "w", encoding="utf-8") as f:
        json.dump(saved, f, indent=2, ensure_ascii=False)
    print(f" Saved: {title}")

# ============================================================

# SEARCH WIKIPEDIA

# ============================================================

def search_wikipedia(query, limit=8, lang=None):
    """Search Wikipedia with the opensearch API.

    Args:
        query (str): Search text.  A blank query returns ``[]`` without
            contacting the network.
        limit (int): Maximum number of results to request.
        lang (str | None): Language code of the wiki to search.  ``None``
            means the currently selected language (``current_lang``); codes
            not present in ``LANG_APIS`` fall back to English.  Searching
            the selected wiki keeps the returned titles valid for a later
            summary fetch from that same wiki.

    Returns:
        list[tuple[str, str]]: ``(title, url)`` pairs, or ``[]`` when the
        request fails or the response does not have the expected shape.
    """
    if not query or not query.strip():
        return []

    if lang is None:
        lang = current_lang
    if lang not in LANG_APIS:
        lang = "en"
    endpoint = (WIKI_SEARCH if lang == "en"
                else f"https://{lang}.wikipedia.org/w/api.php")

    params = {
        "action": "opensearch",
        "search": query,
        "limit": limit,
        "namespace": 0,
        "format": "json",
    }
    try:
        # Same User-Agent as the summary requests, for consistency.
        resp = requests.get(endpoint, params=params, timeout=10,
                            headers={"User-Agent": "WikiFetcher/1.0"})
        resp.raise_for_status()
        data = resp.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print(f" Search error: {e}")
        return []

    # opensearch answers with [query, titles, descriptions, urls]; API errors
    # come back as a JSON object instead.
    if not isinstance(data, list):
        return []
    titles = data[1] if len(data) > 1 else []
    urls = data[3] if len(data) > 3 else []
    return list(zip(titles, urls))

# ============================================================

# FETCH SUMMARY

# ============================================================

def fetch_summary(title, lang="en"):
    """Fetch the Wikipedia summary for *title*.

    Args:
        title (str): Page title.  Spaces are converted to underscores and
            the result is percent-encoded, so titles containing ``/``,
            ``?`` or ``#`` (e.g. "AC/DC", "C#") address the right page.
        lang (str): Language code.  Codes not present in ``LANG_APIS`` fall
            back to English, and the returned ``"lang"`` reflects the wiki
            actually queried.

    Returns:
        dict | None: Article dict with the keys ``title``, ``description``,
        ``summary``, ``url``, ``thumbnail``, ``lang``, ``type`` and
        ``fetched_at``; ``None`` when the title is blank, the page does not
        exist (HTTP 404) or the request fails.
    """
    from urllib.parse import quote

    if not title or not title.strip():
        return None
    if lang not in LANG_APIS:
        lang = "en"

    # safe="" so that "/" is escaped too; it must not act as a path separator.
    slug = quote(title.strip().replace(" ", "_"), safe="")
    url = LANG_APIS[lang].format(slug)

    try:
        resp = requests.get(url, timeout=10,
                            headers={"User-Agent": "WikiFetcher/1.0"})
        if resp.status_code == 404:
            return None
        resp.raise_for_status()
        data = resp.json()
    except requests.exceptions.ConnectionError:
        print(" No internet connection.")
        return None
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        print(f" Fetch error: {e}")
        return None

    if not isinstance(data, dict):
        print(" Fetch error: unexpected response format")
        return None

    # "or {}" guards against keys that are present but null.
    desktop = (data.get("content_urls") or {}).get("desktop") or {}
    image = data.get("originalimage") or {}
    return {
        "title": data.get("title", title),
        "description": data.get("description", ""),
        "summary": clean_text(data.get("extract", "")),
        "url": desktop.get("page", ""),
        "thumbnail": image.get("source", ""),
        "lang": lang,
        "type": data.get("type", ""),
        "fetched_at": datetime.now().strftime("%d-%m-%Y %H:%M:%S"),
    }

# ============================================================

# DISPLAY ARTICLE

# ============================================================

def display_article(article, full=False):
    """Print an article to the terminal.

    Shows the upper-cased title, the description when present, then either
    the whole summary (``full=True``) or a three-sentence preview.  When the
    preview hides sentences the user is asked whether to read the full
    summary.  The article URL and image URL are printed last when available.
    Articles without a summary only get the header and a notice.
    """
    w = 70
    heavy_rule = "="*w
    light_rule = "-"*w

    # Header
    print("\n" + heavy_rule)
    print(f" {article['title'].upper()}")
    if article.get("description"):
        print(f" {article['description']}")
    print(heavy_rule)

    summary = article.get("summary", "")
    if not summary:
        print(" No summary available.")
        return

    if full:
        print(wrap_text(summary))
    else:
        # Preview: first three sentences, split after ., ! or ?
        sentences = re.split(r'(?<=[.!?])\s+', summary)
        print(wrap_text(" ".join(sentences[:3])))
        remaining = len(sentences) - 3
        if remaining > 0:
            print(f"\n ... ({remaining} more sentence(s))")
            answer = input("\n Read full summary? (y/n): ").strip().lower()
            if answer == "y":
                print("\n" + light_rule)
                print(wrap_text(summary))

    # Footer with links
    print("\n" + light_rule)
    if article.get("url"):
        print(f" Read more: {article['url']}")
    if article.get("thumbnail"):
        print(f" Image : {article['thumbnail']}")
    print(heavy_rule)

# ============================================================

# FETCH RELATED TOPICS

# ============================================================

def fetch_related(title):
    """Return up to five search hits for *title* other than the title itself.

    Runs a six-result search and drops every hit whose title equals *title*
    ignoring case.  Each item is a ``(title, url)`` pair.
    """
    others = []
    for name, link in search_wikipedia(title, limit=6):
        # Skip the article the user is already looking at.
        if name.lower() != title.lower():
            others.append((name, link))
    return others[:5]

# ============================================================

# WORD COUNT & STATS

# ============================================================

def article_stats(article):
    """Print word, sentence and character counts for an article summary.

    Args:
        article (dict): Must contain ``"title"``; ``"summary"``, ``"lang"``
            and ``"type"`` are optional.

    A missing, ``None`` or empty summary is reported as 0 words, 0 sentences
    and 0 characters.  Read time assumes 200 words per minute and is never
    shown as less than one minute.
    """
    summary = article.get("summary") or ""
    words = len(summary.split())
    # re.split on an empty string yields [""]; count only non-blank pieces so
    # an empty summary is not reported as one sentence.
    sentences = sum(
        1 for piece in re.split(r'(?<=[.!?])\s+', summary) if piece.strip()
    )
    chars = len(summary)
    read_time = max(1, round(words / 200))  # avg 200 wpm

    rule = "="*45
    print("\n" + rule)
    print(f" ARTICLE STATS: {article['title']}")
    print(rule)
    print(f" Words : {words:,}")
    print(f" Sentences : {sentences:,}")
    print(f" Characters : {chars:,}")
    print(f" Read time : ~{read_time} min")
    print(f" Language : {article.get('lang','en')}")
    print(f" Type : {article.get('type','')}")
    print(rule)

# ============================================================

# COMPARE TWO ARTICLES

# ============================================================

def compare_articles(article1, article2):
    """Print a side-by-side comparison of two articles.

    The table lists title, description, summary word count and URL, with
    text values cut to 32 characters.  A two-sentence preview of each
    summary follows the table.
    """
    w = 32
    banner = "="*70

    def row(label, left, right):
        # One table line: label column, padded left value, right value.
        print(f" {label:<14} {left:<{w}} {right}")

    print("\n" + banner)
    print(" COMPARISON")
    print(banner)

    row('TOPIC', article1['title'][:w], article2['title'][:w])
    print(" " + "-"*65)
    row('Description',
        article1.get('description','')[:w],
        article2.get('description','')[:w])
    words1 = len(article1.get("summary","").split())
    words2 = len(article2.get("summary","").split())
    row('Word count', words1, words2)
    row('URL', article1.get('url','')[:w], article2.get('url','')[:w])
    print(banner)

    # Short preview of each summary
    for art in (article1, article2):
        print(f"\n [{art['title']}]")
        pieces = re.split(r'(?<=[.!?])\s+', art.get("summary",""))
        print(wrap_text(" ".join(pieces[:2])))

# ============================================================

# VIEW HISTORY

# ============================================================

def view_history():
    """Print the 20 most recently viewed articles, newest first.

    The heading shows the total number of stored history entries.  Prints a
    notice and returns when the history is empty.
    """
    entries = load_history()
    if not entries:
        print("\n No search history yet.")
        return

    rule = "="*60
    print("\n" + rule)
    print(f" SEARCH HISTORY ({len(entries)} articles)")
    print(rule)
    print(f" {'#':<4} {'TITLE':<30} {'LANG':<6} VIEWED AT")
    print(" " + "-"*55)

    # Last 20 entries, flipped so the newest comes first.
    newest_first = entries[-20:][::-1]
    for position, entry in enumerate(newest_first, start=1):
        title = entry['title']
        lang = entry.get('lang','en')
        print(f" {position:<4} {title:<30} {lang:<6} {entry['viewed_at']}")
    print(rule)

# ============================================================

# VIEW SAVED ARTICLES

# ============================================================

def view_saved():
    """List the saved articles and optionally display one in full.

    Prints a notice and returns when nothing is saved.  After the listing
    the user may enter an article number to display it; any other input
    (including Enter) returns to the caller.
    """
    saved = load_saved()
    if not saved:
        print("\n No saved articles yet.")
        return

    print("\n" + "="*60)
    print(f" SAVED ARTICLES ({len(saved)})")
    print("="*60)
    for i, a in enumerate(saved, 1):
        print(f"\n [{i}] {a['title']}")
        if a.get("description"):
            print(f" {a['description']}")
        print(f" Saved: {a.get('saved_at','')} "
              f"URL: {a.get('url','')[:40]}")
    print("="*60)

    choice = input("\n View an article? (number or Enter to back): ").strip()
    # isdecimal(), not isdigit(): isdigit() also accepts characters such as
    # "²" that int() cannot parse, which would raise ValueError here.
    if choice.isdecimal():
        idx = int(choice) - 1
        if 0 <= idx < len(saved):
            display_article(saved[idx], full=True)

def remove_saved():
    """Ask for a saved-article number and delete that article.

    Prints a notice and returns when nothing is saved.  Otherwise the saved
    list is shown through ``view_saved()``, the user enters a number, and
    the matching entry is removed and the file rewritten.  Out-of-range
    numbers and non-numeric input are reported without changing the file.
    """
    saved = load_saved()
    if not saved:
        print("\n No saved articles.")
        return

    view_saved()
    try:
        position = int(input("\n Enter number to remove: ").strip()) - 1
        if position < 0 or position >= len(saved):
            print(" Invalid number.")
            return
        removed = saved.pop(position)
        with open(SAVED_FILE, "w", encoding="utf-8") as out:
            json.dump(saved, out, indent=2, ensure_ascii=False)
        print(f" Removed: {removed['title']}")
    except ValueError:
        print(" Invalid input.")

# ============================================================

# MAIN SEARCH FLOW

# ============================================================

def _browse_related(title):
    """List topics related to *title* and optionally open one of them."""
    related = fetch_related(title)
    if not related:
        print(" No related articles found.")
        return

    print(f"\n Related to '{title}':")
    for i, (t, u) in enumerate(related, 1):
        print(f" [{i}] {t}")
        print(f" {u}")

    rel_choice = input("\n Open related article? (number/Enter): ").strip()
    # isdecimal(), not isdigit(): int() rejects some characters (e.g. "²")
    # that isdigit() accepts.
    if not rel_choice.isdecimal():
        return
    ri = int(rel_choice) - 1
    if not 0 <= ri < len(related):
        return

    rel_article = fetch_summary(related[ri][0], current_lang)
    if rel_article:
        display_article(rel_article)
        save_to_history(rel_article["title"],
                        rel_article.get("url",""),
                        current_lang)


def do_search():
    """Run the interactive search flow.

    Prompts for a query, lists the matches, fetches and displays the chosen
    article in the current language, records it in the history and offers
    follow-up options (full summary, statistics, save, related topics).

    Returns:
        dict | None: The article that was displayed, or ``None`` when the
        user backed out, nothing was found or the fetch failed.
    """
    query = input("\n Search Wikipedia: ").strip()
    if not query:
        return None

    print(" Searching...")
    results = search_wikipedia(query)
    if not results:
        print(" No results found.")
        return None

    # Show results
    print(f"\n Results for '{query}':")
    print(" " + "-"*50)
    for i, (title, url) in enumerate(results, 1):
        print(f" [{i}] {title}")
        print(f" {url}")
    print("\n [0] Back to menu")

    choice = input("\n Select article number: ").strip()
    # isdecimal(), not isdigit(): isdigit() also accepts characters such as
    # "²" that int() cannot parse, which would raise ValueError here.
    if not choice.isdecimal() or int(choice) == 0:
        return None
    idx = int(choice) - 1
    if not 0 <= idx < len(results):
        print(" Invalid selection.")
        return None

    title = results[idx][0]
    print(f"\n Fetching: {title}...")
    article = fetch_summary(title, current_lang)
    if not article:
        print(f" Could not fetch article for: {title}")
        return None

    display_article(article)
    save_to_history(article["title"], article.get("url",""), current_lang)

    # Post-article options
    print("\n Options:")
    print(" 1. Read full summary")
    print(" 2. Article statistics")
    print(" 3. Save this article")
    print(" 4. Show related topics")
    print(" 5. Back")
    sub = input(" > ").strip()

    if sub == "1":
        display_article(article, full=True)
    elif sub == "2":
        article_stats(article)
    elif sub == "3":
        save_article(article)
    elif sub == "4":
        _browse_related(title)

    return article

# ============================================================

# LANGUAGE SELECTOR

# ============================================================

def select_language():
    """Let the user choose the Wikipedia language for later requests.

    Lists the languages of ``LANG_APIS`` (the current one is marked) and
    updates the module-level ``current_lang`` when a valid number is
    entered.  Any other input leaves the language unchanged.
    """
    global current_lang

    print("\n Available languages:")
    langs = list(LANG_APIS.keys())
    lang_names = {
        "en": "English", "hi": "Hindi", "ta": "Tamil",
        "fr": "French", "de": "German", "es": "Spanish",
        "ja": "Japanese","zh": "Chinese"
    }
    for i, lang in enumerate(langs, 1):
        marker = " ← current" if lang == current_lang else ""
        print(f" {i}. {lang_names.get(lang, lang)} ({lang}){marker}")

    choice = input("\n Select language (number): ").strip()
    # isdecimal(), not isdigit(): isdigit() also accepts characters such as
    # "²" that int() cannot parse, which would raise ValueError here.
    if choice.isdecimal():
        idx = int(choice) - 1
        if 0 <= idx < len(langs):
            current_lang = langs[idx]
            print(f" Language set to: {lang_names.get(current_lang, current_lang)}")

# ============================================================

# MAIN MENU

# ============================================================

def print_menu():
    """Print the main menu.

    The heading shows the display name of the current language; the saved
    and history entries show how many items each file currently holds.
    """
    lang_names = {
        "en": "English", "hi": "Hindi", "ta": "Tamil",
        "fr": "French", "de": "German", "es": "Spanish",
        "ja": "Japanese","zh": "Chinese"
    }
    saved_count = len(load_saved())
    history_count = len(load_history())
    lang_label = lang_names.get(current_lang, current_lang)

    rule = "-"*50
    menu_lines = [
        "\n" + rule,
        f" WIKIPEDIA SUMMARY FETCHER [{lang_label}]",
        rule,
        " 1. Search & fetch article",
        " 2. Fetch by exact title",
        " 3. Compare two articles",
        f" 4. Saved articles [{saved_count}]",
        f" 5. Search history [{history_count}]",
        " 6. Change language",
        " 7. Random article",
        " 0. Exit",
        rule,
    ]
    print("\n".join(menu_lines))

def fetch_random():
    """Fetch, display and optionally save a random article.

    The random-summary endpoint of the current language's wiki is queried,
    the article is shown and added to the history, and the user is asked
    whether to save it.  Any error along the way is reported on one line.
    """
    try:
        endpoint = f"https://{current_lang}.wikipedia.org/api/rest_v1/page/random/summary"
        response = requests.get(endpoint, timeout=10,
                                headers={"User-Agent": "WikiFetcher/1.0"})
        response.raise_for_status()
        payload = response.json()

        # Pull the fields out one by one, then assemble the article dict.
        title = payload.get("title", "Random")
        description = payload.get("description", "")
        summary = clean_text(payload.get("extract", ""))
        page_url = payload.get("content_urls", {}).get("desktop", {}).get("page", "")
        image_url = payload.get("originalimage", {}).get("source", "")
        page_type = payload.get("type", "")
        article = {
            "title": title,
            "description": description,
            "summary": summary,
            "url": page_url,
            "thumbnail": image_url,
            "lang": current_lang,
            "type": page_type,
            "fetched_at": datetime.now().strftime("%d-%m-%Y %H:%M:%S"),
        }

        display_article(article)
        save_to_history(article["title"], article.get("url",""), current_lang)

        if input("\n Save this article? (y/n): ").strip().lower() == "y":
            save_article(article)
    except Exception as e:
        print(f" Could not fetch random article: {e}")

def main():
    """Run the interactive menu loop until the user chooses Exit.

    Returns immediately, with an install hint, when the ``requests``
    library is not available.
    """
    banner = "="*55
    print("\n" + banner)
    print(" WIKIPEDIA SUMMARY FETCHER")
    print(banner)

    if not REQUESTS_OK:
        print("\n requests library not installed!")
        print(" Install it: pip install requests")
        return

    print("\n Fetch Wikipedia summaries for any topic.")
    print(" Supports 8 languages, save & compare articles.\n")

    last_article = None
    while True:
        print_menu()
        choice = input(" > ").strip()

        if choice == "0":
            print("\n Goodbye! Keep exploring!\n")
            break

        if choice == "1":
            last_article = do_search() or last_article

        elif choice == "2":
            # Fetch by exact title
            title = input("\n Exact Wikipedia title: ").strip()
            if not title:
                continue
            print(f" Fetching: {title}...")
            article = fetch_summary(title, current_lang)
            if not article:
                print(f" Article not found: '{title}'")
                print(" Try option 1 (Search) for suggestions.")
                continue
            display_article(article)
            save_to_history(article["title"],
                            article.get("url",""), current_lang)
            last_article = article
            answer = input("\n Save this article? (y/n): ").strip().lower()
            if answer == "y":
                save_article(article)

        elif choice == "3":
            # Compare two articles
            print("\n Compare two Wikipedia articles")
            t1 = input(" First topic : ").strip()
            t2 = input(" Second topic: ").strip()
            if not (t1 and t2):
                continue
            print(" Fetching...")
            a1 = fetch_summary(t1, current_lang)
            a2 = fetch_summary(t2, current_lang)
            if a1 and a2:
                compare_articles(a1, a2)
            else:
                print(" Could not fetch one or both articles.")

        elif choice == "4":
            view_saved()

        elif choice == "5":
            view_history()

        elif choice == "6":
            select_language()

        elif choice == "7":
            print("\n Fetching random article...")
            fetch_random()

        else:
            print(" Invalid choice.")

# ============================================================

# RUN

# ============================================================

# Start the interactive menu only when this file is executed as a script;
# importing it as a module must not launch the prompt loop.
if __name__ == "__main__":
    main()

# Python for Engineers
#
# Blog Pages
#
# Wikipedia Summary Fetcher
#
# No comments: