"""Wikipedia Summary Fetcher."""

import json

import re

import os

import textwrap

from datetime import datetime

from pathlib import Path

from collections import defaultdict


# `requests` is a third-party package and may not be installed. Record whether
# the import succeeded in REQUESTS_OK so main() can print install instructions
# and exit instead of failing with an ImportError at start-up.
try:
    import requests
    REQUESTS_OK = True
except ImportError:
    REQUESTS_OK = False


# ============================================================

# CONFIGURATION

# ============================================================


# JSON files used to persist viewing history and saved articles. The paths are
# relative, so they resolve against the current working directory.
HISTORY_FILE = "wiki_history.json"
SAVED_FILE = "wiki_saved.json"

# English Wikipedia endpoints; "{}" is replaced with the page title.
WIKI_API = "https://en.wikipedia.org/api/rest_v1/page/summary/{}"
WIKI_SEARCH = "https://en.wikipedia.org/w/api.php"
WIKI_SECTIONS = "https://en.wikipedia.org/api/rest_v1/page/mobile-sections/{}"

# Default column width used by wrap_text().
WRAP_WIDTH = 72

# Language support: one summary endpoint per language code. Insertion order
# matters because select_language() numbers its menu from this dict's keys.
LANG_APIS = {
    code: f"https://{code}.wikipedia.org/api/rest_v1/page/summary/{{}}"
    for code in ("en", "hi", "ta", "fr", "de", "es", "ja", "zh")
}

# Language code currently selected by the user (changed by select_language()).
current_lang = "en"


# ============================================================

# HELPERS

# ============================================================


def clean_text(text):
    """Normalise whitespace in *text*.

    Every run of whitespace (spaces, tabs, newlines) is collapsed to a single
    space and leading/trailing whitespace is removed. A falsy input such as
    ``None`` or ``""`` yields the empty string.
    """
    if text:
        collapsed = re.sub(r'\s+', ' ', text)
        return collapsed.strip()
    return ""



def wrap_text(text, width=WRAP_WIDTH, indent="  "):
    """Wrap *text* for terminal display.

    The text is split on newlines and each non-blank line is stripped and
    re-flowed with ``textwrap.fill`` to *width* columns, with *indent*
    prefixed to every output line. Blank lines are preserved as empty lines.
    Returns the wrapped lines joined with newlines.
    """
    def _fill(line):
        # Blank (or whitespace-only) lines are kept as empty separators.
        stripped = line.strip()
        if not stripped:
            return ""
        return textwrap.fill(stripped, width=width,
                             initial_indent=indent,
                             subsequent_indent=indent)

    return "\n".join(_fill(line) for line in text.split("\n"))



# ============================================================

# LOAD & SAVE HISTORY / SAVED ARTICLES

# ============================================================


def load_history():
    """Load the viewing history from HISTORY_FILE.

    Returns:
        A list of history entry dicts. An empty list is returned when the
        file is missing, unreadable, not valid JSON, or does not contain a
        JSON array; entries that are not JSON objects are dropped so callers
        can safely use ``.get`` on each element.
    """
    try:
        with open(HISTORY_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers
        # json.JSONDecodeError and UnicodeDecodeError. History is best-effort
        # and this runs on every menu redraw, so fall back silently to empty
        # instead of using a bare ``except`` that would also swallow
        # KeyboardInterrupt.
        return []
    if not isinstance(data, list):
        return []
    return [entry for entry in data if isinstance(entry, dict)]



def save_to_history(title, url, lang="en"):
    """Append a viewed article to the history file, keeping the last 50.

    Args:
        title: Article title.
        url: Article URL (may be an empty string).
        lang: Language code the article was fetched in.

    The entry is skipped when the most recent entry has the same title and
    language, so re-opening the same article does not create consecutive
    duplicates. A failure to write the file is reported but does not raise,
    because history is a convenience feature and must not end the session.
    """
    history = load_history()
    if not isinstance(history, list):
        history = []

    # Avoid duplicate consecutive entries. Entries written without a "lang"
    # key are treated as English, matching how view_history() displays them.
    if history:
        last = history[-1]
        if (isinstance(last, dict)
                and last.get("title") == title
                and last.get("lang", "en") == lang):
            return

    history.append({
        "title":     title,
        "url":       url,
        "lang":      lang,
        "viewed_at": datetime.now().strftime("%d-%m-%Y %H:%M:%S"),
    })
    history = history[-50:]

    try:
        with open(HISTORY_FILE, "w", encoding="utf-8") as f:
            json.dump(history, f, indent=2, ensure_ascii=False)
    except OSError as exc:
        print(f"  Could not update history: {exc}")



def load_saved():
    """Load the saved articles from SAVED_FILE.

    Returns:
        A list of article dicts. An empty list is returned when the file is
        missing, unreadable, not valid JSON, or does not contain a JSON
        array; entries that are not JSON objects are dropped so callers can
        safely treat every element as a dict.
    """
    try:
        with open(SAVED_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing/unreadable file; ValueError covers
        # json.JSONDecodeError and UnicodeDecodeError. This runs on every
        # menu redraw, so fall back silently to empty instead of using a bare
        # ``except`` that would also swallow KeyboardInterrupt.
        return []
    if not isinstance(data, list):
        return []
    return [entry for entry in data if isinstance(entry, dict)]



def save_article(article):
    """Persist *article* to SAVED_FILE unless it is already saved.

    Args:
        article: Article dict with at least a "title" key. A "saved_at"
            timestamp is added to this dict in place before it is written.

    An article counts as already saved when an existing entry has the same
    title and language (entries without a "lang" key are treated as "en").
    The outcome is printed; a failure to write the file is reported rather
    than raised.
    """
    saved = load_saved()
    if not isinstance(saved, list):
        saved = []

    title = article["title"]
    lang = article.get("lang", "en")
    # Use .get so a malformed entry in the file cannot raise KeyError here.
    already_saved = any(
        isinstance(entry, dict)
        and entry.get("title") == title
        and entry.get("lang", "en") == lang
        for entry in saved
    )
    if already_saved:
        print("  Already saved.")
        return

    article["saved_at"] = datetime.now().strftime("%d-%m-%Y %H:%M:%S")
    saved.append(article)

    try:
        with open(SAVED_FILE, "w", encoding="utf-8") as f:
            json.dump(saved, f, indent=2, ensure_ascii=False)
    except OSError as exc:
        print(f"  Could not save article: {exc}")
        return
    print(f"  Saved: {article['title']}")



# ============================================================

# SEARCH WIKIPEDIA

# ============================================================


def search_wikipedia(query, limit=8, lang=None):
    """Search Wikipedia and return a list of ``(title, url)`` tuples.

    Uses Wikipedia's opensearch API.

    Args:
        query: Search text.
        limit: Maximum number of results to request.
        lang: Language code of the wiki to search. Defaults to the module's
            ``current_lang`` so that search results come from the same wiki
            that fetch_summary() is subsequently asked to read. Codes not
            listed in LANG_APIS fall back to English, mirroring
            fetch_summary().

    Returns:
        A list of ``(title, url)`` pairs; empty on any error (the error is
        printed).
    """
    if lang is None:
        lang = current_lang
    if lang == "en" or lang not in LANG_APIS:
        endpoint = WIKI_SEARCH
    else:
        endpoint = f"https://{lang}.wikipedia.org/w/api.php"

    params = {
        "action":     "opensearch",
        "search":     query,
        "limit":      limit,
        "namespace":  0,
        "format":     "json",
    }
    try:
        # Send the same User-Agent as the other requests in this file.
        resp = requests.get(endpoint, params=params, timeout=10,
                            headers={"User-Agent": "WikiFetcher/1.0"})
        resp.raise_for_status()
        data = resp.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"  Search error: {e}")
        return []

    # A successful opensearch reply is a JSON array:
    # [query, [titles], [descriptions], [urls]].
    if not isinstance(data, list):
        print("  Search error: unexpected response format")
        return []

    titles = data[1] if len(data) > 1 else []
    urls = data[3] if len(data) > 3 else []
    # Pad missing URLs so zip() does not silently drop titles.
    if len(urls) < len(titles):
        urls = list(urls) + [""] * (len(titles) - len(urls))
    return list(zip(titles, urls))



# ============================================================

# FETCH SUMMARY

# ============================================================


def fetch_summary(title, lang="en"):
    """Fetch the Wikipedia summary for *title*.

    Args:
        title: Page title; spaces are converted to underscores.
        lang: Language code selecting the endpoint from LANG_APIS; unknown
            codes fall back to the English endpoint.

    Returns:
        An article dict with the keys "title", "description", "summary",
        "url", "thumbnail", "lang", "type" and "fetched_at", or ``None`` when
        the page does not exist (HTTP 404) or the request fails. Failures
        other than 404 print a short message.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    api_base = LANG_APIS.get(lang, LANG_APIS["en"])
    # The title is a single URL path segment, so characters such as "/", "?",
    # "#" and "%" must be percent-encoded or the request targets the wrong
    # resource (e.g. "AC/DC"). safe="" makes quote() encode "/" as well.
    slug = quote(title.replace(" ", "_"), safe="")
    url = api_base.format(slug)

    try:
        resp = requests.get(url, timeout=10,
                            headers={"User-Agent": "WikiFetcher/1.0"})
        if resp.status_code == 404:
            return None
        resp.raise_for_status()
        data = resp.json()
    except requests.exceptions.ConnectionError:
        print("  No internet connection.")
        return None
    except Exception as e:
        print(f"  Fetch error: {e}")
        return None

    if not isinstance(data, dict):
        print("  Fetch error: unexpected response format")
        return None

    # ``or {}`` guards against keys that are present but null in the JSON.
    desktop_urls = (data.get("content_urls") or {}).get("desktop") or {}
    image = data.get("originalimage") or {}
    return {
        "title":       data.get("title", title),
        "description": data.get("description", ""),
        "summary":     clean_text(data.get("extract", "")),
        "url":         desktop_urls.get("page", ""),
        "thumbnail":   image.get("source", ""),
        "lang":        lang,
        "type":        data.get("type", ""),
        "fetched_at":  datetime.now().strftime("%d-%m-%Y %H:%M:%S"),
    }



# ============================================================

# DISPLAY ARTICLE

# ============================================================


def display_article(article, full=False):
    """Print an article to the terminal.

    Args:
        article: Article dict; "title" is required, while "description",
            "summary", "url" and "thumbnail" are optional.
        full: When true, print the whole summary. Otherwise print the first
            three sentences and, if there are more, ask the user whether to
            show the full summary.

    If the summary is empty, a notice is printed after the header and the
    function returns without printing the footer.
    """
    width = 70
    heavy_rule = "=" * width
    light_rule = "-" * width

    # Header: upper-cased title plus optional one-line description.
    print("\n" + heavy_rule)
    print(f"  {article['title'].upper()}")
    description = article.get("description")
    if description:
        print(f"  {description}")
    print(heavy_rule)

    body = article.get("summary", "")
    if not body:
        print("  No summary available.")
        return

    if full:
        print(wrap_text(body))
    else:
        # Show first 3 sentences
        parts = re.split(r'(?<=[.!?])\s+', body)
        print(wrap_text(" ".join(parts[:3])))

        extra = len(parts) - 3
        if extra > 0:
            print(f"\n  ... ({extra} more sentence(s))")
            answer = input("\n  Read full summary? (y/n): ").strip().lower()
            if answer == "y":
                print("\n" + light_rule)
                print(wrap_text(body))

    # Footer: links, when available.
    print("\n" + light_rule)
    link = article.get("url")
    if link:
        print(f"  Read more: {link}")
    image = article.get("thumbnail")
    if image:
        print(f"  Image    : {image}")
    print(heavy_rule)



# ============================================================

# FETCH RELATED TOPICS

# ============================================================


def fetch_related(title):
    """Return up to five ``(title, url)`` search results related to *title*.

    Runs search_wikipedia() with *title* as the query (limit 6) and drops any
    result whose title equals *title* case-insensitively.
    """
    related = []
    for name, link in search_wikipedia(title, limit=6):
        # Filter out exact match
        if name.lower() != title.lower():
            related.append((name, link))
    return related[:5]



# ============================================================

# WORD COUNT & STATS

# ============================================================


def article_stats(article):
    """Print word, sentence and character counts for an article's summary.

    Args:
        article: Article dict; "title" is required, while "summary", "lang"
            and "type" are optional.

    Sentences are counted by splitting after ".", "!" or "?" followed by
    whitespace; empty fragments are ignored so an empty summary reports
    0 sentences. Reading time assumes 200 words per minute and is shown as at
    least 1 minute for any non-empty summary.
    """
    # ``or ""`` also covers a summary stored as null.
    summary = article.get("summary") or ""
    words = len(summary.split())
    # re.split returns [''] for an empty string and a trailing '' when the
    # text ends in whitespace; count only real fragments.
    sentences = sum(1 for s in re.split(r'(?<=[.!?])\s+', summary) if s)
    chars = len(summary)
    read_time = max(1, round(words / 200)) if words else 0  # avg 200 wpm

    print("\n" + "="*45)
    print(f"  ARTICLE STATS: {article['title']}")
    print("="*45)
    print(f"  Words      : {words:,}")
    print(f"  Sentences  : {sentences:,}")
    print(f"  Characters : {chars:,}")
    print(f"  Read time  : ~{read_time} min")
    print(f"  Language   : {article.get('lang','en')}")
    print(f"  Type       : {article.get('type','')}")
    print("="*45)



# ============================================================

# COMPARE TWO ARTICLES

# ============================================================


def compare_articles(article1, article2):
    """Print a side-by-side comparison of two articles.

    Shows a table with each article's title, description, summary word count
    and URL (text cells truncated to 32 characters), followed by the first
    two sentences of each summary.

    Args:
        article1: First article dict; "title" is required.
        article2: Second article dict; "title" is required.
    """
    heavy_rule = "=" * 70
    w = 32

    def _row(label, left, right):
        # One table line: fixed-width label and left cell, free-width right.
        print(f"  {label:<14} {left:<{w}}  {right}")

    print("\n" + heavy_rule)
    print("  COMPARISON")
    print(heavy_rule)

    _row('TOPIC', article1['title'][:w], article2['title'][:w])
    print("  " + "-"*65)

    _row('Description',
         article1.get('description','')[:w],
         article2.get('description','')[:w])

    count1 = len(article1.get("summary","").split())
    count2 = len(article2.get("summary","").split())
    _row('Word count', count1, count2)

    _row('URL', article1.get('url','')[:w], article2.get('url','')[:w])
    print(heavy_rule)

    # Show both summaries
    for art in (article1, article2):
        print(f"\n  [{art['title']}]")
        parts = re.split(r'(?<=[.!?])\s+', art.get("summary",""))
        print(wrap_text(" ".join(parts[:2])))



# ============================================================

# VIEW HISTORY

# ============================================================


def view_history():
    """Print the 20 most recent history entries, newest first.

    The header shows the total number of stored entries. Prints a notice and
    returns when the history is empty.
    """
    entries = load_history()
    if not entries:
        print("\n  No search history yet.")
        return

    heavy_rule = "=" * 60
    print("\n" + heavy_rule)
    print(f"  SEARCH HISTORY  ({len(entries)} articles)")
    print(heavy_rule)
    print(f"  {'#':<4} {'TITLE':<30} {'LANG':<6} VIEWED AT")
    print("  " + "-"*55)

    # Take the tail of the list, then walk it backwards so that the newest
    # entry is numbered 1.
    recent = entries[-20:]
    for position, entry in enumerate(recent[::-1], start=1):
        print(f"  {position:<4} {entry['title']:<30} {entry.get('lang','en'):<6} "
              f"{entry['viewed_at']}")
    print(heavy_rule)



# ============================================================

# VIEW SAVED ARTICLES

# ============================================================


def view_saved():
    """List the saved articles and optionally display one in full.

    Prints every saved article with its description, save time and a
    truncated URL, then asks for a number. Pressing Enter returns to the
    caller; a valid number shows that article via display_article(); anything
    else prints "Invalid number.".
    """
    saved = load_saved()
    if not saved:
        print("\n  No saved articles yet.")
        return

    print("\n" + "="*60)
    print(f"  SAVED ARTICLES  ({len(saved)})")
    print("="*60)

    for i, a in enumerate(saved, 1):
        print(f"\n  [{i}] {a['title']}")
        if a.get("description"):
            print(f"       {a['description']}")
        # ``or ''`` keeps the slice safe if the stored url is null.
        print(f"       Saved: {a.get('saved_at','')}  "
              f"URL: {(a.get('url') or '')[:40]}")

    print("="*60)

    choice = input("\n  View an article? (number or Enter to back): ").strip()
    if not choice:
        return
    # str.isdigit() also accepts characters such as superscript digits, which
    # int() rejects with ValueError; str.isdecimal() only accepts characters
    # that int() can parse.
    if not choice.isdecimal():
        print("  Invalid number.")
        return

    idx = int(choice) - 1
    if 0 <= idx < len(saved):
        display_article(saved[idx], full=True)
    else:
        print("  Invalid number.")



def remove_saved():
    """Ask which saved article to delete and remove it from SAVED_FILE.

    The saved titles are listed here directly rather than through
    view_saved(), because view_saved() ends with its own "View an article?"
    prompt, which would sit between the list and the removal prompt.
    Invalid input and write failures are reported, not raised.
    """
    saved = load_saved()
    if not saved:
        print("\n  No saved articles.")
        return

    print("\n" + "="*60)
    print(f"  SAVED ARTICLES  ({len(saved)})")
    print("="*60)
    for i, a in enumerate(saved, 1):
        print(f"  [{i}] {a.get('title', '')}")
    print("="*60)

    try:
        idx = int(input("\n  Enter number to remove: ").strip()) - 1
    except ValueError:
        print("  Invalid input.")
        return

    if not 0 <= idx < len(saved):
        print("  Invalid number.")
        return

    removed = saved.pop(idx)
    try:
        with open(SAVED_FILE, "w", encoding="utf-8") as f:
            json.dump(saved, f, indent=2, ensure_ascii=False)
    except OSError as exc:
        print(f"  Could not update saved articles: {exc}")
        return
    print(f"  Removed: {removed.get('title', '')}")



# ============================================================

# MAIN SEARCH FLOW

# ============================================================


def _parse_menu_number(raw):
    """Return *raw* as an int if it is made only of decimal digits, else None.

    ``str.isdigit()`` also accepts characters such as superscript digits that
    ``int()`` rejects with ValueError, so ``str.isdecimal()`` is used instead.
    """
    return int(raw) if raw.isdecimal() else None


def do_search():
    """Run the interactive search flow.

    Prompts for a query, lists the matches, fetches and displays the chosen
    article in the current language, records it in the history and offers
    follow-up actions (full summary, statistics, save, related topics).

    Returns:
        The article dict that was fetched for the selected result, or
        ``None`` if the user backed out, nothing matched, or the fetch
        failed.
    """
    query = input("\n  Search Wikipedia: ").strip()
    if not query:
        return None

    print("  Searching...")
    results = search_wikipedia(query)
    if not results:
        print("  No results found.")
        return None

    # Show results
    print(f"\n  Results for '{query}':")
    print("  " + "-"*50)
    for i, (title, url) in enumerate(results, 1):
        print(f"  [{i}] {title}")
        print(f"       {url}")
    print("\n  [0] Back to menu")

    number = _parse_menu_number(input("\n  Select article number: ").strip())
    if number is None or number == 0:
        return None
    if number > len(results):
        print("  Invalid selection.")
        return None

    title = results[number - 1][0]
    print(f"\n  Fetching: {title}...")
    article = fetch_summary(title, current_lang)
    if not article:
        print(f"  Could not fetch article for: {title}")
        return None

    display_article(article)
    save_to_history(article["title"], article.get("url",""), current_lang)

    # Post-article options
    print("\n  Options:")
    print("  1. Read full summary")
    print("  2. Article statistics")
    print("  3. Save this article")
    print("  4. Show related topics")
    print("  5. Back")

    sub = input("  > ").strip()

    if sub == "1":
        display_article(article, full=True)
    elif sub == "2":
        article_stats(article)
    elif sub == "3":
        save_article(article)
    elif sub == "4":
        related = fetch_related(title)
        if not related:
            print("  No related articles found.")
            return article

        print(f"\n  Related to '{title}':")
        for i, (t, u) in enumerate(related, 1):
            print(f"  [{i}] {t}")
            print(f"       {u}")

        rel_raw = input("\n  Open related article? (number/Enter): ").strip()
        rel_number = _parse_menu_number(rel_raw)
        if rel_number is None:
            # Enter (or non-numeric input) just goes back.
            return article
        if not 1 <= rel_number <= len(related):
            print("  Invalid selection.")
            return article

        rel_title = related[rel_number - 1][0]
        rel_article = fetch_summary(rel_title, current_lang)
        if rel_article:
            display_article(rel_article)
            save_to_history(rel_article["title"],
                            rel_article.get("url",""),
                            current_lang)
        else:
            print(f"  Could not fetch article for: {rel_title}")

    return article



# ============================================================

# LANGUAGE SELECTOR

# ============================================================


def select_language():
    """Show the supported languages and let the user pick one.

    Updates the module-level ``current_lang`` when a valid menu number is
    entered. Pressing Enter leaves the language unchanged; any other invalid
    entry prints "Invalid selection.".
    """
    global current_lang

    lang_names = {
        "en": "English", "hi": "Hindi", "ta": "Tamil",
        "fr": "French",  "de": "German", "es": "Spanish",
        "ja": "Japanese","zh": "Chinese"
    }
    langs = list(LANG_APIS.keys())

    print("\n  Available languages:")
    for i, lang in enumerate(langs, 1):
        marker = " ← current" if lang == current_lang else ""
        print(f"  {i}. {lang_names.get(lang, lang)} ({lang}){marker}")

    choice = input("\n  Select language (number): ").strip()
    if not choice:
        return
    # str.isdigit() also accepts characters such as superscript digits, which
    # int() rejects with ValueError; str.isdecimal() only accepts characters
    # that int() can parse.
    if not choice.isdecimal() or not 1 <= int(choice) <= len(langs):
        print("  Invalid selection.")
        return

    current_lang = langs[int(choice) - 1]
    print(f"  Language set to: {lang_names.get(current_lang, current_lang)}")



# ============================================================

# MAIN MENU

# ============================================================


def print_menu():
    """Print the main menu.

    The banner shows the display name of the current language, and the
    "Saved articles" and "Search history" entries show how many items each
    store currently holds (both files are re-read on every call).
    """
    display_names = dict(
        en="English", hi="Hindi", ta="Tamil",
        fr="French", de="German", es="Spanish",
        ja="Japanese", zh="Chinese",
    )
    n_saved = len(load_saved())
    n_history = len(load_history())
    language = display_names.get(current_lang, current_lang)
    rule = "-" * 50

    menu_lines = [
        "\n" + rule,
        f"  WIKIPEDIA SUMMARY FETCHER  [{language}]",
        rule,
        "  1. Search & fetch article",
        "  2. Fetch by exact title",
        "  3. Compare two articles",
        f"  4. Saved articles  [{n_saved}]",
        f"  5. Search history  [{n_history}]",
        "  6. Change language",
        "  7. Random article",
        "  0. Exit",
        rule,
    ]
    for line in menu_lines:
        print(line)



def fetch_random():
    """Fetch, display and optionally save a random article.

    Requests the random-summary endpoint of the wiki for ``current_lang``,
    shows the article, records it in the history and asks whether to save it.
    Any exception raised along the way is reported with a single message.
    """
    try:
        endpoint = (f"https://{current_lang}.wikipedia.org"
                    "/api/rest_v1/page/random/summary")
        response = requests.get(endpoint, timeout=10,
                                headers={"User-Agent": "WikiFetcher/1.0"})
        response.raise_for_status()
        payload = response.json()

        page_url = (payload.get("content_urls", {})
                           .get("desktop", {})
                           .get("page", ""))
        image_url = payload.get("originalimage", {}).get("source", "")

        # Key order is kept because the dict may be written to SAVED_FILE.
        article = {
            "title":       payload.get("title", "Random"),
            "description": payload.get("description", ""),
            "summary":     clean_text(payload.get("extract", "")),
            "url":         page_url,
            "thumbnail":   image_url,
            "lang":        current_lang,
            "type":        payload.get("type", ""),
            "fetched_at":  datetime.now().strftime("%d-%m-%Y %H:%M:%S"),
        }

        display_article(article)
        save_to_history(article["title"], article.get("url",""), current_lang)

        answer = input("\n  Save this article? (y/n): ").strip().lower()
        if answer == "y":
            save_article(article)

    except Exception as e:
        print(f"  Could not fetch random article: {e}")



def main():
    """Run the interactive menu loop until the user chooses Exit.

    Prints the banner, exits early with install instructions when the
    ``requests`` import failed, and otherwise dispatches each menu choice to
    the matching action.
    """
    banner = "=" * 55
    print("\n" + banner)
    print("     WIKIPEDIA SUMMARY FETCHER")
    print(banner)

    if not REQUESTS_OK:
        print("\n  requests library not installed!")
        print("  Install it:  pip install requests")
        return

    print("\n  Fetch Wikipedia summaries for any topic.")
    print("  Supports 8 languages, save & compare articles.\n")

    def fetch_by_title():
        # Menu option 2: returns the fetched article, or None.
        title = input("\n  Exact Wikipedia title: ").strip()
        if not title:
            return None
        print(f"  Fetching: {title}...")
        found = fetch_summary(title, current_lang)
        if not found:
            print(f"  Article not found: '{title}'")
            print("  Try option 1 (Search) for suggestions.")
            return None
        display_article(found)
        save_to_history(found["title"], found.get("url",""), current_lang)
        answer = input("\n  Save this article? (y/n): ").strip().lower()
        if answer == "y":
            save_article(found)
        return found

    def compare_two():
        # Menu option 3: fetch two topics and print the comparison.
        print("\n  Compare two Wikipedia articles")
        first = input("  First topic : ").strip()
        second = input("  Second topic: ").strip()
        if not (first and second):
            return
        print("  Fetching...")
        left = fetch_summary(first, current_lang)
        right = fetch_summary(second, current_lang)
        if left and right:
            compare_articles(left, right)
        else:
            print("  Could not fetch one or both articles.")

    # Options that map directly onto a no-argument function.
    simple_actions = {
        "4": view_saved,
        "5": view_history,
        "6": select_language,
    }

    last_article = None

    while True:
        print_menu()
        choice = input("  > ").strip()

        if choice == "0":
            print("\n  Goodbye! Keep exploring!\n")
            break

        if choice == "1":
            last_article = do_search() or last_article
        elif choice == "2":
            last_article = fetch_by_title() or last_article
        elif choice == "3":
            compare_two()
        elif choice in simple_actions:
            simple_actions[choice]()
        elif choice == "7":
            print("\n  Fetching random article...")
            fetch_random()
        else:
            print("  Invalid choice.")



# ============================================================

# RUN

# ============================================================


# Start the interactive menu only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

# No comments: