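# NOTE: This script will not run in an online IDE. It needs network access and
# the third-party packages requests, beautifulsoup4 and html5lib.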
import io
import json
import os
import sys
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Scrape the tutorialspoint.com tutorial library: visit every tutorial linked
# from the library page, collect the chapter links from each tutorial's table
# of contents, and write them all to one JSON file as
# {"data": [{"name": ..., "url": ...}, ...]}.

# Page names that are skipped once the tutorial prefix has been stripped
# (presumably non-chapter pages such as the quick guide or discussion page).
elist = ['questions_answers.htm','quick_guide.htm','useful_resources.htm','resources.htm','discussion.htm']

BASE_URL = "https://www.tutorialspoint.com/"
URL = "https://www.tutorialspoint.com/tutorialslibrary.htm?v=1.2"
OUTPUT_FILE = "tp_all_tutorials_24-01-2023.json"
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 30

json_data = []
jdata = {}


def _fetch_soup(session, url):
    """Download *url* and return it parsed with html5lib.

    Raises requests.RequestException on connection problems, timeouts and
    HTTP error statuses, so error pages are never parsed as content.
    If the parser is missing, run 'pip install html5lib'.
    """
    response = session.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return BeautifulSoup(response.content, 'html5lib')


def _strip_tutorial_prefix(filename, slug):
    """Return *filename* without a leading '<slug>_', if it has one."""
    prefix = slug + "_"
    if slug and filename.startswith(prefix):
        return filename[len(prefix):]
    return filename


# One session for the whole crawl so connections to the host are reused.
with requests.Session() as session:
    library_page = _fetch_soup(session, URL)

    for box in library_page.find_all("div", class_="featured-boxes"):
        for tutorial_link in box.find_all("a", href=True):
            tutorial_href = tutorial_link['href']
            if os.path.splitext(tutorial_href)[1] != '.htm':
                continue

            # The directory of the tutorial's landing page is used as the
            # prefix of its chapter files.
            # NOTE(review): assumes URLs shaped like /<slug>/index.htm with
            # chapters named <slug>_<page>.htm -- confirm against the site.
            # The previous code used the landing page's file name
            # (e.g. "index.htm") as the prefix, which never matched, so the
            # elist filter had no effect.
            tutorial_slug = os.path.basename(os.path.dirname(tutorial_href))

            # urljoin copes with root-relative, relative and absolute hrefs.
            tutorial_url = urljoin(BASE_URL, tutorial_href)
            try:
                tutorial_page = _fetch_soup(session, tutorial_url)
            except requests.RequestException as exc:
                # One unreachable tutorial should not discard the whole crawl.
                print("Skipping {}: {}".format(tutorial_url, exc), file=sys.stderr)
                continue

            for toc in tutorial_page.find_all("ul", class_="toc chapters"):
                for chapter_link in toc.find_all("a", href=True):
                    chapter_href = chapter_link['href']
                    if os.path.splitext(chapter_href)[1] != '.htm':
                        continue

                    chapter_file = os.path.basename(chapter_href)
                    if _strip_tutorial_prefix(chapter_file, tutorial_slug) in elist:
                        continue

                    json_data.append({
                        'name': chapter_link.text.replace(" - Home", ""),
                        'url': urljoin(tutorial_url, chapter_href),
                    })

# Write the result once, after the crawl, instead of re-serialising the
# growing list for every tutorial link.
jdata['data'] = json_data
with open(OUTPUT_FILE, "w", encoding="utf-8") as outfile:
    outfile.write(json.dumps(jdata, indent=4))
      
