這段代碼對初學者來說有幾個重點：

1. href 超鏈接的拼接（站點根網址加上頁面內的相對路徑）；
2. 「下一頁」超鏈接的採集與拼接；
3. 根據 href 超鏈接，通過頁面標籤提取該頁詩詞文本的方法。

#coding:utf-8
#'http://www.shicimingju.com'
#採集多頁詩詞網站，並儲存為TXT文件；
#-- 讀取寫入txt段再思考；
import sys
import re, os, random, requests
from bs4 import BeautifulSoup as BP
 
# Site root; the driver and scraping functions in this file prepend it to
# root-relative hrefs taken from the pages.
base = 'http://www.shicimingju.com'
# First listing page of the crawl.
url = base + '/chaxun/zuozhe/1.html'
# Browser-like request headers. The two User-Agent fragments are joined with
# a space: the original concatenation produced "...rv:58.0)Gecko/..." which is
# not a well-formed Firefox User-Agent string.
visithead = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:58.0) '
                  'Gecko/20100101 Firefox/58.0',
}
 
def geturls(url):
    """Fetch one listing page and collect its links plus the next-page link.

    Args:
        url: Absolute URL of a listing page.

    Returns:
        A dict with two keys:
            'hrefs':   list of absolute URLs, one for every ``<a href=...>``
                       inside the ``www-shadow-card www-main-container`` div
                       (empty list when that div is absent).
            'nexturl': absolute URL of the anchor labelled "next page"
                       inside the ``pagination www-shadow-card`` div, or ''
                       when there is no such anchor (i.e. on the last page).

    Raises:
        requests.RequestException: network failure, timeout, or an HTTP
            error status.
    """
    # Function-scope import so the block does not depend on edits elsewhere.
    from urllib.parse import urljoin

    print('--------------------------------------------------------------------------------')
    r = requests.get(url, headers=visithead, timeout=30)
    r.raise_for_status()
    # Give the parser the raw bytes. Re-encoding r.text with r.encoding fails
    # when the encoding is None or when decoding inserted replacement chars.
    soup = BP(r.content, 'lxml')

    div = soup.find('div', attrs={'class': 'www-shadow-card www-main-container'})
    anchors = div.find_all('a', href=True) if div is not None else []
    # urljoin yields base + path for root-relative hrefs (same result as the
    # previous string concatenation) and also copes with absolute hrefs.
    hrefs = [urljoin(url, a['href']) for a in anchors]
    print(hrefs)

    next_label = u'\u4e0b\u4e00\u9875'  # the site's "next page" link text
    nexturl = ''
    pager = soup.find('div', attrs={'class': 'pagination www-shadow-card'})
    if pager is not None:
        # Only real <a> tags are considered; the href attribute is read
        # directly instead of being regex-extracted from the tag's markup.
        for a in pager.find_all('a', href=True):
            if next_label in a.get_text():
                nexturl = urljoin(url, a['href'])
                break
    print(next_label, '--------------------------------')
    print(nexturl)

    # An empty 'nexturl' (rather than an IndexError) marks the last page, so
    # the links gathered from that page are still returned to the caller.
    return {'hrefs': hrefs, 'nexturl': nexturl}
 
def txt(url, index=None):
    """Download one poem page and save it as a UTF-8 text file under ./ok.

    The file is named ``<number>-<title>.txt`` and holds the title, a
    newline, the author info and the poem body. The same text is printed.

    Args:
        url: Absolute URL of a poem page.
        index: Zero-based position of the poem; the file number is
            ``index + 1``. When omitted, the module-level loop counter ``i``
            is used (0 if it does not exist), which is how the driver loop in
            this file has always numbered the files.

    Raises:
        requests.RequestException: network failure, timeout, or an HTTP
            error status.
        ValueError: the page lacks the expected title or content elements.
    """
    if index is None:
        # Legacy numbering: fall back to the driver loop's global counter.
        index = globals().get('i', 0)

    # Same headers as geturls so both requests look alike to the server.
    r = requests.get(url, headers=visithead, timeout=30)
    r.raise_for_status()
    # Raw bytes let the parser detect the charset; see geturls-style fetch.
    soup = BP(r.content, 'lxml')

    card = soup.find('div', attrs={'class': 'shici-container www-shadow-card'})
    body = card.find('div', attrs={'class': 'shici-content'}) if card is not None else None
    if card is None or card.h1 is None or body is None:
        raise ValueError('no poem title/content found at %s' % url)
    info = card.find('div', attrs={'class': 'shici-info'})

    c1 = card.h1.text                            # title
    c2 = info.text if info is not None else ''   # author info
    c3 = body.text                               # poem body
    # Drop non-breaking spaces. The former pattern r'[\\xa0]' was a character
    # class that deleted backslash, 'x', 'a' and '0' instead.
    c3 = c3.replace(u'\xa0', '')
    c3 = re.sub(r'[ ]{4}', '', c3)               # drop 4-space indentation runs

    # Replace path separators and other characters that are unsafe in file
    # names (including line breaks inside the heading) with a space.
    t = re.sub(r'[\\/:*?"<>|\r\n\t]+', ' ', c1).strip()

    filedir = os.path.join(os.getcwd(), 'ok')
    os.makedirs(filedir, exist_ok=True)
    path = os.path.join(filedir, '%d-%s.txt' % (index + 1, t))

    c0 = c1 + u'\n' + c2 + c3
    # Explicit encoding: the platform default may be unable to encode Chinese.
    with open(path, mode='w', encoding='utf-8') as f:
        f.write(c0)
    print(c0)
 
# --- Phase 1: walk the listing pages and accumulate every poem link. ---
ans = geturls(url)
allhrefs = ans['hrefs']
visited = {url}  # guards against a "next page" link that loops back

while ans['nexturl'] and ans['nexturl'] not in visited:
    visited.add(ans['nexturl'])
    try:
        ans = geturls(ans['nexturl'])
    except Exception as err:
        # geturls may signal "no further page" (or a fetch/parse problem) by
        # raising, so any ordinary exception ends paging. Unlike a bare
        # ``except:``, Ctrl-C and SystemExit are no longer swallowed, and the
        # actual reason is shown instead of being hidden.
        print('Paging stopped:', repr(err))
        break
    allhrefs = allhrefs + ans['hrefs']

# Summary and pause run however the loop ended (exception or empty nexturl).
print('This is last page...!\n')
print(u'總計找到 ', len(allhrefs), u'條數據！')
input('Press any key to write to txt files!')

# --- Phase 2: save poems to text files. ---
# NOTE(review): only the first len(allhrefs)//100 links are written, which
# looks like a sampling limit left in for testing -- confirm before changing.
# The counter must stay a module-level name ``i``: txt() reads it to number
# the output files.
for i in range(len(allhrefs) // 100):
    txt(allhrefs[i])
    print(i + 1, '......done!')
    print('------------------------------------------------------------------------------')