python递归函数

Posted by ZMY Blog on April 1, 2019

最近在看python网络数据采集这本书,并跟着书中的章节进行练习,在练习第五章,六度空间游戏中,其中递归函数看起来有点蒙 后经过在笔记本上将递归过程罗列下来,才理清头绪,源代码如下

from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
import pymysql
conn=pymysql.connect(host='127.0.0.1', user='root', passwd='zhaimingyu', db='mysql', charset='utf8')
cur = conn.cursor()
cur.execute("USE wikipedia")

def insertPageIfNotExists(url):
 cur.execute("SELECT *FROM pages WHERE url = %s", (url))
 if cur.rowcount == 0:
  cur.execute("INSERT INTO pages (url) VALUES  (%s)", (url))
  conn.commit()
  return cur.lastrowid
 else:
  return cur.fetchone()[0]

def insertLink(fromPageId, toPageId):
 cur.execute("SELECT * FROM link WHERE fromPageId = %s AND toPageId = %s", ((fromPageId,(toPageId))))
 if cur.rowcount == 0:
   cur.execute("INSERT INTO link (fromPageId, toPageId) VALUES (%s, %s)", ((fromPageId, (toPageId))))
   conn.commit()

pages = set()
def getLinks(pageUrl, recursionLevel):
 global pages
 if recursionLevel > 4:
  return ;
 pageId = insertPageIfNotExists(pageUrl)
 html = urlopen("https://en.wikipedia.org"+pageUrl)
 bsObj = BeautifulSoup(html, "html.parser")
 for link in bsObj.findAll("a", href=re.compile("^(/wiki/)((?!:).)*$")):
  insertLink(pageId, insertPageIfNotExists(link.attrs['href']))
  if link.attrs['href'] not in pages:
   newPage = link.attrs['href']
   pages.add(newPage)
   getLinks(newPage, recursionLevel+1)
getLinks("/wiki/Kevin_Bacon", 0)
cur.close()
conn.close()

其中比较绕的部分是getLinks中,使用了for循环,在for循环中又调用了getLinks函数,这样实现的递归函数

def getLinks(pageUrl, recursionLevel):
 global pages
 if recursionLevel > 4:
  return ;
 pageId = insertPageIfNotExists(pageUrl)
 html = urlopen("https://en.wikipedia.org"+pageUrl)
 bsObj = BeautifulSoup(html, "html.parser")
 for link in bsObj.findAll("a", href=re.compile("^(/wiki/)((?!:).)*$")):
  insertLink(pageId, insertPageIfNotExists(link.attrs['href']))
  if link.attrs['href'] not in pages:
   newPage = link.attrs['href']
   pages.add(newPage)
   getLinks(newPage, recursionLevel+1)
getLinks("/wiki/Kevin_Bacon", 0)

通过下列代码得出的结果可以清晰的反映出程序的执行过程

list0=[1,2,3]
list1=[11,22,33]
list2=[111,222,333]

def test(rec):
  k=rec
  if rec>2:
    return;
  if rec==0:
    rec=list0
  if rec==1:
    rec=list1
  if rec==2:
    rec=list2
  for i in rec:
    print("我是第 %d 层循环中的 %d"%(k,i))
    test(k+1)
test(0)

上面函数返回结果如下
拓扑图如下
说明:程序执行过程按照上图1-9过程,之后以此类推 总结,执行函数执行for循环中的首个变量,当递归数超过限定数值,取并列for循环中所有值,返回到上一个父元素取for中所有值,以此类推