xref: /petsc/config/BuildSystem/retrieval.py (revision ed0bf72be7d57d1de4f7e8393a052ca86de33b54)
15b6bfdb9SJed Brownfrom __future__ import absolute_import
2179860b2SJed Brownimport logger
3179860b2SJed Brown
4179860b2SJed Brownimport os
5e7c47bf1SJed Browntry:
6e7c47bf1SJed Brown  from urllib import urlretrieve
7e7c47bf1SJed Brownexcept ImportError:
8e7c47bf1SJed Brown  from urllib.request import urlretrieve
9e7c47bf1SJed Browntry:
108f450857SSatish Balay  import urlparse as urlparse_local # novermin
119ad79eecSSatish Balayexcept ImportError:
1227711972SSatish Balay  from urllib import parse as urlparse_local # novermin
13179860b2SJed Brownimport config.base
14728600e6SSatish Balayimport socket
15fbfe4939SVaclav Haplaimport shutil
16728600e6SSatish Balay
# Fix parsing for nonstandard schemes: add 'bk', 'ssh' and 'svn' to the URL
# parser's uses_netloc list (presumably so that URLs with these schemes are
# treated as carrying a host/netloc component - verify against the urlparse docs)
urlparse_local.uses_netloc.extend(['bk', 'ssh', 'svn'])
19179860b2SJed Brown
20179860b2SJed Brownclass Retriever(logger.Logger):
21179860b2SJed Brown  def __init__(self, sourceControl, clArgs = None, argDB = None):
22179860b2SJed Brown    logger.Logger.__init__(self, clArgs, argDB)
23179860b2SJed Brown    self.sourceControl = sourceControl
24*ed0bf72bSSatish Balay    self.gitsubmodules = []
25*ed0bf72bSSatish Balay    self.gitprereq = 1
26*ed0bf72bSSatish Balay    self.git_urls = []
27*ed0bf72bSSatish Balay    self.hg_urls = []
28*ed0bf72bSSatish Balay    self.dir_urls = []
29*ed0bf72bSSatish Balay    self.link_urls = []
30*ed0bf72bSSatish Balay    self.tarball_urls = []
31179860b2SJed Brown    self.stamp = None
32179860b2SJed Brown    return
33179860b2SJed Brown
34*ed0bf72bSSatish Balay  def isGitURL(self, url):
35*ed0bf72bSSatish Balay    parsed = urlparse_local.urlparse(url)
36*ed0bf72bSSatish Balay    if (parsed[0] == 'git') or (parsed[0] == 'ssh' and parsed[2].endswith('.git')) or (parsed[0] == 'https' and parsed[2].endswith('.git')):
37*ed0bf72bSSatish Balay      return True
38*ed0bf72bSSatish Balay    elif os.path.isdir(url) and self.isDirectoryGitRepo(url):
39*ed0bf72bSSatish Balay      return True
40*ed0bf72bSSatish Balay    return False
41*ed0bf72bSSatish Balay
42*ed0bf72bSSatish Balay  def setupURLs(self,packagename,urls,gitsubmodules,gitprereq):
43*ed0bf72bSSatish Balay    self.packagename = packagename
44*ed0bf72bSSatish Balay    self.gitsubmodules = gitsubmodules
45*ed0bf72bSSatish Balay    self.gitprereq = gitprereq
46*ed0bf72bSSatish Balay    for url in urls:
47*ed0bf72bSSatish Balay      parsed = urlparse_local.urlparse(url)
48*ed0bf72bSSatish Balay      if self.isGitURL(url):
49*ed0bf72bSSatish Balay        self.git_urls.append(self.removePrefix(url,'git://'))
50*ed0bf72bSSatish Balay      elif parsed[0] == 'hg'or (parsed[0] == 'ssh' and parsed[1].startswith('hg@')):
51*ed0bf72bSSatish Balay        self.hg_urls.append(self.removePrefix(url,'hg://'))
52*ed0bf72bSSatish Balay      elif parsed[0] == 'dir' or os.path.isdir(url):
53*ed0bf72bSSatish Balay        self.dir_urls.append(self.removePrefix(url,'dir://'))
54*ed0bf72bSSatish Balay      elif parsed[0] == 'link':
55*ed0bf72bSSatish Balay        self.link_urls.append(self.removePrefix(url,'link://'))
56*ed0bf72bSSatish Balay      else:
57*ed0bf72bSSatish Balay        # check for ftp.mcs.anl.gov - and use https://,www.mcs.anl.gov,ftp://
58*ed0bf72bSSatish Balay        if url.find('ftp.mcs.anl.gov'):
59*ed0bf72bSSatish Balay          https_url = url.replace('http://','https://').replace('ftp://','http://')
60*ed0bf72bSSatish Balay          self.tarball_urls.extend([https_url,https_url.replace('ftp.mcs.anl.gov/pub/petsc/','www.mcs.anl.gov/petsc/mirror/'),https_url.replace('https://','ftp')])
61*ed0bf72bSSatish Balay
62fbfe4939SVaclav Hapla  def isDirectoryGitRepo(self, directory):
63*ed0bf72bSSatish Balay    if not hasattr(self.sourceControl, 'git'):
64*ed0bf72bSSatish Balay      self.logPrint('git not found in self.sourceControl - cannot evaluate isDirectoryGitRepo(): '+directory)
65*ed0bf72bSSatish Balay      return False
66fbfe4939SVaclav Hapla    from config.base import Configure
67fbfe4939SVaclav Hapla    for loc in ['.git','']:
68fbfe4939SVaclav Hapla      cmd = '%s rev-parse --resolve-git-dir  %s'  % (self.sourceControl.git, os.path.join(directory,loc))
69fbfe4939SVaclav Hapla      (output, error, ret) = Configure.executeShellCommand(cmd, checkCommand = Configure.passCheckCommand, log = self.log)
70fbfe4939SVaclav Hapla      if not ret:
71fbfe4939SVaclav Hapla        return True
72fbfe4939SVaclav Hapla    return False
73fbfe4939SVaclav Hapla
74fbfe4939SVaclav Hapla  @staticmethod
75fbfe4939SVaclav Hapla  def removeTarget(t):
76fbfe4939SVaclav Hapla    if os.path.islink(t) or os.path.isfile(t):
77fbfe4939SVaclav Hapla      os.unlink(t) # same as os.remove(t)
78fbfe4939SVaclav Hapla    elif os.path.isdir(t):
79fbfe4939SVaclav Hapla      shutil.rmtree(t)
80fbfe4939SVaclav Hapla
81fbfe4939SVaclav Hapla  @staticmethod
82fbfe4939SVaclav Hapla  def getDownloadFailureMessage(package, url, filename=None):
83fbfe4939SVaclav Hapla    slashFilename = '/'+filename if filename else ''
84fbfe4939SVaclav Hapla    return '''\
85fbfe4939SVaclav HaplaUnable to download package %s from: %s
86fbfe4939SVaclav Hapla* If URL specified manually - perhaps there is a typo?
87fbfe4939SVaclav Hapla* If your network is disconnected - please reconnect and rerun ./configure
88fbfe4939SVaclav Hapla* Or perhaps you have a firewall blocking the download
89fbfe4939SVaclav Hapla* You can run with --with-packages-download-dir=/adirectory and ./configure will instruct you what packages to download manually
90fbfe4939SVaclav Hapla* or you can download the above URL manually, to /yourselectedlocation%s
91fbfe4939SVaclav Hapla  and use the configure option:
92fbfe4939SVaclav Hapla  --download-%s=/yourselectedlocation%s
93fbfe4939SVaclav Hapla    ''' % (package.upper(), url, slashFilename, package, slashFilename)
94fbfe4939SVaclav Hapla
95fbfe4939SVaclav Hapla  @staticmethod
96fbfe4939SVaclav Hapla  def removePrefix(url,prefix):
97fbfe4939SVaclav Hapla    '''Replacement for str.removeprefix() supported only since Python 3.9'''
98fbfe4939SVaclav Hapla    if url.startswith(prefix):
99fbfe4939SVaclav Hapla      return url[len(prefix):]
100fbfe4939SVaclav Hapla    return url
101fbfe4939SVaclav Hapla
102*ed0bf72bSSatish Balay  def generateURLs(self):
103*ed0bf72bSSatish Balay    if hasattr(self.sourceControl, 'git') and self.gitprereq:
104*ed0bf72bSSatish Balay      for url in self.git_urls:
105*ed0bf72bSSatish Balay        yield('git',url)
106*ed0bf72bSSatish Balay    else:
107*ed0bf72bSSatish Balay      self.logPrint('Git not found or gitprereq check failed! skipping giturls: '+str(self.git_urls)+'\n')
108*ed0bf72bSSatish Balay    if hasattr(self.sourceControl, 'hg'):
109*ed0bf72bSSatish Balay      for url in self.hg_urls:
110*ed0bf72bSSatish Balay        yield('hg',url)
111*ed0bf72bSSatish Balay    else:
112*ed0bf72bSSatish Balay      self.logPrint('Hg not found - skipping hgurls: '+str(self.hg_urls)+'\n')
113*ed0bf72bSSatish Balay    for url in self.dir_urls:
114*ed0bf72bSSatish Balay      yield('dir',url)
115*ed0bf72bSSatish Balay    for url in self.link_urls:
116*ed0bf72bSSatish Balay      yield'link',(url)
117*ed0bf72bSSatish Balay    for url in self.tarball_urls:
118*ed0bf72bSSatish Balay      yield('tarball',url)
119*ed0bf72bSSatish Balay
120*ed0bf72bSSatish Balay  def genericRetrieve(self,proto,url,root):
121fbfe4939SVaclav Hapla    '''Fetch package from version control repository or tarfile indicated by URL and extract it into root'''
122*ed0bf72bSSatish Balay    if proto == 'git':
123*ed0bf72bSSatish Balay      return self.gitRetrieve(url,root)
124*ed0bf72bSSatish Balay    elif proto == 'hg':
125*ed0bf72bSSatish Balay      return self.hgRetrieve(url,root)
126*ed0bf72bSSatish Balay    elif proto == 'dir':
127*ed0bf72bSSatish Balay      return self.dirRetrieve(url,root)
128*ed0bf72bSSatish Balay    elif proto == 'link':
129*ed0bf72bSSatish Balay      self.linkRetrieve(url,root)
130*ed0bf72bSSatish Balay    elif proto == 'tarball':
131*ed0bf72bSSatish Balay      self.tarballRetrieve(url,root)
132179860b2SJed Brown
133*ed0bf72bSSatish Balay  def dirRetrieve(self, url, root):
134fbfe4939SVaclav Hapla    self.logPrint('Retrieving %s as directory' % url, 3, 'install')
135*ed0bf72bSSatish Balay    if not os.path.isdir(url): raise RuntimeError('URL %s is not a directory' % url)
13652df3566SBarry Smith
137*ed0bf72bSSatish Balay    t = os.path.join(root,os.path.basename(url))
138fbfe4939SVaclav Hapla    self.removeTarget(t)
139*ed0bf72bSSatish Balay    shutil.copytree(url,t)
14052df3566SBarry Smith
141*ed0bf72bSSatish Balay  def linkRetrieve(self, url, root):
142fbfe4939SVaclav Hapla    self.logPrint('Retrieving %s as link' % url, 3, 'install')
143*ed0bf72bSSatish Balay    if not os.path.isdir(url): raise RuntimeError('URL %s is not pointing to a directory' % url)
1443a911845SSatish Balay
145*ed0bf72bSSatish Balay    t = os.path.join(root,os.path.basename(url))
146fbfe4939SVaclav Hapla    self.removeTarget(t)
147*ed0bf72bSSatish Balay    os.symlink(os.path.abspath(url),t)
1483a911845SSatish Balay
149*ed0bf72bSSatish Balay  def gitRetrieve(self, url, root):
150fbfe4939SVaclav Hapla    self.logPrint('Retrieving %s as git repo' % url, 3, 'install')
151fbfe4939SVaclav Hapla    if not hasattr(self.sourceControl, 'git'):
152fbfe4939SVaclav Hapla      raise RuntimeError('self.sourceControl.git not set')
153*ed0bf72bSSatish Balay    if os.path.isdir(url) and not self.isDirectoryGitRepo(url):
154fbfe4939SVaclav Hapla      raise RuntimeError('URL %s is a directory but not a git repository' % url)
15552df3566SBarry Smith
156*ed0bf72bSSatish Balay    newgitrepo = os.path.join(root,'git.'+self.packagename)
157fbfe4939SVaclav Hapla    self.removeTarget(newgitrepo)
15852df3566SBarry Smith
159b93f8388SBarry Smith    try:
1600a7c9ef6SSatish Balay      submodopt =''
161*ed0bf72bSSatish Balay      for itm in self.gitsubmodules:
1620a7c9ef6SSatish Balay        submodopt += ' --recurse-submodules='+itm
163*ed0bf72bSSatish Balay      config.base.Configure.executeShellCommand('%s clone %s %s %s' % (self.sourceControl.git, submodopt, url, newgitrepo), log = self.log, timeout = 120.0)
1645b6bfdb9SJed Brown    except  RuntimeError as e:
165b93f8388SBarry Smith      self.logPrint('ERROR: '+str(e))
166b93f8388SBarry Smith      err = str(e)
167*ed0bf72bSSatish Balay      failureMessage = self.getDownloadFailureMessage(self.packagename, url)
168*ed0bf72bSSatish Balay      raise RuntimeError('Unable to clone '+self.packagename+'\n'+err+failureMessage)
1695e208ef3SBarry Smith
170*ed0bf72bSSatish Balay  def hgRetrieve(self, url, root):
171fbfe4939SVaclav Hapla    self.logPrint('Retrieving %s as hg repo' % url, 3, 'install')
172fbfe4939SVaclav Hapla    if not hasattr(self.sourceControl, 'hg'):
173fbfe4939SVaclav Hapla      raise RuntimeError('self.sourceControl.hg not set')
1740c3d3c20SBarry Smith
175*ed0bf72bSSatish Balay    newgitrepo = os.path.join(root,'hg.'+self.packagename)
176fbfe4939SVaclav Hapla    self.removeTarget(newgitrepo)
177b93f8388SBarry Smith    try:
178*ed0bf72bSSatish Balay      config.base.Configure.executeShellCommand('%s clone %s %s' % (self.sourceControl.hg, url, newgitrepo), log = self.log, timeout = 120.0)
1795b6bfdb9SJed Brown    except  RuntimeError as e:
180b93f8388SBarry Smith      self.logPrint('ERROR: '+str(e))
181b93f8388SBarry Smith      err = str(e)
182*ed0bf72bSSatish Balay      failureMessage = self.getDownloadFailureMessage(self.packagename, url)
183*ed0bf72bSSatish Balay      raise RuntimeError('Unable to clone '+self.packagename+'\n'+err+failureMessage)
1840c3d3c20SBarry Smith
185*ed0bf72bSSatish Balay  def tarballRetrieve(self, url, root):
186fbfe4939SVaclav Hapla    parsed = urlparse_local.urlparse(url)
187fbfe4939SVaclav Hapla    filename = os.path.basename(parsed[2])
18815ac2963SJed Brown    localFile = os.path.join(root,'_d_'+filename)
189fbfe4939SVaclav Hapla    self.logPrint('Retrieving %s as tarball to %s' % (url,localFile) , 3, 'install')
19015ac2963SJed Brown    ext =  os.path.splitext(localFile)[1]
19115ac2963SJed Brown    if ext not in ['.bz2','.tbz','.gz','.tgz','.zip','.ZIP']:
192179860b2SJed Brown      raise RuntimeError('Unknown compression type in URL: '+ url)
19315ac2963SJed Brown
194fbfe4939SVaclav Hapla    self.removeTarget(localFile)
195fbfe4939SVaclav Hapla
196fbfe4939SVaclav Hapla    if parsed[0] == 'file' and not parsed[1]:
197fbfe4939SVaclav Hapla      url = parsed[2]
198fbfe4939SVaclav Hapla    if os.path.exists(url):
199fbfe4939SVaclav Hapla      if not os.path.isfile(url):
200fbfe4939SVaclav Hapla        raise RuntimeError('Local path exists but is not a regular file: '+ url)
201fbfe4939SVaclav Hapla      # copy local file
202fbfe4939SVaclav Hapla      shutil.copyfile(url, localFile)
203fbfe4939SVaclav Hapla    else:
204fbfe4939SVaclav Hapla      # fetch remote file
205179860b2SJed Brown      try:
206728600e6SSatish Balay        sav_timeout = socket.getdefaulttimeout()
207728600e6SSatish Balay        socket.setdefaulttimeout(30)
208e7c47bf1SJed Brown        urlretrieve(url, localFile)
209728600e6SSatish Balay        socket.setdefaulttimeout(sav_timeout)
2105b6bfdb9SJed Brown      except Exception as e:
211728600e6SSatish Balay        socket.setdefaulttimeout(sav_timeout)
212*ed0bf72bSSatish Balay        failureMessage = self.getDownloadFailureMessage(self.packagename, url, filename)
213179860b2SJed Brown        raise RuntimeError(failureMessage)
21415ac2963SJed Brown
21515ac2963SJed Brown    self.logPrint('Extracting '+localFile)
21615ac2963SJed Brown    if ext in ['.zip','.ZIP']:
21715ac2963SJed Brown      config.base.Configure.executeShellCommand('cd '+root+'; unzip '+localFile, log = self.log)
21815ac2963SJed Brown      output = config.base.Configure.executeShellCommand('cd '+root+'; zipinfo -1 '+localFile+' | head -n 1', log = self.log)
219179860b2SJed Brown      dirname = os.path.normpath(output[0].strip())
22015ac2963SJed Brown    else:
22115ac2963SJed Brown      failureMessage = '''\
22215ac2963SJed BrownDownloaded package %s from: %s is not a tarball.
22315ac2963SJed Brown[or installed python cannot process compressed files]
22415ac2963SJed Brown* If you are behind a firewall - please fix your proxy and rerun ./configure
22515ac2963SJed Brown  For example at LANL you may need to set the environmental variable http_proxy (or HTTP_PROXY?) to  http://proxyout.lanl.gov
2260aa1f76dSSatish Balay* You can run with --with-packages-download-dir=/adirectory and ./configure will instruct you what packages to download manually
227b93f8388SBarry Smith* or you can download the above URL manually, to /yourselectedlocation/%s
22815ac2963SJed Brown  and use the configure option:
22915ac2963SJed Brown  --download-%s=/yourselectedlocation/%s
230*ed0bf72bSSatish Balay''' % (self.packagename.upper(), url, filename, self.packagename, filename)
23115ac2963SJed Brown      import tarfile
23215ac2963SJed Brown      try:
23315ac2963SJed Brown        tf  = tarfile.open(os.path.join(root, localFile))
2345b6bfdb9SJed Brown      except tarfile.ReadError as e:
235b95f98c7SJed Brown        raise RuntimeError(str(e)+'\n'+failureMessage)
23615ac2963SJed Brown      if not tf: raise RuntimeError(failureMessage)
2372501eaf6SSatish Balay      #git puts 'pax_global_header' as the first entry and some tar utils process this as a file
2382501eaf6SSatish Balay      firstname = tf.getnames()[0]
2392501eaf6SSatish Balay      if firstname == 'pax_global_header':
2402501eaf6SSatish Balay        firstmember = tf.getmembers()[1]
24115ac2963SJed Brown      else:
2422501eaf6SSatish Balay        firstmember = tf.getmembers()[0]
2432501eaf6SSatish Balay      # some tarfiles list packagename/ but some list packagename/filename in the first entry
2442501eaf6SSatish Balay      if firstmember.isdir():
2452501eaf6SSatish Balay        dirname = firstmember.name
2462501eaf6SSatish Balay      else:
2472501eaf6SSatish Balay        dirname = os.path.dirname(firstmember.name)
24815ac2963SJed Brown      tf.extractall(root)
24915ac2963SJed Brown      tf.close()
25015ac2963SJed Brown
25115ac2963SJed Brown    # fix file permissions for the untared tarballs.
25215ac2963SJed Brown    try:
2532501eaf6SSatish Balay      # check if 'dirname' is set'
2542501eaf6SSatish Balay      if dirname:
255179860b2SJed Brown        config.base.Configure.executeShellCommand('cd '+root+'; chmod -R a+r '+dirname+';find  '+dirname + ' -type d -name "*" -exec chmod a+rx {} \;', log = self.log)
2562501eaf6SSatish Balay      else:
2572501eaf6SSatish Balay        self.logPrintBox('WARNING: Could not determine dirname extracted by '+localFile+' to fix file permissions')
2585b6bfdb9SJed Brown    except RuntimeError as e:
25915ac2963SJed Brown      raise RuntimeError('Error changing permissions for '+dirname+' obtained from '+localFile+ ' : '+str(e))
260179860b2SJed Brown    os.unlink(localFile)
261