#
# Copyright 2014 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#
# Refer to the README and COPYING files for full details of the license
#
"""
This module contains all the functions related to syncing yum repos, it also
defines the format for the reposync configuration file.
Reposync config file
---------------------------
In order to provide fast package installation to the vms, lago creates a local
repository for each prefix; right now this is also the only way to pass local
repos to the vms.
This file should be a valid yum config file, with the repos that you want to
be available for the vms declared there with a small extension, the whitelist
and blacklist options:
Include
++++++++++
For each repo you can define an option 'includepkgs' with a space separated
list of :mod:`fnmatch` patterns to allow only rpms that match them
Exclude
++++++++++
Similar to include, you can define an option 'exclude' with a space separated
list of :mod:`fnmatch` patterns to ignore any rpms that match them
Example::
[main]
reposdir=/etc/reposync.repos.d
[local-vdsm-build-el7]
name=VDSM local built rpms
baseurl=file:///home/dcaro/Work/redhat/ovirt/vdsm/exported-artifacts
enabled=1
gpgcheck=0
[ovirt-master-snapshot-el7]
name=oVirt Master Nightly Test Releases
baseurl=http://resources.ovirt.org/pub/ovirt-master-snapshot/rpm/el7/
exclude=vdsm-* ovirt-node-* *-debuginfo ovirt-engine-appliance
enabled=1
gpgcheck=0
"""
import ConfigParser
import contextlib
import fnmatch
import functools
import gzip
import logging
import os
import StringIO
import urllib2

import lxml.etree
import rpmUtils.arch
import rpmUtils.miscutils

import lago.utils
from lago import log_utils
LOGGER = logging.getLogger(__name__)
LogTask = functools.partial(log_utils.LogTask, logger=LOGGER)
log_task = functools.partial(log_utils.log_task, logger=LOGGER)
[docs]def gen_to_list(func):
"""
Decorator to wrap the results of the decorated function in a list
"""
@functools.wraps(func)
def wrapper(*args, **kwargs):
return list(func(*args, **kwargs))
return wrapper
#: Randomly chosen rpm xml name spaces (I swear, we used a dice)
RPMNS = {
'rpm': 'http://linux.duke.edu/metadata/repo',
'common': 'http://linux.duke.edu/metadata/common'
}
[docs]def fetch_xml(url):
"""
Retrieves an xml resource from a url
Args:
url (str): URL to get the xml from
Returns:
lxml.etree._Element: Root of the xml tree for the retrieved resource
"""
content = urllib2.urlopen(url).read()
if url.endswith('.gz'):
content = gzip.GzipFile(fileobj=StringIO.StringIO(content)).read()
return lxml.etree.fromstring(content)
[docs]def _pkg_in_pattern_list(pattern_list, pkg):
return (
pkg in pattern_list or any(
fnmatch.fnmatch(pkg, pat) for pat in pattern_list
)
)
[docs]def _passes_lists(whitelist, blacklist, name):
if (
whitelist and not _pkg_in_pattern_list(whitelist, name) or blacklist
and _pkg_in_pattern_list(blacklist, name)
):
return False
return True
[docs]def _get_packages_from_repo_url(repo_url):
repomd_url = '%s/repodata/repomd.xml' % repo_url
repomd_xml = fetch_xml(repomd_url)
primary_xml_loc = repomd_xml.xpath(
'/rpm:repomd/rpm:data[@type="primary"]/rpm:location',
namespaces=RPMNS,
)[0].attrib['href']
primary_xml = fetch_xml('%s/%s' % (repo_url, primary_xml_loc))
return primary_xml.xpath(
'/common:metadata/common:package[@type="rpm"]',
namespaces=RPMNS,
)
[docs]def get_packages(repo_url, whitelist=None, blacklist=None, only_latest=True):
"""
Retrieves the package info from the given repo, filtering with whitelist
and blacklist
Args:
repo_url (str): URL to the repo to ger rpm info from
whitelist (list of str): :mod:`fnmatch` patterns to whitelist by
blacklist (list of str): :mod:`fnmatch` patterns to blacklist by
Returns:
list of dict: list with the rpm info for each rpm that passed the
filters, where the returned dict has the keys:
* name (str): Name of the rpm
* location (str): URL for the rpm, relative to the repo url
* checksum (dict): dict with the hash type and value
* checksum[type] (str): type of checksum (usually sha256)
* checksum[hash] (str): value for the checksum
* build_time (int): Time when the package was built
* version (tuple of str, str, str): tuple with the epoc, version
and release strings for that rpm
Warning:
The whitelist is actually doing the same as blacklist, **the example
below shows what it shoud do, not what it does**
Example:
>>> get_packages(
... 'http://resources.ovirt.org/pub/ovirt-master-snapshot/rpm/el7/',
... whitelist=['*ioprocess*'],
... blacklist=['*debuginfo*'],
... )
... # doctest: +ELLIPSIS
[{'build_time': 1...,
'checksum': {'hash': '...',
'type': 'sha256'},
'location': 'noarch/python-ioprocess-....el7.noarch.rpm',
'name': 'python-ioprocess',
'version': ('...', '...', '....el7')},
{'build_time': 1...,
'checksum': {'hash': '...',
'type': 'sha256'},
'location': 'noarch/python-ioprocess-....el7.noarch.rpm',
'name': 'python-ioprocess',
'version': ('...', '...', '....el7')},
{'build_time': 1...,
'checksum': {'hash': '...',
'type': 'sha256'},
'location': 'x86_64/ioprocess-....el7.x86_64.rpm',
'name': 'ioprocess',
'version': ('0', '0.15.0', '3.el7')},
{'build_time': 1...,
'checksum': {'hash': '...',
'type': 'sha256'},
'location': 'x86_64/ioprocess-....el7.x86_64.rpm',
'name': 'ioprocess',
'version': ('...', '...', '....el7')}]
"""
available_arches = rpmUtils.arch.getArchList()
rpms_by_name = {}
unfiltered_packages = _get_packages_from_repo_url(repo_url=repo_url)
LOGGER.debug(
'Got %d unfiltered packages from %s',
len(unfiltered_packages),
repo_url,
)
for pkg_element in unfiltered_packages:
name = pkg_element.xpath('common:name', namespaces=RPMNS)[0].text
if not _passes_lists(
whitelist=whitelist,
blacklist=blacklist,
name=name
):
continue
arch = pkg_element.xpath('common:arch', namespaces=RPMNS)[0].text
if arch not in available_arches:
continue
rpm = {
'name': name,
'location': pkg_element.xpath(
'common:location',
namespaces=RPMNS,
)[0].attrib['href'],
'checksum': {
'type': pkg_element.xpath(
'common:checksum',
namespaces=RPMNS,
)[0].attrib['type'],
'hash': pkg_element.xpath(
'common:checksum',
namespaces=RPMNS,
)[0].text,
},
'version': (
pkg_element.xpath(
'common:version',
namespaces=RPMNS,
)[0].attrib['epoch'],
pkg_element.xpath(
'common:version',
namespaces=RPMNS,
)[0].attrib['ver'],
pkg_element.xpath(
'common:version',
namespaces=RPMNS,
)[0].attrib['rel'],
),
'build_time': int(
pkg_element.xpath(
'common:time',
namespaces=RPMNS,
)[0].attrib['build']
),
}
name = rpm['name']
if rpm['location'].endswith('.src.rpm'):
name = 'src-%s' % name
if (
name not in rpms_by_name or rpmUtils.miscutils.compareEVR(
rpms_by_name[name]['version'], rpm['version']
) < 0
):
rpms_by_name[name] = rpm
else:
continue
return rpms_by_name.values()
[docs]def verify_repo(repo_url, path, whitelist=None, blacklist=None):
"""
Verifies that the given repo url is properly synced to the given path
Args:
repo_url (str): Remote URL to sync locally
path (str): Local path to sync to
whitelist (list of str): List of patterns to whitelist by
blacklist (list of str): List of patterns to blacklist by
Returns:
None
Raises:
RuntimeError: if there's a local rpm that does not exist in the remote
repo url
See Also:
:func:`get_packages`
"""
downloaded_rpms = []
whitelist = whitelist or {'*': True}
blacklist = blacklist or {}
for _, _, files in os.walk(path):
downloaded_rpms.extend(
fname for fname in files if fname.endswith('.rpm')
)
packages = get_packages(repo_url, whitelist, blacklist, only_latest=True)
LOGGER.debug(
'Got %d filtered packages for repo %s',
len(packages),
repo_url,
)
are_there_missing_rpms = False
for rpm in get_packages(repo_url, whitelist, blacklist, only_latest=True):
rpm_filename = os.path.basename(rpm['location'])
if rpm_filename.endswith('.src.rpm'):
continue
if rpm_filename not in downloaded_rpms:
are_there_missing_rpms = True
LOGGER.error(
'RPM %s from %s is missing locally ',
rpm['name'],
repo_url,
)
if are_there_missing_rpms:
raise RuntimeError(
'Some rpms were not found locally for repo %s' % repo_url
)
[docs]def verify_reposync(config_path, sync_dir, repo_whitelist=None):
"""
Verifies that the given reposync configuration is properly updated in the
given sync dir, skipping any non-whitelisted repos
Args:
config_path (str): Path to the reposync configuration file
sync_dir (str): Local path to the parent dir where to look for the
repos, if not there, they will get created
repo_whitelist (list of str): list with the :mod:`fnmatch` patterns to
whitelist repos by, if empty or not passed, it will not filter the
repos
Returns:
None
"""
config = ConfigParser.SafeConfigParser()
with open(config_path) as config_fd:
config.readfp(config_fd)
verify_repo_jobs = []
for repo in config.sections():
if repo == 'main':
continue
if repo_whitelist and repo not in repo_whitelist:
continue
if not config.getint(repo, 'enabled'):
continue
if config.has_option(repo, 'includepkgs'):
# a dict is a couple orders of magnitude faster than a list
whitelist = {
pkg: True
for pkg in config.get(repo, 'includepkgs').split(' ')
}
else:
whitelist = None
if config.has_option(repo, 'exclude'):
# a dict is a couple orders of magnitude faster than a list
blacklist = {
pkg: True
for pkg in config.get(repo, 'exclude').split(' ')
}
else:
blacklist = None
repo_path = os.path.join(sync_dir, repo)
def _verify_repo_job(base_url, *args, **kwargs):
with LogTask('Verifying repo %s' % base_url):
verify_repo(base_url, *args, **kwargs)
verify_repo_jobs.append(
functools.partial(
_verify_repo_job,
config.get(repo, 'baseurl'),
repo_path,
whitelist,
blacklist,
)
)
lago.utils.invoke_in_parallel(lambda f: f(), verify_repo_jobs)