summaryrefslogtreecommitdiff
path: root/methods
diff options
context:
space:
mode:
authorDavid Kalnischkies <david@kalnischkies.de>2013-12-06 12:17:48 +0100
committerDavid Kalnischkies <david@kalnischkies.de>2013-12-13 11:59:49 +0100
commit47d2bc78adb49f3182f9a3d7a4baea363e772d64 (patch)
tree0536df9928330c35d98a8c2a41fb24405cae9ba1 /methods
parent9d39208af5c8c72d3886c70d603921cf427056ee (diff)
downloadapt-47d2bc78adb49f3182f9a3d7a4baea363e772d64.tar.gz
implement POC client-side merging of pdiffs via apt-file
The idea of pdiffs is to avoid downloading the whole file by patching the existing index. This works very well, but becomes slow if a lot of patches need to be applied to reconstruct an up-to-date index, and in recent years more and more dinstall (or similar) runs are executed, creating more and more pdiffs in the same amount of time, so pdiffs became less useful. The solution is simple: reduce the number of patches (which are very small) that need to be applied on top of the index we have available (which is usually pretty big). This can be done in two ways: either the patches are merged on the server side so that the client has to download only one patch, or the patches are all downloaded and merged on the client side. The former needs a client that does one step at a time and can also skip patches if needed (APT has supported this for a long time now). The latter is implemented by this commit, but depends on the server NOT merging the patches and on the patches being in a strict order in which no patch is skipped. This is traditionally the case for dak, but other repository creators support merging – e.g. reprepro (which helpfully adds a flag indicating that the patches are merged). To support both, or even mixes of the two, a client needs more information, which isn't available for now. This POC uses the external diffindex-rred included in apt-file to do the heavy lifting of merging & applying all patches in one pass; hence, to test this feature, apt-file needs to be installed.
Diffstat (limited to 'methods')
-rw-r--r--methods/rred.cc128
1 files changed, 96 insertions, 32 deletions
diff --git a/methods/rred.cc b/methods/rred.cc
index 7c65f8f92..bea8ed263 100644
--- a/methods/rred.cc
+++ b/methods/rred.cc
@@ -11,6 +11,8 @@
#include <sys/stat.h>
#include <sys/uio.h>
+#include <sys/types.h>
+#include <fcntl.h>
#include <unistd.h>
#include <utime.h>
#include <stdio.h>
@@ -465,50 +467,112 @@ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
} else
URIStart(Res);
- if (Debug == true)
- std::clog << "Patching " << Path << " with " << Path
- << ".ed and putting result into " << Itm->DestFile << std::endl;
- // Open the source and destination files (the d'tor of FileFd will do
- // the cleanup/closing of the fds)
- FileFd From(Path,FileFd::ReadOnly);
- FileFd Patch(Path+".ed",FileFd::ReadOnly, FileFd::Gzip);
- FileFd To(Itm->DestFile,FileFd::WriteAtomic);
- To.EraseOnFailure();
- if (_error->PendingError() == true)
- return false;
-
+ std::string lastPatchName;
Hashes Hash;
- // now do the actual patching
- State const result = patchMMap(Patch, From, To, &Hash);
- if (result == MMAP_FAILED) {
- // retry with patchFile
- Patch.Seek(0);
- From.Seek(0);
- To.Open(Itm->DestFile,FileFd::WriteAtomic);
+
+ // check for a single ed file
+ if (FileExists(Path+".ed") == true)
+ {
+ if (Debug == true)
+ std::clog << "Patching " << Path << " with " << Path
+ << ".ed and putting result into " << Itm->DestFile << std::endl;
+
+ // Open the source and destination files
+ lastPatchName = Path + ".ed";
+ FileFd From(Path,FileFd::ReadOnly);
+ FileFd To(Itm->DestFile,FileFd::WriteAtomic);
+ To.EraseOnFailure();
+ FileFd Patch(lastPatchName, FileFd::ReadOnly, FileFd::Gzip);
if (_error->PendingError() == true)
- return false;
- if (patchFile(Patch, From, To, &Hash) != ED_OK) {
- return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
+ return false;
+
+ // now do the actual patching
+ State const result = patchMMap(Patch, From, To, &Hash);
+ if (result == MMAP_FAILED) {
+ // retry with patchFile
+ Patch.Seek(0);
+ From.Seek(0);
+ To.Open(Itm->DestFile,FileFd::WriteAtomic);
+ if (_error->PendingError() == true)
+ return false;
+ if (patchFile(Patch, From, To, &Hash) != ED_OK) {
+ return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str());
+ } else if (Debug == true) {
+ std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
+ }
+ } else if (result != ED_OK) {
+ return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
} else if (Debug == true) {
- std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl;
+ std::clog << "rred: finished mmap patching of " << Path << std::endl;
}
- } else if (result != ED_OK) {
- return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str());
- } else if (Debug == true) {
- std::clog << "rred: finished mmap patching of " << Path << std::endl;
+
+ // write out the result
+ From.Close();
+ Patch.Close();
+ To.Close();
}
+ else
+ {
+ if (Debug == true)
+ std::clog << "Patching " << Path << " with all " << Path << ".ed.*.gz files and "
+ << "putting result into " << Itm->DestFile << std::endl;
+
+ int From = open(Path.c_str(), O_RDONLY);
+ unlink(Itm->DestFile.c_str());
+ int To = open(Itm->DestFile.c_str(), O_WRONLY | O_CREAT | O_EXCL, 0644);
+ SetCloseExec(From, false);
+ SetCloseExec(To, false);
+
+ _error->PushToStack();
+ std::vector<std::string> patches = GetListOfFilesInDir(flNotFile(Path), "gz", true, false);
+ _error->RevertToStack();
+
+ std::string externalrred = _config->Find("Dir::Bin::rred", "/usr/bin/diffindex-rred");
+ std::vector<const char *> Args;
+ Args.reserve(22);
+ Args.push_back(externalrred.c_str());
+
+ std::string const baseName = Path + ".ed.";
+ for (std::vector<std::string>::const_iterator p = patches.begin();
+ p != patches.end(); ++p)
+ if (p->compare(0, baseName.length(), baseName) == 0)
+ Args.push_back(p->c_str());
+
+ Args.push_back(NULL);
+
+ pid_t Patcher = ExecFork();
+ if (Patcher == 0) {
+ dup2(From, STDIN_FILENO);
+ dup2(To, STDOUT_FILENO);
+
+ execvp(Args[0], (char **) &Args[0]);
+ std::cerr << "Failed to execute patcher " << Args[0] << "!" << std::endl;
+ _exit(100);
+ }
+ // last is NULL, so the one before is the last patch
+ lastPatchName = Args[Args.size() - 2];
- // write out the result
- From.Close();
- Patch.Close();
- To.Close();
+ if (ExecWait(Patcher, "rred") == false)
+ return _error->Errno("rred", "Patching via external rred failed");
+
+ close(From);
+ close(To);
+
+ struct stat Buf;
+ if (stat(Itm->DestFile.c_str(), &Buf) != 0)
+ return _error->Errno("stat",_("Failed to stat"));
+
+ To = open(Path.c_str(), O_RDONLY);
+ Hash.AddFD(To, Buf.st_size);
+ close(To);
+ }
/* Transfer the modification times from the patch file
to be able to see in which state the file should be
and use the access time from the "old" file */
struct stat BufBase, BufPatch;
if (stat(Path.c_str(),&BufBase) != 0 ||
- stat(std::string(Path+".ed").c_str(),&BufPatch) != 0)
+ stat(lastPatchName.c_str(), &BufPatch) != 0)
return _error->Errno("stat",_("Failed to stat"));
struct utimbuf TimeBuf;