#include "file_cache.h"

// Sub-directory of each cache directory that holds the cached files
const std::string FileCache::CACHE_DATA_DIR("data");
// Sub-directory of each cache directory that holds the per-job hard links
const std::string FileCache::CACHE_JOB_DIR("joblinks");
// The hash of a URL is split into directory levels by inserting a "/" after
// every CACHE_DIR_LENGTH characters, CACHE_DIR_LEVELS times (see file())
const int FileCache::CACHE_DIR_LENGTH(2);
const int FileCache::CACHE_DIR_LEVELS(1);
// Suffixes for lock and meta files - presumably appended to the cache file
// name by _getLockFileName() and _getMetaFileName() (not visible here)
const std::string FileCache::CACHE_LOCK_SUFFIX(".lock");
const std::string FileCache::CACHE_META_SUFFIX(".meta");
// Validity in seconds given to a DN when no expiry time is supplied
const int FileCache::CACHE_DEFAULT_AUTH_VALIDITY(86400); // 24 h

// Construct a FileCache with a single cache directory and no remote caches.
// cache_path may carry an optional link path after a space (see _init()).
// id is stored as the job id and job_uid/job_gid as the job's owner.
// Throws FileCacheException if _init() fails.
FileCache::FileCache(std::string cache_path,
    std::string id,
    uid_t job_uid,
    gid_t job_gid) throw(FileCacheException) {

  // _init() works on lists, so wrap the single path in a one-element list
  const std::vector<std::string> local_caches(1, cache_path);
  const std::vector<std::string> no_remote_caches;

  _init(local_caches, no_remote_caches, id, job_uid, job_gid);
}

// Construct a FileCache with one cache directory and one remote cache.
// Both paths may carry an optional link path after a space (see _init()).
// id is stored as the job id and job_uid/job_gid as the job's owner.
// Throws FileCacheException if _init() fails.
FileCache::FileCache(std::string cache_path,
    std::string remote_cache_path,
    std::string id,
    uid_t job_uid,
    gid_t job_gid) throw(FileCacheException) {

  // _init() works on lists, so wrap each path in a one-element list
  const std::vector<std::string> local_caches(1, cache_path);
  const std::vector<std::string> remote_cache_list(1, remote_cache_path);

  _init(local_caches, remote_cache_list, id, job_uid, job_gid);
}

// Construct a FileCache with a list of cache directories and no remote caches.
// Each entry may carry an optional link path after a space (see _init()).
// Throws FileCacheException if _init() fails.
FileCache::FileCache(std::vector<std::string> caches,
    std::string id,
    uid_t job_uid,
    gid_t job_gid) throw(FileCacheException) {

  // no remote caches were given, so hand _init() an empty list
  _init(caches, std::vector<std::string>(), id, job_uid, job_gid);
}

// Construct a FileCache with a list of cache directories and a list of
// remote caches. Each entry may carry an optional link path after a space
// (see _init()). id is stored as the job id and job_uid/job_gid as the
// job's owner. Throws FileCacheException if _init() fails.
FileCache::FileCache(std::vector<std::string> caches,
    std::vector<std::string> remote_caches,
    std::string id,
    uid_t job_uid,
    gid_t job_gid) throw(FileCacheException) {
  
  // all constructors funnel into _init(), which does the real work
  _init(caches, remote_caches, id, job_uid, job_gid);
}

// Split one cache specification of the form "cache_path[ link_path]".
// path receives everything before the first space and link_path everything
// after the last space (empty if there is no space). One trailing slash is
// taken off each. Returns false if the cache path part is empty.
static bool splitCacheSpec(const std::string& spec,
    std::string& path,
    std::string& link_path) {

  const std::string::size_type first_space = spec.find(' ');
  path = spec.substr(0, first_space);
  link_path.clear();
  if (path.empty()) return false;
  if (first_space != std::string::npos) link_path = spec.substr(spec.find_last_of(' ')+1);

  // tidy up paths - take off any trailing slash
  if (path[path.length()-1] == '/') path.erase(path.length()-1);
  if (!link_path.empty() && link_path[link_path.length()-1] == '/') link_path.erase(link_path.length()-1);
  return true;
}

// Common initialisation used by all constructors.
//
// Stores the job id and owner, parses every entry of caches and
// remote_caches (each "cache_path[ link_path]") and appends the result to
// _caches / _remote_caches. For local caches the data and per-job link
// sub-directories are created; nothing is created for remote caches.
// Finally the hostname and pid of this process are recorded.
//
// Throws FileCacheException if an entry has no cache path, if a local cache
// sub-directory cannot be created or if uname() fails.
void FileCache::_init(std::vector<std::string> caches,
    std::vector<std::string> remote_caches,
    std::string id,
    uid_t job_uid,
    gid_t job_gid) throw(FileCacheException) {

  _id = id;
  _uid = job_uid;
  _gid = job_gid;

  // for each cache
  for (std::vector<std::string>::size_type i = 0; i < caches.size(); i++) {
    std::string cache_path;
    std::string cache_link_path;
    if (!splitCacheSpec(caches[i], cache_path, cache_link_path)) throw FileCacheException("No cache directory specified");

    // create cache dir and subdirs
    if (!_cacheMkDir(cache_path+"/"+CACHE_DATA_DIR, true)) throw FileCacheException("Cannot create directory "+cache_path+"/"+CACHE_DATA_DIR+" for cache");
    if (!_cacheMkDir(cache_path+"/"+CACHE_JOB_DIR, true)) throw FileCacheException("Cannot create directory "+cache_path+"/"+CACHE_JOB_DIR+" for cache");

    // add this cache to our list
    struct CacheParameters cache_params;
    cache_params.cache_path = cache_path;
    cache_params.cache_link_path = cache_link_path;
    _caches.push_back(cache_params);
  }

  // add remote caches
  for (std::vector<std::string>::size_type i = 0; i < remote_caches.size(); i++) {
    std::string cache_path;
    std::string cache_link_path;
    if (!splitCacheSpec(remote_caches[i], cache_path, cache_link_path)) throw FileCacheException("No cache directory specified in remote caches");

    // add this cache to our list
    struct CacheParameters cache_params;
    cache_params.cache_path = cache_path;
    cache_params.cache_link_path = cache_link_path;
    _remote_caches.push_back(cache_params);
  }

  // our hostname and pid
  struct utsname buf;
  if (uname(&buf) != 0) throw FileCacheException("Cannot determine hostname from uname()");
  _hostname = buf.nodename;
  _pid = inttostring(getpid());
}

    
// Copy constructor. The cache lists, job id and job owner are taken from
// the other object; hostname and pid are looked up again for the current
// process rather than copied.
// Throws FileCacheException if uname() fails.
FileCache::FileCache(const FileCache& cache) {
  // configuration copied from the source object
  _id = cache._id;
  _uid = cache._uid;
  _gid = cache._gid;
  _caches = cache._caches;
  _remote_caches = cache._remote_caches;

  // identity of the current process
  struct utsname sysinfo;
  const bool have_sysinfo = (uname(&sysinfo) == 0);
  if (!have_sysinfo) throw FileCacheException("Cannot determine hostname from uname()");
  _hostname = sysinfo.nodename;
  _pid = inttostring(getpid());
}

// Destructor. Nothing is released here; locks and per-job links are
// removed explicitly through stop(), stopAndDelete() and release().
FileCache::~FileCache() {}

bool FileCache::start(std::string url, bool &available, bool &is_locked, bool use_remote) {
	
  available = false;
  is_locked = false;
	std::string filename = file(url);
  std::string lock_file = _getLockFileName(url);

  // create directory structure if required, only readable by GM user
  if (!_cacheMkDir(lock_file.substr(0, lock_file.rfind("/")), false)) return false;
  
  int lock_timeout = 86400; // one day timeout on lock TODO: make configurable?
  
  // locking mechanism:
  // - check if lock is there
  // - if not, create tmp file and check again
  // - if lock is still not there copy tmp file to cache lock file
  // - check pid inside lock file matches ours
  
  struct stat fileStat;
  int err = stat( lock_file.c_str(), &fileStat ); 
  if (0 != err) {
    if (errno == EACCES) {
      odlog(ERROR)<<"EACCES Error opening lock file"<<lock_file<<": "<<strerror(errno)<<std::endl;
      return false;
    }
    else if (errno != ENOENT) {
      // some other error occurred opening the lock file
      odlog(ERROR)<<"Error opening lock file "<<lock_file<<" in initial check: "<<strerror(errno)<<std::endl;
      return false;
    }
    // lock does not exist - create tmp file
    // ugly char manipulation to get mkstemp() to work...
    char tmpfile[256];
    tmpfile[0] = '\0';
    strcat(tmpfile, lock_file.c_str());
    strcat(tmpfile, ".XXXXXX");
    int h = mkstemp(tmpfile);
    if (h == -1) {
      odlog(ERROR)<<"Error creating file "<<tmpfile<<" with mkstemp(): "<<strerror(errno)<<std::endl;
      return false;
    }
    // write pid@hostname to the lock file
    char buf[_pid.length()+_hostname.length()+2];
    sprintf(buf, "%s@%s", _pid.c_str(), _hostname.c_str());
    if (write(h, &buf, strlen(buf)) == -1) {
      odlog(ERROR)<<"Error writing to tmp lock file "<<tmpfile<<": "<<strerror(errno)<<std::endl;
      close(h);
      // not much we can do if this doesn't work, but it is only a tmp file
      remove(tmpfile);
      return false;
    }
    if (close(h) != 0) {
      // not critical as file will be removed after we are done
      odlog(WARNING)<<"Warning: closing tmp lock file "<<tmpfile<<" failed"<<std::endl;
    }
    // check again if lock exists, in case creating the tmp file took some time
    err = stat( lock_file.c_str(), &fileStat ); 
    if (0 != err) {
      if (errno == ENOENT) {
        // ok, we can create lock
        if (rename(tmpfile, lock_file.c_str()) != 0) {
          odlog(ERROR)<<"Error renaming tmp file "<<tmpfile<<" to lock file "<<lock_file<<": "<<strerror(errno)<<std::endl;
          remove(tmpfile);
          return false;
        }
        // check it's really there
        err = stat( lock_file.c_str(), &fileStat ); 
        if (0 != err) {
          odlog(ERROR)<<"Error renaming lock file, even though rename() did not return an error"<<std::endl;
          return false;
        }
        // check the pid inside the lock file, just in case...
        if (!_checkLock(url)) {
          is_locked = true;
          return false;
        }
      }
      else if (errno == EACCES) {
        odlog(ERROR)<<"EACCES Error opening lock file"<<lock_file<<": "<<strerror(errno)<<std::endl;
        return false;
      }
      else {
        // some other error occurred opening the lock file
        odlog(ERROR)<<"Error opening lock file we just renamed successfully "<<lock_file<<": "<<strerror(errno)<<std::endl;
        return false;
      }
    }
    else {
      odlog(DEBUG)<<"The file is currently locked with a valid lock"<<std::endl;
      is_locked = true;
      return false;
    }
  }
  else {
    // the lock already exists, check if it has expired
    // look at modification time
    time_t mod_time = fileStat.st_mtime;
    time_t now = time(NULL);
    odlog(DEBUG)<<(now - mod_time)<<" seconds since lock file was created"<<std::endl;
    
    if ((now - mod_time) > lock_timeout) {
      odlog(DEBUG)<<"Timeout has expired, will remove lock file"<<std::endl;
      // TODO: kill the process holding the lock, only if we know it was the original
      // process which created it
      if (remove(lock_file.c_str()) != 0 && errno != ENOENT) {
        odlog(ERROR)<<"Failed to unlock file "<<lock_file<<": "<<strerror(errno)<<std::endl;
        return false;
      }
      // lock has expired and has been removed. Try to remove cache file and call start() again
      if (remove(filename.c_str()) != 0 && errno != ENOENT) {
        odlog(ERROR)<<"Error removing cache file "<<file(url)<<": "<<strerror(errno)<<std::endl;
        return false;
      }
      return start(url, available, is_locked);
    }
    
    // lock is still valid, check if we own it
    FILE * pFile;
    char lock_info [100]; // should be long enough for a pid + hostname
    pFile = fopen ((char*)lock_file.c_str(), "r");
    if (pFile == NULL) {
      // lock could have been released by another process, so call start again
      if (errno == ENOENT) {
        odlog(DEBUG)<<"Lock that recently existed has been deleted by another process, calling start() again"<<std::endl;
        return start(url, available, is_locked);
      }
      odlog(ERROR)<<"Error opening valid and existing lock file "<<lock_file<<": "<<strerror(errno)<<std::endl;
      return false;
    }
    fgets (lock_info, 100, pFile);
    fclose (pFile);
    
    std::string lock_info_s(lock_info);
    std::string::size_type index = lock_info_s.find("@", 0);
    if (index == std::string::npos) {
      odlog(ERROR)<<"Error with formatting in lock file "<<lock_file<<": "<<lock_info_s<<std::endl;
      return false;
    }
    
    if (lock_info_s.substr(index+1) != _hostname) {
      odlog(DEBUG)<<"Lock is owned by a different host"<<std::endl;
      // TODO: here do ssh login and check
      is_locked = true;
      return false;
    }
    std::string lock_pid = lock_info_s.substr(0, index);
    if (lock_pid == _pid) {
      // usually caused by duplicate input files - safer to wait until lock expires
      odlog(WARNING)<<"Warning: This process already owns the lock - possibly due to duplicate input files"<<std::endl;
    }
    else {
      // check if the pid owning the lock is still running - if not we can claim the lock
      // this is not really portable... but no other way to do it
      std::string procdir("/proc/");
      procdir = procdir.append(lock_pid);
      if (stat(procdir.c_str(), &fileStat) != 0 && errno == ENOENT) {
        odlog(DEBUG)<<"The process owning the lock is no longer running, will remove lock"<<std::endl;
        if (remove(lock_file.c_str()) != 0) {
          odlog(ERROR)<<"Failed to unlock file "<<lock_file<<": "<<strerror(errno)<<std::endl;
          return false;
        }
        // lock has been removed. try to delete cache file and call start() again
        if (remove(filename.c_str()) != 0 && errno != ENOENT) {
          odlog(ERROR)<<"Error removing cache file "<<file(url)<<": "<<strerror(errno)<<std::endl;
          return false;
        }
        return start(url, available, is_locked);
      }
    }
    
    odlog(DEBUG)<<"The file is currently locked with a valid lock"<<std::endl;
    is_locked = true;
    return false;
  }

  // if we get to here we have acquired the lock

  // create the meta file to store the URL, if it does not exist
  std::string meta_file = _getMetaFileName(url);
  err = stat( meta_file.c_str(), &fileStat ); 
  if (0 == err) {
    // check URL inside file for possible hash collisions
    FILE * pFile;
    char mystring [fileStat.st_size+1];
    pFile = fopen ((char*)_getMetaFileName(url).c_str(), "r");
    if (pFile == NULL) {
      odlog(ERROR)<<"Error opening meta file "<<_getMetaFileName(url)<<": "<<strerror(errno)<<std::endl;
      remove(lock_file.c_str());
      return false;
    }
    fgets (mystring, sizeof(mystring), pFile);
    fclose (pFile);
    
    std::string meta_str(mystring);
    // get the first line
    if (meta_str.find('\n') != std::string::npos) meta_str.resize(meta_str.find('\n'));
    
    std::string::size_type space_pos = meta_str.find(' ', 0);
    if (meta_str.substr(0, space_pos) != url) {
      odlog(ERROR)<<"Error: File "<<url<<" is already cached at "<<filename<<" under a different URL: "<<meta_str.substr(0, space_pos)<<" - this file will not be cached"<<std::endl;
      remove(lock_file.c_str());
      return false;
    }
  }
  else if (errno == ENOENT) {
    // create new file
    FILE * pFile;
    pFile = fopen ((char*)meta_file.c_str(), "w");
    if (pFile == NULL) {
      odlog(ERROR)<<"Failed to create info file: "<<meta_file<<": "<<strerror(errno)<<std::endl;
      remove(lock_file.c_str());
      return false;
    }
    fputs ((char*)url.c_str(), pFile);
    fputs ("\n", pFile);
    fclose (pFile);
    // make read/writeable only by GM user
    chmod(meta_file.c_str(), S_IRUSR | S_IWUSR);
  }
  else {
    odlog(ERROR)<<"Error looking up attributes of meta file "<<meta_file<<": "<<strerror(errno)<<std::endl;
    remove(lock_file.c_str());
    return false;
  }
  // now check if the cache file is there already
  err = stat( filename.c_str(), &fileStat );
  if (0 == err) available = true;
  
  // if the file is not there. check remote caches
  else if (errno == ENOENT) {
    if (!use_remote) return true;    
    // get the hash of the url
    std::string hash = FileCacheHash::getHash(url);
  
    int index = 0;
    for(int level = 0; level < CACHE_DIR_LEVELS; level ++) {
      hash.insert(index + CACHE_DIR_LENGTH, "/");
      // go to next slash position, add one since we just inserted a slash
      index += CACHE_DIR_LENGTH + 1;
    }
    std::string remote_cache_file;
    std::string remote_cache_link;
    for (std::vector<struct CacheParameters>::iterator it = _remote_caches.begin(); it != _remote_caches.end(); it++) {
      std::string remote_file = it->cache_path+"/"+CACHE_DATA_DIR+"/"+hash;
      if (stat(remote_file.c_str(), &fileStat) == 0) {
        remote_cache_file = remote_file;
        remote_cache_link = it->cache_link_path;
        break;
      }
    }
    if (remote_cache_file.empty()) return true;
    
    odlog(INFO)<<"Found file "<<url<<" in remote cache at "<<remote_cache_file<<std::endl;
    // if found, create lock file in remote cache
    std::string remote_lock_file = remote_cache_file+".lock";
    err = stat( remote_lock_file.c_str(), &fileStat );
    // if lock exists, exit
    if (0 == err) {
      odlog(DEBUG)<<"File exists in remote cache at "<<remote_cache_file<<" but is locked. Will download from source"<<std::endl;
      return true;
    }
  
    // lock does not exist - create tmp file
    // ugly char manipulation to get mkstemp() to work...
    char remote_tmpfile[256];
    remote_tmpfile[0] = '\0';
    strcat(remote_tmpfile, remote_lock_file.c_str());
    strcat(remote_tmpfile, ".XXXXXX");
    int h = mkstemp(remote_tmpfile);
    if (h == -1) {
      odlog(WARNING)<<"Error creating tmp file "<<remote_tmpfile<<" for remote lock with mkstemp(): "<<strerror(errno)<<std::endl;
      return true;
    }
    // write pid@hostname to the lock file
    char buf2[_pid.length()+_hostname.length()+2];
    sprintf(buf2, "%s@%s", _pid.c_str(), _hostname.c_str());
    if (write(h, &buf2, strlen(buf2)) == -1) {
      odlog(WARNING)<<"Error writing to tmp lock file for remote lock "<<remote_tmpfile<<": "<<strerror(errno)<<std::endl;
      close(h);
      // not much we can do if this doesn't work, but it is only a tmp file
      remove(remote_tmpfile);
      return true;
    }
    if (close(h) != 0) {
      // not critical as file will be removed after we are done
      odlog(WARNING)<<"Warning: closing tmp lock file for remote lock "<<remote_tmpfile<<" failed"<<std::endl;
    }
    // check again if lock exists, in case creating the tmp file took some time
    err = stat( remote_lock_file.c_str(), &fileStat ); 
    if (0 != err) {
      if (errno == ENOENT) {
        // ok, we can create lock
        if (rename(remote_tmpfile, remote_lock_file.c_str()) != 0) {
          odlog(WARNING)<<"Error renaming tmp file "<<remote_tmpfile<<" to lock file "<<remote_lock_file<<" for remote lock: "<<strerror(errno)<<std::endl;
          remove(remote_tmpfile);
          return true;
        }
        // check it's really there
        err = stat( remote_lock_file.c_str(), &fileStat ); 
        if (0 != err) {
          odlog(WARNING)<<"Error renaming lock file for remote lock, even though rename() did not return an error: "<<strerror(errno)<<std::endl;
          return true;
        }
      }
      else {
        // some error occurred opening the lock file
        odlog(WARNING)<<"Error opening lock file for remote lock we just renamed successfully "<<remote_lock_file<<": "<<strerror(errno)<<std::endl;
        return true;
      }
    }
    else {
      odlog(DEBUG)<<"The remote cache file is currently locked with a valid lock, will download from source"<<std::endl;
      return true;
    }
    
    // we have locked the remote file - so find out what to do with it
    if (remote_cache_link == "replicate") {
      // copy the file to the local cache, remove remote lock and exit with available=true
      odlog(DEBUG)<<"Replicating file "<<remote_cache_file<<" to local cache file "<<filename<<std::endl;
        // do the copy - taken directly from old datacache.cc
      char copybuf[65536];
      int fdest = open(filename.c_str(), O_WRONLY | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
      if(fdest == -1) {
        odlog(ERROR)<<"Failed to create file "<<filename<<" for writing: "<<strerror(errno)<<std::endl;
        return false;
      };
      
      int fsource = open(remote_cache_file.c_str(), O_RDONLY);
      if(fsource == -1) {
        close(fdest);
        odlog(ERROR)<<"Failed to open file "<<remote_cache_file<<" for reading: "<<strerror(errno)<<std::endl;
        return false;
      };
      
      // source and dest opened ok - copy in chunks
      for(;;) {
        ssize_t lin = read(fsource, copybuf, sizeof(copybuf));
        if(lin == -1) {
          close(fdest); close(fsource);
          odlog(ERROR)<<"Failed to read file "<<remote_cache_file<<": "<<strerror(errno)<<std::endl;
          return false;
        };
        if(lin == 0) break; // eof
        
        for(ssize_t lout = 0; lout < lin;) {
          ssize_t lwritten = write(fdest, copybuf+lout, lin-lout);
          if(lwritten == -1) {
            close(fdest); close(fsource);
            odlog(ERROR)<<"Failed to write file "<<filename<<": "<<strerror(errno)<<std::endl;
            return false;
          };
          lout += lwritten;
        };
      };
      close(fdest); close(fsource);
      if (remove(remote_lock_file.c_str()) != 0) {
        odlog(ERROR)<<"Failed to remove remote lock file "<<remote_lock_file<<": "<<strerror(errno)<<" Some manual intervention may be required"<<std::endl;
        return true;
      }
    }
    // create symlink from file in this cache to other cache
    else {
      odlog(DEBUG)<<"Creating temporary link from "<<filename<<" to remote cache file "<<remote_cache_file<<std::endl;
      if (symlink(remote_cache_file.c_str(), filename.c_str()) != 0) {
        odlog(ERROR)<<"Failed to create soft link to remote cache: "<<strerror(errno)<<" Will download "<<url<<" from source"<<std::endl;
        if (remove(remote_lock_file.c_str()) != 0) {
          odlog(ERROR)<<"Failed to remove remote lock file "<<remote_lock_file<<": "<<strerror(errno)<<" Some manual intervention may be required"<<std::endl;
        }
        return true;
      }
    }
    available = true;
  }
  else {
    // this is ok, we will download again
    odlog(WARNING)<<"Warning: error looking up attributes of cached file: "<<strerror(errno)<<std::endl;
  }
  return true;
}

bool FileCache::stop(std::string url) {
  
  // if cache file is a symlink, remove remote cache lock, meta file and symlink
  std::string filename = file(url);
  struct stat fileStat;
  if (lstat(filename.c_str(), &fileStat) == 0 && S_ISLNK(fileStat.st_mode)) {
    char buf[1024];
    int link_size = readlink(filename.c_str(), buf, sizeof(buf));
    if (link_size == -1) {
      odlog(ERROR)<<"Could not read target of link "<<filename<<": "<<strerror(errno)<<". Manual intervention may be required to remove lock in remote cache"<<std::endl;
      return false;
    }
    std::string remote_lock(buf); remote_lock.resize(link_size); remote_lock += ".lock";
    if (remove(remote_lock.c_str()) != 0 && errno != ENOENT) {
      odlog(ERROR)<<"Failed to unlock remote cache lock "<<remote_lock<<": "<<strerror(errno)<<". Manual intervention may be required"<<std::endl;
      return false;
    }
    // delete the meta file - not critical so don't fail on error
    if (remove(_getMetaFileName(url).c_str()) != 0) {
      odlog(ERROR)<<"Failed to unlock file with lock "<<_getLockFileName(url)<<": "<<strerror(errno)<<std::endl;
    }
    if (remove(filename.c_str()) != 0) {
      odlog(DEBUG)<<"Error removing file "<<filename<<": "<<strerror(errno)<<". Manual intervention may be required"<<std::endl;
      return false;
    }
  }
  
  // check the lock is ok to delete
  if (!_checkLock(url)) return false;

  // delete the lock
  if (remove(_getLockFileName(url).c_str()) != 0) {
    odlog(ERROR)<<"Failed to unlock file with lock "<<_getLockFileName(url)<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  return true;
}

bool FileCache::stopAndDelete(std::string url) {
  
  // if cache file is a symlink, remove remote cache lock
  std::string filename = file(url);
  struct stat fileStat;
  if (lstat(filename.c_str(), &fileStat) == 0 && S_ISLNK(fileStat.st_mode)) {
    char buf[1024];
    int link_size = readlink(filename.c_str(), buf, sizeof(buf));
    if (link_size == -1) {
      odlog(ERROR)<<"Could not read target of link "<<filename<<": "<<strerror(errno)<<". Manual intervention may be required to remove lock in remote cache"<<std::endl;
      return false;
    }
    std::string remote_lock(buf); remote_lock.resize(link_size); remote_lock += ".lock";
    if (remove(remote_lock.c_str()) != 0 && errno != ENOENT) {
      odlog(ERROR)<<"Failed to unlock remote cache lock "<<remote_lock<<": "<<strerror(errno)<<". Manual intervention may be required"<<std::endl;
      return false;
    }
  }
  
  // check the lock is ok to delete, and if so, remove the file and the
  // associated lock
  if (!_checkLock(url)) return false;
  
  // delete the cache file
  if (remove(file(url).c_str()) != 0 && errno != ENOENT) {
    odlog(ERROR)<<"Error removing cache file "<<file(url)<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  
  // delete the meta file - not critical so don't fail on error
  if (remove(_getMetaFileName(url).c_str()) != 0) {
    odlog(ERROR)<<"Failed to unlock file with lock "<<_getLockFileName(url)<<": "<<strerror(errno)<<std::endl;
  }
  
  // delete the lock
  if (remove(_getLockFileName(url).c_str()) != 0) {
    odlog(ERROR)<<"Failed to unlock file with lock "<<_getLockFileName(url)<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  return true;
}

// Return the full path of the cache file for url.
//
// The path is <cache_path>/<CACHE_DATA_DIR>/<hash>, where hash is
// FileCacheHash::getHash(url) with a "/" inserted after every
// CACHE_DIR_LENGTH characters, CACHE_DIR_LEVELS times, and cache_path
// belongs to the cache picked by _chooseCache() for that split hash.
std::string FileCache::file(std::string url) {

  std::string split_hash = FileCacheHash::getHash(url);

  // each inserted slash shifts the next insertion point by one
  int slash_pos = CACHE_DIR_LENGTH;
  int levels_left = CACHE_DIR_LEVELS;
  while (levels_left > 0) {
    split_hash.insert(slash_pos, "/");
    slash_pos += CACHE_DIR_LENGTH + 1;
    --levels_left;
  }

  const int cache_index = _chooseCache(split_hash);
  std::string cache_file_path = _caches[cache_index].cache_path;
  cache_file_path += "/";
  cache_file_path += CACHE_DATA_DIR;
  cache_file_path += "/";
  cache_file_path += split_hash;
  return cache_file_path;
}

bool FileCache::link_file(std::string link_path, std::string url) {

  // check the original file exists
  std::string cache_file = file(url);
  struct stat fileStat;
  if (lstat(cache_file.c_str(), &fileStat) != 0) {
    if (errno == ENOENT) { odlog(ERROR)<<"Error: Cache file "<<file(url)<<" does not exist"<<std::endl; }
    else { odlog(ERROR)<<"Error accessing cache file "<<file(url)<<": "<<strerror(errno)<<std::endl; }
    return false;
  }

  // choose cache
  struct CacheParameters cache_params = _caches[_chooseCache(FileCacheHash::getHash(url))];
  std::string hard_link_path = cache_params.cache_path + "/" + CACHE_JOB_DIR + "/" +_id;
  std::string cache_link_path = cache_params.cache_link_path;

  // check if cached file is a symlink - if so get link path from the remote cache
  if (S_ISLNK(fileStat.st_mode)) {
    char link_target_buf[1024];
    int link_size = readlink(file(url).c_str(), link_target_buf, sizeof(link_target_buf));
    if (link_size == -1) {
      odlog(ERROR)<<"Could not read target of link "<<file(url)<<": "<<strerror(errno)<<std::endl;
      return false;
    }
    // need to match the symlink target against the list of remote caches
    std::string link_target(link_target_buf); link_target.resize(link_size);
    for (std::vector<struct CacheParameters>::iterator it = _remote_caches.begin(); it != _remote_caches.end(); it++) {
      std::string remote_data_dir = it->cache_path+"/"+CACHE_DATA_DIR;
      if (link_target.find(remote_data_dir) == 0) {
        hard_link_path = it->cache_path+"/"+CACHE_JOB_DIR + "/" + _id;
        cache_link_path = it->cache_link_path;
        cache_file = link_target;
        break;
      }
    }
    if (hard_link_path == cache_params.cache_path + "/" + CACHE_JOB_DIR + "/" +_id) {
      odlog(ERROR)<<"Couldn't match link target "<<link_target<<" to any remote cache"<<std::endl;
      return false;
    }
  }

  // if cache_link_path is '.' then copy instead, bypassing the hard-link
  if (cache_link_path == ".") return copy_file(link_path, url);
  
  // create per-job hard link dir if necessary, making the final dir readable only by the job user
  if (!_cacheMkDir(hard_link_path, true)) {
    odlog(ERROR)<<"Error: Cannot create directory "<<hard_link_path<<" for per-job hard links."<<std::endl;
    return false;
  }
  if (chown(hard_link_path.c_str(), _uid, _gid) != 0) {
    odlog(ERROR)<<"Error: Cannot change owner of "<<hard_link_path<<std::endl;
    return false;
  }
  if (chmod(hard_link_path.c_str(), S_IRWXU) != 0) {
    odlog(ERROR)<<"Error: Cannot change permissions of "<<hard_link_path<<" to 0700"<<std::endl;
    return false;
  }
  
  std::string filename = link_path.substr(link_path.rfind("/")+1);
  std::string hard_link_file = hard_link_path + "/" + filename;
  std::string session_dir = link_path.substr(0, link_path.rfind("/"));
  
  // make the hard link
  if (link(cache_file.c_str(), hard_link_file.c_str()) != 0) {
    odlog(ERROR)<<"Failed to create hard link from "<<hard_link_file<<" to "<<cache_file<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  // ensure the hard link is readable by all and owned by root (or GM user)
  // (to make cache file immutable but readable by all)
  if (chown(hard_link_file.c_str(), getuid(), getgid()) != 0) {
    odlog(ERROR)<<"Failed to change owner of hard link to "<<getuid()<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  if (chmod(hard_link_file.c_str(), S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH) != 0) {
    odlog(ERROR)<<"Failed to change permissions of hard link to 0644: "<<strerror(errno)<<std::endl;
    return false;
  }
  
  // make necessary dirs for the soft link
  // this probably should have already been done... somewhere...
  if (!_cacheMkDir(session_dir, true)) return false;
  if (chown(session_dir.c_str(), _uid, _gid) != 0) {
    odlog(ERROR)<<"Failed to change owner of session dir to "<<_uid<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  if (chmod(session_dir.c_str(), S_IRWXU) != 0) {
    odlog(ERROR)<<"Failed to change permissions of session dir to 0700: "<<strerror(errno)<<std::endl;
    return false;
  }
  
  // make the soft link, changing the target if cache_link_path is defined
  if (!cache_link_path.empty()) {
    hard_link_file = cache_link_path + "/" + CACHE_JOB_DIR + "/" +_id + "/" + filename;
  }
  if (symlink(hard_link_file.c_str(), link_path.c_str()) != 0) {
    odlog(ERROR)<<"Failed to create soft link: "<<strerror(errno)<<std::endl;
    return false;
  }
  
  // change the owner of the soft link to the job user
  if (lchown(link_path.c_str(), _uid, _gid) != 0) {
     odlog(ERROR)<<"Failed to change owner of session dir to "<<_uid<<": "<<strerror(errno)<<std::endl;
     return false;
  }
  return true;
}

bool FileCache::copy_file(std::string dest_path, std::string url, bool executable) {
  
  // check the original file exists
  std::string cache_file = file(url);
  struct stat fileStat;
  if (stat(cache_file.c_str(), &fileStat) != 0) {
    if (errno == ENOENT) { odlog(ERROR)<<"Error: Cache file "<<cache_file<<" does not exist"<<std::endl; }
    else { odlog(ERROR)<<"Error accessing cache file "<<cache_file<<": "<<strerror(errno)<<std::endl; }
    return false;
  }
  
  // make necessary dirs for the copy
  // this probably should have already been done... somewhere...
  std::string dest_dir = dest_path.substr(0, dest_path.rfind("/"));
  if (!_cacheMkDir(dest_dir, true)) return false;
  if (chown(dest_dir.c_str(), _uid, _gid) != 0) {
    odlog(ERROR)<<"Failed to change owner of destination dir to "<<_uid<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  if (chmod(dest_dir.c_str(), S_IRWXU) != 0) {
    odlog(ERROR)<<"Failed to change permissions of session dir to 0700: "<<strerror(errno)<<std::endl;
    return false;
  }

  // do the copy - taken directly from old datacache.cc
  char buf[65536];
  mode_t perm = S_IRUSR | S_IWUSR;
  if (executable) perm |= S_IXUSR;
  int fdest = open(dest_path.c_str(), O_WRONLY | O_CREAT | O_EXCL, perm);
  if(fdest == -1) {
    odlog(ERROR)<<"Failed to create file "<<dest_path<<" for writing: "<<strerror(errno)<<std::endl;
    return false;
  };
  fchown(fdest, _uid, _gid);
  
  int fsource = open(cache_file.c_str(), O_RDONLY);
  if(fsource == -1) {
    close(fdest);
    odlog(ERROR)<<"Failed to open file "<<cache_file<<" for reading: "<<strerror(errno)<<std::endl;
    return false;
  };
  
  // source and dest opened ok - copy in chunks
  for(;;) {
    ssize_t lin = read(fsource, buf, sizeof(buf));
    if(lin == -1) {
      close(fdest); close(fsource);
      odlog(ERROR)<<"Failed to read file "<<cache_file<<": "<<strerror(errno)<<std::endl;
      return false;
    };
    if(lin == 0) break; // eof
    
    for(ssize_t lout = 0; lout < lin;) {
      ssize_t lwritten = write(fdest, buf+lout, lin-lout);
      if(lwritten == -1) {
        close(fdest); close(fsource);
        odlog(ERROR)<<"Failed to write file "<<dest_path<<": "<<strerror(errno)<<std::endl;
        return false;
      };
      lout += lwritten;
    };
  };
  close(fdest); close(fsource);
  return true;
}

bool FileCache::release() {
  
  // go through all caches (including remote caches) and remove per-job dirs for our job id
  std::vector<std::string> job_dirs;
  for (int i = 0; i < (int)_caches.size(); i++) job_dirs.push_back(_caches[i].cache_path + "/" + CACHE_JOB_DIR + "/" + _id);
  for (int i = 0; i < (int)_remote_caches.size(); i++) job_dirs.push_back(_remote_caches[i].cache_path + "/" + CACHE_JOB_DIR + "/" + _id);
  
  for (int i = 0; i < (int)job_dirs.size(); i++) {
    
    std::string job_dir = job_dirs[i];    
    // check if job dir exists
    DIR * dirp = opendir(job_dir.c_str());
    if ( dirp == NULL) {
      if (errno == ENOENT) continue;
      odlog(ERROR)<<"Error opening per-job dir "<<job_dir<<": "<<strerror(errno)<<std::endl;
      return false;
    }
    
    // list all files in the dir and delete them
    struct dirent *dp;
    errno = 0;
    while ((dp = readdir(dirp)))  {
      if (strcmp(dp->d_name, ".") == 0 || strcmp(dp->d_name, "..") == 0) continue;
      std::string to_delete = job_dir + "/" + dp->d_name; 
      odlog(DEBUG)<<"Removing "<<to_delete<<std::endl;
      if (remove(to_delete.c_str()) != 0) {
        odlog(ERROR)<<"Error: failed to remove hard link "<<to_delete<<": "<<strerror(errno)<<std::endl;
        closedir(dirp);
        return false;
      }
    }
    closedir(dirp);
    
    if (errno != 0) {
      odlog(ERROR)<<"Error listing dir "<<job_dir<<": "<<strerror(errno)<<std::endl;
      return false;
    }
    
    // remove now-empty dir
    odlog(DEBUG)<<"Removing "<<job_dir<<std::endl;
    if (rmdir(job_dir.c_str()) != 0) {
      odlog(ERROR)<<"Error: failed to remove cache per-job dir "<<job_dir<<": "<<strerror(errno)<<std::endl;
      return false;
    }
  }
  return true;
}

bool FileCache::addDN(std::string url, std::string DN, time_t expiry_time) {
  
  if (DN.empty()) return false;
  if (expiry_time == 0) expiry_time = time(NULL) + CACHE_DEFAULT_AUTH_VALIDITY;
  
  // add DN to the meta file. If already there, renew the expiry time
  std::string meta_file = _getMetaFileName(url);
  struct stat fileStat;
  int err = stat( meta_file.c_str(), &fileStat ); 
  if (0 != err) {
    odlog(ERROR)<<"Error reading meta file "<<meta_file<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  FILE * pFile;
  char mystring [fileStat.st_size+1];
  pFile = fopen (meta_file.c_str(), "r");
  if (pFile == NULL) {
    odlog(ERROR)<<"Error opening meta file "<<meta_file<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  // get the first line
  fgets (mystring, sizeof(mystring), pFile);

  // check for correct formatting and possible hash collisions between URLs
  std::string first_line(mystring);
  if (first_line.find('\n') == std::string::npos) first_line += '\n';
  std::string::size_type space_pos = first_line.rfind(' ');
  if (space_pos == std::string::npos) space_pos = first_line.length()-1;
  
  if (first_line.substr(0, space_pos) != url) {
    odlog(ERROR)<<"Error: File "<<url<<" is already cached at "<<file(url)<<" under a different URL: "<<first_line.substr(0, space_pos)<<" - will not add DN to cached list"<<std::endl;
    fclose(pFile);
    return false;
  }

  // read in list of DNs
  std::vector<std::string> dnlist;
  dnlist.push_back(DN + ' ' + inttostring(expiry_time) + '\n');
  
  char * res = fgets (mystring, sizeof(mystring), pFile);
  while (res) {
    std::string dnstring(mystring);
    space_pos = dnstring.rfind(' ');
    if (space_pos == std::string::npos) {
      odlog(WARNING)<<"Bad format detected in file "<<meta_file<<", in line "<<dnstring<<std::endl;
      continue;
    }
    // remove expired DNs (after some grace period)
    if (dnstring.substr(0, space_pos) != DN) {
      if (dnstring.find('\n') != std::string::npos) dnstring.resize(dnstring.find('\n'));
      int exp_time;
      if (!stringtoint(dnstring.substr(space_pos+1), exp_time)) continue;
      if (exp_time > time(NULL) - CACHE_DEFAULT_AUTH_VALIDITY) dnlist.push_back(dnstring+'\n');
    }
    res = fgets (mystring, sizeof(mystring), pFile);
  }
  fclose(pFile);

  // write everything back to the file
  pFile = fopen (meta_file.c_str(), "w");
  if (pFile == NULL) {
    odlog(ERROR)<<"Error opening meta file for writing "<<meta_file<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  fputs ((char*)first_line.c_str(), pFile);
  for (std::vector<std::string>::iterator i = dnlist.begin(); i != dnlist.end(); i++) {
    fputs ((char*)i->c_str(), pFile);
  }
  fclose (pFile);
  return true;
}

// Check whether DN is listed in the meta file of url with an expiry time
// that is still in the future.
//
// The first line of the meta file is skipped; every following line is
// expected to have the form "<DN> <expiry time>".
//
// Returns true only if a line for DN is found and its expiry time is later
// than the current time. Returns false if DN is empty, the meta file is
// missing or unreadable, DN is not listed, its entry has expired, or its
// expiry time cannot be converted.
bool FileCache::checkDN(std::string url, std::string DN) {
  
  if (DN.empty()) return false;
  
  std::string meta_file = _getMetaFileName(url);
  struct stat fileStat;
  if (stat(meta_file.c_str(), &fileStat) != 0) {
    // a missing meta file is not worth an error message
    if (errno != ENOENT) odlog(ERROR)<<"Error reading meta file "<<meta_file<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  FILE * pFile = fopen (meta_file.c_str(), "r");
  if (pFile == NULL) {
    odlog(ERROR)<<"Error opening meta file "<<meta_file<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  
  // Heap buffer large enough to hold any single line of the file. The size is
  // kept at two bytes or more because fgets() called with a one-byte buffer
  // may return an empty string without ever signalling end of file (glibc
  // behaves this way), which would make the loop below spin forever on an
  // empty meta file.
  std::vector<char> line(static_cast<std::vector<char>::size_type>(fileStat.st_size) + 2);
  const int line_size = static_cast<int>(line.size());
  
  // skip the first line; nothing to check if the file has no lines at all
  if (fgets(&line[0], line_size, pFile) == NULL) {
    fclose(pFile);
    return false;
  }

  // read in list of DNs
  while (fgets(&line[0], line_size, pFile) != NULL) {
    std::string dnstring(&line[0]);
    std::string::size_type space_pos = dnstring.rfind(' ');
    if (dnstring.substr(0, space_pos) != DN) continue;
    
    if (dnstring.find('\n') != std::string::npos) dnstring.resize(dnstring.find('\n'));
    std::string exp_time_s = dnstring.substr(space_pos+1);
    // convert to int
    int exp_time_i;
    if (!stringtoint(exp_time_s, exp_time_i) || exp_time_i < 0) {
      odlog(ERROR)<<"Error with converting time in file "<<_getMetaFileName(url)<<": "<<dnstring<<std::endl;
      fclose(pFile);
      return false;
    }
    fclose(pFile);
    if (exp_time_i > time(NULL)) {
      odlog(DEBUG)<<"DN "<<DN<<" is cached and is valid for another "<<(exp_time_i-time(NULL))<<"s for URL "<<url<<std::endl;
      return true;
    }
    odlog(DEBUG)<<"DN "<<DN<<" is cached but has expired for URL "<<url<<std::endl;
    return false;
  }
  fclose(pFile);
  return false;
}

// Tell whether a creation date can be obtained for url, i.e. whether the
// cache file for url can be stat()ed. Symlinks are followed.
bool FileCache::created_available(std::string url) {
  
  struct stat info;
  const std::string path = file(url);
  return stat(path.c_str(), &info) == 0;
}

time_t FileCache::created(std::string url) {
  
  // check the cache file exists
  std::string cache_file = file(url);
  // follow symlinks
  struct stat fileStat;
  if (stat(cache_file.c_str(), &fileStat) != 0) {
    if (errno == ENOENT) { odlog(ERROR)<<"Error: Cache file "<<cache_file<<" does not exist"<<std::endl; }
    else { odlog(ERROR)<<"Error accessing cache file "<<cache_file<<": "<<strerror(errno)<<std::endl; }
    return 0;
  }
  
  time_t mtime = fileStat.st_mtime;
  if (mtime <= 0) return 0;
  return mtime;
}

// Tell whether an expiry time is stored for url, i.e. whether validtill(url)
// yields a non-zero value.
bool FileCache::validtill_available(std::string url) {
  const int expiry = (int)validtill(url);
  return expiry != 0;
}

time_t FileCache::validtill(std::string url) {
  
  // open meta file and pick out expiry time if it exists
  FILE * pFile;
  char mystring [1024]; // we only need the first line
  pFile = fopen ((char*)_getMetaFileName(url).c_str(), "r");
  if (pFile == NULL) {
    odlog(ERROR)<<"Error opening meta file "<<_getMetaFileName(url)<<": "<<strerror(errno)<<std::endl;
    return 0;
  }
  fgets (mystring, sizeof(mystring), pFile);
  fclose (pFile);
  
  std::string meta_str(mystring);
  // get the first line
  if (meta_str.find('\n') != std::string::npos) meta_str.resize(meta_str.find('\n'));
  
  // if the file contains only the url, we don't have an expiry time
  if (meta_str == url) return 0;

  // check sensible formatting - should be like "rls://rls1.ndgf.org/file1 1234567890"
  if (meta_str.substr(0, url.length()+1) != url+" ") {
    odlog(ERROR)<<"Error: mismatching url in file "<<_getMetaFileName(url)<<": "<<meta_str<<" Expected "<<url<<std::endl;
    return 0;
  }
  if (meta_str.length() != url.length() + 11) {
    odlog(ERROR)<<"Error: bad format in file "<<_getMetaFileName(url)<<": "<<meta_str<<std::endl;
    return 0;
  }
  if (meta_str.substr(url.length(), 1) != " ") {
    odlog(ERROR)<<"Error: bad separator in file "<<_getMetaFileName(url)<<": "<<meta_str<<std::endl;
    return 0;    
  }
  if (meta_str.substr(url.length() + 1).length() != 10) {
    odlog(ERROR)<<"Error: bad value of expiry time in "<<_getMetaFileName(url)<<": "<<meta_str<<std::endl;
    return 0;
  }
  
  // convert to int
  int exp_time;
  if(!stringtoint(meta_str.substr(url.length() + 1).c_str(), exp_time) || exp_time < 0) {
    odlog(ERROR)<<"Error with converting time in file "<<_getMetaFileName(url)<<": "<<meta_str<<std::endl;
    return 0;
  }
  return (time_t)exp_time;
}

bool FileCache::validtill_force(std::string url, time_t val) {
  
  std::string meta_file = _getMetaFileName(url);
  FILE * pFile;
  pFile = fopen ((char*)meta_file.c_str(), "w");
  if (pFile == NULL) {
    odlog(ERROR)<<"Error opening meta file "<<meta_file<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  std::string file_data = url + ' ' + inttostring(val) + '\n';
  fputs ((char*)file_data.c_str(), pFile);
  fclose (pFile);
  return true;
}

// Store val as the expiry time of url, but only when no expiry time is
// stored yet. Returns false without writing if one is already present;
// otherwise returns the result of validtill_force().
bool FileCache::validtill(std::string url, time_t val) {
  const int current = (int)validtill(url);
  if (current != 0) return false;
  return validtill_force(url, val);
}

// Two FileCache objects compare equal when they have the same number of
// caches, every pair of caches has the same cache path and link path, and
// the job id, uid and gid are the same.
bool FileCache::operator==(const FileCache& a) {
  const bool same_job = (a._id == _id) && (a._uid == _uid) && (a._gid == _gid);
  if (!same_job) return false;
  
  // sizes must agree before the caches can be compared element by element
  if (a._caches.size() != _caches.size()) return false;
  
  for (int n = 0; n < (int)a._caches.size(); n++) {
    const bool same_path = !(a._caches[n].cache_path != _caches[n].cache_path);
    const bool same_link = !(a._caches[n].cache_link_path != _caches[n].cache_link_path);
    if (!same_path || !same_link) return false;
  }
  return true;
}
// Check that the lock file for url exists and is owned by this process.
//
// The first line of the lock file is expected to have the form
// "<pid>@<hostname>" and is compared with _pid and _hostname.
//
// Returns true only if both match. Returns false if the lock file is missing
// or unreadable, is badly formatted, or names a different host or process.
bool FileCache::_checkLock(std::string url) {
  
  std::string filename = file(url);
  std::string lock_file = _getLockFileName(url);
  
  // check for existence of lock file
  struct stat fileStat;
  if (stat(lock_file.c_str(), &fileStat) != 0) {
    if (errno == ENOENT) { odlog(ERROR)<<"Error: lock file "<<lock_file<<" doesn't exist"<<std::endl; }
    else { odlog(ERROR)<<"Error listing lock file "<<lock_file<<": "<<strerror(errno)<<std::endl; }
    return false;
  }
  
  // check the lock file's pid and hostname matches ours
  FILE * pFile = fopen (lock_file.c_str(), "r");
  if (pFile == NULL) {
    odlog(ERROR)<<"Error opening lock file "<<lock_file<<": "<<strerror(errno)<<std::endl;
    return false;
  }
  // heap buffer sized from the file, so the stack use does not depend on
  // the size of the lock file
  std::vector<char> lock_info(static_cast<std::vector<char>::size_type>(fileStat.st_size) + 2);
  // an empty or unreadable file leaves an empty string, which is then
  // rejected by the format check below
  if (fgets(&lock_info[0], static_cast<int>(lock_info.size()), pFile) == NULL) lock_info[0] = '\0';
  fclose (pFile);

  std::string lock_info_s(&lock_info[0]);
  std::string::size_type index = lock_info_s.find("@", 0);
  if (index == std::string::npos) {
    odlog(ERROR)<<"Error with formatting in lock file "<<lock_file<<": "<<lock_info_s<<std::endl;
    return false;
  }
  
  if (lock_info_s.substr(index+1) != _hostname) {
    odlog(DEBUG)<<"Lock is owned by a different host"<<std::endl;
    // TODO: here do ssh login and check
    return false;
  }
  if (lock_info_s.substr(0, index) != _pid) {
    odlog(ERROR)<<"Error: Another process owns the lock on file "<<filename<<". Must go back to start()"<<std::endl;
    return false;
  }
  return true;
}

// Name of the lock file for url: the cache file name followed by
// CACHE_LOCK_SUFFIX.
std::string FileCache::_getLockFileName(std::string url) {
  std::string lock_name(file(url));
  lock_name += CACHE_LOCK_SUFFIX;
  return lock_name;
}

// Name of the meta file for url: the cache file name followed by
// CACHE_META_SUFFIX.
std::string FileCache::_getMetaFileName(std::string url) {
  std::string meta_name(file(url));
  meta_name += CACHE_META_SUFFIX;
  return meta_name;
}

bool FileCache::_cacheMkDir(std::string dir, bool all_read) {
  
  struct stat fileStat;
  int err = stat( dir.c_str(), &fileStat ); 
  if (0 != err) {
    odlog(DEBUG)<<"Creating directory "<<dir<<std::endl;
    std::string::size_type slashpos = 0;
    
    // set perms based on all_read
    mode_t perm = S_IRWXU;
    if (all_read) perm |= S_IRGRP | S_IROTH | S_IXGRP | S_IXOTH;
    
    do {
      slashpos = dir.find("/", slashpos+1);
      std::string dirname = dir.substr(0, slashpos);
      // list dir to see if it exists (we can't tell the difference between
      // dir already exists and permission denied)
      struct stat statbuf;
      if (stat(dirname.c_str(), &statbuf) == 0) {
        continue;
      };
      if (mkdir(dirname.c_str(), perm) != 0) {
        if (errno != EEXIST) {
          odlog(ERROR)<<"Error creating required dirs: "<<strerror(errno)<<std::endl;
          return false;
        };
      };
      // chmod to get around GM umask setting
      if (chmod(dirname.c_str(), perm) != 0) {
        odlog(ERROR)<<"Error changing permission of dir "<<dirname<<": "<<strerror(errno)<<std::endl;
        return false;
      };
    } while (slashpos != std::string::npos);
  }
  return true;
}

// Choose which cache to use for a given hash.
//
// The first two characters of the hash are read as a hexadecimal number and
// the cache index is that number modulo the number of caches. Because only
// two hex digits are used, this limits the number of usable caches to 256.
//
// Returns 0 if the hash is shorter than two characters, if there are no
// caches, or if the hash does not start with a hexadecimal digit.
int FileCache::_chooseCache(std::string hash) {
  if (hash.length() < 2 || _caches.size() < 1) return 0;
  
  const char hash_start[3] = {hash[0], hash[1], '\0'};
  // %x stores into an unsigned int; the result is checked so that a prefix
  // which is not hexadecimal never leaves the index undefined
  unsigned int index = 0;
  if (sscanf(hash_start, "%x", &index) != 1) return 0;
  
  return static_cast<int>(index % _caches.size());
}
