# Source code for aws_ops_alpha.vendor.hashes

# -*- coding: utf-8 -*-

"""
Make hashlib more user friendly.

Usage::

        >>> from fixa.hashes import hashes
        >>> print(hashes.of_bytes(b"hello"))
        5d41402abc4b2a76b9719d911017c592

        >>> print(hashes.of_str("world"))
        7d793037a0760186574b0282f2f435e7

        >>> print(hashes.of_file("hashes.py"))
        4cddcb5562cbff652b0e4c8a0300337a

Ref:

- hashlib: https://docs.python.org/3/library/hashlib.html
"""

import typing as T
import enum
import hashlib
from pathlib import Path

# Version string of this module.
__version__ = "0.1.1"

class HashAlgoEnum(str, enum.Enum):
    """
    Hash algorithms supported by :class:`Hashes`.

    Each member's value is the name of the matching constructor in
    :mod:`hashlib` (e.g. ``HashAlgoEnum.sha256.value == "sha256"``), which is
    how the algorithm is looked up. Because the enum also derives from
    ``str``, a member compares equal to its plain-string value.
    """

    md5 = "md5"
    sha1 = "sha1"
    sha224 = "sha224"
    sha256 = "sha256"
    sha384 = "sha384"
    sha512 = "sha512"


class Hashes:
    """
    A hashlib wrapper class that lets you compute a hash in one line.

    An instance stores a default algorithm (a ``hashlib`` constructor) and a
    default output format (hex string or raw bytes). Every ``of_*`` method
    accepts optional ``algo`` / ``hexdigest`` arguments that override those
    defaults for that single call without modifying the instance.

    :param algo: default hash algorithm; md5 unless specified.
    :param hexdigest: if True (default), digests are returned as hex strings,
        otherwise as raw bytes.
    """

    def __init__(
        self,
        algo: HashAlgoEnum = HashAlgoEnum.md5,
        hexdigest: bool = True,
    ):
        # ``self.algo`` holds the hashlib constructor itself (e.g. ``hashlib.md5``),
        # not the enum member, so ``self.algo()`` creates a fresh hash object.
        self.algo = getattr(hashlib, algo.value)
        self.hexdigest: bool = hexdigest

    def use_md5(self) -> "Hashes":
        """
        Use md5 hash algorithm.
        """
        self.algo = hashlib.md5
        return self

    def use_sha1(self) -> "Hashes":
        """
        Use sha1 hash algorithm.
        """
        self.algo = hashlib.sha1
        return self

    def use_sha224(self) -> "Hashes":
        """
        Use sha224 hash algorithm.
        """
        self.algo = hashlib.sha224
        return self

    def use_sha256(self) -> "Hashes":
        """
        Use sha256 hash algorithm.
        """
        self.algo = hashlib.sha256
        return self

    def use_sha384(self) -> "Hashes":
        """
        Use sha384 hash algorithm.
        """
        self.algo = hashlib.sha384
        return self

    def use_sha512(self) -> "Hashes":
        """
        Use sha512 hash algorithm.
        """
        self.algo = hashlib.sha512
        return self

    def use_hexdigest(self) -> "Hashes":
        """
        Return hash in hex string.
        """
        self.hexdigest = True
        return self

    def use_hexdigesst(self) -> "Hashes":
        """
        Misspelled alias of :meth:`use_hexdigest`, kept so existing callers
        keep working.
        """
        return self.use_hexdigest()

    def use_bytesdigest(self) -> "Hashes":
        """
        Return hash in bytes.
        """
        self.hexdigest = False
        return self

    def _construct(self, algo: T.Optional[HashAlgoEnum] = None):
        """
        Create a new hash object, using ``algo`` if given, otherwise the
        instance default.
        """
        if algo is None:
            return self.algo()
        return getattr(hashlib, algo.value)()

    def _digest(self, m, hexdigest: T.Optional[bool]) -> T.Union[str, bytes]:
        """
        Finalize hash object ``m`` as a hex string or raw bytes.

        ``hexdigest=None`` means "use the instance default".
        """
        as_hex = self.hexdigest if hexdigest is None else hexdigest
        if as_hex:
            return m.hexdigest()
        return m.digest()

    def of_str(
        self,
        s: str,
        algo: T.Optional[HashAlgoEnum] = None,
        hexdigest: T.Optional[bool] = None,
    ) -> T.Union[str, bytes]:
        """
        Return hash value of a string. The string is encoded as UTF-8 first.
        """
        m = self._construct(algo)
        m.update(s.encode("utf-8"))
        return self._digest(m, hexdigest)

    def of_bytes(
        self,
        b: bytes,
        algo: T.Optional[HashAlgoEnum] = None,
        hexdigest: T.Optional[bool] = None,
    ) -> T.Union[str, bytes]:
        """
        Return hash value of a bytes.
        """
        m = self._construct(algo)
        m.update(b)
        return self._digest(m, hexdigest)

    def of_str_or_bytes(
        self,
        s_or_b: T.Union[bytes, str],
        algo: T.Optional[HashAlgoEnum] = None,
        hexdigest: T.Optional[bool] = None,
    ) -> T.Union[str, bytes]:
        """
        Return hash value of a bytes or string.
        """
        if isinstance(s_or_b, str):
            return self.of_str(s_or_b, algo, hexdigest)
        return self.of_bytes(s_or_b, algo, hexdigest)

    def of_file(
        self,
        abspath: T.Union[str, Path, T.Any],
        nbytes: int = 0,
        chunk_size: int = 1024,
        algo: T.Optional[HashAlgoEnum] = None,
        hexdigest: T.Optional[bool] = None,
    ) -> T.Union[str, bytes]:
        """
        Return hash value of a file, or only a piece of a file

        :param abspath: path of the file; anything ``pathlib.Path`` accepts.
        :param nbytes: if positive, only the first ``nbytes`` bytes are
            hashed; 0 (default) hashes the entire file.
        :param chunk_size: maximum number of bytes read per call.
        """
        p = Path(abspath)
        with p.open("rb") as f:
            return self.of_file_object(
                f,
                nbytes=nbytes,
                chunk_size=chunk_size,
                algo=algo,
                hexdigest=hexdigest,
            )

    def of_file_object(
        self,
        f,
        nbytes: int = 0,
        chunk_size: int = 4096,
        algo: T.Optional[HashAlgoEnum] = None,
        hexdigest: T.Optional[bool] = None,
    ) -> T.Union[str, bytes]:
        """
        Return hash value of an already opened file object, reading from its
        current position.

        :param f: object with a ``read(n)`` method; the data it returns is
            fed to the hash object, so it should be opened in binary mode.
        :param nbytes: if positive, hash at most the next ``nbytes`` bytes;
            0 (default) reads until ``read`` returns no more data.
        :param chunk_size: maximum number of bytes requested per ``read``.

        :raises ValueError: if ``nbytes`` is negative or ``chunk_size`` is
            smaller than 1.
        """
        if nbytes < 0:
            raise ValueError("nbytes cannot smaller than 0")
        if chunk_size < 1:
            raise ValueError("chunk_size cannot smaller than 1")

        m = self._construct(algo)

        if nbytes:  # use first n bytes only
            # Count the bytes actually received rather than the bytes
            # requested, so short reads still add up to ``nbytes`` and a
            # stream shorter than ``nbytes`` stops at EOF.
            remaining = nbytes
            while remaining > 0:
                data = f.read(min(chunk_size, remaining))
                if not data:
                    break
                m.update(data)
                remaining -= len(data)
        else:  # use entire content
            while True:
                data = f.read(chunk_size)
                if not data:
                    break
                m.update(data)
        return self._digest(m, hexdigest)

    def of_folder(
        self,
        abspath: T.Union[str, Path, T.Any],
        algo: T.Optional[HashAlgoEnum] = None,
        hexdigest: T.Optional[bool] = None,
    ) -> T.Union[str, bytes]:
        """
        Return hash value of a folder. It is based on the concatenation of
        the hex hash values of all files in the folder (recursively). The
        files are ordered by their paths. Only file contents are hashed;
        file names are used for ordering only.

        :raises NotADirectoryError: if ``abspath`` is not a directory.
        """
        path = Path(abspath)
        if not path.is_dir():
            raise NotADirectoryError(f"{path} is not a folder!")
        # Per-file digests are always taken as hex strings so they can be
        # joined as text even when the caller asked for a bytes digest;
        # ``hexdigest`` only controls the format of the final value.
        file_hashes = [
            self.of_file(p, algo=algo, hexdigest=True)
            for p in sorted(path.glob("**/*"), key=str)
            if p.is_file()
        ]
        return self.of_str(
            s="".join(file_hashes),
            algo=algo,
            hexdigest=hexdigest,
        )

    def of_paths(
        self,
        paths: T.List[T.Union[str, Path, T.Any]],
        algo: T.Optional[HashAlgoEnum] = None,
        hexdigest: T.Optional[bool] = None,
    ) -> T.Union[str, bytes]:
        """
        Return hash value of a list of paths. It is based on the concatenation of
        the hex hash values of all files and folders, in the order given.
        Paths that are neither a file nor a folder are skipped.
        """
        # As in ``of_folder``: intermediate digests are always hex strings so
        # that joining works regardless of the requested output format.
        child_hashes = []
        for path in paths:
            path = Path(path)
            if path.is_dir():
                child_hashes.append(self.of_folder(path, algo=algo, hexdigest=True))
            elif path.is_file():
                child_hashes.append(self.of_file(path, algo=algo, hexdigest=True))
            else:  # pragma: no cover
                pass
        return self.of_str(
            s="".join(child_hashes),
            algo=algo,
            hexdigest=hexdigest,
        )


# Shared module-level instance built with the constructor defaults
# (md5 algorithm, hex-string digests), so callers can write
# ``hashes.of_str("...")`` without instantiating ``Hashes`` themselves.
hashes = Hashes()